casync: manifest compare script (#25129)

* casync compare script * typo * cleanup output
3 years ago · 205f6f7414
parent f0b5ff5c1a
commit 205f6f7414
1 changed files with 74 additions and 0 deletions
--- a/system/hardware/tici/tests/compare_casync_manifest.py
+++ b/system/hardware/tici/tests/compare_casync_manifest.py
@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+import argparse
+import collections
+import multiprocessing
+import os
+from typing import Dict, List
+
+import requests
+from tqdm import tqdm
+
+import system.hardware.tici.casync as casync
+
+
+def get_chunk_download_size(chunk):
+  sha = chunk.sha.hex()
+  path = os.path.join(remote_url, sha[:4], sha + ".cacnk")
+  if os.path.isfile(path):
+    return os.path.getsize(path)
+  else:
+    r = requests.head(path)
+    r.raise_for_status()
+    return int(r.headers['content-length'])
+
+
+if __name__ == "__main__":
+
+  parser = argparse.ArgumentParser(description='Compute overlap between two casync manifests')
+  parser.add_argument('frm')
+  parser.add_argument('to')
+  args = parser.parse_args()
+
+  frm = casync.parse_caibx(args.frm)
+  to = casync.parse_caibx(args.to)
+  remote_url = args.to.replace('.caibx', '')
+
+  most_common = collections.Counter(t.sha for t in to).most_common(1)[0][0]
+
+  frm_dict = casync.build_chunk_dict(frm)
+
+  # Get content-length for each chunk
+  with multiprocessing.Pool() as pool:
+    szs = list(tqdm(pool.imap(get_chunk_download_size, to), total=len(to)))
+  chunk_sizes = {t.sha: sz for (t, sz) in zip(to, szs)}
+
+  sources: Dict[str, List[int]] = {
+    'seed': [],
+    'remote_uncompressed': [],
+    'remote_compressed': [],
+  }
+
+  for chunk in to:
+    # Assume most common chunk is the zero chunk
+    if chunk.sha == most_common:
+      continue
+
+    if chunk.sha in frm_dict:
+      sources['seed'].append(chunk.length)
+    else:
+      sources['remote_uncompressed'].append(chunk.length)
+      sources['remote_compressed'].append(chunk_sizes[chunk.sha])
+
+  print()
+  print("Update statistics (excluding zeros)")
+  print()
+  print("Download only with no seed:")
+  print(f"  Remote (uncompressed)\t\t{sum(sources['seed'] + sources['remote_uncompressed']) / 1000 / 1000:.2f} MB\tn = {len(to)}")
+  print(f"  Remote (compressed download)\t{sum(chunk_sizes.values()) / 1000 / 1000:.2f} MB\tn = {len(to)}")
+  print()
+  print("Upgrade with seed partition:")
+  print(f"  Seed   (uncompressed)\t\t{sum(sources['seed']) / 1000 / 1000:.2f} MB\t\t\t\tn = {len(sources['seed'])}")
+  sz, n = sum(sources['remote_uncompressed']), len(sources['remote_uncompressed'])
+  print(f"  Remote (uncompressed)\t\t{sz / 1000 / 1000:.2f} MB\t(avg {sz / 1000 / 1000 / n:4f} MB)\tn = {n}")
+  sz, n = sum(sources['remote_compressed']), len(sources['remote_compressed'])
+  print(f"  Remote (compressed download)\t{sz / 1000 / 1000:.2f} MB\t(avg {sz / 1000 / 1000 / n:4f} MB)\tn = {n}")