diff --git a/system/hardware/tici/tests/compare_casync_manifest.py b/system/hardware/tici/tests/compare_casync_manifest.py new file mode 100755 index 0000000000..5e5fa24556 --- /dev/null +++ b/system/hardware/tici/tests/compare_casync_manifest.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +import argparse +import collections +import multiprocessing +import os +from typing import Dict, List + +import requests +from tqdm import tqdm + +import system.hardware.tici.casync as casync + + +def get_chunk_download_size(chunk): + sha = chunk.sha.hex() + path = os.path.join(remote_url, sha[:4], sha + ".cacnk") + if os.path.isfile(path): + return os.path.getsize(path) + else: + r = requests.head(path) + r.raise_for_status() + return int(r.headers['content-length']) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description='Compute overlap between two casync manifests') + parser.add_argument('frm') + parser.add_argument('to') + args = parser.parse_args() + + frm = casync.parse_caibx(args.frm) + to = casync.parse_caibx(args.to) + remote_url = args.to.replace('.caibx', '') + + most_common = collections.Counter(t.sha for t in to).most_common(1)[0][0] + + frm_dict = casync.build_chunk_dict(frm) + + # Get content-length for each chunk + with multiprocessing.Pool() as pool: + szs = list(tqdm(pool.imap(get_chunk_download_size, to), total=len(to))) + chunk_sizes = {t.sha: sz for (t, sz) in zip(to, szs)} + + sources: Dict[str, List[int]] = { + 'seed': [], + 'remote_uncompressed': [], + 'remote_compressed': [], + } + + for chunk in to: + # Assume most common chunk is the zero chunk + if chunk.sha == most_common: + continue + + if chunk.sha in frm_dict: + sources['seed'].append(chunk.length) + else: + sources['remote_uncompressed'].append(chunk.length) + sources['remote_compressed'].append(chunk_sizes[chunk.sha]) + + print() + print("Update statistics (excluding zeros)") + print() + print("Download only with no seed:") + print(f" Remote (uncompressed)\t\t{sum(sources['seed'] + sources['remote_uncompressed']) / 1000 / 1000:.2f} MB\tn = {len(to)}") + print(f" Remote (compressed download)\t{sum(chunk_sizes.values()) / 1000 / 1000:.2f} MB\tn = {len(to)}") + print() + print("Upgrade with seed partition:") + print(f" Seed (uncompressed)\t\t{sum(sources['seed']) / 1000 / 1000:.2f} MB\t\t\t\tn = {len(sources['seed'])}") + sz, n = sum(sources['remote_uncompressed']), len(sources['remote_uncompressed']) + print(f" Remote (uncompressed)\t\t{sz / 1000 / 1000:.2f} MB\t(avg {sz / 1000 / 1000 / n:4f} MB)\tn = {n}") + sz, n = sum(sources['remote_compressed']), len(sources['remote_compressed']) + print(f" Remote (compressed download)\t{sz / 1000 / 1000:.2f} MB\t(avg {sz / 1000 / 1000 / n:4f} MB)\tn = {n}")