class BinaryChunkReader(ChunkReader):
    """Reads chunks from an already-open binary file object."""

    def __init__(self, file_like: IO[bytes]) -> None:
        super().__init__()
        self.f = file_like

    def read(self, chunk: Chunk) -> bytes:
        """Seek to ``chunk.offset`` and return up to ``chunk.length`` bytes."""
        self.f.seek(chunk.offset)
        return self.f.read(chunk.length)


class FileChunkReader(BinaryChunkReader):
    """Reads chunks from a local file that this reader opens (and closes) itself."""

    def __init__(self, path: str) -> None:
        super().__init__(open(path, 'rb'))

    def __del__(self):
        # `f` is missing if open() raised inside __init__
        f = getattr(self, "f", None)
        if f is not None:
            f.close()


class DirectoryTarChunkReader(BinaryChunkReader):
    """creates a tar archive of a directory and reads chunks from it"""

    def __init__(self, path: str, cache_file: str) -> None:
        """Archive the directory `path` into `cache_file`, then serve chunks from it.

        `cache_file` is created or overwritten and is left on disk.
        """
        with open(cache_file, "wb") as f:
            tar.create_tar_archive(f, pathlib.Path(path))

        # The handle used for writing is closed when the `with` block exits and
        # was write-only anyway, so reopen the finished archive for reading.
        super().__init__(open(cache_file, "rb"))

    def __del__(self):
        # `f` is missing if archiving or open() raised inside __init__
        f = getattr(self, "f", None)
        if f is not None:
            f.close()
# --- system/updated/casync/casync.py ---

def extract_directory(target: list[Chunk],
                      sources: list[tuple[str, ChunkReader, ChunkDict]],
                      out_path: str,
                      tmp_file: str,
                      progress: Callable[[int], None] = None):
    """extract a directory stored as a casync tar archive

    The chunks in `target` are first assembled into `tmp_file` via `extract`,
    then that file is unpacked as a tar archive into `out_path`.
    `tmp_file` is not removed afterwards.

    Returns whatever `extract` returned for the chunk extraction step.
    """
    stats = extract(target, sources, tmp_file, progress)

    with open(tmp_file, "rb") as f:
        tar.extract_tar_archive(f, pathlib.Path(out_path))

    return stats


# --- system/updated/casync/common.py ---

def create_casync_release(target_dir: pathlib.Path, output_dir: pathlib.Path, caibx_name: str):
    """Build a casync index (`<caibx_name>.caibx`) for a tarball of `target_dir`.

    A temporary `<caibx_name>.tar` is written to `output_dir`, indexed with
    `casync make`, and removed again.

    Returns a `(digest, caibx_file)` tuple: the stripped output of
    `casync digest` run on `target_dir`, and the path of the index file.
    """
    tar_file = output_dir / f"{caibx_name}.tar"
    caibx_file = output_dir / f"{caibx_name}.caibx"

    try:
        # NOTE(review): archiving `target_dir` by its path stores member names
        # that include that path, unlike tar.create_tar_archive, which stores
        # names relative to the directory - confirm this layout is intended.
        run(["tar", "-cf", str(tar_file), target_dir])
        run(["casync", "make", *CASYNC_ARGS, caibx_file, str(tar_file)])
    finally:
        # always drop the intermediate tarball, even when a command above failed
        tar_file.unlink(missing_ok=True)

    # NOTE(review): the digest is taken over the directory, not the tarball that
    # was indexed - confirm against the consumers of this value.
    digest = run(["casync", "digest", *CASYNC_ARGS, target_dir]).decode("utf-8").strip()
    return digest, caibx_file
tar = tarfile.open(fileobj=fh, mode='w') + for file in directory.rglob("*"): + relative_path = str(file.relative_to(directory)) + if file.is_symlink(): + info = tarfile.TarInfo(relative_path) + info.type = tarfile.SYMTYPE + info.linkpath = str(file.readlink()) + tar.addfile(info) + + elif file.is_file(): + info = tarfile.TarInfo(relative_path) + info.size = file.stat().st_size + info.type = tarfile.REGTYPE + with file.open('rb') as f: + tar.addfile(info, f) + + tar.close() + fh.seek(0) + + +def extract_tar_archive(fh: IO[bytes], directory: pathlib.Path): + """Extracts a tar archive to a directory""" + + tar = tarfile.open(fileobj=fh, mode='r') + tar.extractall(str(directory), filter=lambda info, path: info) + tar.close() diff --git a/system/hardware/tici/tests/test_casync.py b/system/updated/tests/test_casync.py similarity index 56% rename from system/hardware/tici/tests/test_casync.py rename to system/updated/tests/test_casync.py index 94b32a9f76..9031c30e07 100755 --- a/system/hardware/tici/tests/test_casync.py +++ b/system/updated/tests/test_casync.py @@ -1,10 +1,12 @@ #!/usr/bin/env python3 import os +import pathlib import unittest import tempfile import subprocess -import openpilot.system.hardware.tici.casync as casync +from openpilot.system.updated.casync import casync +from openpilot.system.updated.casync import tar # dd if=/dev/zero of=/tmp/img.raw bs=1M count=2 # sudo losetup -f /tmp/img.raw @@ -149,5 +151,117 @@ class TestCasync(unittest.TestCase): self.assertLess(stats['remote'], len(self.contents)) +class TestCasyncDirectory(unittest.TestCase): + """Tests extracting a directory stored as a casync tar archive""" + + NUM_FILES = 16 + + @classmethod + def setup_cache(cls, directory, files=None): + if files is None: + files = range(cls.NUM_FILES) + + chunk_a = [i % 256 for i in range(1024)] * 512 + chunk_b = [(256 - i) % 256 for i in range(1024)] * 512 + zeroes = [0] * (1024 * 128) + cls.contents = chunk_a + chunk_b + zeroes + chunk_a + cls.contents = 
class TestCasyncDirectory(unittest.TestCase):
    """Tests extracting a directory stored as a casync tar archive"""

    NUM_FILES = 16

    @classmethod
    def setup_cache(cls, directory, files=None):
        """Populate `directory` with `file_<i>.txt` plus a `link_<i>.txt` symlink for each i in `files`.

        Also (re)sets `cls.contents`, the payload written to every file.
        """
        if files is None:
            files = range(cls.NUM_FILES)

        # built directly as bytes; same content as the former list-of-ints version
        chunk_a = bytes(i % 256 for i in range(1024)) * 512
        chunk_b = bytes((256 - i) % 256 for i in range(1024)) * 512
        zeroes = bytes(1024 * 128)
        cls.contents = chunk_a + chunk_b + zeroes + chunk_a

        for i in files:
            with open(os.path.join(directory, f"file_{i}.txt"), "wb") as f:
                f.write(cls.contents)

            os.symlink(f"file_{i}.txt", os.path.join(directory, f"link_{i}.txt"))

    @classmethod
    def setUpClass(cls):
        cls.tmpdir = tempfile.TemporaryDirectory()

        # Create casync files
        cls.manifest_fn = os.path.join(cls.tmpdir.name, 'orig.caibx')
        cls.store_fn = os.path.join(cls.tmpdir.name, 'store')

        cls.directory_to_extract = tempfile.TemporaryDirectory()
        cls.setup_cache(cls.directory_to_extract.name)

        cls.orig_fn = os.path.join(cls.tmpdir.name, 'orig.tar')
        with open(cls.orig_fn, "wb") as f:
            tar.create_tar_archive(f, pathlib.Path(cls.directory_to_extract.name))

        subprocess.check_output(["casync", "make", "--compression=xz", "--store", cls.store_fn, cls.manifest_fn, cls.orig_fn])

    @classmethod
    def tearDownClass(cls):
        cls.tmpdir.cleanup()
        cls.directory_to_extract.cleanup()

    def setUp(self):
        self.cache_dir = tempfile.TemporaryDirectory()
        self.working_dir = tempfile.TemporaryDirectory()
        self.out_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        self.cache_dir.cleanup()
        # working_dir holds cache.tar / tmp.tar and was previously never removed
        self.working_dir.cleanup()
        self.out_dir.cleanup()

    def run_test(self):
        """Extract the manifest using the cache dir plus the store; check file_0/link_0 and return the stats."""
        target = casync.parse_caibx(self.manifest_fn)

        cache_filename = os.path.join(self.working_dir.name, "cache.tar")
        tmp_filename = os.path.join(self.working_dir.name, "tmp.tar")

        sources = [('cache', casync.DirectoryTarChunkReader(self.cache_dir.name, cache_filename), casync.build_chunk_dict(target))]
        sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]

        stats = casync.extract_directory(target, sources, pathlib.Path(self.out_dir.name), tmp_filename)

        with open(os.path.join(self.out_dir.name, "file_0.txt"), "rb") as f:
            self.assertEqual(f.read(), self.contents)

        with open(os.path.join(self.out_dir.name, "link_0.txt"), "rb") as f:
            self.assertEqual(f.read(), self.contents)
        self.assertEqual(os.readlink(os.path.join(self.out_dir.name, "link_0.txt")), "file_0.txt")

        return stats

    def test_no_cache(self):
        self.setup_cache(self.cache_dir.name, [])
        stats = self.run_test()
        self.assertGreater(stats['remote'], 0)
        self.assertEqual(stats['cache'], 0)

    def test_full_cache(self):
        self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
        stats = self.run_test()
        self.assertEqual(stats['remote'], 0)
        self.assertGreater(stats['cache'], 0)

    def test_one_file_cache(self):
        self.setup_cache(self.cache_dir.name, range(1))
        stats = self.run_test()
        self.assertGreater(stats['remote'], 0)
        self.assertGreater(stats['cache'], 0)
        self.assertLess(stats['cache'], stats['remote'])

    def test_one_file_incorrect_cache(self):
        self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
        with open(os.path.join(self.cache_dir.name, "file_0.txt"), "wb") as f:
            f.write(b"1234")

        stats = self.run_test()
        self.assertGreater(stats['remote'], 0)
        self.assertGreater(stats['cache'], 0)
        self.assertGreater(stats['cache'], stats['remote'])

    def test_one_file_missing_cache(self):
        self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
        os.unlink(os.path.join(self.cache_dir.name, "file_12.txt"))

        stats = self.run_test()
        self.assertGreater(stats['remote'], 0)
        self.assertGreater(stats['cache'], 0)
        self.assertGreater(stats['cache'], stats['remote'])


if __name__ == "__main__":
    unittest.main()