From b33aef01b826c97345bfe4d810f0f8deb12a4632 Mon Sep 17 00:00:00 2001
From: Justin Newberry
Date: Fri, 5 Apr 2024 14:00:45 -0400
Subject: [PATCH] move casync release creation to use a tarball of files (#32089)

* tar archive instead

* fix

* move this here

* migrate these

* fix this

* update readme

* fix that

* try to build nightly

* Revert "try to build nightly"

This reverts commit 4ea680cb6a1f985c0490168724c99bcb45af9899.

* caexclude is no longer required

* finish up

* sorted

* need this

* and that

* context manager

* path based

old-commit-hash: 69982d43cd61e9b085fbd19f1e3ac6c747a51793
---
 release/README.md                     |   6 +-
 release/create_casync_release.py      |   7 +-
 release/files_common                  |   1 -
 system/hardware/tici/agnos.py         |   2 +-
 .../tici => updated/casync}/casync.py |  50 +++++++-
 system/updated/casync/common.py       |  32 ++---
 system/updated/casync/tar.py          |  38 ++++++
 .../casync}/tests/test_casync.py      | 116 +++++++++++++++++-
 8 files changed, 221 insertions(+), 31 deletions(-)
 rename system/{hardware/tici => updated/casync}/casync.py (81%)
 create mode 100644 system/updated/casync/tar.py
 rename system/{hardware/tici => updated/casync}/tests/test_casync.py (56%)

diff --git a/release/README.md b/release/README.md
index 77cd15ad69..7a4b2cde3e 100644
--- a/release/README.md
+++ b/release/README.md
@@ -3,7 +3,7 @@
 
 ## terms
 
-- `channel` - a named version of openpilot (git branch, casync caidx) which receives updates
+- `channel` - a named version of openpilot (git branch, casync caibx) which receives updates
 - `build` - a release which is already built for the comma 3/3x and contains only required files for running openpilot and identifying the release
 - `build_style` - type of build, either `debug` or `release`
 
@@ -28,8 +28,8 @@
 
 ```bash
 # run on a tici, within the directory you want to create the build from.
-# creates a prebuilt version of openpilot into BUILD_DIR and outputs the caidx
-# and other casync files into CASYNC_DIR for uploading to openpilot-releases.
+# creates a prebuilt version of openpilot into BUILD_DIR and outputs the caibx
+# of a tarball containing the full prebuilt openpilot release
 BUILD_DIR=/data/openpilot_build \
 CASYNC_DIR=/data/casync \
 OPENPILOT_CHANNEL=nightly \
diff --git a/release/create_casync_release.py b/release/create_casync_release.py
index 9aa75eca5d..4c90c31909 100755
--- a/release/create_casync_release.py
+++ b/release/create_casync_release.py
@@ -4,7 +4,7 @@ import argparse
 import os
 import pathlib
 
-from openpilot.system.updated.casync.common import create_caexclude_file, create_casync_release, create_build_metadata_file
+from openpilot.system.updated.casync.common import create_casync_release, create_build_metadata_file
 from openpilot.system.version import get_build_metadata
 
 
@@ -22,8 +22,7 @@ if __name__ == "__main__":
   build_metadata.openpilot.build_style = "release" if os.environ.get("RELEASE", None) is not None else "debug"
 
   create_build_metadata_file(target_dir, build_metadata, args.channel)
-  create_caexclude_file(target_dir)
 
-  digest, caidx = create_casync_release(target_dir, output_dir, build_metadata.canonical)
+  digest, caibx = create_casync_release(target_dir, output_dir, build_metadata.canonical)
 
-  print(f"Created casync release from {target_dir} to {caidx} with digest {digest}")
+  print(f"Created casync release from {target_dir} to {caibx} with digest {digest}")
diff --git a/release/files_common b/release/files_common
index b00f99906b..4ba9200d22 100644
--- a/release/files_common
+++ b/release/files_common
@@ -170,7 +170,6 @@ system/hardware/tici/hardware.h
 system/hardware/tici/hardware.py
 system/hardware/tici/pins.py
 system/hardware/tici/agnos.py
-system/hardware/tici/casync.py
 system/hardware/tici/agnos.json
 system/hardware/tici/amplifier.py
 system/hardware/tici/updater
diff --git a/system/hardware/tici/agnos.py b/system/hardware/tici/agnos.py
index 502295be07..8f09b30850 100755
--- a/system/hardware/tici/agnos.py
+++ b/system/hardware/tici/agnos.py
@@ -10,7 +10,7 @@ from collections.abc import Generator
 
 import requests
 
-import openpilot.system.hardware.tici.casync as casync
+import openpilot.system.updated.casync.casync as casync
 
 SPARSE_CHUNK_FMT = struct.Struct('H2xI4x')
 CAIBX_URL = "https://commadist.azureedge.net/agnosupdate/"
diff --git a/system/hardware/tici/casync.py b/system/updated/casync/casync.py
similarity index 81%
rename from system/hardware/tici/casync.py
rename to system/updated/casync/casync.py
index 986228c1cd..7a3303a9e9 100755
--- a/system/hardware/tici/casync.py
+++ b/system/updated/casync/casync.py
@@ -2,15 +2,19 @@
 import io
 import lzma
 import os
+import pathlib
 import struct
 import sys
 import time
 from abc import ABC, abstractmethod
 from collections import defaultdict, namedtuple
 from collections.abc import Callable
+from typing import IO
 
 import requests
 from Crypto.Hash import SHA512
+from openpilot.system.updated.casync import tar
+from openpilot.system.updated.casync.common import create_casync_tar_package
 
 CA_FORMAT_INDEX = 0x96824d9c7b129ff9
 CA_FORMAT_TABLE = 0xe75b9e112f17417d
@@ -37,20 +41,25 @@ class ChunkReader(ABC):
     ...
 
 
-class FileChunkReader(ChunkReader):
+class BinaryChunkReader(ChunkReader):
   """Reads chunks from a local file"""
-  def __init__(self, fn: str) -> None:
+  def __init__(self, file_like: IO[bytes]) -> None:
     super().__init__()
-    self.f = open(fn, 'rb')
-
-  def __del__(self):
-    self.f.close()
+    self.f = file_like
 
   def read(self, chunk: Chunk) -> bytes:
     self.f.seek(chunk.offset)
     return self.f.read(chunk.length)
 
 
+class FileChunkReader(BinaryChunkReader):
+  def __init__(self, path: str) -> None:
+    super().__init__(open(path, 'rb'))
+
+  def __del__(self):
+    self.f.close()
+
+
 class RemoteChunkReader(ChunkReader):
   """Reads lzma compressed chunks from a remote store"""
 
@@ -83,6 +92,20 @@ class RemoteChunkReader(ChunkReader):
     return decompressor.decompress(contents)
 
 
+class DirectoryTarChunkReader(BinaryChunkReader):
+  """creates a tar archive of a directory and reads chunks from it"""
+
+  def __init__(self, path: str, cache_file: str) -> None:
+    create_casync_tar_package(pathlib.Path(path), pathlib.Path(cache_file))
+
+    self.f = open(cache_file, "rb")
+    return super().__init__(self.f)
+
+  def __del__(self):
+    self.f.close()
+    os.unlink(self.f.name)
+
+
 def parse_caibx(caibx_path: str) -> list[Chunk]:
   """Parses the chunks from a caibx file. Can handle both local and remote files.
   Returns a list of chunks with hash, offset and length"""
@@ -181,6 +204,21 @@ def extract(target: list[Chunk],
   return stats
 
 
+def extract_directory(target: list[Chunk],
+                      sources: list[tuple[str, ChunkReader, ChunkDict]],
+                      out_path: str,
+                      tmp_file: str,
+                      progress: Callable[[int], None] = None):
+  """extract a directory stored as a casync tar archive"""
+
+  stats = extract(target, sources, tmp_file, progress)
+
+  with open(tmp_file, "rb") as f:
+    tar.extract_tar_archive(f, pathlib.Path(out_path))
+
+  return stats
+
+
 def print_stats(stats: dict[str, int]):
   total_bytes = sum(stats.values())
   print(f"Total size: {total_bytes / 1024 / 1024:.2f} MB")
diff --git a/system/updated/casync/common.py b/system/updated/casync/common.py
index b5fb4d5802..b30494b2bb 100644
--- a/system/updated/casync/common.py
+++ b/system/updated/casync/common.py
@@ -4,10 +4,11 @@ import pathlib
 import subprocess
 
 from openpilot.system.version import BUILD_METADATA_FILENAME, BuildMetadata
+from openpilot.system.updated.casync import tar
 
 
 CASYNC_ARGS = ["--with=symlinks", "--with=permissions", "--compression=xz"]
-CASYNC_FILES = [BUILD_METADATA_FILENAME, ".caexclude"]
+CASYNC_FILES = [BUILD_METADATA_FILENAME]
 
 
 def run(cmd):
@@ -28,16 +29,6 @@ def get_exclude_set(path) -> set[str]:
   return exclude_set
 
 
-def create_caexclude_file(path: pathlib.Path):
-  with open(path / ".caexclude", "w") as f:
-    # exclude everything except the paths already in the release
-    f.write("*\n")
-    f.write(".*\n")
-
-    for file in sorted(get_exclude_set(path)):
-      f.write(f"!{file}\n")
-
-
 def create_build_metadata_file(path: pathlib.Path, build_metadata: BuildMetadata, channel: str):
   with open(path / BUILD_METADATA_FILENAME, "w") as f:
     build_metadata_dict = dataclasses.asdict(build_metadata)
@@ -46,8 +37,19 @@ def create_build_metadata_file(path: pathlib.Path, build_metadata: BuildMetadat
     f.write(json.dumps(build_metadata_dict))
 
 
-def create_casync_release(target_dir: pathlib.Path, output_dir: pathlib.Path, caidx_name: str):
-  caidx_file = output_dir / f"{caidx_name}.caidx"
-  run(["casync", "make", *CASYNC_ARGS, caidx_file, target_dir])
+def is_not_git(path: pathlib.Path) -> bool:
+  return ".git" not in path.parts
+
+
+def create_casync_tar_package(target_dir: pathlib.Path, output_path: pathlib.Path):
+  tar.create_tar_archive(output_path, target_dir, is_not_git)
+
+
+def create_casync_release(target_dir: pathlib.Path, output_dir: pathlib.Path, caibx_name: str):
+  tar_file = output_dir / f"{caibx_name}.tar"
+  create_casync_tar_package(target_dir, tar_file)
+  caibx_file = output_dir / f"{caibx_name}.caibx"
+  run(["casync", "make", *CASYNC_ARGS, caibx_file, str(tar_file)])
+  tar_file.unlink()
   digest = run(["casync", "digest", *CASYNC_ARGS, target_dir]).decode("utf-8").strip()
-  return digest, caidx_file
+  return digest, caibx_file
diff --git a/system/updated/casync/tar.py b/system/updated/casync/tar.py
new file mode 100644
index 0000000000..725ab4251d
--- /dev/null
+++ b/system/updated/casync/tar.py
@@ -0,0 +1,38 @@
+import pathlib
+import tarfile
+from typing import IO, Callable
+
+
+def include_default(_) -> bool:
+  return True
+
+
+def create_tar_archive(filename: pathlib.Path, directory: pathlib.Path, include: Callable[[pathlib.Path], bool] = include_default):
+  """Creates a tar archive of a directory"""
+
+  with tarfile.open(filename, 'w') as tar:
+    for file in sorted(directory.rglob("*"), key=lambda f: f.stat().st_size if f.is_file() else 0, reverse=True):
+      if not include(file):
+        continue
+      relative_path = str(file.relative_to(directory))
+      if file.is_symlink():
+        info = tarfile.TarInfo(relative_path)
+        info.type = tarfile.SYMTYPE
+        info.linkpath = str(file.readlink())
+        tar.addfile(info)
+
+      elif file.is_file():
+        info = tarfile.TarInfo(relative_path)
+        info.size = file.stat().st_size
+        info.type = tarfile.REGTYPE
+        with file.open('rb') as f:
+          tar.addfile(info, f)
+
+
+
+def extract_tar_archive(fh: IO[bytes], directory: pathlib.Path):
+  """Extracts a tar archive to a directory"""
+
+  tar = tarfile.open(fileobj=fh, mode='r')
+  tar.extractall(str(directory), filter=lambda info, path: info)
+  tar.close()
diff --git a/system/hardware/tici/tests/test_casync.py b/system/updated/casync/tests/test_casync.py
similarity index 56%
rename from system/hardware/tici/tests/test_casync.py
rename to system/updated/casync/tests/test_casync.py
index 94b32a9f76..34427d5625 100755
--- a/system/hardware/tici/tests/test_casync.py
+++ b/system/updated/casync/tests/test_casync.py
@@ -1,10 +1,12 @@
 #!/usr/bin/env python3
 import os
+import pathlib
 import unittest
 import tempfile
 import subprocess
 
-import openpilot.system.hardware.tici.casync as casync
+from openpilot.system.updated.casync import casync
+from openpilot.system.updated.casync import tar
 
 # dd if=/dev/zero of=/tmp/img.raw bs=1M count=2
 # sudo losetup -f /tmp/img.raw
@@ -149,5 +151,117 @@ class TestCasync(unittest.TestCase):
     self.assertLess(stats['remote'], len(self.contents))
 
 
+class TestCasyncDirectory(unittest.TestCase):
+  """Tests extracting a directory stored as a casync tar archive"""
+
+  NUM_FILES = 16
+
+  @classmethod
+  def setup_cache(cls, directory, files=None):
+    if files is None:
+      files = range(cls.NUM_FILES)
+
+    chunk_a = [i % 256 for i in range(1024)] * 512
+    chunk_b = [(256 - i) % 256 for i in range(1024)] * 512
+    zeroes = [0] * (1024 * 128)
+    cls.contents = chunk_a + chunk_b + zeroes + chunk_a
+    cls.contents = bytes(cls.contents)
+
+    for i in files:
+      with open(os.path.join(directory, f"file_{i}.txt"), "wb") as f:
+        f.write(cls.contents)
+
+      os.symlink(f"file_{i}.txt", os.path.join(directory, f"link_{i}.txt"))
+
+  @classmethod
+  def setUpClass(cls):
+    cls.tmpdir = tempfile.TemporaryDirectory()
+
+    # Create casync files
+    cls.manifest_fn = os.path.join(cls.tmpdir.name, 'orig.caibx')
+    cls.store_fn = os.path.join(cls.tmpdir.name, 'store')
+
+    cls.directory_to_extract = tempfile.TemporaryDirectory()
+    cls.setup_cache(cls.directory_to_extract.name)
+
+    cls.orig_fn = os.path.join(cls.tmpdir.name, 'orig.tar')
+    tar.create_tar_archive(cls.orig_fn, pathlib.Path(cls.directory_to_extract.name))
+
+    subprocess.check_output(["casync", "make", "--compression=xz", "--store", cls.store_fn, cls.manifest_fn, cls.orig_fn])
+
+  @classmethod
+  def tearDownClass(cls):
+    cls.tmpdir.cleanup()
+    cls.directory_to_extract.cleanup()
+
+  def setUp(self):
+    self.cache_dir = tempfile.TemporaryDirectory()
+    self.working_dir = tempfile.TemporaryDirectory()
+    self.out_dir = tempfile.TemporaryDirectory()
+
+  def tearDown(self):
+    self.cache_dir.cleanup()
+    self.working_dir.cleanup()
+    self.out_dir.cleanup()
+
+  def run_test(self):
+    target = casync.parse_caibx(self.manifest_fn)
+
+    cache_filename = os.path.join(self.working_dir.name, "cache.tar")
+    tmp_filename = os.path.join(self.working_dir.name, "tmp.tar")
+
+    sources = [('cache', casync.DirectoryTarChunkReader(self.cache_dir.name, cache_filename), casync.build_chunk_dict(target))]
+    sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
+
+    stats = casync.extract_directory(target, sources, pathlib.Path(self.out_dir.name), tmp_filename)
+
+    with open(os.path.join(self.out_dir.name, "file_0.txt"), "rb") as f:
+      self.assertEqual(f.read(), self.contents)
+
+    with open(os.path.join(self.out_dir.name, "link_0.txt"), "rb") as f:
+      self.assertEqual(f.read(), self.contents)
+    self.assertEqual(os.readlink(os.path.join(self.out_dir.name, "link_0.txt")), "file_0.txt")
+
+    return stats
+
+  def test_no_cache(self):
+    self.setup_cache(self.cache_dir.name, [])
+    stats = self.run_test()
+    self.assertGreater(stats['remote'], 0)
+    self.assertEqual(stats['cache'], 0)
+
+  def test_full_cache(self):
+    self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
+    stats = self.run_test()
+    self.assertEqual(stats['remote'], 0)
+    self.assertGreater(stats['cache'], 0)
+
+  def test_one_file_cache(self):
+    self.setup_cache(self.cache_dir.name, range(1))
+    stats = self.run_test()
+    self.assertGreater(stats['remote'], 0)
+    self.assertGreater(stats['cache'], 0)
+    self.assertLess(stats['cache'], stats['remote'])
+
+  def test_one_file_incorrect_cache(self):
+    self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
+    with open(os.path.join(self.cache_dir.name, "file_0.txt"), "wb") as f:
+      f.write(b"1234")
+
+    stats = self.run_test()
+    self.assertGreater(stats['remote'], 0)
+    self.assertGreater(stats['cache'], 0)
+    self.assertGreater(stats['cache'], stats['remote'])
+
+  def test_one_file_missing_cache(self):
+    self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
+    os.unlink(os.path.join(self.cache_dir.name, "file_12.txt"))
+
+    stats = self.run_test()
+    self.assertGreater(stats['remote'], 0)
+    self.assertGreater(stats['cache'], 0)
+    self.assertGreater(stats['cache'], stats['remote'])
+
+
 if __name__ == "__main__":
   unittest.main()
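
A short usage sketch (not part of the patch): how the new tarball-based flow fits together end to end. The paths, the `nightly` channel name, and the remote store URL below are made up for illustration; `create_casync_release`, `parse_caibx`, `build_chunk_dict`, `DirectoryTarChunkReader`, `RemoteChunkReader`, `extract_directory`, and `print_stats` are the functions added or moved in this patch.

```python
import pathlib

from openpilot.system.updated.casync import casync
from openpilot.system.updated.casync.common import create_casync_release

# Release side: pack the prebuilt directory into a tar and index it with casync.
# build/output paths and the "nightly" channel name are hypothetical.
build_dir = pathlib.Path("/data/openpilot_build")
casync_dir = pathlib.Path("/data/casync")
digest, caibx = create_casync_release(build_dir, casync_dir, "nightly")
print(f"created {caibx} with digest {digest}")

# Update side: seed chunks from the currently installed directory (re-tarred on the
# fly by DirectoryTarChunkReader), fetch anything missing from the remote store, then
# unpack the reassembled tar into the new install path.
target = casync.parse_caibx(str(caibx))
sources = [
  ("cache", casync.DirectoryTarChunkReader("/data/openpilot", "/tmp/cache.tar"), casync.build_chunk_dict(target)),
  ("remote", casync.RemoteChunkReader("https://example.com/openpilot-releases"), casync.build_chunk_dict(target)),  # placeholder store URL
]
stats = casync.extract_directory(target, sources, "/data/openpilot_new", "/tmp/new.tar")
casync.print_stats(stats)
```

Since the tarball is built deterministically (files ordered by size, `.git` excluded), successive releases appear to chunk similarly, which is what lets most chunks be served from the local cache rather than the remote store.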