tar archive instead

pull/32089/head
Justin Newberry 1 year ago
parent 1c0161589b
commit b92eedc7ee
  1. 45
      system/updated/casync/casync.py
  2. 9
      system/updated/casync/common.py
  3. 34
      system/updated/casync/tar.py
  4. 116
      system/updated/tests/test_casync.py

@ -2,15 +2,18 @@
import io
import lzma
import os
import pathlib
import struct
import sys
import time
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable
from typing import IO
import requests
from Crypto.Hash import SHA512
from openpilot.system.updated.casync import tar
CA_FORMAT_INDEX = 0x96824d9c7b129ff9
CA_FORMAT_TABLE = 0xe75b9e112f17417d
@ -37,20 +40,25 @@ class ChunkReader(ABC):
...
class BinaryChunkReader(ChunkReader):
    """Reads chunks from a seekable binary file-like object.

    The reader does not own the handle: callers (or subclasses such as
    FileChunkReader) are responsible for closing it.
    """

    def __init__(self, file_like: IO[bytes]) -> None:
        super().__init__()
        self.f = file_like

    def read(self, chunk: Chunk) -> bytes:
        """Return the raw bytes for `chunk` (chunk.length bytes at chunk.offset)."""
        self.f.seek(chunk.offset)
        return self.f.read(chunk.length)
class FileChunkReader(BinaryChunkReader):
    """BinaryChunkReader over a file opened from a local path; closes it on GC."""

    def __init__(self, path: str) -> None:
        super().__init__(open(path, 'rb'))

    def __del__(self):
        # getattr guard: if open() raised in __init__, self.f was never set and
        # the original code would raise AttributeError during finalization.
        f = getattr(self, 'f', None)
        if f is not None:
            f.close()
class RemoteChunkReader(ChunkReader):
"""Reads lzma compressed chunks from a remote store"""
@ -83,6 +91,16 @@ class RemoteChunkReader(ChunkReader):
return decompressor.decompress(contents)
class DirectoryTarChunkReader(BinaryChunkReader):
    """Creates a tar archive of a directory and reads chunks from it."""

    def __init__(self, path: str, cache_file: str) -> None:
        # Write the archive to cache_file first, then reopen it for reading.
        # Bug fix: the original passed `f` (already closed by the `with` block,
        # and opened write-only) to the parent reader, so every read would fail.
        with open(cache_file, "wb") as f:
            tar.create_tar_archive(f, pathlib.Path(path))
        super().__init__(open(cache_file, "rb"))

    def __del__(self):
        # Close the handle we opened above; guard in case __init__ failed early.
        f = getattr(self, 'f', None)
        if f is not None:
            f.close()
def parse_caibx(caibx_path: str) -> list[Chunk]:
"""Parses the chunks from a caibx file. Can handle both local and remote files.
Returns a list of chunks with hash, offset and length"""
@ -181,6 +199,21 @@ def extract(target: list[Chunk],
return stats
def extract_directory(target: list[Chunk],
                      sources: list[tuple[str, ChunkReader, ChunkDict]],
                      out_path: str,
                      tmp_file: str,
                      progress: Callable[[int], None] | None = None):
    """Extract a directory stored as a casync tar archive.

    The chunks are first reassembled into `tmp_file`, which is then unpacked
    as a tar archive into `out_path`. `tmp_file` is left on disk.

    Returns the per-source byte statistics from extract().
    """
    # Type fix: `progress` defaults to None, so the annotation must be Optional.
    stats = extract(target, sources, tmp_file, progress)

    with open(tmp_file, "rb") as f:
        tar.extract_tar_archive(f, pathlib.Path(out_path))

    return stats
def print_stats(stats: dict[str, int]):
    """Print a human-readable summary of bytes fetched per source."""
    total_bytes = sum(stats.values())
    print(f"Total size: {total_bytes / 1024 / 1024:.2f} MB")

@ -46,8 +46,11 @@ def create_build_metadata_file(path: pathlib.Path, build_metadata: BuildMetadata
f.write(json.dumps(build_metadata_dict))
def create_casync_release(target_dir: pathlib.Path, output_dir: pathlib.Path, caibx_name: str):
    """Build a casync release of `target_dir` in `output_dir`.

    The directory is first packed into a tar archive so casync indexes a single
    stream; a .caibx index (plus chunk store, per CASYNC_ARGS) is created from
    it and the intermediate tar file is removed.

    Returns (digest, path_to_caibx_file).
    """
    tar_file = output_dir / f"{caibx_name}.tar"
    run(["tar", "-cf", str(tar_file), target_dir])

    caidx_file = output_dir / f"{caibx_name}.caibx"
    run(["casync", "make", *CASYNC_ARGS, caidx_file, str(tar_file)])
    tar_file.unlink()

    # NOTE(review): the digest is taken over the directory, not the tar stream
    # that was indexed — confirm consumers compare against directory digests.
    digest = run(["casync", "digest", *CASYNC_ARGS, target_dir]).decode("utf-8").strip()
    return digest, caidx_file

@ -0,0 +1,34 @@
import pathlib
import tarfile
from typing import IO
def create_tar_archive(fh: IO[bytes], directory: pathlib.Path):
    """Create an uncompressed tar archive of `directory` into `fh`.

    Only regular files and symlinks are archived; directory entries,
    permissions and timestamps are not preserved. `fh` is rewound to the
    start when done so it can be read back immediately.
    """
    # Context manager guarantees the end-of-archive record is written (and the
    # stream is flushed) even if adding a member raises.
    with tarfile.open(fileobj=fh, mode='w') as archive:
        # Sort for a deterministic member order: rglob order is filesystem
        # dependent, which would make the archive (and its casync chunks)
        # nondeterministic across hosts.
        for file in sorted(directory.rglob("*")):
            relative_path = str(file.relative_to(directory))
            if file.is_symlink():
                info = tarfile.TarInfo(relative_path)
                info.type = tarfile.SYMTYPE
                info.linkpath = str(file.readlink())
                archive.addfile(info)
            elif file.is_file():
                info = tarfile.TarInfo(relative_path)
                info.size = file.stat().st_size
                info.type = tarfile.REGTYPE
                with file.open('rb') as f:
                    archive.addfile(info, f)

    fh.seek(0)
def extract_tar_archive(fh: IO[bytes], directory: pathlib.Path):
"""Extracts a tar archive to a directory"""
tar = tarfile.open(fileobj=fh, mode='r')
tar.extractall(str(directory), filter=lambda info, path: info)
tar.close()

@ -1,10 +1,12 @@
#!/usr/bin/env python3
import os
import pathlib
import unittest
import tempfile
import subprocess
import openpilot.system.hardware.tici.casync as casync
from openpilot.system.updated.casync import casync
from openpilot.system.updated.casync import tar
# dd if=/dev/zero of=/tmp/img.raw bs=1M count=2
# sudo losetup -f /tmp/img.raw
@ -149,5 +151,117 @@ class TestCasync(unittest.TestCase):
self.assertLess(stats['remote'], len(self.contents))
class TestCasyncDirectory(unittest.TestCase):
    """Tests extracting a directory stored as a casync tar archive."""

    NUM_FILES = 16

    @classmethod
    def setup_cache(cls, directory, files=None):
        """Populate `directory` with `files` test files plus symlinks to them.

        Also records the shared file contents on the class for later asserts.
        """
        if files is None:
            files = range(cls.NUM_FILES)

        # Repeating byte patterns so casync can deduplicate chunks across files.
        chunk_a = [i % 256 for i in range(1024)] * 512
        chunk_b = [(256 - i) % 256 for i in range(1024)] * 512
        zeroes = [0] * (1024 * 128)
        cls.contents = bytes(chunk_a + chunk_b + zeroes + chunk_a)

        for i in files:
            with open(os.path.join(directory, f"file_{i}.txt"), "wb") as f:
                f.write(cls.contents)
            os.symlink(f"file_{i}.txt", os.path.join(directory, f"link_{i}.txt"))

    @classmethod
    def setUpClass(cls):
        cls.tmpdir = tempfile.TemporaryDirectory()

        # Create casync files
        cls.manifest_fn = os.path.join(cls.tmpdir.name, 'orig.caibx')
        cls.store_fn = os.path.join(cls.tmpdir.name, 'store')

        cls.directory_to_extract = tempfile.TemporaryDirectory()
        cls.setup_cache(cls.directory_to_extract.name)

        # Tar the directory and index the tar stream with casync.
        cls.orig_fn = os.path.join(cls.tmpdir.name, 'orig.tar')
        with open(cls.orig_fn, "wb") as f:
            tar.create_tar_archive(f, pathlib.Path(cls.directory_to_extract.name))

        subprocess.check_output(["casync", "make", "--compression=xz", "--store", cls.store_fn, cls.manifest_fn, cls.orig_fn])

    @classmethod
    def tearDownClass(cls):
        cls.tmpdir.cleanup()
        cls.directory_to_extract.cleanup()

    def setUp(self):
        self.cache_dir = tempfile.TemporaryDirectory()
        self.working_dir = tempfile.TemporaryDirectory()
        self.out_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        self.cache_dir.cleanup()
        # Bug fix: working_dir was created in setUp but never cleaned up,
        # leaking a temp directory per test.
        self.working_dir.cleanup()
        self.out_dir.cleanup()

    def run_test(self):
        """Extract the release using cache + remote sources; return the stats."""
        target = casync.parse_caibx(self.manifest_fn)

        cache_filename = os.path.join(self.working_dir.name, "cache.tar")
        tmp_filename = os.path.join(self.working_dir.name, "tmp.tar")

        sources = [('cache', casync.DirectoryTarChunkReader(self.cache_dir.name, cache_filename), casync.build_chunk_dict(target))]
        sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]

        stats = casync.extract_directory(target, sources, pathlib.Path(self.out_dir.name), tmp_filename)

        with open(os.path.join(self.out_dir.name, "file_0.txt"), "rb") as f:
            self.assertEqual(f.read(), self.contents)

        # The symlink must both resolve to the right data and stay a symlink.
        with open(os.path.join(self.out_dir.name, "link_0.txt"), "rb") as f:
            self.assertEqual(f.read(), self.contents)
        self.assertEqual(os.readlink(os.path.join(self.out_dir.name, "link_0.txt")), "file_0.txt")

        return stats

    def test_no_cache(self):
        self.setup_cache(self.cache_dir.name, [])
        stats = self.run_test()
        self.assertGreater(stats['remote'], 0)
        self.assertEqual(stats['cache'], 0)

    def test_full_cache(self):
        self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
        stats = self.run_test()
        self.assertEqual(stats['remote'], 0)
        self.assertGreater(stats['cache'], 0)

    def test_one_file_cache(self):
        self.setup_cache(self.cache_dir.name, range(1))
        stats = self.run_test()
        self.assertGreater(stats['remote'], 0)
        self.assertGreater(stats['cache'], 0)
        self.assertLess(stats['cache'], stats['remote'])

    def test_one_file_incorrect_cache(self):
        self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
        # Corrupt one cached file: its chunks must be refetched from remote.
        with open(os.path.join(self.cache_dir.name, "file_0.txt"), "wb") as f:
            f.write(b"1234")
        stats = self.run_test()
        self.assertGreater(stats['remote'], 0)
        self.assertGreater(stats['cache'], 0)
        self.assertGreater(stats['cache'], stats['remote'])

    def test_one_file_missing_cache(self):
        self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
        os.unlink(os.path.join(self.cache_dir.name, "file_12.txt"))
        stats = self.run_test()
        self.assertGreater(stats['remote'], 0)
        self.assertGreater(stats['cache'], 0)
        self.assertGreater(stats['cache'], stats['remote'])
# Script entry point: run the unittest suite when executed directly.
if __name__ == "__main__":
    unittest.main()
Loading…
Cancel
Save