tar archive instead

pull/32089/head
Justin Newberry 1 year ago
parent 1c0161589b
commit b92eedc7ee
  1. system/updated/casync/casync.py (45 lines changed)
  2. system/updated/casync/common.py (9 lines changed)
  3. system/updated/casync/tar.py (34 lines changed)
  4. system/updated/tests/test_casync.py (116 lines changed)

system/updated/casync/casync.py

@@ -2,15 +2,18 @@
 import io
 import lzma
 import os
+import pathlib
 import struct
 import sys
 import time
 from abc import ABC, abstractmethod
 from collections import defaultdict, namedtuple
 from collections.abc import Callable
+from typing import IO

 import requests
 from Crypto.Hash import SHA512

+from openpilot.system.updated.casync import tar
+
 CA_FORMAT_INDEX = 0x96824d9c7b129ff9
 CA_FORMAT_TABLE = 0xe75b9e112f17417d
@@ -37,20 +40,25 @@ class ChunkReader(ABC):
     ...


-class FileChunkReader(ChunkReader):
+class BinaryChunkReader(ChunkReader):
   """Reads chunks from a local file"""

-  def __init__(self, fn: str) -> None:
+  def __init__(self, file_like: IO[bytes]) -> None:
     super().__init__()
-    self.f = open(fn, 'rb')
-
-  def __del__(self):
-    self.f.close()
+    self.f = file_like

   def read(self, chunk: Chunk) -> bytes:
     self.f.seek(chunk.offset)
     return self.f.read(chunk.length)

+
+class FileChunkReader(BinaryChunkReader):
+  def __init__(self, path: str) -> None:
+    super().__init__(open(path, 'rb'))
+
+  def __del__(self):
+    self.f.close()
+

 class RemoteChunkReader(ChunkReader):
   """Reads lzma compressed chunks from a remote store"""
@@ -83,6 +91,16 @@ class RemoteChunkReader(ChunkReader):
     return decompressor.decompress(contents)


+class DirectoryTarChunkReader(BinaryChunkReader):
+  """creates a tar archive of a directory and reads chunks from it"""
+
+  def __init__(self, path: str, cache_file: str) -> None:
+    with open(cache_file, "wb") as f:
+      tar.create_tar_archive(f, pathlib.Path(path))
+    super().__init__(open(cache_file, "rb"))
+
+
 def parse_caibx(caibx_path: str) -> list[Chunk]:
   """Parses the chunks from a caibx file. Can handle both local and remote files.
   Returns a list of chunks with hash, offset and length"""
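Because parse_caibx returns plain Chunk tuples, inspecting an index takes only a couple of lines; a hypothetical example (the path is made up, and remote URLs work too per the docstring):

from openpilot.system.updated.casync import casync

chunks = casync.parse_caibx("/tmp/orig.caibx")  # hypothetical path
total = sum(c.length for c in chunks)
print(f"{len(chunks)} chunks, {total / 1024 / 1024:.2f} MB of target data")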
@@ -181,6 +199,21 @@ def extract(target: list[Chunk],
   return stats


+def extract_directory(target: list[Chunk],
+                      sources: list[tuple[str, ChunkReader, ChunkDict]],
+                      out_path: str,
+                      tmp_file: str,
+                      progress: Callable[[int], None] = None):
+  """extract a directory stored as a casync tar archive"""
+  stats = extract(target, sources, tmp_file, progress)
+
+  with open(tmp_file, "rb") as f:
+    tar.extract_tar_archive(f, pathlib.Path(out_path))
+
+  return stats
+
+
 def print_stats(stats: dict[str, int]):
   total_bytes = sum(stats.values())
   print(f"Total size: {total_bytes / 1024 / 1024:.2f} MB")

system/updated/casync/common.py

@@ -46,8 +46,11 @@ def create_build_metadata_file(path: pathlib.Path, build_metadata: BuildMetadata
     f.write(json.dumps(build_metadata_dict))


-def create_casync_release(target_dir: pathlib.Path, output_dir: pathlib.Path, caidx_name: str):
-  caidx_file = output_dir / f"{caidx_name}.caidx"
-  run(["casync", "make", *CASYNC_ARGS, caidx_file, target_dir])
+def create_casync_release(target_dir: pathlib.Path, output_dir: pathlib.Path, caibx_name: str):
+  tar_file = output_dir / f"{caibx_name}.tar"
+  run(["tar", "-cf", str(tar_file), target_dir])
+  caidx_file = output_dir / f"{caibx_name}.caibx"
+  run(["casync", "make", *CASYNC_ARGS, caidx_file, str(tar_file)])
+  tar_file.unlink()
   digest = run(["casync", "digest", *CASYNC_ARGS, target_dir]).decode("utf-8").strip()
   return digest, caidx_file
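The release path now casyncs a single tar blob instead of the directory tree, while the digest is still computed over the directory itself. Calling it stays a one-liner; a sketch with made-up paths, assuming CASYNC_ARGS (defined elsewhere in common.py) points at a writable chunk store:

import pathlib

from openpilot.system.updated.casync.common import create_casync_release

# hypothetical paths
digest, caibx_file = create_casync_release(pathlib.Path("/tmp/build"), pathlib.Path("/tmp/release"), "nightly")
print(digest, caibx_file)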

system/updated/casync/tar.py (new file)

@@ -0,0 +1,34 @@
+import pathlib
+import tarfile
+from typing import IO
+
+
+def create_tar_archive(fh: IO[bytes], directory: pathlib.Path):
+  """Creates a tar archive of a directory"""
+  tar = tarfile.open(fileobj=fh, mode='w')
+
+  for file in directory.rglob("*"):
+    relative_path = str(file.relative_to(directory))
+    if file.is_symlink():
+      info = tarfile.TarInfo(relative_path)
+      info.type = tarfile.SYMTYPE
+      info.linkpath = str(file.readlink())
+      tar.addfile(info)
+
+    elif file.is_file():
+      info = tarfile.TarInfo(relative_path)
+      info.size = file.stat().st_size
+      info.type = tarfile.REGTYPE
+      with file.open('rb') as f:
+        tar.addfile(info, f)
+
+  tar.close()
+  fh.seek(0)
+
+
+def extract_tar_archive(fh: IO[bytes], directory: pathlib.Path):
+  """Extracts a tar archive to a directory"""
+  tar = tarfile.open(fileobj=fh, mode='r')
+  tar.extractall(str(directory), filter=lambda info, path: info)
+  tar.close()
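A quick round trip through the new module: create_tar_archive seeks the handle back to zero when it finishes, so the same buffer can be handed directly to extract_tar_archive:

import io
import pathlib
import tempfile

from openpilot.system.updated.casync import tar

with tempfile.TemporaryDirectory() as src, tempfile.TemporaryDirectory() as dst:
  (pathlib.Path(src) / "hello.txt").write_bytes(b"hello")
  (pathlib.Path(src) / "hello.link").symlink_to("hello.txt")

  buf = io.BytesIO()
  tar.create_tar_archive(buf, pathlib.Path(src))  # leaves buf at offset 0
  tar.extract_tar_archive(buf, pathlib.Path(dst))

  assert (pathlib.Path(dst) / "hello.txt").read_bytes() == b"hello"
  assert (pathlib.Path(dst) / "hello.link").is_symlink()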

system/updated/tests/test_casync.py

@@ -1,10 +1,12 @@
 #!/usr/bin/env python3
 import os
+import pathlib
 import unittest
 import tempfile
 import subprocess

-import openpilot.system.hardware.tici.casync as casync
+from openpilot.system.updated.casync import casync
+from openpilot.system.updated.casync import tar

 # dd if=/dev/zero of=/tmp/img.raw bs=1M count=2
 # sudo losetup -f /tmp/img.raw
@@ -149,5 +151,117 @@ class TestCasync(unittest.TestCase):
     self.assertLess(stats['remote'], len(self.contents))


+class TestCasyncDirectory(unittest.TestCase):
+  """Tests extracting a directory stored as a casync tar archive"""
+
+  NUM_FILES = 16
+
+  @classmethod
+  def setup_cache(cls, directory, files=None):
+    if files is None:
+      files = range(cls.NUM_FILES)
+
+    chunk_a = [i % 256 for i in range(1024)] * 512
+    chunk_b = [(256 - i) % 256 for i in range(1024)] * 512
+    zeroes = [0] * (1024 * 128)
+    cls.contents = chunk_a + chunk_b + zeroes + chunk_a
+    cls.contents = bytes(cls.contents)
+
+    for i in files:
+      with open(os.path.join(directory, f"file_{i}.txt"), "wb") as f:
+        f.write(cls.contents)
+      os.symlink(f"file_{i}.txt", os.path.join(directory, f"link_{i}.txt"))
+
+  @classmethod
+  def setUpClass(cls):
+    cls.tmpdir = tempfile.TemporaryDirectory()
+
+    # Create casync files
+    cls.manifest_fn = os.path.join(cls.tmpdir.name, 'orig.caibx')
+    cls.store_fn = os.path.join(cls.tmpdir.name, 'store')
+
+    cls.directory_to_extract = tempfile.TemporaryDirectory()
+    cls.setup_cache(cls.directory_to_extract.name)
+
+    cls.orig_fn = os.path.join(cls.tmpdir.name, 'orig.tar')
+    with open(cls.orig_fn, "wb") as f:
+      tar.create_tar_archive(f, pathlib.Path(cls.directory_to_extract.name))
+
+    subprocess.check_output(["casync", "make", "--compression=xz", "--store", cls.store_fn, cls.manifest_fn, cls.orig_fn])
+
+  @classmethod
+  def tearDownClass(cls):
+    cls.tmpdir.cleanup()
+    cls.directory_to_extract.cleanup()
+
+  def setUp(self):
+    self.cache_dir = tempfile.TemporaryDirectory()
+    self.working_dir = tempfile.TemporaryDirectory()
+    self.out_dir = tempfile.TemporaryDirectory()
+
+  def tearDown(self):
+    self.cache_dir.cleanup()
+    self.working_dir.cleanup()
+    self.out_dir.cleanup()
+
+  def run_test(self):
+    target = casync.parse_caibx(self.manifest_fn)
+
+    cache_filename = os.path.join(self.working_dir.name, "cache.tar")
+    tmp_filename = os.path.join(self.working_dir.name, "tmp.tar")
+
+    sources = [('cache', casync.DirectoryTarChunkReader(self.cache_dir.name, cache_filename), casync.build_chunk_dict(target))]
+    sources += [('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(target))]
+
+    stats = casync.extract_directory(target, sources, pathlib.Path(self.out_dir.name), tmp_filename)
+
+    with open(os.path.join(self.out_dir.name, "file_0.txt"), "rb") as f:
+      self.assertEqual(f.read(), self.contents)
+
+    with open(os.path.join(self.out_dir.name, "link_0.txt"), "rb") as f:
+      self.assertEqual(f.read(), self.contents)
+
+    self.assertEqual(os.readlink(os.path.join(self.out_dir.name, "link_0.txt")), "file_0.txt")
+
+    return stats
+
+  def test_no_cache(self):
+    self.setup_cache(self.cache_dir.name, [])
+    stats = self.run_test()
+    self.assertGreater(stats['remote'], 0)
+    self.assertEqual(stats['cache'], 0)
+
+  def test_full_cache(self):
+    self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
+    stats = self.run_test()
+    self.assertEqual(stats['remote'], 0)
+    self.assertGreater(stats['cache'], 0)
+
+  def test_one_file_cache(self):
+    self.setup_cache(self.cache_dir.name, range(1))
+    stats = self.run_test()
+    self.assertGreater(stats['remote'], 0)
+    self.assertGreater(stats['cache'], 0)
+    self.assertLess(stats['cache'], stats['remote'])
+
+  def test_one_file_incorrect_cache(self):
+    self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
+    with open(os.path.join(self.cache_dir.name, "file_0.txt"), "wb") as f:
+      f.write(b"1234")
+    stats = self.run_test()
+    self.assertGreater(stats['remote'], 0)
+    self.assertGreater(stats['cache'], 0)
+    self.assertGreater(stats['cache'], stats['remote'])
+
+  def test_one_file_missing_cache(self):
+    self.setup_cache(self.cache_dir.name, range(self.NUM_FILES))
+    os.unlink(os.path.join(self.cache_dir.name, "file_12.txt"))
+    stats = self.run_test()
+    self.assertGreater(stats['remote'], 0)
+    self.assertGreater(stats['cache'], 0)
+    self.assertGreater(stats['cache'], stats['remote'])
+

 if __name__ == "__main__":
   unittest.main()