add casync support to agnos updater (#23654)
* add casync option to agnos updater
* open if necessary
* add python implementation
* last chunk can be small
* check flags
* cleaner check
* add remote and file stores
* remote caibx file
* print stats
* use python implementation
* clean up imports
* add progress
* fix logging
* fix duplicate chunks
* add comments
* json stats
* cleanup tmp
* normal image is still sparse
* Update system/hardware/tici/agnos.py
Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
* Update system/hardware/tici/agnos.py
Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
* add some types
* remove comment
* create Chunk type
* make readers a class
* try agnos 5.2
* add download retries
* catch all exceptions
* sleep between retry
* revert agnos.json changes
Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
old-commit-hash: 3900781092
branch: taco
parent: 297a0bd65b
commit: 3230474724
2 changed files with 265 additions and 22 deletions
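
The bulk of the additions are the new file shown below: a minimal pure-Python implementation of casync index parsing and chunk extraction.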
@@ -0,0 +1,192 @@
#!/usr/bin/env python3
import io
import lzma
import os
import struct
import sys
import time
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from typing import Callable, Dict, List, Optional, Tuple

import requests
from Crypto.Hash import SHA512

# Magic numbers from the casync on-disk format (caformat.h)
CA_FORMAT_INDEX = 0x96824d9c7b129ff9
CA_FORMAT_TABLE = 0xe75b9e112f17417d
CA_FORMAT_TABLE_TAIL_MARKER = 0x4b4f050e5549ecd1
FLAGS = 0xb000000000000000

# Sizes of the on-disk structures, in bytes
CA_HEADER_LEN = 48
CA_TABLE_HEADER_LEN = 16
CA_TABLE_ENTRY_LEN = 40
CA_TABLE_MIN_LEN = CA_TABLE_HEADER_LEN + CA_TABLE_ENTRY_LEN

CHUNK_DOWNLOAD_TIMEOUT = 10
CHUNK_DOWNLOAD_RETRIES = 3

CAIBX_DOWNLOAD_TIMEOUT = 120

Chunk = namedtuple('Chunk', ['sha', 'offset', 'length'])
ChunkDict = Dict[bytes, Chunk]


class ChunkReader(ABC):
  @abstractmethod
  def read(self, chunk: Chunk) -> bytes:
    ...


class FileChunkReader(ChunkReader):
  """Reads chunks from a local file"""
  def __init__(self, fn: str) -> None:
    super().__init__()
    self.f = open(fn, 'rb')

  def read(self, chunk: Chunk) -> bytes:
    self.f.seek(chunk.offset)
    return self.f.read(chunk.length)


class RemoteChunkReader(ChunkReader):
  """Reads lzma compressed chunks from a remote store"""

  def __init__(self, url: str) -> None:
    super().__init__()
    self.url = url

  def read(self, chunk: Chunk) -> bytes:
    sha_hex = chunk.sha.hex()
    # Chunks are stored as <store>/<first 4 hex digits of hash>/<hash>.cacnk
    url = os.path.join(self.url, sha_hex[:4], sha_hex + ".cacnk")

    for i in range(CHUNK_DOWNLOAD_RETRIES):
      try:
        resp = requests.get(url, timeout=CHUNK_DOWNLOAD_TIMEOUT)
        break
      except Exception:
        if i == CHUNK_DOWNLOAD_RETRIES - 1:
          raise
        time.sleep(CHUNK_DOWNLOAD_TIMEOUT)

    resp.raise_for_status()

    decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_AUTO)
    return decompressor.decompress(resp.content)
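
For illustration, this is the <first 4 hex digits>/<hash>.cacnk fan-out that casync chunk stores use. With a fabricated digest and a hypothetical store URL:

reader = RemoteChunkReader("https://example.com/store")  # hypothetical URL
sha = bytes.fromhex("abcd0123" + "00" * 28)  # fabricated 32-byte digest
# reader.read(Chunk(sha, 0, 1024)) fetches:
#   https://example.com/store/abcd/abcd0123…00.cacnk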


def parse_caibx(caibx_path: str) -> List[Chunk]:
  """Parses the chunks from a caibx file. Can handle both local and remote files.
  Returns a list of chunks with hash, offset and length"""
  if os.path.isfile(caibx_path):
    caibx = open(caibx_path, 'rb')
  else:
    resp = requests.get(caibx_path, timeout=CAIBX_DOWNLOAD_TIMEOUT)
    resp.raise_for_status()
    caibx = io.BytesIO(resp.content)

  caibx.seek(0, os.SEEK_END)
  caibx_len = caibx.tell()
  caibx.seek(0, os.SEEK_SET)

  # Parse header
  length, magic, flags, min_size, _, max_size = struct.unpack("<QQQQQQ", caibx.read(CA_HEADER_LEN))
  assert flags == FLAGS
  assert length == CA_HEADER_LEN
  assert magic == CA_FORMAT_INDEX

  # Parse table header
  length, magic = struct.unpack("<QQ", caibx.read(CA_TABLE_HEADER_LEN))
  assert magic == CA_FORMAT_TABLE

  # Parse chunks
  num_chunks = (caibx_len - CA_HEADER_LEN - CA_TABLE_MIN_LEN) // CA_TABLE_ENTRY_LEN
  chunks = []

  offset = 0
  for i in range(num_chunks):
    new_offset = struct.unpack("<Q", caibx.read(8))[0]

    sha = caibx.read(32)
    length = new_offset - offset

    assert length <= max_size

    # Last chunk can be smaller
    if i < num_chunks - 1:
      assert length >= min_size

    chunks.append(Chunk(sha, offset, length))
    offset = new_offset

  return chunks
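
To make the layout concrete: a 48-byte index header, a 16-byte table header, one 40-byte entry per chunk (little-endian end offset plus 32-byte SHA512/256 digest), and a 40-byte tail entry. Here is a small sketch, not part of the diff, that fabricates a two-chunk index and parses it back; the sizes are arbitrary and the all-zero digests are placeholders, which is fine because parse_caibx does not verify digests.

import tempfile

with tempfile.NamedTemporaryFile(suffix=".caibx", delete=False) as f:
  # 48-byte index header: length, magic, flags, min/avg/max chunk size
  f.write(struct.pack("<QQQQQQ", CA_HEADER_LEN, CA_FORMAT_INDEX, FLAGS, 16, 32, 64))
  # 16-byte table header: size field (not checked above) and magic
  f.write(struct.pack("<QQ", 0, CA_FORMAT_TABLE))
  # 40-byte entries: little-endian end offset + 32-byte digest
  f.write(struct.pack("<Q", 40) + bytes(32))  # chunk 1 covers [0, 40)
  f.write(struct.pack("<Q", 64) + bytes(32))  # chunk 2 covers [40, 64)
  # 40-byte tail entry, accounted for by CA_TABLE_MIN_LEN and never read
  f.write(bytes(CA_TABLE_ENTRY_LEN))

chunks = parse_caibx(f.name)
assert [(c.offset, c.length) for c in chunks] == [(0, 40), (40, 24)]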


def build_chunk_dict(chunks: List[Chunk]) -> ChunkDict:
  """Turn a list of chunks into a dict for faster lookups based on hash"""
  return {c.sha: c for c in chunks}


def extract(target: List[Chunk],
            sources: List[Tuple[str, ChunkReader, ChunkDict]],
            out_path: str,
            progress: Optional[Callable[[int], None]] = None):
  stats: Dict[str, int] = defaultdict(int)

  with open(out_path, 'wb') as out:
    for cur_chunk in target:

      # Find source for desired chunk
      for name, chunk_reader, store_chunks in sources:
        if cur_chunk.sha in store_chunks:
          bts = chunk_reader.read(store_chunks[cur_chunk.sha])

          # Check length
          if len(bts) != cur_chunk.length:
            continue

          # Check hash
          if SHA512.new(bts, truncate="256").digest() != cur_chunk.sha:
            continue

          # Write to output
          out.seek(cur_chunk.offset)
          out.write(bts)

          stats[name] += cur_chunk.length

          if progress is not None:
            progress(sum(stats.values()))

          break
      else:
        raise RuntimeError("Desired chunk not found in provided stores")

  return stats
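
How the pieces compose (every path and URL below is hypothetical): build the target chunk list, then pass extract() an ordered list of stores, cheapest first. Because extract() looks each hash up in the store's own ChunkDict and reads at that store's offset, a previously downloaded image together with its caibx works as a local seed:

caibx_url = "https://example.com/agnos/system.caibx"  # hypothetical
store_url = "https://example.com/agnos/store"         # hypothetical
seed_caibx = "/data/old/system.caibx"                 # hypothetical
seed_img = "/data/old/system.img"                     # hypothetical

target = parse_caibx(caibx_url)
total = sum(c.length for c in target)

sources = [
  # Chunks already on disk are free; they are located via the seed's own index.
  ('seed', FileChunkReader(seed_img), build_chunk_dict(parse_caibx(seed_caibx))),
  # Anything missing is downloaded (lzma-compressed) from the remote store.
  ('remote', RemoteChunkReader(store_url), build_chunk_dict(target)),
]

stats = extract(target, sources, '/data/new/system.img',
                progress=lambda n: print(f"{100 * n / total:.1f}%"))
print_stats(stats)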


def print_stats(stats: Dict[str, int]):
  total_bytes = sum(stats.values())
  print(f"Total size: {total_bytes / 1024 / 1024:.2f} MB")
  for name, total in stats.items():
    print(f"  {name}: {total / 1024 / 1024:.2f} MB ({total / total_bytes * 100:.1f}%)")


def extract_simple(caibx_path, out_path, store_path):
  # (name, callback, chunks)
  target = parse_caibx(caibx_path)
  sources = [
    # (store_path, RemoteChunkReader(store_path), build_chunk_dict(target)),
    (store_path, FileChunkReader(store_path), build_chunk_dict(target)),
  ]

  return extract(target, sources, out_path)


if __name__ == "__main__":
  caibx = sys.argv[1]
  out = sys.argv[2]
  store = sys.argv[3]

  stats = extract_simple(caibx, out, store)
  print_stats(stats)
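
Run directly, the script takes three positional arguments, a caibx path or URL, an output path, and a local store path, and prints per-source stats, e.g. python3 casync.py system.caibx system.img existing_system.img (filenames here are illustrative).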