diff --git a/tools/lib/filereader.py b/tools/lib/filereader.py index af3dc5e658..e9b8b4b2ce 100644 --- a/tools/lib/filereader.py +++ b/tools/lib/filereader.py @@ -6,6 +6,7 @@ from openpilot.tools.lib.url_file import URLFile DATA_ENDPOINT = os.getenv("DATA_ENDPOINT", "http://data-raw.comma.internal/") + def internal_source_available(): try: hostname = urlparse(DATA_ENDPOINT).hostname @@ -16,17 +17,20 @@ def internal_source_available(): pass return False + def resolve_name(fn): if fn.startswith("cd:/"): return fn.replace("cd:/", DATA_ENDPOINT) return fn + def file_exists(fn): fn = resolve_name(fn) if fn.startswith(("http://", "https://")): return URLFile(fn).get_length_online() != -1 return os.path.exists(fn) + def FileReader(fn, debug=False): fn = resolve_name(fn) if fn.startswith(("http://", "https://")): diff --git a/tools/lib/helpers.py b/tools/lib/helpers.py index 5ec6c62339..cc4c5e148e 100644 --- a/tools/lib/helpers.py +++ b/tools/lib/helpers.py @@ -3,9 +3,10 @@ import datetime TIME_FMT = "%Y-%m-%d--%H-%M-%S" + # regex patterns class RE: - DONGLE_ID = r'(?P[a-f0-9]{16})' + DONGLE_ID = r'(?P[a-f0-9]{16})' TIMESTAMP = r'(?P[0-9]{4}-[0-9]{2}-[0-9]{2}--[0-9]{2}-[0-9]{2}-[0-9]{2})' LOG_ID_V2 = r'(?P[a-f0-9]{8})--(?P[a-z0-9]{10})' LOG_ID = r'(?P(?:{}|{}))'.format(TIMESTAMP, LOG_ID_V2) diff --git a/tools/lib/logreader.py b/tools/lib/logreader.py index f7548b8c0f..0657a63fbd 100755 --- a/tools/lib/logreader.py +++ b/tools/lib/logreader.py @@ -72,11 +72,12 @@ class _LogFileReader: class ReadMode(enum.StrEnum): - RLOG = "r" # only read rlogs - QLOG = "q" # only read qlogs - SANITIZED = "s" # read from the commaCarSegments database - AUTO = "a" # default to rlogs, fallback to qlogs - AUTO_INTERACIVE = "i" # default to rlogs, fallback to qlogs with a prompt from the user + RLOG = "r" # only read rlogs + QLOG = "q" # only read qlogs + SANITIZED = "s" # read from the commaCarSegments database + AUTO = "a" # default to rlogs, fallback to qlogs + AUTO_INTERACIVE = "i" # default to rlogs, fallback to qlogs with a prompt from the user + def create_slice_from_string(s: str): m = re.fullmatch(RE.SLICE, s) @@ -90,9 +91,11 @@ def create_slice_from_string(s: str): return start return slice(start, end, step) + def default_valid_file(fn): return fn is not None and file_exists(fn) + def auto_strategy(rlog_paths, qlog_paths, interactive, valid_file): # auto select logs based on availability if any(rlog is None or not valid_file(rlog) for rlog in rlog_paths): @@ -103,9 +106,10 @@ def auto_strategy(rlog_paths, qlog_paths, interactive, valid_file): cloudlog.warning("Some rlogs were not found, falling back to qlogs for those segments...") return [rlog if (valid_file(rlog)) else (qlog if (valid_file(qlog)) else None) - for (rlog, qlog) in zip(rlog_paths, qlog_paths, strict=True)] + for (rlog, qlog) in zip(rlog_paths, qlog_paths, strict=True)] return rlog_paths + def apply_strategy(mode: ReadMode, rlog_paths, qlog_paths, valid_file=default_valid_file): if mode == ReadMode.RLOG: return rlog_paths @@ -116,11 +120,12 @@ def apply_strategy(mode: ReadMode, rlog_paths, qlog_paths, valid_file=default_va elif mode == ReadMode.AUTO_INTERACIVE: return auto_strategy(rlog_paths, qlog_paths, True, valid_file) + def parse_slice(sr: SegmentRange): s = create_slice_from_string(sr._slice) if isinstance(s, slice): - if s.stop is None or s.stop < 0 or (s.start is not None and s.start < 0): # we need the number of segments in order to parse this slice - segs = np.arange(sr.get_max_seg_number()+1) + if s.stop is None or s.stop < 0 or (s.start is not None and s.start < 0): # we need the number of segments in order to parse this slice + segs = np.arange(sr.get_max_seg_number() + 1) else: segs = np.arange(s.stop + 1) return segs[s] @@ -129,6 +134,7 @@ def parse_slice(sr: SegmentRange): s = sr.get_max_seg_number() + s + 1 return [s] + def comma_api_source(sr: SegmentRange, mode: ReadMode): segs = parse_slice(sr) @@ -143,6 +149,7 @@ def comma_api_source(sr: SegmentRange, mode: ReadMode): return apply_strategy(mode, rlog_paths, qlog_paths, valid_file=valid_file) + def internal_source(sr: SegmentRange, mode: ReadMode): if not internal_source_available(): raise Exception("Internal source not available") @@ -153,31 +160,36 @@ def internal_source(sr: SegmentRange, mode: ReadMode): return f"cd:/{sr.dongle_id}/{sr.timestamp}/{seg}/{file}.bz2" rlog_paths = [get_internal_url(sr, seg, "rlog") for seg in segs] - qlog_paths = [get_internal_url(sr, seg, "qlog") for seg in segs] + qlog_paths = [get_internal_url(sr, seg, "qlog") for seg in segs] return apply_strategy(mode, rlog_paths, qlog_paths) + def openpilotci_source(sr: SegmentRange, mode: ReadMode): segs = parse_slice(sr) rlog_paths = [get_url(sr.route_name, seg, "rlog") for seg in segs] - qlog_paths = [get_url(sr.route_name, seg, "qlog") for seg in segs] + qlog_paths = [get_url(sr.route_name, seg, "qlog") for seg in segs] return apply_strategy(mode, rlog_paths, qlog_paths) + def comma_car_segments_source(sr: SegmentRange, mode=ReadMode.RLOG): segs = parse_slice(sr) return [get_comma_segments_url(sr.route_name, seg) for seg in segs] + def direct_source(file_or_url): return [file_or_url] + def get_invalid_files(files): for f in files: if f is None or not file_exists(f): yield f + def check_source(source, *args): try: files = source(*args) @@ -186,6 +198,7 @@ def check_source(source, *args): except Exception as e: return e, None + def auto_source(sr: SegmentRange, mode=ReadMode.RLOG): if mode == ReadMode.SANITIZED: return comma_car_segments_source(sr, mode) @@ -201,23 +214,27 @@ def auto_source(sr: SegmentRange, mode=ReadMode.RLOG): raise Exception(f"auto_source could not find any valid source, exceptions for sources: {exceptions}") + def parse_useradmin(identifier): if "useradmin.comma.ai" in identifier: query = parse_qs(urlparse(identifier).query) return query["onebox"][0] return None + def parse_cabana(identifier): if "cabana.comma.ai" in identifier: query = parse_qs(urlparse(identifier).query) return query["route"][0] return None + def parse_direct(identifier): if identifier.startswith(("http://", "https://", "cd:/")) or pathlib.Path(identifier).exists(): return identifier return None + def parse_indirect(identifier): parsed = parse_useradmin(identifier) or parse_cabana(identifier) @@ -295,6 +312,7 @@ are uploaded or auto fallback to qlogs with '/a' selector at the end of the rout if __name__ == "__main__": import codecs + # capnproto <= 0.8.0 throws errors converting byte data to string # below line catches those errors and replaces the bytes with \x__ codecs.register_error("strict", codecs.backslashreplace_errors) diff --git a/tools/lib/route.py b/tools/lib/route.py index 55f7d20a3b..529e42e8e6 100644 --- a/tools/lib/route.py +++ b/tools/lib/route.py @@ -17,6 +17,7 @@ CAMERA_FILENAMES = ['fcamera.hevc', 'video.hevc'] DCAMERA_FILENAMES = ['dcamera.hevc'] ECAMERA_FILENAMES = ['ecamera.hevc'] + class Route: def __init__(self, name, data_dir=None): self._name = RouteName(name) @@ -37,27 +38,27 @@ class Route: def log_paths(self): log_path_by_seg_num = {s.name.segment_num: s.log_path for s in self._segments} - return [log_path_by_seg_num.get(i, None) for i in range(self.max_seg_number+1)] + return [log_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] def qlog_paths(self): qlog_path_by_seg_num = {s.name.segment_num: s.qlog_path for s in self._segments} - return [qlog_path_by_seg_num.get(i, None) for i in range(self.max_seg_number+1)] + return [qlog_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] def camera_paths(self): camera_path_by_seg_num = {s.name.segment_num: s.camera_path for s in self._segments} - return [camera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number+1)] + return [camera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] def dcamera_paths(self): dcamera_path_by_seg_num = {s.name.segment_num: s.dcamera_path for s in self._segments} - return [dcamera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number+1)] + return [dcamera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] def ecamera_paths(self): ecamera_path_by_seg_num = {s.name.segment_num: s.ecamera_path for s in self._segments} - return [ecamera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number+1)] + return [ecamera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] def qcamera_paths(self): qcamera_path_by_seg_num = {s.name.segment_num: s.qcamera_path for s in self._segments} - return [qcamera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number+1)] + return [qcamera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] # TODO: refactor this, it's super repetitive def _get_segments_remote(self): @@ -159,6 +160,7 @@ class Route: raise ValueError(f'Could not find segments for route {self.name.canonical_name} in data directory {data_dir}') return sorted(segments, key=lambda seg: seg.name.segment_num) + class Segment: def __init__(self, name, log_path, qlog_path, camera_path, dcamera_path, ecamera_path, qcamera_path): self._name = SegmentName(name) @@ -173,6 +175,7 @@ class Segment: def name(self): return self._name + class RouteName: def __init__(self, name_str: str): self._name_str = name_str @@ -194,6 +197,7 @@ class RouteName: def __str__(self) -> str: return self._canonical_name + class SegmentName: # TODO: add constructor that takes dongle_id, time_str, segment_num and then create instances # of this class instead of manually constructing a segment name (use canonical_name prop instead) @@ -206,7 +210,7 @@ class SegmentName: seg_num_delim = "--" if self._name_str.count("--") == 2 else "/" name_parts = self._name_str.rsplit(seg_num_delim, 1) if allow_route_name and len(name_parts) == 1: - name_parts.append("-1") # no segment number + name_parts.append("-1") # no segment number self._route_name = RouteName(name_parts[0]) self._num = int(name_parts[1]) self._canonical_name = f"{self._route_name._dongle_id}|{self._route_name._time_str}--{self._num}" diff --git a/tools/lib/tests/test_logreader.py b/tools/lib/tests/test_logreader.py index d04f4ce899..5131835017 100644 --- a/tools/lib/tests/test_logreader.py +++ b/tools/lib/tests/test_logreader.py @@ -12,7 +12,7 @@ from unittest import mock from openpilot.tools.lib.logreader import LogIterable, LogReader, comma_api_source, parse_indirect, parse_slice, ReadMode from openpilot.tools.lib.route import SegmentRange -NUM_SEGS = 17 # number of segments in the test route +NUM_SEGS = 17 # number of segments in the test route ALL_SEGS = list(np.arange(NUM_SEGS)) TEST_ROUTE = "344c5c15b34f2d8a/2024-01-03--09-37-12" QLOG_FILE = "https://commadataci.blob.core.windows.net/openpilotci/0375fdf7b1ce594d/2019-06-13--08-32-25/3/qlog.bz2" @@ -110,7 +110,7 @@ class TestLogReader(unittest.TestCase): qlog_len = len(list(LogReader(f"{TEST_ROUTE}/0/q"))) qlog_len_2 = len(list(LogReader([f"{TEST_ROUTE}/0/q", f"{TEST_ROUTE}/0/q"]))) - self.assertEqual(qlog_len*2, qlog_len_2) + self.assertEqual(qlog_len * 2, qlog_len_2) @pytest.mark.slow @mock.patch("openpilot.tools.lib.logreader._LogFileReader")