From 5ec9aee2166ec2fefd15128360e4ca26180e6afb Mon Sep 17 00:00:00 2001 From: Shane Smiskol Date: Tue, 19 Aug 2025 15:39:44 -0700 Subject: [PATCH] File sourcing: simplify return type (#36028) * rm str | none pattern * clean up * more clean up * stash * Revert "stash" This reverts commit 3e2472160cc97e9d11922137757d9ef942a0312d. * fix da prints * fix cmt --- tools/lib/logreader.py | 43 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/tools/lib/logreader.py b/tools/lib/logreader.py index 833f9825bb..50e3b9dc7a 100755 --- a/tools/lib/logreader.py +++ b/tools/lib/logreader.py @@ -140,9 +140,8 @@ class ReadMode(enum.StrEnum): AUTO_INTERACTIVE = "i" # default to rlogs, fallback to qlogs with a prompt from the user -LogPath = str | None LogFileName = tuple[str, ...] -Source = Callable[[SegmentRange, list[int], LogFileName], dict[int, LogPath]] +Source = Callable[[SegmentRange, list[int], LogFileName], dict[int, str]] InternalUnavailableException = Exception("Internal source not available") @@ -151,17 +150,17 @@ class LogsUnavailable(Exception): pass -def comma_api_source(sr: SegmentRange, seg_idxs: list[int], fns: LogFileName) -> dict[int, LogPath]: +def comma_api_source(sr: SegmentRange, seg_idxs: list[int], fns: LogFileName) -> dict[int, str]: route = Route(sr.route_name) # comma api will have already checked if the file exists if fns == FileName.RLOG: - return {seg: route.log_paths()[seg] for seg in seg_idxs} + return {seg: route.log_paths()[seg] for seg in seg_idxs if route.log_paths()[seg] is not None} else: - return {seg: route.qlog_paths()[seg] for seg in seg_idxs} + return {seg: route.qlog_paths()[seg] for seg in seg_idxs if route.qlog_paths()[seg] is not None} -def internal_source(sr: SegmentRange, seg_idxs: list[int], fns: LogFileName, endpoint_url: str = DATA_ENDPOINT) -> dict[int, LogPath]: +def internal_source(sr: SegmentRange, seg_idxs: list[int], fns: LogFileName, endpoint_url: str = DATA_ENDPOINT) -> dict[int, str]: if not internal_source_available(endpoint_url): raise InternalUnavailableException @@ -171,11 +170,11 @@ def internal_source(sr: SegmentRange, seg_idxs: list[int], fns: LogFileName, end return eval_source({seg: [get_internal_url(sr, seg, fn) for fn in fns] for seg in seg_idxs}) -def openpilotci_source(sr: SegmentRange, seg_idxs: list[int], fns: LogFileName) -> dict[int, LogPath]: +def openpilotci_source(sr: SegmentRange, seg_idxs: list[int], fns: LogFileName) -> dict[int, str]: return eval_source({seg: [get_url(sr.route_name, seg, fn) for fn in fns] for seg in seg_idxs}) -def comma_car_segments_source(sr: SegmentRange, seg_idxs: list[int], fns: LogFileName) -> dict[int, LogPath]: +def comma_car_segments_source(sr: SegmentRange, seg_idxs: list[int], fns: LogFileName) -> dict[int, str]: return eval_source({seg: get_comma_segments_url(sr.route_name, seg) for seg in seg_idxs}) @@ -183,21 +182,19 @@ def direct_source(file_or_url: str) -> list[str]: return [file_or_url] -def eval_source(files: dict[int, list[str] | str]) -> dict[int, LogPath]: +def eval_source(files: dict[int, list[str] | str]) -> dict[int, str]: # Returns valid file URLs given a list of possible file URLs for each segment (e.g. rlog.bz2, rlog.zst) - valid_files: dict[int, LogPath] = {} + valid_files: dict[int, str] = {} for seg_idx, urls in files.items(): if isinstance(urls, str): urls = [urls] - # Add first valid file URL or None + # Add first valid file URL for url in urls: if file_exists(url): valid_files[seg_idx] = url break - else: - valid_files[seg_idx] = None return valid_files @@ -220,37 +217,35 @@ def auto_source(identifier: str, sources: list[Source], default_mode: ReadMode) # Build a dict of valid files as we evaluate each source. May contain mix of rlogs, qlogs, and None. # This function only returns when we've sourced all files, or throws an exception - valid_files: dict[int, LogPath] = {} + valid_files: dict[int, str] = {} for fn in try_fns: for source in sources: try: files = source(sr, needed_seg_idxs, fn) # Build a dict of valid files - for idx, f in files.items(): - if valid_files.get(idx) is None: - valid_files[idx] = f + valid_files |= files # Don't check for segment files that have already been found - needed_seg_idxs = [idx for idx in needed_seg_idxs if valid_files.get(idx) is None] + needed_seg_idxs = [idx for idx in needed_seg_idxs if idx not in valid_files] # We've found all files, return them - if all(f is not None for f in valid_files.values()): + if len(needed_seg_idxs) == 0: return cast(list[str], list(valid_files.values())) except Exception as e: exceptions[source.__name__] = e if fn == try_fns[0]: - missing_logs = list(valid_files.values()).count(None) + missing_logs = len(needed_seg_idxs) if mode == ReadMode.AUTO: - cloudlog.warning(f"{missing_logs}/{len(valid_files)} rlogs were not found, falling back to qlogs for those segments...") + cloudlog.warning(f"{missing_logs}/{len(sr.seg_idxs)} rlogs were not found, falling back to qlogs for those segments...") elif mode == ReadMode.AUTO_INTERACTIVE: - if input(f"{missing_logs}/{len(valid_files)} rlogs were not found, would you like to fallback to qlogs for those segments? (y/N) ").lower() != "y": + if input(f"{missing_logs}/{len(sr.seg_idxs)} rlogs were not found, would you like to fallback to qlogs for those segments? (y/N) ").lower() != "y": break - missing_logs = list(valid_files.values()).count(None) - raise LogsUnavailable(f"{missing_logs}/{len(valid_files)} logs were not found, please ensure all logs " + + missing_logs = len(needed_seg_idxs) + raise LogsUnavailable(f"{missing_logs}/{len(sr.seg_idxs)} logs were not found, please ensure all logs " + "are uploaded. You can fall back to qlogs with '/a' selector at the end of the route name.\n\n" + "Exceptions for sources:\n - " + "\n - ".join([f"{k}: {repr(v)}" for k, v in exceptions.items()]))