LogReader: simplify sourcing logic and improve AUTO performance (#35753)

* sourcing supports any file type!

* stuff

* no camera for now

* i guess these are fine without

* rename

* get rid of these too!

* fix

* fix

* this is better

* start to clean up!

* better

* holy

holy

* clean up sources

* more robust

* working but needs some clean up

* clean up

* remove some trash

* nl

* auto_source can only return if it finds acceptable logs from sources

* double negative is confusing

* default

* list isn't hashable

* fix typing

* clean up

* speed up -- test_models got zst before bz2 in openpilotci, so do that (some segments have both bz2 and zst!)

* don't be a hero

* same behavior for now
pull/35755/head
Shane Smiskol 5 days ago committed by GitHub
parent b50b351b15
commit 618a25a612
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 6
      selfdrive/car/tests/test_models.py
  2. 2
      tools/lib/filereader.py
  3. 185
      tools/lib/logreader.py

@ -21,8 +21,7 @@ from openpilot.common.basedir import BASEDIR
from openpilot.selfdrive.pandad import can_capnp_to_list from openpilot.selfdrive.pandad import can_capnp_to_list
from openpilot.selfdrive.test.helpers import read_segment_list from openpilot.selfdrive.test.helpers import read_segment_list
from openpilot.system.hardware.hw import DEFAULT_DOWNLOAD_CACHE_ROOT from openpilot.system.hardware.hw import DEFAULT_DOWNLOAD_CACHE_ROOT
from openpilot.tools.lib.logreader import LogReader, LogsUnavailable, openpilotci_source_zst, openpilotci_source, internal_source, \ from openpilot.tools.lib.logreader import LogReader, LogsUnavailable, openpilotci_source, internal_source, comma_api_source
internal_source_zst, comma_api_source
from openpilot.tools.lib.route import SegmentName from openpilot.tools.lib.route import SegmentName
SafetyModel = car.CarParams.SafetyModel SafetyModel = car.CarParams.SafetyModel
@ -124,8 +123,7 @@ class TestCarModelBase(unittest.TestCase):
segment_range = f"{cls.test_route.route}/{seg}" segment_range = f"{cls.test_route.route}/{seg}"
try: try:
sources = ([internal_source, internal_source_zst] if len(INTERNAL_SEG_LIST) else sources = [internal_source] if len(INTERNAL_SEG_LIST) else [openpilotci_source, comma_api_source]
[openpilotci_source_zst, openpilotci_source, comma_api_source])
lr = LogReader(segment_range, sources=sources, sort_by_time=True) lr = LogReader(segment_range, sources=sources, sort_by_time=True)
return cls.get_testing_data_from_logreader(lr) return cls.get_testing_data_from_logreader(lr)
except (LogsUnavailable, AssertionError): except (LogsUnavailable, AssertionError):

@ -1,6 +1,7 @@
import os import os
import posixpath import posixpath
import socket import socket
from functools import cache
from urllib.parse import urlparse from urllib.parse import urlparse
from openpilot.tools.lib.url_file import URLFile from openpilot.tools.lib.url_file import URLFile
@ -30,6 +31,7 @@ def resolve_name(fn):
return fn return fn
@cache
def file_exists(fn): def file_exists(fn):
fn = resolve_name(fn) fn = resolve_name(fn)
if fn.startswith(("http://", "https://")): if fn.startswith(("http://", "https://")):

@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import bz2 import bz2
from functools import cache, partial from functools import partial
import multiprocessing import multiprocessing
import capnp import capnp
import enum import enum
@ -13,6 +13,7 @@ import warnings
import zstandard as zstd import zstandard as zstd
from collections.abc import Callable, Iterable, Iterator from collections.abc import Callable, Iterable, Iterator
from typing import cast
from urllib.parse import parse_qs, urlparse from urllib.parse import parse_qs, urlparse
from cereal import log as capnp_log from cereal import log as capnp_log
@ -100,9 +101,13 @@ class ReadMode(enum.StrEnum):
AUTO_INTERACTIVE = "i" # default to rlogs, fallback to qlogs with a prompt from the user AUTO_INTERACTIVE = "i" # default to rlogs, fallback to qlogs with a prompt from the user
class FileName(enum.Enum):
RLOG = ("rlog.zst", "rlog.bz2")
QLOG = ("qlog.zst", "qlog.bz2")
LogPath = str | None LogPath = str | None
ValidFileCallable = Callable[[LogPath], bool] Source = Callable[[SegmentRange, FileName], list[LogPath]]
Source = Callable[[SegmentRange, ReadMode], list[LogPath]]
InternalUnavailableException = Exception("Internal source not available") InternalUnavailableException = Exception("Internal source not available")
@ -111,129 +116,112 @@ class LogsUnavailable(Exception):
pass pass
@cache def comma_api_source(sr: SegmentRange, fns: FileName) -> list[LogPath]:
def default_valid_file(fn: LogPath) -> bool:
return fn is not None and file_exists(fn)
def auto_strategy(rlog_paths: list[LogPath], qlog_paths: list[LogPath], interactive: bool, valid_file: ValidFileCallable) -> list[LogPath]:
# auto select logs based on availability
missing_rlogs = [rlog is None or not valid_file(rlog) for rlog in rlog_paths].count(True)
if missing_rlogs != 0:
if interactive:
if input(f"{missing_rlogs}/{len(rlog_paths)} rlogs were not found, would you like to fallback to qlogs for those segments? (y/n) ").lower() != "y":
return rlog_paths
else:
cloudlog.warning(f"{missing_rlogs}/{len(rlog_paths)} rlogs were not found, falling back to qlogs for those segments...")
return [rlog if valid_file(rlog) else (qlog if valid_file(qlog) else None)
for (rlog, qlog) in zip(rlog_paths, qlog_paths, strict=True)]
return rlog_paths
def apply_strategy(mode: ReadMode, rlog_paths: list[LogPath], qlog_paths: list[LogPath], valid_file: ValidFileCallable = default_valid_file) -> list[LogPath]:
if mode == ReadMode.RLOG:
return rlog_paths
elif mode == ReadMode.QLOG:
return qlog_paths
elif mode == ReadMode.AUTO:
return auto_strategy(rlog_paths, qlog_paths, False, valid_file)
elif mode == ReadMode.AUTO_INTERACTIVE:
return auto_strategy(rlog_paths, qlog_paths, True, valid_file)
raise ValueError(f"invalid mode: {mode}")
def comma_api_source(sr: SegmentRange, mode: ReadMode) -> list[LogPath]:
route = Route(sr.route_name) route = Route(sr.route_name)
rlog_paths = [route.log_paths()[seg] for seg in sr.seg_idxs]
qlog_paths = [route.qlog_paths()[seg] for seg in sr.seg_idxs]
# comma api will have already checked if the file exists # comma api will have already checked if the file exists
def valid_file(fn): if fns == FileName.RLOG:
return fn is not None return [route.log_paths()[seg] for seg in sr.seg_idxs]
else:
return apply_strategy(mode, rlog_paths, qlog_paths, valid_file=valid_file) return [route.qlog_paths()[seg] for seg in sr.seg_idxs]
def internal_source(sr: SegmentRange, mode: ReadMode, file_ext: str = "bz2", def internal_source(sr: SegmentRange, fns: FileName, endpoint_url: str = DATA_ENDPOINT) -> list[LogPath]:
endpoint_url: str = DATA_ENDPOINT) -> list[LogPath]:
if not internal_source_available(endpoint_url): if not internal_source_available(endpoint_url):
raise InternalUnavailableException raise InternalUnavailableException
def get_internal_url(sr: SegmentRange, seg, file): def get_internal_url(sr: SegmentRange, seg, file):
return f"{endpoint_url.rstrip('/')}/{sr.dongle_id}/{sr.log_id}/{seg}/{file}.{file_ext}" return f"{endpoint_url.rstrip('/')}/{sr.dongle_id}/{sr.log_id}/{seg}/{file}"
# TODO: list instead of using static URLs to support routes with multiple file extensions
rlog_paths = [get_internal_url(sr, seg, "rlog") for seg in sr.seg_idxs]
qlog_paths = [get_internal_url(sr, seg, "qlog") for seg in sr.seg_idxs]
return apply_strategy(mode, rlog_paths, qlog_paths)
def internal_source_zst(sr: SegmentRange, mode: ReadMode, file_ext: str = "zst",
endpoint_url: str = DATA_ENDPOINT) -> list[LogPath]:
return internal_source(sr, mode, file_ext, endpoint_url)
def openpilotci_source(sr: SegmentRange, mode: ReadMode, file_ext: str = "bz2") -> list[LogPath]: return eval_source([[get_internal_url(sr, seg, fn) for fn in fns.value] for seg in sr.seg_idxs])
rlog_paths = [get_url(sr.route_name, seg, f"rlog.{file_ext}") for seg in sr.seg_idxs]
qlog_paths = [get_url(sr.route_name, seg, f"qlog.{file_ext}") for seg in sr.seg_idxs]
return apply_strategy(mode, rlog_paths, qlog_paths)
def openpilotci_source(sr: SegmentRange, fns: FileName) -> list[LogPath]:
return eval_source([[get_url(sr.route_name, seg, fn) for fn in fns.value] for seg in sr.seg_idxs])
def openpilotci_source_zst(sr: SegmentRange, mode: ReadMode) -> list[LogPath]:
return openpilotci_source(sr, mode, "zst")
def comma_car_segments_source(sr: SegmentRange, fns: FileName) -> list[LogPath]:
return eval_source([get_comma_segments_url(sr.route_name, seg) for seg in sr.seg_idxs])
def comma_car_segments_source(sr: SegmentRange, mode: ReadMode = ReadMode.RLOG) -> list[LogPath]:
return [get_comma_segments_url(sr.route_name, seg) for seg in sr.seg_idxs]
def testing_closet_source(sr: SegmentRange, fns: FileName) -> list[LogPath]:
def testing_closet_source(sr: SegmentRange, mode=ReadMode.RLOG) -> list[LogPath]:
if not internal_source_available('http://testing.comma.life'): if not internal_source_available('http://testing.comma.life'):
raise InternalUnavailableException raise InternalUnavailableException
return [f"http://testing.comma.life/download/{sr.route_name.replace('|', '/')}/{seg}/rlog" for seg in sr.seg_idxs] return eval_source([f"http://testing.comma.life/download/{sr.route_name.replace('|', '/')}/{seg}/rlog" for seg in sr.seg_idxs])
def direct_source(file_or_url: str) -> list[LogPath]: def direct_source(file_or_url: str) -> list[str]:
return [file_or_url] return [file_or_url]
def get_invalid_files(files): def eval_source(files: list[list[str] | str]) -> list[LogPath]:
for f in files: # Returns valid file URLs given a list of possible file URLs for each segment (e.g. rlog.bz2, rlog.zst)
if f is None or not file_exists(f): valid_files: list[LogPath] = []
yield f
for urls in files:
if isinstance(urls, str):
urls = [urls]
for url in urls:
if file_exists(url):
valid_files.append(url)
break
else:
valid_files.append(None)
def check_source(source: Source, *args) -> list[LogPath]: return valid_files
files = source(*args)
assert len(files) > 0, "No files on source"
assert next(get_invalid_files(files), False) is False, "Some files are invalid"
return files
def auto_source(sr: SegmentRange, sources: list[Source], mode: ReadMode = ReadMode.RLOG) -> list[LogPath]: def auto_source(identifier: str, sources: list[Source], default_mode: ReadMode) -> list[str]:
exceptions = {} exceptions = {}
# for automatic fallback modes, auto_source needs to first check if rlogs exist for any source sr = SegmentRange(identifier)
if mode in [ReadMode.AUTO, ReadMode.AUTO_INTERACTIVE]: mode = default_mode if sr.selector is None else ReadMode(sr.selector)
for source in sources:
try: if mode == ReadMode.QLOG:
return check_source(source, sr, ReadMode.RLOG) try_fns = [FileName.QLOG]
except Exception: else:
pass try_fns = [FileName.RLOG]
# If selector allows it, fallback to qlogs
if mode in (ReadMode.AUTO, ReadMode.AUTO_INTERACTIVE):
try_fns.append(FileName.QLOG)
# Automatically determine viable source # Build a dict of valid files as we evaluate each source. May contain mix of rlogs, qlogs, and None.
# This function only returns when we've sourced all files, or throws an exception
valid_files: dict[int, LogPath] = {}
for fn in try_fns:
for source in sources: for source in sources:
try: try:
return check_source(source, sr, mode) files = source(sr, fn)
# Check every source returns an expected number of files
assert len(files) == len(valid_files) or len(valid_files) == 0, f"Source {source.__name__} returned unexpected number of files"
# Build a dict of valid files
for idx, f in enumerate(files):
if valid_files.get(idx) is None:
valid_files[idx] = f
# We've found all files, return them
if all(f is not None for f in valid_files.values()):
return cast(list[str], list(valid_files.values()))
except Exception as e: except Exception as e:
exceptions[source.__name__] = e exceptions[source.__name__] = e
raise LogsUnavailable("auto_source could not find any valid source, exceptions for sources:\n - " + if fn == try_fns[0]:
"\n - ".join([f"{k}: {repr(v)}" for k, v in exceptions.items()])) missing_logs = list(valid_files.values()).count(None)
if mode == ReadMode.AUTO:
cloudlog.warning(f"{missing_logs}/{len(valid_files)} rlogs were not found, falling back to qlogs for those segments...")
elif mode == ReadMode.AUTO_INTERACTIVE:
if input(f"{missing_logs}/{len(valid_files)} rlogs were not found, would you like to fallback to qlogs for those segments? (y/N) ").lower() != "y":
break
missing_logs = list(valid_files.values()).count(None)
raise LogsUnavailable(f"{missing_logs}/{len(valid_files)} logs were not found, please ensure all logs " +
"are uploaded. You can fall back to qlogs with '/a' selector at the end of the route name.\n\n" +
"Exceptions for sources:\n - " + "\n - ".join([f"{k}: {repr(v)}" for k, v in exceptions.items()]))
def parse_indirect(identifier: str) -> str: def parse_indirect(identifier: str) -> str:
@ -250,7 +238,7 @@ def parse_direct(identifier: str):
class LogReader: class LogReader:
def _parse_identifier(self, identifier: str) -> list[LogPath]: def _parse_identifier(self, identifier: str) -> list[str]:
# useradmin, etc. # useradmin, etc.
identifier = parse_indirect(identifier) identifier = parse_indirect(identifier)
@ -259,21 +247,14 @@ class LogReader:
if direct_parsed is not None: if direct_parsed is not None:
return direct_source(identifier) return direct_source(identifier)
sr = SegmentRange(identifier) identifiers = auto_source(identifier, self.sources, self.default_mode)
mode = self.default_mode if sr.selector is None else ReadMode(sr.selector)
identifiers = auto_source(sr, self.sources, mode)
invalid_count = len(list(get_invalid_files(identifiers)))
assert invalid_count == 0, (f"{invalid_count}/{len(identifiers)} invalid log(s) found, please ensure all logs " +
"are uploaded or auto fallback to qlogs with '/a' selector at the end of the route name.")
return identifiers return identifiers
def __init__(self, identifier: str | list[str], default_mode: ReadMode = ReadMode.RLOG, def __init__(self, identifier: str | list[str], default_mode: ReadMode = ReadMode.RLOG,
sources: list[Source] = None, sort_by_time=False, only_union_types=False): sources: list[Source] = None, sort_by_time=False, only_union_types=False):
if sources is None: if sources is None:
sources = [internal_source, internal_source_zst, openpilotci_source, openpilotci_source_zst, sources = [internal_source, openpilotci_source, comma_api_source,
comma_api_source, comma_car_segments_source, testing_closet_source] comma_car_segments_source, testing_closet_source]
self.default_mode = default_mode self.default_mode = default_mode
self.sources = sources self.sources = sources

Loading…
Cancel
Save