openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

319 lines
10 KiB

#!/usr/bin/env python3
import bz2
from functools import partial
import multiprocessing
import capnp
import enum
import os
import pathlib
import sys
import tqdm
import urllib.parse
import warnings
import zstandard as zstd
from collections.abc import Callable, Iterable, Iterator
from typing import cast
from urllib.parse import parse_qs, urlparse
from cereal import log as capnp_log
from openpilot.common.swaglog import cloudlog
from openpilot.tools.lib.comma_car_segments import get_url as get_comma_segments_url
from openpilot.tools.lib.openpilotci import get_url
from openpilot.tools.lib.filereader import DATA_ENDPOINT, FileReader, file_exists, internal_source_available
from openpilot.tools.lib.route import Route, SegmentRange
from openpilot.tools.lib.log_time_series import msgs_to_time_series
# Type aliases for annotating log messages and collections of them.
# NOTE(review): `type[...]` denotes the reader *class* rather than an instance; if this alias is meant to
# describe individual messages, plain `capnp._DynamicStructReader` may have been intended — confirm.
LogMessage = type[capnp._DynamicStructReader]
# Any iterable of LogMessage.
LogIterable = Iterable[LogMessage]
# Any iterable of bytes objects (presumably serialized messages — confirm against callers).
RawLogIterable = Iterable[bytes]
def save_log(dest, log_msgs, compress=True):
    """Serialize log messages and write them to ``dest``.

    Every message is converted with ``as_builder().to_bytes()`` and the results are
    concatenated. When ``compress`` is true the payload is compressed according to the
    suffix of ``dest``: bz2 for ``.bz2``, zstd at level 10 for ``.zst``. With any other
    suffix, or with ``compress=False``, the raw bytes are written unchanged.
    """
    payload = b"".join(m.as_builder().to_bytes() for m in log_msgs)

    if compress:
        if dest.endswith(".bz2"):
            payload = bz2.compress(payload)
        elif dest.endswith(".zst"):
            payload = zstd.compress(payload, 10)

    with open(dest, "wb") as out:
        out.write(payload)
def decompress_stream(data: bytes):
    """Return the decompressed contents of a zstd-compressed buffer.

    The buffer is read through ``ZstdDecompressor.stream_reader`` in a single ``read()``
    call (presumably so frames that do not declare their content size still work — confirm).
    """
    with zstd.ZstdDecompressor().stream_reader(data) as stream:
        return stream.read()
class _LogFileReader:
    """Loads every capnp ``Event`` of a single log file, or of a byte buffer, into memory."""

    def __init__(self, fn, canonicalize=True, only_union_types=False, sort_by_time=False, dat=None):
        """Read, decompress and parse one log.

        Args:
            fn: Path or URL of the log; only read when ``dat`` is None.
            canonicalize: Accepted for compatibility; not used by this class.
            only_union_types: When true, iteration skips events whose ``which()`` raises.
            sort_by_time: When true, events are sorted by ``logMonoTime`` after loading.
            dat: Already-loaded log bytes. When given (even if empty), ``fn`` is ignored.

        Raises:
            ValueError: If ``fn`` has an extension other than '', '.bz2' or '.zst'.
        """
        self.data_version = None
        self._only_union_types = only_union_types

        ext = None
        # Compare against None rather than testing truthiness: an empty buffer is still
        # caller-supplied data and must not fall through to reading `fn`.
        if dat is None:
            _, ext = os.path.splitext(urllib.parse.urlparse(fn).path)
            # old rlogs weren't compressed, hence the empty extension is accepted
            if ext not in ('', '.bz2', '.zst'):
                raise ValueError(f"unknown extension {ext}")

            with FileReader(fn) as f:
                dat = f.read()

        # Compression is detected from the extension when known, otherwise from the leading bytes.
        if ext == ".bz2" or dat.startswith(b'BZh9'):
            dat = bz2.decompress(dat)
        elif ext == ".zst" or dat.startswith(b'\x28\xB5\x2F\xFD'):
            # https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#zstandard-frames
            dat = decompress_stream(dat)

        ents = capnp_log.Event.read_multiple_bytes(dat)

        self._ents = []
        try:
            for e in ents:
                self._ents.append(e)
        except capnp.KjException:
            # Keep whatever was read before the failure instead of discarding the whole log.
            warnings.warn("Corrupted events detected", RuntimeWarning, stacklevel=1)

        if sort_by_time:
            self._ents.sort(key=lambda x: x.logMonoTime)

    def __iter__(self) -> Iterator[capnp._DynamicStructReader]:
        """Yield the loaded events, skipping those whose union type cannot be resolved if requested."""
        for ent in self._ents:
            if self._only_union_types:
                # Only the probe is guarded; the yield stays outside the try so that an
                # exception thrown into the generator by the consumer is never swallowed.
                try:
                    ent.which()
                except capnp.KjException:
                    continue
            yield ent
class ReadMode(enum.StrEnum):
RLOG = "r" # only read rlogs
QLOG = "q" # only read qlogs
AUTO = "a" # default to rlogs, fallback to qlogs
AUTO_INTERACTIVE = "i" # default to rlogs, fallback to qlogs with a prompt from the user
class FileName(enum.Enum):
    """Candidate file names for each log kind, in the order sources try them."""

    RLOG = (
        "rlog.zst",
        "rlog.bz2",
    )
    QLOG = (
        "qlog.zst",
        "qlog.bz2",
    )
# Location of a log (path or URL); None marks a segment for which no file was found.
LogPath = str | None
# A source takes a segment range and a file kind and returns one LogPath per requested segment.
Source = Callable[[SegmentRange, FileName], list[LogPath]]
# Raised by sources that depend on an internal endpoint when that endpoint is not available.
# NOTE(review): this is a single shared exception *instance*, not a class, so every raise reuses the
# same object and it cannot be named in an `except` clause — consider an Exception subclass.
InternalUnavailableException = Exception("Internal source not available")
class LogsUnavailable(Exception):
    """Raised when not every requested segment could be resolved to a log file."""
def comma_api_source(sr: SegmentRange, fns: FileName) -> list[LogPath]:
    """Look up log paths for the requested segments through ``Route``.

    rlog paths are returned when ``fns`` is ``FileName.RLOG``, qlog paths otherwise.
    """
    route = Route(sr.route_name)

    # comma api will have already checked if the file exists
    list_paths = route.log_paths if fns == FileName.RLOG else route.qlog_paths
    return [list_paths()[seg] for seg in sr.seg_idxs]
def internal_source(sr: SegmentRange, fns: FileName, endpoint_url: str = DATA_ENDPOINT) -> list[LogPath]:
    """Resolve segments against an internal data endpoint.

    For each segment, one candidate URL per file name in ``fns`` is built as
    ``<endpoint>/<dongle_id>/<log_id>/<seg>/<file>`` and passed to ``eval_source``.

    Raises:
        InternalUnavailableException: if the endpoint is reported as unavailable.
    """
    if not internal_source_available(endpoint_url):
        raise InternalUnavailableException

    base = endpoint_url.rstrip('/')
    candidates = []
    for seg in sr.seg_idxs:
        candidates.append([f"{base}/{sr.dongle_id}/{sr.log_id}/{seg}/{file}" for file in fns.value])
    return eval_source(candidates)
def openpilotci_source(sr: SegmentRange, fns: FileName) -> list[LogPath]:
    """Resolve segments using URLs from ``get_url``, one candidate per file name in ``fns``."""
    candidates = []
    for seg in sr.seg_idxs:
        candidates.append([get_url(sr.route_name, seg, name) for name in fns.value])
    return eval_source(candidates)
def comma_car_segments_source(sr: SegmentRange, fns: FileName) -> list[LogPath]:
    """Resolve segments using one URL per segment from ``get_comma_segments_url``.

    ``fns`` is accepted to match the ``Source`` signature but is not consulted here.
    """
    urls = []
    for seg in sr.seg_idxs:
        urls.append(get_comma_segments_url(sr.route_name, seg))
    return eval_source(urls)
def testing_closet_source(sr: SegmentRange, fns: FileName) -> list[LogPath]:
    """Resolve segments against the testing.comma.life download endpoint.

    NOTE(review): the URL always ends in ``rlog`` regardless of ``fns`` — confirm this is intended.

    Raises:
        InternalUnavailableException: if the host is reported as unavailable.
    """
    if not internal_source_available('http://testing.comma.life'):
        raise InternalUnavailableException

    urls = []
    for seg in sr.seg_idxs:
        urls.append(f"http://testing.comma.life/download/{sr.route_name.replace('|', '/')}/{seg}/rlog")
    return eval_source(urls)
def direct_source(file_or_url: str) -> list[str]:
    """Wrap an already-resolved file path or URL in a single-element list."""
    resolved = [file_or_url]
    return resolved
def eval_source(files: list[list[str] | str]) -> list[LogPath]:
    """Pick the first existing URL for each segment.

    Each entry of ``files`` is either a single URL or a list of alternative URLs for one
    segment (e.g. rlog.bz2, rlog.zst). Alternatives are probed in order with ``file_exists``
    and probing stops at the first hit. The result has one entry per input entry: the
    URL that exists, or None when none of the alternatives does.
    """
    resolved: list[LogPath] = []
    for candidates in files:
        if isinstance(candidates, str):
            candidates = [candidates]
        # next() on the lazy generator stops probing as soon as one URL exists
        resolved.append(next((url for url in candidates if file_exists(url)), None))
    return resolved
def auto_source(identifier: str, sources: list[Source], default_mode: ReadMode) -> list[str]:
    """Resolve every segment of ``identifier`` to a log file by trying ``sources`` in order.

    The read mode comes from the selector of the parsed ``SegmentRange`` when present,
    otherwise from ``default_mode``. rlogs are tried first (qlogs only, in QLOG mode); in
    the AUTO modes qlogs are then used to fill in segments still missing. In
    AUTO_INTERACTIVE mode the user is asked before falling back.

    Args:
        identifier: Route / segment-range identifier understood by ``SegmentRange``.
        sources: Callables tried in order for each file kind.
        default_mode: Mode used when the identifier carries no selector.

    Returns:
        One file path/URL per segment, as soon as every segment has one.

    Raises:
        LogsUnavailable: if some segment could not be resolved; the message lists the last
            exception recorded for each failing source.
    """
    def _source_name(src) -> str:
        # Not every callable has __name__ (e.g. functools.partial objects); fall back to
        # repr so that recording a failure can never itself raise.
        return getattr(src, "__name__", repr(src))

    exceptions = {}

    sr = SegmentRange(identifier)
    mode = default_mode if sr.selector is None else ReadMode(sr.selector)

    if mode == ReadMode.QLOG:
        try_fns = [FileName.QLOG]
    else:
        try_fns = [FileName.RLOG]

    # If selector allows it, fallback to qlogs
    if mode in (ReadMode.AUTO, ReadMode.AUTO_INTERACTIVE):
        try_fns.append(FileName.QLOG)

    # Build a dict of valid files as we evaluate each source. May contain mix of rlogs, qlogs, and None.
    # This function only returns when we've sourced all files, or throws an exception
    valid_files: dict[int, LogPath] = {}
    for fn in try_fns:
        for source in sources:
            try:
                files = source(sr, fn)

                # Check every source returns an expected number of files.
                # Raised explicitly (not via `assert`) so the check survives `python -O`;
                # it is recorded below like any other source failure.
                if len(valid_files) != 0 and len(files) != len(valid_files):
                    raise AssertionError(f"Source {_source_name(source)} returned unexpected number of files")

                # Build a dict of valid files, never overwriting a segment that is already resolved
                for idx, f in enumerate(files):
                    if valid_files.get(idx) is None:
                        valid_files[idx] = f

                # We've found all files, return them
                if all(f is not None for f in valid_files.values()):
                    return cast(list[str], list(valid_files.values()))
            except Exception as e:
                # A failing source must not abort resolution; remember why it failed and move on.
                exceptions[_source_name(source)] = e

        # Only after the first (preferred) file kind: announce or confirm the fallback.
        if fn == try_fns[0]:
            missing_logs = list(valid_files.values()).count(None)
            if mode == ReadMode.AUTO:
                cloudlog.warning(f"{missing_logs}/{len(valid_files)} rlogs were not found, falling back to qlogs for those segments...")
            elif mode == ReadMode.AUTO_INTERACTIVE:
                if input(f"{missing_logs}/{len(valid_files)} rlogs were not found, would you like to fallback to qlogs for those segments? (y/N) ").lower() != "y":
                    break

    missing_logs = list(valid_files.values()).count(None)
    raise LogsUnavailable(f"{missing_logs}/{len(valid_files)} logs were not found, please ensure all logs " +
                          "are uploaded. You can fall back to qlogs with '/a' selector at the end of the route name.\n\n" +
                          "Exceptions for sources:\n  - " + "\n  - ".join([f"{k}: {repr(v)}" for k, v in exceptions.items()]))
def parse_indirect(identifier: str) -> str:
    """Extract the route from a useradmin URL; return any other identifier unchanged.

    For identifiers containing ``useradmin.comma.ai`` the first value of the ``onebox``
    query parameter is returned.
    """
    if "useradmin.comma.ai" not in identifier:
        return identifier

    params = parse_qs(urlparse(identifier).query)
    return params["onebox"][0]
def parse_direct(identifier: str):
    """Return ``identifier`` if it is directly readable, otherwise None.

    An identifier counts as direct when it starts with ``http://``, ``https://`` or
    ``cd:/``, or when it names an existing filesystem path.
    """
    if identifier.startswith(("http://", "https://", "cd:/")):
        return identifier
    if pathlib.Path(identifier).exists():
        return identifier
    return None
class LogReader:
    """Iterates over the events of one or more logs given as route identifiers, URLs or file paths.

    Identifiers are resolved to concrete files on construction (and again on ``reset()``);
    each file is read on first access and the resulting reader is cached per index.
    """

    def _parse_identifier(self, identifier: str) -> list[str]:
        """Resolve a single identifier to the list of files it refers to."""
        # useradmin, etc.
        identifier = parse_indirect(identifier)

        # direct url or file
        if parse_direct(identifier) is not None:
            return direct_source(identifier)

        # anything else is treated as a route / segment range and looked up in the sources
        return auto_source(identifier, self.sources, self.default_mode)

    def __init__(self, identifier: str | list[str], default_mode: ReadMode = ReadMode.RLOG,
                 sources: list[Source] = None, sort_by_time=False, only_union_types=False):
        """Store the options and resolve ``identifier`` (one string or a list) to files."""
        if sources is None:
            sources = [internal_source, openpilotci_source, comma_api_source,
                       comma_car_segments_source, testing_closet_source]

        self.default_mode = default_mode
        self.sources = sources
        # always keep a list so reset() can iterate uniformly
        self.identifier = [identifier] if isinstance(identifier, str) else identifier

        self.sort_by_time = sort_by_time
        self.only_union_types = only_union_types

        # per-index cache of opened readers
        self.__lrs: dict[int, _LogFileReader] = {}
        self.reset()

    def _get_lr(self, i):
        """Return the reader for file ``i``, creating and caching it on first use."""
        reader = self.__lrs.get(i)
        if reader is None:
            reader = _LogFileReader(self.logreader_identifiers[i], sort_by_time=self.sort_by_time, only_union_types=self.only_union_types)
            self.__lrs[i] = reader
        return reader

    def __iter__(self):
        """Yield the events of every resolved file, file by file."""
        file_count = len(self.logreader_identifiers)
        for idx in range(file_count):
            yield from self._get_lr(idx)

    def _run_on_segment(self, func, i):
        """Apply ``func`` to the reader of file ``i``."""
        segment_reader = self._get_lr(i)
        return func(segment_reader)

    def run_across_segments(self, num_processes, func, disable_tqdm=False, desc=None):
        """Run ``func`` on every file in a process pool and concatenate the returned iterables in order."""
        collected = []
        with multiprocessing.Pool(num_processes) as pool:
            total = len(self.logreader_identifiers)
            worker = partial(self._run_on_segment, func)
            progress = tqdm.tqdm(pool.imap(worker, range(total)), total=total, disable=disable_tqdm, desc=desc)
            for part in progress:
                collected.extend(part)
        return collected

    def reset(self):
        """Re-resolve all stored identifiers into the flat list of files to read."""
        self.logreader_identifiers = []
        for ident in self.identifier:
            self.logreader_identifiers.extend(self._parse_identifier(ident))

    @staticmethod
    def from_bytes(dat):
        """Build a single-file reader directly from log bytes."""
        return _LogFileReader("", dat=dat)

    def filter(self, msg_type: str):
        """Lazily yield the payload of every event whose union type is ``msg_type``."""
        matching = filter(lambda m: m.which() == msg_type, self)
        return (getattr(m, m.which()) for m in matching)

    def first(self, msg_type: str):
        """Return the payload of the first event of type ``msg_type``, or None if there is none."""
        return next(self.filter(msg_type), None)

    @property
    def time_series(self):
        """The events converted by ``msgs_to_time_series``."""
        return msgs_to_time_series(self)
if __name__ == "__main__":
    # Command-line use: print every message of the log given as the first argument, in time order.
    import codecs

    # capnproto <= 0.8.0 throws errors converting byte data to string
    # below line catches those errors and replaces the bytes with \x__
    codecs.register_error("strict", codecs.backslashreplace_errors)

    for event in LogReader(sys.argv[1], sort_by_time=True):
        print(event)