Your Name
2024-05-12 00:33:45 -05:00
parent 65bcdde866
commit 62115b2e34
179 changed files with 19155 additions and 390 deletions


@@ -1,32 +1,82 @@
#!/usr/bin/env python3
import bz2
import enum
import multiprocessing
import os
import pathlib
import sys
import urllib.parse
import warnings
from functools import partial
from typing import Callable, Dict, Iterable, Iterator, List, Optional, Type
from urllib.parse import parse_qs, urlparse

import capnp
import tqdm

from cereal import log as capnp_log
from openpilot.common.swaglog import cloudlog
from openpilot.tools.lib.comma_car_segments import get_url as get_comma_segments_url
from openpilot.tools.lib.openpilotci import get_url
from openpilot.tools.lib.filereader import FileReader, file_exists, internal_source_available
from openpilot.tools.lib.route import Route, SegmentName, SegmentRange

LogMessage = Type[capnp._DynamicStructReader]
LogIterable = Iterable[LogMessage]
RawLogIterable = Iterable[bytes]
# this is an iterator itself, and uses private variables from LogReader
class MultiLogIterator:
def __init__(self, log_paths, sort_by_time=False):
self._log_paths = log_paths
self.sort_by_time = sort_by_time
self._first_log_idx = next(i for i in range(len(log_paths)) if log_paths[i] is not None)
self._current_log = self._first_log_idx
self._idx = 0
self._log_readers = [None]*len(log_paths)
self.start_time = self._log_reader(self._first_log_idx)._ts[0]
def _log_reader(self, i):
if self._log_readers[i] is None and self._log_paths[i] is not None:
log_path = self._log_paths[i]
self._log_readers[i] = LogReader(log_path, sort_by_time=self.sort_by_time)
return self._log_readers[i]
def __iter__(self) -> Iterator[capnp._DynamicStructReader]:
return self
def _inc(self):
lr = self._log_reader(self._current_log)
if self._idx < len(lr._ents)-1:
self._idx += 1
else:
self._idx = 0
self._current_log = next(i for i in range(self._current_log + 1, len(self._log_readers) + 1)
if i == len(self._log_readers) or self._log_paths[i] is not None)
if self._current_log == len(self._log_readers):
raise StopIteration
def __next__(self):
while 1:
lr = self._log_reader(self._current_log)
ret = lr._ents[self._idx]
self._inc()
return ret
def tell(self):
# returns seconds from start of log
return (self._log_reader(self._current_log)._ts[self._idx] - self.start_time) * 1e-9
def seek(self, ts):
# seek to nearest minute
minute = int(ts/60)
if minute >= len(self._log_paths) or self._log_paths[minute] is None:
return False
self._current_log = minute
# HACK: O(n) seek afterward
self._idx = 0
while self.tell() < ts:
self._inc()
return True
def reset(self):
self.__init__(self._log_paths, sort_by_time=self.sort_by_time)
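# Illustrative usage sketch (not part of the original file; `route` is assumed
# to be a tools.lib.route.Route with at least some logs available):
#
#   lr = MultiLogIterator(route.log_paths(), sort_by_time=True)
#   lr.seek(120)                      # jump ~2 minutes into the route
#   for msg in lr:
#     print(f"{lr.tell():.1f}s", msg.which())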
class _LogFileReader:
def __init__(self, fn, canonicalize=True, only_union_types=False, sort_by_time=False, dat=None):
self.data_version = None
self._only_union_types = only_union_types
@@ -56,6 +106,10 @@ class _LogFileReader:
self._ents = list(sorted(_ents, key=lambda x: x.logMonoTime) if sort_by_time else _ents)
self._ts = [x.logMonoTime for x in self._ents]
@classmethod
def from_bytes(cls, dat):
return cls("", dat=dat)
def __iter__(self) -> Iterator[capnp._DynamicStructReader]:
for ent in self._ents:
if self._only_union_types:
@@ -67,222 +121,17 @@ class _LogFileReader:
else:
yield ent
class ReadMode(enum.StrEnum):
RLOG = "r" # only read rlogs
QLOG = "q" # only read qlogs
SANITIZED = "s" # read from the commaCarSegments database
AUTO = "a" # default to rlogs, fallback to qlogs
AUTO_INTERACTIVE = "i" # default to rlogs, fallback to qlogs with a prompt from the user
LogPath = Optional[str]
LogPaths = List[LogPath]
ValidFileCallable = Callable[[LogPath], bool]
Source = Callable[[SegmentRange, ReadMode], LogPaths]
InternalUnavailableException = Exception("Internal source not available")
def default_valid_file(fn: LogPath) -> bool:
return fn is not None and file_exists(fn)
def auto_strategy(rlog_paths: LogPaths, qlog_paths: LogPaths, interactive: bool, valid_file: ValidFileCallable) -> LogPaths:
# auto select logs based on availability
if any(rlog is None or not valid_file(rlog) for rlog in rlog_paths):
if interactive:
if input("Some rlogs were not found, would you like to fallback to qlogs for those segments? (y/n) ").lower() != "y":
return rlog_paths
else:
cloudlog.warning("Some rlogs were not found, falling back to qlogs for those segments...")
return [rlog if valid_file(rlog) else (qlog if valid_file(qlog) else None)
for (rlog, qlog) in zip(rlog_paths, qlog_paths, strict=True)]
return rlog_paths
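# e.g. (illustrative) with rlogs [ok, missing, ok] and all qlogs present, the
# non-interactive auto strategy returns [rlog_0, qlog_1, rlog_2]; a segment with
# neither log available comes back as None.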
def apply_strategy(mode: ReadMode, rlog_paths: LogPaths, qlog_paths: LogPaths, valid_file: ValidFileCallable = default_valid_file) -> LogPaths:
if mode == ReadMode.RLOG:
return rlog_paths
elif mode == ReadMode.QLOG:
return qlog_paths
elif mode == ReadMode.AUTO:
return auto_strategy(rlog_paths, qlog_paths, False, valid_file)
elif mode == ReadMode.AUTO_INTERACTIVE:
return auto_strategy(rlog_paths, qlog_paths, True, valid_file)
raise Exception(f"invalid mode: {mode}")
def comma_api_source(sr: SegmentRange, mode: ReadMode) -> LogPaths:
route = Route(sr.route_name)
rlog_paths = [route.log_paths()[seg] for seg in sr.seg_idxs]
qlog_paths = [route.qlog_paths()[seg] for seg in sr.seg_idxs]
# comma api will have already checked if the file exists
def valid_file(fn):
return fn is not None
return apply_strategy(mode, rlog_paths, qlog_paths, valid_file=valid_file)
def internal_source(sr: SegmentRange, mode: ReadMode) -> LogPaths:
if not internal_source_available():
raise InternalUnavailableException
def get_internal_url(sr: SegmentRange, seg, file):
return f"cd:/{sr.dongle_id}/{sr.timestamp}/{seg}/{file}.bz2"
rlog_paths = [get_internal_url(sr, seg, "rlog") for seg in sr.seg_idxs]
qlog_paths = [get_internal_url(sr, seg, "qlog") for seg in sr.seg_idxs]
return apply_strategy(mode, rlog_paths, qlog_paths)
def openpilotci_source(sr: SegmentRange, mode: ReadMode) -> LogPaths:
rlog_paths = [get_url(sr.route_name, seg, "rlog") for seg in sr.seg_idxs]
qlog_paths = [get_url(sr.route_name, seg, "qlog") for seg in sr.seg_idxs]
return apply_strategy(mode, rlog_paths, qlog_paths)
def comma_car_segments_source(sr: SegmentRange, mode=ReadMode.RLOG) -> LogPaths:
return [get_comma_segments_url(sr.route_name, seg) for seg in sr.seg_idxs]
def direct_source(file_or_url: str) -> LogPaths:
return [file_or_url]
def get_invalid_files(files):
for f in files:
if f is None or not file_exists(f):
yield f
def check_source(source: Source, *args) -> LogPaths:
files = source(*args)
assert next(get_invalid_files(files), False) is False
return files
def auto_source(sr: SegmentRange, mode=ReadMode.RLOG) -> LogPaths:
if mode == ReadMode.SANITIZED:
return comma_car_segments_source(sr, mode)
SOURCES: List[Source] = [internal_source, openpilotci_source, comma_api_source, comma_car_segments_source,]
exceptions = []
# Automatically determine viable source
for source in SOURCES:
try:
return check_source(source, sr, mode)
except Exception as e:
exceptions.append(e)
raise Exception(f"auto_source could not find any valid source, exceptions for sources: {exceptions}")
def parse_useradmin(identifier: str):
if "useradmin.comma.ai" in identifier:
query = parse_qs(urlparse(identifier).query)
return query["onebox"][0]
return None
def parse_cabana(identifier: str):
if "cabana.comma.ai" in identifier:
query = parse_qs(urlparse(identifier).query)
return query["route"][0]
return None
def parse_direct(identifier: str):
if identifier.startswith(("http://", "https://", "cd:/")) or pathlib.Path(identifier).exists():
return identifier
return None
def parse_indirect(identifier: str):
parsed = parse_useradmin(identifier) or parse_cabana(identifier)
if parsed is not None:
return parsed, comma_api_source, True
return identifier, None, False
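# The parse_* helpers above accept several identifier shapes (URLs below are
# illustrative, not exhaustive):
#   https://useradmin.comma.ai/?onebox=<route>   -> route name taken from ?onebox=
#   https://cabana.comma.ai/?route=<route>       -> route name taken from ?route=
#   /local/path/rlog.bz2, http(s)://..., cd:/... -> passed straight to direct_source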
class LogReader:
def _parse_identifiers(self, identifier: str | List[str]):
if isinstance(identifier, list):
return [i for j in identifier for i in self._parse_identifiers(j)]
parsed, source, is_indirect = parse_indirect(identifier)
if not is_indirect:
direct_parsed = parse_direct(identifier)
if direct_parsed is not None:
return direct_source(identifier)
sr = SegmentRange(parsed)
mode = self.default_mode if sr.selector is None else ReadMode(sr.selector)
source = self.default_source if source is None else source
identifiers = source(sr, mode)
invalid_count = len(list(get_invalid_files(identifiers)))
assert invalid_count == 0, f"{invalid_count}/{len(identifiers)} invalid log(s) found, please ensure all logs \
are uploaded or auto fallback to qlogs with '/a' selector at the end of the route name."
return identifiers
def __init__(self, identifier: str | List[str], default_mode: ReadMode = ReadMode.RLOG,
default_source=auto_source, sort_by_time=False, only_union_types=False):
self.default_mode = default_mode
self.default_source = default_source
self.identifier = identifier
self.sort_by_time = sort_by_time
self.only_union_types = only_union_types
self.__lrs: Dict[int, _LogFileReader] = {}
self.reset()
def _get_lr(self, i):
if i not in self.__lrs:
self.__lrs[i] = _LogFileReader(self.logreader_identifiers[i])
return self.__lrs[i]
def __iter__(self):
for i in range(len(self.logreader_identifiers)):
yield from self._get_lr(i)
def _run_on_segment(self, func, i):
return func(self._get_lr(i))
def run_across_segments(self, num_processes, func):
with multiprocessing.Pool(num_processes) as pool:
ret = []
num_segs = len(self.logreader_identifiers)
for p in tqdm.tqdm(pool.imap(partial(self._run_on_segment, func), range(num_segs)), total=num_segs):
ret.extend(p)
return ret
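  # Illustrative usage sketch (route_name is a placeholder; the mapped function
  # must be picklable, i.e. defined at module level, and should return a list
  # per segment since results are extend()-ed together):
  #
  #   def mono_times(lr):
  #     return [m.logMonoTime for m in lr if m.which() == "carState"]
  #   times = LogReader(route_name).run_across_segments(4, mono_times)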
def reset(self):
self.logreader_identifiers = self._parse_identifiers(self.identifier)
@staticmethod
def from_bytes(dat):
return _LogFileReader("", dat=dat)
def filter(self, msg_type: str):
return (getattr(m, m.which()) for m in filter(lambda m: m.which() == msg_type, self))
def first(self, msg_type: str):
return next(self.filter(msg_type), None)
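# Illustrative sketch: filter() yields the selected union member itself, so
# fields can be read directly off each message (names below are examples from
# cereal, e.g. carState.vEgo):
#
#   lr = LogReader("a2a0ccea32023010|2023-07-27--13-01-19/0")
#   for cs in lr.filter("carState"):
#     print(cs.vEgo)
#   init = lr.first("initData")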
def logreader_from_route_or_segment(r, sort_by_time=False):
sn = SegmentName(r, allow_route_name=True)
route = Route(sn.route_name.canonical_name)
if sn.segment_num < 0:
return MultiLogIterator(route.log_paths(), sort_by_time=sort_by_time)
else:
return LogReader(route.log_paths()[sn.segment_num], sort_by_time=sort_by_time)
if __name__ == "__main__":
import codecs
# capnproto <= 0.8.0 throws errors converting byte data to string
# below line catches those errors and replaces the bytes with \x__
codecs.register_error("strict", codecs.backslashreplace_errors)