fix log values with non-utf8 characters and remove index_log (#20173)

* remove index_log

* handle non-unicode characters in values

* dump.py better handling of printing non-unicode characters

* only replace errors in main
pull/20200/head^2
Greg Hogan 4 years ago committed by GitHub
parent 167fa565c0
commit 78808961c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      .dockerignore
  2. 1
      SConstruct
  3. 2
      selfdrive/debug/dump.py
  4. 1
      tools/lib/index_log/.gitignore
  5. 9
      tools/lib/index_log/Makefile
  6. 3
      tools/lib/index_log/SConscript
  7. 63
      tools/lib/index_log/index_log.cc
  8. 45
      tools/lib/logreader.py

@ -26,10 +26,8 @@ chffr/backend/env
selfdrive/nav selfdrive/nav
selfdrive/baseui selfdrive/baseui
chffr/lib/vidindex/vidindex chffr/lib/vidindex/vidindex
chffr/lib/index_log/index_log
selfdrive/test/simulator2 selfdrive/test/simulator2
**/cache_data **/cache_data
xx/chffr/lib/index_log/index_log
xx/chffr/lib/vidindex/vidindex xx/chffr/lib/vidindex/vidindex
xx/plus xx/plus
xx/community xx/community

@ -392,7 +392,6 @@ if arch != "Darwin":
if real_arch == "x86_64": if real_arch == "x86_64":
SConscript(['tools/nui/SConscript']) SConscript(['tools/nui/SConscript'])
SConscript(['tools/lib/index_log/SConscript'])
external_sconscript = GetOption('external_sconscript') external_sconscript = GetOption('external_sconscript')
if external_sconscript: if external_sconscript:

@ -4,6 +4,8 @@ import sys
import argparse import argparse
import json import json
from hexdump import hexdump from hexdump import hexdump
import codecs
# Re-register the "strict" codec error handler as backslashreplace, so bytes that
# cannot be decoded are emitted as backslash escape sequences rather than raising.
# NOTE: the codec error-handler registry is process-wide, so this affects every
# encode/decode in the process that uses the default "strict" error mode.
codecs.register_error("strict", codecs.backslashreplace_errors)
from cereal import log from cereal import log
import cereal.messaging as messaging import cereal.messaging as messaging

@ -1 +0,0 @@
index_log

@ -1,9 +0,0 @@
# Builds the index_log binary from index_log.cc, linking against capnp and kj.
# The compiler writes to a path obtained from mktemp, and the result is then
# moved onto the target name with mv.
CC := gcc
CXX := g++
index_log: index_log.cc
$(eval $@_TMP := $(shell mktemp))
$(CXX) -std=c++1z -o $($@_TMP) \
index_log.cc \
-lcapnp -lkj
mv $($@_TMP) $@

@ -1,3 +0,0 @@
# Pull the shared build environment, target arch and messaging library from the parent SConstruct.
Import('env', 'arch', 'messaging')
# Build the index_log executable from index_log.cc, linked against messaging, kj and capnp.
env.Program('index_log', ['index_log.cc'], LIBS=[messaging, 'kj', 'capnp'])

@ -1,63 +0,0 @@
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <string>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <kj/io.h>
#include <capnp/serialize.h>
// Scan a file of back-to-back Cap'n Proto flat messages and write the byte
// offset of each message start as a raw 8-byte uint64 (host byte order).
//
// Usage: index_log <log_path> <index_output_path>
//   index_output_path may be "-" to write the index to stdout.
//
// Returns 0 when the scan completes, including when it stops early because
// capnp rejects a message (the offset of the rejected message has already been
// written at that point). Returns 1 on a usage error or any I/O failure.
//
// Failures are reported with explicit checks rather than assert(): assert is
// compiled out under NDEBUG, and mmap signals failure with MAP_FAILED, which a
// plain null check never catches.
int main(int argc, char** argv) {
  if (argc != 3) {
    printf("usage: %s <log_path> <index_output_path>\n", argv[0]);
    return 1;
  }

  const std::string log_fn = argv[1];
  const std::string index_fn = argv[2];

  int log_fd = open(log_fn.c_str(), O_RDONLY, 0);
  if (log_fd < 0) {
    perror(log_fn.c_str());
    return 1;
  }

  // Determine the file size; this fails for non-seekable inputs such as pipes.
  off_t log_size = lseek(log_fd, 0, SEEK_END);
  if (log_size < 0 || lseek(log_fd, 0, SEEK_SET) < 0) {
    perror("lseek");
    close(log_fd);
    return 1;
  }

  FILE* index_f = NULL;
  if (index_fn == "-") {
    index_f = stdout;
  } else {
    index_f = fopen(index_fn.c_str(), "wb");
  }
  if (index_f == NULL) {
    perror(index_fn.c_str());
    close(log_fd);
    return 1;
  }

  int ret = 0;

  // A zero-length mapping is an error for mmap, so an empty log is handled by
  // simply emitting an empty index.
  if (log_size > 0) {
    void* log_data = mmap(NULL, log_size, PROT_READ, MAP_PRIVATE, log_fd, 0);
    if (log_data == MAP_FAILED) {
      perror("mmap");
      ret = 1;
    } else {
      auto words = kj::arrayPtr((const capnp::word*)log_data, log_size/sizeof(capnp::word));
      while (words.size() > 0) {
        // Offset of the current message relative to the start of the file.
        uint64_t idx = ((uintptr_t)words.begin() - (uintptr_t)log_data);
        if (fwrite(&idx, sizeof(idx), 1, index_f) != 1) {
          perror("fwrite");
          ret = 1;
          break;
        }
        try {
          // Parsing the message header tells us where the next message begins.
          capnp::FlatArrayMessageReader reader(words);
          words = kj::arrayPtr(reader.getEnd(), words.end());
        } catch (const kj::Exception& exc) {
          // Stop at the first message capnp cannot read.
          break;
        }
      }
      munmap(log_data, log_size);
    }
  }

  // fclose flushes buffered offsets; a failure here means the index is incomplete.
  if (fclose(index_f) != 0) {
    perror("fclose");
    ret = 1;
  }
  close(log_fd);

  return ret;
}

@ -2,48 +2,15 @@
import os import os
import sys import sys
import bz2 import bz2
import tempfile
import subprocess
import urllib.parse import urllib.parse
import capnp import capnp
import numpy as np
from tools.lib.exceptions import DataUnreadableError
try: try:
from xx.chffr.lib.filereader import FileReader from xx.chffr.lib.filereader import FileReader
except ImportError: except ImportError:
from tools.lib.filereader import FileReader from tools.lib.filereader import FileReader
from cereal import log as capnp_log from cereal import log as capnp_log
OP_PATH = os.path.dirname(os.path.dirname(capnp_log.__file__))
def index_log(fn):
    """Run the index_log helper binary on *fn* and return its output as uint64s.

    The binary is looked up in the ``index_log`` directory next to this module
    and is built with ``make`` first if it does not exist yet.

    Raises:
        DataUnreadableError: if the helper exits with a non-zero status.
    """
    tool_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "index_log")
    tool_bin = os.path.join(tool_dir, "index_log")

    # Build the helper on first use.
    if not os.path.exists(tool_bin):
        make_cmd = ["make", "PHONELIBS=" + os.path.join(OP_PATH, 'phonelibs')]
        subprocess.check_call(make_cmd, cwd=tool_dir, stdout=subprocess.DEVNULL)

    try:
        # "-" asks the helper to write the index to stdout.
        raw_offsets = subprocess.check_output([tool_bin, fn, "-"])
    except subprocess.CalledProcessError as e:
        raise DataUnreadableError("%s capnp is corrupted/truncated" % fn) from e

    return np.frombuffer(raw_offsets, dtype=np.uint64)
def event_read_multiple_bytes(dat):
    """Split the byte string *dat* into a list of ``capnp_log.Event`` objects.

    *dat* is written to a temporary file so that ``index_log`` can compute the
    start offset of each message; each slice between consecutive offsets is
    then parsed with ``capnp_log.Event.from_bytes``.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write(dat)
        tmp.flush()  # make sure the helper sees the full contents on disk
        offsets = index_log(tmp.name)

    # Append the total length so the last message has an end boundary.
    bounds = np.append(offsets, np.uint64(len(dat)))

    return [capnp_log.Event.from_bytes(dat[start:stop])
            for start, stop in zip(bounds[:-1], bounds[1:])]
# this is an iterator itself, and uses private variables from LogReader # this is an iterator itself, and uses private variables from LogReader
class MultiLogIterator(object): class MultiLogIterator(object):
def __init__(self, log_paths, wraparound=True): def __init__(self, log_paths, wraparound=True):
@ -117,17 +84,17 @@ class LogReader(object):
if ext == "": if ext == "":
# old rlogs weren't bz2 compressed # old rlogs weren't bz2 compressed
ents = event_read_multiple_bytes(dat) ents = capnp_log.Event.read_multiple_bytes(dat)
elif ext == ".bz2": elif ext == ".bz2":
dat = bz2.decompress(dat) dat = bz2.decompress(dat)
ents = event_read_multiple_bytes(dat) ents = capnp_log.Event.read_multiple_bytes(dat)
else: else:
raise Exception(f"unknown extension {ext}") raise Exception(f"unknown extension {ext}")
self._ts = [x.logMonoTime for x in ents] self._ents = list(ents)
self._ts = [x.logMonoTime for x in self._ents]
self.data_version = data_version self.data_version = data_version
self._only_union_types = only_union_types self._only_union_types = only_union_types
self._ents = ents
def __iter__(self): def __iter__(self):
for ent in self._ents: for ent in self._ents:
@ -141,6 +108,10 @@ class LogReader(object):
yield ent yield ent
if __name__ == "__main__": if __name__ == "__main__":
import codecs
# capnproto <= 0.8.0 throws errors converting byte data to string
# below line catches those errors and replaces the bytes with \x__
codecs.register_error("strict", codecs.backslashreplace_errors)
log_path = sys.argv[1] log_path = sys.argv[1]
lr = LogReader(log_path) lr = LogReader(log_path)
for msg in lr: for msg in lr:

Loading…
Cancel
Save