diff --git a/common/logging_extra.py b/common/logging_extra.py index 53f4c0a536..5baaac1f90 100644 --- a/common/logging_extra.py +++ b/common/logging_extra.py @@ -3,6 +3,7 @@ import os import sys import copy import json +import time import uuid import socket import logging @@ -11,6 +12,8 @@ from threading import local from collections import OrderedDict from contextlib import contextmanager +LOG_TIMESTAMPS = "LOG_TIMESTAMPS" in os.environ + def json_handler(obj): # if isinstance(obj, (datetime.date, datetime.time)): # return obj.isoformat() @@ -163,6 +166,15 @@ class SwagLogger(logging.Logger): else: self.info(evt) + def timestamp(self, event_name): + if LOG_TIMESTAMPS: + t = time.monotonic() + tstp = NiceOrderedDict() + tstp['timestamp'] = NiceOrderedDict() + tstp['timestamp']["event"] = event_name + tstp['timestamp']["time"] = t*1e9 + self.debug(tstp) + def findCaller(self, stack_info=False, stacklevel=1): """ Find the stack frame of the caller so that we can note the source diff --git a/selfdrive/boardd/boardd.cc b/selfdrive/boardd/boardd.cc index d980f6bbf9..ffa167629f 100644 --- a/selfdrive/boardd/boardd.cc +++ b/selfdrive/boardd/boardd.cc @@ -227,7 +227,9 @@ void can_send_thread(std::vector pandas, bool fake_send) { //Dont send if older than 1 second if ((nanos_since_boot() - event.getLogMonoTime() < 1e9) && !fake_send) { for (const auto& panda : pandas) { + LOGT("sending sendcan to panda: %s", (panda->usb_serial).c_str()); panda->can_send(event.getSendcan()); + LOGT("sendcan sent to panda: %s", (panda->usb_serial).c_str()); } } } diff --git a/selfdrive/camerad/cameras/camera_qcom2.cc b/selfdrive/camerad/cameras/camera_qcom2.cc index e42723ecff..152fbc542f 100644 --- a/selfdrive/camerad/cameras/camera_qcom2.cc +++ b/selfdrive/camerad/cameras/camera_qcom2.cc @@ -1084,8 +1084,10 @@ void process_road_camera(MultiCameraState *s, CameraState *c, int cnt) { if ((c == &s->road_cam && env_send_road) || (c == &s->wide_road_cam && env_send_wide_road)) { framed.setImage(get_frame_image(b)); } + LOGT("%s: Image set", c == &s->road_cam ? "RoadCamera" : "WideRoadCamera"); if (c == &s->road_cam) { framed.setTransform(b->yuv_transform.v); + LOGT("%s: Transformed", c == &s->road_cam ? "RoadCamera" : "WideRoadCamera"); } s->pm->send(c == &s->road_cam ? "roadCameraState" : "wideRoadCameraState", msg); diff --git a/selfdrive/common/swaglog.cc b/selfdrive/common/swaglog.cc index 1fe700415d..618421194c 100644 --- a/selfdrive/common/swaglog.cc +++ b/selfdrive/common/swaglog.cc @@ -63,6 +63,7 @@ class SwaglogState : public LogState { }; static SwaglogState s = {}; +bool LOG_TIMESTAMPS = getenv("LOG_TIMESTAMPS"); static void log(int levelnum, const char* filename, int lineno, const char* func, const char* msg, const std::string& log_s) { if (levelnum >= s.print_level) { @@ -71,23 +72,17 @@ static void log(int levelnum, const char* filename, int lineno, const char* func char levelnum_c = levelnum; zmq_send(s.sock, (levelnum_c + log_s).c_str(), log_s.length() + 1, ZMQ_NOBLOCK); } - -void cloudlog_e(int levelnum, const char* filename, int lineno, const char* func, - const char* fmt, ...) { +static void cloudlog_common(int levelnum, bool is_timestamp, const char* filename, int lineno, const char* func, + const char* fmt, va_list args) { char* msg_buf = nullptr; - va_list args; - va_start(args, fmt); int ret = vasprintf(&msg_buf, fmt, args); - va_end(args); - if (ret <= 0 || !msg_buf) return; std::lock_guard lk(s.lock); if (!s.initialized) s.initialize(); - json11::Json log_j = json11::Json::object { - {"msg", msg_buf}, + json11::Json::object log_j = json11::Json::object { {"ctx", s.ctx_j}, {"levelnum", levelnum}, {"filename", filename}, @@ -95,7 +90,37 @@ void cloudlog_e(int levelnum, const char* filename, int lineno, const char* func {"funcname", func}, {"created", seconds_since_epoch()} }; - std::string log_s = log_j.dump(); + + if (is_timestamp) { + json11::Json::object tspt_j = json11::Json::object { + {"timestamp", json11::Json::object{ + {"event", msg_buf}, + {"time", std::to_string(nanos_since_boot())}} + } + }; + log_j["msg"] = tspt_j; + } else { + log_j["msg"] = msg_buf; + } + + std::string log_s = ((json11::Json)log_j).dump(); log(levelnum, filename, lineno, func, msg_buf, log_s); free(msg_buf); } + +void cloudlog_e(int levelnum, const char* filename, int lineno, const char* func, + const char* fmt, ...){ + va_list args; + va_start(args, fmt); + cloudlog_common(levelnum, false, filename, lineno, func, fmt, args); + va_end(args); +} + +void cloudlog_t(int levelnum, const char* filename, int lineno, const char* func, + const char* fmt, ...){ + if (!LOG_TIMESTAMPS) return; + va_list args; + va_start(args, fmt); + cloudlog_common(levelnum, true, filename, lineno, func, fmt, args); + va_end(args); +} diff --git a/selfdrive/common/swaglog.h b/selfdrive/common/swaglog.h index 6403820ac8..1973724880 100644 --- a/selfdrive/common/swaglog.h +++ b/selfdrive/common/swaglog.h @@ -9,13 +9,19 @@ #define CLOUDLOG_CRITICAL 50 - void cloudlog_e(int levelnum, const char* filename, int lineno, const char* func, const char* fmt, ...) /*__attribute__ ((format (printf, 6, 7)))*/; +void cloudlog_t(int levelnum, const char* filename, int lineno, const char* func, + const char* fmt, ...) /*__attribute__ ((format (printf, 6, 7)))*/; + #define cloudlog(lvl, fmt, ...) cloudlog_e(lvl, __FILE__, __LINE__, \ __func__, \ - fmt, ## __VA_ARGS__) + fmt, ## __VA_ARGS__); + +#define cloudlog_t_m(lvl, fmt, ...) cloudlog_t(lvl, __FILE__, __LINE__, \ + __func__, \ + fmt, ## __VA_ARGS__); #define cloudlog_rl(burst, millis, lvl, fmt, ...) \ { \ @@ -46,6 +52,7 @@ void cloudlog_e(int levelnum, const char* filename, int lineno, const char* func } \ } +#define LOGT(fmt, ...) cloudlog_t_m(CLOUDLOG_DEBUG, fmt,## __VA_ARGS__) #define LOGD(fmt, ...) cloudlog(CLOUDLOG_DEBUG, fmt, ## __VA_ARGS__) #define LOG(fmt, ...) cloudlog(CLOUDLOG_INFO, fmt, ## __VA_ARGS__) #define LOGW(fmt, ...) cloudlog(CLOUDLOG_WARNING, fmt, ## __VA_ARGS__) diff --git a/selfdrive/controls/controlsd.py b/selfdrive/controls/controlsd.py index 922fa7e36b..0b8be1e9ea 100755 --- a/selfdrive/controls/controlsd.py +++ b/selfdrive/controls/controlsd.py @@ -763,9 +763,11 @@ class Controls: # Sample data from sockets and get a carState CS = self.data_sample() + cloudlog.timestamp("Data sampled") self.prof.checkpoint("Sample") self.update_events(CS) + cloudlog.timestamp("Events updated") if not self.read_only and self.initialized: # Update control state diff --git a/selfdrive/controls/lib/lateral_planner.py b/selfdrive/controls/lib/lateral_planner.py index a9c6411394..2590465713 100644 --- a/selfdrive/controls/lib/lateral_planner.py +++ b/selfdrive/controls/lib/lateral_planner.py @@ -103,6 +103,7 @@ class LateralPlanner: plan_send.valid = sm.all_alive_and_valid(service_list=['carState', 'controlsState', 'modelV2']) lateralPlan = plan_send.lateralPlan + lateralPlan.modelMonoTime = sm.logMonoTime['modelV2'] lateralPlan.laneWidth = float(self.LP.lane_width) lateralPlan.dPathPoints = self.y_pts.tolist() lateralPlan.psis = self.lat_mpc.x_sol[0:CONTROL_N, 2].tolist() diff --git a/selfdrive/modeld/models/driving.cc b/selfdrive/modeld/models/driving.cc index 18ab82223b..422ad21cde 100644 --- a/selfdrive/modeld/models/driving.cc +++ b/selfdrive/modeld/models/driving.cc @@ -11,6 +11,7 @@ #include "selfdrive/common/clutil.h" #include "selfdrive/common/params.h" #include "selfdrive/common/timing.h" +#include "selfdrive/common/swaglog.h" constexpr float FCW_THRESHOLD_5MS2_HIGH = 0.15; constexpr float FCW_THRESHOLD_5MS2_LOW = 0.05; @@ -74,12 +75,15 @@ ModelOutput* model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* wbuf, // if getInputBuf is not NULL, net_input_buf will be auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, transform, static_cast(s->m->getInputBuf())); s->m->addImage(net_input_buf, s->frame->buf_size); + LOGT("Image added"); if (wbuf != nullptr) { auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, transform_wide, static_cast(s->m->getExtraBuf())); s->m->addExtra(net_extra_buf, s->wide_frame->buf_size); + LOGT("Extra image added"); } s->m->execute(); + LOGT("Execution finished"); return (ModelOutput*)&s->output; } diff --git a/selfdrive/test/process_replay/ref_commit b/selfdrive/test/process_replay/ref_commit index 3f29976e98..e7e2bd70d2 100644 --- a/selfdrive/test/process_replay/ref_commit +++ b/selfdrive/test/process_replay/ref_commit @@ -1 +1 @@ -fa91a008e1f8078fe02520e31d8b007425194cb8 \ No newline at end of file +156328998163a970ec1e8cb394d3394ce1ae4daa \ No newline at end of file diff --git a/tools/latencylogger/README.md b/tools/latencylogger/README.md new file mode 100644 index 0000000000..3e4b8bffef --- /dev/null +++ b/tools/latencylogger/README.md @@ -0,0 +1,87 @@ +# LatencyLogger + +LatencyLogger is a tool to track the time from first pixel to actuation. Timestamps are printed in a table as well as plotted in a graph. Start openpilot with `LOG_TIMESTAMPS=1` set to enable the necessary logging. + +## Usage + +``` +$ python latency_logger.py -h +usage: latency_logger.py [-h] [--relative] [--demo] [--plot] [route_or_segment_name] + +A tool for analyzing openpilot's end-to-end latency + +positional arguments: + route_or_segment_name + The route to print (default: None) + +optional arguments: + -h, --help show this help message and exit + --relative Make timestamps relative to the start of each frame (default: False) + --demo Use the demo route instead of providing one (default: False) + --plot If a plot should be generated (default: False) +``` + +## Examples +Plotting with relative starts each process at time=0 and gives a nice overview. +![relself](https://user-images.githubusercontent.com/42323981/161629832-c6f28874-4b0b-437a-961e-d80adbf8dd97.png) +Plotting without relative provides info about the frames relative time. +![relfirst](https://user-images.githubusercontent.com/42323981/161629886-3283e7c8-1bb0-4f3c-bede-4ceac1d2e140.png) + + +Printed timestamps of a frame with internal durations. +``` +Frame ID: 303 + camerad + roadCameraState start of frame 0.0 + wideRoadCameraState start of frame 0.091926 + RoadCamera: Image set 1.691696 + RoadCamera: Transformed 1.812841 + roadCameraState published 51.775466 + wideRoadCameraState published 54.935164 + roadCameraState.processingTime 1.6455530421808362 + wideRoadCameraState.processingTime 4.790564067661762 + modeld + Image added 56.628788 + Extra image added 57.459923 + Execution finished 75.091306 + modelV2 published 75.24797 + modelV2.modelExecutionTime 20.00947669148445 + modelV2.gpuExecutionTime 0.0 + plannerd + lateralPlan published 80.426861 + longitudinalPlan published 85.722781 + lateralPlan.solverExecutionTime 1.0600379901006818 + longitudinalPlan.solverExecutionTime 1.3830000534653664 + controlsd + Data sampled 89.436221 + Events updated 90.356522 + sendcan published 91.396092 + controlsState published 91.77843 + Data sampled 99.885876 + Events updated 100.696855 + sendcan published 101.600489 + controlsState published 101.941839 + Data sampled 110.087669 + Events updated 111.025365 + sendcan published 112.305921 + controlsState published 112.70451 + Data sampled 119.692803 + Events updated 120.56774 + sendcan published 121.735016 + controlsState published 122.142823 + Data sampled 129.264761 + Events updated 130.024282 + sendcan published 130.950364 + controlsState published 131.281558 + boardd + sending sendcan to panda: 250027001751393037323631 101.705487 + sendcan sent to panda: 250027001751393037323631 102.042462 + sending sendcan to panda: 250027001751393037323631 112.416961 + sendcan sent to panda: 250027001751393037323631 112.792269 + sending sendcan to panda: 250027001751393037323631 121.850952 + sendcan sent to panda: 250027001751393037323631 122.231103 + sending sendcan to panda: 250027001751393037323631 131.045206 + sendcan sent to panda: 250027001751393037323631 131.351296 + sending sendcan to panda: 250027001751393037323631 141.340592 + sendcan sent to panda: 250027001751393037323631 141.700744 +``` diff --git a/tools/latencylogger/latency_logger.py b/tools/latencylogger/latency_logger.py new file mode 100644 index 0000000000..d4d1950756 --- /dev/null +++ b/tools/latencylogger/latency_logger.py @@ -0,0 +1,189 @@ +import argparse +import json +import mpld3 +import sys +from collections import defaultdict +import matplotlib.pyplot as plt + +from tools.lib.logreader import logreader_from_route_or_segment + +DEMO_ROUTE = "9f583b1d93915c31|2022-04-01--17-51-29" + +SERVICES = ['camerad', 'modeld', 'plannerd', 'controlsd', 'boardd'] +# Retrive controlsd frameId from lateralPlan, mismatch with longitudinalPlan will be ignored +MONOTIME_KEYS = ['modelMonoTime', 'lateralPlanMonoTime'] +MSGQ_TO_SERVICE = { + 'roadCameraState': 'camerad', + 'wideRoadCameraState': 'camerad', + 'modelV2': 'modeld', + 'lateralPlan': 'plannerd', + 'longitudinalPlan': 'plannerd', + 'sendcan': 'controlsd', + 'controlsState': 'controlsd' +} +SERVICE_TO_DURATIONS = { + 'camerad': ['processingTime'], + 'modeld': ['modelExecutionTime', 'gpuExecutionTime'], + 'plannerd': ["solverExecutionTime"], +} + +def read_logs(lr): + data = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) + mono_to_frame = {} + frame_mismatches = [] + latest_sendcan_monotime = 0 + for msg in lr: + if msg.which() == 'sendcan': + latest_sendcan_monotime = msg.logMonoTime + continue + + if msg.which() in MSGQ_TO_SERVICE: + service = MSGQ_TO_SERVICE[msg.which()] + msg_obj = getattr(msg, msg.which()) + + frame_id = -1 + if hasattr(msg_obj, "frameId"): + frame_id = msg_obj.frameId + else: + continue_outer = False + for key in MONOTIME_KEYS: + if hasattr(msg_obj, key): + if getattr(msg_obj, key) == 0: + # Filter out controlsd messages which arrive before the camera loop + continue_outer = True + elif getattr(msg_obj, key) in mono_to_frame: + frame_id = mono_to_frame[getattr(msg_obj, key)] + if continue_outer: + continue + mono_to_frame[msg.logMonoTime] = frame_id + + data['timestamp'][frame_id][service].append((msg.which()+" published", msg.logMonoTime)) + if service in SERVICE_TO_DURATIONS: + for duration in SERVICE_TO_DURATIONS[service]: + data['duration'][frame_id][service].append((msg.which()+"."+duration, getattr(msg_obj, duration))) + + if service == SERVICES[0]: + data['timestamp'][frame_id][service].append((msg.which()+" start of frame", msg_obj.timestampSof)) + elif msg.which() == 'controlsState': + # Sendcan is published before controlsState, but the frameId is retrived in CS + data['timestamp'][frame_id][service].append(("sendcan published", latest_sendcan_monotime)) + elif msg.which() == 'modelV2': + if msg_obj.frameIdExtra != frame_id: + frame_mismatches.append(frame_id) + + # Failed frameId fetches are stored in -1 + assert sum([len(data['timestamp'][-1][service]) for service in data['timestamp'][-1].keys()]) < 20, "Too many frameId fetch fails" + del data['timestamp'][-1] + assert len(frame_mismatches) < 20, "Too many frame mismatches" + return (data, frame_mismatches) + +def get_min_time(frame_id, service, timestamps, use_max=False): + return min(timestamps[frame_id][service], key=lambda x: x[1])[1] if not use_max else max(timestamps[frame_id][service], key=lambda x: x[1])[1] + +def get_interval(frame_id, service, timestamps): + try: + service_max = get_min_time(frame_id, service, timestamps, use_max=True) + if service == SERVICES[0]: + service_min = get_min_time(frame_id, service, timestamps) + return (service_min, service_max) + prev_service = SERVICES[SERVICES.index(service)-1] + prev_service_max = get_min_time(frame_id, prev_service, timestamps, use_max=True) + return (prev_service_max, service_max) + except ValueError: + return (-1,-1) + +def find_frame_id(time, service, timestamps): + for frame_id in reversed(timestamps): + start, end = get_interval(frame_id, service, timestamps) + if start <= time <= end: + return frame_id + return -1 + +## ASSUMES THAT AT LEAST ONE CLOUDLOG IS MADE IN CONTROLSD +def insert_cloudlogs(lr, timestamps): + t0 = get_min_time(min(timestamps.keys()), SERVICES[0], timestamps) + failed_inserts = 0 + latest_controls_frameid = 0 + for msg in lr: + if msg.which() == "logMessage": + jmsg = json.loads(msg.logMessage) + if "timestamp" in jmsg['msg']: + time = int(jmsg['msg']['timestamp']['time']) + service = jmsg['ctx']['daemon'] + event = jmsg['msg']['timestamp']['event'] + if time < t0: + # Filter out controlsd messages which arrive before the camera loop + continue + + frame_id = latest_controls_frameid if service == "boardd" else find_frame_id(time, service, timestamps) + if frame_id > -1: + timestamps[frame_id][service].append((event, time)) + if service == 'controlsd': + latest_controls_frameid = frame_id + else: + failed_inserts += 1 + assert failed_inserts < len(timestamps), "Too many failed cloudlog inserts" + +def print_timestamps(timestamps, durations ,relative): + t0 = get_min_time(min(timestamps.keys()), SERVICES[0], timestamps) + for frame_id in timestamps.keys(): + print('='*80) + print("Frame ID:", frame_id) + if relative: + t0 = get_min_time(frame_id, SERVICES[0], timestamps) + for service in SERVICES: + print(" "+service) + events = timestamps[frame_id][service] + for event, time in sorted(events, key = lambda x: x[1]): + print(" "+'%-53s%-53s' %(event, str((time-t0)/1e6))) + for event, time in durations[frame_id][service]: + print(" "+'%-53s%-53s' %(event, str(time*1000))) + +def graph_timestamps(timestamps, relative): + t0 = get_min_time(min(timestamps.keys()), SERVICES[0], timestamps) + fig, ax = plt.subplots() + ax.set_xlim(0, 150 if relative else 750) + ax.set_ylim(0, 15) + + points = {"x": [], "y": [], "labels": []} + for frame_id, services in timestamps.items(): + if relative: + t0 = get_min_time(frame_id, SERVICES[0], timestamps) + service_bars = [] + for service, events in services.items(): + start, end = get_interval(frame_id, service,timestamps) + service_bars.append(((start-t0)/1e6,(end-start)/1e6)) + for event in events: + points["x"].append((event[1]-t0)/1e6) + points["y"].append(frame_id+0.45) + points["labels"].append(event[0]) + ax.broken_barh(service_bars, (frame_id, 0.9), facecolors=(["blue", 'green', 'red', 'yellow', 'purple'])) + + scatter = ax.scatter(points['x'], points['y'], marker="d", edgecolor='black') + tooltip = mpld3.plugins.PointLabelTooltip(scatter, labels=points["labels"]) + ax.legend() + mpld3.plugins.connect(fig, tooltip) + #mpld3.save_html(fig, 'test.html') + mpld3.show() + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="A tool for analyzing openpilot's end-to-end latency", + formatter_class = argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("--relative", action="store_true", help="Make timestamps relative to the start of each frame") + parser.add_argument("--demo", action="store_true", help="Use the demo route instead of providing one") + parser.add_argument("--plot", action="store_true", help="If a plot should be generated") + parser.add_argument("route_or_segment_name", nargs='?', help="The route to print") + + if len(sys.argv) == 1: + parser.print_help() + sys.exit() + args = parser.parse_args() + + r = DEMO_ROUTE if args.demo else args.route_or_segment_name.strip() + lr = logreader_from_route_or_segment(r, sort_by_time=True) + data, frame_mismatches = read_logs(lr) + lr.reset() + insert_cloudlogs(lr, data['timestamp']) + print_timestamps(data['timestamp'], data['duration'], args.relative) + if args.plot: + graph_timestamps(data['timestamp'], args.relative) diff --git a/tools/lib/logreader.py b/tools/lib/logreader.py index f3326f7841..06dd248bf1 100755 --- a/tools/lib/logreader.py +++ b/tools/lib/logreader.py @@ -103,13 +103,13 @@ class LogReader: yield ent -def logreader_from_route_or_segment(r): +def logreader_from_route_or_segment(r, sort_by_time=False): sn = SegmentName(r, allow_route_name=True) route = Route(sn.route_name.canonical_name) if sn.segment_num < 0: - return MultiLogIterator(route.log_paths()) + return MultiLogIterator(route.log_paths(), sort_by_time) else: - return LogReader(route.log_paths()[sn.segment_num]) + return LogReader(route.log_paths()[sn.segment_num], sort_by_time) if __name__ == "__main__":