openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

422 lines
15 KiB

import math
import json
import os
import pathlib
import psutil
import pytest
import shutil
import subprocess
import time
import numpy as np
from collections import Counter, defaultdict
from pathlib import Path
from tabulate import tabulate
from cereal import car, log
import cereal.messaging as messaging
from cereal.services import SERVICE_LIST
from openpilot.common.basedir import BASEDIR
from openpilot.common.timeout import Timeout
from openpilot.common.params import Params
from openpilot.selfdrive.selfdrived.events import EVENTS, ET
from openpilot.selfdrive.test.helpers import set_params_enabled, release_only
from openpilot.system.hardware import HARDWARE
from openpilot.system.hardware.hw import Paths
from openpilot.tools.lib.logreader import LogReader
from openpilot.tools.lib.log_time_series import msgs_to_time_series
"""
CPU usage budget
* each process is entitled to at least 8%
* total CPU usage of openpilot (sum(PROCS.values())
should not exceed MAX_TOTAL_CPU
"""
TEST_DURATION = 25
LOG_OFFSET = 8
MAX_TOTAL_CPU = 280. # total for all 8 cores
PROCS = {
# Baseline CPU usage by process
"selfdrive.controls.controlsd": 16.0,
"selfdrive.selfdrived.selfdrived": 16.0,
"selfdrive.car.card": 26.0,
"./loggerd": 14.0,
"./encoderd": 17.0,
"./camerad": 10.0,
"selfdrive.controls.plannerd": 9.0,
"./ui": 18.0,
"selfdrive.locationd.paramsd": 9.0,
"./sensord": 7.0,
"selfdrive.controls.radard": 2.0,
"selfdrive.modeld.modeld": 22.0,
"selfdrive.modeld.dmonitoringmodeld": 21.0,
"system.hardware.hardwared": 4.0,
"selfdrive.locationd.calibrationd": 2.0,
Live torque (#25456) * wip torqued * add basic logic * setup in manager * check sanity and publish msg * add first order filter to outputs * wire up controlsd, and update gains * rename intercept to offset * add cloudlog, live values are not updated * fix bugs, do not reset points for now * fix crashes * rename to main * fix bugs, works offline * fix float in cereal bug * add latacc filter * randomly choose points, approx for iid * add variable decay * local param to capnp instead of dict * verify works in replay * use torqued output in controlsd * use in controlsd; use points from past routes * controlsd bugfix * filter before updating gains, needs to be replaced * save all points to ensure smooth transition across routes, revert friction factor to 1.5 * add filters to prevent noisy low-speed data points; improve fit sanity * add engaged buffer * revert lat_acc thresh * use paramsd realtime process config * make latacc-to-torque generic, and overrideable * move freq to 4Hz, avoid storing in np.array, don't publish points in the message * float instead of np * remove constant while storing pts * rename slope, offset to lat_accet_factor, offset * resolve issues * use camelcase in all capnp params * use camelcase everywhere * reduce latacc threshold or sanity, add car_sane todo, save points properly * add and check tag * write param to disk at end of route * remove args * rebase op, cereal * save on exit * restore default handler * cpu usage check * add to process replay * handle reset better, reduce unnecessary computation * always publish raw values - useful for debug * regen routes * update refs * checks on cache restore * check tuning vals too * clean that up * reduce cpu usage * reduce cpu usage by 75% * cleanup * optimize further * handle reset condition better, don't put points in init, use only in corolla * bump cereal after rebasing * update refs * Update common/params.cc Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com> * remove unnecessary checks * Update RELEASES.md Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com> old-commit-hash: 4fa62f146426f76c9c1c2867d9729b33ec612b59
3 years ago
"selfdrive.locationd.torqued": 5.0,
locationd no GPS (#33029) * Pose kf draft old-commit-hash: 17dd4d576e597792f0e18826498c00076739f92b * Fix it old-commit-hash: 13ac120affe58fd22e871586ea5f4d335b3e9d2b * Add translation noise old-commit-hash: 166529cb612858c4ce80649367ac35b2b6007e1d * Add gravity to acc old-commit-hash: 8fcfed544b8e090ccc86b189c13bc03c6c190613 * Use pyx old-commit-hash: 8e69e0baa0a4c43b4d0c22535711f296f05420aa * Indent old-commit-hash: 25b19a73644cdcb571ccf1a1d8acb88a0d066c67 * Reset function old-commit-hash: ca5d2736da15e4fd6539f7268133320735b7c9cc * Add device_from_ned and ned_from_device transformations old-commit-hash: a60d25da0edc311e583549dc015fa595749fd4ae * Fix rotations old-commit-hash: d6d582f7f6d19a2bc2308dbcb0c9f81363e325b6 * kms old-commit-hash: 681bc4e50374795ccc61422c3ce4ffb51389fce2 * Centripetal acceleration old-commit-hash: 6e574506d27e5b76a04b2097d94efa4ca91ead71 * Rewrite draft old-commit-hash: 4a2aad0146267460e5d30036c8cdb2bef94d1d7c * Remove old locationd stuff old-commit-hash: c2be9f7dbf22fb5cd29e437cd7891a7d52266fba * Python process now old-commit-hash: 83fac85f28c0b546b6965aafe1dd8a089e67f5b3 * Process replay fix old-commit-hash: c44f9de98583c49dad0b22497869b3bb0266fcd9 * Add checks for timing and validity old-commit-hash: aed4fbe2d00ca620e01a0e0ee99a4871c939de36 * Fixes old-commit-hash: 3f052c658c16984a34915f38afdfbfd0fb19a267 * Process replay config fixes old-commit-hash: 1c56690ee7ceb3c23c9ec2b2713352191212716e * static analysis fixes old-commit-hash: 6145e2c140ea9aa97e75069c3ddd82172cadc866 * lp in latcontrol old-commit-hash: 9abf7359d68e794c69052724f3aca14b04dd3cca * Fix SensorEvent name for acceleration old-commit-hash: 91a1ad6c604727c9c898ba4aefe9478022b167fd * Ignore sensor readings from segments with multiple imus old-commit-hash: 1f05af63d6cc605ea98d7da0d727a5bd8d7819b0 * Update shebang old-commit-hash: e3f2f5c10df3a4ba698421335bfeffc63d1a8797 * Replace llk with lp old-commit-hash: 99b6c7ba08de6b703708fef0b8fd2d8cb24b92c0 * Refactor locationd scenario test old-commit-hash: 7f5f788f071b7647e36f854df927cc5b6f819a84 * Add more debugging tools old-commit-hash: 8d4e364867e143ea35f4bfd00d8212aaf170a1d1 * Param name update old-commit-hash: 5151e8f5520f067e7808e3f0baa628fbf8fb7337 * Fix expected observations old-commit-hash: d6a0d4c1a96c438fb6893e8b6ff43becf6061b75 * Handle invalid measurements old-commit-hash: 549362571e74ad1e7ec9368f6026378c54a29adf * Fix spelling old-commit-hash: eefd7c4c92fb486452e9b83c7121d2599811852b * Include observations in debug info too old-commit-hash: 625806d1110b3bffe249cd1d03416f2a3f2c1868 * Store error instead of expected observation old-commit-hash: 1cb7a799b67e56af4eddc6608d5b0e295f2d888c * Renames old-commit-hash: a7eec74640fc5cc7a5e86172d2087b66cb93d17d * Zero the yaw old-commit-hash: 96150779590fcb8ac50e8ffe8f8df03105983179 * New state_dot for orientation old-commit-hash: c1456bf3a0c5af2f888aa6ff0b5ffc2e5516ddf7 * Fix the state transformations old-commit-hash: 7cb9b91e2f99caa4ac3fb748c7f23bb8bf9f65db * Update process in test_onroad old-commit-hash: 854afab7c39ca7dec2b42974cecbb5310b82b617 * Test polling on cameraOdometry old-commit-hash: a78e8b7d61618177f15c9892e2fa1e51620deca8 * Keep the copy of x and P returned from predict old-commit-hash: 3c4159a6a7d7383265a99f3f78f8805d2fcfc8cd * Remove polling again old-commit-hash: f7228675c5fd2de5f879c4786859f1abcec27d68 * Remove locationd.cc old-commit-hash: d7005599b2b178e688c3bd1959d6b69357d3a663 * Optim in _finite_check old-commit-hash: 58ad6a06b9380960e9f69eb98663ddb97461e8d5 * Access .t once old-commit-hash: d03252e75ed4cbdb49291a60c904568e6a3d3399 * Move the timing check to cam odo code path old-commit-hash: 6a1c26f8c201e1feb601753f0cb7299cf981b47e * Call all_checks only once old-commit-hash: 373809cebf8d9db89d1ab00f4c8c933f33038e78 * Do not sort old-commit-hash: 2984cd02c0ab76827b8c7e32f7e637b261425025 * Check sm.updated old-commit-hash: 11c48de3f0802eb4783899f6a37737078dbf2da4 * Remove test_params_gps old-commit-hash: 82db4fd1a876cc2402702edc74eba0a8ac5da858 * Increase tolerance old-commit-hash: 927d6f05249d2c8ec40b32e2a0dcde8e1a469fb3 * Fix static old-commit-hash: 2d86d62c74d5ac0ad56ec3855a126e00a26cd490 * Try separate sockets for sensors old-commit-hash: 5dade63947ab237f0b4555f45d941a8851449ab1 * sensor_all_checks old-commit-hash: e25f40dd6b37ee76cd9cc2b19be552baf1355ec3 * Fix static old-commit-hash: 328cf1ad86079746b4f3fde55539e4acb92d285e * Set the cpu limit to 25 old-commit-hash: 7ba696ff54c5d3bfa42e42624d124f2a1914a96d * Make it prettier old-commit-hash: cd6270dec80d8b9dac784ddd4767a1a46bcff4b7 * Prettier old-commit-hash: 1b17931d23d37f299dad54139eaf283a89592bf5 * Increase the cpu budget to 260 old-commit-hash: 20173afb937a2609c8a9905aee0b2b093cb8bba4 * Change trans std mult. 2 works better * Update ref commit * Update ref commit
8 months ago
"selfdrive.locationd.locationd": 25.0,
"selfdrive.ui.soundd": 3.0,
"selfdrive.monitoring.dmonitoringd": 4.0,
"./proclogd": 2.0,
"system.logmessaged": 1.0,
"system.tombstoned": 0,
"./logcatd": 1.0,
"system.micd": 5.0,
"system.timed": 0,
"selfdrive.pandad.pandad": 0,
"system.statsd": 1.0,
"system.loggerd.uploader": 15.0,
"system.loggerd.deleter": 1.0,
}
PROCS.update({
"tici": {
"./pandad": 5.0,
"./ubloxd": 1.0,
"system.ubloxd.pigeond": 6.0,
},
"tizi": {
"./pandad": 19.0,
"system.qcomgpsd.qcomgpsd": 1.0,
}
}.get(HARDWARE.get_device_type(), {}))
TIMINGS = {
# rtols: max/min, rsd
"can": [2.5, 0.35],
"pandaStates": [2.5, 0.35],
"peripheralState": [2.5, 0.35],
"sendcan": [2.5, 0.35],
"carState": [2.5, 0.35],
"carControl": [2.5, 0.35],
"controlsState": [2.5, 0.35],
"longitudinalPlan": [2.5, 0.5],
"driverAssistance": [2.5, 0.5],
"roadCameraState": [2.5, 0.35],
"driverCameraState": [2.5, 0.35],
"modelV2": [2.5, 0.35],
"driverStateV2": [2.5, 0.40],
"livePose": [2.5, 0.35],
"wideRoadCameraState": [1.5, 0.35],
}
LOGS_SIZE = { # MB per segment
"qlog.zst": 0.5,
"rlog.zst": 8.1,
"qcamera.ts": 2.3,
}
LOGS_SIZE.update(dict.fromkeys(['ecamera.hevc', 'fcamera.hevc', 'dcamera.hevc'], 76.5))
def cputime_total(ct):
return ct.cpuUser + ct.cpuSystem + ct.cpuChildrenUser + ct.cpuChildrenSystem
@pytest.mark.tici
class TestOnroad:
@classmethod
def setup_class(cls):
if "DEBUG" in os.environ:
segs = filter(lambda x: os.path.exists(os.path.join(x, "rlog.zst")), Path(Paths.log_root()).iterdir())
segs = sorted(segs, key=lambda x: x.stat().st_mtime)
cls.lr = list(LogReader(os.path.join(segs[-1], "rlog.zst")))
cls.ts = msgs_to_time_series(cls.lr)
return
# setup env
params = Params()
params.remove("CurrentRoute")
params.put_bool("RecordFront", True)
set_params_enabled()
os.environ['REPLAY'] = '1'
os.environ['TESTING_CLOSET'] = '1'
if os.path.exists(Paths.log_root()):
shutil.rmtree(Paths.log_root())
# start manager and run openpilot for TEST_DURATION
proc = None
try:
manager_path = os.path.join(BASEDIR, "system/manager/manager.py")
cls.manager_st = time.monotonic()
proc = subprocess.Popen(["python", manager_path])
sm = messaging.SubMaster(['carState'])
with Timeout(150, "controls didn't start"):
while sm.recv_frame['carState'] < 0:
sm.update(1000)
route = None
cls.segments = []
with Timeout(300, "timed out waiting for logs"):
while route is None:
route = params.get("CurrentRoute", encoding="utf-8")
time.sleep(0.01)
# test car params caching
params.put("CarParamsCache", car.CarParams().to_bytes())
while len(cls.segments) < 1:
segs = set()
if Path(Paths.log_root()).exists():
segs = set(Path(Paths.log_root()).glob(f"{route}--*"))
cls.segments = sorted(segs, key=lambda s: int(str(s).rsplit('--')[-1]))
time.sleep(0.01)
time.sleep(TEST_DURATION)
finally:
cls.gpu_procs = {psutil.Process(int(f.name)).name() for f in pathlib.Path('/sys/devices/virtual/kgsl/kgsl/proc/').iterdir() if f.is_dir()}
if proc is not None:
proc.terminate()
if proc.wait(60) is None:
proc.kill()
cls.lrs = [list(LogReader(os.path.join(str(s), "rlog.zst"))) for s in cls.segments]
cls.lr = cls.lrs[0]
cls.ts = msgs_to_time_series(cls.lr)
cls.log_path = cls.segments[0]
cls.log_sizes = {}
for f in cls.log_path.iterdir():
assert f.is_file()
cls.log_sizes[f] = f.stat().st_size / 1e6
cls.msgs = defaultdict(list)
for m in cls.lr:
cls.msgs[m.which()].append(m)
def test_service_frequencies(self, subtests):
for s, msgs in self.msgs.items():
if s in ('initData', 'sentinel'):
continue
# skip gps services for now
if s in ('ubloxGnss', 'ubloxRaw', 'gnssMeasurements', 'gpsLocation', 'gpsLocationExternal', 'qcomGnss'):
continue
with subtests.test(service=s):
assert len(msgs) >= math.floor(SERVICE_LIST[s].frequency*int(TEST_DURATION*0.8))
def test_manager_starting_time(self):
st = self.ts['managerState']['t'][0]
assert (st - self.manager_st) < 10, f"manager.py took {st - self.manager_st}s to publish the first 'managerState' msg"
def test_cloudlog_size(self):
msgs = self.msgs['logMessage']
total_size = sum(len(m.as_builder().to_bytes()) for m in msgs)
assert total_size < 3.5e5
cnt = Counter(json.loads(m.logMessage)['filename'] for m in msgs)
big_logs = [f for f, n in cnt.most_common(3) if n / sum(cnt.values()) > 30.]
assert len(big_logs) == 0, f"Log spam: {big_logs}"
def test_log_sizes(self, subtests):
# TODO: this isn't super stable between different devices
for f, sz in self.log_sizes.items():
rate = LOGS_SIZE[f.name]/60.
minn = rate * TEST_DURATION * 0.5
maxx = rate * TEST_DURATION * 1.5
with subtests.test(file=f.name):
assert minn < sz < maxx
def test_ui_timings(self):
result = "\n"
result += "------------------------------------------------\n"
result += "-------------- UI Draw Timing ------------------\n"
result += "------------------------------------------------\n"
ts = self.ts['uiDebug']['drawTimeMillis']
result += f"min {min(ts):.2f}ms\n"
result += f"max {max(ts):.2f}ms\n"
result += f"std {np.std(ts):.2f}ms\n"
result += f"mean {np.mean(ts):.2f}ms\n"
result += "------------------------------------------------\n"
print(result)
assert max(ts) < 250.
assert np.mean(ts) < 10.
#self.assertLess(np.std(ts), 5.)
# some slow frames are expected since camerad/modeld can preempt ui
veryslow = [x for x in ts if x > 40.]
assert len(veryslow) < 5, f"Too many slow frame draw times: {veryslow}"
def test_cpu_usage(self, subtests):
print("\n------------------------------------------------")
print("------------------ CPU Usage -------------------")
print("------------------------------------------------")
plogs_by_proc = defaultdict(list)
for pl in self.msgs['procLog']:
for x in pl.procLog.procs:
if len(x.cmdline) > 0:
n = list(x.cmdline)[0]
plogs_by_proc[n].append(x)
cpu_ok = True
dt = (self.msgs['procLog'][-1].logMonoTime - self.msgs['procLog'][0].logMonoTime) / 1e9
header = ['process', 'usage', 'expected', 'max allowed', 'test result']
rows = []
for proc_name, expected in PROCS.items():
error = ""
usage = 0.
x = plogs_by_proc[proc_name]
if len(x) > 2:
cpu_time = cputime_total(x[-1]) - cputime_total(x[0])
usage = cpu_time / dt * 100.
max_allowed = max(expected * 1.8, expected + 5.0)
if usage > max_allowed:
error = "❌ USING MORE CPU THAN EXPECTED ❌"
cpu_ok = False
else:
error = "❌ NO METRICS FOUND ❌"
cpu_ok = False
rows.append([proc_name, usage, expected, max_allowed, error or ""])
print(tabulate(rows, header, tablefmt="simple_grid", stralign="center", numalign="center", floatfmt=".2f"))
# Ensure there's no missing procs
all_procs = {p.name for p in self.msgs['managerState'][0].managerState.processes if p.shouldBeRunning}
for p in all_procs:
with subtests.test(proc=p):
assert any(p in pp for pp in PROCS.keys()), f"Expected CPU usage missing for {p}"
# total CPU check
procs_tot = sum([(max(x) if isinstance(x, tuple) else x) for x in PROCS.values()])
with subtests.test(name="total CPU"):
assert procs_tot < MAX_TOTAL_CPU, "Total CPU budget exceeded"
print("------------------------------------------------")
print(f"Total allocated CPU usage is {procs_tot}%, budget is {MAX_TOTAL_CPU}%, {MAX_TOTAL_CPU-procs_tot:.1f}% left")
print("------------------------------------------------")
assert cpu_ok
def test_memory_usage(self):
print("\n------------------------------------------------")
print("--------------- Memory Usage -------------------")
print("------------------------------------------------")
offset = int(SERVICE_LIST['deviceState'].frequency * LOG_OFFSET)
mems = [m.deviceState.memoryUsagePercent for m in self.msgs['deviceState'][offset:]]
print("Memory usage: ", mems)
# check for big leaks. note that memory usage is
# expected to go up while the MSGQ buffers fill up
assert np.average(mems) <= 65, "Average memory usage above 65%"
assert np.max(np.diff(mems)) <= 4, "Max memory increase too high"
assert np.average(np.diff(mems)) <= 1, "Average memory increase too high"
def test_gpu_usage(self):
assert self.gpu_procs == {"weston", "ui", "camerad", "selfdrive.modeld.modeld", "selfdrive.modeld.dmonitoringmodeld"}
def test_camera_frame_timings(self, subtests):
# test timing within a single camera
result = "\n"
result += "------------------------------------------------\n"
result += "----------------- SOF Timing ------------------\n"
result += "------------------------------------------------\n"
for name in ['roadCameraState', 'wideRoadCameraState', 'driverCameraState']:
ts = self.ts[name]['timestampSof']
d_ms = np.diff(ts) / 1e6
d50 = np.abs(d_ms-50)
result += f"{name} sof delta vs 50ms: min {min(d50):.2f}ms\n"
result += f"{name} sof delta vs 50ms: max {max(d50):.2f}ms\n"
result += f"{name} sof delta vs 50ms: mean {d50.mean():.2f}ms\n"
with subtests.test(camera=name):
assert max(d50) < 5.0, f"high SOF delta vs 50ms: {max(d50)}"
result += "------------------------------------------------\n"
print(result)
def test_mpc_execution_timings(self):
result = "\n"
result += "------------------------------------------------\n"
result += "----------------- MPC Timing ------------------\n"
result += "------------------------------------------------\n"
cfgs = [("longitudinalPlan", 0.05, 0.05),]
for (s, instant_max, avg_max) in cfgs:
ts = [getattr(m, s).solverExecutionTime for m in self.msgs[s]]
assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}"
assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}"
result += f"'{s}' execution time: min {min(ts):.5f}s\n"
result += f"'{s}' execution time: max {max(ts):.5f}s\n"
result += f"'{s}' execution time: mean {np.mean(ts):.5f}s\n"
result += "------------------------------------------------\n"
print(result)
def test_model_execution_timings(self, subtests):
result = "\n"
result += "------------------------------------------------\n"
result += "----------------- Model Timing -----------------\n"
result += "------------------------------------------------\n"
cfgs = [
("modelV2", 0.045, 0.035),
("driverStateV2", 0.045, 0.035),
]
for (s, instant_max, avg_max) in cfgs:
ts = [getattr(m, s).modelExecutionTime for m in self.msgs[s]]
# TODO some init can happen in first iteration
ts = ts[1:]
result += f"'{s}' execution time: min {min(ts):.5f}s\n"
result += f"'{s}' execution time: max {max(ts):.5f}s\n"
result += f"'{s}' execution time: mean {np.mean(ts):.5f}s\n"
with subtests.test(s):
assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}"
assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}"
result += "------------------------------------------------\n"
print(result)
def test_timings(self):
passed = True
print("\n------------------------------------------------")
print("----------------- Service Timings --------------")
print("------------------------------------------------")
header = ['service', 'max', 'min', 'mean', 'expected mean', 'rsd', 'max allowed rsd', 'test result']
rows = []
for s, (maxmin, rsd) in TIMINGS.items():
offset = int(SERVICE_LIST[s].frequency * LOG_OFFSET)
msgs = [m.logMonoTime for m in self.msgs[s][offset:]]
if not len(msgs):
raise Exception(f"missing {s}")
ts = np.diff(msgs) / 1e9
dt = 1 / SERVICE_LIST[s].frequency
errors = []
if not np.allclose(np.mean(ts), dt, rtol=0.03, atol=0):
errors.append("❌ FAILED MEAN TIMING CHECK ❌")
if not np.allclose([np.max(ts), np.min(ts)], dt, rtol=maxmin, atol=0):
errors.append("❌ FAILED MAX/MIN TIMING CHECK ❌")
if (np.std(ts)/dt) > rsd:
errors.append("❌ FAILED RSD TIMING CHECK ❌")
passed = not errors and passed
rows.append([s, *(np.array([np.max(ts), np.min(ts), np.mean(ts), dt])*1e3), np.std(ts)/dt, rsd, "\n".join(errors) or ""])
print(tabulate(rows, header, tablefmt="simple_grid", stralign="center", numalign="center", floatfmt=".2f"))
assert passed
@release_only
def test_startup(self):
startup_alert = self.ts['selfdriveState']['alertText1'][0]
expected = EVENTS[log.OnroadEvent.EventName.startup][ET.PERMANENT].alert_text_1
assert startup_alert == expected, "wrong startup alert"
def test_engagable(self):
no_entries = Counter()
for m in self.msgs['onroadEvents']:
for evt in m.onroadEvents:
if evt.noEntry:
no_entries[evt.name] += 1
offset = int(SERVICE_LIST['selfdriveState'].frequency * LOG_OFFSET)
eng = [m.selfdriveState.engageable for m in self.msgs['selfdriveState'][offset:]]
assert all(eng), \
f"Not engageable for whole segment:\n- selfdriveState.engageable: {Counter(eng)}\n- No entry events: {no_entries}"