ci: faster test_onroad (#33906)

* 30s

* ruff

* remove

* timeout

* global

* value

* get value

* better

* format

* cleaner
Maxime Desroches committed 6 months ago (via GitHub)
commit 32c5254415, parent 747b963a29
2 changed files:
  Jenkinsfile (vendored): 4 lines changed
  selfdrive/test/test_onroad.py: 88 lines changed

Jenkinsfile
@@ -66,7 +66,7 @@ fi
 ln -snf ${env.TEST_DIR} /data/pythonpath
 cd ${env.TEST_DIR} || true
-${cmd}
+time ${cmd}
 END"""
 sh script: ssh_cmd, label: step_label
@@ -198,7 +198,7 @@ node {
       //["build master-ci", "cd $SOURCE_DIR/release && TARGET_DIR=$TEST_DIR $SOURCE_DIR/scripts/retry.sh ./build_devel.sh"],
       step("build openpilot", "cd system/manager && ./build.py"),
       step("check dirty", "release/check-dirty.sh"),
-      step("onroad tests", "pytest selfdrive/test/test_onroad.py -s"),
+      step("onroad tests", "pytest selfdrive/test/test_onroad.py -s", [timeout: 60]),
       //["time to onroad", "pytest selfdrive/test/test_time_to_onroad.py"],
     ])
   },
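Two pipeline changes here: every remote command is now wrapped in time, so each step's wall-clock duration shows up in the CI logs, and the onroad test step gets an explicit timeout of 60 (presumably minutes, per Jenkins' usual convention; step() is this Jenkinsfile's own helper, so the exact unit is an assumption). A rough Python analogue of that timeout guard:

import subprocess

# Hypothetical analogue of the Jenkins step timeout: abort a hung test run
# instead of stalling the whole pipeline (60 minutes assumed as the unit).
try:
  subprocess.run(["pytest", "selfdrive/test/test_onroad.py", "-s"],
                 timeout=60 * 60, check=True)
except subprocess.TimeoutExpired:
  raise SystemExit("onroad tests exceeded their time budget")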

selfdrive/test/test_onroad.py

@@ -10,7 +10,6 @@ import time
 import numpy as np
 import zstandard as zstd
 from collections import Counter, defaultdict
-from functools import cached_property
 from pathlib import Path

 from cereal import car, log
@@ -33,6 +32,9 @@ CPU usage budget
 should not exceed MAX_TOTAL_CPU
 """

+TEST_DURATION = 25
+LOG_OFFSET = 8
+
 MAX_TOTAL_CPU = 265.  # total for all 8 cores
 PROCS = {
   # Baseline CPU usage by process
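TEST_DURATION is how long openpilot is left running (in seconds); LOG_OFFSET is how many seconds of startup data the timing-sensitive checks skip, so process warm-up doesn't trip the assertions. The trim used throughout the tests below amounts to this (the 20 Hz rate is an illustrative value, not from the diff):

TEST_DURATION = 25  # seconds of onroad data to record
LOG_OFFSET = 8      # seconds of startup data to ignore in timing checks

freq = 20.0  # a service's publish rate in Hz (illustrative value)
offset = int(freq * LOG_OFFSET)  # number of leading messages to drop
# steady_state = msgs[offset:]   # what the assertions actually inspect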
@@ -58,12 +60,12 @@ PROCS = {
   "./proclogd": 1.54,
   "system.logmessaged": 0.2,
   "system.tombstoned": 0,
-  "./logcatd": 0,
+  "./logcatd": 1.0,
   "system.micd": 5.0,
   "system.timed": 0,
   "selfdrive.pandad.pandad": 0,
   "system.statsd": 0.4,
-  "system.loggerd.uploader": (0.5, 15.0),
+  "system.loggerd.uploader": (0.0, 15.0),
   "system.loggerd.deleter": 0.1,
 }
@@ -98,6 +100,13 @@ TIMINGS = {
   "wideRoadCameraState": [1.5, 0.35],
 }

+LOGS_SIZE_RATE = {
+  "qlog": 0.0083,
+  "rlog": 0.1528,
+  "qcamera.ts": 0.03828,
+}
+LOGS_SIZE_RATE.update(dict.fromkeys(['ecamera.hevc', 'fcamera.hevc'], 1.2740))
+

 def cputime_total(ct):
   return ct.cpuUser + ct.cpuSystem + ct.cpuChildrenUser + ct.cpuChildrenSystem
@@ -124,7 +133,7 @@ class TestOnroad:
     if os.path.exists(Paths.log_root()):
       shutil.rmtree(Paths.log_root())

-    # start manager and run openpilot for a minute
+    # start manager and run openpilot for TEST_DURATION
     proc = None
     try:
       manager_path = os.path.join(BASEDIR, "system/manager/manager.py")
@@ -135,26 +144,24 @@ class TestOnroad:
       while sm.recv_frame['carState'] < 0:
         sm.update(1000)

-      # make sure we get at least two full segments
       route = None
       cls.segments = []
       with Timeout(300, "timed out waiting for logs"):
         while route is None:
           route = params.get("CurrentRoute", encoding="utf-8")
-          time.sleep(0.1)
+          time.sleep(0.01)

         # test car params caching
         params.put("CarParamsCache", car.CarParams().to_bytes())

-        while len(cls.segments) < 3:
+        while len(cls.segments) < 1:
           segs = set()
           if Path(Paths.log_root()).exists():
             segs = set(Path(Paths.log_root()).glob(f"{route}--*"))
           cls.segments = sorted(segs, key=lambda s: int(str(s).rsplit('--')[-1]))
-          time.sleep(2)
+          time.sleep(0.01)

-      # chop off last, incomplete segment
-      cls.segments = cls.segments[:-1]
+      time.sleep(TEST_DURATION)
     finally:
       cls.gpu_procs = {psutil.Process(int(f.name)).name() for f in pathlib.Path('/sys/devices/virtual/kgsl/kgsl/proc/').iterdir() if f.is_dir()}
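This is the core of the speedup: instead of waiting for three ~60 s segments and keeping only the full ones, setup now returns as soon as the first segment directory appears, sleeps TEST_DURATION, and analyzes that (partial) segment. A sketch of the wait, as a hypothetical helper around the same glob used above:

import time
from pathlib import Path

def wait_for_first_segment(log_root: Path, route: str, poll_s: float = 0.01) -> Path:
  # poll until loggerd creates the first "<route>--<idx>" directory (sketch)
  while True:
    segs = sorted(log_root.glob(f"{route}--*"),
                  key=lambda s: int(str(s).rsplit('--')[-1]))
    if segs:
      return segs[0]
    time.sleep(poll_s)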
@@ -166,9 +173,8 @@ class TestOnroad:
     cls.lrs = [list(LogReader(os.path.join(str(s), "rlog"))) for s in cls.segments]

-    # use the second segment by default as it's the first full segment
-    cls.lr = list(LogReader(os.path.join(str(cls.segments[1]), "rlog")))
-    cls.log_path = cls.segments[1]
+    cls.lr = list(LogReader(os.path.join(str(cls.segments[0]), "rlog")))
+    cls.log_path = cls.segments[0]

     cls.log_sizes = {}
     for f in cls.log_path.iterdir():
@@ -178,16 +184,13 @@ class TestOnroad:
         with open(f, 'rb') as ff:
           cls.log_sizes[f] = len(zstd.compress(ff.read(), LOG_COMPRESSION_LEVEL)) / 1e6

-  @cached_property
-  def service_msgs(self):
-    msgs = defaultdict(list)
-    for m in self.lr:
-      msgs[m.which()].append(m)
-    return msgs
+    cls.msgs = defaultdict(list)
+    for m in cls.lr:
+      cls.msgs[m.which()].append(m)

   def test_service_frequencies(self, subtests):
-    for s, msgs in self.service_msgs.items():
+    for s, msgs in self.msgs.items():
       if s in ('initData', 'sentinel'):
         continue
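Grouping log messages by service moves from a lazy per-instance cached_property to a dict built once in setup_class; every test below then indexes the same cls.msgs. A minimal sketch of the pattern, with a stand-in for the log reader:

from collections import defaultdict

log_events = []  # stand-in for list(LogReader(rlog_path))

msgs = defaultdict(list)
for m in log_events:
  msgs[m.which()].append(m)  # keyed by service name, e.g. 'uiDebug'

# tests then read, for example:
# ts = [m.uiDebug.drawTimeMillis for m in msgs['uiDebug']]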
@@ -196,10 +199,10 @@ class TestOnroad:
         continue

       with subtests.test(service=s):
-        assert len(msgs) >= math.floor(SERVICE_LIST[s].frequency*55)
+        assert len(msgs) >= math.floor(SERVICE_LIST[s].frequency*int(TEST_DURATION*0.8))

   def test_cloudlog_size(self):
-    msgs = [m for m in self.lr if m.which() == 'logMessage']
+    msgs = self.msgs['logMessage']

     total_size = sum(len(m.as_builder().to_bytes()) for m in msgs)
     assert total_size < 3.5e5
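The hard-coded 55 s floor is replaced by one derived from TEST_DURATION; note int() is applied to the effective duration (25 × 0.8 = 20 s) before multiplying by the service frequency. A quick check of what that requires, with service rates assumed for illustration:

import math

TEST_DURATION = 25
effective_s = int(TEST_DURATION * 0.8)  # 20 s of required coverage

# hypothetical service rates, for illustration only
for name, freq in {"deviceState": 2.0, "carState": 100.0}.items():
  print(name, "needs >=", math.floor(freq * effective_s), "messages")
# deviceState needs >= 40 messages; carState needs >= 2000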
@@ -210,16 +213,10 @@ class TestOnroad:
   def test_log_sizes(self):
     for f, sz in self.log_sizes.items():
-      if f.name == "qcamera.ts":
-        assert 2.15 < sz < 2.6
-      elif f.name == "qlog":
-        assert 0.4 < sz < 0.55
-      elif f.name == "rlog":
-        assert 5 < sz < 50
-      elif f.name.endswith('.hevc'):
-        assert 70 < sz < 80
-      else:
-        raise NotImplementedError
+      rate = LOGS_SIZE_RATE[f.name]
+      minn = rate * TEST_DURATION * 0.8
+      maxx = rate * TEST_DURATION * 1.2
+      assert minn < sz < maxx

   def test_ui_timings(self):
     result = "\n"
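The per-file hard-coded ranges give way to a single rate model: each log is expected to grow at LOGS_SIZE_RATE[name] MB per second of drive (compressed), within ±20 % of rate × TEST_DURATION. Under the constants in this diff, the allowed bands work out to:

TEST_DURATION = 25
LOGS_SIZE_RATE = {"qlog": 0.0083, "rlog": 0.1528, "qcamera.ts": 0.03828}
LOGS_SIZE_RATE.update(dict.fromkeys(['ecamera.hevc', 'fcamera.hevc'], 1.2740))

for name, rate in LOGS_SIZE_RATE.items():
  lo, hi = 0.8 * rate * TEST_DURATION, 1.2 * rate * TEST_DURATION
  print(f"{name}: {lo:.2f}-{hi:.2f} MB allowed")
# e.g. rlog must compress to between ~3.06 and ~4.58 MB for a 25 s run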
@@ -227,7 +224,7 @@ class TestOnroad:
     result += "-------------- UI Draw Timing ------------------\n"
     result += "------------------------------------------------\n"

-    ts = [m.uiDebug.drawTimeMillis for m in self.service_msgs['uiDebug']]
+    ts = [m.uiDebug.drawTimeMillis for m in self.msgs['uiDebug']]
     result += f"min  {min(ts):.2f}ms\n"
     result += f"max  {max(ts):.2f}ms\n"
     result += f"std  {np.std(ts):.2f}ms\n"
@@ -250,7 +247,7 @@ class TestOnroad:
     result += "------------------------------------------------\n"

     plogs_by_proc = defaultdict(list)
-    for pl in self.service_msgs['procLog']:
+    for pl in self.msgs['procLog']:
       for x in pl.procLog.procs:
         if len(x.cmdline) > 0:
           n = list(x.cmdline)[0]
@@ -258,7 +255,7 @@ class TestOnroad:
     print(plogs_by_proc.keys())

     cpu_ok = True
-    dt = (self.service_msgs['procLog'][-1].logMonoTime - self.service_msgs['procLog'][0].logMonoTime) / 1e9
+    dt = (self.msgs['procLog'][-1].logMonoTime - self.msgs['procLog'][0].logMonoTime) / 1e9
     for proc_name, expected_cpu in PROCS.items():
       err = ""
@@ -290,7 +287,7 @@ class TestOnroad:
     result += "------------------------------------------------\n"

     # Ensure there's no missing procs
-    all_procs = {p.name for p in self.service_msgs['managerState'][0].managerState.processes if p.shouldBeRunning}
+    all_procs = {p.name for p in self.msgs['managerState'][0].managerState.processes if p.shouldBeRunning}
     for p in all_procs:
       with subtests.test(proc=p):
         assert any(p in pp for pp in PROCS.keys()), f"Expected CPU usage missing for {p}"
@@ -308,7 +305,8 @@ class TestOnroad:
     assert cpu_ok

   def test_memory_usage(self):
-    mems = [m.deviceState.memoryUsagePercent for m in self.service_msgs['deviceState']]
+    offset = int(SERVICE_LIST['deviceState'].frequency * LOG_OFFSET)
+    mems = [m.deviceState.memoryUsagePercent for m in self.msgs['deviceState'][offset:]]
     print("Memory usage: ", mems)

     # check for big leaks. note that memory usage is
@@ -324,7 +322,9 @@ class TestOnroad:
     result += "-------------- ImgProc Timing ------------------\n"
     result += "------------------------------------------------\n"

-    ts = [getattr(m, m.which()).processingTime for m in self.lr if 'CameraState' in m.which()]
+    ts = []
+    for s in ['roadCameraState', 'driverCameraState', 'wideCameraState']:
+      ts.extend(getattr(m, s).processingTime for m in self.msgs[s])
     assert min(ts) < 0.025, f"high execution time: {min(ts)}"
     result += f"execution time: min  {min(ts):.5f}s\n"
     result += f"execution time: max  {max(ts):.5f}s\n"
@@ -357,7 +357,7 @@ class TestOnroad:
     cfgs = [("longitudinalPlan", 0.05, 0.05),]
     for (s, instant_max, avg_max) in cfgs:
-      ts = [getattr(m, s).solverExecutionTime for m in self.service_msgs[s]]
+      ts = [getattr(m, s).solverExecutionTime for m in self.msgs[s]]
       assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}"
       assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}"
       result += f"'{s}' execution time: min  {min(ts):.5f}s\n"
@@ -377,7 +377,7 @@ class TestOnroad:
       ("driverStateV2", 0.050, 0.026),
     ]
     for (s, instant_max, avg_max) in cfgs:
-      ts = [getattr(m, s).modelExecutionTime for m in self.service_msgs[s]]
+      ts = [getattr(m, s).modelExecutionTime for m in self.msgs[s]]
       assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}"
       assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}"
       result += f"'{s}' execution time: min  {min(ts):.5f}s\n"
@@ -393,7 +393,8 @@ class TestOnroad:
     result += "----------------- Service Timings --------------\n"
     result += "------------------------------------------------\n"
     for s, (maxmin, rsd) in TIMINGS.items():
-      msgs = [m.logMonoTime for m in self.service_msgs[s]]
+      offset = int(SERVICE_LIST[s].frequency * LOG_OFFSET)
+      msgs = [m.logMonoTime for m in self.msgs[s][offset:]]
       if not len(msgs):
         raise Exception(f"missing {s}")
@@ -430,11 +431,12 @@ class TestOnroad:
   def test_engagable(self):
     no_entries = Counter()
-    for m in self.service_msgs['onroadEvents']:
+    for m in self.msgs['onroadEvents']:
       for evt in m.onroadEvents:
         if evt.noEntry:
           no_entries[evt.name] += 1

-    eng = [m.selfdriveState.engageable for m in self.service_msgs['selfdriveState']]
+    offset = int(SERVICE_LIST['selfdriveState'].frequency * LOG_OFFSET)
+    eng = [m.selfdriveState.engageable for m in self.msgs['selfdriveState'][offset:]]
     assert all(eng), \
       f"Not engageable for whole segment:\n- selfdriveState.engageable: {Counter(eng)}\n- No entry events: {no_entries}"
