test_onroad: enforce CPU usage test for all procs (#28024)

* cleanup

* add missing procs

* ugh not systemd

* floor

* unused

* handle laikad

* continue

---------

Co-authored-by: Comma Device <device@comma.ai>
old-commit-hash: 64df41c9af
beeps
Adeeb Shihadeh 2 years ago committed by GitHub
parent 56b63e12bf
commit a25d2f95de
  1. 131
      selfdrive/test/test_onroad.py

@ -1,4 +1,5 @@
#!/usr/bin/env python3
import math
import json
import os
import subprocess
@ -6,6 +7,7 @@ import time
import numpy as np
import unittest
from collections import Counter, defaultdict
from functools import cached_property
from pathlib import Path
from cereal import car
@ -45,6 +47,15 @@ PROCS = {
"./ubloxd": 0.02,
"selfdrive.tombstoned": 0,
"./logcatd": 0,
"system.micd": 10.0,
"system.timezoned": 0,
"system.sensord.pigeond": 6.0,
"selfdrive.boardd.pandad": 0,
"selfdrive.statsd": 0.4,
"selfdrive.navd.navd": 0.4,
"system.loggerd.uploader": 30.0,
"system.loggerd.deleter": 0.1,
"selfdrive.locationd.laikad": None, # TODO: laikad cpu usage is sporadic
}
TIMINGS = {
@ -71,48 +82,6 @@ def cputime_total(ct):
return ct.cpuUser + ct.cpuSystem + ct.cpuChildrenUser + ct.cpuChildrenSystem
def check_cpu_usage(proclogs):
result = "\n"
result += "------------------------------------------------\n"
result += "------------------ CPU Usage -------------------\n"
result += "------------------------------------------------\n"
plogs_by_proc = defaultdict(list)
for pl in proclogs:
for x in pl.procLog.procs:
if len(x.cmdline) > 0:
n = list(x.cmdline)[0]
plogs_by_proc[n].append(x)
print(plogs_by_proc.keys())
r = True
dt = (proclogs[-1].logMonoTime - proclogs[0].logMonoTime) / 1e9
for proc_name, expected_cpu in PROCS.items():
err = ""
cpu_usage = 0.
x = plogs_by_proc[proc_name]
if len(x) > 2:
cpu_time = cputime_total(x[-1]) - cputime_total(x[0])
cpu_usage = cpu_time / dt * 100.
if cpu_usage > max(expected_cpu * 1.15, expected_cpu + 5.0):
# cpu usage is high while playing sounds
if not (proc_name == "./_soundd" and cpu_usage < 65.):
err = "using more CPU than normal"
elif cpu_usage < min(expected_cpu * 0.65, max(expected_cpu - 1.0, 0.0)):
err = "using less CPU than normal"
else:
err = "NO METRICS FOUND"
result += f"{proc_name.ljust(35)} {cpu_usage:5.2f}% ({expected_cpu:5.2f}%) {err}\n"
if len(err) > 0:
r = False
result += "------------------------------------------------\n"
print(result)
return r
class TestOnroad(unittest.TestCase):
@classmethod
@ -120,8 +89,8 @@ class TestOnroad(unittest.TestCase):
if "DEBUG" in os.environ:
segs = filter(lambda x: os.path.exists(os.path.join(x, "rlog")), Path(ROOT).iterdir())
segs = sorted(segs, key=lambda x: x.stat().st_mtime)
print(segs[-2])
cls.lr = list(LogReader(os.path.join(segs[-2], "rlog")))
print(segs[-3])
cls.lr = list(LogReader(os.path.join(segs[-3], "rlog")))
return
# setup env
@ -177,6 +146,21 @@ class TestOnroad(unittest.TestCase):
# use the second segment by default as it's the first full segment
cls.lr = list(LogReader(os.path.join(str(cls.segments[1]), "rlog")))
@cached_property
def service_msgs(self):
msgs = defaultdict(list)
for m in self.lr:
msgs[m.which()].append(m)
return msgs
def test_service_frequencies(self):
for s, msgs in self.service_msgs.items():
if s in ('initData', 'sentinel'):
continue
with self.subTest(service=s):
assert len(msgs) >= math.floor(service_list[s].frequency*55)
def test_cloudlog_size(self):
msgs = [m for m in self.lr if m.which() == 'logMessage']
@ -193,7 +177,7 @@ class TestOnroad(unittest.TestCase):
result += "-------------- UI Draw Timing ------------------\n"
result += "------------------------------------------------\n"
ts = [m.uiDebug.drawTimeMillis for m in self.lr if m.which() == 'uiDebug']
ts = [m.uiDebug.drawTimeMillis for m in self.service_msgs['uiDebug']]
result += f"min {min(ts):.2f}ms\n"
result += f"max {max(ts):.2f}ms\n"
result += f"std {np.std(ts):.2f}ms\n"
@ -201,15 +185,60 @@ class TestOnroad(unittest.TestCase):
result += "------------------------------------------------\n"
print(result)
self.assertGreater(len(ts), 20*50, "insufficient samples")
#self.assertLess(max(ts), 30.)
self.assertLess(np.mean(ts), 10.)
#self.assertLess(np.std(ts), 5.)
def test_cpu_usage(self):
proclogs = [m for m in self.lr if m.which() == 'procLog']
self.assertGreater(len(proclogs), service_list['procLog'].frequency * 45, "insufficient samples")
cpu_ok = check_cpu_usage(proclogs)
result = "\n"
result += "------------------------------------------------\n"
result += "------------------ CPU Usage -------------------\n"
result += "------------------------------------------------\n"
plogs_by_proc = defaultdict(list)
for pl in self.service_msgs['procLog']:
for x in pl.procLog.procs:
if len(x.cmdline) > 0:
n = list(x.cmdline)[0]
plogs_by_proc[n].append(x)
print(plogs_by_proc.keys())
cpu_ok = True
dt = (self.service_msgs['procLog'][-1].logMonoTime - self.service_msgs['procLog'][0].logMonoTime) / 1e9
for proc_name, expected_cpu in PROCS.items():
err = ""
cpu_usage = 0.
x = plogs_by_proc[proc_name]
if len(x) > 2:
cpu_time = cputime_total(x[-1]) - cputime_total(x[0])
cpu_usage = cpu_time / dt * 100.
if expected_cpu is None:
result += f"{proc_name.ljust(35)} {cpu_usage:5.2f}% ({expected_cpu}) SKIPPED\n"
continue
elif cpu_usage > max(expected_cpu * 1.15, expected_cpu + 5.0):
# cpu usage is high while playing sounds
if not (proc_name == "./_soundd" and cpu_usage < 65.):
err = "using more CPU than normal"
elif cpu_usage < min(expected_cpu * 0.65, max(expected_cpu - 1.0, 0.0)):
err = "using less CPU than normal"
else:
err = "NO METRICS FOUND"
result += f"{proc_name.ljust(35)} {cpu_usage:5.2f}% ({expected_cpu:5.2f}%) {err}\n"
if len(err) > 0:
cpu_ok = False
# Ensure there's no missing procs
all_procs = set([p.name for p in self.service_msgs['managerState'][0].managerState.processes if p.shouldBeRunning])
for p in all_procs:
with self.subTest(proc=p):
assert any(p in pp for pp in PROCS.keys()), f"Expected CPU usage missing for {p}"
result += "------------------------------------------------\n"
print(result)
self.assertTrue(cpu_ok)
def test_camera_processing_time(self):
@ -234,7 +263,7 @@ class TestOnroad(unittest.TestCase):
cfgs = [("lateralPlan", 0.05, 0.05), ("longitudinalPlan", 0.05, 0.05)]
for (s, instant_max, avg_max) in cfgs:
ts = [getattr(getattr(m, s), "solverExecutionTime") for m in self.lr if m.which() == s]
ts = [getattr(getattr(m, s), "solverExecutionTime") for m in self.service_msgs[s]]
self.assertLess(max(ts), instant_max, f"high '{s}' execution time: {max(ts)}")
self.assertLess(np.mean(ts), avg_max, f"high avg '{s}' execution time: {np.mean(ts)}")
result += f"'{s}' execution time: min {min(ts):.5f}s\n"
@ -254,7 +283,7 @@ class TestOnroad(unittest.TestCase):
("driverStateV2", 0.050, 0.026),
]
for (s, instant_max, avg_max) in cfgs:
ts = [getattr(getattr(m, s), "modelExecutionTime") for m in self.lr if m.which() == s]
ts = [getattr(getattr(m, s), "modelExecutionTime") for m in self.service_msgs[s]]
self.assertLess(max(ts), instant_max, f"high '{s}' execution time: {max(ts)}")
self.assertLess(np.mean(ts), avg_max, f"high avg '{s}' execution time: {np.mean(ts)}")
result += f"'{s}' execution time: min {min(ts):.5f}s\n"

Loading…
Cancel
Save