diff --git a/common/manager_helpers.py b/common/manager_helpers.py index 3668ea112c..8b13789179 100644 --- a/common/manager_helpers.py +++ b/common/manager_helpers.py @@ -1,50 +1 @@ -def cputime_total(ct): - return ct.cpuUser + ct.cpuSystem + ct.cpuChildrenUser + ct.cpuChildrenSystem - -def print_cpu_usage(first_proc, last_proc): - r = 0 - procs = [ - ("selfdrive.controls.controlsd", 59.46), - ("./_modeld", 12.74), - ("./loggerd", 28.49), - ("selfdrive.controls.plannerd", 19.77), - ("selfdrive.controls.radard", 9.54), - ("./_ui", 9.54), - ("./camerad", 7.07), - ("selfdrive.locationd.locationd", 34.38), - ("selfdrive.locationd.paramsd", 11.53), - ("./_sensord", 6.17), - ("selfdrive.monitoring.dmonitoringd", 5.48), - ("./boardd", 3.63), - ("./_dmonitoringmodeld", 2.67), - ("selfdrive.logmessaged", 2.71), - ("selfdrive.thermald.thermald", 2.41), - ("selfdrive.locationd.calibrationd", 6.81), - ("./proclogd", 1.54), - ("./_gpsd", 0.09), - ("./clocksd", 0.02), - ("./ubloxd", 0.02), - ("selfdrive.tombstoned", 0), - ("./logcatd", 0), - ] - - dt = (last_proc.logMonoTime - first_proc.logMonoTime) / 1e9 - print("------------------------------------------------") - for proc_name, normal_cpu_usage in procs: - try: - first = [p for p in first_proc.procLog.procs if proc_name in p.cmdline][0] - last = [p for p in last_proc.procLog.procs if proc_name in p.cmdline][0] - cpu_time = cputime_total(last) - cputime_total(first) - cpu_usage = cpu_time / dt * 100. - if cpu_usage > max(normal_cpu_usage * 1.1, normal_cpu_usage + 5.0): - print(f"Warning {proc_name} using more CPU than normal") - r = 1 - - print(f"{proc_name.ljust(35)} {cpu_usage:.2f}%") - except IndexError: - print(f"{proc_name.ljust(35)} NO METRICS FOUND") - r = 1 - print("------------------------------------------------") - - return r diff --git a/release/build_devel.sh b/release/build_devel.sh index 8addd776ba..6218336870 100755 --- a/release/build_devel.sh +++ b/release/build_devel.sh @@ -80,7 +80,7 @@ echo -n "1" > /data/params/d/HasCompletedSetup echo -n "1" > /data/params/d/CommunityFeaturesToggle PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" nosetests -s selfdrive/test/test_openpilot.py -PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" GET_CPU_USAGE=1 selfdrive/manager.py +PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" selfdrive/test/test_cpu_usage.py PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" selfdrive/car/tests/test_car_interfaces.py echo "[-] testing panda build T=$SECONDS" diff --git a/release/files_common b/release/files_common index c35df3976f..c68dcf1cfa 100644 --- a/release/files_common +++ b/release/files_common @@ -320,6 +320,7 @@ selfdrive/test/__init__.py selfdrive/test/test_openpilot.py selfdrive/test/test_fingerprints.py selfdrive/test/test_car_models.py +selfdrive/test/test_cpu_usage.py selfdrive/ui/SConscript selfdrive/ui/*.cc diff --git a/selfdrive/manager.py b/selfdrive/manager.py index 1ff62d93c2..3e55841d06 100755 --- a/selfdrive/manager.py +++ b/selfdrive/manager.py @@ -161,7 +161,6 @@ from selfdrive.loggerd.config import ROOT from selfdrive.launcher import launcher from common import android from common.apk import update_apks, pm_apply_packages, start_offroad -from common.manager_helpers import print_cpu_usage ThermalStatus = cereal.log.ThermalData.ThermalStatus @@ -428,9 +427,6 @@ def manager_thread(): # now loop thermal_sock = messaging.sub_sock('thermal') - if os.getenv("GET_CPU_USAGE"): - proc_sock = messaging.sub_sock('procLog', conflate=True) - cloudlog.info("manager start") cloudlog.info({"environ": os.environ}) @@ -461,9 +457,6 @@ def manager_thread(): logger_dead = False - start_t = time.time() - first_proc = None - while 1: msg = messaging.recv_sock(thermal_sock, wait=True) @@ -504,26 +497,6 @@ def manager_thread(): if params.get("DoUninstall", encoding='utf8') == "1": break - if os.getenv("GET_CPU_USAGE"): - dt = time.time() - start_t - - # Get first sample - if dt > 30 and first_proc is None: - first_proc = messaging.recv_sock(proc_sock) - - # Get last sample and exit - if dt > 90: - last_proc = messaging.recv_sock(proc_sock, wait=True) - - all_running = all(running[p].is_alive() for p in car_started_processes) - - cleanup_all_processes(None, None) - return_code = print_cpu_usage(first_proc, last_proc) - - if not all_running: - return_code = 1 - sys.exit(return_code) - def manager_prepare(spinner=None): # build all processes os.chdir(os.path.dirname(os.path.abspath(__file__))) diff --git a/selfdrive/test/test_cpu_usage.py b/selfdrive/test/test_cpu_usage.py new file mode 100755 index 0000000000..0ed651ad4b --- /dev/null +++ b/selfdrive/test/test_cpu_usage.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +import time +import threading +import _thread +import signal +import sys + +import cereal.messaging as messaging +import selfdrive.manager as manager + + +def cputime_total(ct): + return ct.cpuUser + ct.cpuSystem + ct.cpuChildrenUser + ct.cpuChildrenSystem + + +def print_cpu_usage(first_proc, last_proc): + procs = [ + ("selfdrive.controls.controlsd", 59.46), + ("selfdrive.locationd.locationd", 34.38), + ("./loggerd", 28.49), + ("selfdrive.controls.plannerd", 19.77), + ("./_modeld", 12.74), + ("selfdrive.locationd.paramsd", 11.53), + ("selfdrive.controls.radard", 9.54), + ("./_ui", 9.54), + ("./camerad", 7.07), + ("selfdrive.locationd.calibrationd", 6.81), + ("./_sensord", 6.17), + ("selfdrive.monitoring.dmonitoringd", 5.48), + ("./boardd", 3.63), + ("./_dmonitoringmodeld", 2.67), + ("selfdrive.logmessaged", 2.71), + ("selfdrive.thermald.thermald", 2.41), + ("./proclogd", 1.54), + ("./_gpsd", 0.09), + ("./clocksd", 0.02), + ("./ubloxd", 0.02), + ("selfdrive.tombstoned", 0), + ("./logcatd", 0), + ] + + r = 0 + dt = (last_proc.logMonoTime - first_proc.logMonoTime) / 1e9 + result = "------------------------------------------------\n" + for proc_name, normal_cpu_usage in procs: + try: + first = [p for p in first_proc.procLog.procs if proc_name in p.cmdline][0] + last = [p for p in last_proc.procLog.procs if proc_name in p.cmdline][0] + cpu_time = cputime_total(last) - cputime_total(first) + cpu_usage = cpu_time / dt * 100. + if cpu_usage > max(normal_cpu_usage * 1.1, normal_cpu_usage + 5.0): + result += f"Warning {proc_name} using more CPU than normal\n" + r = 1 + elif cpu_usage < min(normal_cpu_usage * 0.3, max(normal_cpu_usage - 1.0, 0.0)): + result += f"Warning {proc_name} using less CPU than normal\n" + r = 1 + result += f"{proc_name.ljust(35)} {cpu_usage:.2f}%\n" + except IndexError: + result += f"{proc_name.ljust(35)} NO METRICS FOUND\n" + r = 1 + result += "------------------------------------------------\n" + print(result) + return r + +return_code = 1 +def test_thread(): + global return_code + proc_sock = messaging.sub_sock('procLog', conflate=True) + + # wait until everything's started and get first sample + time.sleep(30) + first_proc = messaging.recv_sock(proc_sock, wait=True) + + # run for a minute and get last sample + time.sleep(60) + last_proc = messaging.recv_sock(proc_sock, wait=True) + + running = manager.get_running() + all_running = all(p in running and running[p].is_alive() for p in manager.car_started_processes) + return_code = print_cpu_usage(first_proc, last_proc) + if not all_running: + return_code = 1 + _thread.interrupt_main() + +if __name__ == "__main__": + + # setup signal handler to exit with test status + def handle_exit(sig, frame): + sys.exit(return_code) + signal.signal(signal.SIGINT, handle_exit) + + # start manager and test thread + t = threading.Thread(target=test_thread) + t.daemon = True + t.start() + manager.main()