From bdf52941c8808f340b07bdcf6d672bdddbb3eecd Mon Sep 17 00:00:00 2001 From: Adeeb Shihadeh Date: Wed, 16 Sep 2020 15:33:12 -0700 Subject: [PATCH] Improve realtime performance on NEOS (#2166) * fix setting core affinity * dmonitoringd doesn't need rt priority * android only gets two cores * model and planner get core 2 * log missed cycle count * neos update * prod image * revert NEOS changes * still need this old-commit-hash: ef21b83937d25b824018ac1d07fa7759ae6f8128 --- common/realtime.py | 41 +++++++++++----------------- launch_chffrplus.sh | 16 +++++------ selfdrive/boardd/boardd.cc | 2 +- selfdrive/camerad/main.cc | 3 ++ selfdrive/controls/controlsd.py | 7 ++--- selfdrive/controls/plannerd.py | 8 ++---- selfdrive/controls/radard.py | 4 +-- selfdrive/modeld/modeld.cc | 4 ++- selfdrive/monitoring/dmonitoringd.py | 6 ---- selfdrive/test/test_cpu_usage.py | 2 +- 10 files changed, 38 insertions(+), 55 deletions(-) diff --git a/common/realtime.py b/common/realtime.py index 99ba19e293..4326eab139 100644 --- a/common/realtime.py +++ b/common/realtime.py @@ -1,9 +1,8 @@ """Utilities for reading real time clocks and keeping soft real time constraints.""" +import gc +import os import time -import platform -import subprocess import multiprocessing -from cffi import FFI from common.hardware import PC from common.common_pyx import sec_since_boot # pylint: disable=no-name-in-module, import-error @@ -16,34 +15,26 @@ DT_DMON = 0.1 # driver monitoring DT_TRML = 0.5 # thermald and manager -ffi = FFI() -ffi.cdef("long syscall(long number, ...);") -libc = ffi.dlopen(None) - - -def _get_tid(): - if platform.machine() == "x86_64": - NR_gettid = 186 - elif platform.machine() == "aarch64": - NR_gettid = 178 - else: - raise NotImplementedError - - return libc.syscall(NR_gettid) +class Priority: + MIN_REALTIME = 52 # highest android process priority is 51 + CTRL_LOW = MIN_REALTIME + CTRL_HIGH = MIN_REALTIME + 1 def set_realtime_priority(level): - if PC: - return -1 - else: - return subprocess.call(['chrt', '-f', '-p', str(level), str(_get_tid())]) + if not PC: + os.sched_setscheduler(0, os.SCHED_FIFO, os.sched_param(level)) def set_core_affinity(core): - if PC: - return -1 - else: - return subprocess.call(['taskset', '-p', str(core), str(_get_tid())]) + if not PC: + os.sched_setaffinity(0, [core,]) + + +def config_rt_process(core, priority): + gc.disable() + set_core_affinity(core) + set_realtime_priority(priority) class Ratekeeper(): diff --git a/launch_chffrplus.sh b/launch_chffrplus.sh index 09f18c5dc2..7124ef3c4f 100755 --- a/launch_chffrplus.sh +++ b/launch_chffrplus.sh @@ -9,13 +9,14 @@ source "$BASEDIR/launch_env.sh" DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" function two_init { - # Android and other system processes are not permitted to run on CPU 3 - # NEOS installed app processes can run anywhere - echo 0-2 > /dev/cpuset/background/cpus - echo 0-2 > /dev/cpuset/system-background/cpus - [ -d "/dev/cpuset/foreground/boost/cpus" ] && echo 0-2 > /dev/cpuset/foreground/boost/cpus # Not present in < NEOS 15 - echo 0-2 > /dev/cpuset/foreground/cpus - echo 0-2 > /dev/cpuset/android/cpus + # Restrict Android and other system processes to the first two cores + echo 0-1 > /dev/cpuset/background/cpus + echo 0-1 > /dev/cpuset/system-background/cpus + echo 0-1 > /dev/cpuset/foreground/boost/cpus + echo 0-1 > /dev/cpuset/foreground/cpus + echo 0-1 > /dev/cpuset/android/cpus + + # openpilot gets all the cores echo 0-3 > /dev/cpuset/app/cpus # Collect RIL and other possibly long-running I/O interrupts onto CPU 1 @@ -52,7 +53,6 @@ function two_init { # Remove and regenerate qcom sensor registry. Only done on OP3T mainboards. # Performed exactly once. The old registry is preserved just-in-case, and # doubles as a flag denoting we've already done the reset. - # TODO: we should really grow per-platform detect and setup routines if ! $(grep -q "letv" /proc/cmdline) && [ ! -f "/persist/comma/op3t-sns-reg-backup" ]; then echo "Performing OP3T sensor registry reset" mv /persist/sensors/sns.reg /persist/comma/op3t-sns-reg-backup && diff --git a/selfdrive/boardd/boardd.cc b/selfdrive/boardd/boardd.cc index 55f9a5afd5..9823ad9850 100644 --- a/selfdrive/boardd/boardd.cc +++ b/selfdrive/boardd/boardd.cc @@ -254,7 +254,7 @@ void can_recv_thread() { useconds_t sleep = remaining / 1000; usleep(sleep); } else { - LOGW("missed cycle"); + LOGW("missed cycles (%d) %lld", (int)-1*remaining/dt, remaining); next_frame_time = cur_time; } diff --git a/selfdrive/camerad/main.cc b/selfdrive/camerad/main.cc index 87a18ebe3d..549fde0251 100644 --- a/selfdrive/camerad/main.cc +++ b/selfdrive/camerad/main.cc @@ -1589,6 +1589,9 @@ void party(VisionState *s) { int main(int argc, char *argv[]) { set_realtime_priority(51); +#ifdef QCOM + set_core_affinity(2); +#endif zsys_handler_set(NULL); signal(SIGINT, (sighandler_t)set_do_exit); diff --git a/selfdrive/controls/controlsd.py b/selfdrive/controls/controlsd.py index 7b0f54c0db..53374ec4fd 100755 --- a/selfdrive/controls/controlsd.py +++ b/selfdrive/controls/controlsd.py @@ -1,10 +1,9 @@ #!/usr/bin/env python3 import os -import gc from cereal import car, log from common.hardware import HARDWARE from common.numpy_fast import clip -from common.realtime import sec_since_boot, set_realtime_priority, set_core_affinity, Ratekeeper, DT_CTRL +from common.realtime import sec_since_boot, config_rt_process, Priority, Ratekeeper, DT_CTRL from common.profiler import Profiler from common.params import Params, put_nonblocking import cereal.messaging as messaging @@ -43,9 +42,7 @@ EventName = car.CarEvent.EventName class Controls: def __init__(self, sm=None, pm=None, can_sock=None): - gc.disable() - set_realtime_priority(53) - set_core_affinity(3) + config_rt_process(3, Priority.CTRL_HIGH) # Setup sockets self.pm = pm diff --git a/selfdrive/controls/plannerd.py b/selfdrive/controls/plannerd.py index 4a8abcfd5b..44dd1ec621 100755 --- a/selfdrive/controls/plannerd.py +++ b/selfdrive/controls/plannerd.py @@ -1,9 +1,7 @@ #!/usr/bin/env python3 -import gc - from cereal import car from common.params import Params -from common.realtime import set_realtime_priority +from common.realtime import Priority, config_rt_process from selfdrive.swaglog import cloudlog from selfdrive.controls.lib.planner import Planner from selfdrive.controls.lib.vehicle_model import VehicleModel @@ -12,10 +10,8 @@ import cereal.messaging as messaging def plannerd_thread(sm=None, pm=None): - gc.disable() - # start the loop - set_realtime_priority(52) + config_rt_process(2, Priority.CTRL_LOW) cloudlog.info("plannerd is waiting for CarParams") CP = car.CarParams.from_bytes(Params().get("CarParams", block=True)) diff --git a/selfdrive/controls/radard.py b/selfdrive/controls/radard.py index 0ba14dcf26..808b04c340 100755 --- a/selfdrive/controls/radard.py +++ b/selfdrive/controls/radard.py @@ -7,7 +7,7 @@ import cereal.messaging as messaging from cereal import car from common.numpy_fast import interp from common.params import Params -from common.realtime import Ratekeeper, set_realtime_priority +from common.realtime import Ratekeeper, Priority, set_realtime_priority from selfdrive.config import RADAR_TO_CAMERA from selfdrive.controls.lib.cluster.fastcluster_py import cluster_points_centroid from selfdrive.controls.lib.radar_helpers import Cluster, Track @@ -174,7 +174,7 @@ class RadarD(): # fuses camera and radar data for best lead detection def radard_thread(sm=None, pm=None, can_sock=None): - set_realtime_priority(52) + set_realtime_priority(Priority.CTRL_LOW) # wait for stats about the car to come in from controls cloudlog.info("radard is waiting for CarParams") diff --git a/selfdrive/modeld/modeld.cc b/selfdrive/modeld/modeld.cc index 7369ed47ee..0f1c22c342 100644 --- a/selfdrive/modeld/modeld.cc +++ b/selfdrive/modeld/modeld.cc @@ -92,7 +92,9 @@ int main(int argc, char **argv) { int err; set_realtime_priority(51); -#ifdef QCOM2 +#ifdef QCOM + set_core_affinity(2); +#elif QCOM2 // CPU usage is much lower when pinned to a single big core set_core_affinity(4); #endif diff --git a/selfdrive/monitoring/dmonitoringd.py b/selfdrive/monitoring/dmonitoringd.py index e492ea7a04..c8d6d15873 100755 --- a/selfdrive/monitoring/dmonitoringd.py +++ b/selfdrive/monitoring/dmonitoringd.py @@ -1,7 +1,5 @@ #!/usr/bin/env python3 -import gc from cereal import car -from common.realtime import set_realtime_priority from common.params import Params import cereal.messaging as messaging from selfdrive.controls.lib.events import Events @@ -10,10 +8,6 @@ from selfdrive.locationd.calibrationd import Calibration def dmonitoringd_thread(sm=None, pm=None): - gc.disable() - set_realtime_priority(53) - - # Pub/Sub Sockets if pm is None: pm = messaging.PubMaster(['dMonitoringState']) diff --git a/selfdrive/test/test_cpu_usage.py b/selfdrive/test/test_cpu_usage.py index 4fffc0d1d5..0e06e0ab4d 100755 --- a/selfdrive/test/test_cpu_usage.py +++ b/selfdrive/test/test_cpu_usage.py @@ -15,7 +15,7 @@ def cputime_total(ct): def print_cpu_usage(first_proc, last_proc): procs = [ - ("selfdrive.controls.controlsd", 66.15), + ("selfdrive.controls.controlsd", 45.0), ("./loggerd", 33.90), ("selfdrive.locationd.locationd", 29.5), ("selfdrive.controls.plannerd", 11.84),