Improve realtime performance on NEOS (#2166)

* fix setting core affinity

* dmonitoringd doesn't need realtime priority

* Android and other system processes are restricted to two cores (0-1)

* modeld and plannerd are pinned to core 2

* log the number of missed cycles in the CAN receive thread

* neos update

* prod image

* revert NEOS changes

* still need this
pull/2193/head
Adeeb Shihadeh 5 years ago committed by GitHub
parent d559678c4e
commit ef21b83937
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 41
      common/realtime.py
  2. 16
      launch_chffrplus.sh
  3. 2
      selfdrive/boardd/boardd.cc
  4. 3
      selfdrive/camerad/main.cc
  5. 7
      selfdrive/controls/controlsd.py
  6. 8
      selfdrive/controls/plannerd.py
  7. 4
      selfdrive/controls/radard.py
  8. 4
      selfdrive/modeld/modeld.cc
  9. 6
      selfdrive/monitoring/dmonitoringd.py
  10. 2
      selfdrive/test/test_cpu_usage.py

@ -1,9 +1,8 @@
"""Utilities for reading real time clocks and keeping soft real time constraints.""" """Utilities for reading real time clocks and keeping soft real time constraints."""
import gc
import os
import time import time
import platform
import subprocess
import multiprocessing import multiprocessing
from cffi import FFI
from common.hardware import PC from common.hardware import PC
from common.common_pyx import sec_since_boot # pylint: disable=no-name-in-module, import-error from common.common_pyx import sec_since_boot # pylint: disable=no-name-in-module, import-error
@ -16,34 +15,26 @@ DT_DMON = 0.1 # driver monitoring
DT_TRML = 0.5 # thermald and manager DT_TRML = 0.5 # thermald and manager
ffi = FFI() class Priority:
ffi.cdef("long syscall(long number, ...);") MIN_REALTIME = 52 # highest android process priority is 51
libc = ffi.dlopen(None) CTRL_LOW = MIN_REALTIME
CTRL_HIGH = MIN_REALTIME + 1
def _get_tid():
if platform.machine() == "x86_64":
NR_gettid = 186
elif platform.machine() == "aarch64":
NR_gettid = 178
else:
raise NotImplementedError
return libc.syscall(NR_gettid)
def set_realtime_priority(level): def set_realtime_priority(level):
if PC: if not PC:
return -1 os.sched_setscheduler(0, os.SCHED_FIFO, os.sched_param(level))
else:
return subprocess.call(['chrt', '-f', '-p', str(level), str(_get_tid())])
def set_core_affinity(core): def set_core_affinity(core):
if PC: if not PC:
return -1 os.sched_setaffinity(0, [core,])
else:
return subprocess.call(['taskset', '-p', str(core), str(_get_tid())])
def config_rt_process(core, priority):
gc.disable()
set_core_affinity(core)
set_realtime_priority(priority)
class Ratekeeper(): class Ratekeeper():

@ -9,13 +9,14 @@ source "$BASEDIR/launch_env.sh"
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
function two_init { function two_init {
# Android and other system processes are not permitted to run on CPU 3 # Restrict Android and other system processes to the first two cores
# NEOS installed app processes can run anywhere echo 0-1 > /dev/cpuset/background/cpus
echo 0-2 > /dev/cpuset/background/cpus echo 0-1 > /dev/cpuset/system-background/cpus
echo 0-2 > /dev/cpuset/system-background/cpus echo 0-1 > /dev/cpuset/foreground/boost/cpus
[ -d "/dev/cpuset/foreground/boost/cpus" ] && echo 0-2 > /dev/cpuset/foreground/boost/cpus # Not present in < NEOS 15 echo 0-1 > /dev/cpuset/foreground/cpus
echo 0-2 > /dev/cpuset/foreground/cpus echo 0-1 > /dev/cpuset/android/cpus
echo 0-2 > /dev/cpuset/android/cpus
# openpilot gets all the cores
echo 0-3 > /dev/cpuset/app/cpus echo 0-3 > /dev/cpuset/app/cpus
# Collect RIL and other possibly long-running I/O interrupts onto CPU 1 # Collect RIL and other possibly long-running I/O interrupts onto CPU 1
@ -52,7 +53,6 @@ function two_init {
# Remove and regenerate qcom sensor registry. Only done on OP3T mainboards. # Remove and regenerate qcom sensor registry. Only done on OP3T mainboards.
# Performed exactly once. The old registry is preserved just-in-case, and # Performed exactly once. The old registry is preserved just-in-case, and
# doubles as a flag denoting we've already done the reset. # doubles as a flag denoting we've already done the reset.
# TODO: we should really grow per-platform detect and setup routines
if ! $(grep -q "letv" /proc/cmdline) && [ ! -f "/persist/comma/op3t-sns-reg-backup" ]; then if ! $(grep -q "letv" /proc/cmdline) && [ ! -f "/persist/comma/op3t-sns-reg-backup" ]; then
echo "Performing OP3T sensor registry reset" echo "Performing OP3T sensor registry reset"
mv /persist/sensors/sns.reg /persist/comma/op3t-sns-reg-backup && mv /persist/sensors/sns.reg /persist/comma/op3t-sns-reg-backup &&

@ -254,7 +254,7 @@ void can_recv_thread() {
useconds_t sleep = remaining / 1000; useconds_t sleep = remaining / 1000;
usleep(sleep); usleep(sleep);
} else { } else {
LOGW("missed cycle"); LOGW("missed cycles (%d) %lld", (int)-1*remaining/dt, remaining);
next_frame_time = cur_time; next_frame_time = cur_time;
} }

@ -1589,6 +1589,9 @@ void party(VisionState *s) {
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
set_realtime_priority(51); set_realtime_priority(51);
#ifdef QCOM
set_core_affinity(2);
#endif
zsys_handler_set(NULL); zsys_handler_set(NULL);
signal(SIGINT, (sighandler_t)set_do_exit); signal(SIGINT, (sighandler_t)set_do_exit);

@ -1,10 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os import os
import gc
from cereal import car, log from cereal import car, log
from common.hardware import HARDWARE from common.hardware import HARDWARE
from common.numpy_fast import clip from common.numpy_fast import clip
from common.realtime import sec_since_boot, set_realtime_priority, set_core_affinity, Ratekeeper, DT_CTRL from common.realtime import sec_since_boot, config_rt_process, Priority, Ratekeeper, DT_CTRL
from common.profiler import Profiler from common.profiler import Profiler
from common.params import Params, put_nonblocking from common.params import Params, put_nonblocking
import cereal.messaging as messaging import cereal.messaging as messaging
@ -43,9 +42,7 @@ EventName = car.CarEvent.EventName
class Controls: class Controls:
def __init__(self, sm=None, pm=None, can_sock=None): def __init__(self, sm=None, pm=None, can_sock=None):
gc.disable() config_rt_process(3, Priority.CTRL_HIGH)
set_realtime_priority(53)
set_core_affinity(3)
# Setup sockets # Setup sockets
self.pm = pm self.pm = pm

@ -1,9 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import gc
from cereal import car from cereal import car
from common.params import Params from common.params import Params
from common.realtime import set_realtime_priority from common.realtime import Priority, config_rt_process
from selfdrive.swaglog import cloudlog from selfdrive.swaglog import cloudlog
from selfdrive.controls.lib.planner import Planner from selfdrive.controls.lib.planner import Planner
from selfdrive.controls.lib.vehicle_model import VehicleModel from selfdrive.controls.lib.vehicle_model import VehicleModel
@ -12,10 +10,8 @@ import cereal.messaging as messaging
def plannerd_thread(sm=None, pm=None): def plannerd_thread(sm=None, pm=None):
gc.disable()
# start the loop config_rt_process(2, Priority.CTRL_LOW)
set_realtime_priority(52)
cloudlog.info("plannerd is waiting for CarParams") cloudlog.info("plannerd is waiting for CarParams")
CP = car.CarParams.from_bytes(Params().get("CarParams", block=True)) CP = car.CarParams.from_bytes(Params().get("CarParams", block=True))

@ -7,7 +7,7 @@ import cereal.messaging as messaging
from cereal import car from cereal import car
from common.numpy_fast import interp from common.numpy_fast import interp
from common.params import Params from common.params import Params
from common.realtime import Ratekeeper, set_realtime_priority from common.realtime import Ratekeeper, Priority, set_realtime_priority
from selfdrive.config import RADAR_TO_CAMERA from selfdrive.config import RADAR_TO_CAMERA
from selfdrive.controls.lib.cluster.fastcluster_py import cluster_points_centroid from selfdrive.controls.lib.cluster.fastcluster_py import cluster_points_centroid
from selfdrive.controls.lib.radar_helpers import Cluster, Track from selfdrive.controls.lib.radar_helpers import Cluster, Track
@ -174,7 +174,7 @@ class RadarD():
# fuses camera and radar data for best lead detection # fuses camera and radar data for best lead detection
def radard_thread(sm=None, pm=None, can_sock=None): def radard_thread(sm=None, pm=None, can_sock=None):
set_realtime_priority(52) set_realtime_priority(Priority.CTRL_LOW)
# wait for stats about the car to come in from controls # wait for stats about the car to come in from controls
cloudlog.info("radard is waiting for CarParams") cloudlog.info("radard is waiting for CarParams")

@ -92,7 +92,9 @@ int main(int argc, char **argv) {
int err; int err;
set_realtime_priority(51); set_realtime_priority(51);
#ifdef QCOM2 #ifdef QCOM
set_core_affinity(2);
#elif QCOM2
// CPU usage is much lower when pinned to a single big core // CPU usage is much lower when pinned to a single big core
set_core_affinity(4); set_core_affinity(4);
#endif #endif

@ -1,7 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import gc
from cereal import car from cereal import car
from common.realtime import set_realtime_priority
from common.params import Params from common.params import Params
import cereal.messaging as messaging import cereal.messaging as messaging
from selfdrive.controls.lib.events import Events from selfdrive.controls.lib.events import Events
@ -10,10 +8,6 @@ from selfdrive.locationd.calibrationd import Calibration
def dmonitoringd_thread(sm=None, pm=None): def dmonitoringd_thread(sm=None, pm=None):
gc.disable()
set_realtime_priority(53)
# Pub/Sub Sockets
if pm is None: if pm is None:
pm = messaging.PubMaster(['dMonitoringState']) pm = messaging.PubMaster(['dMonitoringState'])

@ -15,7 +15,7 @@ def cputime_total(ct):
def print_cpu_usage(first_proc, last_proc): def print_cpu_usage(first_proc, last_proc):
procs = [ procs = [
("selfdrive.controls.controlsd", 66.15), ("selfdrive.controls.controlsd", 45.0),
("./loggerd", 33.90), ("./loggerd", 33.90),
("selfdrive.locationd.locationd", 29.5), ("selfdrive.locationd.locationd", 29.5),
("selfdrive.controls.plannerd", 11.84), ("selfdrive.controls.plannerd", 11.84),

Loading…
Cancel
Save