From b6a220a133640c24fc596f6c3b181f5d0dd921a2 Mon Sep 17 00:00:00 2001 From: Willem Melching Date: Tue, 26 Jan 2021 14:49:55 +0100 Subject: [PATCH] manager.py broadcast process states (#19880) * broadcast managerstate * Remove boardd * show alert * only trigger on actually receiving a manager state * add managerstate to simulation exclude for longitudinal test * broadcast managerState in process replay * update alert text * bump cereal Co-authored-by: Adeeb Shihadeh --- cereal | 2 +- selfdrive/controls/controlsd.py | 10 ++++++++-- selfdrive/controls/lib/events.py | 5 +++++ selfdrive/manager.py | 17 ++++++++++++++++- selfdrive/test/process_replay/process_replay.py | 2 +- 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/cereal b/cereal index ab79999e5d..46f56a2977 160000 --- a/cereal +++ b/cereal @@ -1 +1 @@ -Subproject commit ab79999e5dc2de25c3eb0c9acbed47c9e81f5d79 +Subproject commit 46f56a29771e9a1d47c417d722e6f0e2fa33a01c diff --git a/selfdrive/controls/controlsd.py b/selfdrive/controls/controlsd.py index cc55764410..03d77dc6c6 100755 --- a/selfdrive/controls/controlsd.py +++ b/selfdrive/controls/controlsd.py @@ -29,6 +29,7 @@ STEER_ANGLE_SATURATION_THRESHOLD = 2.5 # Degrees SIMULATION = "SIMULATION" in os.environ NOSENSOR = "NOSENSOR" in os.environ +IGNORE_PROCESSES = set(["rtshield", "uploader", "deleter", "loggerd", "logmessaged", "tombstoned", "logcatd", "proclogd", "clocksd", "gpsd", "updated"]) ThermalStatus = log.ThermalData.ThermalStatus State = log.ControlsState.OpenpilotState @@ -52,10 +53,10 @@ class Controls: self.sm = sm if self.sm is None: - ignore = ['ubloxRaw', 'frontFrame'] if SIMULATION else None + ignore = ['ubloxRaw', 'frontFrame', 'managerState'] if SIMULATION else None self.sm = messaging.SubMaster(['thermal', 'health', 'modelV2', 'liveCalibration', 'ubloxRaw', 'dMonitoringState', 'plan', 'pathPlan', 'liveLocationKalman', - 'frame', 'frontFrame'], ignore_alive=ignore) + 'frame', 'frontFrame', 'managerState'], ignore_alive=ignore) self.can_sock = can_sock if can_sock is None: @@ -166,6 +167,11 @@ class Controls: if self.sm['thermal'].memUsedPercent > 90: self.events.add(EventName.lowMemory) + # Check if all manager processes are running + not_running = set(p.name for p in self.sm['managerState'].processes if not p.running) + if self.sm.rcv_frame['managerState'] and (not_running - IGNORE_PROCESSES): + self.events.add(EventName.processNotRunning) + # Alert if fan isn't spinning for 5 seconds if self.sm['health'].hwType in [HwType.uno, HwType.dos]: if self.sm['health'].fanSpeedRpm == 0 and self.sm['thermal'].fanSpeed > 50: diff --git a/selfdrive/controls/lib/events.py b/selfdrive/controls/lib/events.py index 2d6c0be68f..c16804aa82 100644 --- a/selfdrive/controls/lib/events.py +++ b/selfdrive/controls/lib/events.py @@ -615,6 +615,11 @@ EVENTS: Dict[int, Dict[str, Union[Alert, Callable[[Any, messaging.SubMaster, boo audible_alert=AudibleAlert.chimeDisengage), }, + EventName.processNotRunning: { + ET.NO_ENTRY: NoEntryAlert("System Malfunction: Reboot Your Device", + audible_alert=AudibleAlert.chimeDisengage), + }, + EventName.radarCommIssue: { ET.SOFT_DISABLE: SoftDisableAlert("Radar Communication Issue"), ET.NO_ENTRY: NoEntryAlert("Radar Communication Issue", diff --git a/selfdrive/manager.py b/selfdrive/manager.py index ab8ed37287..59eb53ec18 100755 --- a/selfdrive/manager.py +++ b/selfdrive/manager.py @@ -143,6 +143,7 @@ if __name__ == "__main__" and not PREBUILT: build() import cereal.messaging as messaging +from cereal import log from common.params import Params from selfdrive.registration import register @@ -165,7 +166,6 @@ managed_processes = { "tombstoned": "selfdrive.tombstoned", "logcatd": ("selfdrive/logcatd", ["./logcatd"]), "proclogd": ("selfdrive/proclogd", ["./proclogd"]), - "boardd": ("selfdrive/boardd", ["./boardd"]), # not used directly "pandad": "selfdrive.pandad", "ui": ("selfdrive/ui", ["./ui"]), "calibrationd": "selfdrive.locationd.calibrationd", @@ -462,6 +462,7 @@ def manager_thread(): logger_dead = False params = Params() thermal_sock = messaging.sub_sock('thermal') + pm = messaging.PubMaster(['managerState']) while 1: msg = messaging.recv_sock(thermal_sock, wait=True) @@ -501,6 +502,20 @@ def manager_thread(): running_list = ["%s%s\u001b[0m" % ("\u001b[32m" if running[p].is_alive() else "\u001b[31m", p) for p in running] cloudlog.debug(' '.join(running_list)) + # send managerState + states = [] + for p in managed_processes: + state = log.ManagerState.ProcessState.new_message() + state.name = p + if p in running: + state.running = running[p].is_alive() + state.pid = running[p].pid + state.exitCode = running[p].exitcode or 0 + states.append(state) + msg = messaging.new_message('managerState') + msg.managerState.processes = states + pm.send('managerState', msg) + # Exit main loop when uninstall is needed if params.get("DoUninstall", encoding='utf8') == "1": break diff --git a/selfdrive/test/process_replay/process_replay.py b/selfdrive/test/process_replay/process_replay.py index 316467d247..b85fdc538c 100755 --- a/selfdrive/test/process_replay/process_replay.py +++ b/selfdrive/test/process_replay/process_replay.py @@ -222,7 +222,7 @@ CONFIGS = [ pub_sub={ "can": ["controlsState", "carState", "carControl", "sendcan", "carEvents", "carParams"], "thermal": [], "health": [], "liveCalibration": [], "dMonitoringState": [], "plan": [], "pathPlan": [], "gpsLocation": [], "liveLocationKalman": [], - "modelV2": [], "frontFrame": [], "frame": [], "ubloxRaw": [], + "modelV2": [], "frontFrame": [], "frame": [], "ubloxRaw": [], "managerState": [], }, ignore=["logMonoTime", "valid", "controlsState.startMonoTime", "controlsState.cumLagMs"], init_callback=fingerprint,