diff --git a/cereal b/cereal index 29f4fe89ef..a057aed167 160000 --- a/cereal +++ b/cereal @@ -1 +1 @@ -Subproject commit 29f4fe89ef710ff86a5aeb998a357187d0619fb8 +Subproject commit a057aed16747d0e414145d83d4861c50315781ad diff --git a/common/realtime.py b/common/realtime.py index 4a37efadab..6ef27ed3a3 100644 --- a/common/realtime.py +++ b/common/realtime.py @@ -2,6 +2,7 @@ import gc import os import time +from collections import deque from typing import Optional, List, Union from setproctitle import getproctitle # pylint: disable=no-name-in-module @@ -59,6 +60,8 @@ class Ratekeeper: self._frame = 0 self._remaining = 0.0 self._process_name = getproctitle() + self._dts = deque([self._interval], maxlen=100) + self._last_monitor_time = sec_since_boot() @property def frame(self) -> int: @@ -68,6 +71,12 @@ class Ratekeeper: def remaining(self) -> float: return self._remaining + @property + def lagging(self) -> bool: + avg_dt = sum(self._dts) / len(self._dts) + expected_dt = self._interval * (1 / 0.9) + return avg_dt > expected_dt + # Maintain loop rate by calling this at the end of each loop def keep_time(self) -> bool: lagged = self.monitor_time() @@ -77,6 +86,10 @@ class Ratekeeper: # this only monitor the cumulative lag, but does not enforce a rate def monitor_time(self) -> bool: + prev = self._last_monitor_time + self._last_monitor_time = sec_since_boot() + self._dts.append(self._last_monitor_time - prev) + lagged = False remaining = self._next_frame_time - sec_since_boot() self._next_frame_time += self._interval diff --git a/selfdrive/controls/controlsd.py b/selfdrive/controls/controlsd.py index ffc52cb3e2..958de45995 100755 --- a/selfdrive/controls/controlsd.py +++ b/selfdrive/controls/controlsd.py @@ -285,9 +285,11 @@ class Controls: self.events.add(EventName.relayMalfunction) # Check for HW or system issues - if len(self.sm['radarState'].radarErrors): + if self.rk.lagging: + self.events.add(EventName.controlsdLagging) + elif len(self.sm['radarState'].radarErrors): self.events.add(EventName.radarFault) - elif not self.sm.valid["pandaStates"]: + elif not self.sm.valid['pandaStates']: self.events.add(EventName.usbError) elif not self.sm.all_checks() or self.can_rcv_error: @@ -352,10 +354,11 @@ class Controls: # Not show in first 1 km to allow for driving out of garage. This event shows after 5 minutes self.events.add(EventName.noGps) - if not self.sm.all_alive(self.camera_packets): - self.events.add(EventName.cameraMalfunction) - elif not self.sm.all_freq_ok(self.camera_packets): - self.events.add(EventName.cameraFrameRate) + if not self.rk.lagging: + if not self.sm.all_alive(self.camera_packets): + self.events.add(EventName.cameraMalfunction) + elif not self.sm.all_freq_ok(self.camera_packets): + self.events.add(EventName.cameraFrameRate) if self.sm['modelV2'].frameDropPerc > 20: self.events.add(EventName.modeldLagging) diff --git a/selfdrive/controls/lib/events.py b/selfdrive/controls/lib/events.py index 19620cbf4b..12a42a7e3c 100644 --- a/selfdrive/controls/lib/events.py +++ b/selfdrive/controls/lib/events.py @@ -695,6 +695,11 @@ EVENTS: Dict[int, Dict[str, Union[Alert, AlertCallbackType]]] = { ET.NO_ENTRY: NoEntryAlert("Low Communication Rate between Processes"), }, + EventName.controlsdLagging: { + ET.SOFT_DISABLE: soft_disable_alert("Controls Lagging"), + ET.NO_ENTRY: NoEntryAlert("Controls Process Lagging: Reboot Your Device"), + }, + # Thrown when manager detects a service exited unexpectedly while driving EventName.processNotRunning: { ET.NO_ENTRY: NoEntryAlert("System Malfunction: Reboot Your Device"),