controlsd: add lagging alert (#24360)

* controlsd: add lagging alert

* move those

* just this for now

* no camera malfunction
old-commit-hash: 3abbe827de
taco
Adeeb Shihadeh 3 years ago committed by GitHub
parent 32075d4681
commit fdf13d565b
  1. 2
      cereal
  2. 13
      common/realtime.py
  3. 15
      selfdrive/controls/controlsd.py
  4. 5
      selfdrive/controls/lib/events.py

@ -1 +1 @@
Subproject commit 29f4fe89ef710ff86a5aeb998a357187d0619fb8 Subproject commit a057aed16747d0e414145d83d4861c50315781ad

@ -2,6 +2,7 @@
import gc import gc
import os import os
import time import time
from collections import deque
from typing import Optional, List, Union from typing import Optional, List, Union
from setproctitle import getproctitle # pylint: disable=no-name-in-module from setproctitle import getproctitle # pylint: disable=no-name-in-module
@ -59,6 +60,8 @@ class Ratekeeper:
self._frame = 0 self._frame = 0
self._remaining = 0.0 self._remaining = 0.0
self._process_name = getproctitle() self._process_name = getproctitle()
self._dts = deque([self._interval], maxlen=100)
self._last_monitor_time = sec_since_boot()
@property @property
def frame(self) -> int: def frame(self) -> int:
@ -68,6 +71,12 @@ class Ratekeeper:
def remaining(self) -> float: def remaining(self) -> float:
return self._remaining return self._remaining
# Reports whether the loop has been running slow on average.
# self._dts is a deque capped at 100 samples (seeded with self._interval);
# monitor_time() appends the elapsed time since its previous call to it.
@property
def lagging(self) -> bool:
# mean of the recorded time deltas currently held in the window
avg_dt = sum(self._dts) / len(self._dts)
# threshold is self._interval / 0.9, i.e. roughly 11% above self._interval
expected_dt = self._interval * (1 / 0.9)
# True only when the average delta strictly exceeds that threshold
return avg_dt > expected_dt
# Maintain loop rate by calling this at the end of each loop # Maintain loop rate by calling this at the end of each loop
def keep_time(self) -> bool: def keep_time(self) -> bool:
lagged = self.monitor_time() lagged = self.monitor_time()
@ -77,6 +86,10 @@ class Ratekeeper:
# this only monitor the cumulative lag, but does not enforce a rate # this only monitor the cumulative lag, but does not enforce a rate
def monitor_time(self) -> bool: def monitor_time(self) -> bool:
prev = self._last_monitor_time
self._last_monitor_time = sec_since_boot()
self._dts.append(self._last_monitor_time - prev)
lagged = False lagged = False
remaining = self._next_frame_time - sec_since_boot() remaining = self._next_frame_time - sec_since_boot()
self._next_frame_time += self._interval self._next_frame_time += self._interval

@ -285,9 +285,11 @@ class Controls:
self.events.add(EventName.relayMalfunction) self.events.add(EventName.relayMalfunction)
# Check for HW or system issues # Check for HW or system issues
if len(self.sm['radarState'].radarErrors): if self.rk.lagging:
self.events.add(EventName.controlsdLagging)
elif len(self.sm['radarState'].radarErrors):
self.events.add(EventName.radarFault) self.events.add(EventName.radarFault)
elif not self.sm.valid["pandaStates"]: elif not self.sm.valid['pandaStates']:
self.events.add(EventName.usbError) self.events.add(EventName.usbError)
elif not self.sm.all_checks() or self.can_rcv_error: elif not self.sm.all_checks() or self.can_rcv_error:
@ -352,10 +354,11 @@ class Controls:
# Not show in first 1 km to allow for driving out of garage. This event shows after 5 minutes # Not show in first 1 km to allow for driving out of garage. This event shows after 5 minutes
self.events.add(EventName.noGps) self.events.add(EventName.noGps)
if not self.sm.all_alive(self.camera_packets): if not self.rk.lagging:
self.events.add(EventName.cameraMalfunction) if not self.sm.all_alive(self.camera_packets):
elif not self.sm.all_freq_ok(self.camera_packets): self.events.add(EventName.cameraMalfunction)
self.events.add(EventName.cameraFrameRate) elif not self.sm.all_freq_ok(self.camera_packets):
self.events.add(EventName.cameraFrameRate)
if self.sm['modelV2'].frameDropPerc > 20: if self.sm['modelV2'].frameDropPerc > 20:
self.events.add(EventName.modeldLagging) self.events.add(EventName.modeldLagging)

@ -695,6 +695,11 @@ EVENTS: Dict[int, Dict[str, Union[Alert, AlertCallbackType]]] = {
ET.NO_ENTRY: NoEntryAlert("Low Communication Rate between Processes"), ET.NO_ENTRY: NoEntryAlert("Low Communication Rate between Processes"),
}, },
EventName.controlsdLagging: {
ET.SOFT_DISABLE: soft_disable_alert("Controls Lagging"),
ET.NO_ENTRY: NoEntryAlert("Controls Process Lagging: Reboot Your Device"),
},
# Thrown when manager detects a service exited unexpectedly while driving # Thrown when manager detects a service exited unexpectedly while driving
EventName.processNotRunning: { EventName.processNotRunning: {
ET.NO_ENTRY: NoEntryAlert("System Malfunction: Reboot Your Device"), ET.NO_ENTRY: NoEntryAlert("System Malfunction: Reboot Your Device"),

Loading…
Cancel
Save