From 5af1099fbfe503bdc50dbfa734d3cd83bf754a39 Mon Sep 17 00:00:00 2001 From: Adeeb Shihadeh Date: Wed, 22 Oct 2025 15:36:09 -0700 Subject: [PATCH] rm watchdog (#36425) --- common/SConscript | 1 - common/watchdog.cc | 12 --------- common/watchdog.h | 5 ---- common/watchdog.py | 22 ----------------- selfdrive/ui/qt/offroad/settings.cc | 2 -- selfdrive/ui/ui.cc | 4 --- system/manager/process.py | 38 ++--------------------------- system/manager/process_config.py | 2 +- 8 files changed, 3 insertions(+), 83 deletions(-) delete mode 100644 common/watchdog.cc delete mode 100644 common/watchdog.h delete mode 100644 common/watchdog.py diff --git a/common/SConscript b/common/SConscript index 0891b79039..c771ee78b7 100644 --- a/common/SConscript +++ b/common/SConscript @@ -4,7 +4,6 @@ common_libs = [ 'params.cc', 'swaglog.cc', 'util.cc', - 'watchdog.cc', 'ratekeeper.cc', 'clutil.cc', ] diff --git a/common/watchdog.cc b/common/watchdog.cc deleted file mode 100644 index 44e8c83e6d..0000000000 --- a/common/watchdog.cc +++ /dev/null @@ -1,12 +0,0 @@ -#include - -#include "common/watchdog.h" -#include "common/util.h" -#include "system/hardware/hw.h" - -const std::string watchdog_fn_prefix = Path::shm_path() + "/wd_"; // + - -bool watchdog_kick(uint64_t ts) { - static std::string fn = watchdog_fn_prefix + std::to_string(getpid()); - return util::write_file(fn.c_str(), &ts, sizeof(ts), O_WRONLY | O_CREAT) > 0; -} diff --git a/common/watchdog.h b/common/watchdog.h deleted file mode 100644 index 12dd2ca035..0000000000 --- a/common/watchdog.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -#include - -bool watchdog_kick(uint64_t ts); diff --git a/common/watchdog.py b/common/watchdog.py deleted file mode 100644 index ddb6f744e9..0000000000 --- a/common/watchdog.py +++ /dev/null @@ -1,22 +0,0 @@ -import os -import time -import struct -from openpilot.system.hardware.hw import Paths - -WATCHDOG_FN = f"{Paths.shm_path()}/wd_" -_LAST_KICK = 0.0 - -def kick_watchdog(): - global _LAST_KICK - current_time = time.monotonic() - - if current_time - _LAST_KICK < 1.0: - return - - try: - with open(f"{WATCHDOG_FN}{os.getpid()}", 'wb') as f: - f.write(struct.pack(' -#include "common/watchdog.h" #include "common/util.h" #include "selfdrive/ui/qt/network/networking.h" #include "selfdrive/ui/qt/offroad/settings.h" @@ -270,7 +269,6 @@ DevicePanel::DevicePanel(SettingsWindow *parent) : ListWidget(parent) { // put language setting, exit Qt UI, and trigger fast restart params.put("LanguageSetting", langs[selection].toStdString()); qApp->exit(18); - watchdog_kick(0); } }); addItem(translateBtn); diff --git a/selfdrive/ui/ui.cc b/selfdrive/ui/ui.cc index 9ec61b9b81..ed851a41c8 100644 --- a/selfdrive/ui/ui.cc +++ b/selfdrive/ui/ui.cc @@ -8,7 +8,6 @@ #include "common/transformations/orientation.hpp" #include "common/swaglog.h" #include "common/util.h" -#include "common/watchdog.h" #include "system/hardware/hw.h" #define BACKLIGHT_DT 0.05 @@ -116,9 +115,6 @@ void UIState::update() { update_state(this); updateStatus(); - if (sm->frame % UI_FREQ == 0) { - watchdog_kick(nanos_since_boot()); - } emit uiUpdate(*this); } diff --git a/system/manager/process.py b/system/manager/process.py index 5e86e87c76..e6b6a44c40 100644 --- a/system/manager/process.py +++ b/system/manager/process.py @@ -1,7 +1,6 @@ import importlib import os import signal -import struct import time import subprocess from collections.abc import Callable, ValuesView @@ -16,9 +15,6 @@ import openpilot.system.sentry as sentry from openpilot.common.basedir import BASEDIR from openpilot.common.params import Params from openpilot.common.swaglog import cloudlog -from openpilot.common.watchdog import WATCHDOG_FN - -ENABLE_WATCHDOG = os.getenv("NO_WATCHDOG") is None def launcher(proc: str, name: str) -> None: @@ -70,10 +66,6 @@ class ManagerProcess(ABC): proc: Process | None = None enabled = True name = "" - - last_watchdog_time = 0 - watchdog_max_dt: int | None = None - watchdog_seen = False shutting_down = False @abstractmethod @@ -88,26 +80,6 @@ class ManagerProcess(ABC): self.stop(sig=signal.SIGKILL) self.start() - def check_watchdog(self, started: bool) -> None: - if self.watchdog_max_dt is None or self.proc is None: - return - - try: - fn = WATCHDOG_FN + str(self.proc.pid) - with open(fn, "rb") as f: - self.last_watchdog_time = struct.unpack('Q', f.read())[0] - except Exception: - pass - - dt = time.monotonic() - self.last_watchdog_time / 1e9 - - if dt > self.watchdog_max_dt: - if self.watchdog_seen and ENABLE_WATCHDOG: - cloudlog.error(f"Watchdog timeout for {self.name} (exitcode {self.proc.exitcode}) restarting ({started=})") - self.restart() - else: - self.watchdog_seen = True - def stop(self, retry: bool = True, block: bool = True, sig: signal.Signals = None) -> int | None: if self.proc is None: return None @@ -167,14 +139,13 @@ class ManagerProcess(ABC): class NativeProcess(ManagerProcess): - def __init__(self, name, cwd, cmdline, should_run, enabled=True, sigkill=False, watchdog_max_dt=None): + def __init__(self, name, cwd, cmdline, should_run, enabled=True, sigkill=False): self.name = name self.cwd = cwd self.cmdline = cmdline self.should_run = should_run self.enabled = enabled self.sigkill = sigkill - self.watchdog_max_dt = watchdog_max_dt self.launcher = nativelauncher def prepare(self) -> None: @@ -192,18 +163,16 @@ class NativeProcess(ManagerProcess): cloudlog.info(f"starting process {self.name}") self.proc = Process(name=self.name, target=self.launcher, args=(self.cmdline, cwd, self.name)) self.proc.start() - self.watchdog_seen = False self.shutting_down = False class PythonProcess(ManagerProcess): - def __init__(self, name, module, should_run, enabled=True, sigkill=False, watchdog_max_dt=None): + def __init__(self, name, module, should_run, enabled=True, sigkill=False): self.name = name self.module = module self.should_run = should_run self.enabled = enabled self.sigkill = sigkill - self.watchdog_max_dt = watchdog_max_dt self.launcher = launcher def prepare(self) -> None: @@ -226,7 +195,6 @@ class PythonProcess(ManagerProcess): cloudlog.info(f"starting python {self.module}") self.proc = Process(name=name, target=self.launcher, args=(self.module, self.name)) self.proc.start() - self.watchdog_seen = False self.shutting_down = False @@ -288,8 +256,6 @@ def ensure_running(procs: ValuesView[ManagerProcess], started: bool, params=None else: p.stop(block=False) - p.check_watchdog(started) - for p in running: p.start() diff --git a/system/manager/process_config.py b/system/manager/process_config.py index 0c35a3d3c9..5c02227ca5 100644 --- a/system/manager/process_config.py +++ b/system/manager/process_config.py @@ -80,7 +80,7 @@ procs = [ PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", driverview, enabled=(WEBCAM or not PC)), PythonProcess("sensord", "system.sensord.sensord", only_onroad, enabled=not PC), - # NativeProcess("ui", "selfdrive/ui", ["./ui"], always_run, enabled=False, watchdog_max_dt=(5 if not PC else None)), + # NativeProcess("ui", "selfdrive/ui", ["./ui"], always_run, enabled=False), PythonProcess("ui", "selfdrive.ui.ui", always_run), PythonProcess("soundd", "selfdrive.ui.soundd", only_onroad), PythonProcess("locationd", "selfdrive.locationd.locationd", only_onroad),