android health daemon (#21965)

* androidd

* three strikes

* better logging

* persistent

* add to release files

* cleanup

* no cpu usage

* colon

Co-authored-by: Comma Device <device@comma.ai>
pull/21988/head
Adeeb Shihadeh 4 years ago committed by GitHub
parent f7bd264db8
commit c8ca56dddd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      release/files_common
  2. 78
      selfdrive/hardware/eon/androidd.py
  3. 5
      selfdrive/manager/process_config.py
  4. 7
      selfdrive/test/test_onroad.py

@ -275,6 +275,7 @@ selfdrive/hardware/hw.h
selfdrive/hardware/eon/__init__.py selfdrive/hardware/eon/__init__.py
selfdrive/hardware/eon/hardware.h selfdrive/hardware/eon/hardware.h
selfdrive/hardware/eon/hardware.py selfdrive/hardware/eon/hardware.py
selfdrive/hardware/eon/androidd.py
selfdrive/hardware/tici/__init__.py selfdrive/hardware/tici/__init__.py
selfdrive/hardware/tici/hardware.py selfdrive/hardware/tici/hardware.py
selfdrive/hardware/tici/amplifier.py selfdrive/hardware/tici/amplifier.py

@ -0,0 +1,78 @@
#!/usr/bin/env python3
import os
import time
import psutil
from typing import Optional
from common.realtime import set_core_affinity, set_realtime_priority
from selfdrive.swaglog import cloudlog
# main() kills the modem once its crash count exceeds this value
MAX_MODEM_CRASHES = 3

# directory whose "crash_count" and "state" files are read for modem health
MODEM_PATH = "/sys/devices/soc/2080000.qcom,mss/subsys5"

# first cmdline element of each process whose liveness main() tracks
WATCHED_PROCS = [
    "zygote",
    "zygote64",
    "/system/bin/servicemanager",
    "/system/bin/surfaceflinger",
]
def get_modem_crash_count() -> Optional[int]:
    """Read the modem crash counter.

    Returns:
        The contents of the ``crash_count`` file under ``MODEM_PATH`` parsed
        as an int, or ``None`` if reading or parsing fails (the exception is
        logged through ``cloudlog``).
    """
    try:
        count_path = os.path.join(MODEM_PATH, "crash_count")
        with open(count_path) as count_file:
            raw = count_file.read()
        return int(raw)
    except Exception:
        cloudlog.exception("Error reading modem crash count")
    return None
def get_modem_state() -> str:
    """Read the modem state string.

    Returns:
        The whitespace-stripped contents of the ``state`` file under
        ``MODEM_PATH``, or an empty string if it cannot be read (the
        exception is logged through ``cloudlog``).
    """
    try:
        state_path = os.path.join(MODEM_PATH, "state")
        with open(state_path) as state_file:
            contents = state_file.read()
        return contents.strip()
    except Exception:
        cloudlog.exception("Error reading modem state")
    return ""
def main():
    """Run the Android health monitoring loop; never returns.

    Sets core affinity 1 and realtime priority 1, then once per second:

    * re-scans the process table for ``WATCHED_PROCS`` whenever the cached set
      is empty or one of its entries is missing / no longer running, logging
      an event for every entry that changed since the previous scan;
    * logs modem state changes (until the modem has been killed);
    * logs increases of the modem crash counter;
    * once the crash counter exceeds ``MAX_MODEM_CRASHES``, writes ``"put"``
      to the msm_subsys modem debugfs node (logged as "killing modem"), once.

    Raises:
        OSError: if the debugfs node cannot be opened or written.
    """
    set_core_affinity(1)
    set_realtime_priority(1)

    procs = {}
    crash_count = 0
    modem_killed = False
    modem_state = "ONLINE"
    while True:
        # check critical android services
        if not procs or any(p is None or not p.is_running() for p in procs.values()):
            cur = {p: None for p in WATCHED_PROCS}
            for p in psutil.process_iter(attrs=['cmdline']):
                # psutil fills in ad_value (None by default) instead of a list
                # for zombie / access-denied processes, so test truthiness
                # rather than calling len() on it
                args = p.info['cmdline']
                cmdline = args[0] if args else None
                if cmdline in WATCHED_PROCS:
                    cur[cmdline] = p

            # nothing to compare against on the very first scan
            if procs:
                for p in WATCHED_PROCS:
                    if cur[p] != procs[p]:
                        cloudlog.event("android service pid changed", proc=p, cur=cur[p], prev=procs[p])
            procs.update(cur)

        # check modem state
        state = get_modem_state()
        if state != modem_state and not modem_killed:
            cloudlog.event("modem state changed", state=state)
        modem_state = state

        # check modem crashes
        cnt = get_modem_crash_count()
        if cnt is not None:
            if cnt > crash_count:
                cloudlog.event("modem crash", count=cnt)
            crash_count = cnt

        # handle excessive modem crashes
        if crash_count > MAX_MODEM_CRASHES and not modem_killed:
            cloudlog.event("killing modem")
            with open("/sys/kernel/debug/msm_subsys/modem", "w") as f:
                f.write("put")
            modem_killed = True

        time.sleep(1)


if __name__ == "__main__":
    main()

@ -30,12 +30,15 @@ procs = [
PythonProcess("paramsd", "selfdrive.locationd.paramsd"), PythonProcess("paramsd", "selfdrive.locationd.paramsd"),
PythonProcess("plannerd", "selfdrive.controls.plannerd"), PythonProcess("plannerd", "selfdrive.controls.plannerd"),
PythonProcess("radard", "selfdrive.controls.radard"), PythonProcess("radard", "selfdrive.controls.radard"),
PythonProcess("rtshield", "selfdrive.rtshield", enabled=EON),
PythonProcess("thermald", "selfdrive.thermald.thermald", persistent=True), PythonProcess("thermald", "selfdrive.thermald.thermald", persistent=True),
PythonProcess("timezoned", "selfdrive.timezoned", enabled=TICI, persistent=True), PythonProcess("timezoned", "selfdrive.timezoned", enabled=TICI, persistent=True),
PythonProcess("tombstoned", "selfdrive.tombstoned", enabled=not PC, persistent=True), PythonProcess("tombstoned", "selfdrive.tombstoned", enabled=not PC, persistent=True),
PythonProcess("updated", "selfdrive.updated", enabled=not PC, persistent=True), PythonProcess("updated", "selfdrive.updated", enabled=not PC, persistent=True),
PythonProcess("uploader", "selfdrive.loggerd.uploader", persistent=True), PythonProcess("uploader", "selfdrive.loggerd.uploader", persistent=True),
# EON only
PythonProcess("rtshield", "selfdrive.rtshield", enabled=EON),
PythonProcess("androidd", "selfdrive.hardware.eon.androidd", enabled=EON, persistent=True),
] ]
managed_processes = {p.name: p for p in procs} managed_processes = {p.name: p for p in procs}

@ -13,7 +13,7 @@ from cereal.services import service_list
from common.basedir import BASEDIR from common.basedir import BASEDIR
from common.timeout import Timeout from common.timeout import Timeout
from common.params import Params from common.params import Params
from selfdrive.hardware import TICI from selfdrive.hardware import EON, TICI
from selfdrive.loggerd.config import ROOT from selfdrive.loggerd.config import ROOT
from selfdrive.test.helpers import set_params_enabled from selfdrive.test.helpers import set_params_enabled
from tools.lib.logreader import LogReader from tools.lib.logreader import LogReader
@ -44,6 +44,11 @@ PROCS = {
"./logcatd": 0, "./logcatd": 0,
} }
if EON:
PROCS.update({
"selfdrive.hardware.eon.androidd": 0.4,
})
if TICI: if TICI:
PROCS.update({ PROCS.update({
"./loggerd": 60.0, "./loggerd": 60.0,

Loading…
Cancel
Save