#!/usr/bin/env python3
"""Watchdog loop for Android services and the modem subsystem.

Once per second this script:
  * checks that every process listed in WATCHED_PROCS is still the same
    running process and logs an event when one changes,
  * drains the 'androidLog' socket and logs caught NetworkPolicy exceptions,
  * if MODEM_PATH exists, logs modem state changes and crash-count increases,
    and writes "put" to the modem debugfs node once the crash count exceeds
    MAX_MODEM_CRASHES (logged as "killing modem").
"""
import os
import time
from typing import Dict, Optional

import psutil

import cereal.messaging as messaging
from common.realtime import set_core_affinity, set_realtime_priority
from selfdrive.swaglog import cloudlog

MAX_MODEM_CRASHES = 3
MODEM_PATH = "/sys/devices/soc/2080000.qcom,mss/subsys5"
WATCHED_PROCS = ["zygote", "zygote64", "system_server", "/system/bin/servicemanager", "/system/bin/surfaceflinger"]


def get_modem_crash_count() -> Optional[int]:
    """Return the integer stored in MODEM_PATH/crash_count.

    Any failure (missing file, unparsable content, ...) is logged through
    cloudlog and reported to the caller as None.
    """
    try:
        with open(os.path.join(MODEM_PATH, "crash_count")) as f:
            return int(f.read())
    except Exception:
        cloudlog.exception("Error reading modem crash count")
    return None


def get_modem_state() -> str:
    """Return the stripped contents of MODEM_PATH/state.

    Any failure is logged through cloudlog and reported as an empty string.
    """
    try:
        with open(os.path.join(MODEM_PATH, "state")) as f:
            return f.read().strip()
    except Exception:
        cloudlog.exception("Error reading modem state")
    return ""


def _find_watched_procs() -> Dict[str, Optional[psutil.Process]]:
    """Map each WATCHED_PROCS entry to a matching process, or None if absent.

    A process matches when the first element of its command line equals the
    watched name. If several processes match, the last one iterated wins.
    """
    found: Dict[str, Optional[psutil.Process]] = {name: None for name in WATCHED_PROCS}
    for proc in psutil.process_iter(attrs=['cmdline']):
        # psutil stores None for attributes it could not read (access denied,
        # zombie process) and an empty list for processes without a command
        # line; the truthiness test covers both without raising.
        cmdline = proc.info['cmdline']
        name = cmdline[0] if cmdline else None
        if name in WATCHED_PROCS:
            found[name] = proc
    return found


def main() -> None:
    """Run the watchdog loop forever, polling once per second."""
    set_core_affinity(1)
    set_realtime_priority(1)

    procs: Dict[str, Optional[psutil.Process]] = {}
    crash_count = 0
    modem_killed = False
    modem_state = "ONLINE"
    androidLog = messaging.sub_sock('androidLog')

    while True:
        # check critical android services
        # Rescan on the first iteration and whenever a watched process is
        # missing or no longer running.
        if not procs or any(p is None or not p.is_running() for p in procs.values()):
            cur = _find_watched_procs()

            # Nothing to compare against on the very first scan.
            if procs:
                for name in WATCHED_PROCS:
                    if cur[name] != procs[name]:
                        cloudlog.event("android service pid changed", proc=name, cur=cur[name], prev=procs[name], error=True)
            procs.update(cur)

        # log caught NetworkPolicy exceptions
        msgs = messaging.drain_sock(androidLog)
        for m in msgs:
            try:
                if m.androidLog.tag == "NetworkPolicy" and m.androidLog.message.startswith("problem with advise persist threshold"):
                    print(m)
                    cloudlog.event("network policy exception caught", androidLog=m.androidLog, error=True)
            except UnicodeDecodeError:
                # Best effort: skip log messages whose fields fail to decode.
                pass

        if os.path.exists(MODEM_PATH):
            # check modem state
            state = get_modem_state()
            if state != modem_state and not modem_killed:
                cloudlog.event("modem state changed", state=state)
            modem_state = state

            # check modem crashes
            cnt = get_modem_crash_count()
            if cnt is not None:
                if cnt > crash_count:
                    cloudlog.event("modem crash", count=cnt)
                crash_count = cnt

            # handle excessive modem crashes
            # modem_killed ensures this write happens at most once per run.
            if crash_count > MAX_MODEM_CRASHES and not modem_killed:
                cloudlog.event("killing modem", error=True)
                with open("/sys/kernel/debug/msm_subsys/modem", "w") as f:
                    f.write("put")
                modem_killed = True

        time.sleep(1)


if __name__ == "__main__":
    main()