Manager cleanup (#20231)
	
		
	
				
					
				
* move manager in folder
* initial refactor
* call start
* small cleanup
* add comments
* use self.signal()
* order shouldn't matter
* newlines
* add helpers
* newlines
* add process config
* split out build part of manager
* this should fix most tests
* no sensord on pc
* don't start athena
* remove comment
* fix old athena test
* fix inject model
* fix test car models
* should be not none
* fix helpers exitcode
* ignore manage_athenad
* Use time.monotonic()
* combine init, remove spinner
* move manager test

Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>

Branch: pull/20249/head
parent 59698344a2
commit 5a3b511306
31 changed files with 783 additions and 752 deletions
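
For orientation, this PR splits the old monolithic selfdrive/manager.py into a selfdrive/manager/ package: build.py (the scons build and spinner progress), helpers.py (unblock_stdout), process.py (the ManagerProcess classes and ensure_running), process_config.py (the managed_processes dict), and a much smaller manager.py (init, prepare, run loop). The sketch below is illustrative only; it simply strings together names that are defined in the files in this diff:

import signal

from selfdrive.manager.process import ensure_running
from selfdrive.manager.process_config import managed_processes

# each iteration of manager_thread() decides which processes should be up
started = True          # device is onroad
driverview = False      # driver camera preview is off
not_run = ["pandad"]    # e.g. NOBOARD was set

ensure_running(managed_processes.values(), started, driverview, not_run)

# processes can also be poked individually, e.g. after going offroad
managed_processes["updated"].signal(signal.SIGHUP)
managed_processes["loggerd"].stop()
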
			
			
		| @ -1,27 +0,0 @@ | |||||||
| import importlib |  | ||||||
| from setproctitle import setproctitle  # pylint: disable=no-name-in-module |  | ||||||
| 
 |  | ||||||
| import cereal.messaging as messaging |  | ||||||
| import selfdrive.crash as crash |  | ||||||
| from selfdrive.swaglog import cloudlog |  | ||||||
| 
 |  | ||||||
| def launcher(proc): |  | ||||||
|   try: |  | ||||||
|     # import the process |  | ||||||
|     mod = importlib.import_module(proc) |  | ||||||
| 
 |  | ||||||
|     # rename the process |  | ||||||
|     setproctitle(proc) |  | ||||||
| 
 |  | ||||||
|     # create new context since we forked |  | ||||||
|     messaging.context = messaging.Context() |  | ||||||
| 
 |  | ||||||
|     # exec the process |  | ||||||
|     mod.main() |  | ||||||
|   except KeyboardInterrupt: |  | ||||||
|     cloudlog.warning("child %s got SIGINT" % proc) |  | ||||||
|   except Exception: |  | ||||||
|     # can't install the crash handler becuase sys.excepthook doesn't play nice |  | ||||||
|     # with threads, so catch it here. |  | ||||||
|     crash.capture_exception() |  | ||||||
|     raise |  | ||||||
| @ -1,596 +0,0 @@ | |||||||
| #!/usr/bin/env python3 |  | ||||||
| import datetime |  | ||||||
| import importlib |  | ||||||
| import os |  | ||||||
| import sys |  | ||||||
| import fcntl |  | ||||||
| import errno |  | ||||||
| import signal |  | ||||||
| import shutil |  | ||||||
| import subprocess |  | ||||||
| import textwrap |  | ||||||
| import time |  | ||||||
| import traceback |  | ||||||
| 
 |  | ||||||
| from multiprocessing import Process |  | ||||||
| from typing import Dict |  | ||||||
| 
 |  | ||||||
| from common.basedir import BASEDIR |  | ||||||
| from common.spinner import Spinner |  | ||||||
| from common.text_window import TextWindow |  | ||||||
| import selfdrive.crash as crash |  | ||||||
| from selfdrive.hardware import HARDWARE, EON, PC, TICI |  | ||||||
| from selfdrive.hardware.eon.apk import update_apks, pm_apply_packages, start_offroad |  | ||||||
| from selfdrive.swaglog import cloudlog, add_logentries_handler |  | ||||||
| from selfdrive.version import version, dirty |  | ||||||
| 
 |  | ||||||
| os.environ['BASEDIR'] = BASEDIR |  | ||||||
| sys.path.append(os.path.join(BASEDIR, "pyextra")) |  | ||||||
| 
 |  | ||||||
| TOTAL_SCONS_NODES = 1225 |  | ||||||
| MAX_BUILD_PROGRESS = 70 |  | ||||||
| WEBCAM = os.getenv("WEBCAM") is not None |  | ||||||
| PREBUILT = os.path.exists(os.path.join(BASEDIR, 'prebuilt')) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def unblock_stdout(): |  | ||||||
|   # get a non-blocking stdout |  | ||||||
|   child_pid, child_pty = os.forkpty() |  | ||||||
|   if child_pid != 0:  # parent |  | ||||||
| 
 |  | ||||||
|     # child is in its own process group, manually pass kill signals |  | ||||||
|     signal.signal(signal.SIGINT, lambda signum, frame: os.kill(child_pid, signal.SIGINT)) |  | ||||||
|     signal.signal(signal.SIGTERM, lambda signum, frame: os.kill(child_pid, signal.SIGTERM)) |  | ||||||
| 
 |  | ||||||
|     fcntl.fcntl(sys.stdout, fcntl.F_SETFL, fcntl.fcntl(sys.stdout, fcntl.F_GETFL) | os.O_NONBLOCK) |  | ||||||
| 
 |  | ||||||
|     while True: |  | ||||||
|       try: |  | ||||||
|         dat = os.read(child_pty, 4096) |  | ||||||
|       except OSError as e: |  | ||||||
|         if e.errno == errno.EIO: |  | ||||||
|           break |  | ||||||
|         continue |  | ||||||
| 
 |  | ||||||
|       if not dat: |  | ||||||
|         break |  | ||||||
| 
 |  | ||||||
|       try: |  | ||||||
|         sys.stdout.write(dat.decode('utf8')) |  | ||||||
|       except (OSError, IOError, UnicodeDecodeError): |  | ||||||
|         pass |  | ||||||
| 
 |  | ||||||
|     # os.wait() returns a tuple with the pid and a 16 bit value |  | ||||||
|     # whose low byte is the signal number and whose high byte is the exit satus |  | ||||||
|     exit_status = os.wait()[1] >> 8 |  | ||||||
|     os._exit(exit_status) |  | ||||||
| 
 |  | ||||||
| if __name__ == "__main__": |  | ||||||
|   unblock_stdout() |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # Start spinner |  | ||||||
| spinner = Spinner() |  | ||||||
| spinner.update_progress(0, 100) |  | ||||||
| if __name__ != "__main__": |  | ||||||
|   spinner.close() |  | ||||||
| 
 |  | ||||||
| def build(): |  | ||||||
|   env = os.environ.copy() |  | ||||||
|   env['SCONS_PROGRESS'] = "1" |  | ||||||
|   env['SCONS_CACHE'] = "1" |  | ||||||
|   nproc = os.cpu_count() |  | ||||||
|   j_flag = "" if nproc is None else f"-j{nproc - 1}" |  | ||||||
| 
 |  | ||||||
|   for retry in [True, False]: |  | ||||||
|     scons = subprocess.Popen(["scons", j_flag], cwd=BASEDIR, env=env, stderr=subprocess.PIPE) |  | ||||||
| 
 |  | ||||||
|     compile_output = [] |  | ||||||
| 
 |  | ||||||
|     # Read progress from stderr and update spinner |  | ||||||
|     while scons.poll() is None: |  | ||||||
|       try: |  | ||||||
|         line = scons.stderr.readline() |  | ||||||
|         if line is None: |  | ||||||
|           continue |  | ||||||
|         line = line.rstrip() |  | ||||||
| 
 |  | ||||||
|         prefix = b'progress: ' |  | ||||||
|         if line.startswith(prefix): |  | ||||||
|           i = int(line[len(prefix):]) |  | ||||||
|           spinner.update_progress(MAX_BUILD_PROGRESS * min(1., i / TOTAL_SCONS_NODES), 100.) |  | ||||||
|         elif len(line): |  | ||||||
|           compile_output.append(line) |  | ||||||
|           print(line.decode('utf8', 'replace')) |  | ||||||
|       except Exception: |  | ||||||
|         pass |  | ||||||
| 
 |  | ||||||
|     if scons.returncode != 0: |  | ||||||
|       # Read remaining output |  | ||||||
|       r = scons.stderr.read().split(b'\n') |  | ||||||
|       compile_output += r |  | ||||||
| 
 |  | ||||||
|       if retry and (not dirty): |  | ||||||
|         if not os.getenv("CI"): |  | ||||||
|           print("scons build failed, cleaning in") |  | ||||||
|           for i in range(3, -1, -1): |  | ||||||
|             print("....%d" % i) |  | ||||||
|             time.sleep(1) |  | ||||||
|           subprocess.check_call(["scons", "-c"], cwd=BASEDIR, env=env) |  | ||||||
|           shutil.rmtree("/tmp/scons_cache", ignore_errors=True) |  | ||||||
|           shutil.rmtree("/data/scons_cache", ignore_errors=True) |  | ||||||
|         else: |  | ||||||
|           print("scons build failed after retry") |  | ||||||
|           sys.exit(1) |  | ||||||
|       else: |  | ||||||
|         # Build failed log errors |  | ||||||
|         errors = [line.decode('utf8', 'replace') for line in compile_output |  | ||||||
|                   if any([err in line for err in [b'error: ', b'not found, needed by target']])] |  | ||||||
|         error_s = "\n".join(errors) |  | ||||||
|         add_logentries_handler(cloudlog) |  | ||||||
|         cloudlog.error("scons build failed\n" + error_s) |  | ||||||
| 
 |  | ||||||
|         # Show TextWindow |  | ||||||
|         spinner.close() |  | ||||||
|         error_s = "\n \n".join(["\n".join(textwrap.wrap(e, 65)) for e in errors]) |  | ||||||
|         with TextWindow("openpilot failed to build\n \n" + error_s) as t: |  | ||||||
|           t.wait_for_exit() |  | ||||||
|         exit(1) |  | ||||||
|     else: |  | ||||||
|       break |  | ||||||
| 
 |  | ||||||
| if __name__ == "__main__" and not PREBUILT: |  | ||||||
|   build() |  | ||||||
| 
 |  | ||||||
| import cereal.messaging as messaging |  | ||||||
| from cereal import log |  | ||||||
| 
 |  | ||||||
| from common.params import Params |  | ||||||
| from selfdrive.registration import register |  | ||||||
| from selfdrive.launcher import launcher |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # comment out anything you don't want to run |  | ||||||
| managed_processes = { |  | ||||||
|   "thermald": "selfdrive.thermald.thermald", |  | ||||||
|   "uploader": "selfdrive.loggerd.uploader", |  | ||||||
|   "deleter": "selfdrive.loggerd.deleter", |  | ||||||
|   "controlsd": "selfdrive.controls.controlsd", |  | ||||||
|   "plannerd": "selfdrive.controls.plannerd", |  | ||||||
|   "radard": "selfdrive.controls.radard", |  | ||||||
|   "dmonitoringd": "selfdrive.monitoring.dmonitoringd", |  | ||||||
|   "ubloxd": ("selfdrive/locationd", ["./ubloxd"]), |  | ||||||
|   "loggerd": ("selfdrive/loggerd", ["./loggerd"]), |  | ||||||
|   "logmessaged": "selfdrive.logmessaged", |  | ||||||
|   "locationd": "selfdrive.locationd.locationd", |  | ||||||
|   "tombstoned": "selfdrive.tombstoned", |  | ||||||
|   "logcatd": ("selfdrive/logcatd", ["./logcatd"]), |  | ||||||
|   "proclogd": ("selfdrive/proclogd", ["./proclogd"]), |  | ||||||
|   "pandad": "selfdrive.pandad", |  | ||||||
|   "ui": ("selfdrive/ui", ["./ui"]), |  | ||||||
|   "calibrationd": "selfdrive.locationd.calibrationd", |  | ||||||
|   "paramsd": "selfdrive.locationd.paramsd", |  | ||||||
|   "camerad": ("selfdrive/camerad", ["./camerad"]), |  | ||||||
|   "sensord": ("selfdrive/sensord", ["./sensord"]), |  | ||||||
|   "clocksd": ("selfdrive/clocksd", ["./clocksd"]), |  | ||||||
|   "updated": "selfdrive.updated", |  | ||||||
|   "dmonitoringmodeld": ("selfdrive/modeld", ["./dmonitoringmodeld"]), |  | ||||||
|   "modeld": ("selfdrive/modeld", ["./modeld"]), |  | ||||||
|   "rtshield": "selfdrive.rtshield", |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| daemon_processes = { |  | ||||||
|   "manage_athenad": ("selfdrive.athena.manage_athenad", "AthenadPid"), |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| running: Dict[str, Process] = {} |  | ||||||
| def get_running(): |  | ||||||
|   return running |  | ||||||
| 
 |  | ||||||
| # due to qualcomm kernel bugs SIGKILLing camerad sometimes causes page table corruption |  | ||||||
| unkillable_processes = ['camerad'] |  | ||||||
| 
 |  | ||||||
| # processes to end with SIGKILL instead of SIGTERM |  | ||||||
| kill_processes = [] |  | ||||||
| if EON: |  | ||||||
|   kill_processes += [ |  | ||||||
|     'sensord', |  | ||||||
|   ] |  | ||||||
| 
 |  | ||||||
| persistent_processes = [ |  | ||||||
|   'pandad', |  | ||||||
|   'thermald', |  | ||||||
|   'logmessaged', |  | ||||||
|   'ui', |  | ||||||
|   'uploader', |  | ||||||
|   'deleter', |  | ||||||
| ] |  | ||||||
| 
 |  | ||||||
| if not PC: |  | ||||||
|   persistent_processes += [ |  | ||||||
|     'updated', |  | ||||||
|     'tombstoned', |  | ||||||
|   ] |  | ||||||
| 
 |  | ||||||
| if EON: |  | ||||||
|   persistent_processes += [ |  | ||||||
|     'sensord', |  | ||||||
|   ] |  | ||||||
| 
 |  | ||||||
| if TICI: |  | ||||||
|   managed_processes["timezoned"] = "selfdrive.timezoned" |  | ||||||
|   persistent_processes += ['timezoned'] |  | ||||||
| 
 |  | ||||||
| car_started_processes = [ |  | ||||||
|   'controlsd', |  | ||||||
|   'plannerd', |  | ||||||
|   'loggerd', |  | ||||||
|   'radard', |  | ||||||
|   'calibrationd', |  | ||||||
|   'paramsd', |  | ||||||
|   'camerad', |  | ||||||
|   'modeld', |  | ||||||
|   'proclogd', |  | ||||||
|   'locationd', |  | ||||||
|   'clocksd', |  | ||||||
|   'logcatd', |  | ||||||
| ] |  | ||||||
| 
 |  | ||||||
| driver_view_processes = [ |  | ||||||
|   'camerad', |  | ||||||
|   'dmonitoringd', |  | ||||||
|   'dmonitoringmodeld' |  | ||||||
| ] |  | ||||||
| 
 |  | ||||||
| if not PC or WEBCAM: |  | ||||||
|   car_started_processes += [ |  | ||||||
|     'ubloxd', |  | ||||||
|     'dmonitoringd', |  | ||||||
|     'dmonitoringmodeld', |  | ||||||
|   ] |  | ||||||
| 
 |  | ||||||
| if EON: |  | ||||||
|   car_started_processes += [ |  | ||||||
|     'rtshield', |  | ||||||
|   ] |  | ||||||
| else: |  | ||||||
|    car_started_processes += [ |  | ||||||
|     'sensord', |  | ||||||
|   ] |  | ||||||
| 
 |  | ||||||
| def register_managed_process(name, desc, car_started=False): |  | ||||||
|   global managed_processes, car_started_processes, persistent_processes |  | ||||||
|   managed_processes[name] = desc |  | ||||||
|   if car_started: |  | ||||||
|     car_started_processes.append(name) |  | ||||||
|   else: |  | ||||||
|     persistent_processes.append(name) |  | ||||||
| 
 |  | ||||||
| # ****************** process management functions ****************** |  | ||||||
| def nativelauncher(pargs, cwd): |  | ||||||
|   # exec the process |  | ||||||
|   os.chdir(cwd) |  | ||||||
|   os.execvp(pargs[0], pargs) |  | ||||||
| 
 |  | ||||||
| def start_managed_process(name): |  | ||||||
|   if name in running or name not in managed_processes: |  | ||||||
|     return |  | ||||||
|   proc = managed_processes[name] |  | ||||||
|   if isinstance(proc, str): |  | ||||||
|     cloudlog.info("starting python %s" % proc) |  | ||||||
|     running[name] = Process(name=name, target=launcher, args=(proc,)) |  | ||||||
|   else: |  | ||||||
|     pdir, pargs = proc |  | ||||||
|     cwd = os.path.join(BASEDIR, pdir) |  | ||||||
|     cloudlog.info("starting process %s" % name) |  | ||||||
|     running[name] = Process(name=name, target=nativelauncher, args=(pargs, cwd)) |  | ||||||
|   running[name].start() |  | ||||||
| 
 |  | ||||||
| def start_daemon_process(name): |  | ||||||
|   params = Params() |  | ||||||
|   proc, pid_param = daemon_processes[name] |  | ||||||
|   pid = params.get(pid_param, encoding='utf-8') |  | ||||||
| 
 |  | ||||||
|   if pid is not None: |  | ||||||
|     try: |  | ||||||
|       os.kill(int(pid), 0) |  | ||||||
|       with open(f'/proc/{pid}/cmdline') as f: |  | ||||||
|         if proc in f.read(): |  | ||||||
|           # daemon is running |  | ||||||
|           return |  | ||||||
|     except (OSError, FileNotFoundError): |  | ||||||
|       # process is dead |  | ||||||
|       pass |  | ||||||
| 
 |  | ||||||
|   cloudlog.info("starting daemon %s" % name) |  | ||||||
|   proc = subprocess.Popen(['python', '-m', proc],  # pylint: disable=subprocess-popen-preexec-fn |  | ||||||
|                           stdin=open('/dev/null', 'r'), |  | ||||||
|                           stdout=open('/dev/null', 'w'), |  | ||||||
|                           stderr=open('/dev/null', 'w'), |  | ||||||
|                           preexec_fn=os.setpgrp) |  | ||||||
| 
 |  | ||||||
|   params.put(pid_param, str(proc.pid)) |  | ||||||
| 
 |  | ||||||
| def prepare_managed_process(p): |  | ||||||
|   proc = managed_processes[p] |  | ||||||
|   if isinstance(proc, str): |  | ||||||
|     # import this python |  | ||||||
|     cloudlog.info("preimporting %s" % proc) |  | ||||||
|     importlib.import_module(proc) |  | ||||||
| 
 |  | ||||||
| def join_process(process, timeout): |  | ||||||
|   # Process().join(timeout) will hang due to a python 3 bug: https://bugs.python.org/issue28382 |  | ||||||
|   # We have to poll the exitcode instead |  | ||||||
|   t = time.time() |  | ||||||
|   while time.time() - t < timeout and process.exitcode is None: |  | ||||||
|     time.sleep(0.001) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def kill_managed_process(name, retry=True): |  | ||||||
|   if name not in running or name not in managed_processes: |  | ||||||
|     return |  | ||||||
|   cloudlog.info(f"killing {name}") |  | ||||||
| 
 |  | ||||||
|   if running[name].exitcode is None: |  | ||||||
|     sig = signal.SIGKILL if name in kill_processes else signal.SIGINT |  | ||||||
|     os.kill(running[name].pid, sig) |  | ||||||
| 
 |  | ||||||
|     join_process(running[name], 5) |  | ||||||
| 
 |  | ||||||
|     if running[name].exitcode is None: |  | ||||||
|       if not retry: |  | ||||||
|         raise Exception(f"{name} failed to die") |  | ||||||
| 
 |  | ||||||
|       if name in unkillable_processes: |  | ||||||
|         cloudlog.critical("unkillable process %s failed to exit! rebooting in 15 if it doesn't die" % name) |  | ||||||
|         join_process(running[name], 15) |  | ||||||
|         if running[name].exitcode is None: |  | ||||||
|           cloudlog.critical("unkillable process %s failed to die!" % name) |  | ||||||
|           os.system("date >> /data/unkillable_reboot") |  | ||||||
|           os.sync() |  | ||||||
|           HARDWARE.reboot() |  | ||||||
|           raise RuntimeError |  | ||||||
|       else: |  | ||||||
|         cloudlog.info("killing %s with SIGKILL" % name) |  | ||||||
|         os.kill(running[name].pid, signal.SIGKILL) |  | ||||||
|         running[name].join() |  | ||||||
| 
 |  | ||||||
|   ret = running[name].exitcode |  | ||||||
|   cloudlog.info(f"{name} is dead with {ret}") |  | ||||||
|   del running[name] |  | ||||||
|   return ret |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def cleanup_all_processes(signal, frame): |  | ||||||
|   cloudlog.info("caught ctrl-c %s %s" % (signal, frame)) |  | ||||||
| 
 |  | ||||||
|   if EON: |  | ||||||
|     pm_apply_packages('disable') |  | ||||||
| 
 |  | ||||||
|   for name in list(running.keys()): |  | ||||||
|     kill_managed_process(name) |  | ||||||
|   cloudlog.info("everything is dead") |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def send_managed_process_signal(name, sig): |  | ||||||
|   if name not in running or name not in managed_processes or \ |  | ||||||
|      running[name].exitcode is not None: |  | ||||||
|     return |  | ||||||
| 
 |  | ||||||
|   cloudlog.info(f"sending signal {sig} to {name}") |  | ||||||
|   os.kill(running[name].pid, sig) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # ****************** run loop ****************** |  | ||||||
| 
 |  | ||||||
| def manager_init(): |  | ||||||
|   os.umask(0)  # Make sure we can create files with 777 permissions |  | ||||||
| 
 |  | ||||||
|   # Create folders needed for msgq |  | ||||||
|   try: |  | ||||||
|     os.mkdir("/dev/shm") |  | ||||||
|   except FileExistsError: |  | ||||||
|     pass |  | ||||||
|   except PermissionError: |  | ||||||
|     print("WARNING: failed to make /dev/shm") |  | ||||||
| 
 |  | ||||||
|   # set dongle id |  | ||||||
|   reg_res = register(spinner) |  | ||||||
|   if reg_res: |  | ||||||
|     dongle_id = reg_res |  | ||||||
|   else: |  | ||||||
|     raise Exception("server registration failed") |  | ||||||
|   os.environ['DONGLE_ID'] = dongle_id |  | ||||||
| 
 |  | ||||||
|   if not dirty: |  | ||||||
|     os.environ['CLEAN'] = '1' |  | ||||||
| 
 |  | ||||||
|   cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty, |  | ||||||
|                        device=HARDWARE.get_device_type()) |  | ||||||
|   crash.bind_user(id=dongle_id) |  | ||||||
|   crash.bind_extra(version=version, dirty=dirty, device=HARDWARE.get_device_type()) |  | ||||||
| 
 |  | ||||||
|   # ensure shared libraries are readable by apks |  | ||||||
|   if EON: |  | ||||||
|     os.chmod(BASEDIR, 0o755) |  | ||||||
|     os.chmod("/dev/shm", 0o777) |  | ||||||
|     os.chmod(os.path.join(BASEDIR, "cereal"), 0o755) |  | ||||||
|     os.chmod(os.path.join(BASEDIR, "cereal", "libmessaging_shared.so"), 0o755) |  | ||||||
| 
 |  | ||||||
| def manager_thread(): |  | ||||||
| 
 |  | ||||||
|   cloudlog.info("manager start") |  | ||||||
|   cloudlog.info({"environ": os.environ}) |  | ||||||
| 
 |  | ||||||
|   # save boot log |  | ||||||
|   subprocess.call("./bootlog", cwd=os.path.join(BASEDIR, "selfdrive/loggerd")) |  | ||||||
| 
 |  | ||||||
|   # start daemon processes |  | ||||||
|   for p in daemon_processes: |  | ||||||
|     start_daemon_process(p) |  | ||||||
| 
 |  | ||||||
|   # start persistent processes |  | ||||||
|   for p in persistent_processes: |  | ||||||
|     start_managed_process(p) |  | ||||||
| 
 |  | ||||||
|   # start offroad |  | ||||||
|   if EON and "QT" not in os.environ: |  | ||||||
|     pm_apply_packages('enable') |  | ||||||
|     start_offroad() |  | ||||||
| 
 |  | ||||||
|   if os.getenv("NOBOARD") is not None: |  | ||||||
|     del managed_processes["pandad"] |  | ||||||
| 
 |  | ||||||
|   if os.getenv("BLOCK") is not None: |  | ||||||
|     for k in os.getenv("BLOCK").split(","): |  | ||||||
|       del managed_processes[k] |  | ||||||
| 
 |  | ||||||
|   started_prev = False |  | ||||||
|   logger_dead = False |  | ||||||
|   params = Params() |  | ||||||
|   device_state_sock = messaging.sub_sock('deviceState') |  | ||||||
|   pm = messaging.PubMaster(['managerState']) |  | ||||||
| 
 |  | ||||||
|   while 1: |  | ||||||
|     msg = messaging.recv_sock(device_state_sock, wait=True) |  | ||||||
| 
 |  | ||||||
|     if msg.deviceState.freeSpacePercent < 5: |  | ||||||
|       logger_dead = True |  | ||||||
| 
 |  | ||||||
|     if msg.deviceState.started: |  | ||||||
|       for p in car_started_processes: |  | ||||||
|         if p == "loggerd" and logger_dead: |  | ||||||
|           kill_managed_process(p) |  | ||||||
|         else: |  | ||||||
|           start_managed_process(p) |  | ||||||
|     else: |  | ||||||
|       logger_dead = False |  | ||||||
|       driver_view = params.get("IsDriverViewEnabled") == b"1" |  | ||||||
| 
 |  | ||||||
|       # TODO: refactor how manager manages processes |  | ||||||
|       for p in reversed(car_started_processes): |  | ||||||
|         if p not in driver_view_processes or not driver_view: |  | ||||||
|           kill_managed_process(p) |  | ||||||
| 
 |  | ||||||
|       for p in driver_view_processes: |  | ||||||
|         if driver_view: |  | ||||||
|           start_managed_process(p) |  | ||||||
|         else: |  | ||||||
|           kill_managed_process(p) |  | ||||||
| 
 |  | ||||||
|       # trigger an update after going offroad |  | ||||||
|       if started_prev: |  | ||||||
|         os.sync() |  | ||||||
|         send_managed_process_signal("updated", signal.SIGHUP) |  | ||||||
| 
 |  | ||||||
|     started_prev = msg.deviceState.started |  | ||||||
| 
 |  | ||||||
|     # check the status of all processes, did any of them die? |  | ||||||
|     running_list = ["%s%s\u001b[0m" % ("\u001b[32m" if running[p].is_alive() else "\u001b[31m", p) for p in running] |  | ||||||
|     cloudlog.debug(' '.join(running_list)) |  | ||||||
| 
 |  | ||||||
|     # send managerState |  | ||||||
|     states = [] |  | ||||||
|     for p in managed_processes: |  | ||||||
|       state = log.ManagerState.ProcessState.new_message() |  | ||||||
|       state.name = p |  | ||||||
|       if p in running: |  | ||||||
|         state.running = running[p].is_alive() |  | ||||||
|         state.pid = running[p].pid |  | ||||||
|         state.exitCode = running[p].exitcode or 0 |  | ||||||
|       states.append(state) |  | ||||||
|     msg = messaging.new_message('managerState') |  | ||||||
|     msg.managerState.processes = states |  | ||||||
|     pm.send('managerState', msg) |  | ||||||
| 
 |  | ||||||
|     # Exit main loop when uninstall is needed |  | ||||||
|     if params.get("DoUninstall", encoding='utf8') == "1": |  | ||||||
|       break |  | ||||||
| 
 |  | ||||||
| def manager_prepare(): |  | ||||||
|   # build all processes |  | ||||||
|   os.chdir(os.path.dirname(os.path.abspath(__file__))) |  | ||||||
| 
 |  | ||||||
|   total = 100.0 - (0 if PREBUILT else MAX_BUILD_PROGRESS) |  | ||||||
| 
 |  | ||||||
|   for i, p in enumerate(managed_processes): |  | ||||||
|     perc = (100.0 - total) + total * (i + 1) / len(managed_processes) |  | ||||||
|     spinner.update_progress(perc, 100.) |  | ||||||
|     prepare_managed_process(p) |  | ||||||
| 
 |  | ||||||
| def main(): |  | ||||||
|   params = Params() |  | ||||||
|   params.manager_start() |  | ||||||
| 
 |  | ||||||
|   default_params = [ |  | ||||||
|     ("CommunityFeaturesToggle", "0"), |  | ||||||
|     ("CompletedTrainingVersion", "0"), |  | ||||||
|     ("IsRHD", "0"), |  | ||||||
|     ("IsMetric", "0"), |  | ||||||
|     ("RecordFront", "0"), |  | ||||||
|     ("HasAcceptedTerms", "0"), |  | ||||||
|     ("HasCompletedSetup", "0"), |  | ||||||
|     ("IsUploadRawEnabled", "1"), |  | ||||||
|     ("IsLdwEnabled", "1"), |  | ||||||
|     ("LastUpdateTime", datetime.datetime.utcnow().isoformat().encode('utf8')), |  | ||||||
|     ("OpenpilotEnabledToggle", "1"), |  | ||||||
|     ("VisionRadarToggle", "0"), |  | ||||||
|     ("LaneChangeEnabled", "1"), |  | ||||||
|     ("IsDriverViewEnabled", "0"), |  | ||||||
|   ] |  | ||||||
| 
 |  | ||||||
|   # set unset params |  | ||||||
|   for k, v in default_params: |  | ||||||
|     if params.get(k) is None: |  | ||||||
|       params.put(k, v) |  | ||||||
| 
 |  | ||||||
|   # is this dashcam? |  | ||||||
|   if os.getenv("PASSIVE") is not None: |  | ||||||
|     params.put("Passive", str(int(os.getenv("PASSIVE")))) |  | ||||||
| 
 |  | ||||||
|   if params.get("Passive") is None: |  | ||||||
|     raise Exception("Passive must be set to continue") |  | ||||||
| 
 |  | ||||||
|   if EON: |  | ||||||
|     update_apks() |  | ||||||
|   manager_init() |  | ||||||
|   manager_prepare() |  | ||||||
|   spinner.close() |  | ||||||
| 
 |  | ||||||
|   if os.getenv("PREPAREONLY") is not None: |  | ||||||
|     return |  | ||||||
| 
 |  | ||||||
|   # SystemExit on sigterm |  | ||||||
|   signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit(1)) |  | ||||||
| 
 |  | ||||||
|   try: |  | ||||||
|     manager_thread() |  | ||||||
|   except Exception: |  | ||||||
|     traceback.print_exc() |  | ||||||
|     crash.capture_exception() |  | ||||||
|   finally: |  | ||||||
|     cleanup_all_processes(None, None) |  | ||||||
| 
 |  | ||||||
|   if params.get("DoUninstall", encoding='utf8') == "1": |  | ||||||
|     cloudlog.warning("uninstalling") |  | ||||||
|     HARDWARE.uninstall() |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| if __name__ == "__main__": |  | ||||||
|   try: |  | ||||||
|     main() |  | ||||||
|   except Exception: |  | ||||||
|     add_logentries_handler(cloudlog) |  | ||||||
|     cloudlog.exception("Manager failed to start") |  | ||||||
| 
 |  | ||||||
|     # Show last 3 lines of traceback |  | ||||||
|     error = traceback.format_exc(-3) |  | ||||||
|     error = "Manager failed to start\n\n" + error |  | ||||||
|     spinner.close() |  | ||||||
|     with TextWindow(error) as t: |  | ||||||
|       t.wait_for_exit() |  | ||||||
| 
 |  | ||||||
|     raise |  | ||||||
| 
 |  | ||||||
|   # manual exit because we are forked |  | ||||||
|   sys.exit(0) |  | ||||||
| @ -0,0 +1,89 @@ | |||||||
|  | #!/usr/bin/env python3 | ||||||
|  | import os | ||||||
|  | import shutil | ||||||
|  | import subprocess | ||||||
|  | import sys | ||||||
|  | import time | ||||||
|  | import textwrap | ||||||
|  | 
 | ||||||
|  | # NOTE: Do NOT import anything here that needs to be built (e.g. params) | ||||||
|  | from common.basedir import BASEDIR | ||||||
|  | from common.spinner import Spinner | ||||||
|  | from common.text_window import TextWindow | ||||||
|  | from selfdrive.swaglog import add_logentries_handler, cloudlog | ||||||
|  | from selfdrive.version import dirty | ||||||
|  | 
 | ||||||
|  | TOTAL_SCONS_NODES = 1225 | ||||||
|  | MAX_BUILD_PROGRESS = 70 | ||||||
|  | PREBUILT = os.path.exists(os.path.join(BASEDIR, 'prebuilt')) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def build(spinner, dirty=False): | ||||||
|  |   env = os.environ.copy() | ||||||
|  |   env['SCONS_PROGRESS'] = "1" | ||||||
|  |   env['SCONS_CACHE'] = "1" | ||||||
|  |   nproc = os.cpu_count() | ||||||
|  |   j_flag = "" if nproc is None else f"-j{nproc - 1}" | ||||||
|  | 
 | ||||||
|  |   for retry in [True, False]: | ||||||
|  |     scons = subprocess.Popen(["scons", j_flag], cwd=BASEDIR, env=env, stderr=subprocess.PIPE) | ||||||
|  | 
 | ||||||
|  |     compile_output = [] | ||||||
|  | 
 | ||||||
|  |     # Read progress from stderr and update spinner | ||||||
|  |     while scons.poll() is None: | ||||||
|  |       try: | ||||||
|  |         line = scons.stderr.readline() | ||||||
|  |         if line is None: | ||||||
|  |           continue | ||||||
|  |         line = line.rstrip() | ||||||
|  | 
 | ||||||
|  |         prefix = b'progress: ' | ||||||
|  |         if line.startswith(prefix): | ||||||
|  |           i = int(line[len(prefix):]) | ||||||
|  |           spinner.update_progress(MAX_BUILD_PROGRESS * min(1., i / TOTAL_SCONS_NODES), 100.) | ||||||
|  |         elif len(line): | ||||||
|  |           compile_output.append(line) | ||||||
|  |           print(line.decode('utf8', 'replace')) | ||||||
|  |       except Exception: | ||||||
|  |         pass | ||||||
|  | 
 | ||||||
|  |     if scons.returncode != 0: | ||||||
|  |       # Read remaining output | ||||||
|  |       r = scons.stderr.read().split(b'\n') | ||||||
|  |       compile_output += r | ||||||
|  | 
 | ||||||
|  |       if retry and (not dirty): | ||||||
|  |         if not os.getenv("CI"): | ||||||
|  |           print("scons build failed, cleaning in") | ||||||
|  |           for i in range(3, -1, -1): | ||||||
|  |             print("....%d" % i) | ||||||
|  |             time.sleep(1) | ||||||
|  |           subprocess.check_call(["scons", "-c"], cwd=BASEDIR, env=env) | ||||||
|  |           shutil.rmtree("/tmp/scons_cache", ignore_errors=True) | ||||||
|  |           shutil.rmtree("/data/scons_cache", ignore_errors=True) | ||||||
|  |         else: | ||||||
|  |           print("scons build failed after retry") | ||||||
|  |           sys.exit(1) | ||||||
|  |       else: | ||||||
|  |         # Build failed, log errors | ||||||
|  |         errors = [line.decode('utf8', 'replace') for line in compile_output | ||||||
|  |                   if any([err in line for err in [b'error: ', b'not found, needed by target']])] | ||||||
|  |         error_s = "\n".join(errors) | ||||||
|  |         add_logentries_handler(cloudlog) | ||||||
|  |         cloudlog.error("scons build failed\n" + error_s) | ||||||
|  | 
 | ||||||
|  |         # Show TextWindow | ||||||
|  |         spinner.close() | ||||||
|  |         error_s = "\n \n".join(["\n".join(textwrap.wrap(e, 65)) for e in errors]) | ||||||
|  |         with TextWindow("openpilot failed to build\n \n" + error_s) as t: | ||||||
|  |           t.wait_for_exit() | ||||||
|  |         exit(1) | ||||||
|  |     else: | ||||||
|  |       break | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__" and not PREBUILT: | ||||||
|  |   spinner = Spinner() | ||||||
|  |   spinner.update_progress(0, 100) | ||||||
|  |   build(spinner, dirty) | ||||||
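
build.py can now be driven on its own. A minimal sketch that mirrors the __main__ block above (not part of the diff; assumes a working display for Spinner):

from common.spinner import Spinner
from selfdrive.manager.build import PREBUILT, build
from selfdrive.version import dirty

if not PREBUILT:
  spinner = Spinner()
  spinner.update_progress(0, 100)
  # build() retries once with a cleaned scons cache, unless the tree is dirty or CI is set
  build(spinner, dirty)
  spinner.close()
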
| @ -0,0 +1,38 @@ | |||||||
|  | import os | ||||||
|  | import sys | ||||||
|  | import fcntl | ||||||
|  | import errno | ||||||
|  | import signal | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def unblock_stdout(): | ||||||
|  |   # get a non-blocking stdout | ||||||
|  |   child_pid, child_pty = os.forkpty() | ||||||
|  |   if child_pid != 0:  # parent | ||||||
|  | 
 | ||||||
|  |     # child is in its own process group, manually pass kill signals | ||||||
|  |     signal.signal(signal.SIGINT, lambda signum, frame: os.kill(child_pid, signal.SIGINT)) | ||||||
|  |     signal.signal(signal.SIGTERM, lambda signum, frame: os.kill(child_pid, signal.SIGTERM)) | ||||||
|  | 
 | ||||||
|  |     fcntl.fcntl(sys.stdout, fcntl.F_SETFL, fcntl.fcntl(sys.stdout, fcntl.F_GETFL) | os.O_NONBLOCK) | ||||||
|  | 
 | ||||||
|  |     while True: | ||||||
|  |       try: | ||||||
|  |         dat = os.read(child_pty, 4096) | ||||||
|  |       except OSError as e: | ||||||
|  |         if e.errno == errno.EIO: | ||||||
|  |           break | ||||||
|  |         continue | ||||||
|  | 
 | ||||||
|  |       if not dat: | ||||||
|  |         break | ||||||
|  | 
 | ||||||
|  |       try: | ||||||
|  |         sys.stdout.write(dat.decode('utf8')) | ||||||
|  |       except (OSError, IOError, UnicodeDecodeError): | ||||||
|  |         pass | ||||||
|  | 
 | ||||||
|  |     # os.wait() returns a tuple with the pid and a 16 bit value | ||||||
|  |     # whose low byte is the signal number and whose high byte is the exit status | ||||||
|  |     exit_status = os.wait()[1] >> 8 | ||||||
|  |     os._exit(exit_status) | ||||||
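
unblock_stdout() is meant to run once, first thing in manager's __main__ (as the new manager.py below does), so that a blocked stdout can never stall the manager. A minimal sketch of that call order:

from selfdrive.manager.helpers import unblock_stdout

if __name__ == "__main__":
  # fork a pty child; the parent relays its output and forwards SIGINT/SIGTERM to it
  unblock_stdout()
  # ... continue with Spinner() and main() as manager.py below does
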
| @ -0,0 +1,222 @@ | |||||||
|  | #!/usr/bin/env python3 | ||||||
|  | import datetime | ||||||
|  | import os | ||||||
|  | import signal | ||||||
|  | import subprocess | ||||||
|  | import sys | ||||||
|  | import traceback | ||||||
|  | 
 | ||||||
|  | import cereal.messaging as messaging | ||||||
|  | import selfdrive.crash as crash | ||||||
|  | from common.basedir import BASEDIR | ||||||
|  | from common.params import Params | ||||||
|  | from common.spinner import Spinner | ||||||
|  | from common.text_window import TextWindow | ||||||
|  | from selfdrive.hardware import EON, HARDWARE | ||||||
|  | from selfdrive.hardware.eon.apk import (pm_apply_packages, start_offroad, | ||||||
|  |                                         update_apks) | ||||||
|  | from selfdrive.manager.build import MAX_BUILD_PROGRESS, PREBUILT | ||||||
|  | from selfdrive.manager.helpers import unblock_stdout | ||||||
|  | from selfdrive.manager.process import ensure_running | ||||||
|  | from selfdrive.manager.process_config import managed_processes | ||||||
|  | from selfdrive.registration import register | ||||||
|  | from selfdrive.swaglog import add_logentries_handler, cloudlog | ||||||
|  | from selfdrive.version import dirty, version | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def manager_init(spinner=None): | ||||||
|  |   params = Params() | ||||||
|  |   params.manager_start() | ||||||
|  | 
 | ||||||
|  |   default_params = [ | ||||||
|  |     ("CommunityFeaturesToggle", "0"), | ||||||
|  |     ("CompletedTrainingVersion", "0"), | ||||||
|  |     ("IsRHD", "0"), | ||||||
|  |     ("IsMetric", "0"), | ||||||
|  |     ("RecordFront", "0"), | ||||||
|  |     ("HasAcceptedTerms", "0"), | ||||||
|  |     ("HasCompletedSetup", "0"), | ||||||
|  |     ("IsUploadRawEnabled", "1"), | ||||||
|  |     ("IsLdwEnabled", "1"), | ||||||
|  |     ("LastUpdateTime", datetime.datetime.utcnow().isoformat().encode('utf8')), | ||||||
|  |     ("OpenpilotEnabledToggle", "1"), | ||||||
|  |     ("VisionRadarToggle", "0"), | ||||||
|  |     ("LaneChangeEnabled", "1"), | ||||||
|  |     ("IsDriverViewEnabled", "0"), | ||||||
|  |   ] | ||||||
|  | 
 | ||||||
|  |   # set unset params | ||||||
|  |   for k, v in default_params: | ||||||
|  |     if params.get(k) is None: | ||||||
|  |       params.put(k, v) | ||||||
|  | 
 | ||||||
|  |   # is this dashcam? | ||||||
|  |   if os.getenv("PASSIVE") is not None: | ||||||
|  |     params.put("Passive", str(int(os.getenv("PASSIVE")))) | ||||||
|  | 
 | ||||||
|  |   if params.get("Passive") is None: | ||||||
|  |     raise Exception("Passive must be set to continue") | ||||||
|  | 
 | ||||||
|  |   if EON: | ||||||
|  |     update_apks() | ||||||
|  | 
 | ||||||
|  |   os.umask(0)  # Make sure we can create files with 777 permissions | ||||||
|  | 
 | ||||||
|  |   # Create folders needed for msgq | ||||||
|  |   try: | ||||||
|  |     os.mkdir("/dev/shm") | ||||||
|  |   except FileExistsError: | ||||||
|  |     pass | ||||||
|  |   except PermissionError: | ||||||
|  |     print("WARNING: failed to make /dev/shm") | ||||||
|  | 
 | ||||||
|  |   # set dongle id | ||||||
|  |   reg_res = register(spinner) | ||||||
|  |   if reg_res: | ||||||
|  |     dongle_id = reg_res | ||||||
|  |   else: | ||||||
|  |     raise Exception("server registration failed") | ||||||
|  |   os.environ['DONGLE_ID'] = dongle_id  # Needed for swaglog and loggerd | ||||||
|  | 
 | ||||||
|  |   if not dirty: | ||||||
|  |     os.environ['CLEAN'] = '1' | ||||||
|  | 
 | ||||||
|  |   cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty, | ||||||
|  |                        device=HARDWARE.get_device_type()) | ||||||
|  |   crash.bind_user(id=dongle_id) | ||||||
|  |   crash.bind_extra(version=version, dirty=dirty, device=HARDWARE.get_device_type()) | ||||||
|  | 
 | ||||||
|  |   # ensure shared libraries are readable by apks | ||||||
|  |   if EON: | ||||||
|  |     os.chmod(BASEDIR, 0o755) | ||||||
|  |     os.chmod("/dev/shm", 0o777) | ||||||
|  |     os.chmod(os.path.join(BASEDIR, "cereal"), 0o755) | ||||||
|  |     os.chmod(os.path.join(BASEDIR, "cereal", "libmessaging_shared.so"), 0o755) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def manager_prepare(spinner=None): | ||||||
|  |   # build all processes | ||||||
|  |   os.chdir(os.path.dirname(os.path.abspath(__file__))) | ||||||
|  | 
 | ||||||
|  |   total = 100.0 - (0 if PREBUILT else MAX_BUILD_PROGRESS) | ||||||
|  | 
 | ||||||
|  |   for i, p in enumerate(managed_processes.values()): | ||||||
|  |     perc = (100.0 - total) + total * (i + 1) / len(managed_processes) | ||||||
|  | 
 | ||||||
|  |     if spinner: | ||||||
|  |       spinner.update_progress(perc, 100.) | ||||||
|  |     p.prepare() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def manager_cleanup(): | ||||||
|  |   if EON: | ||||||
|  |     pm_apply_packages('disable') | ||||||
|  | 
 | ||||||
|  |   for p in managed_processes.values(): | ||||||
|  |     p.stop() | ||||||
|  | 
 | ||||||
|  |   cloudlog.info("everything is dead") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def manager_thread(): | ||||||
|  |   cloudlog.info("manager start") | ||||||
|  |   cloudlog.info({"environ": os.environ}) | ||||||
|  | 
 | ||||||
|  |   # save boot log | ||||||
|  |   subprocess.call("./bootlog", cwd=os.path.join(BASEDIR, "selfdrive/loggerd")) | ||||||
|  | 
 | ||||||
|  |   ignore = [] | ||||||
|  |   if os.getenv("NOBOARD") is not None: | ||||||
|  |     ignore.append("pandad") | ||||||
|  |   if os.getenv("BLOCK") is not None: | ||||||
|  |     ignore += os.getenv("BLOCK").split(",") | ||||||
|  | 
 | ||||||
|  |   # start offroad | ||||||
|  |   if EON and "QT" not in os.environ: | ||||||
|  |     pm_apply_packages('enable') | ||||||
|  |     start_offroad() | ||||||
|  | 
 | ||||||
|  |   started_prev = False | ||||||
|  |   params = Params() | ||||||
|  |   sm = messaging.SubMaster(['deviceState']) | ||||||
|  |   pm = messaging.PubMaster(['managerState']) | ||||||
|  | 
 | ||||||
|  |   while True: | ||||||
|  |     sm.update() | ||||||
|  |     not_run = ignore[:] | ||||||
|  | 
 | ||||||
|  |     if sm['deviceState'].freeSpacePercent < 5: | ||||||
|  |       not_run.append("loggerd") | ||||||
|  | 
 | ||||||
|  |     started = sm['deviceState'].started | ||||||
|  |     driverview = params.get("IsDriverViewEnabled") == b"1" | ||||||
|  |     ensure_running(managed_processes.values(), started, driverview, not_run) | ||||||
|  | 
 | ||||||
|  |     # trigger an update after going offroad | ||||||
|  |     if started_prev and not started: | ||||||
|  |       os.sync() | ||||||
|  |       managed_processes['updated'].signal(signal.SIGHUP) | ||||||
|  | 
 | ||||||
|  |     started_prev = started | ||||||
|  | 
 | ||||||
|  |     running_list = ["%s%s\u001b[0m" % ("\u001b[32m" if p.proc.is_alive() else "\u001b[31m", p.name) | ||||||
|  |                     for p in managed_processes.values() if p.proc] | ||||||
|  |     cloudlog.debug(' '.join(running_list)) | ||||||
|  | 
 | ||||||
|  |     # send managerState | ||||||
|  |     msg = messaging.new_message('managerState') | ||||||
|  |     msg.managerState.processes = [p.get_process_state_msg() for p in managed_processes.values()] | ||||||
|  |     pm.send('managerState', msg) | ||||||
|  | 
 | ||||||
|  |     # Exit main loop when uninstall is needed | ||||||
|  |     if params.get("DoUninstall", encoding='utf8') == "1": | ||||||
|  |       break | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def main(spinner=None): | ||||||
|  |   manager_init(spinner) | ||||||
|  |   manager_prepare(spinner) | ||||||
|  | 
 | ||||||
|  |   if spinner: | ||||||
|  |     spinner.close() | ||||||
|  | 
 | ||||||
|  |   if os.getenv("PREPAREONLY") is not None: | ||||||
|  |     return | ||||||
|  | 
 | ||||||
|  |   # SystemExit on sigterm | ||||||
|  |   signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit(1)) | ||||||
|  | 
 | ||||||
|  |   try: | ||||||
|  |     manager_thread() | ||||||
|  |   except Exception: | ||||||
|  |     traceback.print_exc() | ||||||
|  |     crash.capture_exception() | ||||||
|  |   finally: | ||||||
|  |     manager_cleanup() | ||||||
|  | 
 | ||||||
|  |   if Params().get("DoUninstall", encoding='utf8') == "1": | ||||||
|  |     cloudlog.warning("uninstalling") | ||||||
|  |     HARDWARE.uninstall() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  |   unblock_stdout() | ||||||
|  |   spinner = Spinner() | ||||||
|  | 
 | ||||||
|  |   try: | ||||||
|  |     main(spinner) | ||||||
|  |   except Exception: | ||||||
|  |     add_logentries_handler(cloudlog) | ||||||
|  |     cloudlog.exception("Manager failed to start") | ||||||
|  | 
 | ||||||
|  |     # Show last 3 lines of traceback | ||||||
|  |     error = traceback.format_exc(-3) | ||||||
|  |     error = "Manager failed to start\n\n" + error | ||||||
|  |     spinner.close() | ||||||
|  |     with TextWindow(error) as t: | ||||||
|  |       t.wait_for_exit() | ||||||
|  | 
 | ||||||
|  |     raise | ||||||
|  | 
 | ||||||
|  |   # manual exit because we are forked | ||||||
|  |   sys.exit(0) | ||||||
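
For reference, the run loop above honors a few environment flags. The snippet below summarizes them; the flag names come from the code above, while the example values are hypothetical and would normally be exported before launching manager.py:

import os

os.environ["PREPAREONLY"] = "1"           # main() returns right after manager_prepare()
os.environ["NOBOARD"] = "1"               # manager_thread() keeps pandad stopped
os.environ["BLOCK"] = "uploader,deleter"  # comma-separated list of processes to keep stopped
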
| @ -0,0 +1,225 @@ | |||||||
|  | import importlib | ||||||
|  | import os | ||||||
|  | import signal | ||||||
|  | import time | ||||||
|  | import subprocess | ||||||
|  | from abc import ABC, abstractmethod | ||||||
|  | from multiprocessing import Process | ||||||
|  | 
 | ||||||
|  | from setproctitle import setproctitle  # pylint: disable=no-name-in-module | ||||||
|  | 
 | ||||||
|  | import cereal.messaging as messaging | ||||||
|  | import selfdrive.crash as crash | ||||||
|  | from common.basedir import BASEDIR | ||||||
|  | from common.params import Params | ||||||
|  | from selfdrive.swaglog import cloudlog | ||||||
|  | from selfdrive.hardware import HARDWARE | ||||||
|  | from cereal import log | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def launcher(proc): | ||||||
|  |   try: | ||||||
|  |     # import the process | ||||||
|  |     mod = importlib.import_module(proc) | ||||||
|  | 
 | ||||||
|  |     # rename the process | ||||||
|  |     setproctitle(proc) | ||||||
|  | 
 | ||||||
|  |     # create new context since we forked | ||||||
|  |     messaging.context = messaging.Context() | ||||||
|  | 
 | ||||||
|  |     # exec the process | ||||||
|  |     mod.main() | ||||||
|  |   except KeyboardInterrupt: | ||||||
|  |     cloudlog.warning("child %s got SIGINT" % proc) | ||||||
|  |   except Exception: | ||||||
|  |     # can't install the crash handler because sys.excepthook doesn't play nice | ||||||
|  |     # with threads, so catch it here. | ||||||
|  |     crash.capture_exception() | ||||||
|  |     raise | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def nativelauncher(pargs, cwd): | ||||||
|  |   # exec the process | ||||||
|  |   os.chdir(cwd) | ||||||
|  |   os.execvp(pargs[0], pargs) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def join_process(process, timeout): | ||||||
|  |   # Process().join(timeout) will hang due to a python 3 bug: https://bugs.python.org/issue28382 | ||||||
|  |   # We have to poll the exitcode instead | ||||||
|  |   t = time.monotonic() | ||||||
|  |   while time.monotonic() - t < timeout and process.exitcode is None: | ||||||
|  |     time.sleep(0.001) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class ManagerProcess(ABC): | ||||||
|  |   unkillable = False | ||||||
|  |   daemon = False | ||||||
|  |   sigkill = False | ||||||
|  |   proc = None | ||||||
|  |   name = "" | ||||||
|  | 
 | ||||||
|  |   @abstractmethod | ||||||
|  |   def prepare(self): | ||||||
|  |     pass | ||||||
|  | 
 | ||||||
|  |   @abstractmethod | ||||||
|  |   def start(self): | ||||||
|  |     pass | ||||||
|  | 
 | ||||||
|  |   def stop(self, retry=True): | ||||||
|  |     if self.proc is None: | ||||||
|  |       return | ||||||
|  | 
 | ||||||
|  |     cloudlog.info(f"killing {self.name}") | ||||||
|  | 
 | ||||||
|  |     if self.proc.exitcode is None: | ||||||
|  |       sig = signal.SIGKILL if self.sigkill else signal.SIGINT | ||||||
|  |       self.signal(sig) | ||||||
|  | 
 | ||||||
|  |       join_process(self.proc, 5) | ||||||
|  | 
 | ||||||
|  |       # If process failed to die send SIGKILL or reboot | ||||||
|  |       if self.proc.exitcode is None and retry: | ||||||
|  |         if self.unkillable: | ||||||
|  |           cloudlog.critical(f"unkillable process {self.name} failed to exit! rebooting in 15 if it doesn't die") | ||||||
|  |           join_process(self.proc, 15) | ||||||
|  | 
 | ||||||
|  |           if self.proc.exitcode is None: | ||||||
|  |             cloudlog.critical(f"unkillable process {self.name} failed to die!") | ||||||
|  |             os.system("date >> /data/unkillable_reboot") | ||||||
|  |             os.sync() | ||||||
|  |             HARDWARE.reboot() | ||||||
|  |             raise RuntimeError | ||||||
|  |         else: | ||||||
|  |           cloudlog.info(f"killing {self.name} with SIGKILL") | ||||||
|  |           self.signal(signal.SIGKILL) | ||||||
|  |           self.proc.join() | ||||||
|  | 
 | ||||||
|  |     ret = self.proc.exitcode | ||||||
|  |     cloudlog.info(f"{self.name} is dead with {ret}") | ||||||
|  | 
 | ||||||
|  |     if self.proc.exitcode is not None: | ||||||
|  |       self.proc = None | ||||||
|  | 
 | ||||||
|  |     return ret | ||||||
|  | 
 | ||||||
|  |   def signal(self, sig): | ||||||
|  |     if self.proc.exitcode is not None or self.proc.pid is None: | ||||||
|  |       return | ||||||
|  | 
 | ||||||
|  |     cloudlog.info(f"sending signal {sig} to {self.name}") | ||||||
|  |     os.kill(self.proc.pid, sig) | ||||||
|  | 
 | ||||||
|  |   def get_process_state_msg(self): | ||||||
|  |     state = log.ManagerState.ProcessState.new_message() | ||||||
|  |     state.name = self.name | ||||||
|  |     if self.proc: | ||||||
|  |       state.running = self.proc.is_alive() | ||||||
|  |       state.pid = self.proc.pid or 0 | ||||||
|  |       state.exitCode = self.proc.exitcode or 0 | ||||||
|  |     return state | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class NativeProcess(ManagerProcess): | ||||||
|  |   def __init__(self, name, cwd, cmdline, persistent=False, driverview=False, unkillable=False, sigkill=False): | ||||||
|  |     self.name = name | ||||||
|  |     self.cwd = cwd | ||||||
|  |     self.cmdline = cmdline | ||||||
|  |     self.persistent = persistent | ||||||
|  |     self.driverview = driverview | ||||||
|  |     self.unkillable = unkillable | ||||||
|  |     self.sigkill = sigkill | ||||||
|  | 
 | ||||||
|  |   def prepare(self): | ||||||
|  |     pass | ||||||
|  | 
 | ||||||
|  |   def start(self): | ||||||
|  |     if self.proc is not None: | ||||||
|  |       return | ||||||
|  | 
 | ||||||
|  |     cwd = os.path.join(BASEDIR, self.cwd) | ||||||
|  |     cloudlog.info("starting process %s" % self.name) | ||||||
|  |     self.proc = Process(name=self.name, target=nativelauncher, args=(self.cmdline, cwd)) | ||||||
|  |     self.proc.start() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class PythonProcess(ManagerProcess): | ||||||
|  |   def __init__(self, name, module, persistent=False, driverview=False, unkillable=False, sigkill=False): | ||||||
|  |     self.name = name | ||||||
|  |     self.module = module | ||||||
|  |     self.persistent = persistent | ||||||
|  |     self.driverview = driverview | ||||||
|  |     self.unkillable = unkillable | ||||||
|  |     self.sigkill = sigkill | ||||||
|  | 
 | ||||||
|  |   def prepare(self): | ||||||
|  |     cloudlog.info("preimporting %s" % self.module) | ||||||
|  |     importlib.import_module(self.module) | ||||||
|  | 
 | ||||||
|  |   def start(self): | ||||||
|  |     if self.proc is not None: | ||||||
|  |       return | ||||||
|  | 
 | ||||||
|  |     cloudlog.info("starting python %s" % self.module) | ||||||
|  |     self.proc = Process(name=self.name, target=launcher, args=(self.module,)) | ||||||
|  |     self.proc.start() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class DaemonProcess(ManagerProcess): | ||||||
|  |   """Python process that has to stay running across manager restart. | ||||||
|  |   This is used for athena so you don't lose SSH access when restarting manager.""" | ||||||
|  |   def __init__(self, name, module, param_name): | ||||||
|  |     self.name = name | ||||||
|  |     self.module = module | ||||||
|  |     self.param_name = param_name | ||||||
|  |     self.persistent = True | ||||||
|  | 
 | ||||||
|  |   def prepare(self): | ||||||
|  |     pass | ||||||
|  | 
 | ||||||
|  |   def start(self): | ||||||
|  |     params = Params() | ||||||
|  |     pid = params.get(self.param_name, encoding='utf-8') | ||||||
|  | 
 | ||||||
|  |     if pid is not None: | ||||||
|  |       try: | ||||||
|  |         os.kill(int(pid), 0) | ||||||
|  |         with open(f'/proc/{pid}/cmdline') as f: | ||||||
|  |           if self.module in f.read(): | ||||||
|  |             # daemon is running | ||||||
|  |             return | ||||||
|  |       except (OSError, FileNotFoundError): | ||||||
|  |         # process is dead | ||||||
|  |         pass | ||||||
|  | 
 | ||||||
|  |     cloudlog.info("starting daemon %s" % self.name) | ||||||
|  |     proc = subprocess.Popen(['python', '-m', self.module],  # pylint: disable=subprocess-popen-preexec-fn | ||||||
|  |                                stdin=open('/dev/null', 'r'), | ||||||
|  |                                stdout=open('/dev/null', 'w'), | ||||||
|  |                                stderr=open('/dev/null', 'w'), | ||||||
|  |                                preexec_fn=os.setpgrp) | ||||||
|  | 
 | ||||||
|  |     params.put(self.param_name, str(proc.pid)) | ||||||
|  | 
 | ||||||
|  |   def stop(self, retry=True): | ||||||
|  |     pass | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def ensure_running(procs, started, driverview=False, not_run=None): | ||||||
|  |   if not_run is None: | ||||||
|  |     not_run = [] | ||||||
|  | 
 | ||||||
|  |   # TODO: can we do this in parallel? | ||||||
|  |   for p in procs: | ||||||
|  |     if p.name in not_run: | ||||||
|  |       p.stop() | ||||||
|  |     elif p.persistent: | ||||||
|  |       p.start() | ||||||
|  |     elif p.driverview and driverview: | ||||||
|  |       p.start() | ||||||
|  |     elif started: | ||||||
|  |       p.start() | ||||||
|  |     else: | ||||||
|  |       p.stop() | ||||||
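
A minimal usage sketch of the classes above, outside of manager. The two process definitions are the real ones from process_config.py below; wiring them up by hand like this is purely illustrative:

from selfdrive.manager.process import NativeProcess, PythonProcess

procs = [
  PythonProcess("controlsd", "selfdrive.controls.controlsd"),
  NativeProcess("ui", "selfdrive/ui", ["./ui"], persistent=True),
]

for p in procs:
  p.prepare()  # pre-imports the Python module; a no-op for native processes
  p.start()    # forks the process via launcher() / nativelauncher()

for p in procs:
  p.stop()     # SIGINT (or SIGKILL when sigkill=True), escalating if it refuses to die
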
| @ -0,0 +1,48 @@ | |||||||
|  | from selfdrive.manager.process import PythonProcess, NativeProcess, DaemonProcess | ||||||
|  | from selfdrive.hardware import EON, TICI, PC | ||||||
|  | 
 | ||||||
|  | procs = [ | ||||||
|  |   DaemonProcess("manage_athenad", "selfdrive.athena.manage_athenad", "AthenadPid"), | ||||||
|  |   # due to qualcomm kernel bugs SIGKILLing camerad sometimes causes page table corruption | ||||||
|  |   NativeProcess("camerad", "selfdrive/camerad", ["./camerad"], unkillable=True, driverview=True), | ||||||
|  |   NativeProcess("clocksd", "selfdrive/clocksd", ["./clocksd"]), | ||||||
|  |   NativeProcess("dmonitoringmodeld", "selfdrive/modeld", ["./dmonitoringmodeld"], driverview=True), | ||||||
|  |   NativeProcess("logcatd", "selfdrive/logcatd", ["./logcatd"]), | ||||||
|  |   NativeProcess("loggerd", "selfdrive/loggerd", ["./loggerd"]), | ||||||
|  |   NativeProcess("modeld", "selfdrive/modeld", ["./modeld"]), | ||||||
|  |   NativeProcess("proclogd", "selfdrive/proclogd", ["./proclogd"]), | ||||||
|  |   NativeProcess("ubloxd", "selfdrive/locationd", ["./ubloxd"]), | ||||||
|  |   NativeProcess("ui", "selfdrive/ui", ["./ui"], persistent=True), | ||||||
|  |   PythonProcess("calibrationd", "selfdrive.locationd.calibrationd"), | ||||||
|  |   PythonProcess("controlsd", "selfdrive.controls.controlsd"), | ||||||
|  |   PythonProcess("deleter", "selfdrive.loggerd.deleter", persistent=True), | ||||||
|  |   PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", driverview=True), | ||||||
|  |   PythonProcess("locationd", "selfdrive.locationd.locationd"), | ||||||
|  |   PythonProcess("logmessaged", "selfdrive.logmessaged", persistent=True), | ||||||
|  |   PythonProcess("pandad", "selfdrive.pandad", persistent=True), | ||||||
|  |   PythonProcess("paramsd", "selfdrive.locationd.paramsd"), | ||||||
|  |   PythonProcess("plannerd", "selfdrive.controls.plannerd"), | ||||||
|  |   PythonProcess("radard", "selfdrive.controls.radard"), | ||||||
|  |   PythonProcess("thermald", "selfdrive.thermald.thermald", persistent=True), | ||||||
|  |   PythonProcess("uploader", "selfdrive.loggerd.uploader", persistent=True), | ||||||
|  | ] | ||||||
|  | 
 | ||||||
|  | if not PC: | ||||||
|  |   procs += [ | ||||||
|  |     NativeProcess("sensord", "selfdrive/sensord", ["./sensord"], persistent=EON, sigkill=EON), | ||||||
|  |     PythonProcess("tombstoned", "selfdrive.tombstoned", persistent=True), | ||||||
|  |     PythonProcess("updated", "selfdrive.updated", persistent=True), | ||||||
|  |   ] | ||||||
|  | 
 | ||||||
|  | if TICI: | ||||||
|  |   procs += [ | ||||||
|  |     PythonProcess("timezoned", "selfdrive.timezoned", persistent=True), | ||||||
|  |   ] | ||||||
|  | 
 | ||||||
|  | if EON: | ||||||
|  |   procs += [ | ||||||
|  |     PythonProcess("rtshield", "selfdrive.rtshield"), | ||||||
|  |   ] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | managed_processes = {p.name: p for p in procs} | ||||||
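
Since managed_processes is now just a name-keyed dict of ManagerProcess objects, forks and tests can extend or override entries directly. A hedged example; the extra process and its module path are hypothetical and not part of this PR:

from selfdrive.manager.process import PythonProcess
from selfdrive.manager.process_config import managed_processes

# hypothetical extra process, started whenever the car is started
managed_processes["exampled"] = PythonProcess("exampled", "selfdrive.exampled")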