@ -68,6 +68,7 @@ class ManagerProcess(ABC):
last_watchdog_time = 0
last_watchdog_time = 0
watchdog_max_dt = None
watchdog_max_dt = None
watchdog_seen = False
watchdog_seen = False
shutting_down = False
@abstractmethod
@abstractmethod
def prepare ( self ) :
def prepare ( self ) :
@ -101,15 +102,19 @@ class ManagerProcess(ABC):
else :
else :
self . watchdog_seen = True
self . watchdog_seen = True
def stop ( self , retry = True ) :
def stop ( self , retry = True , block = True ) :
if self . proc is None :
if self . proc is None :
return
return
cloudlog . info ( f " killing { self . name } " )
if self . proc . exitcode is None :
if self . proc . exitcode is None :
sig = signal . SIGKILL if self . sigkill else signal . SIGINT
if not self . shutting_down :
self . signal ( sig )
cloudlog . info ( f " killing { self . name } " )
sig = signal . SIGKILL if self . sigkill else signal . SIGINT
self . signal ( sig )
self . shutting_down = True
if not block :
return
join_process ( self . proc , 5 )
join_process ( self . proc , 5 )
@ -134,6 +139,7 @@ class ManagerProcess(ABC):
cloudlog . info ( f " { self . name } is dead with { ret } " )
cloudlog . info ( f " { self . name } is dead with { ret } " )
if self . proc . exitcode is not None :
if self . proc . exitcode is not None :
self . shutting_down = False
self . proc = None
self . proc = None
return ret
return ret
@ -175,6 +181,10 @@ class NativeProcess(ManagerProcess):
pass
pass
def start ( self ) :
def start ( self ) :
# In case we only tried a non blocking stop we need to stop it before restarting
if self . shutting_down :
self . stop ( )
if self . proc is not None :
if self . proc is not None :
return
return
@ -183,6 +193,7 @@ class NativeProcess(ManagerProcess):
self . proc = Process ( name = self . name , target = nativelauncher , args = ( self . cmdline , cwd ) )
self . proc = Process ( name = self . name , target = nativelauncher , args = ( self . cmdline , cwd ) )
self . proc . start ( )
self . proc . start ( )
self . watchdog_seen = False
self . watchdog_seen = False
self . shutting_down = False
class PythonProcess ( ManagerProcess ) :
class PythonProcess ( ManagerProcess ) :
@ -202,6 +213,10 @@ class PythonProcess(ManagerProcess):
importlib . import_module ( self . module )
importlib . import_module ( self . module )
def start ( self ) :
def start ( self ) :
# In case we only tried a non blocking stop we need to stop it before restarting
if self . shutting_down :
self . stop ( )
if self . proc is not None :
if self . proc is not None :
return
return
@ -209,6 +224,7 @@ class PythonProcess(ManagerProcess):
self . proc = Process ( name = self . name , target = launcher , args = ( self . module , ) )
self . proc = Process ( name = self . name , target = launcher , args = ( self . module , ) )
self . proc . start ( )
self . proc . start ( )
self . watchdog_seen = False
self . watchdog_seen = False
self . shutting_down = False
class DaemonProcess ( ManagerProcess ) :
class DaemonProcess ( ManagerProcess ) :
@ -248,7 +264,7 @@ class DaemonProcess(ManagerProcess):
params . put ( self . param_name , str ( proc . pid ) )
params . put ( self . param_name , str ( proc . pid ) )
def stop ( self , retry = True ) :
def stop ( self , retry = True , block = True ) :
pass
pass
@ -256,12 +272,11 @@ def ensure_running(procs, started, driverview=False, not_run=None):
if not_run is None :
if not_run is None :
not_run = [ ]
not_run = [ ]
# TODO: can we do this in parallel?
for p in procs :
for p in procs :
if p . name in not_run :
if p . name in not_run :
p . stop ( )
p . stop ( block = False )
elif not p . enabled :
elif not p . enabled :
p . stop ( )
p . stop ( block = False )
elif p . persistent :
elif p . persistent :
p . start ( )
p . start ( )
elif p . driverview and driverview :
elif p . driverview and driverview :
@ -269,7 +284,7 @@ def ensure_running(procs, started, driverview=False, not_run=None):
elif started :
elif started :
p . start ( )
p . start ( )
else :
else :
p . stop ( )
p . stop ( block = False )
p . check_watchdog ( started )
p . check_watchdog ( started )