@ -4,6 +4,7 @@ import signal
import struct
import struct
import time
import time
import subprocess
import subprocess
from typing import Optional , List , ValuesView
from abc import ABC , abstractmethod
from abc import ABC , abstractmethod
from multiprocessing import Process
from multiprocessing import Process
@ -22,7 +23,7 @@ WATCHDOG_FN = "/dev/shm/wd_"
ENABLE_WATCHDOG = os . getenv ( " NO_WATCHDOG " ) is None
ENABLE_WATCHDOG = os . getenv ( " NO_WATCHDOG " ) is None
def launcher ( proc , name ) :
def launcher ( proc : str , name : str ) - > None :
try :
try :
# import the process
# import the process
mod = importlib . import_module ( proc )
mod = importlib . import_module ( proc )
@ -37,7 +38,7 @@ def launcher(proc, name):
cloudlog . bind ( daemon = name )
cloudlog . bind ( daemon = name )
# exec the process
# exec the process
mod . main ( )
getattr ( mod , ' main ' ) ( )
except KeyboardInterrupt :
except KeyboardInterrupt :
cloudlog . warning ( f " child { proc } got SIGINT " )
cloudlog . warning ( f " child { proc } got SIGINT " )
except Exception :
except Exception :
@ -47,13 +48,13 @@ def launcher(proc, name):
raise
raise
def nativelauncher ( pargs , cwd ) :
def nativelauncher ( pargs : List [ str ] , cwd : str ) - > None :
# exec the process
# exec the process
os . chdir ( cwd )
os . chdir ( cwd )
os . execvp ( pargs [ 0 ] , pargs )
os . execvp ( pargs [ 0 ] , pargs )
def join_process ( process , timeout ) :
def join_process ( process : Process , timeout : float ) - > None :
# Process().join(timeout) will hang due to a python 3 bug: https://bugs.python.org/issue28382
# Process().join(timeout) will hang due to a python 3 bug: https://bugs.python.org/issue28382
# We have to poll the exitcode instead
# We have to poll the exitcode instead
t = time . monotonic ( )
t = time . monotonic ( )
@ -65,7 +66,8 @@ class ManagerProcess(ABC):
unkillable = False
unkillable = False
daemon = False
daemon = False
sigkill = False
sigkill = False
proc = None
persistent = False
proc : Optional [ Process ] = None
enabled = True
enabled = True
name = " "
name = " "
@ -75,24 +77,25 @@ class ManagerProcess(ABC):
shutting_down = False
shutting_down = False
@abstractmethod
@abstractmethod
def prepare ( self ) :
def prepare ( self ) - > None :
pass
pass
@abstractmethod
@abstractmethod
def start ( self ) :
def start ( self ) - > None :
pass
pass
def restart ( self ) :
def restart ( self ) - > None :
self . stop ( )
self . stop ( )
self . start ( )
self . start ( )
def check_watchdog ( self , started ) :
def check_watchdog ( self , started : bool ) - > None :
if self . watchdog_max_dt is None or self . proc is None :
if self . watchdog_max_dt is None or self . proc is None :
return
return
try :
try :
fn = WATCHDOG_FN + str ( self . proc . pid )
fn = WATCHDOG_FN + str ( self . proc . pid )
self . last_watchdog_time = struct . unpack ( ' Q ' , open ( fn , " rb " ) . read ( ) ) [ 0 ]
# TODO: why can't pylint find struct.unpack?
self . last_watchdog_time = struct . unpack ( ' Q ' , open ( fn , " rb " ) . read ( ) ) [ 0 ] # pylint: disable=no-member
except Exception :
except Exception :
pass
pass
@ -106,9 +109,9 @@ class ManagerProcess(ABC):
else :
else :
self . watchdog_seen = True
self . watchdog_seen = True
def stop ( self , retry = True , block = True ) :
def stop ( self , retry : bool = True , block : bool = True ) - > Optional [ int ] :
if self . proc is None :
if self . proc is None :
return
return None
if self . proc . exitcode is None :
if self . proc . exitcode is None :
if not self . shutting_down :
if not self . shutting_down :
@ -118,7 +121,7 @@ class ManagerProcess(ABC):
self . shutting_down = True
self . shutting_down = True
if not block :
if not block :
return
return None
join_process ( self . proc , 5 )
join_process ( self . proc , 5 )
@ -148,7 +151,7 @@ class ManagerProcess(ABC):
return ret
return ret
def signal ( self , sig ) :
def signal ( self , sig : int ) - > None :
if self . proc is None :
if self . proc is None :
return
return
@ -156,6 +159,10 @@ class ManagerProcess(ABC):
if self . proc . exitcode is not None and self . proc . pid is not None :
if self . proc . exitcode is not None and self . proc . pid is not None :
return
return
# Can't signal if we don't have a pid
if self . proc . pid is None :
return
cloudlog . info ( f " sending signal { sig } to { self . name } " )
cloudlog . info ( f " sending signal { sig } to { self . name } " )
os . kill ( self . proc . pid , sig )
os . kill ( self . proc . pid , sig )
@ -182,10 +189,10 @@ class NativeProcess(ManagerProcess):
self . sigkill = sigkill
self . sigkill = sigkill
self . watchdog_max_dt = watchdog_max_dt
self . watchdog_max_dt = watchdog_max_dt
def prepare ( self ) :
def prepare ( self ) - > None :
pass
pass
def start ( self ) :
def start ( self ) - > None :
# In case we only tried a non blocking stop we need to stop it before restarting
# In case we only tried a non blocking stop we need to stop it before restarting
if self . shutting_down :
if self . shutting_down :
self . stop ( )
self . stop ( )
@ -212,12 +219,12 @@ class PythonProcess(ManagerProcess):
self . sigkill = sigkill
self . sigkill = sigkill
self . watchdog_max_dt = watchdog_max_dt
self . watchdog_max_dt = watchdog_max_dt
def prepare ( self ) :
def prepare ( self ) - > None :
if self . enabled :
if self . enabled :
cloudlog . info ( f " preimporting { self . module } " )
cloudlog . info ( f " preimporting { self . module } " )
importlib . import_module ( self . module )
importlib . import_module ( self . module )
def start ( self ) :
def start ( self ) - > None :
# In case we only tried a non blocking stop we need to stop it before restarting
# In case we only tried a non blocking stop we need to stop it before restarting
if self . shutting_down :
if self . shutting_down :
self . stop ( )
self . stop ( )
@ -242,10 +249,10 @@ class DaemonProcess(ManagerProcess):
self . enabled = enabled
self . enabled = enabled
self . persistent = True
self . persistent = True
def prepare ( self ) :
def prepare ( self ) - > None :
pass
pass
def start ( self ) :
def start ( self ) - > None :
params = Params ( )
params = Params ( )
pid = params . get ( self . param_name , encoding = ' utf-8 ' )
pid = params . get ( self . param_name , encoding = ' utf-8 ' )
@ -269,11 +276,11 @@ class DaemonProcess(ManagerProcess):
params . put ( self . param_name , str ( proc . pid ) )
params . put ( self . param_name , str ( proc . pid ) )
def stop ( self , retry = True , block = True ) :
def stop ( self , retry = True , block = True ) - > None :
pass
pass
def ensure_running ( procs , started , driverview = False , not_run = None ) :
def ensure_running ( procs : ValuesView [ ManagerProcess ] , started : bool , driverview : bool = False , not_run : Optional [ List [ str ] ] = None ) - > None :
if not_run is None :
if not_run is None :
not_run = [ ]
not_run = [ ]
@ -284,7 +291,8 @@ def ensure_running(procs, started, driverview=False, not_run=None):
p . stop ( block = False )
p . stop ( block = False )
elif p . persistent :
elif p . persistent :
p . start ( )
p . start ( )
elif p . driverview and driverview :
elif getattr ( p , ' driverview ' , False ) and driverview :
# TODO: why is driverview an argument here? can this be done with the name?
p . start ( )
p . start ( )
elif started :
elif started :
p . start ( )
p . start ( )