@ -32,13 +32,14 @@ import fcntl
import time
import threading
from pathlib import Path
from typing import List , Tuple , Optional
from common . hardware import ANDROID
from common . basedir import BASEDIR
from common . params import Params
from selfdrive . swaglog import cloudlog
from selfdrive . controls . lib . alertmanager import set_offroad_alert
# Test address, lock-file path and staging root used by the updater. Each one
# can be overridden through the matching UPDATER_* environment variable.
TEST_IP = os.getenv("UPDATER_TEST_IP", "8.8.8.8")
LOCK_FILE = os.getenv("UPDATER_LOCK_FILE", "/tmp/safe_staging_overlay.lock")
STAGING_ROOT = os.getenv("UPDATER_STAGING_ROOT", "/data/safe_staging")
@ -60,7 +61,7 @@ class WaitTimeHelper:
signal . signal ( signal . SIGINT , self . graceful_shutdown )
signal . signal ( signal . SIGHUP , self . update_now )
def graceful_shutdown ( self , signum , frame ) :
def graceful_shutdown ( self , signum : int , frame ) - > None :
# umount -f doesn't appear effective in avoiding "device busy" on NEOS,
# so don't actually die until the next convenient opportunity in main().
cloudlog . info ( " caught SIGINT/SIGTERM, dismounting overlay at next opportunity " )
@ -73,35 +74,42 @@ class WaitTimeHelper:
self . shutdown = True
self . ready_event . set ( )
def update_now(self, signum: int, frame) -> None:
    """Signal handler (registered for SIGHUP) that requests an immediate check.

    Sets ``ready_event`` so that any pending ``sleep()`` on this helper
    returns early. ``signum`` and ``frame`` are the standard signal-handler
    arguments and are not used.
    """
    cloudlog.info("caught SIGHUP, running update check immediately")
    self.ready_event.set()
def sleep ( self , t ) :
def sleep ( self , t : float ) - > None :
self . ready_event . wait ( timeout = t )
def run(cmd: List[str], cwd: Optional[str] = None, low_priority: bool = False) -> str:
    """Run ``cmd`` and return its combined stdout/stderr decoded as UTF-8.

    Args:
        cmd: argv list of the command to execute (not modified).
        cwd: working directory for the child process, or None for the current one.
        low_priority: when True, run the command through ``nice -n 19``.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero status.
    """
    if low_priority:
        # Build a new list so the caller's argv is left untouched.
        cmd = ["nice", "-n", "19"] + cmd
    return subprocess.check_output(cmd, cwd=cwd, stderr=subprocess.STDOUT, encoding='utf8')
def set_consistent_flag(consistent: bool) -> None:
    """Create or remove the ``.overlay_consistent`` marker file in FINALIZED.

    The filesystem is synced both before and after the marker is changed.

    Args:
        consistent: True to create the marker, False to remove it if present.
    """
    os.sync()
    consistent_file = Path(os.path.join(FINALIZED, ".overlay_consistent"))
    if consistent:
        consistent_file.touch()
    elif consistent_file.exists():
        consistent_file.unlink()
    os.sync()
def set_update_available_params ( new_version ) :
def set_params ( new_version : bool , failed_count : int , exception : Optional [ str ] ) - > None :
params = Params ( )
t = datetime . datetime . utcnow ( ) . isoformat ( )
params . put ( " LastUpdateTime " , t . encode ( ' utf8 ' ) )
params . put ( " UpdateFailedCount " , str ( failed_count ) )
if failed_count == 0 :
t = datetime . datetime . utcnow ( ) . isoformat ( )
params . put ( " LastUpdateTime " , t . encode ( ' utf8 ' ) )
if exception is None :
params . delete ( " LastUpdateException " )
else :
params . put ( " LastUpdateException " , exception )
if new_version :
try :
@ -114,13 +122,7 @@ def set_update_available_params(new_version):
params . put ( " UpdateAvailable " , " 1 " )
def dismount_ovfs():
    """Lazily unmount OVERLAY_MERGED (``umount -l``) if it is currently a mount point."""
    if os.path.ismount(OVERLAY_MERGED):
        cloudlog.error("unmounting existing overlay")
        run(["umount", "-l", OVERLAY_MERGED])
def setup_git_options ( cwd ) :
def setup_git_options ( cwd : str ) - > None :
# We sync FS object atimes (which NEOS doesn't use) and mtimes, but ctimes
# are outside user control. Make sure Git is set up to ignore system ctimes,
# because they change when we make hard links during finalize. Otherwise,
@ -134,66 +136,128 @@ def setup_git_options(cwd):
( " core.checkStat " , " minimal " ) ,
]
for option , value in git_cfg :
try :
ret = run ( [ " git " , " config " , " --get " , option ] , cwd )
config_ok = ret . strip ( ) == value
except subprocess . CalledProcessError :
config_ok = False
run ( [ " git " , " config " , option , value ] , cwd )
def dismount_overlay() -> None:
    """Lazily unmount OVERLAY_MERGED (``umount -l``) if it is currently a mount point."""
    if os.path.ismount(OVERLAY_MERGED):
        cloudlog.info("unmounting existing overlay")
        run(["umount", "-l", OVERLAY_MERGED])
if not config_ok :
cloudlog . info ( f " Setting git ' { option } ' to ' { value } ' " )
run ( [ " git " , " config " , option , value ] , cwd )
def init_overlay() -> None:
    """Create the staging overlay over BASEDIR, unless a valid one already exists.

    An existing overlay is kept when BASEDIR/.overlay_init exists and nothing
    under BASEDIR/.git is newer than it. Otherwise the staging area is wiped
    and rebuilt, and an overlay filesystem is mounted at OVERLAY_MERGED.

    Raises:
        RuntimeError: if BASEDIR and OVERLAY_MERGED are on different filesystems.
        subprocess.CalledProcessError: if ``find`` or ``mount`` fails.
    """
    overlay_init_file = Path(os.path.join(BASEDIR, ".overlay_init"))

    # Re-create the overlay if BASEDIR/.git has changed since we created the overlay
    if overlay_init_file.is_file():
        git_dir_path = os.path.join(BASEDIR, ".git")
        new_files = run(["find", git_dir_path, "-newer", str(overlay_init_file)])
        if not new_files.splitlines():
            # A valid overlay already exists
            return
        cloudlog.info(".git directory changed, recreating overlay")

    cloudlog.info("preparing new safe staging area")

    Params().put("UpdateAvailable", "0")
    set_consistent_flag(False)
    dismount_overlay()
    if os.path.isdir(STAGING_ROOT):
        shutil.rmtree(STAGING_ROOT)

    for dirname in [STAGING_ROOT, OVERLAY_UPPER, OVERLAY_METADATA, OVERLAY_MERGED]:
        os.mkdir(dirname, 0o755)

    if os.lstat(BASEDIR).st_dev != os.lstat(OVERLAY_MERGED).st_dev:
        raise RuntimeError("base and overlay merge directories are on different filesystems; not valid for overlay FS!")

    # Remove consistent flag from current BASEDIR so it's not copied over
    consistent_file = Path(os.path.join(BASEDIR, ".overlay_consistent"))
    if consistent_file.is_file():
        consistent_file.unlink()

    # Leave a timestamped canary in BASEDIR to check at startup. The device clock
    # should be correct by the time we get here. If the init file disappears, or
    # critical mtimes in BASEDIR are newer than .overlay_init, continue.sh can
    # assume that BASEDIR has been used for local development or otherwise
    # modified, and skips the update activation attempt.
    overlay_init_file.touch()

    os.sync()
    overlay_opts = f"lowerdir={BASEDIR},upperdir={OVERLAY_UPPER},workdir={OVERLAY_METADATA}"
    run(["mount", "-t", "overlay", "-o", overlay_opts, "none", OVERLAY_MERGED])
def finalize_update() -> None:
    """Take the current OverlayFS merged view and finalize a copy outside of
    OverlayFS, ready to be swapped-in at BASEDIR. Copy using shutil.copytree.

    The consistent flag is cleared before the copy starts and only set again
    once the copy has completed.
    """
    # Remove the update ready flag and any old updates
    cloudlog.info("creating finalized version of the overlay")
    set_consistent_flag(False)

    # Copy the merged overlay view and set the update ready flag
    if os.path.exists(FINALIZED):
        shutil.rmtree(FINALIZED)
    shutil.copytree(OVERLAY_MERGED, FINALIZED, symlinks=True)

    set_consistent_flag(True)
    cloudlog.info("done finalizing overlay")
def attempt_update ( wait_helper ) :
cloudlog . info ( " attempting git update inside staging overlay " )
def handle_neos_update(wait_helper: WaitTimeHelper) -> None:
    """Download the NEOS version required by the staged update, if it differs.

    Compares the contents of the NEOS_VERSION file with the
    REQUIRED_NEOS_VERSION exported by ``launch_env.sh`` in OVERLAY_MERGED.
    When they differ, the updater binary from the overlay is run in
    ``bgcache`` mode, retrying every 120 seconds for up to one day or until
    ``wait_helper.shutdown`` is set.

    Args:
        wait_helper: supplies the shutdown flag and the interruptible sleep.

    Raises:
        Exception: if the download did not succeed before the loop ended.
    """
    with open(NEOS_VERSION, "r") as f:
        cur_neos = f.read().strip()

    updated_neos = run(
        ["bash", "-c",
         "unset REQUIRED_NEOS_VERSION && source launch_env.sh && echo -n $REQUIRED_NEOS_VERSION"],
        OVERLAY_MERGED,
    ).strip()

    cloudlog.info(f"NEOS version check: {cur_neos} vs {updated_neos}")
    if cur_neos == updated_neos:
        return

    cloudlog.info(f"Beginning background download for NEOS {updated_neos}")
    set_offroad_alert("Offroad_NeosUpdate", True)

    updater_path = os.path.join(OVERLAY_MERGED, "installer/updater/updater")
    update_manifest = f"file://{OVERLAY_MERGED}/installer/updater/update.json"

    max_download_seconds = 60 * 60 * 24
    retry_delay_seconds = 120

    neos_downloaded = False
    start_time = time.monotonic()
    try:
        # Try to download for one day
        while not neos_downloaded and not wait_helper.shutdown and \
                (time.monotonic() - start_time < max_download_seconds):
            wait_helper.ready_event.clear()
            try:
                run([updater_path, "bgcache", update_manifest], OVERLAY_MERGED, low_priority=True)
                neos_downloaded = True
            except subprocess.CalledProcessError:
                cloudlog.info("NEOS background download failed, retrying")
                wait_helper.sleep(retry_delay_seconds)
    finally:
        # Always clear the alert, including when run() raises something other
        # than CalledProcessError. If the download failed, we'll show the
        # alert again when we retry.
        set_offroad_alert("Offroad_NeosUpdate", False)

    if not neos_downloaded:
        raise Exception("Failed to download NEOS update")
    cloudlog.info(f"NEOS background download successful, took {time.monotonic() - start_time} seconds")
def check_git_fetch_result(fetch_txt: str) -> bool:
    """Report whether ``git fetch`` output indicates that something was fetched.

    Args:
        fetch_txt: combined stdout/stderr text of a ``git fetch`` invocation.

    Returns:
        False when the output is empty or consists solely of the known-hosts
        warning below; True for any other output.
    """
    err_msg = "Failed to add the host to the list of known hosts (/data/data/com.termux/files/home/.ssh/known_hosts).\n"
    return len(fetch_txt) > 0 and (fetch_txt != err_msg)
def check_for_update() -> Tuple[bool, bool]:
    """Check for upstream changes with ``git fetch --dry-run`` in OVERLAY_MERGED.

    Returns:
        A ``(fetch_ok, update_available)`` pair. ``fetch_ok`` is False when the
        git command exits non-zero (in which case ``update_available`` is also
        False); otherwise ``update_available`` is the result of
        ``check_git_fetch_result`` on the command output.
    """
    setup_git_options(OVERLAY_MERGED)
    try:
        git_fetch_output = run(["git", "fetch", "--dry-run"], OVERLAY_MERGED, low_priority=True)
    except subprocess.CalledProcessError:
        return False, False
    return True, check_git_fetch_result(git_fetch_output)
def fetch_update ( wait_helper : WaitTimeHelper ) - > bool :
cloudlog . info ( " attempting git fetch inside staging overlay " )
setup_git_options ( OVERLAY_MERGED )
@ -203,9 +267,7 @@ def attempt_update(wait_helper):
cur_hash = run ( [ " git " , " rev-parse " , " HEAD " ] , OVERLAY_MERGED ) . rstrip ( )
upstream_hash = run ( [ " git " , " rev-parse " , " @ {u} " ] , OVERLAY_MERGED ) . rstrip ( )
new_version = cur_hash != upstream_hash
err_msg = " Failed to add the host to the list of known hosts (/data/data/com.termux/files/home/.ssh/known_hosts). \n "
git_fetch_result = len ( git_fetch_output ) > 0 and ( git_fetch_output != err_msg )
git_fetch_result = check_git_fetch_result ( git_fetch_output )
cloudlog . info ( " comparing %s to %s " % ( cur_hash , upstream_hash ) )
if new_version or git_fetch_result :
@ -221,48 +283,15 @@ def attempt_update(wait_helper):
]
cloudlog . info ( " git reset success: %s " , ' \n ' . join ( r ) )
# Download the accompanying NEOS version if it doesn't match the current version
with open ( NEOS_VERSION , " r " ) as f :
cur_neos = f . read ( ) . strip ( )
updated_neos = run ( [ " bash " , " -c " , r " unset REQUIRED_NEOS_VERSION && source launch_env.sh && \
echo - n $ REQUIRED_NEOS_VERSION " ], OVERLAY_MERGED).strip()
cloudlog . info ( f " NEOS version check: { cur_neos } vs { updated_neos } " )
if cur_neos != updated_neos :
cloudlog . info ( f " Beginning background download for NEOS { updated_neos } " )
set_offroad_alert ( " Offroad_NeosUpdate " , True )
updater_path = os . path . join ( OVERLAY_MERGED , " installer/updater/updater " )
update_manifest = f " file:// { OVERLAY_MERGED } /installer/updater/update.json "
neos_downloaded = False
start_time = time . monotonic ( )
# Try to download for one day
while ( time . monotonic ( ) - start_time < 60 * 60 * 24 ) and not wait_helper . shutdown :
wait_helper . ready_event . clear ( )
try :
run ( [ updater_path , " bgcache " , update_manifest ] , OVERLAY_MERGED , low_priority = True )
neos_downloaded = True
break
except subprocess . CalledProcessError :
cloudlog . info ( " NEOS background download failed, retrying " )
wait_helper . sleep ( 120 )
# If the download failed, we'll show the alert again when we retry
set_offroad_alert ( " Offroad_NeosUpdate " , False )
if not neos_downloaded :
raise Exception ( " Failed to download NEOS update " )
cloudlog . info ( f " NEOS background download successful, took { time . monotonic ( ) - start_time } seconds " )
if ANDROID :
handle_neos_update ( wait_helper )
# Create the finalized, ready-to-swap update
finalize_from_ovfs ( )
finalize_update ( )
cloudlog . info ( " openpilot update successful! " )
else :
cloudlog . info ( " nothing new from git at this time " )
set_update_available_params ( new_version )
return new_version
@ -272,7 +301,7 @@ def main():
if params . get ( " DisableUpdates " ) == b " 1 " :
raise RuntimeError ( " updates are disabled by the DisableUpdates param " )
if os . geteuid ( ) != 0 :
if ANDROID and os . geteuid ( ) != 0 :
raise RuntimeError ( " updated must be launched as root! " )
# Set low io priority
@ -290,45 +319,45 @@ def main():
wait_helper = WaitTimeHelper ( proc )
wait_helper . sleep ( 30 )
first_run = True
last_fetch_time = 0
update_failed_count = 0
update_available = False
overlay_initialized = False
# Run the update loop
# * every 1m, do a lightweight internet/update check
# * every 10m, do a full git fetch
while not wait_helper . shutdown :
update_now = wait_helper . ready_event . is_set ( )
wait_helper . ready_event . clear ( )
# Check for internet every 30s
# Don't run updater while onroad or if the time's wrong
time_wrong = datetime . datetime . utcnow ( ) . year < 2019
ping_failed = os . system ( f " ping -W 4 -c 1 { TEST_IP } " ) != 0
if ping_faile d or time_wrong :
is_onroad = params . get ( " IsOffroad " ) != b " 1 "
if is_onroa d or time_wrong :
wait_helper . sleep ( 30 )
cloudlog . info ( " not running updater, not offroad " )
continue
# Attempt an update
exception = None
new_version = False
update_failed_count + = 1
try :
# Re-create the overlay if BASEDIR/.git has changed since we created the overlay
if overlay_initialized :
overlay_init_fn = os . path . join ( BASEDIR , " .overlay_init " )
git_dir_path = os . path . join ( BASEDIR , " .git " )
new_files = run ( [ " find " , git_dir_path , " -newer " , overlay_init_fn ] )
if len ( new_files . splitlines ( ) ) :
cloudlog . info ( " .git directory changed, recreating overlay " )
overlay_initialized = False
if not overlay_initialized :
init_ovfs ( )
overlay_initialized = True
if params . get ( " IsOffroad " ) == b " 1 " :
update_available = attempt_update ( wait_helper ) or update_available
init_overlay ( )
internet_ok , update_available = check_for_update ( )
if internet_ok and not update_available :
update_failed_count = 0
if not update_available and os . path . isdir ( NEOSUPDATE_DIR ) :
shutil . rmtree ( NEOSUPDATE_DIR )
else :
cloudlog . info ( " not running updater, openpilot running " )
# Fetch updates at most every 10 minutes
if internet_ok and ( update_now or time . monotonic ( ) - last_fetch_time > 60 * 10 ) :
new_version = fetch_update ( wait_helper )
update_failed_count = 0
last_fetch_time = time . monotonic ( )
if first_run and not new_version and os . path . isdir ( NEOSUPDATE_DIR ) :
shutil . rmtree ( NEOSUPDATE_DIR )
first_run = False
except subprocess . CalledProcessError as e :
cloudlog . event (
" update process failed " ,
@ -336,21 +365,15 @@ def main():
output = e . output ,
returncode = e . returncode
)
exception = e
overlay_initialized = False
except Exception :
exception = f " command failed: { e . cmd } \n { e . output } "
except Exception as e :
cloudlog . exception ( " uncaught updated exception, shouldn ' t happen " )
exception = str ( e )
params . put ( " UpdateFailedCount " , str ( update_failed_count ) )
if exception is None :
params . delete ( " LastUpdateException " )
else :
params . put ( " LastUpdateException " , f " command failed: { exception . cmd } \n { exception . output } " )
# Wait 10 minutes between update attempts
wait_helper . sleep ( 60 * 10 )
set_params ( new_version , update_failed_count , exception )
wait_helper . sleep ( 60 )
dismount_ovfs ( )
dismount_overlay ( )
# Run the updater only when executed as a script, not when imported.
if __name__ == "__main__":
    main()