@@ -32,13 +32,14 @@ import fcntl
 import time
 import threading
 from pathlib import Path
 from typing import List, Tuple, Optional
 
+from common.hardware import ANDROID
 from common.basedir import BASEDIR
 from common.params import Params
 from selfdrive.swaglog import cloudlog
 from selfdrive.controls.lib.alertmanager import set_offroad_alert
 
 TEST_IP = os.getenv("UPDATER_TEST_IP", "8.8.8.8")
 LOCK_FILE = os.getenv("UPDATER_LOCK_FILE", "/tmp/safe_staging_overlay.lock")
 STAGING_ROOT = os.getenv("UPDATER_STAGING_ROOT", "/data/safe_staging")
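These three constants keep their os.getenv() overrides, which is what lets a test run point the updater at throwaway paths and a reachable host instead of /data and 8.8.8.8. A hypothetical test setup, not part of this diff:

import os
import tempfile

# hypothetical harness: point the updater at disposable paths via its env overrides
env = os.environ.copy()
env["UPDATER_TEST_IP"] = "127.0.0.1"
env["UPDATER_LOCK_FILE"] = os.path.join(tempfile.gettempdir(), "updater_test.lock")
env["UPDATER_STAGING_ROOT"] = tempfile.mkdtemp(prefix="safe_staging_")

# the daemon would then be launched with this environment, e.g.
#   subprocess.Popen(["python", "selfdrive/updated.py"], env=env)
print({k: v for k, v in env.items() if k.startswith("UPDATER_")})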
@@ -60,7 +61,7 @@ class WaitTimeHelper:
     signal.signal(signal.SIGINT, self.graceful_shutdown)
     signal.signal(signal.SIGHUP, self.update_now)
 
-  def graceful_shutdown(self, signum, frame):
+  def graceful_shutdown(self, signum: int, frame) -> None:
     # umount -f doesn't appear effective in avoiding "device busy" on NEOS,
     # so don't actually die until the next convenient opportunity in main().
     cloudlog.info("caught SIGINT/SIGTERM, dismounting overlay at next opportunity")
@@ -73,35 +74,42 @@ class WaitTimeHelper:
     self.shutdown = True
     self.ready_event.set()
 
-  def update_now(self, signum, frame):
+  def update_now(self, signum: int, frame) -> None:
     cloudlog.info("caught SIGHUP, running update check immediately")
     self.ready_event.set()
 
-  def sleep(self, t):
+  def sleep(self, t: float) -> None:
     self.ready_event.wait(timeout=t)
 
 
-def run(cmd, cwd=None, low_priority=False):
+def run(cmd: List[str], cwd: Optional[str] = None, low_priority: bool = False):
   if low_priority:
     cmd = ["nice", "-n", "19"] + cmd
   return subprocess.check_output(cmd, cwd=cwd, stderr=subprocess.STDOUT, encoding='utf8')
 
 
-def set_consistent_flag(consistent):
-  os.system("sync")
+def set_consistent_flag(consistent: bool) -> None:
+  os.sync()
   consistent_file = Path(os.path.join(FINALIZED, ".overlay_consistent"))
   if consistent:
     consistent_file.touch()
   elif not consistent and consistent_file.exists():
     consistent_file.unlink()
-  os.system("sync")
+  os.sync()
 
 
-def set_update_available_params(new_version):
+def set_params(new_version: bool, failed_count: int, exception: Optional[str]) -> None:
   params = Params()
 
-  t = datetime.datetime.utcnow().isoformat()
-  params.put("LastUpdateTime", t.encode('utf8'))
+  params.put("UpdateFailedCount", str(failed_count))
+  if failed_count == 0:
+    t = datetime.datetime.utcnow().isoformat()
+    params.put("LastUpdateTime", t.encode('utf8'))
+
+  if exception is None:
+    params.delete("LastUpdateException")
+  else:
+    params.put("LastUpdateException", exception)
 
   if new_version:
     try:
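The annotations above don't change the WaitTimeHelper pattern this file relies on: signal handlers only set a flag or a threading.Event, and the long waits are Event.wait() calls, so a SIGHUP wakes the daemon immediately while SIGINT/SIGTERM just request a clean exit at the next loop iteration. A minimal standalone sketch of that pattern (the class and names here are illustrative, not the ones in updated.py):

import signal
import threading

class Waiter:
  def __init__(self) -> None:
    self.shutdown = False
    self.ready_event = threading.Event()
    signal.signal(signal.SIGTERM, self._stop)
    signal.signal(signal.SIGINT, self._stop)
    signal.signal(signal.SIGHUP, self._wake)

  def _stop(self, signum, frame) -> None:
    # don't exit in the handler; let the main loop notice and clean up
    self.shutdown = True
    self.ready_event.set()

  def _wake(self, signum, frame) -> None:
    self.ready_event.set()

  def sleep(self, t: float) -> None:
    # returns early whenever a signal sets the event
    self.ready_event.wait(timeout=t)

if __name__ == "__main__":
  w = Waiter()
  while not w.shutdown:
    w.ready_event.clear()
    print("working...")
    w.sleep(10)  # SIGHUP cuts the wait short; SIGTERM/SIGINT end the loop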
@@ -114,13 +122,7 @@ def set_update_available_params(new_version):
     params.put("UpdateAvailable", "1")
 
 
-def dismount_ovfs():
-  if os.path.ismount(OVERLAY_MERGED):
-    cloudlog.error("unmounting existing overlay")
-    run(["umount", "-l", OVERLAY_MERGED])
-
-
-def setup_git_options(cwd):
+def setup_git_options(cwd: str) -> None:
   # We sync FS object atimes (which NEOS doesn't use) and mtimes, but ctimes
   # are outside user control. Make sure Git is set up to ignore system ctimes,
   # because they change when we make hard links during finalize. Otherwise,
@@ -134,66 +136,128 @@ def setup_git_options(cwd):
     ("core.checkStat", "minimal"),
   ]
   for option, value in git_cfg:
     try:
       ret = run(["git", "config", "--get", option], cwd)
       config_ok = ret.strip() == value
     except subprocess.CalledProcessError:
       config_ok = False
 
     if not config_ok:
       cloudlog.info(f"Setting git '{option}' to '{value}'")
       run(["git", "config", option, value], cwd)
 
 
-def init_ovfs():
+def dismount_overlay() -> None:
+  if os.path.ismount(OVERLAY_MERGED):
+    cloudlog.info("unmounting existing overlay")
+    run(["umount", "-l", OVERLAY_MERGED])
+
+
+def init_overlay() -> None:
+  overlay_init_file = Path(os.path.join(BASEDIR, ".overlay_init"))
+
+  # Re-create the overlay if BASEDIR/.git has changed since we created the overlay
+  if overlay_init_file.is_file():
+    git_dir_path = os.path.join(BASEDIR, ".git")
+    new_files = run(["find", git_dir_path, "-newer", str(overlay_init_file)])
+    if not len(new_files.splitlines()):
+      # A valid overlay already exists
+      return
+    else:
+      cloudlog.info(".git directory changed, recreating overlay")
+
   cloudlog.info("preparing new safe staging area")
-  Params().put("UpdateAvailable", "0")
 
+  Params().put("UpdateAvailable", "0")
   set_consistent_flag(False)
-
-  dismount_ovfs()
+  dismount_overlay()
   if os.path.isdir(STAGING_ROOT):
     shutil.rmtree(STAGING_ROOT)
 
-  for dirname in [STAGING_ROOT, OVERLAY_UPPER, OVERLAY_METADATA, OVERLAY_MERGED, FINALIZED]:
+  for dirname in [STAGING_ROOT, OVERLAY_UPPER, OVERLAY_METADATA, OVERLAY_MERGED]:
     os.mkdir(dirname, 0o755)
 
-  if not os.lstat(BASEDIR).st_dev == os.lstat(OVERLAY_MERGED).st_dev:
+  if os.lstat(BASEDIR).st_dev != os.lstat(OVERLAY_MERGED).st_dev:
     raise RuntimeError("base and overlay merge directories are on different filesystems; not valid for overlay FS!")
 
-  # Remove consistent flag from current BASEDIR so it's not copied over
-  if os.path.isfile(os.path.join(BASEDIR, ".overlay_consistent")):
-    os.remove(os.path.join(BASEDIR, ".overlay_consistent"))
-
   # Leave a timestamped canary in BASEDIR to check at startup. The device clock
   # should be correct by the time we get here. If the init file disappears, or
   # critical mtimes in BASEDIR are newer than .overlay_init, continue.sh can
-  # assume that BASEDIR has used for local development or otherwise modified,
+  # assume that BASEDIR has been used for local development or otherwise modified,
   # and skips the update activation attempt.
-  Path(os.path.join(BASEDIR, ".overlay_init")).touch()
+  consistent_file = Path(os.path.join(BASEDIR, ".overlay_consistent"))
+  if consistent_file.is_file():
+    consistent_file.unlink()
+  overlay_init_file.touch()
 
-  os.system("sync")
+  os.sync()
   overlay_opts = f"lowerdir={BASEDIR},upperdir={OVERLAY_UPPER},workdir={OVERLAY_METADATA}"
   run(["mount", "-t", "overlay", "-o", overlay_opts, "none", OVERLAY_MERGED])
 
 
-def finalize_from_ovfs():
+def finalize_update() -> None:
   """Take the current OverlayFS merged view and finalize a copy outside of
   OverlayFS, ready to be swapped-in at BASEDIR. Copy using shutil.copytree"""
 
   # Remove the update ready flag and any old updates
   cloudlog.info("creating finalized version of the overlay")
   set_consistent_flag(False)
-  shutil.rmtree(FINALIZED)
 
   # Copy the merged overlay view and set the update ready flag
+  if os.path.exists(FINALIZED):
+    shutil.rmtree(FINALIZED)
   shutil.copytree(OVERLAY_MERGED, FINALIZED, symlinks=True)
+
   set_consistent_flag(True)
   cloudlog.info("done finalizing overlay")
 
 
-def attempt_update(wait_helper):
-  cloudlog.info("attempting git update inside staging overlay")
+def handle_neos_update(wait_helper: WaitTimeHelper) -> None:
+  with open(NEOS_VERSION, "r") as f:
+    cur_neos = f.read().strip()
+
+  updated_neos = run(["bash", "-c", r"unset REQUIRED_NEOS_VERSION && source launch_env.sh && \
+                      echo -n $REQUIRED_NEOS_VERSION"], OVERLAY_MERGED).strip()
+
+  cloudlog.info(f"NEOS version check: {cur_neos} vs {updated_neos}")
+  if cur_neos == updated_neos:
+    return
+
+  cloudlog.info(f"Beginning background download for NEOS {updated_neos}")
+  set_offroad_alert("Offroad_NeosUpdate", True)
+
+  updater_path = os.path.join(OVERLAY_MERGED, "installer/updater/updater")
+  update_manifest = f"file://{OVERLAY_MERGED}/installer/updater/update.json"
+
+  neos_downloaded = False
+  start_time = time.monotonic()
+  # Try to download for one day
+  while not neos_downloaded and not wait_helper.shutdown and \
+        (time.monotonic() - start_time < 60*60*24):
+    wait_helper.ready_event.clear()
+    try:
+      run([updater_path, "bgcache", update_manifest], OVERLAY_MERGED, low_priority=True)
+      neos_downloaded = True
+    except subprocess.CalledProcessError:
+      cloudlog.info("NEOS background download failed, retrying")
+      wait_helper.sleep(120)
+
+  # If the download failed, we'll show the alert again when we retry
+  set_offroad_alert("Offroad_NeosUpdate", False)
+  if not neos_downloaded:
+    raise Exception("Failed to download NEOS update")
+
+  cloudlog.info(f"NEOS background download successful, took {time.monotonic() - start_time} seconds")
+
+
+def check_git_fetch_result(fetch_txt):
+  err_msg = "Failed to add the host to the list of known hosts (/data/data/com.termux/files/home/.ssh/known_hosts).\n"
+  return len(fetch_txt) > 0 and (fetch_txt != err_msg)
+
+
+def check_for_update() -> Tuple[bool, bool]:
+  setup_git_options(OVERLAY_MERGED)
+  try:
+    git_fetch_output = run(["git", "fetch", "--dry-run"], OVERLAY_MERGED, low_priority=True)
+    return True, check_git_fetch_result(git_fetch_output)
+  except subprocess.CalledProcessError:
+    return False, False
+
+
+def fetch_update(wait_helper: WaitTimeHelper) -> bool:
+  cloudlog.info("attempting git fetch inside staging overlay")
 
   setup_git_options(OVERLAY_MERGED)
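The new init_overlay() mounts the staging area the same way init_ovfs() did: BASEDIR is the read-only lowerdir, writes land in OVERLAY_UPPER, OVERLAY_METADATA is the kernel's workdir, and the merged view appears at OVERLAY_MERGED so git can fetch and reset there without touching the running install. A minimal sketch of that mount step, using made-up /tmp paths and assuming root plus an overlayfs-capable kernel:

import os
import subprocess

# all paths here are made up; updated.py uses BASEDIR and /data/safe_staging
root = "/tmp/overlay_demo"
lower, upper, work, merged = (os.path.join(root, d) for d in ("lower", "upper", "work", "merged"))
for d in (lower, upper, work, merged):
  os.makedirs(d, exist_ok=True)

opts = f"lowerdir={lower},upperdir={upper},workdir={work}"
subprocess.check_call(["mount", "-t", "overlay", "-o", opts, "none", merged])  # needs root

# writes under the merged view land in `upper`; `lower` is never modified
open(os.path.join(merged, "example.txt"), "w").close()
print(os.listdir(upper))  # ['example.txt']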
@@ -203,9 +267,7 @@ def attempt_update(wait_helper):
   cur_hash = run(["git", "rev-parse", "HEAD"], OVERLAY_MERGED).rstrip()
   upstream_hash = run(["git", "rev-parse", "@{u}"], OVERLAY_MERGED).rstrip()
   new_version = cur_hash != upstream_hash
-
-  err_msg = "Failed to add the host to the list of known hosts (/data/data/com.termux/files/home/.ssh/known_hosts).\n"
-  git_fetch_result = len(git_fetch_output) > 0 and (git_fetch_output != err_msg)
+  git_fetch_result = check_git_fetch_result(git_fetch_output)
 
   cloudlog.info("comparing %s to %s" % (cur_hash, upstream_hash))
   if new_version or git_fetch_result:
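The hunk above keeps the decision logic intact: after the fetch, fetch_update() still calls an update "new" only when the local HEAD differs from its upstream tracking ref. The same comparison in isolation (a sketch; it assumes it runs inside a git checkout that has an upstream configured):

import subprocess

def rev(ref: str) -> str:
  return subprocess.check_output(["git", "rev-parse", ref], encoding="utf8").strip()

cur_hash = rev("HEAD")
upstream_hash = rev("@{u}")  # upstream of the currently checked-out branch
print("update available" if cur_hash != upstream_hash else "up to date")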
@@ -221,48 +283,15 @@ def attempt_update(wait_helper):
     ]
     cloudlog.info("git reset success: %s", '\n'.join(r))
 
-    # Download the accompanying NEOS version if it doesn't match the current version
-    with open(NEOS_VERSION, "r") as f:
-      cur_neos = f.read().strip()
-
-    updated_neos = run(["bash", "-c", r"unset REQUIRED_NEOS_VERSION && source launch_env.sh && \
-                        echo -n $REQUIRED_NEOS_VERSION"], OVERLAY_MERGED).strip()
-
-    cloudlog.info(f"NEOS version check: {cur_neos} vs {updated_neos}")
-    if cur_neos != updated_neos:
-      cloudlog.info(f"Beginning background download for NEOS {updated_neos}")
-
-      set_offroad_alert("Offroad_NeosUpdate", True)
-      updater_path = os.path.join(OVERLAY_MERGED, "installer/updater/updater")
-      update_manifest = f"file://{OVERLAY_MERGED}/installer/updater/update.json"
-
-      neos_downloaded = False
-      start_time = time.monotonic()
-      # Try to download for one day
-      while (time.monotonic() - start_time < 60*60*24) and not wait_helper.shutdown:
-        wait_helper.ready_event.clear()
-        try:
-          run([updater_path, "bgcache", update_manifest], OVERLAY_MERGED, low_priority=True)
-          neos_downloaded = True
-          break
-        except subprocess.CalledProcessError:
-          cloudlog.info("NEOS background download failed, retrying")
-          wait_helper.sleep(120)
-
-      # If the download failed, we'll show the alert again when we retry
-      set_offroad_alert("Offroad_NeosUpdate", False)
-      if not neos_downloaded:
-        raise Exception("Failed to download NEOS update")
-
-      cloudlog.info(f"NEOS background download successful, took {time.monotonic() - start_time} seconds")
+    if ANDROID:
+      handle_neos_update(wait_helper)
 
     # Create the finalized, ready-to-swap update
-    finalize_from_ovfs()
+    finalize_update()
     cloudlog.info("openpilot update successful!")
   else:
     cloudlog.info("nothing new from git at this time")
 
-  set_update_available_params(new_version)
   return new_version
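finalize_update() stays deliberately bracketed by the consistency flag: set_consistent_flag(False) plus a sync before FINALIZED is rewritten, and the flag only reappears after the copy completes, so a crash mid-copy leaves no .overlay_consistent marker and the half-written copy is never swapped in. The same write-then-mark idea in miniature (a hypothetical helper and paths, not the updater's own code):

import os
import shutil
from pathlib import Path

def publish(src: str, dst: str) -> None:
  """Copy src to dst and only mark dst usable once the copy finished."""
  marker = Path(dst) / ".complete"

  # 1) drop any previous "good" marker before touching the output
  if marker.exists():
    marker.unlink()
  os.sync()

  # 2) rebuild the output from scratch
  if os.path.exists(dst):
    shutil.rmtree(dst)
  shutil.copytree(src, dst, symlinks=True)

  # 3) only now declare the result consistent
  marker.touch()
  os.sync()

# a consumer trusts dst only if (Path(dst) / ".complete").is_file()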
@@ -272,7 +301,7 @@ def main():
   if params.get("DisableUpdates") == b"1":
     raise RuntimeError("updates are disabled by the DisableUpdates param")
 
-  if os.geteuid() != 0:
+  if ANDROID and os.geteuid() != 0:
     raise RuntimeError("updated must be launched as root!")
 
   # Set low io priority
@@ -290,45 +319,45 @@ def main():
   wait_helper = WaitTimeHelper(proc)
   wait_helper.sleep(30)
 
+  first_run = True
+  last_fetch_time = 0
   update_failed_count = 0
-  update_available = False
-  overlay_initialized = False
 
+  # Run the update loop
+  # * every 1m, do a lightweight internet/update check
+  # * every 10m, do a full git fetch
   while not wait_helper.shutdown:
+    update_now = wait_helper.ready_event.is_set()
     wait_helper.ready_event.clear()
 
-    # Check for internet every 30s
+    # Don't run updater while onroad or if the time's wrong
    time_wrong = datetime.datetime.utcnow().year < 2019
-    ping_failed = os.system(f"ping -W 4 -c 1 {TEST_IP}") != 0
-    if ping_failed or time_wrong:
+    is_onroad = params.get("IsOffroad") != b"1"
+    if is_onroad or time_wrong:
       wait_helper.sleep(30)
+      cloudlog.info("not running updater, not offroad")
       continue
 
     # Attempt an update
     exception = None
+    new_version = False
     update_failed_count += 1
     try:
-      # Re-create the overlay if BASEDIR/.git has changed since we created the overlay
-      if overlay_initialized:
-        overlay_init_fn = os.path.join(BASEDIR, ".overlay_init")
-        git_dir_path = os.path.join(BASEDIR, ".git")
-        new_files = run(["find", git_dir_path, "-newer", overlay_init_fn])
-
-        if len(new_files.splitlines()):
-          cloudlog.info(".git directory changed, recreating overlay")
-          overlay_initialized = False
-
-      if not overlay_initialized:
-        init_ovfs()
-        overlay_initialized = True
-
-      if params.get("IsOffroad") == b"1":
-        update_available = attempt_update(wait_helper) or update_available
+      init_overlay()
+
+      internet_ok, update_available = check_for_update()
+      if internet_ok and not update_available:
         update_failed_count = 0
-        if not update_available and os.path.isdir(NEOSUPDATE_DIR):
-          shutil.rmtree(NEOSUPDATE_DIR)
-      else:
-        cloudlog.info("not running updater, openpilot running")
+
+      # Fetch updates at most every 10 minutes
+      if internet_ok and (update_now or time.monotonic() - last_fetch_time > 60*10):
+        new_version = fetch_update(wait_helper)
+        update_failed_count = 0
+        last_fetch_time = time.monotonic()
+
+        if first_run and not new_version and os.path.isdir(NEOSUPDATE_DIR):
+          shutil.rmtree(NEOSUPDATE_DIR)
+        first_run = False
 
     except subprocess.CalledProcessError as e:
       cloudlog.event(
         "update process failed",
@@ -336,21 +365,15 @@ def main():
         output=e.output,
         returncode=e.returncode
       )
-      exception = e
-      overlay_initialized = False
-    except Exception:
+      exception = f"command failed: {e.cmd}\n{e.output}"
+    except Exception as e:
       cloudlog.exception("uncaught updated exception, shouldn't happen")
+      exception = str(e)
 
-    params.put("UpdateFailedCount", str(update_failed_count))
-    if exception is None:
-      params.delete("LastUpdateException")
-    else:
-      params.put("LastUpdateException", f"command failed: {exception.cmd}\n{exception.output}")
-
-    # Wait 10 minutes between update attempts
-    wait_helper.sleep(60*10)
+    set_params(new_version, update_failed_count, exception)
+    wait_helper.sleep(60)
 
-  dismount_ovfs()
+  dismount_overlay()
 
 
 if __name__ == "__main__":
   main()
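Taken together, the reworked main() loop wakes once a minute, runs the cheap check_for_update() probe, and only performs a full fetch_update() when ten minutes have passed since the last fetch or a SIGHUP forces one. The cadence on its own looks roughly like this (a sketch with the updater calls stubbed out; the real loop runs until shutdown is requested):

import time
import threading

ready_event = threading.Event()  # the real daemon sets this from a SIGHUP handler
last_fetch_time = 0.0

for _ in range(3):  # demo only; the real loop is `while not wait_helper.shutdown`
  update_now = ready_event.is_set()
  ready_event.clear()

  # cheap probe every pass: stand-in for check_for_update()
  internet_ok = True

  # full fetch at most every 10 minutes, or immediately when woken by SIGHUP
  if internet_ok and (update_now or time.monotonic() - last_fetch_time > 60 * 10):
    # fetch_update(...) would run here
    last_fetch_time = time.monotonic()

  ready_event.wait(timeout=1)  # 60 in the real loop; shortened so the demo exits quickly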