From b93f6ce4f6fd34d33f990e86bde22b5cec49f2da Mon Sep 17 00:00:00 2001 From: Justin Newberry Date: Fri, 8 Mar 2024 13:46:57 -0500 Subject: [PATCH] updated: prep for new updater (#31695) * wip * wip * wip * wip * wip * wip * wip * proc * release * fix * this should move here * e2e update test * that too * fix * fix * fix running in docker * don't think GHA will work * also test switching branches * it's a test * lets not delete that yet * fix * fix2 * fix * fix * tests too * fix * cleanup / init * test agnos update * test agnos * move this back up * no diff --- selfdrive/updated/common.py | 100 ++++++ selfdrive/updated/git.py | 241 ++++++++++++++ .../tests/{test_updated.py => test_base.py} | 40 ++- selfdrive/updated/tests/test_git.py | 22 ++ selfdrive/updated/updated.py | 300 +++--------------- 5 files changed, 419 insertions(+), 284 deletions(-) create mode 100644 selfdrive/updated/common.py create mode 100644 selfdrive/updated/git.py rename selfdrive/updated/tests/{test_updated.py => test_base.py} (86%) mode change 100755 => 100644 create mode 100644 selfdrive/updated/tests/test_git.py diff --git a/selfdrive/updated/common.py b/selfdrive/updated/common.py new file mode 100644 index 0000000000..c522e075d5 --- /dev/null +++ b/selfdrive/updated/common.py @@ -0,0 +1,100 @@ +import abc +import os + +from pathlib import Path +import subprocess +from typing import List + +from markdown_it import MarkdownIt +from openpilot.common.params import Params +from openpilot.common.swaglog import cloudlog + + +LOCK_FILE = os.getenv("UPDATER_LOCK_FILE", "/tmp/safe_staging_overlay.lock") +STAGING_ROOT = os.getenv("UPDATER_STAGING_ROOT", "/data/safe_staging") +FINALIZED = os.path.join(STAGING_ROOT, "finalized") + + +def run(cmd: list[str], cwd: str = None) -> str: + return subprocess.check_output(cmd, cwd=cwd, stderr=subprocess.STDOUT, encoding='utf8') + + +class UpdateStrategy(abc.ABC): + def __init__(self): + self.params = Params() + + @abc.abstractmethod + def init(self) -> None: + pass + + @abc.abstractmethod + def cleanup(self) -> None: + pass + + @abc.abstractmethod + def get_available_channels(self) -> List[str]: + """List of available channels to install, (branches, releases, etc)""" + + @abc.abstractmethod + def current_channel(self) -> str: + """Current channel installed""" + + @abc.abstractmethod + def fetched_path(self) -> str: + """Path to the fetched update""" + + @property + def target_channel(self) -> str: + """Target Channel""" + b: str | None = self.params.get("UpdaterTargetBranch", encoding='utf-8') + if b is None: + b = self.current_channel() + return b + + @abc.abstractmethod + def update_ready(self) -> bool: + """Check if an update is ready to be installed""" + + @abc.abstractmethod + def update_available(self) -> bool: + """Check if an update is available for the current channel""" + + @abc.abstractmethod + def describe_current_channel(self) -> tuple[str, str]: + """Describe the current channel installed, (description, release_notes)""" + + @abc.abstractmethod + def describe_ready_channel(self) -> tuple[str, str]: + """Describe the channel that is ready to be installed, (description, release_notes)""" + + @abc.abstractmethod + def fetch_update(self) -> None: + pass + + @abc.abstractmethod + def finalize_update(self) -> None: + pass + + +def set_consistent_flag(consistent: bool) -> None: + os.sync() + consistent_file = Path(os.path.join(FINALIZED, ".overlay_consistent")) + if consistent: + consistent_file.touch() + elif not consistent: + consistent_file.unlink(missing_ok=True) + 
os.sync() + + +def parse_release_notes(releases_md: str) -> str: + try: + r = releases_md.split('\n\n', 1)[0] # Slice latest release notes + try: + return str(MarkdownIt().render(r)) + except Exception: + return r + "\n" + except FileNotFoundError: + pass + except Exception: + cloudlog.exception("failed to parse release notes") + return "" diff --git a/selfdrive/updated/git.py b/selfdrive/updated/git.py new file mode 100644 index 0000000000..29f95eeeff --- /dev/null +++ b/selfdrive/updated/git.py @@ -0,0 +1,241 @@ +import datetime +import os +import re +import shutil +import subprocess +import time + +from collections import defaultdict +from pathlib import Path +from typing import List + +from openpilot.common.basedir import BASEDIR +from openpilot.common.params import Params +from openpilot.common.swaglog import cloudlog +from openpilot.selfdrive.updated.common import FINALIZED, STAGING_ROOT, UpdateStrategy, parse_release_notes, set_consistent_flag, run + + +OVERLAY_UPPER = os.path.join(STAGING_ROOT, "upper") +OVERLAY_METADATA = os.path.join(STAGING_ROOT, "metadata") +OVERLAY_MERGED = os.path.join(STAGING_ROOT, "merged") +OVERLAY_INIT = Path(os.path.join(BASEDIR, ".overlay_init")) + + +def setup_git_options(cwd: str) -> None: + # We sync FS object atimes (which NEOS doesn't use) and mtimes, but ctimes + # are outside user control. Make sure Git is set up to ignore system ctimes, + # because they change when we make hard links during finalize. Otherwise, + # there is a lot of unnecessary churn. This appears to be a common need on + # OSX as well: https://www.git-tower.com/blog/make-git-rebase-safe-on-osx/ + + # We are using copytree to copy the directory, which also changes + # inode numbers. Ignore those changes too. + + # Set protocol to the new version (default after git 2.26) to reduce data + # usage on git fetch --dry-run from about 400KB to 18KB. + git_cfg = [ + ("core.trustctime", "false"), + ("core.checkStat", "minimal"), + ("protocol.version", "2"), + ("gc.auto", "0"), + ("gc.autoDetach", "false"), + ] + for option, value in git_cfg: + run(["git", "config", option, value], cwd) + + +def dismount_overlay() -> None: + if os.path.ismount(OVERLAY_MERGED): + cloudlog.info("unmounting existing overlay") + run(["sudo", "umount", "-l", OVERLAY_MERGED]) + + +def init_overlay() -> None: + + # Re-create the overlay if BASEDIR/.git has changed since we created the overlay + if OVERLAY_INIT.is_file() and os.path.ismount(OVERLAY_MERGED): + git_dir_path = os.path.join(BASEDIR, ".git") + new_files = run(["find", git_dir_path, "-newer", str(OVERLAY_INIT)]) + if not len(new_files.splitlines()): + # A valid overlay already exists + return + else: + cloudlog.info(".git directory changed, recreating overlay") + + cloudlog.info("preparing new safe staging area") + + params = Params() + params.put_bool("UpdateAvailable", False) + set_consistent_flag(False) + dismount_overlay() + run(["sudo", "rm", "-rf", STAGING_ROOT]) + if os.path.isdir(STAGING_ROOT): + shutil.rmtree(STAGING_ROOT) + + for dirname in [STAGING_ROOT, OVERLAY_UPPER, OVERLAY_METADATA, OVERLAY_MERGED]: + os.mkdir(dirname, 0o755) + + if os.lstat(BASEDIR).st_dev != os.lstat(OVERLAY_MERGED).st_dev: + raise RuntimeError("base and overlay merge directories are on different filesystems; not valid for overlay FS!") + + # Leave a timestamped canary in BASEDIR to check at startup. The device clock + # should be correct by the time we get here. 
If the init file disappears, or + # critical mtimes in BASEDIR are newer than .overlay_init, continue.sh can + # assume that BASEDIR has used for local development or otherwise modified, + # and skips the update activation attempt. + consistent_file = Path(os.path.join(BASEDIR, ".overlay_consistent")) + if consistent_file.is_file(): + consistent_file.unlink() + OVERLAY_INIT.touch() + + os.sync() + overlay_opts = f"lowerdir={BASEDIR},upperdir={OVERLAY_UPPER},workdir={OVERLAY_METADATA}" + + mount_cmd = ["mount", "-t", "overlay", "-o", overlay_opts, "none", OVERLAY_MERGED] + run(["sudo"] + mount_cmd) + run(["sudo", "chmod", "755", os.path.join(OVERLAY_METADATA, "work")]) + + git_diff = run(["git", "diff"], OVERLAY_MERGED) + params.put("GitDiff", git_diff) + cloudlog.info(f"git diff output:\n{git_diff}") + + +class GitUpdateStrategy(UpdateStrategy): + + def init(self) -> None: + init_overlay() + + def cleanup(self) -> None: + OVERLAY_INIT.unlink(missing_ok=True) + + def sync_branches(self): + excluded_branches = ('release2', 'release2-staging') + + output = run(["git", "ls-remote", "--heads"], OVERLAY_MERGED) + + self.branches = defaultdict(lambda: None) + for line in output.split('\n'): + ls_remotes_re = r'(?P\b[0-9a-f]{5,40}\b)(\s+)(refs\/heads\/)(?P.*$)' + x = re.fullmatch(ls_remotes_re, line.strip()) + if x is not None and x.group('branch_name') not in excluded_branches: + self.branches[x.group('branch_name')] = x.group('commit_sha') + + return self.branches + + def get_available_channels(self) -> List[str]: + self.sync_branches() + return list(self.branches.keys()) + + def update_ready(self) -> bool: + consistent_file = Path(os.path.join(FINALIZED, ".overlay_consistent")) + if consistent_file.is_file(): + hash_mismatch = self.get_commit_hash(BASEDIR) != self.branches[self.target_channel] + branch_mismatch = self.get_branch(BASEDIR) != self.target_channel + on_target_channel = self.get_branch(FINALIZED) == self.target_channel + return ((hash_mismatch or branch_mismatch) and on_target_channel) + return False + + def update_available(self) -> bool: + if os.path.isdir(OVERLAY_MERGED) and len(self.get_available_channels()) > 0: + hash_mismatch = self.get_commit_hash(OVERLAY_MERGED) != self.branches[self.target_channel] + branch_mismatch = self.get_branch(OVERLAY_MERGED) != self.target_channel + return hash_mismatch or branch_mismatch + return False + + def get_branch(self, path: str) -> str: + return run(["git", "rev-parse", "--abbrev-ref", "HEAD"], path).rstrip() + + def get_commit_hash(self, path) -> str: + return run(["git", "rev-parse", "HEAD"], path).rstrip() + + def get_current_channel(self) -> str: + return self.get_branch(BASEDIR) + + def current_channel(self) -> str: + return self.get_branch(BASEDIR) + + def describe_branch(self, basedir) -> str: + if not os.path.exists(basedir): + return "" + + version = "" + branch = "" + commit = "" + commit_date = "" + try: + branch = self.get_branch(basedir) + commit = self.get_commit_hash(basedir)[:7] + with open(os.path.join(basedir, "common", "version.h")) as f: + version = f.read().split('"')[1] + + commit_unix_ts = run(["git", "show", "-s", "--format=%ct", "HEAD"], basedir).rstrip() + dt = datetime.datetime.fromtimestamp(int(commit_unix_ts)) + commit_date = dt.strftime("%b %d") + except Exception: + cloudlog.exception("updater.get_description") + return f"{version} / {branch} / {commit} / {commit_date}" + + def release_notes_branch(self, basedir) -> str: + with open(os.path.join(basedir, "RELEASES.md"), "r") as f: + return 
parse_release_notes(f.read()) + + def describe_current_channel(self) -> tuple[str, str]: + return self.describe_branch(BASEDIR), self.release_notes_branch(BASEDIR) + + def describe_ready_channel(self) -> tuple[str, str]: + if self.update_ready(): + return self.describe_branch(FINALIZED), self.release_notes_branch(FINALIZED) + + return "", "" + + def fetch_update(self): + cloudlog.info("attempting git fetch inside staging overlay") + + setup_git_options(OVERLAY_MERGED) + + branch = self.target_channel + git_fetch_output = run(["git", "fetch", "origin", branch], OVERLAY_MERGED) + cloudlog.info("git fetch success: %s", git_fetch_output) + + cloudlog.info("git reset in progress") + cmds = [ + ["git", "checkout", "--force", "--no-recurse-submodules", "-B", branch, "FETCH_HEAD"], + ["git", "reset", "--hard"], + ["git", "clean", "-xdff"], + ["git", "submodule", "sync"], + ["git", "submodule", "update", "--init", "--recursive"], + ["git", "submodule", "foreach", "--recursive", "git", "reset", "--hard"], + ] + r = [run(cmd, OVERLAY_MERGED) for cmd in cmds] + cloudlog.info("git reset success: %s", '\n'.join(r)) + + def fetched_path(self): + return str(OVERLAY_MERGED) + + def finalize_update(self) -> None: + """Take the current OverlayFS merged view and finalize a copy outside of + OverlayFS, ready to be swapped-in at BASEDIR. Copy using shutil.copytree""" + + # Remove the update ready flag and any old updates + cloudlog.info("creating finalized version of the overlay") + set_consistent_flag(False) + + # Copy the merged overlay view and set the update ready flag + if os.path.exists(FINALIZED): + shutil.rmtree(FINALIZED) + shutil.copytree(OVERLAY_MERGED, FINALIZED, symlinks=True) + + run(["git", "reset", "--hard"], FINALIZED) + run(["git", "submodule", "foreach", "--recursive", "git", "reset", "--hard"], FINALIZED) + + cloudlog.info("Starting git cleanup in finalized update") + t = time.monotonic() + try: + run(["git", "gc"], FINALIZED) + run(["git", "lfs", "prune"], FINALIZED) + cloudlog.event("Done git cleanup", duration=time.monotonic() - t) + except subprocess.CalledProcessError: + cloudlog.exception(f"Failed git cleanup, took {time.monotonic() - t:.3f} s") + + set_consistent_flag(True) + cloudlog.info("done finalizing overlay") diff --git a/selfdrive/updated/tests/test_updated.py b/selfdrive/updated/tests/test_base.py old mode 100755 new mode 100644 similarity index 86% rename from selfdrive/updated/tests/test_updated.py rename to selfdrive/updated/tests/test_base.py index 4245763e99..f500145b6f --- a/selfdrive/updated/tests/test_updated.py +++ b/selfdrive/updated/tests/test_base.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 import os import pathlib import shutil @@ -33,12 +32,14 @@ def update_release(directory, name, version, agnos_version, release_notes): with open(directory / "launch_env.sh", "w") as f: f.write(f'export AGNOS_VERSION="{agnos_version}"') - run(["git", "add", "."], cwd=directory) - run(["git", "commit", "-m", f"openpilot release {version}"], cwd=directory) - @pytest.mark.slow # TODO: can we test overlayfs in GHA? 
-class TestUpdateD(unittest.TestCase): +class BaseUpdateTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + if "Base" in cls.__name__: + raise unittest.SkipTest + def setUp(self): self.tmpdir = tempfile.mkdtemp() @@ -73,15 +74,15 @@ class TestUpdateD(unittest.TestCase): def setup_basedir_release(self, release): self.params = Params() self.set_target_branch(release) - run(["git", "clone", "-b", release, self.remote_dir, self.basedir]) def update_remote_release(self, release): - update_release(self.remote_dir, release, *self.MOCK_RELEASES[release]) + raise NotImplementedError("") def setup_remote_release(self, release): - run(["git", "init"], cwd=self.remote_dir) - run(["git", "checkout", "-b", release], cwd=self.remote_dir) - self.update_remote_release(release) + raise NotImplementedError("") + + def additional_context(self): + raise NotImplementedError("") def tearDown(self): mock.patch.stopall() @@ -134,7 +135,7 @@ class TestUpdateD(unittest.TestCase): self.setup_remote_release("release3") self.setup_basedir_release("release3") - with processes_context(["updated"]) as [updated]: + with self.additional_context(), processes_context(["updated"]) as [updated]: self._test_params("release3", False, False) self.wait_for_idle() self._test_params("release3", False, False) @@ -150,7 +151,7 @@ class TestUpdateD(unittest.TestCase): self.setup_remote_release("release3") self.setup_basedir_release("release3") - with processes_context(["updated"]) as [updated]: + with self.additional_context(), processes_context(["updated"]) as [updated]: self._test_params("release3", False, False) self.wait_for_idle() self._test_params("release3", False, False) @@ -177,7 +178,7 @@ class TestUpdateD(unittest.TestCase): self.setup_remote_release("master") self.setup_basedir_release("release3") - with processes_context(["updated"]) as [updated]: + with self.additional_context(), processes_context(["updated"]) as [updated]: self._test_params("release3", False, False) self.wait_for_idle() self._test_params("release3", False, False) @@ -201,10 +202,11 @@ class TestUpdateD(unittest.TestCase): self.setup_remote_release("release3") self.setup_basedir_release("release3") - with mock.patch("openpilot.system.hardware.AGNOS", "True"), \ - mock.patch("openpilot.system.hardware.tici.hardware.Tici.get_os_version", "1.2"), \ - mock.patch("openpilot.system.hardware.tici.agnos.get_target_slot_number"), \ - mock.patch("openpilot.system.hardware.tici.agnos.flash_agnos_update"), \ + with self.additional_context(), \ + mock.patch("openpilot.system.hardware.AGNOS", "True"), \ + mock.patch("openpilot.system.hardware.tici.hardware.Tici.get_os_version", "1.2"), \ + mock.patch("openpilot.system.hardware.tici.agnos.get_target_slot_number"), \ + mock.patch("openpilot.system.hardware.tici.agnos.flash_agnos_update"), \ processes_context(["updated"]) as [updated]: self._test_params("release3", False, False) @@ -226,7 +228,3 @@ class TestUpdateD(unittest.TestCase): self._test_params("release3", False, True) self._test_update_params("release3", *self.MOCK_RELEASES["release3"]) - - -if __name__ == "__main__": - unittest.main() diff --git a/selfdrive/updated/tests/test_git.py b/selfdrive/updated/tests/test_git.py new file mode 100644 index 0000000000..1a9c78242d --- /dev/null +++ b/selfdrive/updated/tests/test_git.py @@ -0,0 +1,22 @@ +import contextlib +from openpilot.selfdrive.updated.tests.test_base import BaseUpdateTest, run, update_release + + +class TestUpdateDGitStrategy(BaseUpdateTest): + def update_remote_release(self, release): + 
update_release(self.remote_dir, release, *self.MOCK_RELEASES[release]) + run(["git", "add", "."], cwd=self.remote_dir) + run(["git", "commit", "-m", f"openpilot release {release}"], cwd=self.remote_dir) + + def setup_remote_release(self, release): + run(["git", "init"], cwd=self.remote_dir) + run(["git", "checkout", "-b", release], cwd=self.remote_dir) + self.update_remote_release(release) + + def setup_basedir_release(self, release): + super().setup_basedir_release(release) + run(["git", "clone", "-b", release, self.remote_dir, self.basedir]) + + @contextlib.contextmanager + def additional_context(self): + yield diff --git a/selfdrive/updated/updated.py b/selfdrive/updated/updated.py index b6b395f254..92034cc806 100755 --- a/selfdrive/updated/updated.py +++ b/selfdrive/updated/updated.py @@ -1,35 +1,21 @@ #!/usr/bin/env python3 import os -import re +from pathlib import Path import datetime import subprocess import psutil -import shutil import signal import fcntl -import time import threading -from collections import defaultdict -from pathlib import Path -from markdown_it import MarkdownIt -from openpilot.common.basedir import BASEDIR from openpilot.common.params import Params from openpilot.common.time import system_time_valid +from openpilot.selfdrive.updated.common import LOCK_FILE, STAGING_ROOT, UpdateStrategy, run, set_consistent_flag from openpilot.system.hardware import AGNOS, HARDWARE from openpilot.common.swaglog import cloudlog from openpilot.selfdrive.controls.lib.alertmanager import set_offroad_alert from openpilot.system.version import is_tested_branch - -LOCK_FILE = os.getenv("UPDATER_LOCK_FILE", "/tmp/safe_staging_overlay.lock") -STAGING_ROOT = os.getenv("UPDATER_STAGING_ROOT", "/data/safe_staging") - -OVERLAY_UPPER = os.path.join(STAGING_ROOT, "upper") -OVERLAY_METADATA = os.path.join(STAGING_ROOT, "metadata") -OVERLAY_MERGED = os.path.join(STAGING_ROOT, "merged") -FINALIZED = os.path.join(STAGING_ROOT, "finalized") - -OVERLAY_INIT = Path(os.path.join(BASEDIR, ".overlay_init")) +from openpilot.selfdrive.updated.git import GitUpdateStrategy DAYS_NO_CONNECTIVITY_MAX = 14 # do not allow to engage after this many days DAYS_NO_CONNECTIVITY_PROMPT = 10 # send an offroad prompt after this many days @@ -71,147 +57,13 @@ def read_time_from_param(params, param) -> datetime.datetime | None: pass return None -def run(cmd: list[str], cwd: str = None) -> str: - return subprocess.check_output(cmd, cwd=cwd, stderr=subprocess.STDOUT, encoding='utf8') - - -def set_consistent_flag(consistent: bool) -> None: - os.sync() - consistent_file = Path(os.path.join(FINALIZED, ".overlay_consistent")) - if consistent: - consistent_file.touch() - elif not consistent: - consistent_file.unlink(missing_ok=True) - os.sync() - -def parse_release_notes(basedir: str) -> bytes: - try: - with open(os.path.join(basedir, "RELEASES.md"), "rb") as f: - r = f.read().split(b'\n\n', 1)[0] # Slice latest release notes - try: - return bytes(MarkdownIt().render(r.decode("utf-8")), encoding="utf-8") - except Exception: - return r + b"\n" - except FileNotFoundError: - pass - except Exception: - cloudlog.exception("failed to parse release notes") - return b"" - -def setup_git_options(cwd: str) -> None: - # We sync FS object atimes (which NEOS doesn't use) and mtimes, but ctimes - # are outside user control. Make sure Git is set up to ignore system ctimes, - # because they change when we make hard links during finalize. Otherwise, - # there is a lot of unnecessary churn. 
This appears to be a common need on - # OSX as well: https://www.git-tower.com/blog/make-git-rebase-safe-on-osx/ - - # We are using copytree to copy the directory, which also changes - # inode numbers. Ignore those changes too. - - # Set protocol to the new version (default after git 2.26) to reduce data - # usage on git fetch --dry-run from about 400KB to 18KB. - git_cfg = [ - ("core.trustctime", "false"), - ("core.checkStat", "minimal"), - ("protocol.version", "2"), - ("gc.auto", "0"), - ("gc.autoDetach", "false"), - ] - for option, value in git_cfg: - run(["git", "config", option, value], cwd) - - -def dismount_overlay() -> None: - if os.path.ismount(OVERLAY_MERGED): - cloudlog.info("unmounting existing overlay") - run(["sudo", "umount", "-l", OVERLAY_MERGED]) - - -def init_overlay() -> None: - - # Re-create the overlay if BASEDIR/.git has changed since we created the overlay - if OVERLAY_INIT.is_file() and os.path.ismount(OVERLAY_MERGED): - git_dir_path = os.path.join(BASEDIR, ".git") - new_files = run(["find", git_dir_path, "-newer", str(OVERLAY_INIT)]) - if not len(new_files.splitlines()): - # A valid overlay already exists - return - else: - cloudlog.info(".git directory changed, recreating overlay") - - cloudlog.info("preparing new safe staging area") - - params = Params() - params.put_bool("UpdateAvailable", False) - set_consistent_flag(False) - dismount_overlay() - run(["sudo", "rm", "-rf", STAGING_ROOT]) - if os.path.isdir(STAGING_ROOT): - shutil.rmtree(STAGING_ROOT) - - for dirname in [STAGING_ROOT, OVERLAY_UPPER, OVERLAY_METADATA, OVERLAY_MERGED]: - os.mkdir(dirname, 0o755) - - if os.lstat(BASEDIR).st_dev != os.lstat(OVERLAY_MERGED).st_dev: - raise RuntimeError("base and overlay merge directories are on different filesystems; not valid for overlay FS!") - - # Leave a timestamped canary in BASEDIR to check at startup. The device clock - # should be correct by the time we get here. If the init file disappears, or - # critical mtimes in BASEDIR are newer than .overlay_init, continue.sh can - # assume that BASEDIR has used for local development or otherwise modified, - # and skips the update activation attempt. - consistent_file = Path(os.path.join(BASEDIR, ".overlay_consistent")) - if consistent_file.is_file(): - consistent_file.unlink() - OVERLAY_INIT.touch() - - os.sync() - overlay_opts = f"lowerdir={BASEDIR},upperdir={OVERLAY_UPPER},workdir={OVERLAY_METADATA}" - - mount_cmd = ["mount", "-t", "overlay", "-o", overlay_opts, "none", OVERLAY_MERGED] - run(["sudo"] + mount_cmd) - run(["sudo", "chmod", "755", os.path.join(OVERLAY_METADATA, "work")]) - - git_diff = run(["git", "diff"], OVERLAY_MERGED) - params.put("GitDiff", git_diff) - cloudlog.info(f"git diff output:\n{git_diff}") - - -def finalize_update() -> None: - """Take the current OverlayFS merged view and finalize a copy outside of - OverlayFS, ready to be swapped-in at BASEDIR. 
Copy using shutil.copytree""" - - # Remove the update ready flag and any old updates - cloudlog.info("creating finalized version of the overlay") - set_consistent_flag(False) - - # Copy the merged overlay view and set the update ready flag - if os.path.exists(FINALIZED): - shutil.rmtree(FINALIZED) - shutil.copytree(OVERLAY_MERGED, FINALIZED, symlinks=True) - - run(["git", "reset", "--hard"], FINALIZED) - run(["git", "submodule", "foreach", "--recursive", "git", "reset", "--hard"], FINALIZED) - - cloudlog.info("Starting git cleanup in finalized update") - t = time.monotonic() - try: - run(["git", "gc"], FINALIZED) - run(["git", "lfs", "prune"], FINALIZED) - cloudlog.event("Done git cleanup", duration=time.monotonic() - t) - except subprocess.CalledProcessError: - cloudlog.exception(f"Failed git cleanup, took {time.monotonic() - t:.3f} s") - - set_consistent_flag(True) - cloudlog.info("done finalizing overlay") - -def handle_agnos_update() -> None: +def handle_agnos_update(fetched_path) -> None: from openpilot.system.hardware.tici.agnos import flash_agnos_update, get_target_slot_number cur_version = HARDWARE.get_os_version() updated_version = run(["bash", "-c", r"unset AGNOS_VERSION && source launch_env.sh && \ - echo -n $AGNOS_VERSION"], OVERLAY_MERGED).strip() + echo -n $AGNOS_VERSION"], fetched_path).strip() cloudlog.info(f"AGNOS version check: {cur_version} vs {updated_version}") if cur_version == updated_version: @@ -223,61 +75,44 @@ def handle_agnos_update() -> None: cloudlog.info(f"Beginning background installation for AGNOS {updated_version}") set_offroad_alert("Offroad_NeosUpdate", True) - manifest_path = os.path.join(OVERLAY_MERGED, "system/hardware/tici/agnos.json") + manifest_path = os.path.join(fetched_path, "system/hardware/tici/agnos.json") target_slot_number = get_target_slot_number() flash_agnos_update(manifest_path, target_slot_number, cloudlog) set_offroad_alert("Offroad_NeosUpdate", False) +STRATEGY = { + "git": GitUpdateStrategy, +} + class Updater: def __init__(self): self.params = Params() - self.branches = defaultdict(str) self._has_internet: bool = False + self.strategy: UpdateStrategy = STRATEGY[os.environ.get("UPDATER_STRATEGY", "git")]() + @property def has_internet(self) -> bool: return self._has_internet - @property - def target_branch(self) -> str: - b: str | None = self.params.get("UpdaterTargetBranch", encoding='utf-8') - if b is None: - b = self.get_branch(BASEDIR) - return b - - @property - def update_ready(self) -> bool: - consistent_file = Path(os.path.join(FINALIZED, ".overlay_consistent")) - if consistent_file.is_file(): - hash_mismatch = self.get_commit_hash(BASEDIR) != self.branches[self.target_branch] - branch_mismatch = self.get_branch(BASEDIR) != self.target_branch - on_target_branch = self.get_branch(FINALIZED) == self.target_branch - return ((hash_mismatch or branch_mismatch) and on_target_branch) - return False + def init(self): + self.strategy.init() - @property - def update_available(self) -> bool: - if os.path.isdir(OVERLAY_MERGED) and len(self.branches) > 0: - hash_mismatch = self.get_commit_hash(OVERLAY_MERGED) != self.branches[self.target_branch] - branch_mismatch = self.get_branch(OVERLAY_MERGED) != self.target_branch - return hash_mismatch or branch_mismatch - return False - - def get_branch(self, path: str) -> str: - return run(["git", "rev-parse", "--abbrev-ref", "HEAD"], path).rstrip() - - def get_commit_hash(self, path: str = OVERLAY_MERGED) -> str: - return run(["git", "rev-parse", "HEAD"], path).rstrip() + def cleanup(self): + 
self.strategy.cleanup() def set_params(self, update_success: bool, failed_count: int, exception: str | None) -> None: self.params.put("UpdateFailedCount", str(failed_count)) - self.params.put("UpdaterTargetBranch", self.target_branch) - self.params.put_bool("UpdaterFetchAvailable", self.update_available) - if len(self.branches): - self.params.put("UpdaterAvailableBranches", ','.join(self.branches.keys())) + if self.params.get("UpdaterTargetBranch") is None: + self.params.put("UpdaterTargetBranch", self.strategy.current_channel()) + + self.params.put_bool("UpdaterFetchAvailable", self.strategy.update_available()) + + available_channels = self.strategy.get_available_channels() + self.params.put("UpdaterAvailableBranches", ','.join(available_channels)) last_update = datetime.datetime.utcnow() if update_success: @@ -292,32 +127,14 @@ class Updater: else: self.params.put("LastUpdateException", exception) - # Write out current and new version info - def get_description(basedir: str) -> str: - if not os.path.exists(basedir): - return "" + description_current, release_notes_current = self.strategy.describe_current_channel() + description_ready, release_notes_ready = self.strategy.describe_ready_channel() - version = "" - branch = "" - commit = "" - commit_date = "" - try: - branch = self.get_branch(basedir) - commit = self.get_commit_hash(basedir)[:7] - with open(os.path.join(basedir, "common", "version.h")) as f: - version = f.read().split('"')[1] - - commit_unix_ts = run(["git", "show", "-s", "--format=%ct", "HEAD"], basedir).rstrip() - dt = datetime.datetime.fromtimestamp(int(commit_unix_ts)) - commit_date = dt.strftime("%b %d") - except Exception: - cloudlog.exception("updater.get_description") - return f"{version} / {branch} / {commit} / {commit_date}" - self.params.put("UpdaterCurrentDescription", get_description(BASEDIR)) - self.params.put("UpdaterCurrentReleaseNotes", parse_release_notes(BASEDIR)) - self.params.put("UpdaterNewDescription", get_description(FINALIZED)) - self.params.put("UpdaterNewReleaseNotes", parse_release_notes(FINALIZED)) - self.params.put_bool("UpdateAvailable", self.update_ready) + self.params.put("UpdaterCurrentDescription", description_current) + self.params.put("UpdaterCurrentReleaseNotes", release_notes_current) + self.params.put("UpdaterNewDescription", description_ready) + self.params.put("UpdaterNewReleaseNotes", release_notes_ready) + self.params.put_bool("UpdateAvailable", self.strategy.update_ready()) # Handle user prompt for alert in ("Offroad_UpdateFailed", "Offroad_ConnectivityNeeded", "Offroad_ConnectivityNeededPrompt"): @@ -341,67 +158,24 @@ class Updater: def check_for_update(self) -> None: cloudlog.info("checking for updates") - excluded_branches = ('release2', 'release2-staging') - - try: - run(["git", "ls-remote", "origin", "HEAD"], OVERLAY_MERGED) - self._has_internet = True - except subprocess.CalledProcessError: - self._has_internet = False - - setup_git_options(OVERLAY_MERGED) - output = run(["git", "ls-remote", "--heads"], OVERLAY_MERGED) - - self.branches = defaultdict(lambda: None) - for line in output.split('\n'): - ls_remotes_re = r'(?P\b[0-9a-f]{5,40}\b)(\s+)(refs\/heads\/)(?P.*$)' - x = re.fullmatch(ls_remotes_re, line.strip()) - if x is not None and x.group('branch_name') not in excluded_branches: - self.branches[x.group('branch_name')] = x.group('commit_sha') - - cur_branch = self.get_branch(OVERLAY_MERGED) - cur_commit = self.get_commit_hash(OVERLAY_MERGED) - new_branch = self.target_branch - new_commit = self.branches[new_branch] - if 
(cur_branch, cur_commit) != (new_branch, new_commit): - cloudlog.info(f"update available, {cur_branch} ({str(cur_commit)[:7]}) -> {new_branch} ({str(new_commit)[:7]})") - else: - cloudlog.info(f"up to date on {cur_branch} ({str(cur_commit)[:7]})") + self.strategy.update_available() def fetch_update(self) -> None: - cloudlog.info("attempting git fetch inside staging overlay") - self.params.put("UpdaterState", "downloading...") # TODO: cleanly interrupt this and invalidate old update set_consistent_flag(False) self.params.put_bool("UpdateAvailable", False) - setup_git_options(OVERLAY_MERGED) - - branch = self.target_branch - git_fetch_output = run(["git", "fetch", "origin", branch], OVERLAY_MERGED) - cloudlog.info("git fetch success: %s", git_fetch_output) - - cloudlog.info("git reset in progress") - cmds = [ - ["git", "checkout", "--force", "--no-recurse-submodules", "-B", branch, "FETCH_HEAD"], - ["git", "reset", "--hard"], - ["git", "clean", "-xdff"], - ["git", "submodule", "sync"], - ["git", "submodule", "update", "--init", "--recursive"], - ["git", "submodule", "foreach", "--recursive", "git", "reset", "--hard"], - ] - r = [run(cmd, OVERLAY_MERGED) for cmd in cmds] - cloudlog.info("git reset success: %s", '\n'.join(r)) + self.strategy.fetch_update() # TODO: show agnos download progress if AGNOS: - handle_agnos_update() + handle_agnos_update(self.strategy.fetched_path()) # Create the finalized, ready-to-swap update self.params.put("UpdaterState", "finalizing update...") - finalize_update() + self.strategy.finalize_update() cloudlog.info("finalize success!") @@ -450,7 +224,7 @@ def main() -> None: exception = None try: # TODO: reuse overlay from previous updated instance if it looks clean - init_overlay() + updater.init() # ensure we have some params written soon after startup updater.set_params(False, update_failed_count, exception) @@ -486,11 +260,11 @@ def main() -> None: returncode=e.returncode ) exception = f"command failed: {e.cmd}\n{e.output}" - OVERLAY_INIT.unlink(missing_ok=True) + updater.cleanup() except Exception as e: cloudlog.exception("uncaught updated exception, shouldn't happen") exception = str(e) - OVERLAY_INIT.unlink(missing_ok=True) + updater.cleanup() try: params.put("UpdaterState", "idle")
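
Note for reviewers (not part of the patch): the diff above splits the updater into a strategy-agnostic daemon (selfdrive/updated/updated.py) and a pluggable UpdateStrategy interface (selfdrive/updated/common.py), with git as the only concrete strategy so far (selfdrive/updated/git.py), selected through the STRATEGY dict and the UPDATER_STRATEGY environment variable. The sketch below is purely illustrative of the surface a future strategy would have to implement; the class name, channel names, and path are placeholders, not anything this PR introduces.

# Illustrative only -- not part of this patch. Shows the abstract methods a new
# strategy (see selfdrive/updated/common.py above) must provide. Assumes it is
# run inside an openpilot checkout so the import resolves.
from openpilot.selfdrive.updated.common import UpdateStrategy


class ExampleUpdateStrategy(UpdateStrategy):
  def init(self) -> None:
    # one-time setup; the git strategy mounts its overlay here
    pass

  def cleanup(self) -> None:
    # undo anything init() did if the daemon hits an error
    pass

  def get_available_channels(self) -> list[str]:
    # channels the user can switch to (branches, releases, ...)
    return ["release3", "master"]       # placeholder values

  def current_channel(self) -> str:
    return "release3"                   # placeholder value

  def fetched_path(self) -> str:
    # directory holding the fetched-but-not-yet-installed update;
    # updated.py hands this to handle_agnos_update()
    return "/data/example_fetched"      # placeholder path

  def update_available(self) -> bool:
    # called from check_for_update(); True if target_channel has new content
    return False

  def fetch_update(self) -> None:
    # download the update for self.target_channel
    pass

  def update_ready(self) -> bool:
    # True once a fetched update has been finalized and is safe to install
    return False

  def describe_current_channel(self) -> tuple[str, str]:
    # (description, release_notes) of the installed channel
    return "example description", "example release notes"

  def describe_ready_channel(self) -> tuple[str, str]:
    # (description, release_notes) of the finalized update, if any
    return "", ""

  def finalize_update(self) -> None:
    # stage the fetched update so update_ready() becomes True
    pass

Wiring such a strategy in would only mean adding an entry to the STRATEGY dict in updated.py and pointing UPDATER_STRATEGY at it; the daemon itself already goes through the UpdateStrategy interface everywhere else.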