|
|
|
@ -26,13 +26,11 @@ import os |
|
|
|
|
import datetime |
|
|
|
|
import subprocess |
|
|
|
|
import psutil |
|
|
|
|
from stat import S_ISREG, S_ISDIR, S_ISLNK, S_IMODE, ST_MODE, ST_INO, ST_UID, ST_GID, ST_ATIME, ST_MTIME |
|
|
|
|
import shutil |
|
|
|
|
import signal |
|
|
|
|
from pathlib import Path |
|
|
|
|
import fcntl |
|
|
|
|
import threading |
|
|
|
|
import time |
|
|
|
|
from cffi import FFI |
|
|
|
|
|
|
|
|
|
from common.basedir import BASEDIR |
|
|
|
@ -47,25 +45,25 @@ OVERLAY_MERGED = os.path.join(STAGING_ROOT, "merged") |
|
|
|
|
FINALIZED = os.path.join(STAGING_ROOT, "finalized") |
|
|
|
|
|
|
|
|
|
NICE_LOW_PRIORITY = ["nice", "-n", "19"] |
|
|
|
|
SHORT = os.getenv("SHORT") is not None |
|
|
|
|
|
|
|
|
|
# Workaround for the EON/termux build of Python having os.link removed. |
|
|
|
|
# Workaround for lack of os.link in the NEOS/termux python |
|
|
|
|
ffi = FFI() |
|
|
|
|
ffi.cdef("int link(const char *oldpath, const char *newpath);") |
|
|
|
|
libc = ffi.dlopen(None) |
|
|
|
|
def link(src, dest): |
|
|
|
|
return libc.link(src.encode(), dest.encode()) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class WaitTimeHelper: |
|
|
|
|
ready_event = threading.Event() |
|
|
|
|
shutdown = False |
|
|
|
|
|
|
|
|
|
def __init__(self): |
|
|
|
|
self.ready_event = threading.Event() |
|
|
|
|
self.shutdown = False |
|
|
|
|
signal.signal(signal.SIGTERM, self.graceful_shutdown) |
|
|
|
|
signal.signal(signal.SIGINT, self.graceful_shutdown) |
|
|
|
|
signal.signal(signal.SIGHUP, self.update_now) |
|
|
|
|
|
|
|
|
|
def graceful_shutdown(self, signum, frame): |
|
|
|
|
# umount -f doesn't appear effective in avoiding "device busy" on EON, |
|
|
|
|
# umount -f doesn't appear effective in avoiding "device busy" on NEOS, |
|
|
|
|
# so don't actually die until the next convenient opportunity in main(). |
|
|
|
|
cloudlog.info("caught SIGINT/SIGTERM, dismounting overlay at next opportunity") |
|
|
|
|
self.shutdown = True |
|
|
|
@ -75,18 +73,8 @@ class WaitTimeHelper: |
|
|
|
|
cloudlog.info("caught SIGHUP, running update check immediately") |
|
|
|
|
self.ready_event.set() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def wait_between_updates(ready_event, t=60*10): |
|
|
|
|
ready_event.clear() |
|
|
|
|
if SHORT: |
|
|
|
|
ready_event.wait(timeout=10) |
|
|
|
|
else: |
|
|
|
|
ready_event.wait(timeout=t) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def link(src, dest): |
|
|
|
|
# Workaround for the EON/termux build of Python having os.link removed. |
|
|
|
|
return libc.link(src.encode(), dest.encode()) |
|
|
|
|
def sleep(self, t): |
|
|
|
|
self.ready_event.wait(timeout=t) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run(cmd, cwd=None): |
|
|
|
@ -134,32 +122,28 @@ def dismount_ovfs(): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def setup_git_options(cwd): |
|
|
|
|
# We sync FS object atimes (which EON doesn't use) and mtimes, but ctimes |
|
|
|
|
# We sync FS object atimes (which NEOS doesn't use) and mtimes, but ctimes |
|
|
|
|
# are outside user control. Make sure Git is set up to ignore system ctimes, |
|
|
|
|
# because they change when we make hard links during finalize. Otherwise, |
|
|
|
|
# there is a lot of unnecessary churn. This appears to be a common need on |
|
|
|
|
# OSX as well: https://www.git-tower.com/blog/make-git-rebase-safe-on-osx/ |
|
|
|
|
try: |
|
|
|
|
trustctime = run(["git", "config", "--get", "core.trustctime"], cwd) |
|
|
|
|
trustctime_set = (trustctime.strip() == "false") |
|
|
|
|
except subprocess.CalledProcessError: |
|
|
|
|
trustctime_set = False |
|
|
|
|
|
|
|
|
|
if not trustctime_set: |
|
|
|
|
cloudlog.info("Setting core.trustctime false") |
|
|
|
|
run(["git", "config", "core.trustctime", "false"], cwd) |
|
|
|
|
|
|
|
|
|
# We are temporarily using copytree to copy the directory, which also changes |
|
|
|
|
# We are using copytree to copy the directory, which also changes |
|
|
|
|
# inode numbers. Ignore those changes too. |
|
|
|
|
git_cfg = [ |
|
|
|
|
("core.trustctime", "false"), |
|
|
|
|
("core.checkStat", "minimal"), |
|
|
|
|
] |
|
|
|
|
for option, value in git_cfg: |
|
|
|
|
try: |
|
|
|
|
checkstat = run(["git", "config", "--get", "core.checkStat"], cwd) |
|
|
|
|
checkstat_set = (checkstat.strip() == "minimal") |
|
|
|
|
ret = run(["git", "config", "--get", option], cwd) |
|
|
|
|
config_ok = (ret.strip() == value) |
|
|
|
|
except subprocess.CalledProcessError: |
|
|
|
|
checkstat_set = False |
|
|
|
|
config_ok = False |
|
|
|
|
|
|
|
|
|
if not checkstat_set: |
|
|
|
|
cloudlog.info("Setting core.checkState minimal") |
|
|
|
|
run(["git", "config", "core.checkStat", "minimal"], cwd) |
|
|
|
|
if not config_ok: |
|
|
|
|
cloudlog.info(f"Setting git '{option}' to '{value}'") |
|
|
|
|
run(["git", "config", option, value], cwd) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def init_ovfs(): |
|
|
|
@ -181,7 +165,7 @@ def init_ovfs(): |
|
|
|
|
if os.path.isfile(os.path.join(BASEDIR, ".overlay_consistent")): |
|
|
|
|
os.remove(os.path.join(BASEDIR, ".overlay_consistent")) |
|
|
|
|
|
|
|
|
|
# Leave a timestamped canary in BASEDIR to check at startup. The EON clock |
|
|
|
|
# Leave a timestamped canary in BASEDIR to check at startup. The device clock |
|
|
|
|
# should be correct by the time we get here. If the init file disappears, or |
|
|
|
|
# critical mtimes in BASEDIR are newer than .overlay_init, continue.sh can |
|
|
|
|
# assume that BASEDIR has used for local development or otherwise modified, |
|
|
|
@ -192,74 +176,7 @@ def init_ovfs(): |
|
|
|
|
run(["mount", "-t", "overlay", "-o", overlay_opts, "none", OVERLAY_MERGED]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def inodes_in_tree(search_dir): |
|
|
|
|
"""Given a search root, produce a dictionary mapping of inodes to relative |
|
|
|
|
pathnames of regular files (no directories, symlinks, or special files).""" |
|
|
|
|
inode_map = {} |
|
|
|
|
for root, _, files in os.walk(search_dir, topdown=True): |
|
|
|
|
for file_name in files: |
|
|
|
|
full_path_name = os.path.join(root, file_name) |
|
|
|
|
st = os.lstat(full_path_name) |
|
|
|
|
if S_ISREG(st[ST_MODE]): |
|
|
|
|
inode_map[st[ST_INO]] = full_path_name |
|
|
|
|
return inode_map |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def dup_ovfs_object(inode_map, source_obj, target_dir): |
|
|
|
|
"""Given a relative pathname to copy, and a new target root, duplicate the |
|
|
|
|
source object in the target root, using hardlinks for regular files.""" |
|
|
|
|
|
|
|
|
|
source_full_path = os.path.join(OVERLAY_MERGED, source_obj) |
|
|
|
|
st = os.lstat(source_full_path) |
|
|
|
|
target_full_path = os.path.join(target_dir, source_obj) |
|
|
|
|
|
|
|
|
|
if S_ISREG(st[ST_MODE]): |
|
|
|
|
# Hardlink all regular files; ownership and permissions are shared. |
|
|
|
|
link(inode_map[st[ST_INO]], target_full_path) |
|
|
|
|
else: |
|
|
|
|
# Recreate all directories and symlinks; copy ownership and permissions. |
|
|
|
|
if S_ISDIR(st[ST_MODE]): |
|
|
|
|
os.mkdir(os.path.join(FINALIZED, source_obj), S_IMODE(st[ST_MODE])) |
|
|
|
|
elif S_ISLNK(st[ST_MODE]): |
|
|
|
|
os.symlink(os.readlink(source_full_path), target_full_path) |
|
|
|
|
os.chmod(target_full_path, S_IMODE(st[ST_MODE]), follow_symlinks=False) |
|
|
|
|
else: |
|
|
|
|
# Ran into a FIFO, socket, etc. Should not happen in OP install dir. |
|
|
|
|
# Ignore without copying for the time being; revisit later if needed. |
|
|
|
|
cloudlog.error("can't copy this file type: %s" % source_full_path) |
|
|
|
|
os.chown(target_full_path, st[ST_UID], st[ST_GID], follow_symlinks=False) |
|
|
|
|
|
|
|
|
|
# Sync target mtimes to the cached lstat() value from each source object. |
|
|
|
|
# Restores shared inode mtimes after linking, fixes symlinks and dirs. |
|
|
|
|
os.utime(target_full_path, (st[ST_ATIME], st[ST_MTIME]), follow_symlinks=False) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def finalize_from_ovfs_hardlink(): |
|
|
|
|
"""Take the current OverlayFS merged view and finalize a copy outside of |
|
|
|
|
OverlayFS, ready to be swapped-in at BASEDIR. Copy using hardlinks""" |
|
|
|
|
|
|
|
|
|
cloudlog.info("creating finalized version of the overlay") |
|
|
|
|
|
|
|
|
|
# The "copy" is done with hardlinks, but since the OverlayFS merge looks |
|
|
|
|
# like a different filesystem, and hardlinks can't cross filesystems, we |
|
|
|
|
# have to borrow a source pathname from the upper or lower layer. |
|
|
|
|
inode_map = inodes_in_tree(BASEDIR) |
|
|
|
|
inode_map.update(inodes_in_tree(OVERLAY_UPPER)) |
|
|
|
|
|
|
|
|
|
shutil.rmtree(FINALIZED) |
|
|
|
|
os.umask(0o077) |
|
|
|
|
os.mkdir(FINALIZED) |
|
|
|
|
for root, dirs, files in os.walk(OVERLAY_MERGED, topdown=True): |
|
|
|
|
for obj_name in dirs: |
|
|
|
|
relative_path_name = os.path.relpath(os.path.join(root, obj_name), OVERLAY_MERGED) |
|
|
|
|
dup_ovfs_object(inode_map, relative_path_name, FINALIZED) |
|
|
|
|
for obj_name in files: |
|
|
|
|
relative_path_name = os.path.relpath(os.path.join(root, obj_name), OVERLAY_MERGED) |
|
|
|
|
dup_ovfs_object(inode_map, relative_path_name, FINALIZED) |
|
|
|
|
cloudlog.info("done finalizing overlay") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def finalize_from_ovfs_copy(): |
|
|
|
|
def finalize_from_ovfs(): |
|
|
|
|
"""Take the current OverlayFS merged view and finalize a copy outside of |
|
|
|
|
OverlayFS, ready to be swapped-in at BASEDIR. Copy using shutil.copytree""" |
|
|
|
|
|
|
|
|
@ -301,7 +218,7 @@ def attempt_update(): |
|
|
|
|
# activated later if the finalize step is interrupted |
|
|
|
|
remove_consistent_flag() |
|
|
|
|
|
|
|
|
|
finalize_from_ovfs_copy() |
|
|
|
|
finalize_from_ovfs() |
|
|
|
|
|
|
|
|
|
# Make sure the validity flag lands on disk LAST, only when the local git |
|
|
|
|
# repo and OP install are in a consistent state. |
|
|
|
@ -322,7 +239,7 @@ def main(): |
|
|
|
|
if params.get("DisableUpdates") == b"1": |
|
|
|
|
raise RuntimeError("updates are disabled by param") |
|
|
|
|
|
|
|
|
|
if not os.geteuid() == 0: |
|
|
|
|
if os.geteuid() != 0: |
|
|
|
|
raise RuntimeError("updated must be launched as root!") |
|
|
|
|
|
|
|
|
|
# Set low io priority |
|
|
|
@ -336,19 +253,19 @@ def main(): |
|
|
|
|
except IOError: |
|
|
|
|
raise RuntimeError("couldn't get overlay lock; is another updated running?") |
|
|
|
|
|
|
|
|
|
# Wait a short time before our first update attempt |
|
|
|
|
# Avoids race with IsOffroad not being set, reduces manager startup load |
|
|
|
|
time.sleep(30) |
|
|
|
|
# Wait for IsOffroad to be set before our first update attempt |
|
|
|
|
wait_helper = WaitTimeHelper() |
|
|
|
|
wait_helper.sleep(30) |
|
|
|
|
|
|
|
|
|
while not wait_helper.shutdown: |
|
|
|
|
update_failed_count += 1 |
|
|
|
|
wait_helper.ready_event.clear() |
|
|
|
|
|
|
|
|
|
# Check for internet every 30s |
|
|
|
|
time_wrong = datetime.datetime.utcnow().year < 2019 |
|
|
|
|
ping_failed = subprocess.call(["ping", "-W", "4", "-c", "1", "8.8.8.8"]) |
|
|
|
|
if ping_failed or time_wrong: |
|
|
|
|
wait_between_updates(wait_helper.ready_event, t=30) |
|
|
|
|
wait_helper.sleep(30) |
|
|
|
|
continue |
|
|
|
|
|
|
|
|
|
# Attempt an update |
|
|
|
@ -386,7 +303,7 @@ def main(): |
|
|
|
|
cloudlog.exception("uncaught updated exception, shouldn't happen") |
|
|
|
|
|
|
|
|
|
params.put("UpdateFailedCount", str(update_failed_count)) |
|
|
|
|
wait_between_updates(wait_helper.ready_event) |
|
|
|
|
wait_helper.sleep(60*10) |
|
|
|
|
|
|
|
|
|
# We've been signaled to shut down |
|
|
|
|
dismount_ovfs() |
|
|
|
|