Linux tombstones: add stacktrace and upload (#19737)

* Add stacktrace to tombstones
* Make sentry output prettier
* Refactor
* Generate upload filename
* Actually move file
* Fix spaces
* Copy and upload
* Don't delete just yet

Co-authored-by: Comma Device <device@comma.ai>
old-commit-hash: f3db94a0c4
Branch: commatwo_master
Author: Willem Melching (committed by GitHub)
Parent: 0c141e45aa
Commit: 8a187ec406
Files changed:
  selfdrive/loggerd/uploader.py  (3 lines changed)
  selfdrive/tombstoned.py        (172 lines changed)
  selfdrive/version.py           (3 lines changed)

selfdrive/loggerd/uploader.py

@@ -58,6 +58,7 @@ class Uploader():
     self.last_resp = None
     self.last_exc = None

+    self.immediate_folders = ["crash/"]
     self.immediate_priority = {"qlog.bz2": 0, "qcamera.ts": 1}
     self.high_priority = {"rlog.bz2": 0, "fcamera.hevc": 1, "dcamera.hevc": 2, "ecamera.hevc": 3}

@@ -98,7 +99,7 @@ class Uploader():
     # try to upload qlog files first
     for name, key, fn in upload_files:
-      if name in self.immediate_priority:
+      if name in self.immediate_priority or any(f in fn for f in self.immediate_folders):
         return (key, fn)

     if with_raw:
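The uploader change is small but it is what makes the new crash reports leave the device: tombstoned copies crashlogs into a crash/ directory under the log root, and any file whose path contains one of the immediate_folders entries is now picked up with the same priority as qlogs. A minimal standalone sketch of the selection rule (the paths and file names below are hypothetical examples, not taken from the diff):

immediate_priority = {"qlog.bz2": 0, "qcamera.ts": 1}
immediate_folders = ["crash/"]

def is_immediate(name, fn):
  # Immediate upload when the file name is a qlog-type key, or when the
  # full path passes through an "immediate" folder such as crash/.
  return name in immediate_priority or any(f in fn for f in immediate_folders)

# Hypothetical paths, for illustration only:
print(is_immediate("qlog.bz2", "/data/media/0/realdata/2021-01-18--12-00-00--0/qlog.bz2"))          # True
print(is_immediate("fcamera.hevc", "/data/media/0/realdata/2021-01-18--12-00-00--0/fcamera.hevc"))  # False
print(is_immediate("2021-01-18--12-00-00_8a187ec4_selfdrive_modeld_modeld",
                   "/data/media/0/realdata/crash/2021-01-18--12-00-00_8a187ec4_selfdrive_modeld_modeld"))  # True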

selfdrive/tombstoned.py

@@ -1,23 +1,70 @@
 #!/usr/bin/env python3
+import datetime
 import os
+import re
+import shutil
+import signal
+import subprocess
 import time
+import glob

 from raven import Client
 from raven.transport.http import HTTPTransport

+from common.file_helpers import mkdirs_exists_ok
 from selfdrive.hardware import TICI
+from selfdrive.loggerd.config import ROOT
 from selfdrive.swaglog import cloudlog
-from selfdrive.version import version, origin, branch, dirty
+from selfdrive.version import branch, commit, dirty, origin, version

 MAX_SIZE = 100000 * 10  # Normal size is 40-100k, allow up to 1M
 if TICI:
-  MAX_SIZE = MAX_SIZE * 10  # Allow larger size for tici
+  MAX_SIZE = MAX_SIZE * 100  # Allow larger size for tici since files contain coredump
+MAX_TOMBSTONE_FN_LEN = 85
+
+TOMBSTONE_DIR = "/data/tombstones/"
+APPORT_DIR = "/var/crash/"
+
+
+def safe_fn(s):
+  extra = ['_']
+  return "".join(c for c in s if c.isalnum() or c in extra).rstrip()
+
+
+def sentry_report(client, fn, message, contents):
+  cloudlog.error({'tombstone': message})
+  client.captureMessage(
+    message=message,
+    sdk={'name': 'tombstoned', 'version': '0'},
+    extra={
+      'tombstone_fn': fn,
+      'tombstone': contents
+    },
+  )
+
+
+def clear_apport_folder():
+  for f in glob.glob(APPORT_DIR + '*'):
+    try:
+      os.remove(f)
+    except Exception:
+      pass
+
+
+def get_apport_stacktrace(fn):
+  try:
+    cmd = f'apport-retrace -s <(cat <(echo "Package: openpilot") "{fn}")'
+    return subprocess.check_output(cmd, shell=True, encoding='utf8', timeout=30, executable='/bin/bash')  # pylint: disable=unexpected-keyword-arg
+  except subprocess.CalledProcessError:
+    return "Error getting stacktrace"
+  except subprocess.TimeoutExpired:
+    return "Timeout getting stacktrace"
+
+
 def get_tombstones():
   """Returns list of (filename, ctime) for all tombstones in /data/tombstones
   and apport crashlogs in /var/crash"""
   files = []
-  for folder in ["/data/tombstones/", "/var/crash/"]:
+  for folder in [TOMBSTONE_DIR, APPORT_DIR]:
     if os.path.exists(folder):
       with os.scandir(folder) as d:
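get_apport_stacktrace() leans on bash process substitution to prepend the "Package: openpilot" header that apport-retrace expects before it will symbolize a report. As a rough sketch of the same idea without the bash dependency (this variant is an assumption, not part of the diff), the header could instead be prepended into a temporary copy of the report:

#!/usr/bin/env python3
# Hedged alternative sketch, not part of the diff: prepend the "Package:"
# header via a temporary file instead of bash process substitution.
import subprocess
import tempfile

def get_apport_stacktrace_tmpfile(fn):
  with tempfile.NamedTemporaryFile('w', suffix='.crash') as tmp:
    tmp.write("Package: openpilot\n")
    with open(fn) as f:
      tmp.write(f.read())  # copies the full report, coredump included
    tmp.flush()

    try:
      # -s prints the symbolized trace to stdout instead of rewriting the report
      return subprocess.check_output(["apport-retrace", "-s", tmp.name], encoding='utf8', timeout=30)
    except subprocess.CalledProcessError:
      return "Error getting stacktrace"
    except subprocess.TimeoutExpired:
      return "Timeout getting stacktrace"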
@@ -30,7 +77,7 @@ def get_tombstones():
   return files


-def report_tombstone(fn, client):
+def report_tombstone_android(fn, client):
   f_size = os.path.getsize(fn)
   if f_size > MAX_SIZE:
     cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
@@ -39,41 +86,99 @@ def report_tombstone(fn, client):
   with open(fn, encoding='ISO-8859-1') as f:
     contents = f.read()

-  # Get summary for sentry title
-  if fn.endswith(".crash"):
-    lines = contents.split('\n')
-    message = lines[6]
-
-    status_idx = contents.find('ProcStatus')
-    if status_idx >= 0:
-      lines = contents[status_idx:].split('\n')
-      message += " " + lines[1]
-  else:
-    message = " ".join(contents.split('\n')[5:7])
-
-    # Cut off pid/tid, since that varies per run
-    name_idx = message.find('name')
-    if name_idx >= 0:
-      message = message[name_idx:]
-
-    # Cut off fault addr
-    fault_idx = message.find(', fault addr')
-    if fault_idx >= 0:
-      message = message[:fault_idx]
-
-  cloudlog.error({'tombstone': message})
-  client.captureMessage(
-    message=message,
-    sdk={'name': 'tombstoned', 'version': '0'},
-    extra={
-      'tombstone_fn': fn,
-      'tombstone': contents
-    },
-  )
+  message = " ".join(contents.split('\n')[5:7])
+
+  # Cut off pid/tid, since that varies per run
+  name_idx = message.find('name')
+  if name_idx >= 0:
+    message = message[name_idx:]
+
+  # Cut off fault addr
+  fault_idx = message.find(', fault addr')
+  if fault_idx >= 0:
+    message = message[:fault_idx]
+
+  sentry_report(client, fn, message, contents)
+
+
+def report_tombstone_apport(fn, client):
+  f_size = os.path.getsize(fn)
+  if f_size > MAX_SIZE:
+    cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
+    return
+
+  message = ""  # One line description of the crash
+  contents = ""  # Full file contents without coredump
+  path = ""  # File path relative to openpilot directory
+
+  proc_maps = False
+
+  with open(fn) as f:
+    for line in f:
+      if "CoreDump" in line:
+        break
+      elif "ProcMaps" in line:
+        proc_maps = True
+      elif "ProcStatus" in line:
+        proc_maps = False
+
+      if not proc_maps:
+        contents += line
+
+      if "ExecutablePath" in line:
+        path = line.strip().split(': ')[-1]
+        path = path.replace('/data/openpilot/', '')
+        message += path
+      elif "Signal" in line:
+        message += " - " + line.strip()
+
+        try:
+          sig_num = int(line.strip().split(': ')[-1])
+          message += " (" + signal.Signals(sig_num).name + ")"  # pylint: disable=no-member
+        except ValueError:
+          pass
+
+  stacktrace = get_apport_stacktrace(fn)
+  stacktrace_s = stacktrace.split('\n')
+  crash_function = "No stacktrace"
+
+  if len(stacktrace_s) > 2:
+    found = False
+
+    # Try to find first entry in openpilot, fall back to first line
+    for line in stacktrace_s:
+      if "at selfdrive/" in line:
+        crash_function = line
+        found = True
+        break
+
+    if not found:
+      crash_function = stacktrace_s[1]
+
+    # Remove arguments that can contain pointers to make sentry one-liner unique
+    crash_function = re.sub(r'\(.*?\)', '', crash_function)
+
+  contents = stacktrace + "\n\n" + contents
+  message = message + " - " + crash_function
+
+  sentry_report(client, fn, message, contents)
+
+  # Copy crashlog to upload folder
+  clean_path = path.replace('/', '_')
+  date = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
+
+  new_fn = f"{date}_{commit[:8]}_{safe_fn(clean_path)}"[:MAX_TOMBSTONE_FN_LEN]
+
+  crashlog_dir = os.path.join(ROOT, "crash")
+  mkdirs_exists_ok(crashlog_dir)
+
+  # Files could be on different filesystems, copy, then delete
+  shutil.copy(fn, os.path.join(crashlog_dir, new_fn))
+  os.remove(fn)


 def main():
+  # TODO: turn on when all tombstones are recovered
+  # clear_apport_folder()  # Clear apport folder on start, otherwise duplicate crashes won't register
   initial_tombstones = set(get_tombstones())

   tags = {
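To make uploaded crash reports easy to trace back, the generated file name encodes the crash time, the running commit, and a sanitized executable path, truncated to MAX_TOMBSTONE_FN_LEN. A hypothetical example of what report_tombstone_apport() would produce (the commit hash, timestamp, and path below are made up):

import datetime

MAX_TOMBSTONE_FN_LEN = 85

def safe_fn(s):
  extra = ['_']
  return "".join(c for c in s if c.isalnum() or c in extra).rstrip()

# Made-up inputs, for illustration only.
commit = "8a187ec406aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
path = "selfdrive/modeld/modeld"

clean_path = path.replace('/', '_')
date = datetime.datetime(2021, 1, 18, 12, 0, 0).strftime("%Y-%m-%d--%H-%M-%S")
new_fn = f"{date}_{commit[:8]}_{safe_fn(clean_path)}"[:MAX_TOMBSTONE_FN_LEN]

print(new_fn)  # 2021-01-18--12-00-00_8a187ec4_selfdrive_modeld_modeld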
@@ -91,7 +196,10 @@ def main():
     for fn, _ in (now_tombstones - initial_tombstones):
       try:
         cloudlog.info(f"reporting new tombstone {fn}")
-        report_tombstone(fn, client)
+        if fn.endswith(".crash"):
+          report_tombstone_apport(fn, client)
+        else:
+          report_tombstone_android(fn, client)
       except Exception:
         cloudlog.exception(f"Error reporting tombstone {fn}")
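main() now dispatches on the file extension: apport writes its reports into /var/crash as *.crash files, while Android-style tombstones in /data/tombstones keep the existing code path. A small illustration of the rule (the file names are hypothetical):

def pick_reporter(fn):
  # .crash files come from apport (/var/crash); everything else is treated
  # as an Android tombstone from /data/tombstones.
  return "apport" if fn.endswith(".crash") else "android"

for fn in ["/var/crash/_data_openpilot_selfdrive_modeld_modeld.1000.crash",
           "/data/tombstones/tombstone_00"]:
  print(fn, "->", pick_reporter(fn))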

selfdrive/version.py

@@ -52,6 +52,7 @@ comma_remote: bool = False
 tested_branch: bool = False
 origin = get_git_remote()
 branch = get_git_full_branchname()
+commit = get_git_commit()

 if (origin is not None) and (branch is not None):
   try:

@@ -74,7 +75,7 @@ if (origin is not None) and (branch is not None):
   try:
     dirty_files = run_cmd(["git", "diff-index", branch, "--"])
     cloudlog.event("dirty comma branch", version=version, dirty=dirty, origin=origin, branch=branch,
-                   dirty_files=dirty_files, commit=get_git_commit(), origin_commit=get_git_commit(branch))
+                   dirty_files=dirty_files, commit=commit, origin_commit=get_git_commit(branch))
   except subprocess.CalledProcessError:
     pass
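version.py now resolves the commit hash once at import time and exposes it as a module-level value, which is what tombstoned imports to stamp the upload file names. Intended usage (the printed value is illustrative):

from selfdrive.version import commit

print(commit[:8])  # e.g. "8a187ec4" on a device running this commit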
