openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

429 lines
17 KiB

#!/usr/bin/env python3
5 years ago
import datetime
import os
import queue
import threading
import time
from collections import OrderedDict, namedtuple
from pathlib import Path
from typing import Dict, Optional, Tuple
import psutil
import cereal.messaging as messaging
5 years ago
from cereal import log
from common.dict_helpers import strip_deprecated_keys
5 years ago
from common.filter_simple import FirstOrderFilter
from common.params import Params
from common.realtime import DT_TRML, sec_since_boot
from selfdrive.controls.lib.alertmanager import set_offroad_alert
from system.hardware import HARDWARE, TICI, AGNOS
5 years ago
from selfdrive.loggerd.config import get_available_percent
from selfdrive.statsd import statlog
from system.swaglog import cloudlog
from selfdrive.thermald.power_monitoring import PowerMonitoring
from selfdrive.thermald.fan_controller import TiciFanController
from system.version import terms_version, training_version
ThermalStatus = log.DeviceState.ThermalStatus
NetworkType = log.DeviceState.NetworkType
NetworkStrength = log.DeviceState.NetworkStrength
5 years ago
CURRENT_TAU = 15. # 15s time constant
TEMP_TAU = 5. # 5s time constant
DISCONNECT_TIMEOUT = 5. # wait 5 seconds before going offroad after disconnect so you get an alert
PANDA_STATES_TIMEOUT = int(1000 * 1.5 * DT_TRML) # 1.5x the expected pandaState frequency
5 years ago
ThermalBand = namedtuple("ThermalBand", ['min_temp', 'max_temp'])
HardwareState = namedtuple("HardwareState", ['network_type', 'network_metered', 'network_strength', 'network_info', 'nvme_temps', 'modem_temps'])
# List of thermal bands. We will stay within this region as long as we are within the bounds.
# When exiting the bounds, we'll jump to the lower or higher band. Bands are ordered in the dict.
THERMAL_BANDS = OrderedDict({
ThermalStatus.green: ThermalBand(None, 80.0),
ThermalStatus.yellow: ThermalBand(75.0, 96.0),
ThermalStatus.red: ThermalBand(80.0, 107.),
ThermalStatus.danger: ThermalBand(94.0, None),
})
# Override to highest thermal band when offroad and above this temp
3 years ago
OFFROAD_DANGER_TEMP = 79.5
prev_offroad_states: Dict[str, Tuple[bool, Optional[str]]] = {}
tz_by_type: Optional[Dict[str, int]] = None
def populate_tz_by_type():
global tz_by_type
tz_by_type = {}
for n in os.listdir("/sys/devices/virtual/thermal"):
if not n.startswith("thermal_zone"):
continue
with open(os.path.join("/sys/devices/virtual/thermal", n, "type")) as f:
tz_by_type[f.read().strip()] = int(n.lstrip("thermal_zone"))
def read_tz(x):
if x is None:
5 years ago
return 0
if isinstance(x, str):
if tz_by_type is None:
populate_tz_by_type()
x = tz_by_type[x]
5 years ago
try:
with open(f"/sys/devices/virtual/thermal/thermal_zone{x}/temp") as f:
return int(f.read())
5 years ago
except FileNotFoundError:
return 0
def read_thermal(thermal_config):
dat = messaging.new_message('deviceState')
dat.deviceState.cpuTempC = [read_tz(z) / thermal_config.cpu[1] for z in thermal_config.cpu[0]]
dat.deviceState.gpuTempC = [read_tz(z) / thermal_config.gpu[1] for z in thermal_config.gpu[0]]
dat.deviceState.memoryTempC = read_tz(thermal_config.mem[0]) / thermal_config.mem[1]
dat.deviceState.ambientTempC = read_tz(thermal_config.ambient[0]) / thermal_config.ambient[1]
dat.deviceState.pmicTempC = [read_tz(z) / thermal_config.pmic[1] for z in thermal_config.pmic[0]]
5 years ago
return dat
def set_offroad_alert_if_changed(offroad_alert: str, show_alert: bool, extra_text: Optional[str]=None):
if prev_offroad_states.get(offroad_alert, None) == (show_alert, extra_text):
return
prev_offroad_states[offroad_alert] = (show_alert, extra_text)
set_offroad_alert(offroad_alert, show_alert, extra_text)
def hw_state_thread(end_event, hw_queue):
"""Handles non critical hardware state, and sends over queue"""
count = 0
prev_hw_state = None
5 years ago
modem_version = None
modem_nv = None
modem_configured = False
while not end_event.is_set():
# these are expensive calls. update every 10s
if (count % int(10. / DT_TRML)) == 0:
try:
network_type = HARDWARE.get_network_type()
modem_temps = HARDWARE.get_modem_temperatures()
if len(modem_temps) == 0 and prev_hw_state is not None:
modem_temps = prev_hw_state.modem_temps
# Log modem version once
if AGNOS and ((modem_version is None) or (modem_nv is None)):
modem_version = HARDWARE.get_modem_version() # pylint: disable=assignment-from-none
modem_nv = HARDWARE.get_modem_nv() # pylint: disable=assignment-from-none
if (modem_version is not None) and (modem_nv is not None):
cloudlog.event("modem version", version=modem_version, nv=modem_nv)
hw_state = HardwareState(
network_type=network_type,
network_metered=HARDWARE.get_network_metered(network_type),
network_strength=HARDWARE.get_network_strength(network_type),
network_info=HARDWARE.get_network_info(),
nvme_temps=HARDWARE.get_nvme_temperatures(),
modem_temps=modem_temps,
)
try:
hw_queue.put_nowait(hw_state)
except queue.Full:
pass
# TODO: remove this once the config is in AGNOS
if not modem_configured and len(HARDWARE.get_sim_info().get('sim_id', '')) > 0:
cloudlog.warning("configuring modem")
HARDWARE.configure_modem()
modem_configured = True
prev_hw_state = hw_state
except Exception:
cloudlog.exception("Error getting hardware state")
count += 1
time.sleep(DT_TRML)
def thermald_thread(end_event, hw_queue):
pm = messaging.PubMaster(['deviceState'])
sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "controlsState", "pandaStates"], poll=["pandaStates"])
5 years ago
count = 0
onroad_conditions: Dict[str, bool] = {
"ignition": False,
}
startup_conditions: Dict[str, bool] = {}
startup_conditions_prev: Dict[str, bool] = {}
5 years ago
off_ts = None
started_ts = None
started_seen = False
thermal_status = ThermalStatus.green
last_hw_state = HardwareState(
network_type=NetworkType.none,
network_metered=False,
network_strength=NetworkStrength.unknown,
network_info=None,
nvme_temps=[],
modem_temps=[],
)
5 years ago
current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML)
temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML)
5 years ago
should_start_prev = False
in_car = False
engaged_prev = False
5 years ago
params = Params()
power_monitor = PowerMonitoring()
5 years ago
HARDWARE.initialize_hardware()
thermal_config = HARDWARE.get_thermal_config()
fan_controller = None
while not end_event.is_set():
sm.update(PANDA_STATES_TIMEOUT)
pandaStates = sm['pandaStates']
peripheralState = sm['peripheralState']
msg = read_thermal(thermal_config)
5 years ago
if sm.updated['pandaStates'] and len(pandaStates) > 0:
# Set ignition based on any panda connected
onroad_conditions["ignition"] = any(ps.ignitionLine or ps.ignitionCan for ps in pandaStates if ps.pandaType != log.PandaState.PandaType.unknown)
pandaState = pandaStates[0]
in_car = pandaState.harnessStatus != log.PandaState.HarnessStatus.notConnected
# Setup fan handler on first connect to panda
if fan_controller is None and peripheralState.pandaType != log.PandaState.PandaType.unknown:
if TICI:
fan_controller = TiciFanController()
elif (sec_since_boot() - sm.rcv_time['pandaStates']) > DISCONNECT_TIMEOUT:
if onroad_conditions["ignition"]:
onroad_conditions["ignition"] = False
cloudlog.error("panda timed out onroad")
try:
last_hw_state = hw_queue.get_nowait()
except queue.Empty:
pass
msg.deviceState.freeSpacePercent = get_available_percent(default=100.0)
msg.deviceState.memoryUsagePercent = int(round(psutil.virtual_memory().percent))
msg.deviceState.cpuUsagePercent = [int(round(n)) for n in psutil.cpu_percent(percpu=True)]
msg.deviceState.gpuUsagePercent = int(round(HARDWARE.get_gpu_usage_percent()))
msg.deviceState.networkType = last_hw_state.network_type
msg.deviceState.networkMetered = last_hw_state.network_metered
msg.deviceState.networkStrength = last_hw_state.network_strength
if last_hw_state.network_info is not None:
msg.deviceState.networkInfo = last_hw_state.network_info
msg.deviceState.nvmeTempC = last_hw_state.nvme_temps
msg.deviceState.modemTempC = last_hw_state.modem_temps
msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness()
msg.deviceState.usbOnline = HARDWARE.get_usb_present()
current_filter.update(msg.deviceState.batteryCurrent / 1e6)
5 years ago
max_comp_temp = temp_filter.update(
max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC))
)
5 years ago
if fan_controller is not None:
msg.deviceState.fanSpeedPercentDesired = fan_controller.update(max_comp_temp, onroad_conditions["ignition"])
5 years ago
is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5))
if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP:
# If device is offroad we want to cool down before going onroad
# since going onroad increases load and can make temps go over 107
5 years ago
thermal_status = ThermalStatus.danger
else:
current_band = THERMAL_BANDS[thermal_status]
band_idx = list(THERMAL_BANDS.keys()).index(thermal_status)
if current_band.min_temp is not None and max_comp_temp < current_band.min_temp:
thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1]
elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp:
thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1]
5 years ago
# **** starting logic ****
# Ensure date/time are valid
now = datetime.datetime.utcnow()
startup_conditions["time_valid"] = (now.year > 2020) or (now.year == 2020 and now.month >= 10)
set_offroad_alert_if_changed("Offroad_InvalidTime", (not startup_conditions["time_valid"]))
5 years ago
startup_conditions["up_to_date"] = params.get("Offroad_ConnectivityNeeded") is None or params.get_bool("DisableUpdates") or params.get_bool("SnoozeUpdate")
startup_conditions["not_uninstalling"] = not params.get_bool("DoUninstall")
startup_conditions["accepted_terms"] = params.get("HasAcceptedTerms") == terms_version
5 years ago
# with 2% left, we killall, otherwise the phone will take a long time to boot
startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2
startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \
params.get_bool("Passive")
startup_conditions["not_driver_view"] = not params.get_bool("IsDriverViewEnabled")
startup_conditions["not_taking_snapshot"] = not params.get_bool("IsTakingSnapshot")
5 years ago
# if any CPU gets above 107 or the battery gets above 63, kill all processes
# controls will warn with CPU above 95 or battery above 60
onroad_conditions["device_temp_good"] = thermal_status < ThermalStatus.danger
set_offroad_alert_if_changed("Offroad_TemperatureTooHigh", (not onroad_conditions["device_temp_good"]))
5 years ago
# TODO: this should move to TICI.initialize_hardware, but we currently can't import params there
if TICI:
if not os.path.isfile("/persist/comma/living-in-the-moment"):
if not Path("/data/media").is_mount():
set_offroad_alert_if_changed("Offroad_StorageMissing", True)
else:
# check for bad NVMe
try:
with open("/sys/block/nvme0n1/device/model") as f:
model = f.read().strip()
if not model.startswith("Samsung SSD 980") and params.get("Offroad_BadNvme") is None:
set_offroad_alert_if_changed("Offroad_BadNvme", True)
cloudlog.event("Unsupported NVMe", model=model, error=True)
except Exception:
pass
# Handle offroad/onroad transition
should_start = all(onroad_conditions.values())
if started_ts is None:
should_start = should_start and all(startup_conditions.values())
if should_start != should_start_prev or (count == 0):
params.put_bool("IsOnroad", should_start)
params.put_bool("IsOffroad", not should_start)
params.put_bool("IsEngaged", False)
engaged_prev = False
HARDWARE.set_power_save(not should_start)
5 years ago
if sm.updated['controlsState']:
engaged = sm['controlsState'].enabled
if engaged != engaged_prev:
params.put_bool("IsEngaged", engaged)
engaged_prev = engaged
try:
with open('/dev/kmsg', 'w') as kmsg:
kmsg.write(f"<3>[thermald] engaged: {engaged}\n")
except Exception:
pass
if should_start:
5 years ago
off_ts = None
if started_ts is None:
started_ts = sec_since_boot()
started_seen = True
else:
if onroad_conditions["ignition"] and (startup_conditions != startup_conditions_prev):
cloudlog.event("Startup blocked", startup_conditions=startup_conditions, onroad_conditions=onroad_conditions)
5 years ago
started_ts = None
if off_ts is None:
off_ts = sec_since_boot()
# Offroad power monitoring
power_monitor.calculate(peripheralState, onroad_conditions["ignition"])
msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used()
msg.deviceState.carBatteryCapacityUwh = max(0, power_monitor.get_car_battery_capacity())
current_power_draw = HARDWARE.get_current_power_draw()
statlog.sample("power_draw", current_power_draw)
msg.deviceState.powerDrawW = current_power_draw
som_power_draw = HARDWARE.get_som_power_draw()
statlog.sample("som_power_draw", som_power_draw)
msg.deviceState.somPowerDrawW = som_power_draw
# Check if we need to disable charging (handled by boardd)
msg.deviceState.chargingDisabled = power_monitor.should_disable_charging(onroad_conditions["ignition"], in_car, off_ts)
# Check if we need to shut down
if power_monitor.should_shutdown(peripheralState, onroad_conditions["ignition"], in_car, off_ts, started_seen):
cloudlog.warning(f"shutting device down, offroad since {off_ts}")
params.put_bool("DoShutdown", True)
msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90 # if current is positive, then battery is being discharged
msg.deviceState.started = started_ts is not None
msg.deviceState.startedMonoTime = int(1e9*(started_ts or 0))
5 years ago
last_ping = params.get("LastAthenaPingTime")
if last_ping is not None:
msg.deviceState.lastAthenaPingTime = int(last_ping)
msg.deviceState.thermalStatus = thermal_status
pm.send("deviceState", msg)
5 years ago
should_start_prev = should_start
startup_conditions_prev = startup_conditions.copy()
5 years ago
# Log to statsd
statlog.gauge("free_space_percent", msg.deviceState.freeSpacePercent)
statlog.gauge("gpu_usage_percent", msg.deviceState.gpuUsagePercent)
statlog.gauge("memory_usage_percent", msg.deviceState.memoryUsagePercent)
for i, usage in enumerate(msg.deviceState.cpuUsagePercent):
statlog.gauge(f"cpu{i}_usage_percent", usage)
for i, temp in enumerate(msg.deviceState.cpuTempC):
statlog.gauge(f"cpu{i}_temperature", temp)
for i, temp in enumerate(msg.deviceState.gpuTempC):
statlog.gauge(f"gpu{i}_temperature", temp)
statlog.gauge("memory_temperature", msg.deviceState.memoryTempC)
statlog.gauge("ambient_temperature", msg.deviceState.ambientTempC)
for i, temp in enumerate(msg.deviceState.pmicTempC):
statlog.gauge(f"pmic{i}_temperature", temp)
for i, temp in enumerate(last_hw_state.nvme_temps):
statlog.gauge(f"nvme_temperature{i}", temp)
for i, temp in enumerate(last_hw_state.modem_temps):
statlog.gauge(f"modem_temperature{i}", temp)
statlog.gauge("fan_speed_percent_desired", msg.deviceState.fanSpeedPercentDesired)
statlog.gauge("screen_brightness_percent", msg.deviceState.screenBrightnessPercent)
# report to server once every 10 minutes
if (count % int(600. / DT_TRML)) == 0:
5 years ago
cloudlog.event("STATUS_PACKET",
count=count,
pandaStates=[strip_deprecated_keys(p.to_dict()) for p in pandaStates],
peripheralState=strip_deprecated_keys(peripheralState.to_dict()),
location=(strip_deprecated_keys(sm["gpsLocationExternal"].to_dict()) if sm.alive["gpsLocationExternal"] else None),
deviceState=strip_deprecated_keys(msg.to_dict()))
5 years ago
count += 1
def main():
hw_queue = queue.Queue(maxsize=1)
end_event = threading.Event()
threads = [
threading.Thread(target=hw_state_thread, args=(end_event, hw_queue)),
threading.Thread(target=thermald_thread, args=(end_event, hw_queue)),
]
for t in threads:
t.start()
try:
while True:
time.sleep(1)
if not all(t.is_alive() for t in threads):
break
finally:
end_event.set()
for t in threads:
t.join()
5 years ago
5 years ago
if __name__ == "__main__":
main()