|
|
|
@ -1,28 +1,31 @@ |
|
|
|
|
#!/usr/bin/env python3 |
|
|
|
|
import datetime |
|
|
|
|
import os |
|
|
|
|
import queue |
|
|
|
|
import threading |
|
|
|
|
import time |
|
|
|
|
from collections import OrderedDict, namedtuple |
|
|
|
|
from pathlib import Path |
|
|
|
|
from typing import Dict, NoReturn, Optional, Tuple |
|
|
|
|
from collections import namedtuple, OrderedDict |
|
|
|
|
from typing import Dict, Optional, Tuple |
|
|
|
|
|
|
|
|
|
import psutil |
|
|
|
|
from smbus2 import SMBus |
|
|
|
|
|
|
|
|
|
import cereal.messaging as messaging |
|
|
|
|
from cereal import log |
|
|
|
|
from common.dict_helpers import strip_deprecated_keys |
|
|
|
|
from common.filter_simple import FirstOrderFilter |
|
|
|
|
from common.numpy_fast import interp |
|
|
|
|
from common.params import Params |
|
|
|
|
from common.realtime import DT_TRML, sec_since_boot |
|
|
|
|
from common.dict_helpers import strip_deprecated_keys |
|
|
|
|
from selfdrive.controls.lib.alertmanager import set_offroad_alert |
|
|
|
|
from selfdrive.controls.lib.pid import PIController |
|
|
|
|
from selfdrive.hardware import EON, TICI, PC, HARDWARE |
|
|
|
|
from selfdrive.hardware import EON, HARDWARE, PC, TICI |
|
|
|
|
from selfdrive.loggerd.config import get_available_percent |
|
|
|
|
from selfdrive.statsd import statlog |
|
|
|
|
from selfdrive.swaglog import cloudlog |
|
|
|
|
from selfdrive.thermald.power_monitoring import PowerMonitoring |
|
|
|
|
from selfdrive.version import terms_version, training_version |
|
|
|
|
from selfdrive.statsd import statlog |
|
|
|
|
|
|
|
|
|
ThermalStatus = log.DeviceState.ThermalStatus |
|
|
|
|
NetworkType = log.DeviceState.NetworkType |
|
|
|
@ -30,8 +33,10 @@ NetworkStrength = log.DeviceState.NetworkStrength |
|
|
|
|
# Time constants handed to the FirstOrderFilter instances built in thermald_thread.
CURRENT_TAU = 15.  # 15s time constant
TEMP_TAU = 5.  # 5s time constant

# wait 5 seconds before going offroad after disconnect so you get an alert
DISCONNECT_TIMEOUT = 5.

# 2.5x the expected pandaState interval (DT_TRML). The factor of 1000
# presumably converts seconds to milliseconds -- confirm against DT_TRML.
PANDA_STATES_TIMEOUT = int(1000 * 2.5 * DT_TRML)

# Lower and upper temperature bound of a single thermal band.
ThermalBand = namedtuple("ThermalBand", ("min_temp", "max_temp"))

# Snapshot of the slow-to-read hardware values that hw_state_thread collects
# and thermald_thread copies into the deviceState message.
HardwareState = namedtuple(
    "HardwareState",
    (
        "network_type",
        "network_strength",
        "network_info",
        "nvme_temps",
        "modem_temps",
    ),
)
|
|
|
|
|
|
|
|
|
# List of thermal bands. We will stay within this region as long as we are within the bounds. |
|
|
|
|
# When exiting the bounds, we'll jump to the lower or higher band. Bands are ordered in the dict. |
|
|
|
@ -152,13 +157,50 @@ def set_offroad_alert_if_changed(offroad_alert: str, show_alert: bool, extra_tex |
|
|
|
|
set_offroad_alert(offroad_alert, show_alert, extra_text) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def thermald_thread() -> NoReturn: |
|
|
|
|
def hw_state_thread(end_event, hw_queue):
    """Handles non critical hardware state, and sends over queue.

    Roughly every 10 seconds (every ``int(10. / DT_TRML)`` loop iterations) a
    fresh ``HardwareState`` snapshot -- network type, strength and info plus
    NVMe and modem temperatures -- is read from ``HARDWARE`` and offered to
    ``hw_queue`` without blocking.

    When ``TICI`` is set, the loop also counts consecutive polls in which the
    network info reports the state "REGISTERED". After more than 10 such polls
    in a row it logs a warning and runs ``nmcli conn up lte``.

    Args:
        end_event: Event-like object. The loop stops once ``is_set()`` returns
            true. It must also provide ``wait(timeout)``, as
            ``threading.Event`` does.
        hw_queue: Queue that receives ``HardwareState`` tuples through
            ``put_nowait``.
    """
    count = 0
    registered_count = 0

    while not end_event.is_set():
        # these are expensive calls. update every 10s
        if (count % int(10. / DT_TRML)) == 0:
            try:
                network_type = HARDWARE.get_network_type()

                hw_state = HardwareState(
                    network_type=network_type,
                    network_strength=HARDWARE.get_network_strength(network_type),
                    network_info=HARDWARE.get_network_info(),
                    nvme_temps=HARDWARE.get_nvme_temperatures(),
                    modem_temps=HARDWARE.get_modem_temperatures(),
                )

                try:
                    hw_queue.put_nowait(hw_state)
                except queue.Full:
                    # Deliberate best effort: the previous snapshot has not
                    # been picked up yet, so this one is dropped rather than
                    # blocking the polling loop.
                    pass

                network_info = hw_state.network_info
                if TICI and (network_info is not None) and (network_info.get('state', None) == "REGISTERED"):
                    registered_count += 1
                else:
                    registered_count = 0

                if registered_count > 10:
                    cloudlog.warning(f"Modem stuck in registered state {hw_state.network_info}. nmcli conn up lte")
                    os.system("nmcli conn up lte")
                    registered_count = 0

            except Exception:
                # Any failure while reading the hardware is logged and the
                # poll is skipped; the loop itself keeps running.
                cloudlog.exception("Error getting network status")

        count += 1
        # Pause for one tick, but return immediately once shutdown has been
        # requested instead of always sleeping the full DT_TRML.
        end_event.wait(DT_TRML)
|
|
|
|
|
|
|
|
|
pandaState_timeout = int(1000 * 2.5 * DT_TRML) # 2.5x the expected pandaState frequency |
|
|
|
|
pandaState_sock = messaging.sub_sock('pandaStates', timeout=pandaState_timeout) |
|
|
|
|
sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "controlsState"]) |
|
|
|
|
|
|
|
|
|
def thermald_thread(end_event, hw_queue): |
|
|
|
|
pm = messaging.PubMaster(['deviceState']) |
|
|
|
|
sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "controlsState", "pandaStates"], poll=["pandaStates"]) |
|
|
|
|
|
|
|
|
|
fan_speed = 0 |
|
|
|
|
count = 0 |
|
|
|
@ -175,12 +217,13 @@ def thermald_thread() -> NoReturn: |
|
|
|
|
thermal_status = ThermalStatus.green |
|
|
|
|
usb_power = True |
|
|
|
|
|
|
|
|
|
network_type = NetworkType.none |
|
|
|
|
network_strength = NetworkStrength.unknown |
|
|
|
|
network_info = None |
|
|
|
|
registered_count = 0 |
|
|
|
|
nvme_temps = None |
|
|
|
|
modem_temps = None |
|
|
|
|
last_hw_state = HardwareState( |
|
|
|
|
network_type=NetworkType.none, |
|
|
|
|
network_strength=NetworkStrength.unknown, |
|
|
|
|
network_info=None, |
|
|
|
|
nvme_temps=[], |
|
|
|
|
modem_temps=[], |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) |
|
|
|
|
temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML) |
|
|
|
@ -199,16 +242,16 @@ def thermald_thread() -> NoReturn: |
|
|
|
|
# TODO: use PI controller for UNO |
|
|
|
|
controller = PIController(k_p=0, k_i=2e-3, neg_limit=-80, pos_limit=0, rate=(1 / DT_TRML)) |
|
|
|
|
|
|
|
|
|
while True: |
|
|
|
|
pandaStates = messaging.recv_sock(pandaState_sock, wait=True) |
|
|
|
|
while not end_event.is_set(): |
|
|
|
|
sm.update(PANDA_STATES_TIMEOUT) |
|
|
|
|
|
|
|
|
|
sm.update(0) |
|
|
|
|
pandaStates = sm['pandaStates'] |
|
|
|
|
peripheralState = sm['peripheralState'] |
|
|
|
|
|
|
|
|
|
msg = read_thermal(thermal_config) |
|
|
|
|
|
|
|
|
|
if pandaStates is not None and len(pandaStates.pandaStates) > 0: |
|
|
|
|
pandaState = pandaStates.pandaStates[0] |
|
|
|
|
if sm.updated['pandaStates'] and len(pandaStates) > 0: |
|
|
|
|
pandaState = pandaStates[0] |
|
|
|
|
|
|
|
|
|
if pandaState.pandaType != log.PandaState.PandaType.unknown: |
|
|
|
|
onroad_conditions["ignition"] = pandaState.ignitionLine or pandaState.ignitionCan |
|
|
|
@ -231,44 +274,23 @@ def thermald_thread() -> NoReturn: |
|
|
|
|
setup_eon_fan() |
|
|
|
|
handle_fan = handle_fan_eon |
|
|
|
|
|
|
|
|
|
# these are expensive calls. update every 10s |
|
|
|
|
if (count % int(10. / DT_TRML)) == 0: |
|
|
|
|
try: |
|
|
|
|
network_type = HARDWARE.get_network_type() |
|
|
|
|
network_strength = HARDWARE.get_network_strength(network_type) |
|
|
|
|
network_info = HARDWARE.get_network_info() # pylint: disable=assignment-from-none |
|
|
|
|
nvme_temps = HARDWARE.get_nvme_temperatures() |
|
|
|
|
modem_temps = HARDWARE.get_modem_temperatures() |
|
|
|
|
|
|
|
|
|
if TICI and (network_info.get('state', None) == "REGISTERED"): |
|
|
|
|
registered_count += 1 |
|
|
|
|
else: |
|
|
|
|
registered_count = 0 |
|
|
|
|
|
|
|
|
|
if registered_count > 10: |
|
|
|
|
cloudlog.warning(f"Modem stuck in registered state {network_info}. nmcli conn up lte") |
|
|
|
|
os.system("nmcli conn up lte") |
|
|
|
|
registered_count = 0 |
|
|
|
|
|
|
|
|
|
except Exception: |
|
|
|
|
cloudlog.exception("Error getting network status") |
|
|
|
|
try: |
|
|
|
|
last_hw_state = hw_queue.get_nowait() |
|
|
|
|
except queue.Empty: |
|
|
|
|
pass |
|
|
|
|
|
|
|
|
|
msg.deviceState.freeSpacePercent = get_available_percent(default=100.0) |
|
|
|
|
msg.deviceState.memoryUsagePercent = int(round(psutil.virtual_memory().percent)) |
|
|
|
|
msg.deviceState.cpuUsagePercent = [int(round(n)) for n in psutil.cpu_percent(percpu=True)] |
|
|
|
|
msg.deviceState.gpuUsagePercent = int(round(HARDWARE.get_gpu_usage_percent())) |
|
|
|
|
msg.deviceState.networkType = network_type |
|
|
|
|
msg.deviceState.networkStrength = network_strength |
|
|
|
|
if network_info is not None: |
|
|
|
|
msg.deviceState.networkInfo = network_info |
|
|
|
|
if nvme_temps is not None: |
|
|
|
|
msg.deviceState.nvmeTempC = nvme_temps |
|
|
|
|
for i, temp in enumerate(nvme_temps): |
|
|
|
|
statlog.gauge(f"nvme_temperature{i}", temp) |
|
|
|
|
if modem_temps is not None: |
|
|
|
|
msg.deviceState.modemTempC = modem_temps |
|
|
|
|
for i, temp in enumerate(modem_temps): |
|
|
|
|
statlog.gauge(f"modem_temperature{i}", temp) |
|
|
|
|
|
|
|
|
|
msg.deviceState.networkType = last_hw_state.network_type |
|
|
|
|
msg.deviceState.networkStrength = last_hw_state.network_strength |
|
|
|
|
if last_hw_state.network_info is not None: |
|
|
|
|
msg.deviceState.networkInfo = last_hw_state.network_info |
|
|
|
|
|
|
|
|
|
msg.deviceState.nvmeTempC = last_hw_state.nvme_temps |
|
|
|
|
msg.deviceState.modemTempC = last_hw_state.modem_temps |
|
|
|
|
|
|
|
|
|
msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness() |
|
|
|
|
msg.deviceState.batteryPercent = HARDWARE.get_battery_capacity() |
|
|
|
@ -392,7 +414,7 @@ def thermald_thread() -> NoReturn: |
|
|
|
|
should_start_prev = should_start |
|
|
|
|
startup_conditions_prev = startup_conditions.copy() |
|
|
|
|
|
|
|
|
|
# log more stats |
|
|
|
|
# Log to statsd |
|
|
|
|
statlog.gauge("free_space_percent", msg.deviceState.freeSpacePercent) |
|
|
|
|
statlog.gauge("gpu_usage_percent", msg.deviceState.gpuUsagePercent) |
|
|
|
|
statlog.gauge("memory_usage_percent", msg.deviceState.memoryUsagePercent) |
|
|
|
@ -406,6 +428,10 @@ def thermald_thread() -> NoReturn: |
|
|
|
|
statlog.gauge("ambient_temperature", msg.deviceState.ambientTempC) |
|
|
|
|
for i, temp in enumerate(msg.deviceState.pmicTempC): |
|
|
|
|
statlog.gauge(f"pmic{i}_temperature", temp) |
|
|
|
|
for i, temp in enumerate(last_hw_state.nvme_temps): |
|
|
|
|
statlog.gauge(f"nvme_temperature{i}", temp) |
|
|
|
|
for i, temp in enumerate(last_hw_state.modem_temps): |
|
|
|
|
statlog.gauge(f"modem_temperature{i}", temp) |
|
|
|
|
statlog.gauge("fan_speed_percent_desired", msg.deviceState.fanSpeedPercentDesired) |
|
|
|
|
statlog.gauge("screen_brightness_percent", msg.deviceState.screenBrightnessPercent) |
|
|
|
|
|
|
|
|
@ -416,7 +442,7 @@ def thermald_thread() -> NoReturn: |
|
|
|
|
|
|
|
|
|
cloudlog.event("STATUS_PACKET", |
|
|
|
|
count=count, |
|
|
|
|
pandaStates=(strip_deprecated_keys(pandaStates.to_dict()) if pandaStates else None), |
|
|
|
|
pandaStates=[strip_deprecated_keys(p.to_dict()) for p in pandaStates], |
|
|
|
|
peripheralState=strip_deprecated_keys(peripheralState.to_dict()), |
|
|
|
|
location=(strip_deprecated_keys(sm["gpsLocationExternal"].to_dict()) if sm.alive["gpsLocationExternal"] else None), |
|
|
|
|
deviceState=strip_deprecated_keys(msg.to_dict())) |
|
|
|
@ -424,8 +450,28 @@ def thermald_thread() -> NoReturn: |
|
|
|
|
count += 1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main() -> NoReturn: |
|
|
|
|
thermald_thread() |
|
|
|
|
def main():
    """Run the hardware poller and the thermald loop as two threads.

    Both threads share one stop event and a single-slot queue. The calling
    thread checks once per second whether both workers are still alive; as
    soon as one has exited (or this loop is left by an exception) the stop
    event is set, and the workers are then joined.
    """
    end_event = threading.Event()
    hw_queue = queue.Queue(maxsize=1)

    workers = [
        threading.Thread(target=target, args=(end_event, hw_queue))
        for target in (hw_state_thread, thermald_thread)
    ]
    for worker in workers:
        worker.start()

    try:
        while True:
            time.sleep(1)
            # Leave as soon as any worker is no longer running.
            if any(not worker.is_alive() for worker in workers):
                break
    finally:
        # Ask the remaining worker(s) to stop, whatever ended the loop.
        end_event.set()

    for worker in workers:
        worker.join()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
|