Thermald thermal state cleanup (#21959)

* dumb controller 'tuned' on a desk

* reduce Ki

* fix prev_time bug

* reset integral on going onroad

* cleanup thermal detection logic

* more minor tuning

* use controls' PI controller

* get rid of bat_temp

* change if once more

* cleanup

* cleanup import order

* add back bat temp in this PR

* cleanup thermal status logic

* use offroad danger temp

* revert that

* use ordered dict for prev/next bands

Co-authored-by: Comma Device <device@comma.ai>
Co-authored-by: Willem Melching <willem.melching@gmail.com>
Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
old-commit-hash: 86bceed57d
commatwo_master
Robbe Derks 4 years ago committed by GitHub
parent 9e973a2059
commit a6e6667a3a
  1. 61
      selfdrive/thermald/thermald.py

@ -4,6 +4,7 @@ import os
import time import time
from pathlib import Path from pathlib import Path
from typing import Dict, Optional, Tuple from typing import Dict, Optional, Tuple
from collections import namedtuple, OrderedDict
import psutil import psutil
from smbus2 import SMBus from smbus2 import SMBus
@ -11,7 +12,7 @@ from smbus2 import SMBus
import cereal.messaging as messaging import cereal.messaging as messaging
from cereal import log from cereal import log
from common.filter_simple import FirstOrderFilter from common.filter_simple import FirstOrderFilter
from common.numpy_fast import clip, interp from common.numpy_fast import interp
from common.params import Params, ParamKeyType from common.params import Params, ParamKeyType
from common.realtime import DT_TRML, sec_since_boot from common.realtime import DT_TRML, sec_since_boot
from common.dict_helpers import strip_deprecated_keys from common.dict_helpers import strip_deprecated_keys
@ -30,15 +31,26 @@ ThermalStatus = log.DeviceState.ThermalStatus
NetworkType = log.DeviceState.NetworkType NetworkType = log.DeviceState.NetworkType
NetworkStrength = log.DeviceState.NetworkStrength NetworkStrength = log.DeviceState.NetworkStrength
CURRENT_TAU = 15. # 15s time constant CURRENT_TAU = 15. # 15s time constant
CPU_TEMP_TAU = 5. # 5s time constant TEMP_TAU = 5. # 5s time constant
DAYS_NO_CONNECTIVITY_MAX = 7 # do not allow to engage after a week without internet DAYS_NO_CONNECTIVITY_MAX = 7 # do not allow to engage after a week without internet
DAYS_NO_CONNECTIVITY_PROMPT = 4 # send an offroad prompt after 4 days with no internet DAYS_NO_CONNECTIVITY_PROMPT = 4 # send an offroad prompt after 4 days with no internet
DISCONNECT_TIMEOUT = 5. # wait 5 seconds before going offroad after disconnect so you get an alert DISCONNECT_TIMEOUT = 5. # wait 5 seconds before going offroad after disconnect so you get an alert
prev_offroad_states: Dict[str, Tuple[bool, Optional[str]]] = {} ThermalBand = namedtuple("ThermalBand", ['min_temp', 'max_temp'])
last_eon_fan_val = None # List of thermal bands. We will stay within this region as long as we are within the bounds.
# When exiting the bounds, we'll jump to the lower or higher band. Bands are ordered in the dict.
THERMAL_BANDS = OrderedDict({
ThermalStatus.green: ThermalBand(None, 80.0),
ThermalStatus.yellow: ThermalBand(75.0, 96.0),
ThermalStatus.red: ThermalBand(80.0, 107.),
ThermalStatus.danger: ThermalBand(94.0, None),
})
# Override to highest thermal band when offroad and above this temp
OFFROAD_DANGER_TEMP = 70.0
prev_offroad_states: Dict[str, Tuple[bool, Optional[str]]] = {}
def read_tz(x): def read_tz(x):
if x is None: if x is None:
@ -65,6 +77,7 @@ def setup_eon_fan():
os.system("echo 2 > /sys/module/dwc3_msm/parameters/otg_switch") os.system("echo 2 > /sys/module/dwc3_msm/parameters/otg_switch")
last_eon_fan_val = None
def set_eon_fan(val): def set_eon_fan(val):
global last_eon_fan_val global last_eon_fan_val
@ -122,7 +135,7 @@ def handle_fan_tici(controller, max_cpu_temp, fan_speed, ignition):
controller.pos_limit = -(30 if ignition else 0) controller.pos_limit = -(30 if ignition else 0)
fan_pwr_out = -int(controller.update( fan_pwr_out = -int(controller.update(
setpoint=(75 if ignition else 68), setpoint=(75 if ignition else (OFFROAD_DANGER_TEMP - 2)),
measurement=max_cpu_temp, measurement=max_cpu_temp,
feedforward=interp(max_cpu_temp, [60.0, 100.0], [0, -80]) feedforward=interp(max_cpu_temp, [60.0, 100.0], [0, -80])
)) ))
@ -167,7 +180,7 @@ def thermald_thread():
registered_count = 0 registered_count = 0
current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML)
cpu_temp_filter = FirstOrderFilter(0., CPU_TEMP_TAU, DT_TRML) temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML)
pandaState_prev = None pandaState_prev = None
should_start_prev = False should_start_prev = False
handle_fan = None handle_fan = None
@ -294,36 +307,26 @@ def thermald_thread():
current_filter.update(msg.deviceState.batteryCurrent / 1e6) current_filter.update(msg.deviceState.batteryCurrent / 1e6)
# TODO: add car battery voltage check max_comp_temp = temp_filter.update(
max_cpu_temp = cpu_temp_filter.update(max(msg.deviceState.cpuTempC)) max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC))
max_comp_temp = max(max_cpu_temp, msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC)) )
bat_temp = msg.deviceState.batteryTempC
if handle_fan is not None: if handle_fan is not None:
fan_speed = handle_fan(controller, max_cpu_temp, fan_speed, startup_conditions["ignition"]) fan_speed = handle_fan(controller, max_comp_temp, fan_speed, startup_conditions["ignition"])
msg.deviceState.fanSpeedPercentDesired = fan_speed msg.deviceState.fanSpeedPercentDesired = fan_speed
# If device is offroad we want to cool down before going onroad
# since going onroad increases load and can make temps go over 107
# We only do this if there is a relay that prevents the car from faulting
is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5)) is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5))
if max_cpu_temp > 107. or bat_temp >= 63. or (is_offroad_for_5_min and max_cpu_temp > 70.0): if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP:
# onroad not allowed # If device is offroad we want to cool down before going onroad
# since going onroad increases load and can make temps go over 107
thermal_status = ThermalStatus.danger thermal_status = ThermalStatus.danger
elif max_comp_temp > 96.0 or bat_temp > 60.:
# hysteresis between onroad not allowed and engage not allowed
thermal_status = clip(thermal_status, ThermalStatus.red, ThermalStatus.danger)
elif max_cpu_temp > 94.0:
# hysteresis between engage not allowed and uploader not allowed
thermal_status = clip(thermal_status, ThermalStatus.yellow, ThermalStatus.red)
elif max_cpu_temp > 80.0:
# uploader not allowed
thermal_status = ThermalStatus.yellow
elif max_cpu_temp > 75.0:
# hysteresis between uploader not allowed and all good
thermal_status = clip(thermal_status, ThermalStatus.green, ThermalStatus.yellow)
else: else:
thermal_status = ThermalStatus.green # default to good condition current_band = THERMAL_BANDS[thermal_status]
band_idx = list(THERMAL_BANDS.keys()).index(thermal_status)
if current_band.min_temp is not None and max_comp_temp < current_band.min_temp:
thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1]
elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp:
thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1]
# **** starting logic **** # **** starting logic ****

Loading…
Cancel
Save