Remove NVMe logging (#35740)

pull/35741/head
Adeeb Shihadeh 2 days ago committed by GitHub
parent ddb0d7c1a5
commit a8fd55740b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 2
      cereal/log.capnp
  2. 1
      common/params_keys.h
  3. 4
      selfdrive/selfdrived/alerts_offroad.json
  4. 3
      system/hardware/base.py
  5. 17
      system/hardware/hardwared.py
  6. 2
      system/hardware/pc/hardware.py
  7. 10
      system/hardware/tici/hardware.py
  8. 3
      system/loggerd/bootlog.cc

@ -492,7 +492,6 @@ struct DeviceState @0xa4d8b5af2aa492eb {
gpuTempC @27 :List(Float32); gpuTempC @27 :List(Float32);
dspTempC @49 :Float32; dspTempC @49 :Float32;
memoryTempC @28 :Float32; memoryTempC @28 :Float32;
nvmeTempC @35 :List(Float32);
modemTempC @36 :List(Float32); modemTempC @36 :List(Float32);
pmicTempC @39 :List(Float32); pmicTempC @39 :List(Float32);
intakeTempC @46 :Float32; intakeTempC @46 :Float32;
@ -568,6 +567,7 @@ struct DeviceState @0xa4d8b5af2aa492eb {
chargingDisabledDEPRECATED @18 :Bool; chargingDisabledDEPRECATED @18 :Bool;
usbOnlineDEPRECATED @12 :Bool; usbOnlineDEPRECATED @12 :Bool;
ambientTempCDEPRECATED @30 :Float32; ambientTempCDEPRECATED @30 :Float32;
nvmeTempCDEPRECATED @35 :List(Float32);
} }
struct PandaState @0xa7649e2575e4591e { struct PandaState @0xa7649e2575e4591e {

@ -82,7 +82,6 @@ inline static std::unordered_map<std::string, uint32_t> keys = {
{"NetworkMetered", PERSISTENT}, {"NetworkMetered", PERSISTENT},
{"ObdMultiplexingChanged", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION}, {"ObdMultiplexingChanged", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
{"ObdMultiplexingEnabled", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION}, {"ObdMultiplexingEnabled", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
{"Offroad_BadNvme", CLEAR_ON_MANAGER_START},
{"Offroad_CarUnrecognized", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION}, {"Offroad_CarUnrecognized", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
{"Offroad_ConnectivityNeeded", CLEAR_ON_MANAGER_START}, {"Offroad_ConnectivityNeeded", CLEAR_ON_MANAGER_START},
{"Offroad_ConnectivityNeededPrompt", CLEAR_ON_MANAGER_START}, {"Offroad_ConnectivityNeededPrompt", CLEAR_ON_MANAGER_START},

@ -33,10 +33,6 @@
"text": "NVMe drive not mounted.", "text": "NVMe drive not mounted.",
"severity": 1 "severity": 1
}, },
"Offroad_BadNvme": {
"text": "Unsupported NVMe drive detected. Device may draw significantly more power and overheat due to the unsupported NVMe.",
"severity": 1
},
"Offroad_CarUnrecognized": { "Offroad_CarUnrecognized": {
"text": "openpilot was unable to identify your car. Your car is either unsupported or its ECUs are not recognized. Please submit a pull request to add the firmware versions to the proper vehicle. Need help? Join discord.comma.ai.", "text": "openpilot was unable to identify your car. Your car is either unsupported or its ECUs are not recognized. Please submit a pull request to add the firmware versions to the proper vehicle. Need help? Join discord.comma.ai.",
"severity": 0 "severity": 0

@ -199,9 +199,6 @@ class HardwareBase(ABC):
def get_modem_temperatures(self): def get_modem_temperatures(self):
pass pass
@abstractmethod
def get_nvme_temperatures(self):
pass
@abstractmethod @abstractmethod
def initialize_hardware(self): def initialize_hardware(self):

@ -39,7 +39,7 @@ ONROAD_CYCLE_TIME = 1 # seconds to wait offroad after requesting an onroad cycl
ThermalBand = namedtuple("ThermalBand", ['min_temp', 'max_temp']) ThermalBand = namedtuple("ThermalBand", ['min_temp', 'max_temp'])
HardwareState = namedtuple("HardwareState", ['network_type', 'network_info', 'network_strength', 'network_stats', HardwareState = namedtuple("HardwareState", ['network_type', 'network_info', 'network_strength', 'network_stats',
'network_metered', 'nvme_temps', 'modem_temps']) 'network_metered', 'modem_temps'])
# List of thermal bands. We will stay within this region as long as we are within the bounds. # List of thermal bands. We will stay within this region as long as we are within the bounds.
# When exiting the bounds, we'll jump to the lower or higher band. Bands are ordered in the dict. # When exiting the bounds, we'll jump to the lower or higher band. Bands are ordered in the dict.
@ -142,7 +142,6 @@ def hw_state_thread(end_event, hw_queue):
network_strength=HARDWARE.get_network_strength(network_type), network_strength=HARDWARE.get_network_strength(network_type),
network_stats={'wwanTx': tx, 'wwanRx': rx}, network_stats={'wwanTx': tx, 'wwanRx': rx},
network_metered=HARDWARE.get_network_metered(network_type), network_metered=HARDWARE.get_network_metered(network_type),
nvme_temps=HARDWARE.get_nvme_temperatures(),
modem_temps=modem_temps, modem_temps=modem_temps,
) )
@ -189,7 +188,6 @@ def hardware_thread(end_event, hw_queue) -> None:
network_metered=False, network_metered=False,
network_strength=NetworkStrength.unknown, network_strength=NetworkStrength.unknown,
network_stats={'wwanTx': -1, 'wwanRx': -1}, network_stats={'wwanTx': -1, 'wwanRx': -1},
nvme_temps=[],
modem_temps=[], modem_temps=[],
) )
@ -268,7 +266,6 @@ def hardware_thread(end_event, hw_queue) -> None:
if last_hw_state.network_info is not None: if last_hw_state.network_info is not None:
msg.deviceState.networkInfo = last_hw_state.network_info msg.deviceState.networkInfo = last_hw_state.network_info
msg.deviceState.nvmeTempC = last_hw_state.nvme_temps
msg.deviceState.modemTempC = last_hw_state.modem_temps msg.deviceState.modemTempC = last_hw_state.modem_temps
msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness() msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness()
@ -337,16 +334,6 @@ def hardware_thread(end_event, hw_queue) -> None:
if not os.path.isfile("/persist/comma/living-in-the-moment"): if not os.path.isfile("/persist/comma/living-in-the-moment"):
if not Path("/data/media").is_mount(): if not Path("/data/media").is_mount():
set_offroad_alert_if_changed("Offroad_StorageMissing", True) set_offroad_alert_if_changed("Offroad_StorageMissing", True)
else:
# check for bad NVMe
try:
with open("/sys/block/nvme0n1/device/model") as f:
model = f.read().strip()
if not model.startswith("Samsung SSD 980") and params.get("Offroad_BadNvme") is None:
set_offroad_alert_if_changed("Offroad_BadNvme", True)
cloudlog.event("Unsupported NVMe", model=model, error=True)
except Exception:
pass
# Handle offroad/onroad transition # Handle offroad/onroad transition
should_start = all(onroad_conditions.values()) should_start = all(onroad_conditions.values())
@ -431,8 +418,6 @@ def hardware_thread(end_event, hw_queue) -> None:
statlog.gauge("memory_temperature", msg.deviceState.memoryTempC) statlog.gauge("memory_temperature", msg.deviceState.memoryTempC)
for i, temp in enumerate(msg.deviceState.pmicTempC): for i, temp in enumerate(msg.deviceState.pmicTempC):
statlog.gauge(f"pmic{i}_temperature", temp) statlog.gauge(f"pmic{i}_temperature", temp)
for i, temp in enumerate(last_hw_state.nvme_temps):
statlog.gauge(f"nvme_temperature{i}", temp)
for i, temp in enumerate(last_hw_state.modem_temps): for i, temp in enumerate(last_hw_state.modem_temps):
statlog.gauge(f"modem_temperature{i}", temp) statlog.gauge(f"modem_temperature{i}", temp)
statlog.gauge("fan_speed_percent_desired", msg.deviceState.fanSpeedPercentDesired) statlog.gauge("fan_speed_percent_desired", msg.deviceState.fanSpeedPercentDesired)

@ -70,8 +70,6 @@ class Pc(HardwareBase):
def get_modem_temperatures(self): def get_modem_temperatures(self):
return [] return []
def get_nvme_temperatures(self):
return []
def initialize_hardware(self): def initialize_hardware(self):
pass pass

@ -1,4 +1,3 @@
import json
import math import math
import os import os
import subprocess import subprocess
@ -291,15 +290,6 @@ class Tici(HardwareBase):
except Exception: except Exception:
return [] return []
def get_nvme_temperatures(self):
ret = []
try:
out = subprocess.check_output("sudo smartctl -aj /dev/nvme0", shell=True)
dat = json.loads(out)
ret = list(map(int, dat["nvme_smart_health_information_log"]["temperature_sensors"]))
except Exception:
pass
return ret
def get_current_power_draw(self): def get_current_power_draw(self):
return (self.read_param_file("/sys/class/hwmon/hwmon1/power1_input", int) / 1e6) return (self.read_param_file("/sys/class/hwmon/hwmon1/power1_input", int) / 1e6)

@ -31,9 +31,6 @@ static kj::Array<capnp::word> build_boot_log() {
"[ -x \"$(command -v journalctl)\" ] && journalctl -o short-monotonic", "[ -x \"$(command -v journalctl)\" ] && journalctl -o short-monotonic",
}; };
if (Hardware::TICI()) {
bootlog_commands.push_back("[ -e /dev/nvme0 ] && sudo nvme smart-log --output-format=json /dev/nvme0");
}
auto commands = boot.initCommands().initEntries(bootlog_commands.size()); auto commands = boot.initCommands().initEntries(bootlog_commands.size());
for (int j = 0; j < bootlog_commands.size(); j++) { for (int j = 0; j < bootlog_commands.size(); j++) {

Loading…
Cancel
Save