Remove NVMe logging (#35740)

pull/35741/head
Adeeb Shihadeh 1 day ago committed by GitHub
parent ddb0d7c1a5
commit a8fd55740b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 2
      cereal/log.capnp
  2. 1
      common/params_keys.h
  3. 4
      selfdrive/selfdrived/alerts_offroad.json
  4. 3
      system/hardware/base.py
  5. 17
      system/hardware/hardwared.py
  6. 2
      system/hardware/pc/hardware.py
  7. 10
      system/hardware/tici/hardware.py
  8. 3
      system/loggerd/bootlog.cc

@ -492,7 +492,6 @@ struct DeviceState @0xa4d8b5af2aa492eb {
gpuTempC @27 :List(Float32);
dspTempC @49 :Float32;
memoryTempC @28 :Float32;
nvmeTempC @35 :List(Float32);
modemTempC @36 :List(Float32);
pmicTempC @39 :List(Float32);
intakeTempC @46 :Float32;
@ -568,6 +567,7 @@ struct DeviceState @0xa4d8b5af2aa492eb {
chargingDisabledDEPRECATED @18 :Bool;
usbOnlineDEPRECATED @12 :Bool;
ambientTempCDEPRECATED @30 :Float32;
nvmeTempCDEPRECATED @35 :List(Float32);
}
struct PandaState @0xa7649e2575e4591e {

@ -82,7 +82,6 @@ inline static std::unordered_map<std::string, uint32_t> keys = {
{"NetworkMetered", PERSISTENT},
{"ObdMultiplexingChanged", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
{"ObdMultiplexingEnabled", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
{"Offroad_BadNvme", CLEAR_ON_MANAGER_START},
{"Offroad_CarUnrecognized", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
{"Offroad_ConnectivityNeeded", CLEAR_ON_MANAGER_START},
{"Offroad_ConnectivityNeededPrompt", CLEAR_ON_MANAGER_START},

@ -33,10 +33,6 @@
"text": "NVMe drive not mounted.",
"severity": 1
},
"Offroad_BadNvme": {
"text": "Unsupported NVMe drive detected. Device may draw significantly more power and overheat due to the unsupported NVMe.",
"severity": 1
},
"Offroad_CarUnrecognized": {
"text": "openpilot was unable to identify your car. Your car is either unsupported or its ECUs are not recognized. Please submit a pull request to add the firmware versions to the proper vehicle. Need help? Join discord.comma.ai.",
"severity": 0

@ -199,9 +199,6 @@ class HardwareBase(ABC):
def get_modem_temperatures(self):
pass
@abstractmethod
def get_nvme_temperatures(self):
pass
@abstractmethod
def initialize_hardware(self):

@ -39,7 +39,7 @@ ONROAD_CYCLE_TIME = 1 # seconds to wait offroad after requesting an onroad cycl
ThermalBand = namedtuple("ThermalBand", ['min_temp', 'max_temp'])
HardwareState = namedtuple("HardwareState", ['network_type', 'network_info', 'network_strength', 'network_stats',
'network_metered', 'nvme_temps', 'modem_temps'])
'network_metered', 'modem_temps'])
# List of thermal bands. We will stay within this region as long as we are within the bounds.
# When exiting the bounds, we'll jump to the lower or higher band. Bands are ordered in the dict.
@ -142,7 +142,6 @@ def hw_state_thread(end_event, hw_queue):
network_strength=HARDWARE.get_network_strength(network_type),
network_stats={'wwanTx': tx, 'wwanRx': rx},
network_metered=HARDWARE.get_network_metered(network_type),
nvme_temps=HARDWARE.get_nvme_temperatures(),
modem_temps=modem_temps,
)
@ -189,7 +188,6 @@ def hardware_thread(end_event, hw_queue) -> None:
network_metered=False,
network_strength=NetworkStrength.unknown,
network_stats={'wwanTx': -1, 'wwanRx': -1},
nvme_temps=[],
modem_temps=[],
)
@ -268,7 +266,6 @@ def hardware_thread(end_event, hw_queue) -> None:
if last_hw_state.network_info is not None:
msg.deviceState.networkInfo = last_hw_state.network_info
msg.deviceState.nvmeTempC = last_hw_state.nvme_temps
msg.deviceState.modemTempC = last_hw_state.modem_temps
msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness()
@ -337,16 +334,6 @@ def hardware_thread(end_event, hw_queue) -> None:
if not os.path.isfile("/persist/comma/living-in-the-moment"):
if not Path("/data/media").is_mount():
set_offroad_alert_if_changed("Offroad_StorageMissing", True)
else:
# check for bad NVMe
try:
with open("/sys/block/nvme0n1/device/model") as f:
model = f.read().strip()
if not model.startswith("Samsung SSD 980") and params.get("Offroad_BadNvme") is None:
set_offroad_alert_if_changed("Offroad_BadNvme", True)
cloudlog.event("Unsupported NVMe", model=model, error=True)
except Exception:
pass
# Handle offroad/onroad transition
should_start = all(onroad_conditions.values())
@ -431,8 +418,6 @@ def hardware_thread(end_event, hw_queue) -> None:
statlog.gauge("memory_temperature", msg.deviceState.memoryTempC)
for i, temp in enumerate(msg.deviceState.pmicTempC):
statlog.gauge(f"pmic{i}_temperature", temp)
for i, temp in enumerate(last_hw_state.nvme_temps):
statlog.gauge(f"nvme_temperature{i}", temp)
for i, temp in enumerate(last_hw_state.modem_temps):
statlog.gauge(f"modem_temperature{i}", temp)
statlog.gauge("fan_speed_percent_desired", msg.deviceState.fanSpeedPercentDesired)

@ -70,8 +70,6 @@ class Pc(HardwareBase):
def get_modem_temperatures(self):
return []
def get_nvme_temperatures(self):
return []
def initialize_hardware(self):
pass

@ -1,4 +1,3 @@
import json
import math
import os
import subprocess
@ -291,15 +290,6 @@ class Tici(HardwareBase):
except Exception:
return []
def get_nvme_temperatures(self):
ret = []
try:
out = subprocess.check_output("sudo smartctl -aj /dev/nvme0", shell=True)
dat = json.loads(out)
ret = list(map(int, dat["nvme_smart_health_information_log"]["temperature_sensors"]))
except Exception:
pass
return ret
def get_current_power_draw(self):
return (self.read_param_file("/sys/class/hwmon/hwmon1/power1_input", int) / 1e6)

@ -31,9 +31,6 @@ static kj::Array<capnp::word> build_boot_log() {
"[ -x \"$(command -v journalctl)\" ] && journalctl -o short-monotonic",
};
if (Hardware::TICI()) {
bootlog_commands.push_back("[ -e /dev/nvme0 ] && sudo nvme smart-log --output-format=json /dev/nvme0");
}
auto commands = boot.initCommands().initEntries(bootlog_commands.size());
for (int j = 0; j < bootlog_commands.size(); j++) {

Loading…
Cancel
Save