dmonitoringmodeld: clean up data structures (#36624)

* update onnx

* get meta

* start

* cast

* deprecate notready

* more

* line too long

* 2
pull/36514/merge
ZwX1616 21 hours ago committed by GitHub
parent a1795f80dd
commit b778da1d7c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      cereal/log.capnp
  2. 2
      selfdrive/modeld/SConscript
  3. 94
      selfdrive/modeld/dmonitoringmodeld.py
  4. 4
      selfdrive/modeld/models/dmonitoring_model.onnx
  5. 41
      selfdrive/monitoring/helpers.py
  6. 2
      selfdrive/monitoring/test_monitoring.py
  7. 2
      selfdrive/test/process_replay/model_replay.py

@ -2166,7 +2166,8 @@ struct DriverStateV2 {
leftBlinkProb @7 :Float32; leftBlinkProb @7 :Float32;
rightBlinkProb @8 :Float32; rightBlinkProb @8 :Float32;
sunglassesProb @9 :Float32; sunglassesProb @9 :Float32;
notReadyProb @12 :List(Float32); phoneProb @13 :Float32;
notReadyProbDEPRECATED @12 :List(Float32);
occludedProbDEPRECATED @10 :Float32; occludedProbDEPRECATED @10 :Float32;
readyProbDEPRECATED @11 :List(Float32); readyProbDEPRECATED @11 :List(Float32);
} }

@ -32,7 +32,7 @@ lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LI
tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x] tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x]
# Get model metadata # Get model metadata
for model_name in ['driving_vision', 'driving_policy']: for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
fn = File(f"models/{model_name}").abspath fn = File(f"models/{model_name}").abspath
script_files = [File(Dir("#selfdrive/modeld").File("get_model_metadata.py").abspath)] script_files = [File(Dir("#selfdrive/modeld").File("get_model_metadata.py").abspath)]
cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx' cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx'

@ -7,7 +7,6 @@ from tinygrad.dtype import dtypes
import math import math
import time import time
import pickle import pickle
import ctypes
import numpy as np import numpy as np
from pathlib import Path from pathlib import Path
@ -16,47 +15,16 @@ from cereal.messaging import PubMaster, SubMaster
from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
from openpilot.common.swaglog import cloudlog from openpilot.common.swaglog import cloudlog
from openpilot.common.realtime import config_realtime_process from openpilot.common.realtime import config_realtime_process
from openpilot.common.transformations.model import dmonitoringmodel_intrinsics, DM_INPUT_SIZE from openpilot.common.transformations.model import dmonitoringmodel_intrinsics
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, MonitoringModelFrame from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, MonitoringModelFrame
from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
MODEL_WIDTH, MODEL_HEIGHT = DM_INPUT_SIZE
CALIB_LEN = 3
FEATURE_LEN = 512
OUTPUT_SIZE = 83 + FEATURE_LEN
PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld" PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld"
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED') SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
MODEL_PKL_PATH = Path(__file__).parent / 'models/dmonitoring_model_tinygrad.pkl' MODEL_PKL_PATH = Path(__file__).parent / 'models/dmonitoring_model_tinygrad.pkl'
METADATA_PATH = Path(__file__).parent / 'models/dmonitoring_model_metadata.pkl'
# TODO: slice from meta
class DriverStateResult(ctypes.Structure):
_fields_ = [
("face_orientation", ctypes.c_float*3),
("face_position", ctypes.c_float*3),
("face_orientation_std", ctypes.c_float*3),
("face_position_std", ctypes.c_float*3),
("face_prob", ctypes.c_float),
("_unused_a", ctypes.c_float*8),
("left_eye_prob", ctypes.c_float),
("_unused_b", ctypes.c_float*8),
("right_eye_prob", ctypes.c_float),
("left_blink_prob", ctypes.c_float),
("right_blink_prob", ctypes.c_float),
("sunglasses_prob", ctypes.c_float),
("_unused_c", ctypes.c_float),
("_unused_d", ctypes.c_float*4),
("not_ready_prob", ctypes.c_float*2)]
class DMonitoringModelResult(ctypes.Structure):
_fields_ = [
("driver_state_lhd", DriverStateResult),
("driver_state_rhd", DriverStateResult),
("wheel_on_right_prob", ctypes.c_float),
("features", ctypes.c_float*FEATURE_LEN)]
class ModelState: class ModelState:
@ -64,11 +32,14 @@ class ModelState:
output: np.ndarray output: np.ndarray
def __init__(self, cl_ctx): def __init__(self, cl_ctx):
assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float) with open(METADATA_PATH, 'rb') as f:
model_metadata = pickle.load(f)
self.input_shapes = model_metadata['input_shapes']
self.output_slices = model_metadata['output_slices']
self.frame = MonitoringModelFrame(cl_ctx) self.frame = MonitoringModelFrame(cl_ctx)
self.numpy_inputs = { self.numpy_inputs = {
'calib': np.zeros((1, CALIB_LEN), dtype=np.float32), 'calib': np.zeros(self.input_shapes['calib'], dtype=np.float32),
} }
self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()} self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
@ -84,9 +55,9 @@ class ModelState:
if TICI: if TICI:
# The imgs tensors are backed by opencl memory, only need init once # The imgs tensors are backed by opencl memory, only need init once
if 'input_img' not in self.tensor_inputs: if 'input_img' not in self.tensor_inputs:
self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, (1, MODEL_WIDTH*MODEL_HEIGHT), dtype=dtypes.uint8) self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, self.input_shapes['input_img'], dtype=dtypes.uint8)
else: else:
self.tensor_inputs['input_img'] = Tensor(self.frame.buffer_from_cl(input_img_cl).reshape((1, MODEL_WIDTH*MODEL_HEIGHT)), dtype=dtypes.uint8).realize() self.tensor_inputs['input_img'] = Tensor(self.frame.buffer_from_cl(input_img_cl).reshape(self.input_shapes['input_img']), dtype=dtypes.uint8).realize()
output = self.model_run(**self.tensor_inputs).contiguous().realize().uop.base.buffer.numpy() output = self.model_run(**self.tensor_inputs).contiguous().realize().uop.base.buffer.numpy()
@ -95,31 +66,31 @@ class ModelState:
return output, t2 - t1 return output, t2 - t1
def fill_driver_state(msg, ds_result: DriverStateResult): def fill_driver_state(msg, model_output, output_slices, ds_suffix):
msg.faceOrientation = list(ds_result.face_orientation) face_descs = model_output[output_slices[f'face_descs_{ds_suffix}']]
msg.faceOrientationStd = [math.exp(x) for x in ds_result.face_orientation_std] face_descs_std = face_descs[-6:]
msg.facePosition = list(ds_result.face_position[:2]) msg.faceOrientation = [float(x) for x in face_descs[:3]]
msg.facePositionStd = [math.exp(x) for x in ds_result.face_position_std[:2]] msg.faceOrientationStd = [math.exp(x) for x in face_descs_std[:3]]
msg.faceProb = float(sigmoid(ds_result.face_prob)) msg.facePosition = [float(x) for x in face_descs[3:5]]
msg.leftEyeProb = float(sigmoid(ds_result.left_eye_prob)) msg.facePositionStd = [math.exp(x) for x in face_descs_std[3:5]]
msg.rightEyeProb = float(sigmoid(ds_result.right_eye_prob)) msg.faceProb = float(sigmoid(model_output[output_slices[f'face_prob_{ds_suffix}']][0]))
msg.leftBlinkProb = float(sigmoid(ds_result.left_blink_prob)) msg.leftEyeProb = float(sigmoid(model_output[output_slices[f'left_eye_prob_{ds_suffix}']][0]))
msg.rightBlinkProb = float(sigmoid(ds_result.right_blink_prob)) msg.rightEyeProb = float(sigmoid(model_output[output_slices[f'right_eye_prob_{ds_suffix}']][0]))
msg.sunglassesProb = float(sigmoid(ds_result.sunglasses_prob)) msg.leftBlinkProb = float(sigmoid(model_output[output_slices[f'left_blink_prob_{ds_suffix}']][0]))
msg.notReadyProb = [float(sigmoid(x)) for x in ds_result.not_ready_prob] msg.rightBlinkProb = float(sigmoid(model_output[output_slices[f'right_blink_prob_{ds_suffix}']][0]))
msg.sunglassesProb = float(sigmoid(model_output[output_slices[f'sunglasses_prob_{ds_suffix}']][0]))
msg.phoneProb = float(sigmoid(model_output[output_slices[f'using_phone_prob_{ds_suffix}']][0]))
def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts: int, execution_time: float, gpu_execution_time: float):
model_result = ctypes.cast(model_output.ctypes.data, ctypes.POINTER(DMonitoringModelResult)).contents def get_driverstate_packet(model_output: np.ndarray, output_slices: dict[str, slice], frame_id: int, location_ts: int, exec_time: float, gpu_exec_time: float):
msg = messaging.new_message('driverStateV2', valid=True) msg = messaging.new_message('driverStateV2', valid=True)
ds = msg.driverStateV2 ds = msg.driverStateV2
ds.frameId = frame_id ds.frameId = frame_id
ds.modelExecutionTime = execution_time ds.modelExecutionTime = exec_time
ds.gpuExecutionTime = gpu_execution_time ds.gpuExecutionTime = gpu_exec_time
ds.wheelOnRightProb = float(sigmoid(model_result.wheel_on_right_prob)) ds.wheelOnRightProb = float(sigmoid(model_output[output_slices['wheel_on_right']][0]))
ds.rawPredictions = model_output.tobytes() if SEND_RAW_PRED else b'' ds.rawPredictions = model_output.tobytes() if SEND_RAW_PRED else b''
fill_driver_state(ds.leftDriverData, model_result.driver_state_lhd) fill_driver_state(ds.leftDriverData, model_output, output_slices, 'lhd')
fill_driver_state(ds.rightDriverData, model_result.driver_state_rhd) fill_driver_state(ds.rightDriverData, model_output, output_slices, 'rhd')
return msg return msg
@ -140,7 +111,7 @@ def main():
sm = SubMaster(["liveCalibration"]) sm = SubMaster(["liveCalibration"])
pm = PubMaster(["driverStateV2"]) pm = PubMaster(["driverStateV2"])
calib = np.zeros(CALIB_LEN, dtype=np.float32) calib = np.zeros(model.numpy_inputs['calib'].size, dtype=np.float32)
model_transform = None model_transform = None
while True: while True:
@ -160,7 +131,8 @@ def main():
model_output, gpu_execution_time = model.run(buf, calib, model_transform) model_output, gpu_execution_time = model.run(buf, calib, model_transform)
t2 = time.perf_counter() t2 = time.perf_counter()
pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time)) msg = get_driverstate_packet(model_output, model.output_slices, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time)
pm.send("driverStateV2", msg)
if __name__ == "__main__": if __name__ == "__main__":

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1 version https://git-lfs.github.com/spec/v1
oid sha256:3a53626ab84757813fb16a1441704f2ae7192bef88c331bdc2415be6981d204f oid sha256:3446bf8b22e50e47669a25bf32460ae8baf8547037f346753e19ecbfcf6d4e59
size 7191776 size 6954368

@ -37,12 +37,12 @@ class DRIVER_MONITOR_SETTINGS:
self._BLINK_THRESHOLD = 0.865 self._BLINK_THRESHOLD = 0.865
if HARDWARE.get_device_type() == 'mici': if HARDWARE.get_device_type() == 'mici':
self._EE_THRESH11 = 0.75 self._PHONE_THRESH = 0.75
else: else:
self._EE_THRESH11 = 0.4 self._PHONE_THRESH = 0.4
self._EE_THRESH12 = 15.0 self._PHONE_THRESH2 = 15.0
self._EE_MAX_OFFSET1 = 0.06 self._PHONE_MAX_OFFSET = 0.06
self._EE_MIN_OFFSET1 = 0.025 self._PHONE_MIN_OFFSET = 0.025
self._POSE_PITCH_THRESHOLD = 0.3133 self._POSE_PITCH_THRESHOLD = 0.3133
self._POSE_PITCH_THRESHOLD_SLACK = 0.3237 self._POSE_PITCH_THRESHOLD_SLACK = 0.3237
@ -84,7 +84,7 @@ class DistractedType:
NOT_DISTRACTED = 0 NOT_DISTRACTED = 0
DISTRACTED_POSE = 1 << 0 DISTRACTED_POSE = 1 << 0
DISTRACTED_BLINK = 1 << 1 DISTRACTED_BLINK = 1 << 1
DISTRACTED_E2E = 1 << 2 DISTRACTED_PHONE = 1 << 2
class DriverPose: class DriverPose:
def __init__(self, max_trackable): def __init__(self, max_trackable):
@ -142,9 +142,9 @@ class DriverMonitoring:
self.wheelpos_learner = RunningStatFilter() self.wheelpos_learner = RunningStatFilter()
self.pose = DriverPose(self.settings._POSE_OFFSET_MAX_COUNT) self.pose = DriverPose(self.settings._POSE_OFFSET_MAX_COUNT)
self.blink = DriverBlink() self.blink = DriverBlink()
self.eev1 = 0. self.phone_prob = 0.
self.ee1_offseter = RunningStatFilter(max_trackable=self.settings._POSE_OFFSET_MAX_COUNT) self.phone_offseter = RunningStatFilter(max_trackable=self.settings._POSE_OFFSET_MAX_COUNT)
self.ee1_calibrated = False self.phone_calibrated = False
self.always_on = always_on self.always_on = always_on
self.distracted_types = [] self.distracted_types = []
@ -242,13 +242,13 @@ class DriverMonitoring:
if (self.blink.left + self.blink.right)*0.5 > self.settings._BLINK_THRESHOLD: if (self.blink.left + self.blink.right)*0.5 > self.settings._BLINK_THRESHOLD:
distracted_types.append(DistractedType.DISTRACTED_BLINK) distracted_types.append(DistractedType.DISTRACTED_BLINK)
if self.ee1_calibrated: if self.phone_calibrated:
ee1_dist = self.eev1 > max(min(self.ee1_offseter.filtered_stat.M, self.settings._EE_MAX_OFFSET1), self.settings._EE_MIN_OFFSET1) \ using_phone = self.phone_prob > max(min(self.phone_offseter.filtered_stat.M, self.settings._PHONE_MAX_OFFSET), self.settings._PHONE_MIN_OFFSET) \
* self.settings._EE_THRESH12 * self.settings._PHONE_THRESH2
else: else:
ee1_dist = self.eev1 > self.settings._EE_THRESH11 using_phone = self.phone_prob > self.settings._PHONE_THRESH
if ee1_dist: if using_phone:
distracted_types.append(DistractedType.DISTRACTED_E2E) distracted_types.append(DistractedType.DISTRACTED_PHONE)
return distracted_types return distracted_types
@ -267,8 +267,7 @@ class DriverMonitoring:
self.wheel_on_right = self.wheel_on_right_last self.wheel_on_right = self.wheel_on_right_last
driver_data = driver_state.rightDriverData if self.wheel_on_right else driver_state.leftDriverData driver_data = driver_state.rightDriverData if self.wheel_on_right else driver_state.leftDriverData
if not all(len(x) > 0 for x in (driver_data.faceOrientation, driver_data.facePosition, if not all(len(x) > 0 for x in (driver_data.faceOrientation, driver_data.facePosition,
driver_data.faceOrientationStd, driver_data.facePositionStd, driver_data.faceOrientationStd, driver_data.facePositionStd)):
driver_data.notReadyProb)):
return return
self.face_detected = driver_data.faceProb > self.settings._FACE_THRESHOLD self.face_detected = driver_data.faceProb > self.settings._FACE_THRESHOLD
@ -284,10 +283,10 @@ class DriverMonitoring:
* (driver_data.sunglassesProb < self.settings._SG_THRESHOLD) * (driver_data.sunglassesProb < self.settings._SG_THRESHOLD)
self.blink.right = driver_data.rightBlinkProb * (driver_data.rightEyeProb > self.settings._EYE_THRESHOLD) \ self.blink.right = driver_data.rightBlinkProb * (driver_data.rightEyeProb > self.settings._EYE_THRESHOLD) \
* (driver_data.sunglassesProb < self.settings._SG_THRESHOLD) * (driver_data.sunglassesProb < self.settings._SG_THRESHOLD)
self.eev1 = driver_data.notReadyProb[0] self.phone_prob = driver_data.phoneProb
self.distracted_types = self._get_distracted_types() self.distracted_types = self._get_distracted_types()
self.driver_distracted = (DistractedType.DISTRACTED_E2E in self.distracted_types or DistractedType.DISTRACTED_POSE in self.distracted_types self.driver_distracted = (DistractedType.DISTRACTED_PHONE in self.distracted_types or DistractedType.DISTRACTED_POSE in self.distracted_types
or DistractedType.DISTRACTED_BLINK in self.distracted_types) \ or DistractedType.DISTRACTED_BLINK in self.distracted_types) \
and driver_data.faceProb > self.settings._FACE_THRESHOLD and self.pose.low_std and driver_data.faceProb > self.settings._FACE_THRESHOLD and self.pose.low_std
self.driver_distraction_filter.update(self.driver_distracted) self.driver_distraction_filter.update(self.driver_distracted)
@ -297,11 +296,11 @@ class DriverMonitoring:
if self.face_detected and car_speed > self.settings._POSE_CALIB_MIN_SPEED and self.pose.low_std and (not op_engaged or not self.driver_distracted): if self.face_detected and car_speed > self.settings._POSE_CALIB_MIN_SPEED and self.pose.low_std and (not op_engaged or not self.driver_distracted):
self.pose.pitch_offseter.push_and_update(self.pose.pitch) self.pose.pitch_offseter.push_and_update(self.pose.pitch)
self.pose.yaw_offseter.push_and_update(self.pose.yaw) self.pose.yaw_offseter.push_and_update(self.pose.yaw)
self.ee1_offseter.push_and_update(self.eev1) self.phone_offseter.push_and_update(self.phone_prob)
self.pose.calibrated = self.pose.pitch_offseter.filtered_stat.n > self.settings._POSE_OFFSET_MIN_COUNT and \ self.pose.calibrated = self.pose.pitch_offseter.filtered_stat.n > self.settings._POSE_OFFSET_MIN_COUNT and \
self.pose.yaw_offseter.filtered_stat.n > self.settings._POSE_OFFSET_MIN_COUNT self.pose.yaw_offseter.filtered_stat.n > self.settings._POSE_OFFSET_MIN_COUNT
self.ee1_calibrated = self.ee1_offseter.filtered_stat.n > self.settings._POSE_OFFSET_MIN_COUNT self.phone_calibrated = self.phone_offseter.filtered_stat.n > self.settings._POSE_OFFSET_MIN_COUNT
if self.face_detected and not self.driver_distracted: if self.face_detected and not self.driver_distracted:
if model_std_max > self.settings._DCAM_UNCERTAIN_ALERT_THRESHOLD: if model_std_max > self.settings._DCAM_UNCERTAIN_ALERT_THRESHOLD:

@ -25,7 +25,7 @@ def make_msg(face_detected, distracted=False, model_uncertain=False):
ds.leftDriverData.faceOrientationStd = [1.*model_uncertain, 1.*model_uncertain, 1.*model_uncertain] ds.leftDriverData.faceOrientationStd = [1.*model_uncertain, 1.*model_uncertain, 1.*model_uncertain]
ds.leftDriverData.facePositionStd = [1.*model_uncertain, 1.*model_uncertain] ds.leftDriverData.facePositionStd = [1.*model_uncertain, 1.*model_uncertain]
# TODO: test both separately when e2e is used # TODO: test both separately when e2e is used
ds.leftDriverData.notReadyProb = [0., 0.] ds.leftDriverData.phoneProb = 0.
return ds return ds

@ -77,7 +77,7 @@ def generate_report(proposed, master, tmp, commit):
(lambda x: get_idx_if_non_empty(x.leftDriverData.faceProb), "leftDriverData.faceProb"), (lambda x: get_idx_if_non_empty(x.leftDriverData.faceProb), "leftDriverData.faceProb"),
(lambda x: get_idx_if_non_empty(x.leftDriverData.faceOrientation, 0), "leftDriverData.faceOrientation0"), (lambda x: get_idx_if_non_empty(x.leftDriverData.faceOrientation, 0), "leftDriverData.faceOrientation0"),
(lambda x: get_idx_if_non_empty(x.leftDriverData.leftBlinkProb), "leftDriverData.leftBlinkProb"), (lambda x: get_idx_if_non_empty(x.leftDriverData.leftBlinkProb), "leftDriverData.leftBlinkProb"),
(lambda x: get_idx_if_non_empty(x.leftDriverData.notReadyProb, 0), "leftDriverData.notReadyProb0"), (lambda x: get_idx_if_non_empty(x.leftDriverData.phoneProb), "leftDriverData.phoneProb"),
(lambda x: get_idx_if_non_empty(x.rightDriverData.faceProb), "rightDriverData.faceProb"), (lambda x: get_idx_if_non_empty(x.rightDriverData.faceProb), "rightDriverData.faceProb"),
], "driverStateV2") ], "driverStateV2")

Loading…
Cancel
Save