diff --git a/cereal/log.capnp b/cereal/log.capnp index 981cfd468f..86774b8d42 100644 --- a/cereal/log.capnp +++ b/cereal/log.capnp @@ -2166,7 +2166,8 @@ struct DriverStateV2 { leftBlinkProb @7 :Float32; rightBlinkProb @8 :Float32; sunglassesProb @9 :Float32; - notReadyProb @12 :List(Float32); + phoneProb @13 :Float32; + notReadyProbDEPRECATED @12 :List(Float32); occludedProbDEPRECATED @10 :Float32; readyProbDEPRECATED @11 :List(Float32); } diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript index ae549f3a76..8b33a457f2 100644 --- a/selfdrive/modeld/SConscript +++ b/selfdrive/modeld/SConscript @@ -32,7 +32,7 @@ lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LI tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x] # Get model metadata -for model_name in ['driving_vision', 'driving_policy']: +for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']: fn = File(f"models/{model_name}").abspath script_files = [File(Dir("#selfdrive/modeld").File("get_model_metadata.py").abspath)] cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx' diff --git a/selfdrive/modeld/dmonitoringmodeld.py b/selfdrive/modeld/dmonitoringmodeld.py index 2851a3e7da..dc2de6f998 100755 --- a/selfdrive/modeld/dmonitoringmodeld.py +++ b/selfdrive/modeld/dmonitoringmodeld.py @@ -7,7 +7,6 @@ from tinygrad.dtype import dtypes import math import time import pickle -import ctypes import numpy as np from pathlib import Path @@ -16,47 +15,16 @@ from cereal.messaging import PubMaster, SubMaster from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf from openpilot.common.swaglog import cloudlog from openpilot.common.realtime import config_realtime_process -from openpilot.common.transformations.model import dmonitoringmodel_intrinsics, DM_INPUT_SIZE +from openpilot.common.transformations.model import dmonitoringmodel_intrinsics from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, MonitoringModelFrame from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address -MODEL_WIDTH, MODEL_HEIGHT = DM_INPUT_SIZE -CALIB_LEN = 3 -FEATURE_LEN = 512 -OUTPUT_SIZE = 83 + FEATURE_LEN - PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld" SEND_RAW_PRED = os.getenv('SEND_RAW_PRED') MODEL_PKL_PATH = Path(__file__).parent / 'models/dmonitoring_model_tinygrad.pkl' - -# TODO: slice from meta -class DriverStateResult(ctypes.Structure): - _fields_ = [ - ("face_orientation", ctypes.c_float*3), - ("face_position", ctypes.c_float*3), - ("face_orientation_std", ctypes.c_float*3), - ("face_position_std", ctypes.c_float*3), - ("face_prob", ctypes.c_float), - ("_unused_a", ctypes.c_float*8), - ("left_eye_prob", ctypes.c_float), - ("_unused_b", ctypes.c_float*8), - ("right_eye_prob", ctypes.c_float), - ("left_blink_prob", ctypes.c_float), - ("right_blink_prob", ctypes.c_float), - ("sunglasses_prob", ctypes.c_float), - ("_unused_c", ctypes.c_float), - ("_unused_d", ctypes.c_float*4), - ("not_ready_prob", ctypes.c_float*2)] - - -class DMonitoringModelResult(ctypes.Structure): - _fields_ = [ - ("driver_state_lhd", DriverStateResult), - ("driver_state_rhd", DriverStateResult), - ("wheel_on_right_prob", ctypes.c_float), - ("features", ctypes.c_float*FEATURE_LEN)] +METADATA_PATH = Path(__file__).parent / 'models/dmonitoring_model_metadata.pkl' class ModelState: @@ -64,11 +32,14 @@ class ModelState: output: np.ndarray def __init__(self, cl_ctx): - assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float) + with open(METADATA_PATH, 'rb') as f: + model_metadata = pickle.load(f) + self.input_shapes = model_metadata['input_shapes'] + self.output_slices = model_metadata['output_slices'] self.frame = MonitoringModelFrame(cl_ctx) self.numpy_inputs = { - 'calib': np.zeros((1, CALIB_LEN), dtype=np.float32), + 'calib': np.zeros(self.input_shapes['calib'], dtype=np.float32), } self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()} @@ -84,9 +55,9 @@ class ModelState: if TICI: # The imgs tensors are backed by opencl memory, only need init once if 'input_img' not in self.tensor_inputs: - self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, (1, MODEL_WIDTH*MODEL_HEIGHT), dtype=dtypes.uint8) + self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, self.input_shapes['input_img'], dtype=dtypes.uint8) else: - self.tensor_inputs['input_img'] = Tensor(self.frame.buffer_from_cl(input_img_cl).reshape((1, MODEL_WIDTH*MODEL_HEIGHT)), dtype=dtypes.uint8).realize() + self.tensor_inputs['input_img'] = Tensor(self.frame.buffer_from_cl(input_img_cl).reshape(self.input_shapes['input_img']), dtype=dtypes.uint8).realize() output = self.model_run(**self.tensor_inputs).contiguous().realize().uop.base.buffer.numpy() @@ -95,31 +66,31 @@ class ModelState: return output, t2 - t1 -def fill_driver_state(msg, ds_result: DriverStateResult): - msg.faceOrientation = list(ds_result.face_orientation) - msg.faceOrientationStd = [math.exp(x) for x in ds_result.face_orientation_std] - msg.facePosition = list(ds_result.face_position[:2]) - msg.facePositionStd = [math.exp(x) for x in ds_result.face_position_std[:2]] - msg.faceProb = float(sigmoid(ds_result.face_prob)) - msg.leftEyeProb = float(sigmoid(ds_result.left_eye_prob)) - msg.rightEyeProb = float(sigmoid(ds_result.right_eye_prob)) - msg.leftBlinkProb = float(sigmoid(ds_result.left_blink_prob)) - msg.rightBlinkProb = float(sigmoid(ds_result.right_blink_prob)) - msg.sunglassesProb = float(sigmoid(ds_result.sunglasses_prob)) - msg.notReadyProb = [float(sigmoid(x)) for x in ds_result.not_ready_prob] - - -def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts: int, execution_time: float, gpu_execution_time: float): - model_result = ctypes.cast(model_output.ctypes.data, ctypes.POINTER(DMonitoringModelResult)).contents +def fill_driver_state(msg, model_output, output_slices, ds_suffix): + face_descs = model_output[output_slices[f'face_descs_{ds_suffix}']] + face_descs_std = face_descs[-6:] + msg.faceOrientation = [float(x) for x in face_descs[:3]] + msg.faceOrientationStd = [math.exp(x) for x in face_descs_std[:3]] + msg.facePosition = [float(x) for x in face_descs[3:5]] + msg.facePositionStd = [math.exp(x) for x in face_descs_std[3:5]] + msg.faceProb = float(sigmoid(model_output[output_slices[f'face_prob_{ds_suffix}']][0])) + msg.leftEyeProb = float(sigmoid(model_output[output_slices[f'left_eye_prob_{ds_suffix}']][0])) + msg.rightEyeProb = float(sigmoid(model_output[output_slices[f'right_eye_prob_{ds_suffix}']][0])) + msg.leftBlinkProb = float(sigmoid(model_output[output_slices[f'left_blink_prob_{ds_suffix}']][0])) + msg.rightBlinkProb = float(sigmoid(model_output[output_slices[f'right_blink_prob_{ds_suffix}']][0])) + msg.sunglassesProb = float(sigmoid(model_output[output_slices[f'sunglasses_prob_{ds_suffix}']][0])) + msg.phoneProb = float(sigmoid(model_output[output_slices[f'using_phone_prob_{ds_suffix}']][0])) + +def get_driverstate_packet(model_output: np.ndarray, output_slices: dict[str, slice], frame_id: int, location_ts: int, exec_time: float, gpu_exec_time: float): msg = messaging.new_message('driverStateV2', valid=True) ds = msg.driverStateV2 ds.frameId = frame_id - ds.modelExecutionTime = execution_time - ds.gpuExecutionTime = gpu_execution_time - ds.wheelOnRightProb = float(sigmoid(model_result.wheel_on_right_prob)) + ds.modelExecutionTime = exec_time + ds.gpuExecutionTime = gpu_exec_time + ds.wheelOnRightProb = float(sigmoid(model_output[output_slices['wheel_on_right']][0])) ds.rawPredictions = model_output.tobytes() if SEND_RAW_PRED else b'' - fill_driver_state(ds.leftDriverData, model_result.driver_state_lhd) - fill_driver_state(ds.rightDriverData, model_result.driver_state_rhd) + fill_driver_state(ds.leftDriverData, model_output, output_slices, 'lhd') + fill_driver_state(ds.rightDriverData, model_output, output_slices, 'rhd') return msg @@ -140,7 +111,7 @@ def main(): sm = SubMaster(["liveCalibration"]) pm = PubMaster(["driverStateV2"]) - calib = np.zeros(CALIB_LEN, dtype=np.float32) + calib = np.zeros(model.numpy_inputs['calib'].size, dtype=np.float32) model_transform = None while True: @@ -160,7 +131,8 @@ def main(): model_output, gpu_execution_time = model.run(buf, calib, model_transform) t2 = time.perf_counter() - pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time)) + msg = get_driverstate_packet(model_output, model.output_slices, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time) + pm.send("driverStateV2", msg) if __name__ == "__main__": diff --git a/selfdrive/modeld/models/dmonitoring_model.onnx b/selfdrive/modeld/models/dmonitoring_model.onnx index 1b6a8c3e93..9b1c4a1834 100644 --- a/selfdrive/modeld/models/dmonitoring_model.onnx +++ b/selfdrive/modeld/models/dmonitoring_model.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a53626ab84757813fb16a1441704f2ae7192bef88c331bdc2415be6981d204f -size 7191776 +oid sha256:3446bf8b22e50e47669a25bf32460ae8baf8547037f346753e19ecbfcf6d4e59 +size 6954368 diff --git a/selfdrive/monitoring/helpers.py b/selfdrive/monitoring/helpers.py index 02d8ff5c71..f405eba537 100644 --- a/selfdrive/monitoring/helpers.py +++ b/selfdrive/monitoring/helpers.py @@ -37,12 +37,12 @@ class DRIVER_MONITOR_SETTINGS: self._BLINK_THRESHOLD = 0.865 if HARDWARE.get_device_type() == 'mici': - self._EE_THRESH11 = 0.75 + self._PHONE_THRESH = 0.75 else: - self._EE_THRESH11 = 0.4 - self._EE_THRESH12 = 15.0 - self._EE_MAX_OFFSET1 = 0.06 - self._EE_MIN_OFFSET1 = 0.025 + self._PHONE_THRESH = 0.4 + self._PHONE_THRESH2 = 15.0 + self._PHONE_MAX_OFFSET = 0.06 + self._PHONE_MIN_OFFSET = 0.025 self._POSE_PITCH_THRESHOLD = 0.3133 self._POSE_PITCH_THRESHOLD_SLACK = 0.3237 @@ -84,7 +84,7 @@ class DistractedType: NOT_DISTRACTED = 0 DISTRACTED_POSE = 1 << 0 DISTRACTED_BLINK = 1 << 1 - DISTRACTED_E2E = 1 << 2 + DISTRACTED_PHONE = 1 << 2 class DriverPose: def __init__(self, max_trackable): @@ -142,9 +142,9 @@ class DriverMonitoring: self.wheelpos_learner = RunningStatFilter() self.pose = DriverPose(self.settings._POSE_OFFSET_MAX_COUNT) self.blink = DriverBlink() - self.eev1 = 0. - self.ee1_offseter = RunningStatFilter(max_trackable=self.settings._POSE_OFFSET_MAX_COUNT) - self.ee1_calibrated = False + self.phone_prob = 0. + self.phone_offseter = RunningStatFilter(max_trackable=self.settings._POSE_OFFSET_MAX_COUNT) + self.phone_calibrated = False self.always_on = always_on self.distracted_types = [] @@ -242,13 +242,13 @@ class DriverMonitoring: if (self.blink.left + self.blink.right)*0.5 > self.settings._BLINK_THRESHOLD: distracted_types.append(DistractedType.DISTRACTED_BLINK) - if self.ee1_calibrated: - ee1_dist = self.eev1 > max(min(self.ee1_offseter.filtered_stat.M, self.settings._EE_MAX_OFFSET1), self.settings._EE_MIN_OFFSET1) \ - * self.settings._EE_THRESH12 + if self.phone_calibrated: + using_phone = self.phone_prob > max(min(self.phone_offseter.filtered_stat.M, self.settings._PHONE_MAX_OFFSET), self.settings._PHONE_MIN_OFFSET) \ + * self.settings._PHONE_THRESH2 else: - ee1_dist = self.eev1 > self.settings._EE_THRESH11 - if ee1_dist: - distracted_types.append(DistractedType.DISTRACTED_E2E) + using_phone = self.phone_prob > self.settings._PHONE_THRESH + if using_phone: + distracted_types.append(DistractedType.DISTRACTED_PHONE) return distracted_types @@ -267,8 +267,7 @@ class DriverMonitoring: self.wheel_on_right = self.wheel_on_right_last driver_data = driver_state.rightDriverData if self.wheel_on_right else driver_state.leftDriverData if not all(len(x) > 0 for x in (driver_data.faceOrientation, driver_data.facePosition, - driver_data.faceOrientationStd, driver_data.facePositionStd, - driver_data.notReadyProb)): + driver_data.faceOrientationStd, driver_data.facePositionStd)): return self.face_detected = driver_data.faceProb > self.settings._FACE_THRESHOLD @@ -284,10 +283,10 @@ class DriverMonitoring: * (driver_data.sunglassesProb < self.settings._SG_THRESHOLD) self.blink.right = driver_data.rightBlinkProb * (driver_data.rightEyeProb > self.settings._EYE_THRESHOLD) \ * (driver_data.sunglassesProb < self.settings._SG_THRESHOLD) - self.eev1 = driver_data.notReadyProb[0] + self.phone_prob = driver_data.phoneProb self.distracted_types = self._get_distracted_types() - self.driver_distracted = (DistractedType.DISTRACTED_E2E in self.distracted_types or DistractedType.DISTRACTED_POSE in self.distracted_types + self.driver_distracted = (DistractedType.DISTRACTED_PHONE in self.distracted_types or DistractedType.DISTRACTED_POSE in self.distracted_types or DistractedType.DISTRACTED_BLINK in self.distracted_types) \ and driver_data.faceProb > self.settings._FACE_THRESHOLD and self.pose.low_std self.driver_distraction_filter.update(self.driver_distracted) @@ -297,11 +296,11 @@ class DriverMonitoring: if self.face_detected and car_speed > self.settings._POSE_CALIB_MIN_SPEED and self.pose.low_std and (not op_engaged or not self.driver_distracted): self.pose.pitch_offseter.push_and_update(self.pose.pitch) self.pose.yaw_offseter.push_and_update(self.pose.yaw) - self.ee1_offseter.push_and_update(self.eev1) + self.phone_offseter.push_and_update(self.phone_prob) self.pose.calibrated = self.pose.pitch_offseter.filtered_stat.n > self.settings._POSE_OFFSET_MIN_COUNT and \ self.pose.yaw_offseter.filtered_stat.n > self.settings._POSE_OFFSET_MIN_COUNT - self.ee1_calibrated = self.ee1_offseter.filtered_stat.n > self.settings._POSE_OFFSET_MIN_COUNT + self.phone_calibrated = self.phone_offseter.filtered_stat.n > self.settings._POSE_OFFSET_MIN_COUNT if self.face_detected and not self.driver_distracted: if model_std_max > self.settings._DCAM_UNCERTAIN_ALERT_THRESHOLD: diff --git a/selfdrive/monitoring/test_monitoring.py b/selfdrive/monitoring/test_monitoring.py index 1f8babe029..67234550f7 100644 --- a/selfdrive/monitoring/test_monitoring.py +++ b/selfdrive/monitoring/test_monitoring.py @@ -25,7 +25,7 @@ def make_msg(face_detected, distracted=False, model_uncertain=False): ds.leftDriverData.faceOrientationStd = [1.*model_uncertain, 1.*model_uncertain, 1.*model_uncertain] ds.leftDriverData.facePositionStd = [1.*model_uncertain, 1.*model_uncertain] # TODO: test both separately when e2e is used - ds.leftDriverData.notReadyProb = [0., 0.] + ds.leftDriverData.phoneProb = 0. return ds diff --git a/selfdrive/test/process_replay/model_replay.py b/selfdrive/test/process_replay/model_replay.py index 59b8cf8250..9ba599bac9 100755 --- a/selfdrive/test/process_replay/model_replay.py +++ b/selfdrive/test/process_replay/model_replay.py @@ -77,7 +77,7 @@ def generate_report(proposed, master, tmp, commit): (lambda x: get_idx_if_non_empty(x.leftDriverData.faceProb), "leftDriverData.faceProb"), (lambda x: get_idx_if_non_empty(x.leftDriverData.faceOrientation, 0), "leftDriverData.faceOrientation0"), (lambda x: get_idx_if_non_empty(x.leftDriverData.leftBlinkProb), "leftDriverData.leftBlinkProb"), - (lambda x: get_idx_if_non_empty(x.leftDriverData.notReadyProb, 0), "leftDriverData.notReadyProb0"), + (lambda x: get_idx_if_non_empty(x.leftDriverData.phoneProb), "leftDriverData.phoneProb"), (lambda x: get_idx_if_non_empty(x.rightDriverData.faceProb), "rightDriverData.faceProb"), ], "driverStateV2")