From c8264cbf2c99399794ed9473c49de19958f99eb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Harald=20Sch=C3=A4fer?= <harald.the.engineer@gmail.com>
Date: Mon, 6 Jan 2025 20:52:36 -0800
Subject: [PATCH] Notre Dame model in tinygrad (#34324)

* release model: 6f23a03f-486b-4d3e-a314-19d149644c7c/700

* old style model in tinygrad

* fix desire

* tg hack

* 20Hz

* no gas probs

* No gas here

* better indexing

---------

Co-authored-by: Yassine Yousfi <yyousfi1@binghamton.edu>
---
 selfdrive/modeld/constants.py                 | 20 ++++++-------
 selfdrive/modeld/fill_model_msg.py            | 20 +++----------
 selfdrive/modeld/modeld.py                    | 28 +++++++++++--------
 selfdrive/modeld/models/commonmodel.cc        |  6 ++--
 selfdrive/modeld/models/supercombo.onnx       |  4 +--
 selfdrive/modeld/parse_model_outputs.py       |  2 ++
 selfdrive/test/process_replay/model_replay.py |  2 +-
 tinygrad_repo                                 |  2 +-
 8 files changed, 38 insertions(+), 46 deletions(-)

diff --git a/selfdrive/modeld/constants.py b/selfdrive/modeld/constants.py
index bf74c2d1a0..beb0d22ef5 100644
--- a/selfdrive/modeld/constants.py
+++ b/selfdrive/modeld/constants.py
@@ -16,7 +16,6 @@ class ModelConstants:
   MODEL_FREQ = 20
   FEATURE_LEN = 512
   FULL_HISTORY_BUFFER_LEN = 99
-  HISTORY_BUFFER_LEN = 24
   DESIRE_LEN = 8
   TRAFFIC_CONVENTION_LEN = 2
   LAT_PLANNER_STATE_LEN = 4
@@ -73,14 +72,13 @@ class Plan:
 class Meta:
   ENGAGED = slice(0, 1)
   # next 2, 4, 6, 8, 10 seconds
-  GAS_DISENGAGE = slice(1, 31, 6)
-  BRAKE_DISENGAGE = slice(2, 31, 6)
-  STEER_OVERRIDE = slice(3, 31, 6)
-  HARD_BRAKE_3 = slice(4, 31, 6)
-  HARD_BRAKE_4 = slice(5, 31, 6)
-  HARD_BRAKE_5 = slice(6, 31, 6)
+  GAS_DISENGAGE = slice(1, 36, 7)
+  BRAKE_DISENGAGE = slice(2, 36, 7)
+  STEER_OVERRIDE = slice(3, 36, 7)
+  HARD_BRAKE_3 = slice(4, 36, 7)
+  HARD_BRAKE_4 = slice(5, 36, 7)
+  HARD_BRAKE_5 = slice(6, 36, 7)
+  GAS_PRESS = slice(7, 36, 7)
   # next 0, 2, 4, 6, 8, 10 seconds
-  GAS_PRESS = slice(31, 55, 4)
-  BRAKE_PRESS = slice(32, 55, 4)
-  LEFT_BLINKER = slice(33, 55, 4)
-  RIGHT_BLINKER = slice(34, 55, 4)
+  LEFT_BLINKER = slice(36, 48, 2)
+  RIGHT_BLINKER = slice(37, 48, 2)
diff --git a/selfdrive/modeld/fill_model_msg.py b/selfdrive/modeld/fill_model_msg.py
index a13d632aa6..3056da9374 100644
--- a/selfdrive/modeld/fill_model_msg.py
+++ b/selfdrive/modeld/fill_model_msg.py
@@ -3,21 +3,11 @@ import capnp
 import numpy as np
 from cereal import log
 from openpilot.selfdrive.modeld.constants import ModelConstants, Plan, Meta
-from openpilot.selfdrive.controls.lib.drive_helpers import MIN_SPEED
 
 SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
 
 ConfidenceClass = log.ModelDataV2.ConfidenceClass
 
-def curv_from_psis(psi_target, psi_rate, vego, delay):
-  vego = np.clip(vego, MIN_SPEED, np.inf)
-  curv_from_psi = psi_target / (vego * delay)  # epsilon to prevent divide-by-zero
-  return 2*curv_from_psi - psi_rate / vego
-
-def get_curvature_from_plan(plan, vego, delay):
-  psi_target = np.interp(delay, ModelConstants.T_IDXS, plan[:, Plan.T_FROM_CURRENT_EULER][:, 2])
-  psi_rate = plan[:, Plan.ORIENTATION_RATE][0, 2]
-  return curv_from_psis(psi_target, psi_rate, vego, delay)
 
 class PublishState:
   def __init__(self):
@@ -75,8 +65,6 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
   extended_msg.valid = valid
   base_msg.valid = valid
 
-  desired_curv = float(get_curvature_from_plan(net_output_data['plan'][0], v_ego, delay))
-
   driving_model_data = base_msg.drivingModelData
 
   driving_model_data.frameId = vipc_frame_id
@@ -85,7 +73,7 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
   driving_model_data.modelExecutionTime = model_execution_time
 
   action = driving_model_data.action
-  action.desiredCurvature = desired_curv
+  action.desiredCurvature = float(net_output_data['desired_curvature'][0,0])
 
   modelV2 = extended_msg.modelV2
   modelV2.frameId = vipc_frame_id
@@ -120,7 +108,7 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
 
   # lateral planning
   action = modelV2.action
-  action.desiredCurvature = desired_curv
+  action.desiredCurvature = float(net_output_data['desired_curvature'][0,0])
 
   # times at X_IDXS according to model plan
   PLAN_T_IDXS = [np.nan] * ModelConstants.IDX_N
@@ -181,8 +169,8 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
   disengage_predictions.brake3MetersPerSecondSquaredProbs = net_output_data['meta'][0,Meta.HARD_BRAKE_3].tolist()
   disengage_predictions.brake4MetersPerSecondSquaredProbs = net_output_data['meta'][0,Meta.HARD_BRAKE_4].tolist()
   disengage_predictions.brake5MetersPerSecondSquaredProbs = net_output_data['meta'][0,Meta.HARD_BRAKE_5].tolist()
-  disengage_predictions.gasPressProbs = net_output_data['meta'][0,Meta.GAS_PRESS].tolist()
-  disengage_predictions.brakePressProbs = net_output_data['meta'][0,Meta.BRAKE_PRESS].tolist()
+  #disengage_predictions.gasPressProbs = net_output_data['meta'][0,Meta.GAS_PRESS].tolist()
+  #disengage_predictions.brakePressProbs = net_output_data['meta'][0,Meta.BRAKE_PRESS].tolist()
 
   publish_state.prev_brake_5ms2_probs[:-1] = publish_state.prev_brake_5ms2_probs[1:]
   publish_state.prev_brake_5ms2_probs[-1] = net_output_data['meta'][0,Meta.HARD_BRAKE_5][0]
diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py
index 49f8c483f0..5783f6aa8e 100755
--- a/selfdrive/modeld/modeld.py
+++ b/selfdrive/modeld/modeld.py
@@ -59,14 +59,14 @@ class ModelState:
   def __init__(self, context: CLContext):
     self.frames = {'input_imgs': DrivingModelFrame(context), 'big_input_imgs': DrivingModelFrame(context)}
     self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
-    self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
-    self.desire_20Hz =  np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
 
     # img buffers are managed in openCL transform code
     self.numpy_inputs = {
-      'desire': np.zeros((1, (ModelConstants.HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float32),
+      'desire': np.zeros((1, (ModelConstants.FULL_HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float32),
       'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float32),
-      'features_buffer': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN,  ModelConstants.FEATURE_LEN), dtype=np.float32),
+      'lateral_control_params': np.zeros((1, ModelConstants.LATERAL_CONTROL_PARAMS_LEN), dtype=np.float32),
+      'prev_desired_curv': np.zeros((1, (ModelConstants.FULL_HISTORY_BUFFER_LEN+1), ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float32),
+      'features_buffer': np.zeros((1, ModelConstants.FULL_HISTORY_BUFFER_LEN,  ModelConstants.FEATURE_LEN), dtype=np.float32),
     }
 
     with open(METADATA_PATH, 'rb') as f:
@@ -98,11 +98,11 @@ class ModelState:
     new_desire = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
     self.prev_desire[:] = inputs['desire']
 
-    self.desire_20Hz[:-1] = self.desire_20Hz[1:]
-    self.desire_20Hz[-1] = new_desire
-    self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape((1,25,4,-1)).max(axis=2)
+    self.numpy_inputs['desire'][0,:-1] = self.numpy_inputs['desire'][0,1:]
+    self.numpy_inputs['desire'][0,-1] = new_desire
 
     self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
+    self.numpy_inputs['lateral_control_params'][:] = inputs['lateral_control_params']
     imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
                'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}
 
@@ -113,7 +113,7 @@ class ModelState:
           self.tensor_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtypes.uint8)
     else:
       for key in imgs_cl:
-        self.numpy_inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])
+        self.numpy_inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key]).astype(dtype=np.float32)
 
     if prepare_only:
       return None
@@ -125,11 +125,13 @@ class ModelState:
 
     outputs = self.parser.parse_outputs(self.slice_outputs(self.output))
 
-    self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
-    self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
+    self.numpy_inputs['features_buffer'][0,:-1] = self.numpy_inputs['features_buffer'][0,1:]
+    self.numpy_inputs['features_buffer'][0,-1] = outputs['hidden_state'][0, :]
 
-    idxs = np.arange(-4,-100,-4)[::-1]
-    self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[idxs]
+
+    # TODO model only uses last value now
+    self.numpy_inputs['prev_desired_curv'][0,:-1] = self.numpy_inputs['prev_desired_curv'][0,1:]
+    self.numpy_inputs['prev_desired_curv'][0,-1,:] = outputs['desired_curvature'][0, :]
     return outputs
 
 
@@ -240,6 +242,7 @@ def main(demo=False):
     is_rhd = sm["driverMonitoringState"].isRHD
     frame_id = sm["roadCameraState"].frameId
     v_ego = max(sm["carState"].vEgo, 0.)
+    lateral_control_params = np.array([v_ego, steer_delay], dtype=np.float32)
     if sm.updated["liveCalibration"] and sm.seen['roadCameraState'] and sm.seen['deviceState']:
       device_from_calib_euler = np.array(sm["liveCalibration"].rpyCalib, dtype=np.float32)
       dc = DEVICE_CAMERAS[(str(sm['deviceState'].deviceType), str(sm['roadCameraState'].sensor))]
@@ -270,6 +273,7 @@ def main(demo=False):
     inputs:dict[str, np.ndarray] = {
       'desire': vec_desire,
       'traffic_convention': traffic_convention,
+      'lateral_control_params': lateral_control_params,
       }
 
     mt1 = time.perf_counter()
diff --git a/selfdrive/modeld/models/commonmodel.cc b/selfdrive/modeld/models/commonmodel.cc
index ad2620c7b4..9973d18588 100644
--- a/selfdrive/modeld/models/commonmodel.cc
+++ b/selfdrive/modeld/models/commonmodel.cc
@@ -8,8 +8,8 @@
 DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
   input_frames = std::make_unique<uint8_t[]>(buf_size);
   input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
-  img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
-  region.origin = 4 * frame_size_bytes;
+  img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 2*frame_size_bytes, NULL, &err));
+  region.origin = 1 * frame_size_bytes;
   region.size = frame_size_bytes;
   last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &err));
 
@@ -20,7 +20,7 @@ DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context)
 cl_mem* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
   run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
 
-  for (int i = 0; i < 4; i++) {
+  for (int i = 0; i < 1; i++) {
     CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
   }
   loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
diff --git a/selfdrive/modeld/models/supercombo.onnx b/selfdrive/modeld/models/supercombo.onnx
index 777aa8aa7e..2e1a60a23e 100644
--- a/selfdrive/modeld/models/supercombo.onnx
+++ b/selfdrive/modeld/models/supercombo.onnx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72d3d6f8d3c98f5431ec86be77b6350d7d4f43c25075c0106f1d1e7ec7c77668
-size 49096168
+oid sha256:39786068cae1ed8c0dc34ef80c281dfcc67ed18a50e06b90765c49bcfdbf7db4
+size 51453312
diff --git a/selfdrive/modeld/parse_model_outputs.py b/selfdrive/modeld/parse_model_outputs.py
index b699c5fd13..4367e9db8a 100644
--- a/selfdrive/modeld/parse_model_outputs.py
+++ b/selfdrive/modeld/parse_model_outputs.py
@@ -96,6 +96,8 @@ class Parser:
                    out_shape=(ModelConstants.LEAD_TRAJ_LEN,ModelConstants.LEAD_WIDTH))
     if 'lat_planner_solution' in outs:
       self.parse_mdn('lat_planner_solution', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N,ModelConstants.LAT_PLANNER_SOLUTION_WIDTH))
+    if 'desired_curvature' in outs:
+      self.parse_mdn('desired_curvature', outs, in_N=0, out_N=0, out_shape=(ModelConstants.DESIRED_CURV_WIDTH,))
     for k in ['lead_prob', 'lane_lines_prob', 'meta']:
       self.parse_binary_crossentropy(k, outs)
     self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,))
diff --git a/selfdrive/test/process_replay/model_replay.py b/selfdrive/test/process_replay/model_replay.py
index 94803643e0..f406b79e10 100755
--- a/selfdrive/test/process_replay/model_replay.py
+++ b/selfdrive/test/process_replay/model_replay.py
@@ -57,7 +57,7 @@ def generate_report(proposed, master, tmp, commit):
                      (lambda x: x.action.desiredCurvature, "desiredCurvature"),
                      (lambda x: x.leadsV3[0].x[0], "leadsV3.x"),
                      (lambda x: x.laneLines[1].y[0], "laneLines.y"),
-                     (lambda x: x.meta.disengagePredictions.gasPressProbs[1], "gasPressProbs")
+                     #(lambda x: x.meta.disengagePredictions.gasPressProbs[1], "gasPressProbs")
                     ], "modelV2")
   DriverStateV2_Plots = zl([
                      (lambda x: x.wheelOnRightProb, "wheelOnRightProb"),
diff --git a/tinygrad_repo b/tinygrad_repo
index c18307e749..2619a86d2a 160000
--- a/tinygrad_repo
+++ b/tinygrad_repo
@@ -1 +1 @@
-Subproject commit c18307e7494054745389289d55686e81cba88db5
+Subproject commit 2619a86d2a6157c12cc9b1773b4a03cf968a1309