diff --git a/selfdrive/modeld/constants.py b/selfdrive/modeld/constants.py index 82161a5b67..bf74c2d1a0 100644 --- a/selfdrive/modeld/constants.py +++ b/selfdrive/modeld/constants.py @@ -15,7 +15,8 @@ class ModelConstants: # model inputs constants MODEL_FREQ = 20 FEATURE_LEN = 512 - HISTORY_BUFFER_LEN = 99 + FULL_HISTORY_BUFFER_LEN = 99 + HISTORY_BUFFER_LEN = 24 DESIRE_LEN = 8 TRAFFIC_CONVENTION_LEN = 2 LAT_PLANNER_STATE_LEN = 4 diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py index abaaf299f7..f8a4b7fe0d 100755 --- a/selfdrive/modeld/modeld.py +++ b/selfdrive/modeld/modeld.py @@ -34,6 +34,7 @@ MODEL_PATHS = { METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl' + class FrameMeta: frame_id: int = 0 timestamp_sof: int = 0 @@ -55,6 +56,11 @@ class ModelState: self.frame = ModelFrame(context) self.wide_frame = ModelFrame(context) self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32) + self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32) + self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32) + self.prev_desired_curv_20hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float32) + + # img buffers are managed in openCL transform code self.inputs = { 'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32), 'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32), @@ -87,14 +93,16 @@ class ModelState: inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None: # Model decides when action is completed, so desire input is just a pulse triggered on rising edge inputs['desire'][0] = 0 - self.inputs['desire'][:-ModelConstants.DESIRE_LEN] = self.inputs['desire'][ModelConstants.DESIRE_LEN:] - self.inputs['desire'][-ModelConstants.DESIRE_LEN:] = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0) + new_desire = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0) self.prev_desire[:] = inputs['desire'] + self.desire_20Hz[:-1] = self.desire_20Hz[1:] + self.desire_20Hz[-1] = new_desire + self.inputs['desire'][:] = self.desire_20Hz.reshape((25,4,-1)).max(axis=1).flatten() + self.inputs['traffic_convention'][:] = inputs['traffic_convention'] self.inputs['lateral_control_params'][:] = inputs['lateral_control_params'] - # if getCLBuffer is not None, frame will be None self.model.setInputBuffer("input_imgs", self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs"))) self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs"))) @@ -104,10 +112,16 @@ class ModelState: self.model.execute() outputs = self.parser.parse_outputs(self.slice_outputs(self.output)) - self.inputs['features_buffer'][:-ModelConstants.FEATURE_LEN] = self.inputs['features_buffer'][ModelConstants.FEATURE_LEN:] - self.inputs['features_buffer'][-ModelConstants.FEATURE_LEN:] = outputs['hidden_state'][0, :] - self.inputs['prev_desired_curv'][:-ModelConstants.PREV_DESIRED_CURV_LEN] = self.inputs['prev_desired_curv'][ModelConstants.PREV_DESIRED_CURV_LEN:] - self.inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = outputs['desired_curvature'][0, :] + self.full_features_20Hz[:-1] = self.full_features_20Hz[1:] + self.full_features_20Hz[-1] = outputs['hidden_state'][0, :] + + self.prev_desired_curv_20hz[:-1] = self.prev_desired_curv_20hz[1:] + self.prev_desired_curv_20hz[-1] = outputs['desired_curvature'][0, :] + + idxs = np.arange(-4,-100,-4)[::-1] + self.inputs['features_buffer'][:] = self.full_features_20Hz[idxs].flatten() + # TODO model only uses last value now, once that changes we need to input strided action history buffer + self.inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = 0. * self.prev_desired_curv_20hz[-4, :] return outputs @@ -173,7 +187,6 @@ def main(demo=False): CP = convert_to_capnp(get_demo_car_params()) else: CP = messaging.log_from_bytes(params.get("CarParams", block=True), car.CarParams) - cloudlog.info("modeld got CarParams: %s", CP.carName) # TODO this needs more thought, use .2s extra for now to estimate other delays diff --git a/selfdrive/modeld/models/commonmodel.cc b/selfdrive/modeld/models/commonmodel.cc index a188a4ebd3..e8a5a7ed52 100644 --- a/selfdrive/modeld/models/commonmodel.cc +++ b/selfdrive/modeld/models/commonmodel.cc @@ -13,7 +13,10 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) { y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err)); u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err)); v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err)); - net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, frame_size_bytes, NULL, &err)); + img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err)); + region.origin = 4 * frame_size_bytes; + region.size = frame_size_bytes; + last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err)); transform_init(&transform, context, device_id); loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT); @@ -24,15 +27,18 @@ uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, i yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset, y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection); - loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl); + for (int i = 0; i < 4; i++) { + CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr)); + } + loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl); if (output == NULL) { - std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], frame_size_bytes); - CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr)); + CL_CHECK(clEnqueueReadBuffer(q, img_buffer_20hz_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[0], 0, nullptr, nullptr)); + CL_CHECK(clEnqueueReadBuffer(q, last_img_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr)); clFinish(q); return &input_frames[0]; } else { - copy_queue(&loadyuv, q, *output, *output, frame_size_bytes, 0, frame_size_bytes); - copy_queue(&loadyuv, q, net_input_cl, *output, 0, frame_size_bytes, frame_size_bytes); + copy_queue(&loadyuv, q, img_buffer_20hz_cl, *output, 0, 0, frame_size_bytes); + copy_queue(&loadyuv, q, last_img_cl, *output, 0, frame_size_bytes, frame_size_bytes); // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready. clFinish(q); @@ -43,7 +49,8 @@ uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, i ModelFrame::~ModelFrame() { transform_destroy(&transform); loadyuv_destroy(&loadyuv); - CL_CHECK(clReleaseMemObject(net_input_cl)); + CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl)); + CL_CHECK(clReleaseMemObject(last_img_cl)); CL_CHECK(clReleaseMemObject(v_cl)); CL_CHECK(clReleaseMemObject(u_cl)); CL_CHECK(clReleaseMemObject(y_cl)); diff --git a/selfdrive/modeld/models/commonmodel.h b/selfdrive/modeld/models/commonmodel.h index fb527fc7a1..1c7360f159 100644 --- a/selfdrive/modeld/models/commonmodel.h +++ b/selfdrive/modeld/models/commonmodel.h @@ -32,6 +32,7 @@ private: Transform transform; LoadYUVState loadyuv; cl_command_queue q; - cl_mem y_cl, u_cl, v_cl, net_input_cl; + cl_mem y_cl, u_cl, v_cl, img_buffer_20hz_cl, last_img_cl; + cl_buffer_region region; std::unique_ptr input_frames; -}; +}; \ No newline at end of file diff --git a/selfdrive/modeld/models/supercombo.onnx b/selfdrive/modeld/models/supercombo.onnx index 100fd14c98..49b51c182e 100644 --- a/selfdrive/modeld/models/supercombo.onnx +++ b/selfdrive/modeld/models/supercombo.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21c88ca8a3de59fb11dc3e5888476f6bb627f3647eb0d199680d598e8bf31c0c -size 62486469 +oid sha256:dd55319c8e3e9ac120f2b1bb1131cb67018669dfc0f7ebd31c27cc6de3e9f959 +size 50309976 diff --git a/selfdrive/test/process_replay/model_replay_ref_commit b/selfdrive/test/process_replay/model_replay_ref_commit index 5fb62db139..f8fd517548 100644 --- a/selfdrive/test/process_replay/model_replay_ref_commit +++ b/selfdrive/test/process_replay/model_replay_ref_commit @@ -1 +1 @@ -56743d36006d4312544ace7f53491727aa7ac302 +05b1cb87e32f280e46e0f45bbd6d76d5fd3f57a7