MLSIM trained model V0 (#33676)

* squash f7343ba0-3f7d-4776-9819-2219b8d2d667/270

* fix broken ref

* Model replay ref commit
pull/33690/head
Harald Schäfer 7 months ago committed by GitHub
parent 6c227a66a2
commit e442425c9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      selfdrive/modeld/constants.py
  2. 29
      selfdrive/modeld/modeld.py
  3. 21
      selfdrive/modeld/models/commonmodel.cc
  4. 5
      selfdrive/modeld/models/commonmodel.h
  5. 4
      selfdrive/modeld/models/supercombo.onnx
  6. 2
      selfdrive/test/process_replay/model_replay_ref_commit

@ -15,7 +15,8 @@ class ModelConstants:
# model inputs constants
MODEL_FREQ = 20
FEATURE_LEN = 512
HISTORY_BUFFER_LEN = 99
FULL_HISTORY_BUFFER_LEN = 99
HISTORY_BUFFER_LEN = 24
DESIRE_LEN = 8
TRAFFIC_CONVENTION_LEN = 2
LAT_PLANNER_STATE_LEN = 4

@ -34,6 +34,7 @@ MODEL_PATHS = {
METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'
class FrameMeta:
frame_id: int = 0
timestamp_sof: int = 0
@ -55,6 +56,11 @@ class ModelState:
self.frame = ModelFrame(context)
self.wide_frame = ModelFrame(context)
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
self.prev_desired_curv_20hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float32)
# img buffers are managed in openCL transform code
self.inputs = {
'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32),
@ -87,14 +93,16 @@ class ModelState:
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
inputs['desire'][0] = 0
self.inputs['desire'][:-ModelConstants.DESIRE_LEN] = self.inputs['desire'][ModelConstants.DESIRE_LEN:]
self.inputs['desire'][-ModelConstants.DESIRE_LEN:] = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
new_desire = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
self.prev_desire[:] = inputs['desire']
self.desire_20Hz[:-1] = self.desire_20Hz[1:]
self.desire_20Hz[-1] = new_desire
self.inputs['desire'][:] = self.desire_20Hz.reshape((25,4,-1)).max(axis=1).flatten()
self.inputs['traffic_convention'][:] = inputs['traffic_convention']
self.inputs['lateral_control_params'][:] = inputs['lateral_control_params']
# if getCLBuffer is not None, frame will be None
self.model.setInputBuffer("input_imgs", self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs")))
self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")))
@ -104,10 +112,16 @@ class ModelState:
self.model.execute()
outputs = self.parser.parse_outputs(self.slice_outputs(self.output))
self.inputs['features_buffer'][:-ModelConstants.FEATURE_LEN] = self.inputs['features_buffer'][ModelConstants.FEATURE_LEN:]
self.inputs['features_buffer'][-ModelConstants.FEATURE_LEN:] = outputs['hidden_state'][0, :]
self.inputs['prev_desired_curv'][:-ModelConstants.PREV_DESIRED_CURV_LEN] = self.inputs['prev_desired_curv'][ModelConstants.PREV_DESIRED_CURV_LEN:]
self.inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = outputs['desired_curvature'][0, :]
self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
self.prev_desired_curv_20hz[:-1] = self.prev_desired_curv_20hz[1:]
self.prev_desired_curv_20hz[-1] = outputs['desired_curvature'][0, :]
idxs = np.arange(-4,-100,-4)[::-1]
self.inputs['features_buffer'][:] = self.full_features_20Hz[idxs].flatten()
# TODO model only uses last value now, once that changes we need to input strided action history buffer
self.inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = 0. * self.prev_desired_curv_20hz[-4, :]
return outputs
@ -173,7 +187,6 @@ def main(demo=False):
CP = convert_to_capnp(get_demo_car_params())
else:
CP = messaging.log_from_bytes(params.get("CarParams", block=True), car.CarParams)
cloudlog.info("modeld got CarParams: %s", CP.carName)
# TODO this needs more thought, use .2s extra for now to estimate other delays

@ -13,7 +13,10 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, frame_size_bytes, NULL, &err));
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
region.origin = 4 * frame_size_bytes;
region.size = frame_size_bytes;
last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &err));
transform_init(&transform, context, device_id);
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
@ -24,15 +27,18 @@ uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, i
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
for (int i = 0; i < 4; i++) {
CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
}
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
if (output == NULL) {
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], frame_size_bytes);
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
CL_CHECK(clEnqueueReadBuffer(q, img_buffer_20hz_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[0], 0, nullptr, nullptr));
CL_CHECK(clEnqueueReadBuffer(q, last_img_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
clFinish(q);
return &input_frames[0];
} else {
copy_queue(&loadyuv, q, *output, *output, frame_size_bytes, 0, frame_size_bytes);
copy_queue(&loadyuv, q, net_input_cl, *output, 0, frame_size_bytes, frame_size_bytes);
copy_queue(&loadyuv, q, img_buffer_20hz_cl, *output, 0, 0, frame_size_bytes);
copy_queue(&loadyuv, q, last_img_cl, *output, 0, frame_size_bytes, frame_size_bytes);
// NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
clFinish(q);
@ -43,7 +49,8 @@ uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, i
ModelFrame::~ModelFrame() {
transform_destroy(&transform);
loadyuv_destroy(&loadyuv);
CL_CHECK(clReleaseMemObject(net_input_cl));
CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl));
CL_CHECK(clReleaseMemObject(last_img_cl));
CL_CHECK(clReleaseMemObject(v_cl));
CL_CHECK(clReleaseMemObject(u_cl));
CL_CHECK(clReleaseMemObject(y_cl));

@ -32,6 +32,7 @@ private:
Transform transform;
LoadYUVState loadyuv;
cl_command_queue q;
cl_mem y_cl, u_cl, v_cl, net_input_cl;
cl_mem y_cl, u_cl, v_cl, img_buffer_20hz_cl, last_img_cl;
cl_buffer_region region;
std::unique_ptr<uint8_t[]> input_frames;
};
};

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:21c88ca8a3de59fb11dc3e5888476f6bb627f3647eb0d199680d598e8bf31c0c
size 62486469
oid sha256:dd55319c8e3e9ac120f2b1bb1131cb67018669dfc0f7ebd31c27cc6de3e9f959
size 50309976

@ -1 +1 @@
56743d36006d4312544ace7f53491727aa7ac302
05b1cb87e32f280e46e0f45bbd6d76d5fd3f57a7

Loading…
Cancel
Save