From 8da538c18c10f838fba60e86c01bd5de59934bc6 Mon Sep 17 00:00:00 2001 From: Dean Lee Date: Sat, 19 Jun 2021 16:22:50 +0800 Subject: [PATCH] Refactor ModelFrame struct to class (#20005) * refactor modelframe * remove DUMP_YUV * use constructor/destructor instead of init/free * OMG...should be clEnqueueReadBuffer * rebase master * rebase master * fix build * small cleanup Co-authored-by: Adeeb Shihadeh --- selfdrive/modeld/models/commonmodel.cc | 62 ++++++++++++-------------- selfdrive/modeld/models/commonmodel.h | 32 +++++++------ selfdrive/modeld/models/driving.cc | 31 +++---------- selfdrive/modeld/models/driving.h | 4 +- 4 files changed, 54 insertions(+), 75 deletions(-) diff --git a/selfdrive/modeld/models/commonmodel.cc b/selfdrive/modeld/models/commonmodel.cc index 7ecb2d517b..9def7e49e3 100644 --- a/selfdrive/modeld/models/commonmodel.cc +++ b/selfdrive/modeld/models/commonmodel.cc @@ -3,51 +3,45 @@ #include #include #include +#include #include "selfdrive/common/clutil.h" #include "selfdrive/common/mat.h" #include "selfdrive/common/timing.h" -void frame_init(ModelFrame* frame, int width, int height, - cl_device_id device_id, cl_context context) { - transform_init(&frame->transform, context, device_id); - frame->width = width; - frame->height = height; +ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) { + input_frames = std::make_unique(buf_size); - frame->y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (size_t)width*height, NULL, &err)); - frame->u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (size_t)(width/2)*(height/2), NULL, &err)); - frame->v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (size_t)(width/2)*(height/2), NULL, &err)); - frame->net_input_size = ((width*height*3)/2)*sizeof(float); - frame->net_input = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, - frame->net_input_size, (void*)NULL, &err)); - loadyuv_init(&frame->loadyuv, context, device_id, width, height); + q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err)); + y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err)); + u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err)); + v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err)); + net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(float), NULL, &err)); + + transform_init(&transform, context, device_id); + loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT); } -float *frame_prepare(ModelFrame* frame, cl_command_queue q, - cl_mem yuv_cl, int width, int height, - const mat3 &transform) { - transform_queue(&frame->transform, q, - yuv_cl, width, height, - frame->y_cl, frame->u_cl, frame->v_cl, - frame->width, frame->height, - transform); - loadyuv_queue(&frame->loadyuv, q, - frame->y_cl, frame->u_cl, frame->v_cl, - frame->net_input); - float *net_input_buf = (float *)CL_CHECK_ERR(clEnqueueMapBuffer(q, frame->net_input, CL_TRUE, - CL_MAP_READ, 0, frame->net_input_size, - 0, NULL, NULL, &err)); +float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, const mat3 &transform) { + transform_queue(&this->transform, q, + yuv_cl, frame_width, frame_height, + y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, transform); + loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl); + + std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(float) * MODEL_FRAME_SIZE); + clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr); clFinish(q); - return net_input_buf; + return &input_frames[0]; } -void frame_free(ModelFrame* frame) { - transform_destroy(&frame->transform); - loadyuv_destroy(&frame->loadyuv); - CL_CHECK(clReleaseMemObject(frame->net_input)); - CL_CHECK(clReleaseMemObject(frame->v_cl)); - CL_CHECK(clReleaseMemObject(frame->u_cl)); - CL_CHECK(clReleaseMemObject(frame->y_cl)); +ModelFrame::~ModelFrame() { + transform_destroy(&transform); + loadyuv_destroy(&loadyuv); + CL_CHECK(clReleaseMemObject(net_input_cl)); + CL_CHECK(clReleaseMemObject(v_cl)); + CL_CHECK(clReleaseMemObject(u_cl)); + CL_CHECK(clReleaseMemObject(y_cl)); + CL_CHECK(clReleaseCommandQueue(q)); } void softmax(const float* input, float* output, size_t len) { diff --git a/selfdrive/modeld/models/commonmodel.h b/selfdrive/modeld/models/commonmodel.h index fe97710c96..e4e6563d7c 100644 --- a/selfdrive/modeld/models/commonmodel.h +++ b/selfdrive/modeld/models/commonmodel.h @@ -3,6 +3,8 @@ #include #include +#include + #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #ifdef __APPLE__ #include @@ -14,24 +16,28 @@ #include "selfdrive/modeld/transforms/loadyuv.h" #include "selfdrive/modeld/transforms/transform.h" +constexpr int MODEL_WIDTH = 512; +constexpr int MODEL_HEIGHT = 256; +constexpr int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2; + const bool send_raw_pred = getenv("SEND_RAW_PRED") != NULL; void softmax(const float* input, float* output, size_t len); float softplus(float input); float sigmoid(float input); -typedef struct ModelFrame { +class ModelFrame { + public: + ModelFrame(cl_device_id device_id, cl_context context); + ~ModelFrame(); + float* prepare(cl_mem yuv_cl, int width, int height, const mat3& transform); + + const int buf_size = MODEL_FRAME_SIZE * 2; + + private: Transform transform; - int width, height; - cl_mem y_cl, u_cl, v_cl; LoadYUVState loadyuv; - cl_mem net_input; - size_t net_input_size; -} ModelFrame; - -void frame_init(ModelFrame* frame, int width, int height, - cl_device_id device_id, cl_context context); -float *frame_prepare(ModelFrame* frame, cl_command_queue q, - cl_mem yuv_cl, int width, int height, - const mat3 &transform); -void frame_free(ModelFrame* frame); + cl_command_queue q; + cl_mem y_cl, u_cl, v_cl, net_input_cl; + std::unique_ptr input_frames; +}; diff --git a/selfdrive/modeld/models/driving.cc b/selfdrive/modeld/models/driving.cc index 366b868ea1..ba48af051e 100644 --- a/selfdrive/modeld/models/driving.cc +++ b/selfdrive/modeld/models/driving.cc @@ -17,10 +17,6 @@ constexpr int OTHER_META_SIZE = 32; constexpr int NUM_META_INTERVALS = 5; constexpr int META_STRIDE = 6; -constexpr int MODEL_WIDTH = 512; -constexpr int MODEL_HEIGHT = 256; -constexpr int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2; - constexpr int PLAN_MHP_N = 5; constexpr int PLAN_MHP_COLUMNS = 15; constexpr int PLAN_MHP_VALS = 15*33; @@ -60,8 +56,7 @@ float prev_brake_3ms2_probs[3] = {0,0,0}; // #define DUMP_YUV void model_init(ModelState* s, cl_device_id device_id, cl_context context) { - frame_init(&s->frame, MODEL_WIDTH, MODEL_HEIGHT, device_id, context); - s->input_frames = std::make_unique(MODEL_FRAME_SIZE * 2); + s->frame = new ModelFrame(device_id, context); constexpr int output_size = OUTPUT_SIZE + TEMPORAL_SIZE; s->output.resize(output_size); @@ -85,8 +80,6 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) { s->traffic_convention[idx] = 1.0; s->m->addTrafficConvention(s->traffic_convention, TRAFFIC_CONVENTION_LEN); #endif - - s->q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err)); } ModelDataRaw model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int height, @@ -108,19 +101,8 @@ ModelDataRaw model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int heigh //for (int i = 0; i < OUTPUT_SIZE + TEMPORAL_SIZE; i++) { printf("%f ", s->output[i]); } printf("\n"); - float *new_frame_buf = frame_prepare(&s->frame, s->q, yuv_cl, width, height, transform); - memmove(&s->input_frames[0], &s->input_frames[MODEL_FRAME_SIZE], sizeof(float)*MODEL_FRAME_SIZE); - memmove(&s->input_frames[MODEL_FRAME_SIZE], new_frame_buf, sizeof(float)*MODEL_FRAME_SIZE); - s->m->execute(&s->input_frames[0], MODEL_FRAME_SIZE*2); - - #ifdef DUMP_YUV - FILE *dump_yuv_file = fopen("/sdcard/dump.yuv", "wb"); - fwrite(new_frame_buf, MODEL_HEIGHT*MODEL_WIDTH*3/2, sizeof(float), dump_yuv_file); - fclose(dump_yuv_file); - assert(1==2); - #endif - - clEnqueueUnmapMemObject(s->q, s->frame.net_input, (void*)new_frame_buf, 0, NULL, NULL); + auto net_input_buf = s->frame->prepare(yuv_cl, width, height, transform); + s->m->execute(net_input_buf, s->frame->buf_size); // net outputs ModelDataRaw net_outputs; @@ -136,8 +118,7 @@ ModelDataRaw model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int heigh } void model_free(ModelState* s) { - frame_free(&s->frame); - CL_CHECK(clReleaseCommandQueue(s->q)); + delete s->frame; } static const float *get_best_data(const float *data, int size, int group_size, int offset) { @@ -203,8 +184,8 @@ void fill_meta(cereal::ModelDataV2::MetaData::Builder meta, const float *meta_da fill_sigmoid(&meta_data[DESIRE_LEN+5], brake_4ms2_sigmoid, NUM_META_INTERVALS, META_STRIDE); fill_sigmoid(&meta_data[DESIRE_LEN+6], brake_5ms2_sigmoid, NUM_META_INTERVALS, META_STRIDE); - memmove(prev_brake_5ms2_probs, &prev_brake_5ms2_probs[1], 4*sizeof(float)); - memmove(prev_brake_3ms2_probs, &prev_brake_3ms2_probs[1], 2*sizeof(float)); + std::memmove(prev_brake_5ms2_probs, &prev_brake_5ms2_probs[1], 4*sizeof(float)); + std::memmove(prev_brake_3ms2_probs, &prev_brake_3ms2_probs[1], 2*sizeof(float)); prev_brake_5ms2_probs[4] = brake_5ms2_sigmoid[0]; prev_brake_3ms2_probs[2] = brake_3ms2_sigmoid[0]; diff --git a/selfdrive/modeld/models/driving.h b/selfdrive/modeld/models/driving.h index d523f47c74..0840483eb5 100644 --- a/selfdrive/modeld/models/driving.h +++ b/selfdrive/modeld/models/driving.h @@ -32,11 +32,9 @@ struct ModelDataRaw { }; typedef struct ModelState { - ModelFrame frame; + ModelFrame *frame; std::vector output; - std::unique_ptr input_frames; std::unique_ptr m; - cl_command_queue q; #ifdef DESIRE float prev_desire[DESIRE_LEN] = {}; float pulse_desire[DESIRE_LEN] = {};