Refactor ModelFrame struct to class (#20005)

* refactor ModelFrame

* remove DUMP_YUV

* use constructor/destructor instead of init/free

* fix: the buffer read-back should use clEnqueueReadBuffer

* rebase master

* rebase master

* fix build

* small cleanup

Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
pull/69/head
Dean Lee 4 years ago committed by GitHub
parent 9ebfc2ba8d
commit 8da538c18c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 62
      selfdrive/modeld/models/commonmodel.cc
  2. 32
      selfdrive/modeld/models/commonmodel.h
  3. 31
      selfdrive/modeld/models/driving.cc
  4. 4
      selfdrive/modeld/models/driving.h

@ -3,51 +3,45 @@
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <cstring>
#include "selfdrive/common/clutil.h" #include "selfdrive/common/clutil.h"
#include "selfdrive/common/mat.h" #include "selfdrive/common/mat.h"
#include "selfdrive/common/timing.h" #include "selfdrive/common/timing.h"
void frame_init(ModelFrame* frame, int width, int height, ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
cl_device_id device_id, cl_context context) { input_frames = std::make_unique<float[]>(buf_size);
transform_init(&frame->transform, context, device_id);
frame->width = width;
frame->height = height;
frame->y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (size_t)width*height, NULL, &err)); q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
frame->u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (size_t)(width/2)*(height/2), NULL, &err)); y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
frame->v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (size_t)(width/2)*(height/2), NULL, &err)); u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
frame->net_input_size = ((width*height*3)/2)*sizeof(float); v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
frame->net_input = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(float), NULL, &err));
frame->net_input_size, (void*)NULL, &err));
loadyuv_init(&frame->loadyuv, context, device_id, width, height); transform_init(&transform, context, device_id);
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
} }
float *frame_prepare(ModelFrame* frame, cl_command_queue q, float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, const mat3 &transform) {
cl_mem yuv_cl, int width, int height, transform_queue(&this->transform, q,
const mat3 &transform) { yuv_cl, frame_width, frame_height,
transform_queue(&frame->transform, q, y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, transform);
yuv_cl, width, height, loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
frame->y_cl, frame->u_cl, frame->v_cl,
frame->width, frame->height, std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(float) * MODEL_FRAME_SIZE);
transform); clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr);
loadyuv_queue(&frame->loadyuv, q,
frame->y_cl, frame->u_cl, frame->v_cl,
frame->net_input);
float *net_input_buf = (float *)CL_CHECK_ERR(clEnqueueMapBuffer(q, frame->net_input, CL_TRUE,
CL_MAP_READ, 0, frame->net_input_size,
0, NULL, NULL, &err));
clFinish(q); clFinish(q);
return net_input_buf; return &input_frames[0];
} }
void frame_free(ModelFrame* frame) { ModelFrame::~ModelFrame() {
transform_destroy(&frame->transform); transform_destroy(&transform);
loadyuv_destroy(&frame->loadyuv); loadyuv_destroy(&loadyuv);
CL_CHECK(clReleaseMemObject(frame->net_input)); CL_CHECK(clReleaseMemObject(net_input_cl));
CL_CHECK(clReleaseMemObject(frame->v_cl)); CL_CHECK(clReleaseMemObject(v_cl));
CL_CHECK(clReleaseMemObject(frame->u_cl)); CL_CHECK(clReleaseMemObject(u_cl));
CL_CHECK(clReleaseMemObject(frame->y_cl)); CL_CHECK(clReleaseMemObject(y_cl));
CL_CHECK(clReleaseCommandQueue(q));
} }
void softmax(const float* input, float* output, size_t len) { void softmax(const float* input, float* output, size_t len) {

@ -3,6 +3,8 @@
#include <cfloat> #include <cfloat>
#include <cstdlib> #include <cstdlib>
#include <memory>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#ifdef __APPLE__ #ifdef __APPLE__
#include <OpenCL/cl.h> #include <OpenCL/cl.h>
@ -14,24 +16,28 @@
#include "selfdrive/modeld/transforms/loadyuv.h" #include "selfdrive/modeld/transforms/loadyuv.h"
#include "selfdrive/modeld/transforms/transform.h" #include "selfdrive/modeld/transforms/transform.h"
constexpr int MODEL_WIDTH = 512;
constexpr int MODEL_HEIGHT = 256;
constexpr int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
const bool send_raw_pred = getenv("SEND_RAW_PRED") != NULL; const bool send_raw_pred = getenv("SEND_RAW_PRED") != NULL;
void softmax(const float* input, float* output, size_t len); void softmax(const float* input, float* output, size_t len);
float softplus(float input); float softplus(float input);
float sigmoid(float input); float sigmoid(float input);
typedef struct ModelFrame { class ModelFrame {
public:
ModelFrame(cl_device_id device_id, cl_context context);
~ModelFrame();
float* prepare(cl_mem yuv_cl, int width, int height, const mat3& transform);
const int buf_size = MODEL_FRAME_SIZE * 2;
private:
Transform transform; Transform transform;
int width, height;
cl_mem y_cl, u_cl, v_cl;
LoadYUVState loadyuv; LoadYUVState loadyuv;
cl_mem net_input; cl_command_queue q;
size_t net_input_size; cl_mem y_cl, u_cl, v_cl, net_input_cl;
} ModelFrame; std::unique_ptr<float[]> input_frames;
};
void frame_init(ModelFrame* frame, int width, int height,
cl_device_id device_id, cl_context context);
float *frame_prepare(ModelFrame* frame, cl_command_queue q,
cl_mem yuv_cl, int width, int height,
const mat3 &transform);
void frame_free(ModelFrame* frame);

@ -17,10 +17,6 @@ constexpr int OTHER_META_SIZE = 32;
constexpr int NUM_META_INTERVALS = 5; constexpr int NUM_META_INTERVALS = 5;
constexpr int META_STRIDE = 6; constexpr int META_STRIDE = 6;
constexpr int MODEL_WIDTH = 512;
constexpr int MODEL_HEIGHT = 256;
constexpr int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
constexpr int PLAN_MHP_N = 5; constexpr int PLAN_MHP_N = 5;
constexpr int PLAN_MHP_COLUMNS = 15; constexpr int PLAN_MHP_COLUMNS = 15;
constexpr int PLAN_MHP_VALS = 15*33; constexpr int PLAN_MHP_VALS = 15*33;
@ -60,8 +56,7 @@ float prev_brake_3ms2_probs[3] = {0,0,0};
// #define DUMP_YUV // #define DUMP_YUV
void model_init(ModelState* s, cl_device_id device_id, cl_context context) { void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
frame_init(&s->frame, MODEL_WIDTH, MODEL_HEIGHT, device_id, context); s->frame = new ModelFrame(device_id, context);
s->input_frames = std::make_unique<float[]>(MODEL_FRAME_SIZE * 2);
constexpr int output_size = OUTPUT_SIZE + TEMPORAL_SIZE; constexpr int output_size = OUTPUT_SIZE + TEMPORAL_SIZE;
s->output.resize(output_size); s->output.resize(output_size);
@ -85,8 +80,6 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
s->traffic_convention[idx] = 1.0; s->traffic_convention[idx] = 1.0;
s->m->addTrafficConvention(s->traffic_convention, TRAFFIC_CONVENTION_LEN); s->m->addTrafficConvention(s->traffic_convention, TRAFFIC_CONVENTION_LEN);
#endif #endif
s->q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
} }
ModelDataRaw model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int height, ModelDataRaw model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int height,
@ -108,19 +101,8 @@ ModelDataRaw model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int heigh
//for (int i = 0; i < OUTPUT_SIZE + TEMPORAL_SIZE; i++) { printf("%f ", s->output[i]); } printf("\n"); //for (int i = 0; i < OUTPUT_SIZE + TEMPORAL_SIZE; i++) { printf("%f ", s->output[i]); } printf("\n");
float *new_frame_buf = frame_prepare(&s->frame, s->q, yuv_cl, width, height, transform); auto net_input_buf = s->frame->prepare(yuv_cl, width, height, transform);
memmove(&s->input_frames[0], &s->input_frames[MODEL_FRAME_SIZE], sizeof(float)*MODEL_FRAME_SIZE); s->m->execute(net_input_buf, s->frame->buf_size);
memmove(&s->input_frames[MODEL_FRAME_SIZE], new_frame_buf, sizeof(float)*MODEL_FRAME_SIZE);
s->m->execute(&s->input_frames[0], MODEL_FRAME_SIZE*2);
#ifdef DUMP_YUV
FILE *dump_yuv_file = fopen("/sdcard/dump.yuv", "wb");
fwrite(new_frame_buf, MODEL_HEIGHT*MODEL_WIDTH*3/2, sizeof(float), dump_yuv_file);
fclose(dump_yuv_file);
assert(1==2);
#endif
clEnqueueUnmapMemObject(s->q, s->frame.net_input, (void*)new_frame_buf, 0, NULL, NULL);
// net outputs // net outputs
ModelDataRaw net_outputs; ModelDataRaw net_outputs;
@ -136,8 +118,7 @@ ModelDataRaw model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int heigh
} }
void model_free(ModelState* s) { void model_free(ModelState* s) {
frame_free(&s->frame); delete s->frame;
CL_CHECK(clReleaseCommandQueue(s->q));
} }
static const float *get_best_data(const float *data, int size, int group_size, int offset) { static const float *get_best_data(const float *data, int size, int group_size, int offset) {
@ -203,8 +184,8 @@ void fill_meta(cereal::ModelDataV2::MetaData::Builder meta, const float *meta_da
fill_sigmoid(&meta_data[DESIRE_LEN+5], brake_4ms2_sigmoid, NUM_META_INTERVALS, META_STRIDE); fill_sigmoid(&meta_data[DESIRE_LEN+5], brake_4ms2_sigmoid, NUM_META_INTERVALS, META_STRIDE);
fill_sigmoid(&meta_data[DESIRE_LEN+6], brake_5ms2_sigmoid, NUM_META_INTERVALS, META_STRIDE); fill_sigmoid(&meta_data[DESIRE_LEN+6], brake_5ms2_sigmoid, NUM_META_INTERVALS, META_STRIDE);
memmove(prev_brake_5ms2_probs, &prev_brake_5ms2_probs[1], 4*sizeof(float)); std::memmove(prev_brake_5ms2_probs, &prev_brake_5ms2_probs[1], 4*sizeof(float));
memmove(prev_brake_3ms2_probs, &prev_brake_3ms2_probs[1], 2*sizeof(float)); std::memmove(prev_brake_3ms2_probs, &prev_brake_3ms2_probs[1], 2*sizeof(float));
prev_brake_5ms2_probs[4] = brake_5ms2_sigmoid[0]; prev_brake_5ms2_probs[4] = brake_5ms2_sigmoid[0];
prev_brake_3ms2_probs[2] = brake_3ms2_sigmoid[0]; prev_brake_3ms2_probs[2] = brake_3ms2_sigmoid[0];

@ -32,11 +32,9 @@ struct ModelDataRaw {
}; };
typedef struct ModelState { typedef struct ModelState {
ModelFrame frame; ModelFrame *frame;
std::vector<float> output; std::vector<float> output;
std::unique_ptr<float[]> input_frames;
std::unique_ptr<RunModel> m; std::unique_ptr<RunModel> m;
cl_command_queue q;
#ifdef DESIRE #ifdef DESIRE
float prev_desire[DESIRE_LEN] = {}; float prev_desire[DESIRE_LEN] = {};
float pulse_desire[DESIRE_LEN] = {}; float pulse_desire[DESIRE_LEN] = {};

Loading…
Cancel
Save