Refactor ModelFrame struct to class (#20005)

* refactor modelframe

* remove DUMP_YUV

* use constructor/destructor instead of init/free

* OMG...should be clEnqueueReadBuffer

* rebase master

* rebase master

* fix build

* small cleanup

Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
pull/21350/head
Dean Lee 4 years ago committed by GitHub
parent 9ebfc2ba8d
commit 8da538c18c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 62
      selfdrive/modeld/models/commonmodel.cc
  2. 32
      selfdrive/modeld/models/commonmodel.h
  3. 31
      selfdrive/modeld/models/driving.cc
  4. 4
      selfdrive/modeld/models/driving.h

@ -3,51 +3,45 @@
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include "selfdrive/common/clutil.h"
#include "selfdrive/common/mat.h"
#include "selfdrive/common/timing.h"
// NOTE: this region is a diff hunk with removed and added lines interleaved.
// Lines that go through `frame->` belong to the free function frame_init();
// the remaining lines belong to the ModelFrame constructor. Per the commit
// message, the constructor is used instead of the init function.
//
// frame_init(): stores the caller-supplied width/height on the frame and sizes
// every OpenCL buffer from them.
void frame_init(ModelFrame* frame, int width, int height,
cl_device_id device_id, cl_context context) {
transform_init(&frame->transform, context, device_id);
frame->width = width;
frame->height = height;
// ModelFrame constructor: takes only the device and context; all sizes come
// from the MODEL_WIDTH / MODEL_HEIGHT / MODEL_FRAME_SIZE constants.
ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
// Host-side buffer of buf_size floats, owned by the object.
input_frames = std::make_unique<float[]>(buf_size);
// frame_init(): Y buffer is width*height bytes; U and V are (width/2)*(height/2) bytes each.
frame->y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (size_t)width*height, NULL, &err));
frame->u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (size_t)(width/2)*(height/2), NULL, &err));
frame->v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (size_t)(width/2)*(height/2), NULL, &err));
// frame_init(): network input holds width*height*3/2 floats.
frame->net_input_size = ((width*height*3)/2)*sizeof(float);
frame->net_input = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE,
frame->net_input_size, (void*)NULL, &err));
loadyuv_init(&frame->loadyuv, context, device_id, width, height);
// Constructor: the object creates its own command queue (properties = 0)
// rather than receiving one from the caller.
q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
// Constructor: same buffer layout as above, sized from the model constants.
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(float), NULL, &err));
transform_init(&transform, context, device_id);
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
}
// NOTE: this region is a diff hunk with removed and added lines interleaved.
// Lines that go through `frame->` (and `return net_input_buf;`) belong to the
// free function frame_prepare(); the remaining lines belong to
// ModelFrame::prepare().
//
// frame_prepare(): queues the transform and loadyuv steps on the caller's
// queue, then maps the net_input buffer for reading and returns the mapped
// pointer.
float *frame_prepare(ModelFrame* frame, cl_command_queue q,
cl_mem yuv_cl, int width, int height,
const mat3 &transform) {
transform_queue(&frame->transform, q,
yuv_cl, width, height,
frame->y_cl, frame->u_cl, frame->v_cl,
frame->width, frame->height,
transform);
loadyuv_queue(&frame->loadyuv, q,
frame->y_cl, frame->u_cl, frame->v_cl,
frame->net_input);
float *net_input_buf = (float *)CL_CHECK_ERR(clEnqueueMapBuffer(q, frame->net_input, CL_TRUE,
CL_MAP_READ, 0, frame->net_input_size,
0, NULL, NULL, &err));
// ModelFrame::prepare(): queues the transform and loadyuv steps for one source
// frame on the object's own queue and appends the result to input_frames.
//
// yuv_cl                    - source image buffer
// frame_width, frame_height - dimensions of the source image
// transform                 - 3x3 matrix handed to transform_queue()
//
// Returns a pointer to the start of input_frames (2 * MODEL_FRAME_SIZE floats).
// The buffer is owned by this object, so the caller must not free it.
float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, const mat3 &transform) {
transform_queue(&this->transform, q,
yuv_cl, frame_width, frame_height,
y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, transform);
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
// Shift the second half of input_frames into the first half so the new frame
// can be written into the second half.
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(float) * MODEL_FRAME_SIZE);
// Blocking read (CL_TRUE) of MODEL_FRAME_SIZE floats into the second half.
// The status code is checked like every other OpenCL call in this file; a
// failed read would otherwise leave that half untouched without any error.
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
clFinish(q);
return net_input_buf;
return &input_frames[0];
}
// NOTE: this region is a diff hunk with removed and added lines interleaved.
// Lines that go through `frame->` belong to the free function frame_free();
// the remaining lines belong to the ModelFrame destructor.
//
// frame_free(): destroys the transform and loadyuv state, then releases the
// four OpenCL buffers held by the frame.
void frame_free(ModelFrame* frame) {
transform_destroy(&frame->transform);
loadyuv_destroy(&frame->loadyuv);
CL_CHECK(clReleaseMemObject(frame->net_input));
CL_CHECK(clReleaseMemObject(frame->v_cl));
CL_CHECK(clReleaseMemObject(frame->u_cl));
CL_CHECK(clReleaseMemObject(frame->y_cl));
// Destructor: same teardown as frame_free(), and additionally releases the
// command queue that the constructor created.
ModelFrame::~ModelFrame() {
transform_destroy(&transform);
loadyuv_destroy(&loadyuv);
CL_CHECK(clReleaseMemObject(net_input_cl));
CL_CHECK(clReleaseMemObject(v_cl));
CL_CHECK(clReleaseMemObject(u_cl));
CL_CHECK(clReleaseMemObject(y_cl));
CL_CHECK(clReleaseCommandQueue(q));
}
void softmax(const float* input, float* output, size_t len) {

@ -3,6 +3,8 @@
#include <cfloat>
#include <cstdlib>
#include <memory>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#ifdef __APPLE__
#include <OpenCL/cl.h>
@ -14,24 +16,28 @@
#include "selfdrive/modeld/transforms/loadyuv.h"
#include "selfdrive/modeld/transforms/transform.h"
// Dimensions used to size the Y/U/V buffers of a ModelFrame.
constexpr int MODEL_WIDTH = 512;
constexpr int MODEL_HEIGHT = 256;
// Element count of one frame: a full-size Y plane plus two quarter-size
// (width/2 x height/2) U and V planes, i.e. width * height * 3 / 2.
constexpr int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
// True when the SEND_RAW_PRED environment variable is set (to any value).
const bool send_raw_pred = getenv("SEND_RAW_PRED") != NULL;
// Math helpers; only the declarations are visible here.
void softmax(const float* input, float* output, size_t len);
float softplus(float input);
float sigmoid(float input);
// NOTE: this region is a diff hunk with removed and added lines interleaved.
// It contains both the old `typedef struct ModelFrame` with its free functions
// frame_init / frame_prepare / frame_free, and the new `class ModelFrame`
// with a constructor, destructor and prepare() method.
typedef struct ModelFrame {
// ModelFrame (class form): holds the OpenCL command queue, the Y/U/V and
// network-input buffers, and the host-side input_frames buffer.
// NOTE(review): a destructor is declared but no copy/move operations are
// visible in this hunk; copying an instance would presumably lead to the same
// OpenCL handles being released twice. Confirm whether copies should be deleted.
class ModelFrame {
public:
ModelFrame(cl_device_id device_id, cl_context context);
~ModelFrame();
// Returns a pointer to the host-side input buffer (see the definition in
// commonmodel.cc).
float* prepare(cl_mem yuv_cl, int width, int height, const mat3& transform);
// Number of floats in input_frames: two frames of MODEL_FRAME_SIZE each.
const int buf_size = MODEL_FRAME_SIZE * 2;
private:
Transform transform;
// Struct-form members: per-instance dimensions and network-input buffer.
int width, height;
cl_mem y_cl, u_cl, v_cl;
LoadYUVState loadyuv;
cl_mem net_input;
size_t net_input_size;
} ModelFrame;
// Struct-form free-function API.
void frame_init(ModelFrame* frame, int width, int height,
cl_device_id device_id, cl_context context);
float *frame_prepare(ModelFrame* frame, cl_command_queue q,
cl_mem yuv_cl, int width, int height,
const mat3 &transform);
void frame_free(ModelFrame* frame);
// Class-form members: command queue, device buffers, and the host buffer that
// prepare() returns a pointer into.
cl_command_queue q;
cl_mem y_cl, u_cl, v_cl, net_input_cl;
std::unique_ptr<float[]> input_frames;
};

@ -17,10 +17,6 @@ constexpr int OTHER_META_SIZE = 32;
constexpr int NUM_META_INTERVALS = 5;
constexpr int META_STRIDE = 6;
constexpr int MODEL_WIDTH = 512;
constexpr int MODEL_HEIGHT = 256;
constexpr int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
constexpr int PLAN_MHP_N = 5;
constexpr int PLAN_MHP_COLUMNS = 15;
constexpr int PLAN_MHP_VALS = 15*33;
@ -60,8 +56,7 @@ float prev_brake_3ms2_probs[3] = {0,0,0};
// #define DUMP_YUV
void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
frame_init(&s->frame, MODEL_WIDTH, MODEL_HEIGHT, device_id, context);
s->input_frames = std::make_unique<float[]>(MODEL_FRAME_SIZE * 2);
s->frame = new ModelFrame(device_id, context);
constexpr int output_size = OUTPUT_SIZE + TEMPORAL_SIZE;
s->output.resize(output_size);
@ -85,8 +80,6 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
s->traffic_convention[idx] = 1.0;
s->m->addTrafficConvention(s->traffic_convention, TRAFFIC_CONVENTION_LEN);
#endif
s->q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
}
ModelDataRaw model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int height,
@ -108,19 +101,8 @@ ModelDataRaw model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int heigh
//for (int i = 0; i < OUTPUT_SIZE + TEMPORAL_SIZE; i++) { printf("%f ", s->output[i]); } printf("\n");
float *new_frame_buf = frame_prepare(&s->frame, s->q, yuv_cl, width, height, transform);
memmove(&s->input_frames[0], &s->input_frames[MODEL_FRAME_SIZE], sizeof(float)*MODEL_FRAME_SIZE);
memmove(&s->input_frames[MODEL_FRAME_SIZE], new_frame_buf, sizeof(float)*MODEL_FRAME_SIZE);
s->m->execute(&s->input_frames[0], MODEL_FRAME_SIZE*2);
#ifdef DUMP_YUV
FILE *dump_yuv_file = fopen("/sdcard/dump.yuv", "wb");
fwrite(new_frame_buf, MODEL_HEIGHT*MODEL_WIDTH*3/2, sizeof(float), dump_yuv_file);
fclose(dump_yuv_file);
assert(1==2);
#endif
clEnqueueUnmapMemObject(s->q, s->frame.net_input, (void*)new_frame_buf, 0, NULL, NULL);
auto net_input_buf = s->frame->prepare(yuv_cl, width, height, transform);
s->m->execute(net_input_buf, s->frame->buf_size);
// net outputs
ModelDataRaw net_outputs;
@ -136,8 +118,7 @@ ModelDataRaw model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int heigh
}
// NOTE: diff hunk with removed and added lines interleaved.
// Releases the frame held by the model state. One variant calls frame_free()
// and releases the command queue stored on ModelState; the other deletes the
// ModelFrame that model_init allocated with `new`.
void model_free(ModelState* s) {
frame_free(&s->frame);
CL_CHECK(clReleaseCommandQueue(s->q));
// NOTE(review): s->frame is a raw owning pointer here; a std::unique_ptr member
// would make this manual delete unnecessary.
delete s->frame;
}
static const float *get_best_data(const float *data, int size, int group_size, int offset) {
@ -203,8 +184,8 @@ void fill_meta(cereal::ModelDataV2::MetaData::Builder meta, const float *meta_da
fill_sigmoid(&meta_data[DESIRE_LEN+5], brake_4ms2_sigmoid, NUM_META_INTERVALS, META_STRIDE);
fill_sigmoid(&meta_data[DESIRE_LEN+6], brake_5ms2_sigmoid, NUM_META_INTERVALS, META_STRIDE);
memmove(prev_brake_5ms2_probs, &prev_brake_5ms2_probs[1], 4*sizeof(float));
memmove(prev_brake_3ms2_probs, &prev_brake_3ms2_probs[1], 2*sizeof(float));
std::memmove(prev_brake_5ms2_probs, &prev_brake_5ms2_probs[1], 4*sizeof(float));
std::memmove(prev_brake_3ms2_probs, &prev_brake_3ms2_probs[1], 2*sizeof(float));
prev_brake_5ms2_probs[4] = brake_5ms2_sigmoid[0];
prev_brake_3ms2_probs[2] = brake_3ms2_sigmoid[0];

@ -32,11 +32,9 @@ struct ModelDataRaw {
};
typedef struct ModelState {
ModelFrame frame;
ModelFrame *frame;
std::vector<float> output;
std::unique_ptr<float[]> input_frames;
std::unique_ptr<RunModel> m;
cl_command_queue q;
#ifdef DESIRE
float prev_desire[DESIRE_LEN] = {};
float pulse_desire[DESIRE_LEN] = {};

Loading…
Cancel
Save