Cleanup shift (#33677)

* Cleanup shift code

* Unsafe buffer copy

* fix bugs
pull/33678/head
Harald Schäfer 7 months ago committed by GitHub
parent 3b08501db3
commit 19bd96004e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 13
      selfdrive/modeld/models/commonmodel.cc
  2. 1
      selfdrive/modeld/models/commonmodel.h
  3. 22
      selfdrive/modeld/transforms/loadyuv.cc
  4. 8
      selfdrive/modeld/transforms/loadyuv.cl
  5. 6
      selfdrive/modeld/transforms/loadyuv.h

@ -13,7 +13,7 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(uint8_t), NULL, &err));
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, frame_size_bytes, NULL, &err));
transform_init(&transform, context, device_id);
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
@ -24,15 +24,16 @@ uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, i
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
if (output == NULL) {
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(uint8_t) * MODEL_FRAME_SIZE);
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
if (output == NULL) {
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], frame_size_bytes);
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
clFinish(q);
return &input_frames[0];
} else {
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, *output, true);
copy_queue(&loadyuv, q, *output, *output, frame_size_bytes, 0, frame_size_bytes);
copy_queue(&loadyuv, q, net_input_cl, *output, 0, frame_size_bytes, frame_size_bytes);
// NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
clFinish(q);
return NULL;

@ -26,6 +26,7 @@ public:
const int MODEL_HEIGHT = 256;
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
const int buf_size = MODEL_FRAME_SIZE * 2;
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);
private:
Transform transform;

@ -33,17 +33,8 @@ void loadyuv_destroy(LoadYUVState* s) {
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
cl_mem out_cl, bool do_shift) {
cl_mem out_cl) {
cl_int global_out_off = 0;
if (do_shift) {
// shift the image in slot 1 to slot 0, then place the new image in slot 1
global_out_off += (s->width*s->height) + (s->width/2)*(s->height/2)*2;
CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &out_cl));
CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_int), &global_out_off));
const size_t copy_work_size = global_out_off/8;
CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL,
&copy_work_size, NULL, 0, 0, NULL));
}
CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl));
CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl));
@ -72,3 +63,14 @@ void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
&loaduv_work_size, NULL, 0, 0, NULL));
}
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
size_t src_offset, size_t dst_offset, size_t size) {
CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &src));
CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_mem), &dst));
CL_CHECK(clSetKernelArg(s->copy_krnl, 2, sizeof(cl_int), &src_offset));
CL_CHECK(clSetKernelArg(s->copy_krnl, 3, sizeof(cl_int), &dst_offset));
const size_t copy_work_size = size/8;
CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL,
&copy_work_size, NULL, 0, 0, NULL));
}

@ -37,9 +37,11 @@ __kernel void loaduv(__global uchar8 const * const in,
out[gid + out_offset / 8] = inv;
}
__kernel void copy(__global uchar8 * inout,
int in_offset)
__kernel void copy(__global uchar8 * in,
__global uchar8 * out,
int in_offset,
int out_offset)
{
const int gid = get_global_id(0);
inout[gid] = inout[gid + in_offset / 8];
out[gid + out_offset / 8] = in[gid + in_offset / 8];
}

@ -13,4 +13,8 @@ void loadyuv_destroy(LoadYUVState* s);
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
cl_mem out_cl, bool do_shift = false);
cl_mem out_cl);
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
size_t src_offset, size_t dst_offset, size_t size);
Loading…
Cancel
Save