From edf9522bc064aa30b348e53c674b052c860cc9fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Harald=20Sch=C3=A4fer?= Date: Sat, 28 Sep 2024 18:34:31 -0700 Subject: [PATCH] Model and YUV pipeline to uint8 (#33671) * Squash * 78cec5a0-577b-49ac-b443-f7cd327649bd/400 * bump tinygrad --- selfdrive/modeld/models/commonmodel.cc | 14 +++++++------- selfdrive/modeld/models/commonmodel.h | 4 ++-- selfdrive/modeld/models/commonmodel.pxd | 2 +- selfdrive/modeld/models/commonmodel_pyx.pyx | 4 ++-- selfdrive/modeld/models/supercombo.onnx | 4 ++-- selfdrive/modeld/transforms/loadyuv.cl | 18 ++++++++---------- tinygrad_repo | 2 +- 7 files changed, 23 insertions(+), 25 deletions(-) diff --git a/selfdrive/modeld/models/commonmodel.cc b/selfdrive/modeld/models/commonmodel.cc index 57c14dfa88..d2d37bbc5a 100644 --- a/selfdrive/modeld/models/commonmodel.cc +++ b/selfdrive/modeld/models/commonmodel.cc @@ -7,28 +7,28 @@ #include "common/clutil.h" ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) { - input_frames = std::make_unique(buf_size); + input_frames = std::make_unique(buf_size); q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err)); y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err)); u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err)); v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err)); - net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(float), NULL, &err)); + net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(uint8_t), NULL, &err)); transform_init(&transform, context, device_id); loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT); } -float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) { +uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) { transform_queue(&this->transform, q, - yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset, - y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection); + yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset, + y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection); if (output == NULL) { loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl); - std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(float) * MODEL_FRAME_SIZE); - CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr)); + std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(uint8_t) * MODEL_FRAME_SIZE); + CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr)); clFinish(q); return &input_frames[0]; } else { diff --git a/selfdrive/modeld/models/commonmodel.h b/selfdrive/modeld/models/commonmodel.h index 0b5d87fd6c..ea39466670 100644 --- a/selfdrive/modeld/models/commonmodel.h +++ b/selfdrive/modeld/models/commonmodel.h @@ -20,7 +20,7 @@ class ModelFrame { public: ModelFrame(cl_device_id device_id, cl_context context); ~ModelFrame(); - float* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output); + uint8_t* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output); const int MODEL_WIDTH = 512; const int MODEL_HEIGHT = 256; @@ -32,5 +32,5 @@ private: LoadYUVState loadyuv; cl_command_queue q; cl_mem y_cl, u_cl, v_cl, net_input_cl; - std::unique_ptr input_frames; + std::unique_ptr input_frames; }; diff --git a/selfdrive/modeld/models/commonmodel.pxd b/selfdrive/modeld/models/commonmodel.pxd index f37014219d..3348af3f17 100644 --- a/selfdrive/modeld/models/commonmodel.pxd +++ b/selfdrive/modeld/models/commonmodel.pxd @@ -15,4 +15,4 @@ cdef extern from "selfdrive/modeld/models/commonmodel.h": cppclass ModelFrame: int buf_size ModelFrame(cl_device_id, cl_context) - float * prepare(cl_mem, int, int, int, int, mat3, cl_mem*) + unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*) diff --git a/selfdrive/modeld/models/commonmodel_pyx.pyx b/selfdrive/modeld/models/commonmodel_pyx.pyx index 46ac575cc2..99f9c5dc17 100644 --- a/selfdrive/modeld/models/commonmodel_pyx.pyx +++ b/selfdrive/modeld/models/commonmodel_pyx.pyx @@ -35,11 +35,11 @@ cdef class ModelFrame: def prepare(self, VisionBuf buf, float[:] projection, CLMem output): cdef mat3 cprojection memcpy(cprojection.v, &projection[0], 9*sizeof(float)) - cdef float * data + cdef unsigned char * data if output is None: data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, NULL) else: data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, output.mem) if not data: return None - return np.asarray( data) + return np.asarray( data) diff --git a/selfdrive/modeld/models/supercombo.onnx b/selfdrive/modeld/models/supercombo.onnx index 8b7126c44a..aeb3ea3b2b 100644 --- a/selfdrive/modeld/models/supercombo.onnx +++ b/selfdrive/modeld/models/supercombo.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb2018c74cdd9e5cb070ec7bed7f8581fabd55e39057d0a03aaffd2e42408154 -size 62486347 +oid sha256:47417a13d8a8d6af6a1562834eee538e3b43242c6e277ab6f1978a78a6785b7d +size 62486469 diff --git a/selfdrive/modeld/transforms/loadyuv.cl b/selfdrive/modeld/transforms/loadyuv.cl index 7dd3d973a3..3a6b3ebc4f 100644 --- a/selfdrive/modeld/transforms/loadyuv.cl +++ b/selfdrive/modeld/transforms/loadyuv.cl @@ -1,7 +1,7 @@ #define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2)) __kernel void loadys(__global uchar8 const * const Y, - __global float * out, + __global uchar * out, int out_offset) { const int gid = get_global_id(0); @@ -10,13 +10,12 @@ __kernel void loadys(__global uchar8 const * const Y, const int ox = ois % TRANSFORMED_WIDTH; const uchar8 ys = Y[gid]; - const float8 ysf = convert_float8(ys); // 02 // 13 - __global float* outy0; - __global float* outy1; + __global uchar* outy0; + __global uchar* outy1; if ((oy & 1) == 0) { outy0 = out + out_offset; //y0 outy1 = out + out_offset + UV_SIZE*2; //y2 @@ -25,21 +24,20 @@ __kernel void loadys(__global uchar8 const * const Y, outy1 = out + out_offset + UV_SIZE*3; //y3 } - vstore4(ysf.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2); - vstore4(ysf.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2); + vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2); + vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2); } __kernel void loaduv(__global uchar8 const * const in, - __global float8 * out, + __global uchar8 * out, int out_offset) { const int gid = get_global_id(0); const uchar8 inv = in[gid]; - const float8 outv = convert_float8(inv); - out[gid + out_offset / 8] = outv; + out[gid + out_offset / 8] = inv; } -__kernel void copy(__global float8 * inout, +__kernel void copy(__global uchar8 * inout, int in_offset) { const int gid = get_global_id(0); diff --git a/tinygrad_repo b/tinygrad_repo index 3e15fa0dae..9dda6d260d 160000 --- a/tinygrad_repo +++ b/tinygrad_repo @@ -1 +1 @@ -Subproject commit 3e15fa0daefae75e2ddef98f82be5b5d37820631 +Subproject commit 9dda6d260db0255750bacff61e3cee1e580567e1