diff --git a/selfdrive/camerad/cameras/camera_common.h b/selfdrive/camerad/cameras/camera_common.h index 7698ee47f7..5e9da3bf65 100644 --- a/selfdrive/camerad/cameras/camera_common.h +++ b/selfdrive/camerad/cameras/camera_common.h @@ -10,7 +10,6 @@ #include "common/queue.h" #include "visionbuf.h" #include "common/visionimg.h" -#include "imgproc/utils.h" #include "messaging.hpp" #include "transforms/rgb_to_yuv.h" diff --git a/selfdrive/camerad/cameras/camera_qcom.cc b/selfdrive/camerad/cameras/camera_qcom.cc index fa649cbecc..a053a8726f 100644 --- a/selfdrive/camerad/cameras/camera_qcom.cc +++ b/selfdrive/camerad/cameras/camera_qcom.cc @@ -185,17 +185,6 @@ static void camera_init(VisionIpcServer *v, CameraState *s, int camera_id, int c s->buf.init(device_id, ctx, s, v, FRAME_BUF_COUNT, rgb_type, yuv_type, camera_release_buffer); } -cl_program build_conv_program(cl_device_id device_id, cl_context context, int image_w, int image_h, int filter_size) { - char args[4096]; - snprintf(args, sizeof(args), - "-cl-fast-relaxed-math -cl-denorms-are-zero " - "-DIMAGE_W=%d -DIMAGE_H=%d -DFLIP_RB=%d " - "-DFILTER_SIZE=%d -DHALF_FILTER_SIZE=%d -DTWICE_HALF_FILTER_SIZE=%d -DHALF_FILTER_SIZE_IMAGE_W=%d", - image_w, image_h, 1, - filter_size, filter_size/2, (filter_size/2)*2, (filter_size/2)*image_w); - return cl_program_from_file(context, device_id, "imgproc/conv.cl", args); -} - void cameras_init(VisionIpcServer *v, MultiCameraState *s, cl_device_id device_id, cl_context ctx) { char project_name[1024] = {0}; property_get("ro.boot.project_name", project_name, ""); @@ -239,19 +228,8 @@ void cameras_init(VisionIpcServer *v, MultiCameraState *s, cl_device_id device_i s->focus_bufs[i].allocate(0xb80); s->stats_bufs[i].allocate(0xb80); } - const int width = s->road_cam.buf.rgb_width/NUM_SEGMENTS_X; - const int height = s->road_cam.buf.rgb_height/NUM_SEGMENTS_Y; - s->prg_rgb_laplacian = build_conv_program(device_id, ctx, width, height, 3); - s->krnl_rgb_laplacian = CL_CHECK_ERR(clCreateKernel(s->prg_rgb_laplacian, "rgb2gray_conv2d", &err)); - // TODO: Removed CL_MEM_SVM_FINE_GRAIN_BUFFER, confirm it doesn't matter - s->rgb_conv_roi_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, - width * height * 3 * sizeof(uint8_t), NULL, &err)); - s->rgb_conv_result_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, - width * height * sizeof(int16_t), NULL, &err)); - s->rgb_conv_filter_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - 9 * sizeof(int16_t), (void*)&lapl_conv_krnl, &err)); - std::fill_n(s->lapres, std::size(s->lapres), 16160); + s->lap_conv = new LapConv(device_id, ctx, s->road_cam.buf.rgb_width, s->road_cam.buf.rgb_height, 3); } static void set_exposure(CameraState *s, float exposure_frac, float gain_frac) { @@ -1176,40 +1154,6 @@ static void ops_thread(MultiCameraState *s) { } } -static void update_lapmap(MultiCameraState *s, const CameraBuf *b, const int cnt) { - const size_t width = b->rgb_width / NUM_SEGMENTS_X; - const size_t height = b->rgb_height / NUM_SEGMENTS_Y; - static std::unique_ptr rgb_roi_buf = std::make_unique(width * height * 3); - static std::unique_ptr conv_result = std::make_unique(width * height); - - // sharpness scores - const int roi_id = cnt % std::size(s->lapres); // rolling roi - const int x_offset = ROI_X_MIN + roi_id % (ROI_X_MAX - ROI_X_MIN + 1); - const int y_offset = ROI_Y_MIN + roi_id / (ROI_X_MAX - ROI_X_MIN + 1); - - const uint8_t *rgb_addr_offset = (uint8_t *)b->cur_rgb_buf->addr + y_offset * height * FULL_STRIDE_X * 3 + x_offset * width * 3; - for (int i = 0; i < height; ++i) { - memcpy(rgb_roi_buf.get() + i * width * 3, rgb_addr_offset + i * FULL_STRIDE_X * 3, width * 3); - } - - constexpr int conv_cl_localMemSize = (CONV_LOCAL_WORKSIZE + 2 * (3 / 2)) * (CONV_LOCAL_WORKSIZE + 2 * (3 / 2)) * (3 * sizeof(uint8_t)); - CL_CHECK(clEnqueueWriteBuffer(b->q, s->rgb_conv_roi_cl, true, 0, width * height * 3 * sizeof(uint8_t), rgb_roi_buf.get(), 0, 0, 0)); - CL_CHECK(clSetKernelArg(s->krnl_rgb_laplacian, 0, sizeof(cl_mem), (void *)&s->rgb_conv_roi_cl)); - CL_CHECK(clSetKernelArg(s->krnl_rgb_laplacian, 1, sizeof(cl_mem), (void *)&s->rgb_conv_result_cl)); - CL_CHECK(clSetKernelArg(s->krnl_rgb_laplacian, 2, sizeof(cl_mem), (void *)&s->rgb_conv_filter_cl)); - CL_CHECK(clSetKernelArg(s->krnl_rgb_laplacian, 3, conv_cl_localMemSize, 0)); - cl_event conv_event; - CL_CHECK(clEnqueueNDRangeKernel(b->q, s->krnl_rgb_laplacian, 2, NULL, - (size_t[]){width, height}, (size_t[]){CONV_LOCAL_WORKSIZE, CONV_LOCAL_WORKSIZE}, 0, 0, &conv_event)); - clWaitForEvents(1, &conv_event); - CL_CHECK(clReleaseEvent(conv_event)); - - CL_CHECK(clEnqueueReadBuffer(b->q, s->rgb_conv_result_cl, true, 0, - width * height * sizeof(int16_t), conv_result.get(), 0, 0, 0)); - - s->lapres[roi_id] = get_lapmap_one(conv_result.get(), width, height); -} - static void setup_self_recover(CameraState *c, const uint16_t *lapres, size_t lapres_size) { const float lens_true_pos = c->lens_true_pos.load(); int self_recover = c->self_recover.load(); @@ -1238,7 +1182,8 @@ void process_driver_camera(MultiCameraState *s, CameraState *c, int cnt) { // called by processing_thread void process_road_camera(MultiCameraState *s, CameraState *c, int cnt) { const CameraBuf *b = &c->buf; - update_lapmap(s, b, cnt); + const int roi_id = cnt % std::size(s->lapres); // rolling roi + s->lapres[roi_id] = s->lap_conv->Update(b->q, (uint8_t *)b->cur_rgb_buf->addr, roi_id); setup_self_recover(c, &s->lapres[0], std::size(s->lapres)); MessageBuilder msg; @@ -1341,12 +1286,8 @@ void cameras_close(MultiCameraState *s) { s->focus_bufs[i].free(); s->stats_bufs[i].free(); } - CL_CHECK(clReleaseMemObject(s->rgb_conv_roi_cl)); - CL_CHECK(clReleaseMemObject(s->rgb_conv_result_cl)); - CL_CHECK(clReleaseMemObject(s->rgb_conv_filter_cl)); - CL_CHECK(clReleaseKernel(s->krnl_rgb_laplacian)); - CL_CHECK(clReleaseProgram(s->prg_rgb_laplacian)); + delete s->lap_conv; delete s->sm; delete s->pm; } diff --git a/selfdrive/camerad/cameras/camera_qcom.h b/selfdrive/camerad/cameras/camera_qcom.h index 45a914e89d..22da01d9e0 100644 --- a/selfdrive/camerad/cameras/camera_qcom.h +++ b/selfdrive/camerad/cameras/camera_qcom.h @@ -16,6 +16,7 @@ #include "common/mat.h" #include "common/util.h" +#include "imgproc/utils.h" #include "camera_common.h" @@ -93,22 +94,17 @@ typedef struct MultiCameraState { unique_fd ispif_fd; unique_fd msmcfg_fd; unique_fd v4l_fd; - - cl_mem rgb_conv_roi_cl, rgb_conv_result_cl, rgb_conv_filter_cl; uint16_t lapres[(ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1)]; VisionBuf focus_bufs[FRAME_BUF_COUNT]; VisionBuf stats_bufs[FRAME_BUF_COUNT]; - cl_program prg_rgb_laplacian; - cl_kernel krnl_rgb_laplacian; - CameraState road_cam; CameraState driver_cam; SubMaster *sm; PubMaster *pm; - + LapConv *lap_conv; } MultiCameraState; void actuator_move(CameraState *s, uint16_t target); diff --git a/selfdrive/camerad/imgproc/utils.cc b/selfdrive/camerad/imgproc/utils.cc index 4aeffac530..466c29dc65 100644 --- a/selfdrive/camerad/imgproc/utils.cc +++ b/selfdrive/camerad/imgproc/utils.cc @@ -1,7 +1,14 @@ #include "utils.h" +#include #include +#include #include #include + +const int16_t lapl_conv_krnl[9] = {0, 1, 0, + 1, -4, 1, + 0, 1, 0}; + // calculate score based on laplacians in one area uint16_t get_lapmap_one(const int16_t *lap, int x_pitch, int y_pitch) { const int size = x_pitch * y_pitch; @@ -34,4 +41,65 @@ bool is_blur(const uint16_t *lapmap, const size_t size) { } } return (bad_sum > LM_PREC_THRESH); -} \ No newline at end of file +} + +static cl_program build_conv_program(cl_device_id device_id, cl_context context, int image_w, int image_h, int filter_size) { + char args[4096]; + snprintf(args, sizeof(args), + "-cl-fast-relaxed-math -cl-denorms-are-zero " + "-DIMAGE_W=%d -DIMAGE_H=%d -DFLIP_RB=%d " + "-DFILTER_SIZE=%d -DHALF_FILTER_SIZE=%d -DTWICE_HALF_FILTER_SIZE=%d -DHALF_FILTER_SIZE_IMAGE_W=%d", + image_w, image_h, 1, + filter_size, filter_size/2, (filter_size/2)*2, (filter_size/2)*image_w); + return cl_program_from_file(context, device_id, "imgproc/conv.cl", args); +} + +LapConv::LapConv(cl_device_id device_id, cl_context ctx, int rgb_width, int rgb_height, int filter_size) + : width(rgb_width / NUM_SEGMENTS_X), height(rgb_height / NUM_SEGMENTS_Y), + roi_buf(width * height * 3), result_buf(width * height) { + + prg = build_conv_program(device_id, ctx, width, height, filter_size); + krnl = CL_CHECK_ERR(clCreateKernel(prg, "rgb2gray_conv2d", &err)); + // TODO: Removed CL_MEM_SVM_FINE_GRAIN_BUFFER, confirm it doesn't matter + roi_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, roi_buf.size() * sizeof(roi_buf[0]), NULL, &err)); + result_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, result_buf.size() * sizeof(result_buf[0]), NULL, &err)); + filter_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + 9 * sizeof(int16_t), (void *)&lapl_conv_krnl, &err)); +} + +LapConv::~LapConv() { + CL_CHECK(clReleaseMemObject(roi_cl)); + CL_CHECK(clReleaseMemObject(result_cl)); + CL_CHECK(clReleaseMemObject(filter_cl)); + CL_CHECK(clReleaseKernel(krnl)); + CL_CHECK(clReleaseProgram(prg)); +} + +uint16_t LapConv::Update(cl_command_queue q, const uint8_t *rgb_buf, const int roi_id) { + // sharpness scores + const int x_offset = ROI_X_MIN + roi_id % (ROI_X_MAX - ROI_X_MIN + 1); + const int y_offset = ROI_Y_MIN + roi_id / (ROI_X_MAX - ROI_X_MIN + 1); + + const uint8_t *rgb_offset = rgb_buf + y_offset * height * FULL_STRIDE_X * 3 + x_offset * width * 3; + for (int i = 0; i < height; ++i) { + memcpy(&roi_buf[i * width * 3], &rgb_offset[i * FULL_STRIDE_X * 3], width * 3); + } + + constexpr int local_mem_size = (CONV_LOCAL_WORKSIZE + 2 * (3 / 2)) * (CONV_LOCAL_WORKSIZE + 2 * (3 / 2)) * (3 * sizeof(uint8_t)); + const size_t global_work_size[] = {(size_t)width, (size_t)height}; + const size_t local_work_size[] = {CONV_LOCAL_WORKSIZE, CONV_LOCAL_WORKSIZE}; + + CL_CHECK(clEnqueueWriteBuffer(q, roi_cl, true, 0, roi_buf.size() * sizeof(roi_buf[0]), roi_buf.data(), 0, 0, 0)); + CL_CHECK(clSetKernelArg(krnl, 0, sizeof(cl_mem), (void *)&roi_cl)); + CL_CHECK(clSetKernelArg(krnl, 1, sizeof(cl_mem), (void *)&result_cl)); + CL_CHECK(clSetKernelArg(krnl, 2, sizeof(cl_mem), (void *)&filter_cl)); + CL_CHECK(clSetKernelArg(krnl, 3, local_mem_size, 0)); + cl_event conv_event; + CL_CHECK(clEnqueueNDRangeKernel(q, krnl, 2, NULL, global_work_size, local_work_size, 0, 0, &conv_event)); + CL_CHECK(clWaitForEvents(1, &conv_event)); + CL_CHECK(clReleaseEvent(conv_event)); + CL_CHECK(clEnqueueReadBuffer(q, result_cl, true, 0, + result_buf.size() * sizeof(result_buf[0]), result_buf.data(), 0, 0, 0)); + + return get_lapmap_one(result_buf.data(), width, height); +} diff --git a/selfdrive/camerad/imgproc/utils.h b/selfdrive/camerad/imgproc/utils.h index 4928f55a68..e33cfe9c81 100644 --- a/selfdrive/camerad/imgproc/utils.h +++ b/selfdrive/camerad/imgproc/utils.h @@ -2,6 +2,9 @@ #include #include +#include +#include "clutil.h" + #define NUM_SEGMENTS_X 8 #define NUM_SEGMENTS_Y 6 @@ -19,9 +22,19 @@ #define CONV_LOCAL_WORKSIZE 16 -const int16_t lapl_conv_krnl[9] = {0, 1, 0, - 1, -4, 1, - 0, 1, 0}; +class LapConv { +public: + LapConv(cl_device_id device_id, cl_context ctx, int rgb_width, int rgb_height, int filter_size); + ~LapConv(); + uint16_t Update(cl_command_queue q, const uint8_t *rgb_buf, const int roi_id); + +private: + cl_mem roi_cl, result_cl, filter_cl; + cl_program prg; + cl_kernel krnl; + const int width, height; + std::vector roi_buf; + std::vector result_buf; +}; -uint16_t get_lapmap_one(const int16_t *lap, int x_pitch, int y_pitch); bool is_blur(const uint16_t *lapmap, const size_t size);