From 4c6b7c3c444fbba7599be85a9a254ceb6490094e Mon Sep 17 00:00:00 2001
From: Adeeb Shihadeh
Date: Mon, 17 Feb 2025 17:52:10 -0800
Subject: [PATCH] camerad: remove GPU debayer (#34610)

---
 system/camerad/cameras/camera_common.cc |  64 +-----
 system/camerad/cameras/camera_common.h  |   2 -
 system/camerad/cameras/process_raw.cl   | 252 ------------------------
 3 files changed, 3 insertions(+), 315 deletions(-)
 delete mode 100644 system/camerad/cameras/process_raw.cl

diff --git a/system/camerad/cameras/camera_common.cc b/system/camerad/cameras/camera_common.cc
index 666d027beb..4d3215ba32 100644
--- a/system/camerad/cameras/camera_common.cc
+++ b/system/camerad/cameras/camera_common.cc
@@ -3,58 +3,10 @@
 #include
 #include
 
-#include "common/clutil.h"
 #include "common/swaglog.h"
-
 #include "system/camerad/cameras/spectra.h"
 
-class ImgProc {
-public:
-  ImgProc(cl_device_id device_id, cl_context context, const CameraBuf *b, const SensorInfo *sensor, int camera_num, int buf_width, int uv_offset) {
-    char args[4096];
-    snprintf(args, sizeof(args),
-             "-cl-fast-relaxed-math -cl-denorms-are-zero -Isensors "
-             "-DFRAME_WIDTH=%d -DFRAME_HEIGHT=%d -DFRAME_STRIDE=%d -DFRAME_OFFSET=%d "
-             "-DRGB_WIDTH=%d -DRGB_HEIGHT=%d -DYUV_STRIDE=%d -DUV_OFFSET=%d "
-             "-DSENSOR_ID=%hu -DHDR_OFFSET=%d -DVIGNETTING=%d ",
-             sensor->frame_width, sensor->frame_height, sensor->hdr_offset > 0 ? sensor->frame_stride * 2 : sensor->frame_stride, sensor->frame_offset,
-             b->out_img_width, b->out_img_height, buf_width, uv_offset,
-             static_cast(sensor->image_sensor), sensor->hdr_offset, camera_num == 1);
-    const char *cl_file = "cameras/process_raw.cl";
-    cl_program prg_imgproc = cl_program_from_file(context, device_id, cl_file, args);
-    krnl_ = CL_CHECK_ERR(clCreateKernel(prg_imgproc, "process_raw", &err));
-    CL_CHECK(clReleaseProgram(prg_imgproc));
-
-    const cl_queue_properties props[] = {0};  //CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0};
-    queue = CL_CHECK_ERR(clCreateCommandQueueWithProperties(context, device_id, props, &err));
-  }
-
-  void runKernel(cl_mem cam_buf_cl, cl_mem buf_cl, int width, int height, int expo_time) {
-    CL_CHECK(clSetKernelArg(krnl_, 0, sizeof(cl_mem), &cam_buf_cl));
-    CL_CHECK(clSetKernelArg(krnl_, 1, sizeof(cl_mem), &buf_cl));
-    CL_CHECK(clSetKernelArg(krnl_, 2, sizeof(cl_int), &expo_time));
-
-    const size_t globalWorkSize[] = {size_t(width / 2), size_t(height / 2)};
-    const int imgproc_local_worksize = 16;
-    const size_t localWorkSize[] = {imgproc_local_worksize, imgproc_local_worksize};
-
-    cl_event event;
-    CL_CHECK(clEnqueueNDRangeKernel(queue, krnl_, 2, NULL, globalWorkSize, localWorkSize, 0, 0, &event));
-    clWaitForEvents(1, &event);
-    CL_CHECK(clReleaseEvent(event));
-  }
-
-  ~ImgProc() {
-    CL_CHECK(clReleaseKernel(krnl_));
-    CL_CHECK(clReleaseCommandQueue(queue));
-  }
-
-private:
-  cl_kernel krnl_;
-  cl_command_queue queue;
-};
-
 void CameraBuf::init(cl_device_id device_id, cl_context context, SpectraCamera *cam, VisionIpcServer * v, int frame_cnt, VisionStreamType type) {
   vipc_server = v;
   stream_type = type;
@@ -86,8 +38,6 @@ void CameraBuf::init(cl_device_id device_id, cl_context context, SpectraCamera *
 
   vipc_server->create_buffers_with_sizes(stream_type, VIPC_BUFFER_COUNT, out_img_width, out_img_height, nv12_size, cam->stride, cam->uv_offset);
   LOGD("created %d YUV vipc buffers with size %dx%d", VIPC_BUFFER_COUNT, cam->stride, cam->y_height);
-
-  if (is_raw) imgproc = new ImgProc(device_id, context, this, sensor, cam->cc.camera_num, cam->stride, cam->uv_offset);
 }
 
 CameraBuf::~CameraBuf() {
@@ -96,7 +46,6 @@ CameraBuf::~CameraBuf() {
       camera_bufs_raw[i].free();
     }
   }
-  if (imgproc) delete imgproc;
 }
 
 bool CameraBuf::acquire(int expo_time) {
@@ -109,16 +58,9 @@ bool CameraBuf::acquire(int expo_time) {
 
   cur_frame_data = frame_metadata[cur_buf_idx];
   cur_camera_buf = &camera_bufs_raw[cur_buf_idx];
-  if (is_raw) {
-    cur_yuv_buf = vipc_server->get_buffer(stream_type);
-
-    double start_time = millis_since_boot();
-    imgproc->runKernel(camera_bufs_raw[cur_buf_idx].buf_cl, cur_yuv_buf->buf_cl, out_img_width, out_img_height, expo_time);
-    cur_frame_data.processing_time = (millis_since_boot() - start_time) / 1000.0;
-  } else {
-    cur_yuv_buf = vipc_server->get_buffer(stream_type, cur_buf_idx);
-    cur_frame_data.processing_time = (double)(cur_frame_data.timestamp_end_of_isp - cur_frame_data.timestamp_eof)*1e-9;
-  }
+
+  cur_yuv_buf = vipc_server->get_buffer(stream_type, cur_buf_idx);
+  cur_frame_data.processing_time = (double)(cur_frame_data.timestamp_end_of_isp - cur_frame_data.timestamp_eof)*1e-9;
 
   VisionIpcBufExtra extra = {
     cur_frame_data.frame_id,
diff --git a/system/camerad/cameras/camera_common.h b/system/camerad/cameras/camera_common.h
index 242e17ef25..dea040d928 100644
--- a/system/camerad/cameras/camera_common.h
+++ b/system/camerad/cameras/camera_common.h
@@ -21,11 +21,9 @@ typedef struct FrameMetadata {
 
 class SpectraCamera;
 class CameraState;
-class ImgProc;
 
 class CameraBuf {
 private:
-  ImgProc *imgproc = nullptr;
   int cur_buf_idx;
   SafeQueue safe_queue;
   int frame_buf_count;
diff --git a/system/camerad/cameras/process_raw.cl b/system/camerad/cameras/process_raw.cl
deleted file mode 100644
index ff6060d855..0000000000
--- a/system/camerad/cameras/process_raw.cl
+++ /dev/null
@@ -1,252 +0,0 @@
-#include "ar0231_cl.h"
-#include "ox03c10_cl.h"
-#include "os04c10_cl.h"
-
-#define UV_WIDTH RGB_WIDTH / 2
-#define UV_HEIGHT RGB_HEIGHT / 2
-
-#define RGB_TO_Y(r, g, b) ((((mul24(b, 13) + mul24(g, 65) + mul24(r, 33)) + 64) >> 7) + 16)
-#define RGB_TO_U(r, g, b) ((mul24(b, 56) - mul24(g, 37) - mul24(r, 19) + 0x8080) >> 8)
-#define RGB_TO_V(r, g, b) ((mul24(r, 56) - mul24(g, 47) - mul24(b, 9) + 0x8080) >> 8)
-#define AVERAGE(x, y, z, w) ((convert_ushort(x) + convert_ushort(y) + convert_ushort(z) + convert_ushort(w) + 1) >> 1)
-
-#if defined(BGGR)
-  #define ROW_READ_ORDER (int[]){3, 2, 1, 0}
-  #define RGB_WRITE_ORDER (int[]){2, 3, 0, 1}
-#else
-  #define ROW_READ_ORDER (int[]){0, 1, 2, 3}
-  #define RGB_WRITE_ORDER (int[]){0, 1, 2, 3}
-#endif
-
-float get_vignetting_s(float r) {
-#if defined(VIGNETTE_PROFILE_4DT6MM)
-  if (r < 100000) {
-    return 1.0f + 0.0000013f*r;
-  } else if (r < 250000) {
-    return 1.02f + 0.0000011f*r;
-  } else if (r < 400000) {
-    return 0.92f + 0.0000015f*r;
-  } else {
-    return 0.44f + 0.0000027f*r;
-  }
-#elif defined(VIGNETTE_PROFILE_8DT0MM)
-  if (r < 62500) {
-    return (1.0f + 0.0000008f*r);
-  } else if (r < 490000) {
-    return (0.9625f + 0.0000014f*r);
-  } else if (r < 1102500) {
-    return (1.26434f + 0.0000000000016f*r*r);
-  } else {
-    return (0.53503625f + 0.0000000000022f*r*r);
-  }
-#else
-  return 1.0f;
-#endif
-}
-
-int4 parse_12bit(uchar8 pvs) {
-  // lower bits scambled?
-  return (int4)(((int)pvs.s0<<4) + (pvs.s1>>4),
-                ((int)pvs.s2<<4) + (pvs.s4&0xF),
-                ((int)pvs.s3<<4) + (pvs.s4>>4),
-                ((int)pvs.s5<<4) + (pvs.s7&0xF));
-}
-
-int4 parse_10bit(uchar8 pvs, uchar ext, bool aligned) {
-  if (aligned) {
-    return (int4)(((int)pvs.s0 << 2) + (pvs.s1 & 0b00000011),
-                  ((int)pvs.s2 << 2) + ((pvs.s6 & 0b11000000) / 64),
-                  ((int)pvs.s3 << 2) + ((pvs.s6 & 0b00110000) / 16),
-                  ((int)pvs.s4 << 2) + ((pvs.s6 & 0b00001100) / 4));
-  } else {
-    return (int4)(((int)pvs.s0 << 2) + ((pvs.s3 & 0b00110000) / 16),
-                  ((int)pvs.s1 << 2) + ((pvs.s3 & 0b00001100) / 4),
-                  ((int)pvs.s2 << 2) + ((pvs.s3 & 0b00000011)),
-                  ((int)pvs.s4 << 2) + ((ext & 0b11000000) / 64));
-  }
-}
-
-float get_k(float a, float b, float c, float d) {
-  return 2.0 - (fabs(a - b) + fabs(c - d));
-}
-
-__kernel void process_raw(const __global uchar * in, __global uchar * out, int expo_time)
-{
-  const int gid_x = get_global_id(0);
-  const int gid_y = get_global_id(1);
-
-  // estimate vignetting
-  #if VIGNETTING
-    int gx = (gid_x*2 - RGB_WIDTH/2);
-    int gy = (gid_y*2 - RGB_HEIGHT/2);
-    const float vignette_factor = get_vignetting_s(gx*gx + gy*gy);
-  #else
-    const float vignette_factor = 1.0;
-  #endif
-
-  const int row_before_offset = (gid_y == 0) ? 2 : 0;
-  const int row_after_offset = (gid_y == (RGB_HEIGHT/2 - 1)) ? 1 : 3;
-
-  float3 rgb_tmp;
-  uchar3 rgb_out[4];  // output is 2x2 window
-
-  // read offset
-  int start_idx;
-  #if BIT_DEPTH == 10
-    bool aligned10;
-    if (gid_x % 2 == 0) {
-      aligned10 = true;
-      start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (5 * gid_x / 2 - 2) + (FRAME_STRIDE * FRAME_OFFSET);
-    } else {
-      aligned10 = false;
-      start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (5 * (gid_x - 1) / 2 + 1) + (FRAME_STRIDE * FRAME_OFFSET);
-    }
-  #else
-    start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (3 * gid_x - 2) + (FRAME_STRIDE * FRAME_OFFSET);
-  #endif
-
-  // read in 4 rows, 8 uchars each
-  uchar8 dat[4];
-  // row_before
-  dat[0] = vload8(0, in + start_idx + FRAME_STRIDE*row_before_offset);
-  // row_0
-  if (gid_x == 0 && gid_y == 0) {
-    // this wasn't a problem due to extra rows
-    dat[1] = vload8(0, in + start_idx + FRAME_STRIDE*1 + 2);
-    dat[1] = (uchar8)(0, 0, dat[1].s0, dat[1].s1, dat[1].s2, dat[1].s3, dat[1].s4, dat[1].s5);
-  } else {
-    dat[1] = vload8(0, in + start_idx + FRAME_STRIDE*1);
-  }
-  // row_1
-  dat[2] = vload8(0, in + start_idx + FRAME_STRIDE*2);
-  // row_after
-  dat[3] = vload8(0, in + start_idx + FRAME_STRIDE*row_after_offset);
-  // need extra bit for 10-bit, 4 rows, 1 uchar each
-  #if BIT_DEPTH == 10
-    uchar extra_dat[4];
-    if (!aligned10) {
-      extra_dat[0] = in[start_idx + FRAME_STRIDE*row_before_offset + 8];
-      extra_dat[1] = in[start_idx + FRAME_STRIDE*1 + 8];
-      extra_dat[2] = in[start_idx + FRAME_STRIDE*2 + 8];
-      extra_dat[3] = in[start_idx + FRAME_STRIDE*row_after_offset + 8];
-    }
-  #endif
-
-  // read odd rows for staggered second exposure
-  #if HDR_OFFSET > 0
-    uchar8 short_dat[4];
-    short_dat[0] = vload8(0, in + start_idx + FRAME_STRIDE*(row_before_offset+HDR_OFFSET/2) + FRAME_STRIDE/2);
-    short_dat[1] = vload8(0, in + start_idx + FRAME_STRIDE*(1+HDR_OFFSET/2) + FRAME_STRIDE/2);
-    short_dat[2] = vload8(0, in + start_idx + FRAME_STRIDE*(2+HDR_OFFSET/2) + FRAME_STRIDE/2);
-    short_dat[3] = vload8(0, in + start_idx + FRAME_STRIDE*(row_after_offset+HDR_OFFSET/2) + FRAME_STRIDE/2);
-    #if BIT_DEPTH == 10
-      uchar short_extra_dat[4];
-      if (!aligned10) {
-        short_extra_dat[0] = in[start_idx + FRAME_STRIDE*(row_before_offset+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
-        short_extra_dat[1] = in[start_idx + FRAME_STRIDE*(1+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
-        short_extra_dat[2] = in[start_idx + FRAME_STRIDE*(2+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
-        short_extra_dat[3] = in[start_idx + FRAME_STRIDE*(row_after_offset+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
-      }
-    #endif
-  #endif
-
-  // parse into floats 0.0-1.0
-  float4 v_rows[4];
-  #if BIT_DEPTH == 10
-    // for now it's always HDR
-    int4 parsed = parse_10bit(dat[0], extra_dat[0], aligned10);
-    int4 short_parsed = parse_10bit(short_dat[0], short_extra_dat[0], aligned10);
-    v_rows[ROW_READ_ORDER[0]] = normalize_pv_hdr(parsed, short_parsed, vignette_factor, expo_time);
-    parsed = parse_10bit(dat[1], extra_dat[1], aligned10);
-    short_parsed = parse_10bit(short_dat[1], short_extra_dat[1], aligned10);
-    v_rows[ROW_READ_ORDER[1]] = normalize_pv_hdr(parsed, short_parsed, vignette_factor, expo_time);
-    parsed = parse_10bit(dat[2], extra_dat[2], aligned10);
-    short_parsed = parse_10bit(short_dat[2], short_extra_dat[2], aligned10);
-    v_rows[ROW_READ_ORDER[2]] = normalize_pv_hdr(parsed, short_parsed, vignette_factor, expo_time);
-    parsed = parse_10bit(dat[3], extra_dat[3], aligned10);
-    short_parsed = parse_10bit(short_dat[3], short_extra_dat[3], aligned10);
-    v_rows[ROW_READ_ORDER[3]] = normalize_pv_hdr(parsed, short_parsed, vignette_factor, expo_time);
-  #else
-    // no HDR here
-    int4 parsed = parse_12bit(dat[0]);
-    v_rows[ROW_READ_ORDER[0]] = normalize_pv(parsed, vignette_factor);
-    parsed = parse_12bit(dat[1]);
-    v_rows[ROW_READ_ORDER[1]] = normalize_pv(parsed, vignette_factor);
-    parsed = parse_12bit(dat[2]);
-    v_rows[ROW_READ_ORDER[2]] = normalize_pv(parsed, vignette_factor);
-    parsed = parse_12bit(dat[3]);
-    v_rows[ROW_READ_ORDER[3]] = normalize_pv(parsed, vignette_factor);
-  #endif
-
-  // mirror padding
-  if (gid_x == 0) {
-    v_rows[0].s0 = v_rows[0].s2;
-    v_rows[1].s0 = v_rows[1].s2;
-    v_rows[2].s0 = v_rows[2].s2;
-    v_rows[3].s0 = v_rows[3].s2;
-  } else if (gid_x == RGB_WIDTH/2 - 1) {
-    v_rows[0].s3 = v_rows[0].s1;
-    v_rows[1].s3 = v_rows[1].s1;
-    v_rows[2].s3 = v_rows[2].s1;
-    v_rows[3].s3 = v_rows[3].s1;
-  }
-
-  // debayering
-  // a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf
-  const float k01 = get_k(v_rows[0].s0, v_rows[1].s1, v_rows[0].s2, v_rows[1].s1);
-  const float k02 = get_k(v_rows[0].s2, v_rows[1].s1, v_rows[2].s2, v_rows[1].s1);
-  const float k03 = get_k(v_rows[2].s0, v_rows[1].s1, v_rows[2].s2, v_rows[1].s1);
-  const float k04 = get_k(v_rows[0].s0, v_rows[1].s1, v_rows[2].s0, v_rows[1].s1);
-  rgb_tmp.x = (k02*v_rows[1].s2+k04*v_rows[1].s0)/(k02+k04);  // R_G1
-  rgb_tmp.y = v_rows[1].s1;  // G1(R)
-  rgb_tmp.z = (k01*v_rows[0].s1+k03*v_rows[2].s1)/(k01+k03);  // B_G1
-  rgb_out[RGB_WRITE_ORDER[0]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb_tmp, 0.0, 1.0)), expo_time) * 255.0);
-
-  const float k11 = get_k(v_rows[0].s1, v_rows[2].s1, v_rows[0].s3, v_rows[2].s3);
-  const float k12 = get_k(v_rows[0].s2, v_rows[1].s1, v_rows[1].s3, v_rows[2].s2);
-  const float k13 = get_k(v_rows[0].s1, v_rows[0].s3, v_rows[2].s1, v_rows[2].s3);
-  const float k14 = get_k(v_rows[0].s2, v_rows[1].s3, v_rows[2].s2, v_rows[1].s1);
-  rgb_tmp.x = v_rows[1].s2;  // R
-  rgb_tmp.y = (k11*(v_rows[0].s2+v_rows[2].s2)*0.5+k13*(v_rows[1].s3+v_rows[1].s1)*0.5)/(k11+k13);  // G_R
-  rgb_tmp.z = (k12*(v_rows[0].s3+v_rows[2].s1)*0.5+k14*(v_rows[0].s1+v_rows[2].s3)*0.5)/(k12+k14);  // B_R
-  rgb_out[RGB_WRITE_ORDER[1]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb_tmp, 0.0, 1.0)), expo_time) * 255.0);
-
-  const float k21 = get_k(v_rows[1].s0, v_rows[3].s0, v_rows[1].s2, v_rows[3].s2);
-  const float k22 = get_k(v_rows[1].s1, v_rows[2].s0, v_rows[2].s2, v_rows[3].s1);
-  const float k23 = get_k(v_rows[1].s0, v_rows[1].s2, v_rows[3].s0, v_rows[3].s2);
-  const float k24 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[3].s1, v_rows[2].s0);
-  rgb_tmp.x = (k22*(v_rows[1].s2+v_rows[3].s0)*0.5+k24*(v_rows[1].s0+v_rows[3].s2)*0.5)/(k22+k24);  // R_B
-  rgb_tmp.y = (k21*(v_rows[1].s1+v_rows[3].s1)*0.5+k23*(v_rows[2].s2+v_rows[2].s0)*0.5)/(k21+k23);  // G_B
-  rgb_tmp.z = v_rows[2].s1;  // B
-  rgb_out[RGB_WRITE_ORDER[2]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb_tmp, 0.0, 1.0)), expo_time) * 255.0);
-
-  const float k31 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[1].s3, v_rows[2].s2);
-  const float k32 = get_k(v_rows[1].s3, v_rows[2].s2, v_rows[3].s3, v_rows[2].s2);
-  const float k33 = get_k(v_rows[3].s1, v_rows[2].s2, v_rows[3].s3, v_rows[2].s2);
-  const float k34 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[3].s1, v_rows[2].s2);
-  rgb_tmp.x = (k31*v_rows[1].s2+k33*v_rows[3].s2)/(k31+k33);  // R_G2
-  rgb_tmp.y = v_rows[2].s2;  // G2(B)
-  rgb_tmp.z = (k32*v_rows[2].s3+k34*v_rows[2].s1)/(k32+k34);  // B_G2
-  rgb_out[RGB_WRITE_ORDER[3]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb_tmp, 0.0, 1.0)), expo_time) * 255.0);
-
-  // rgb2yuv(nv12)
-  uchar2 yy = (uchar2)(
-    RGB_TO_Y(rgb_out[0].s0, rgb_out[0].s1, rgb_out[0].s2),
-    RGB_TO_Y(rgb_out[1].s0, rgb_out[1].s1, rgb_out[1].s2)
-  );
-  vstore2(yy, 0, out + mad24(gid_y * 2, YUV_STRIDE, gid_x * 2));
-  yy = (uchar2)(
-    RGB_TO_Y(rgb_out[2].s0, rgb_out[2].s1, rgb_out[2].s2),
-    RGB_TO_Y(rgb_out[3].s0, rgb_out[3].s1, rgb_out[3].s2)
-  );
-  vstore2(yy, 0, out + mad24(gid_y * 2 + 1, YUV_STRIDE, gid_x * 2));
-
-  const short ar = AVERAGE(rgb_out[0].s0, rgb_out[1].s0, rgb_out[2].s0, rgb_out[3].s0);
-  const short ag = AVERAGE(rgb_out[0].s1, rgb_out[1].s1, rgb_out[2].s1, rgb_out[3].s1);
-  const short ab = AVERAGE(rgb_out[0].s2, rgb_out[1].s2, rgb_out[2].s2, rgb_out[3].s2);
-  uchar2 uv = (uchar2)(
-    RGB_TO_U(ar, ag, ab),
-    RGB_TO_V(ar, ag, ab)
-  );
-  vstore2(uv, 0, out + UV_OFFSET + mad24(gid_y, YUV_STRIDE, gid_x * 2));
-}
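
Appended for reference, not part of the patch above: the fixed-point RGB-to-NV12 math at the tail of the removed kernel (the RGB_TO_Y / RGB_TO_U / RGB_TO_V and AVERAGE macros in process_raw.cl) can be exercised on its own. The sketch below is a minimal standalone C++ translation of those macros; the helper names, the use of plain multiplication in place of mul24, and the sample pixel values are illustrative assumptions, not code from the repository. Note that AVERAGE deliberately returns twice the mean of the 2x2 block (sum of four values >> 1), and the >> 8 in the U/V macros folds that factor of two back in, which works out to BT.601 limited-range chroma.

// sketch_rgb_to_nv12.cc -- hypothetical standalone check of the removed kernel's
// RGB -> NV12 (BT.601 limited range) fixed-point conversion
#include <cstdint>
#include <cstdio>

// mirrors RGB_TO_Y: Y = 16 + ((33*R + 65*G + 13*B + 64) >> 7)
static uint8_t rgb_to_y(int r, int g, int b) {
  return (uint8_t)((((b * 13 + g * 65 + r * 33) + 64) >> 7) + 16);
}

// mirrors RGB_TO_U / RGB_TO_V; inputs are AVERAGE() outputs, i.e. 2x the 2x2 mean,
// which the >> 8 shift compensates for
static uint8_t rgb_to_u(int r2, int g2, int b2) {
  return (uint8_t)((b2 * 56 - g2 * 37 - r2 * 19 + 0x8080) >> 8);
}
static uint8_t rgb_to_v(int r2, int g2, int b2) {
  return (uint8_t)((r2 * 56 - g2 * 47 - b2 * 9 + 0x8080) >> 8);
}

// mirrors AVERAGE: twice the mean of the four samples
static int average2x(int x, int y, int z, int w) {
  return (x + y + z + w + 1) >> 1;
}

int main() {
  // one debayered 2x2 RGB window (illustrative values) -> 4 luma samples + 1 UV pair
  const int rgb[4][3] = {{200, 180, 160}, {198, 182, 161}, {201, 179, 158}, {199, 181, 160}};
  for (int i = 0; i < 4; i++) {
    printf("Y%d = %d\n", i, rgb_to_y(rgb[i][0], rgb[i][1], rgb[i][2]));
  }
  const int ar = average2x(rgb[0][0], rgb[1][0], rgb[2][0], rgb[3][0]);
  const int ag = average2x(rgb[0][1], rgb[1][1], rgb[2][1], rgb[3][1]);
  const int ab = average2x(rgb[0][2], rgb[1][2], rgb[2][2], rgb[3][2]);
  printf("U = %d, V = %d\n", rgb_to_u(ar, ag, ab), rgb_to_v(ar, ag, ab));
  return 0;
}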