diff --git a/selfdrive/camerad/cameras/camera_common.cc b/selfdrive/camerad/cameras/camera_common.cc index 676e740811..3e4814e65c 100644 --- a/selfdrive/camerad/cameras/camera_common.cc +++ b/selfdrive/camerad/cameras/camera_common.cc @@ -52,9 +52,9 @@ public: CL_CHECK(clSetKernelArg(krnl_, 0, sizeof(cl_mem), &cam_buf_cl)); CL_CHECK(clSetKernelArg(krnl_, 1, sizeof(cl_mem), &buf_cl)); + const size_t globalWorkSize[] = {size_t(width / 2), size_t(height / 2)}; const int debayer_local_worksize = 16; - constexpr int localMemSize = (debayer_local_worksize + 2 * (3 / 2)) * (debayer_local_worksize + 2 * (3 / 2)) * sizeof(short int); - const size_t globalWorkSize[] = {size_t(width), size_t(height)}; + constexpr int localMemSize = (debayer_local_worksize * 2 + 2) * (debayer_local_worksize * 2 + 2) * 2; const size_t localWorkSize[] = {debayer_local_worksize, debayer_local_worksize}; CL_CHECK(clSetKernelArg(krnl_, 2, localMemSize, 0)); CL_CHECK(clSetKernelArg(krnl_, 3, sizeof(float), &black_level)); @@ -141,6 +141,7 @@ bool CameraBuf::acquire() { cur_frame_data = camera_bufs_metadata[cur_buf_idx]; cur_rgb_buf = vipc_server->get_buffer(rgb_type); + cur_yuv_buf = vipc_server->get_buffer(yuv_type); cl_mem camrabuf_cl = camera_bufs[cur_buf_idx].buf_cl; cl_event event; @@ -157,18 +158,15 @@ bool CameraBuf::acquire() { #else if (camera_state->camera_id == CAMERA_ID_IMX390) black_level = 64.0; #endif - debayer->queue(q, camrabuf_cl, cur_rgb_buf->buf_cl, rgb_width, rgb_height, gain, black_level, &event); + debayer->queue(q, camrabuf_cl, cur_yuv_buf->buf_cl, rgb_width, rgb_height, gain, black_level, &event); } else { assert(rgb_stride == camera_state->ci.frame_stride); - CL_CHECK(clEnqueueCopyBuffer(q, camrabuf_cl, cur_rgb_buf->buf_cl, 0, 0, cur_rgb_buf->len, 0, 0, &event)); + rgb2yuv->queue(q, camrabuf_cl, cur_rgb_buf->buf_cl); } clWaitForEvents(1, &event); CL_CHECK(clReleaseEvent(event)); - cur_yuv_buf = vipc_server->get_buffer(yuv_type); - rgb2yuv->queue(q, cur_rgb_buf->buf_cl, cur_yuv_buf->buf_cl); - cur_frame_data.processing_time = (millis_since_boot() - start_time) / 1000.0; VisionIpcBufExtra extra = { @@ -176,9 +174,7 @@ bool CameraBuf::acquire() { cur_frame_data.timestamp_sof, cur_frame_data.timestamp_eof, }; - cur_rgb_buf->set_frame_id(cur_frame_data.frame_id); cur_yuv_buf->set_frame_id(cur_frame_data.frame_id); - vipc_server->send(cur_rgb_buf, &extra); vipc_server->send(cur_yuv_buf, &extra); return true; diff --git a/selfdrive/camerad/cameras/real_debayer.cl b/selfdrive/camerad/cameras/real_debayer.cl index 461b41dce3..b1b41f2bf1 100644 --- a/selfdrive/camerad/cameras/real_debayer.cl +++ b/selfdrive/camerad/cameras/real_debayer.cl @@ -1,10 +1,22 @@ #ifdef HALF_AS_FLOAT #define half float +#define half2 float2 #define half3 float3 +#define half4 float4 #else #pragma OPENCL EXTENSION cl_khr_fp16 : enable #endif +#define UV_WIDTH RGB_WIDTH / 2 +#define UV_HEIGHT RGB_HEIGHT / 2 +#define U_OFFSET RGB_WIDTH * RGB_HEIGHT +#define V_OFFSET RGB_WIDTH * RGB_HEIGHT + UV_WIDTH * UV_HEIGHT + +#define RGB_TO_Y(r, g, b) ((((mul24(b, 13) + mul24(g, 65) + mul24(r, 33)) + 64) >> 7) + 16) +#define RGB_TO_U(r, g, b) ((mul24(b, 56) - mul24(g, 37) - mul24(r, 19) + 0x8080) >> 8) +#define RGB_TO_V(r, g, b) ((mul24(r, 56) - mul24(g, 47) - mul24(b, 9) + 0x8080) >> 8) +#define AVERAGE(x, y, z, w) ((convert_ushort(x) + convert_ushort(y) + convert_ushort(z) + convert_ushort(w) + 1) >> 1) + // post wb CCM const __constant half3 color_correction_0 = (half3)(1.82717181, -0.31231438, 0.07307673); const __constant half3 color_correction_1 = (half3)(-0.5743977, 1.36858544, -0.53183455); @@ -75,115 +87,122 @@ __kernel void debayer10(const __global uchar * in, float black_level ) { - const int x_global = get_global_id(0); - const int y_global = get_global_id(1); + const int gid_x = get_global_id(0); + const int gid_y = get_global_id(1); + + const int lid_x = get_local_id(0); + const int lid_y = get_local_id(1); - const int x_local = get_local_id(0); - const int y_local = get_local_id(1); + const int localRowLen = mad24(get_local_size(0), 2, 2); // 2 padding + const int localColLen = mad24(get_local_size(1), 2, 2); - const int localRowLen = 2 + get_local_size(0); // 2 padding - const int localColLen = 2 + get_local_size(1); + const int x_global = mul24(gid_x, 2); + const int y_global = mul24(gid_y, 2); - const int localOffset = (y_local + 1) * localRowLen + x_local + 1; + const int x_local = mad24(lid_x, 2, 1); + const int y_local = mad24(lid_y, 2, 1); - int out_idx = 3 * x_global + 3 * y_global * RGB_WIDTH; + const int x_global_mod = (gid_x == 0 || gid_x == get_global_size(0) - 1) ? -1: 1; + const int y_global_mod = (gid_y == 0 || gid_y == get_global_size(1) - 1) ? -1: 1; - // cache padding - int localColOffset = -1; + int localColOffset = 0; int globalColOffset; - const int x_global_mod = (x_global == 0 || x_global == RGB_WIDTH - 1) ? -1: 1; - const int y_global_mod = (y_global == 0 || y_global == RGB_HEIGHT - 1) ? -1: 1; - - half pv = val_from_10(in, x_global, y_global, black_level); - cached[localOffset] = pv; - - // cache padding - if (x_local < 1) { - localColOffset = x_local; - globalColOffset = -1; - cached[(y_local + 1) * localRowLen + x_local] = val_from_10(in, x_global-x_global_mod, y_global, black_level); - } else if (x_local >= get_local_size(0) - 1) { - localColOffset = x_local + 2; - globalColOffset = 1; - cached[localOffset + 1] = val_from_10(in, x_global+x_global_mod, y_global, black_level); + cached[mad24(y_local + 0, localRowLen, x_local + 0)] = val_from_10(in, x_global + 0, y_global + 0, black_level); + cached[mad24(y_local + 0, localRowLen, x_local + 1)] = val_from_10(in, x_global + 1, y_global + 0, black_level); + cached[mad24(y_local + 1, localRowLen, x_local + 0)] = val_from_10(in, x_global + 0, y_global + 1, black_level); + cached[mad24(y_local + 1, localRowLen, x_local + 1)] = val_from_10(in, x_global + 1, y_global + 1, black_level); + + if (lid_x == 0) { // left edge + localColOffset = -1; + globalColOffset = -x_global_mod; + cached[mad24(y_local + 0, localRowLen, x_local - 1)] = val_from_10(in, x_global - x_global_mod, y_global + 0, black_level); + cached[mad24(y_local + 1, localRowLen, x_local - 1)] = val_from_10(in, x_global - x_global_mod, y_global + 1, black_level); + } else if (lid_x == get_local_size(0) - 1) { // right edge + localColOffset = 2; + globalColOffset = x_global_mod + 1; + cached[mad24(y_local + 0, localRowLen, x_local + 2)] = val_from_10(in, x_global + x_global_mod + 1, y_global + 0, black_level); + cached[mad24(y_local + 1, localRowLen, x_local + 2)] = val_from_10(in, x_global + x_global_mod + 1, y_global + 1, black_level); } - if (y_local < 1) { - cached[y_local * localRowLen + x_local + 1] = val_from_10(in, x_global, y_global-y_global_mod, black_level); - if (localColOffset != -1) { - cached[y_local * localRowLen + localColOffset] = val_from_10(in, x_global+(x_global_mod*globalColOffset), y_global-y_global_mod, black_level); + if (lid_y == 0) { // top row + cached[mad24(y_local - 1, localRowLen, x_local + 0)] = val_from_10(in, x_global + 0, y_global - y_global_mod, black_level); + cached[mad24(y_local - 1, localRowLen, x_local + 1)] = val_from_10(in, x_global + 1, y_global - y_global_mod, black_level); + if (localColOffset != 0) { // cache corners + cached[mad24(y_local - 1, localRowLen, x_local + localColOffset)] = val_from_10(in, x_global + globalColOffset, y_global - y_global_mod, black_level); } - } else if (y_local >= get_local_size(1) - 1) { - cached[(y_local + 2) * localRowLen + x_local + 1] = val_from_10(in, x_global, y_global+y_global_mod, black_level); - if (localColOffset != -1) { - cached[(y_local + 2) * localRowLen + localColOffset] = val_from_10(in, x_global+(x_global_mod*globalColOffset), y_global+y_global_mod, black_level); + } else if (lid_y == get_local_size(1) - 1) { // bottom row + cached[mad24(y_local + 2, localRowLen, x_local + 0)] = val_from_10(in, x_global + 0, y_global + y_global_mod + 1, black_level); + cached[mad24(y_local + 2, localRowLen, x_local + 1)] = val_from_10(in, x_global + 1, y_global + y_global_mod + 1, black_level); + if (localColOffset != 0) { // cache corners + cached[mad24(y_local + 2, localRowLen, x_local + localColOffset)] = val_from_10(in, x_global + globalColOffset, y_global + y_global_mod + 1, black_level); } } // sync barrier(CLK_LOCAL_MEM_FENCE); - half d1 = cached[localOffset - localRowLen - 1]; - half d2 = cached[localOffset - localRowLen + 1]; - half d3 = cached[localOffset + localRowLen - 1]; - half d4 = cached[localOffset + localRowLen + 1]; - half n1 = cached[localOffset - localRowLen]; - half n2 = cached[localOffset + 1]; - half n3 = cached[localOffset + localRowLen]; - half n4 = cached[localOffset - 1]; - half3 rgb; + uchar3 rgb_out[4]; - // a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf - if (x_global % 2 == 0) { - if (y_global % 2 == 0) { - rgb.y = pv; // G1(R) - half k1 = get_k(d1, pv, d2, pv); - half k2 = get_k(d2, pv, d4, pv); - half k3 = get_k(d3, pv, d4, pv); - half k4 = get_k(d1, pv, d3, pv); - // R_G1 - rgb.x = (k2*n2+k4*n4)/(k2+k4); - // B_G1 - rgb.z = (k1*n1+k3*n3)/(k1+k3); - } else { - rgb.z = pv; // B - half k1 = get_k(d1, d3, d2, d4); - half k2 = get_k(n1, n4, n2, n3); - half k3 = get_k(d1, d2, d3, d4); - half k4 = get_k(n1, n2, n3, n4); - // G_B - rgb.y = (k1*(n1+n3)*0.5+k3*(n2+n4)*0.5)/(k1+k3); - // R_B - rgb.x = (k2*(d2+d3)*0.5+k4*(d1+d4)*0.5)/(k2+k4); - } - } else { - if (y_global % 2 == 0) { - rgb.x = pv; // R - half k1 = get_k(d1, d3, d2, d4); - half k2 = get_k(n1, n4, n2, n3); - half k3 = get_k(d1, d2, d3, d4); - half k4 = get_k(n1, n2, n3, n4); - // G_R - rgb.y = (k1*(n1+n3)*0.5+k3*(n2+n4)*0.5)/(k1+k3); - // B_R - rgb.z = (k2*(d2+d3)*0.5+k4*(d1+d4)*0.5)/(k2+k4); - } else { - rgb.y = pv; // G2(B) - half k1 = get_k(d1, pv, d2, pv); - half k2 = get_k(d2, pv, d4, pv); - half k3 = get_k(d3, pv, d4, pv); - half k4 = get_k(d1, pv, d3, pv); - // R_G2 - rgb.x = (k1*n1+k3*n3)/(k1+k3); - // B_G2 - rgb.z = (k2*n2+k4*n4)/(k2+k4); - } - } + const half4 va = vload4(0, cached + mad24(lid_y * 2 + 0, localRowLen, lid_x * 2)); + const half4 vb = vload4(0, cached + mad24(lid_y * 2 + 1, localRowLen, lid_x * 2)); + const half4 vc = vload4(0, cached + mad24(lid_y * 2 + 2, localRowLen, lid_x * 2)); + const half4 vd = vload4(0, cached + mad24(lid_y * 2 + 3, localRowLen, lid_x * 2)); - uchar3 rgbc = convert_uchar3_sat(color_correct(clamp(rgb, (half)0.0, (half)1.0)) * 255.0); - out[out_idx + 0] = rgbc.z; - out[out_idx + 1] = rgbc.y; - out[out_idx + 2] = rgbc.x; + // a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf + const half k01 = get_k(va.s0, vb.s1, va.s2, vb.s1); + const half k02 = get_k(va.s2, vb.s1, vc.s2, vb.s1); + const half k03 = get_k(vc.s0, vb.s1, vc.s2, vb.s1); + const half k04 = get_k(va.s0, vb.s1, vc.s0, vb.s1); + rgb.x = (k02*vb.s2+k04*vb.s0)/(k02+k04); // R_G1 + rgb.y = vb.s1; // G1(R) + rgb.z = (k01*va.s1+k03*vc.s1)/(k01+k03); // B_G1 + rgb_out[0] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0); + + const half k11 = get_k(va.s1, vc.s1, va.s3, vc.s3); + const half k12 = get_k(va.s2, vb.s1, vb.s3, vc.s2); + const half k13 = get_k(va.s1, va.s3, vc.s1, vc.s3); + const half k14 = get_k(va.s2, vb.s3, vc.s2, vb.s1); + rgb.x = vb.s2; // R + rgb.y = (k11*(va.s2+vc.s2)*0.5+k13*(vb.s3+vb.s1)*0.5)/(k11+k13); // G_R + rgb.z = (k12*(va.s3+vc.s1)*0.5+k14*(va.s1+vc.s3)*0.5)/(k12+k14); // B_R + rgb_out[1] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0); + + const half k21 = get_k(vb.s0, vd.s0, vb.s2, vd.s2); + const half k22 = get_k(vb.s1, vc.s0, vc.s2, vd.s1); + const half k23 = get_k(vb.s0, vb.s2, vd.s0, vd.s2); + const half k24 = get_k(vb.s1, vc.s2, vd.s1, vc.s0); + rgb.x = (k22*(vb.s2+vd.s0)*0.5+k24*(vb.s0+vd.s2)*0.5)/(k22+k24); // R_B + rgb.y = (k21*(vb.s1+vd.s1)*0.5+k23*(vc.s2+vc.s0)*0.5)/(k21+k23); // G_B + rgb.z = vc.s1; // B + rgb_out[2] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0); + + const half k31 = get_k(vb.s1, vc.s2, vb.s3, vc.s2); + const half k32 = get_k(vb.s3, vc.s2, vd.s3, vc.s2); + const half k33 = get_k(vd.s1, vc.s2, vd.s3, vc.s2); + const half k34 = get_k(vb.s1, vc.s2, vd.s1, vc.s2); + rgb.x = (k31*vb.s2+k33*vd.s2)/(k31+k33); // R_G2 + rgb.y = vc.s2; // G2(B) + rgb.z = (k32*vc.s3+k34*vc.s1)/(k32+k34); // B_G2 + rgb_out[3] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0); + + // write ys + uchar2 yy = (uchar2)( + RGB_TO_Y(rgb_out[0].s0, rgb_out[0].s1, rgb_out[0].s2), + RGB_TO_Y(rgb_out[1].s0, rgb_out[1].s1, rgb_out[1].s2) + ); + vstore2(yy, 0, out + mad24(gid_y * 2, RGB_WIDTH, gid_x * 2)); + yy = (uchar2)( + RGB_TO_Y(rgb_out[2].s0, rgb_out[2].s1, rgb_out[2].s2), + RGB_TO_Y(rgb_out[3].s0, rgb_out[3].s1, rgb_out[3].s2) + ); + vstore2(yy, 0, out + mad24(gid_y * 2 + 1, RGB_WIDTH, gid_x * 2)); + + // write uvs + const short ar = AVERAGE(rgb_out[0].s0, rgb_out[1].s0, rgb_out[2].s0, rgb_out[3].s0); + const short ag = AVERAGE(rgb_out[0].s1, rgb_out[1].s1, rgb_out[2].s1, rgb_out[3].s1); + const short ab = AVERAGE(rgb_out[0].s2, rgb_out[1].s2, rgb_out[2].s2, rgb_out[3].s2); + out[U_OFFSET + mad24(gid_y, UV_WIDTH, gid_x)] = RGB_TO_U(ar, ag, ab); + out[V_OFFSET + mad24(gid_y, UV_WIDTH, gid_x)] = RGB_TO_V(ar, ag, ab); } diff --git a/selfdrive/camerad/snapshot/snapshot.py b/selfdrive/camerad/snapshot/snapshot.py index 1ec7677d31..2e8f7093da 100755 --- a/selfdrive/camerad/snapshot/snapshot.py +++ b/selfdrive/camerad/snapshot/snapshot.py @@ -17,9 +17,9 @@ from selfdrive.manager.process_config import managed_processes LM_THRESH = 120 # defined in selfdrive/camerad/imgproc/utils.h VISION_STREAMS = { - "roadCameraState": VisionStreamType.VISION_STREAM_RGB_ROAD, - "driverCameraState": VisionStreamType.VISION_STREAM_RGB_DRIVER, - "wideRoadCameraState": VisionStreamType.VISION_STREAM_RGB_WIDE_ROAD, + "roadCameraState": VisionStreamType.VISION_STREAM_ROAD, + "driverCameraState": VisionStreamType.VISION_STREAM_DRIVER, + "wideRoadCameraState": VisionStreamType.VISION_STREAM_WIDE_ROAD, } @@ -28,12 +28,28 @@ def jpeg_write(fn, dat): img.save(fn, "JPEG") -def extract_image(buf, w, h, stride): - img = np.hstack([buf[i * stride:i * stride + 3 * w] for i in range(h)]) - b = img[::3].reshape(h, w) - g = img[1::3].reshape(h, w) - r = img[2::3].reshape(h, w) - return np.dstack([r, g, b]) +def yuv_to_rgb(y, u, v): + ul = np.repeat(np.repeat(u, 2).reshape(u.shape[0], y.shape[1]), 2, axis=0).reshape(y.shape) + vl = np.repeat(np.repeat(v, 2).reshape(v.shape[0], y.shape[1]), 2, axis=0).reshape(y.shape) + + yuv = np.dstack((y, ul, vl)).astype(np.int16) + yuv[:, :, 1:] -= 128 + + m = np.array([ + [1.00000, 1.00000, 1.00000], + [0.00000, -0.39465, 2.03211], + [1.13983, -0.58060, 0.00000], + ]) + rgb = np.dot(yuv, m) + return rgb.astype(np.uint8) + + +def extract_image(buf, w, h): + y = np.array(buf[:w*h], dtype=np.uint8).reshape((h, w)) + u = np.array(buf[w*h: w*h+(w//2)*(h//2)], dtype=np.uint8).reshape((h//2, w//2)) + v = np.array(buf[w*h+(w//2)*(h//2):], dtype=np.uint8).reshape((h//2, w//2)) + + return yuv_to_rgb(y, u, v) def rois_in_focus(lapres: List[float]) -> float: @@ -63,10 +79,10 @@ def get_snapshots(frame="roadCameraState", front_frame="driverCameraState", focu rear, front = None, None if frame is not None: c = vipc_clients[frame] - rear = extract_image(c.recv(), c.width, c.height, c.stride) + rear = extract_image(c.recv(), c.width, c.height) if front_frame is not None: c = vipc_clients[front_frame] - front = extract_image(c.recv(), c.width, c.height, c.stride) + front = extract_image(c.recv(), c.width, c.height) return rear, front diff --git a/selfdrive/hardware/tici/test_power_draw.py b/selfdrive/hardware/tici/test_power_draw.py index 4261e10442..95c7505421 100755 --- a/selfdrive/hardware/tici/test_power_draw.py +++ b/selfdrive/hardware/tici/test_power_draw.py @@ -19,7 +19,7 @@ class Proc: warmup: float = 3. PROCS = [ - Proc('camerad', 2.5), + Proc('camerad', 2.17), Proc('modeld', 0.95), Proc('dmonitoringmodeld', 0.25), Proc('loggerd', 0.45, warmup=10.), diff --git a/selfdrive/test/process_replay/test_debayer.py b/selfdrive/test/process_replay/test_debayer.py index 6488184263..a70d478ff9 100755 --- a/selfdrive/test/process_replay/test_debayer.py +++ b/selfdrive/test/process_replay/test_debayer.py @@ -10,6 +10,7 @@ from selfdrive.hardware import PC, TICI from common.basedir import BASEDIR from selfdrive.test.openpilotci import BASE_URL, get_url from selfdrive.version import get_commit +from selfdrive.camerad.snapshot.snapshot import yuv_to_rgb from tools.lib.logreader import LogReader from tools.lib.filereader import FileReader @@ -68,38 +69,20 @@ def init_kernels(frame_offset=0): build_args += ' -DHALF_AS_FLOAT=1 -cl-std=CL2.0' debayer_prg = cl.Program(ctx, f.read()).build(options=build_args) - with open(os.path.join(BASEDIR, 'selfdrive/camerad/transforms/rgb_to_yuv.cl')) as f: - build_args = f' -cl-fast-relaxed-math -cl-denorms-are-zero -DWIDTH={FRAME_WIDTH} -DHEIGHT={FRAME_HEIGHT}' + \ - f' -DUV_WIDTH={UV_WIDTH} -DUV_HEIGHT={UV_HEIGHT} -DRGB_STRIDE={FRAME_WIDTH*3}' + \ - f' -DRGB_SIZE={FRAME_WIDTH*FRAME_HEIGHT}' - rgb_to_yuv_prg = cl.Program(ctx, f.read()).build(options=build_args) + return ctx, debayer_prg - return ctx, debayer_prg, rgb_to_yuv_prg - - -def debayer_frame(ctx, debayer_prg, rgb_to_yuv_prg, data, rgb=False): +def debayer_frame(ctx, debayer_prg, data, rgb=False): q = cl.CommandQueue(ctx) - rgb_buff = np.empty(FRAME_WIDTH * FRAME_HEIGHT * 3, dtype=np.uint8) yuv_buff = np.empty(FRAME_WIDTH * FRAME_HEIGHT + UV_SIZE * 2, dtype=np.uint8) cam_g = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data) - rgb_wg = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, FRAME_WIDTH * FRAME_HEIGHT * 3) yuv_g = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, FRAME_WIDTH * FRAME_HEIGHT + UV_SIZE * 2) - local_worksize = (16, 16) if TICI else (8, 8) - local_mem = cl.LocalMemory(648 if TICI else 400) - ev1 = debayer_prg.debayer10(q, (FRAME_WIDTH, FRAME_HEIGHT), local_worksize, cam_g, rgb_wg, local_mem, np.float32(42)) - cl.enqueue_copy(q, rgb_buff, rgb_wg, wait_for=[ev1]).wait() - cl.enqueue_barrier(q) - - rgb_rg = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=rgb_buff) - cl.enqueue_barrier(q) - - ev3 = rgb_to_yuv_prg.rgb_to_yuv(q, (FRAME_WIDTH // 4, FRAME_HEIGHT // 4), None, rgb_rg, yuv_g) - cl.enqueue_barrier(q) - - cl.enqueue_copy(q, yuv_buff, yuv_g, wait_for=[ev3]).wait() + local_worksize = (20, 20) if TICI else (4, 4) + local_mem = cl.LocalMemory(3528 if TICI else 400) + ev1 = debayer_prg.debayer10(q, (UV_WIDTH, UV_HEIGHT), local_worksize, cam_g, yuv_g, local_mem, np.float32(42)) + cl.enqueue_copy(q, yuv_buff, yuv_g, wait_for=[ev1]).wait() cl.enqueue_barrier(q) y = yuv_buff[:FRAME_WIDTH*FRAME_HEIGHT].reshape((FRAME_HEIGHT, FRAME_WIDTH)) @@ -107,13 +90,13 @@ def debayer_frame(ctx, debayer_prg, rgb_to_yuv_prg, data, rgb=False): v = yuv_buff[FRAME_WIDTH*FRAME_HEIGHT+UV_SIZE:].reshape((UV_HEIGHT, UV_WIDTH)) if rgb: - return rgb_buff.reshape((FRAME_HEIGHT, FRAME_WIDTH, 3))[:, :, (2, 1, 0)] + return yuv_to_rgb(y, u, v) else: return y, u, v def debayer_replay(lr): - ctx, debayer_prg, rgb_to_yuv_prg = init_kernels() + ctx, debayer_prg = init_kernels() frames = [] for m in lr: @@ -121,7 +104,7 @@ def debayer_replay(lr): cs = m.roadCameraState if cs.image: data = np.frombuffer(cs.image, dtype=np.uint8) - img = debayer_frame(ctx, debayer_prg, rgb_to_yuv_prg, data) + img = debayer_frame(ctx, debayer_prg, data) frames.append(img) @@ -173,7 +156,7 @@ if __name__ == "__main__": frame_diff = np.abs(np.subtract(fr, cmp_f)) diff_len = len(np.nonzero(frame_diff)[0]) - if diff_len > 1000: + if diff_len > 10000: diff += f'different at a large amount of pixels ({diff_len})\n' else: diff += 'different at (frame, yuv, pixel, ref, HEAD):\n' diff --git a/selfdrive/test/test_onroad.py b/selfdrive/test/test_onroad.py index 21696645cd..8644baac8d 100755 --- a/selfdrive/test/test_onroad.py +++ b/selfdrive/test/test_onroad.py @@ -23,7 +23,7 @@ from tools.lib.logreader import LogReader PROCS = { "selfdrive.controls.controlsd": 31.0, "./loggerd": 50.0, - "./camerad": 26.0, + "./camerad": 16.5, "./locationd": 9.1, "selfdrive.controls.plannerd": 11.7, "./_ui": 21.0, @@ -59,7 +59,7 @@ TIMINGS = { "roadCameraState": [2.5, 0.35], "driverCameraState": [2.5, 0.35], "modelV2": [2.5, 0.35], - "driverState": [2.5, 0.35], + "driverState": [2.5, 0.40], "liveLocationKalman": [2.5, 0.35], "wideRoadCameraState": [1.5, 0.35], }