camerad: combine debayering and rgb_to_yuv opencl kernels (#24452)

* camerad: combine debayering and rgb_to_yuv opencl kernels

* fix border

* fix snapshot

* rename function

* update camerad cpu usage

* update camerad power draw

* vignetting equal to previously

* test other local worksize

* use less floats

* reduce amount of code

* move barrier back

* make faster

* fix corners

* cleanup

* cleanup

* allow more jitter on driverState timing

Co-authored-by: Comma Device <device@comma.ai>
old-commit-hash: 489fbb74b0
Joost Wooning authored 3 years ago · committed by GitHub
parent 95bbb74c81
commit da6d0d878a
  1. selfdrive/camerad/cameras/camera_common.cc (14 changed lines)
  2. selfdrive/camerad/cameras/real_debayer.cl (203 changed lines)
  3. selfdrive/camerad/snapshot/snapshot.py (38 changed lines)
  4. selfdrive/hardware/tici/test_power_draw.py (2 changed lines)
  5. selfdrive/test/process_replay/test_debayer.py (39 changed lines)
  6. selfdrive/test/test_onroad.py (4 changed lines)
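
The commit drops the intermediate RGB pass: the debayer kernel now writes YUV directly, with each work item handling one 2x2 Bayer quad and emitting four Y samples plus one U and one V. A minimal NumPy sketch of the resulting buffer layout (hypothetical dimensions, illustration only; the real logic is in real_debayer.cl below):

```python
import numpy as np

# Hypothetical dimensions; the real values come from the camera configuration.
RGB_WIDTH, RGB_HEIGHT = 8, 4
UV_WIDTH, UV_HEIGHT = RGB_WIDTH // 2, RGB_HEIGHT // 2
U_OFFSET = RGB_WIDTH * RGB_HEIGHT
V_OFFSET = U_OFFSET + UV_WIDTH * UV_HEIGHT

# One planar YUV 4:2:0 buffer: full-resolution Y, then quarter-resolution U and V.
buf = np.zeros(RGB_WIDTH * RGB_HEIGHT * 3 // 2, dtype=np.uint8)
y = buf[:U_OFFSET].reshape(RGB_HEIGHT, RGB_WIDTH)
u = buf[U_OFFSET:V_OFFSET].reshape(UV_HEIGHT, UV_WIDTH)
v = buf[V_OFFSET:].reshape(UV_HEIGHT, UV_WIDTH)

# A work item at (gid_x, gid_y) debayers the 2x2 quad at (2*gid_x, 2*gid_y) and writes
# y[2*gid_y:2*gid_y+2, 2*gid_x:2*gid_x+2], u[gid_y, gid_x] and v[gid_y, gid_x].
```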

@@ -52,9 +52,9 @@ public:
CL_CHECK(clSetKernelArg(krnl_, 0, sizeof(cl_mem), &cam_buf_cl));
CL_CHECK(clSetKernelArg(krnl_, 1, sizeof(cl_mem), &buf_cl));
const size_t globalWorkSize[] = {size_t(width / 2), size_t(height / 2)};
const int debayer_local_worksize = 16;
constexpr int localMemSize = (debayer_local_worksize + 2 * (3 / 2)) * (debayer_local_worksize + 2 * (3 / 2)) * sizeof(short int);
const size_t globalWorkSize[] = {size_t(width), size_t(height)};
constexpr int localMemSize = (debayer_local_worksize * 2 + 2) * (debayer_local_worksize * 2 + 2) * 2;
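// each work item now covers a 2x2 Bayer quad, so the global size is halved in both
// dimensions and the local cache holds (2 * 16 + 2)^2 = 34 * 34 half-precision samples
// (a 32x32 pixel tile plus a one-pixel border on each side), i.e. 2312 bytes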
const size_t localWorkSize[] = {debayer_local_worksize, debayer_local_worksize};
CL_CHECK(clSetKernelArg(krnl_, 2, localMemSize, 0));
CL_CHECK(clSetKernelArg(krnl_, 3, sizeof(float), &black_level));
@@ -141,6 +141,7 @@ bool CameraBuf::acquire() {
cur_frame_data = camera_bufs_metadata[cur_buf_idx];
cur_rgb_buf = vipc_server->get_buffer(rgb_type);
cur_yuv_buf = vipc_server->get_buffer(yuv_type);
cl_mem camrabuf_cl = camera_bufs[cur_buf_idx].buf_cl;
cl_event event;
@@ -157,18 +158,15 @@ bool CameraBuf::acquire() {
#else
if (camera_state->camera_id == CAMERA_ID_IMX390) black_level = 64.0;
#endif
debayer->queue(q, camrabuf_cl, cur_rgb_buf->buf_cl, rgb_width, rgb_height, gain, black_level, &event);
debayer->queue(q, camrabuf_cl, cur_yuv_buf->buf_cl, rgb_width, rgb_height, gain, black_level, &event);
} else {
assert(rgb_stride == camera_state->ci.frame_stride);
CL_CHECK(clEnqueueCopyBuffer(q, camrabuf_cl, cur_rgb_buf->buf_cl, 0, 0, cur_rgb_buf->len, 0, 0, &event));
rgb2yuv->queue(q, camrabuf_cl, cur_rgb_buf->buf_cl);
}
clWaitForEvents(1, &event);
CL_CHECK(clReleaseEvent(event));
cur_yuv_buf = vipc_server->get_buffer(yuv_type);
rgb2yuv->queue(q, cur_rgb_buf->buf_cl, cur_yuv_buf->buf_cl);
cur_frame_data.processing_time = (millis_since_boot() - start_time) / 1000.0;
VisionIpcBufExtra extra = {
@@ -176,9 +174,7 @@ bool CameraBuf::acquire() {
cur_frame_data.timestamp_sof,
cur_frame_data.timestamp_eof,
};
cur_rgb_buf->set_frame_id(cur_frame_data.frame_id);
cur_yuv_buf->set_frame_id(cur_frame_data.frame_id);
vipc_server->send(cur_rgb_buf, &extra);
vipc_server->send(cur_yuv_buf, &extra);
return true;

@@ -1,10 +1,22 @@
#ifdef HALF_AS_FLOAT
#define half float
#define half2 float2
#define half3 float3
#define half4 float4
#else
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif
#define UV_WIDTH RGB_WIDTH / 2
#define UV_HEIGHT RGB_HEIGHT / 2
#define U_OFFSET RGB_WIDTH * RGB_HEIGHT
#define V_OFFSET RGB_WIDTH * RGB_HEIGHT + UV_WIDTH * UV_HEIGHT
#define RGB_TO_Y(r, g, b) ((((mul24(b, 13) + mul24(g, 65) + mul24(r, 33)) + 64) >> 7) + 16)
#define RGB_TO_U(r, g, b) ((mul24(b, 56) - mul24(g, 37) - mul24(r, 19) + 0x8080) >> 8)
#define RGB_TO_V(r, g, b) ((mul24(r, 56) - mul24(g, 47) - mul24(b, 9) + 0x8080) >> 8)
#define AVERAGE(x, y, z, w) ((convert_ushort(x) + convert_ushort(y) + convert_ushort(z) + convert_ushort(w) + 1) >> 1)
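// fixed-point BT.601 (limited range): the coefficients are roughly half the usual 8-bit
// values, so RGB_TO_Y shifts by 7 instead of 8, while RGB_TO_U/V keep the >> 8 because
// AVERAGE hands them (sum of four + 1) >> 1, i.e. twice the 2x2 mean; 0x8080 folds the
// +128 chroma offset (128 << 8) and the rounding term (+128) into one constant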
// post wb CCM
const __constant half3 color_correction_0 = (half3)(1.82717181, -0.31231438, 0.07307673);
const __constant half3 color_correction_1 = (half3)(-0.5743977, 1.36858544, -0.53183455);
@@ -75,115 +87,122 @@ __kernel void debayer10(const __global uchar * in,
float black_level
)
{
const int x_global = get_global_id(0);
const int y_global = get_global_id(1);
const int gid_x = get_global_id(0);
const int gid_y = get_global_id(1);
const int lid_x = get_local_id(0);
const int lid_y = get_local_id(1);
const int x_local = get_local_id(0);
const int y_local = get_local_id(1);
const int localRowLen = mad24(get_local_size(0), 2, 2); // 2 padding
const int localColLen = mad24(get_local_size(1), 2, 2);
const int localRowLen = 2 + get_local_size(0); // 2 padding
const int localColLen = 2 + get_local_size(1);
const int x_global = mul24(gid_x, 2);
const int y_global = mul24(gid_y, 2);
const int localOffset = (y_local + 1) * localRowLen + x_local + 1;
const int x_local = mad24(lid_x, 2, 1);
const int y_local = mad24(lid_y, 2, 1);
int out_idx = 3 * x_global + 3 * y_global * RGB_WIDTH;
const int x_global_mod = (gid_x == 0 || gid_x == get_global_size(0) - 1) ? -1: 1;
const int y_global_mod = (gid_y == 0 || gid_y == get_global_size(1) - 1) ? -1: 1;
// cache padding
int localColOffset = -1;
int localColOffset = 0;
int globalColOffset;
const int x_global_mod = (x_global == 0 || x_global == RGB_WIDTH - 1) ? -1: 1;
const int y_global_mod = (y_global == 0 || y_global == RGB_HEIGHT - 1) ? -1: 1;
half pv = val_from_10(in, x_global, y_global, black_level);
cached[localOffset] = pv;
// cache padding
if (x_local < 1) {
localColOffset = x_local;
globalColOffset = -1;
cached[(y_local + 1) * localRowLen + x_local] = val_from_10(in, x_global-x_global_mod, y_global, black_level);
} else if (x_local >= get_local_size(0) - 1) {
localColOffset = x_local + 2;
globalColOffset = 1;
cached[localOffset + 1] = val_from_10(in, x_global+x_global_mod, y_global, black_level);
cached[mad24(y_local + 0, localRowLen, x_local + 0)] = val_from_10(in, x_global + 0, y_global + 0, black_level);
cached[mad24(y_local + 0, localRowLen, x_local + 1)] = val_from_10(in, x_global + 1, y_global + 0, black_level);
cached[mad24(y_local + 1, localRowLen, x_local + 0)] = val_from_10(in, x_global + 0, y_global + 1, black_level);
cached[mad24(y_local + 1, localRowLen, x_local + 1)] = val_from_10(in, x_global + 1, y_global + 1, black_level);
if (lid_x == 0) { // left edge
localColOffset = -1;
globalColOffset = -x_global_mod;
cached[mad24(y_local + 0, localRowLen, x_local - 1)] = val_from_10(in, x_global - x_global_mod, y_global + 0, black_level);
cached[mad24(y_local + 1, localRowLen, x_local - 1)] = val_from_10(in, x_global - x_global_mod, y_global + 1, black_level);
} else if (lid_x == get_local_size(0) - 1) { // right edge
localColOffset = 2;
globalColOffset = x_global_mod + 1;
cached[mad24(y_local + 0, localRowLen, x_local + 2)] = val_from_10(in, x_global + x_global_mod + 1, y_global + 0, black_level);
cached[mad24(y_local + 1, localRowLen, x_local + 2)] = val_from_10(in, x_global + x_global_mod + 1, y_global + 1, black_level);
}
if (y_local < 1) {
cached[y_local * localRowLen + x_local + 1] = val_from_10(in, x_global, y_global-y_global_mod, black_level);
if (localColOffset != -1) {
cached[y_local * localRowLen + localColOffset] = val_from_10(in, x_global+(x_global_mod*globalColOffset), y_global-y_global_mod, black_level);
if (lid_y == 0) { // top row
cached[mad24(y_local - 1, localRowLen, x_local + 0)] = val_from_10(in, x_global + 0, y_global - y_global_mod, black_level);
cached[mad24(y_local - 1, localRowLen, x_local + 1)] = val_from_10(in, x_global + 1, y_global - y_global_mod, black_level);
if (localColOffset != 0) { // cache corners
cached[mad24(y_local - 1, localRowLen, x_local + localColOffset)] = val_from_10(in, x_global + globalColOffset, y_global - y_global_mod, black_level);
}
} else if (y_local >= get_local_size(1) - 1) {
cached[(y_local + 2) * localRowLen + x_local + 1] = val_from_10(in, x_global, y_global+y_global_mod, black_level);
if (localColOffset != -1) {
cached[(y_local + 2) * localRowLen + localColOffset] = val_from_10(in, x_global+(x_global_mod*globalColOffset), y_global+y_global_mod, black_level);
} else if (lid_y == get_local_size(1) - 1) { // bottom row
cached[mad24(y_local + 2, localRowLen, x_local + 0)] = val_from_10(in, x_global + 0, y_global + y_global_mod + 1, black_level);
cached[mad24(y_local + 2, localRowLen, x_local + 1)] = val_from_10(in, x_global + 1, y_global + y_global_mod + 1, black_level);
if (localColOffset != 0) { // cache corners
cached[mad24(y_local + 2, localRowLen, x_local + localColOffset)] = val_from_10(in, x_global + globalColOffset, y_global + y_global_mod + 1, black_level);
}
}
// sync
barrier(CLK_LOCAL_MEM_FENCE);
half d1 = cached[localOffset - localRowLen - 1];
half d2 = cached[localOffset - localRowLen + 1];
half d3 = cached[localOffset + localRowLen - 1];
half d4 = cached[localOffset + localRowLen + 1];
half n1 = cached[localOffset - localRowLen];
half n2 = cached[localOffset + 1];
half n3 = cached[localOffset + localRowLen];
half n4 = cached[localOffset - 1];
half3 rgb;
uchar3 rgb_out[4];
// a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf
if (x_global % 2 == 0) {
if (y_global % 2 == 0) {
rgb.y = pv; // G1(R)
half k1 = get_k(d1, pv, d2, pv);
half k2 = get_k(d2, pv, d4, pv);
half k3 = get_k(d3, pv, d4, pv);
half k4 = get_k(d1, pv, d3, pv);
// R_G1
rgb.x = (k2*n2+k4*n4)/(k2+k4);
// B_G1
rgb.z = (k1*n1+k3*n3)/(k1+k3);
} else {
rgb.z = pv; // B
half k1 = get_k(d1, d3, d2, d4);
half k2 = get_k(n1, n4, n2, n3);
half k3 = get_k(d1, d2, d3, d4);
half k4 = get_k(n1, n2, n3, n4);
// G_B
rgb.y = (k1*(n1+n3)*0.5+k3*(n2+n4)*0.5)/(k1+k3);
// R_B
rgb.x = (k2*(d2+d3)*0.5+k4*(d1+d4)*0.5)/(k2+k4);
}
} else {
if (y_global % 2 == 0) {
rgb.x = pv; // R
half k1 = get_k(d1, d3, d2, d4);
half k2 = get_k(n1, n4, n2, n3);
half k3 = get_k(d1, d2, d3, d4);
half k4 = get_k(n1, n2, n3, n4);
// G_R
rgb.y = (k1*(n1+n3)*0.5+k3*(n2+n4)*0.5)/(k1+k3);
// B_R
rgb.z = (k2*(d2+d3)*0.5+k4*(d1+d4)*0.5)/(k2+k4);
} else {
rgb.y = pv; // G2(B)
half k1 = get_k(d1, pv, d2, pv);
half k2 = get_k(d2, pv, d4, pv);
half k3 = get_k(d3, pv, d4, pv);
half k4 = get_k(d1, pv, d3, pv);
// R_G2
rgb.x = (k1*n1+k3*n3)/(k1+k3);
// B_G2
rgb.z = (k2*n2+k4*n4)/(k2+k4);
}
}
const half4 va = vload4(0, cached + mad24(lid_y * 2 + 0, localRowLen, lid_x * 2));
const half4 vb = vload4(0, cached + mad24(lid_y * 2 + 1, localRowLen, lid_x * 2));
const half4 vc = vload4(0, cached + mad24(lid_y * 2 + 2, localRowLen, lid_x * 2));
const half4 vd = vload4(0, cached + mad24(lid_y * 2 + 3, localRowLen, lid_x * 2));
uchar3 rgbc = convert_uchar3_sat(color_correct(clamp(rgb, (half)0.0, (half)1.0)) * 255.0);
out[out_idx + 0] = rgbc.z;
out[out_idx + 1] = rgbc.y;
out[out_idx + 2] = rgbc.x;
// a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf
const half k01 = get_k(va.s0, vb.s1, va.s2, vb.s1);
const half k02 = get_k(va.s2, vb.s1, vc.s2, vb.s1);
const half k03 = get_k(vc.s0, vb.s1, vc.s2, vb.s1);
const half k04 = get_k(va.s0, vb.s1, vc.s0, vb.s1);
rgb.x = (k02*vb.s2+k04*vb.s0)/(k02+k04); // R_G1
rgb.y = vb.s1; // G1(R)
rgb.z = (k01*va.s1+k03*vc.s1)/(k01+k03); // B_G1
rgb_out[0] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
const half k11 = get_k(va.s1, vc.s1, va.s3, vc.s3);
const half k12 = get_k(va.s2, vb.s1, vb.s3, vc.s2);
const half k13 = get_k(va.s1, va.s3, vc.s1, vc.s3);
const half k14 = get_k(va.s2, vb.s3, vc.s2, vb.s1);
rgb.x = vb.s2; // R
rgb.y = (k11*(va.s2+vc.s2)*0.5+k13*(vb.s3+vb.s1)*0.5)/(k11+k13); // G_R
rgb.z = (k12*(va.s3+vc.s1)*0.5+k14*(va.s1+vc.s3)*0.5)/(k12+k14); // B_R
rgb_out[1] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
const half k21 = get_k(vb.s0, vd.s0, vb.s2, vd.s2);
const half k22 = get_k(vb.s1, vc.s0, vc.s2, vd.s1);
const half k23 = get_k(vb.s0, vb.s2, vd.s0, vd.s2);
const half k24 = get_k(vb.s1, vc.s2, vd.s1, vc.s0);
rgb.x = (k22*(vb.s2+vd.s0)*0.5+k24*(vb.s0+vd.s2)*0.5)/(k22+k24); // R_B
rgb.y = (k21*(vb.s1+vd.s1)*0.5+k23*(vc.s2+vc.s0)*0.5)/(k21+k23); // G_B
rgb.z = vc.s1; // B
rgb_out[2] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
const half k31 = get_k(vb.s1, vc.s2, vb.s3, vc.s2);
const half k32 = get_k(vb.s3, vc.s2, vd.s3, vc.s2);
const half k33 = get_k(vd.s1, vc.s2, vd.s3, vc.s2);
const half k34 = get_k(vb.s1, vc.s2, vd.s1, vc.s2);
rgb.x = (k31*vb.s2+k33*vd.s2)/(k31+k33); // R_G2
rgb.y = vc.s2; // G2(B)
rgb.z = (k32*vc.s3+k34*vc.s1)/(k32+k34); // B_G2
rgb_out[3] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
// write ys
uchar2 yy = (uchar2)(
RGB_TO_Y(rgb_out[0].s0, rgb_out[0].s1, rgb_out[0].s2),
RGB_TO_Y(rgb_out[1].s0, rgb_out[1].s1, rgb_out[1].s2)
);
vstore2(yy, 0, out + mad24(gid_y * 2, RGB_WIDTH, gid_x * 2));
yy = (uchar2)(
RGB_TO_Y(rgb_out[2].s0, rgb_out[2].s1, rgb_out[2].s2),
RGB_TO_Y(rgb_out[3].s0, rgb_out[3].s1, rgb_out[3].s2)
);
vstore2(yy, 0, out + mad24(gid_y * 2 + 1, RGB_WIDTH, gid_x * 2));
// write uvs
const short ar = AVERAGE(rgb_out[0].s0, rgb_out[1].s0, rgb_out[2].s0, rgb_out[3].s0);
const short ag = AVERAGE(rgb_out[0].s1, rgb_out[1].s1, rgb_out[2].s1, rgb_out[3].s1);
const short ab = AVERAGE(rgb_out[0].s2, rgb_out[1].s2, rgb_out[2].s2, rgb_out[3].s2);
out[U_OFFSET + mad24(gid_y, UV_WIDTH, gid_x)] = RGB_TO_U(ar, ag, ab);
out[V_OFFSET + mad24(gid_y, UV_WIDTH, gid_x)] = RGB_TO_V(ar, ag, ab);
}
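
For reference, the integer macros above stay within a few code values of the floating-point BT.601 conversion; a standalone NumPy cross-check (not part of the commit; the reference coefficients are assumed from the common 8-bit BT.601 tables):

```python
import numpy as np

rng = np.random.default_rng(0)
r, g, b = rng.integers(0, 256, size=(3, 1000)).astype(np.int64)

# kernel macros: Y takes a single pixel, U/V normally take twice the 2x2 mean
# (AVERAGE), so feed them 2x the pixel here to exercise the same scale
y_fix = ((13 * b + 65 * g + 33 * r + 64) >> 7) + 16
u_fix = (56 * 2 * b - 37 * 2 * g - 19 * 2 * r + 0x8080) >> 8
v_fix = (56 * 2 * r - 47 * 2 * g - 9 * 2 * b + 0x8080) >> 8

# floating-point BT.601 limited-range reference
y_ref = 16 + (65.738 * r + 129.057 * g + 25.064 * b) / 256
u_ref = 128 + (112.439 * b - 74.494 * g - 37.945 * r) / 256
v_ref = 128 + (112.439 * r - 94.154 * g - 18.285 * b) / 256

for fix, ref in ((y_fix, y_ref), (u_fix, u_ref), (v_fix, v_ref)):
    print(np.abs(fix - ref).max())  # prints the maximum deviation per channel (a few code values)
```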

@@ -17,9 +17,9 @@ from selfdrive.manager.process_config import managed_processes
LM_THRESH = 120 # defined in selfdrive/camerad/imgproc/utils.h
VISION_STREAMS = {
"roadCameraState": VisionStreamType.VISION_STREAM_RGB_ROAD,
"driverCameraState": VisionStreamType.VISION_STREAM_RGB_DRIVER,
"wideRoadCameraState": VisionStreamType.VISION_STREAM_RGB_WIDE_ROAD,
"roadCameraState": VisionStreamType.VISION_STREAM_ROAD,
"driverCameraState": VisionStreamType.VISION_STREAM_DRIVER,
"wideRoadCameraState": VisionStreamType.VISION_STREAM_WIDE_ROAD,
}
@@ -28,12 +28,28 @@ def jpeg_write(fn, dat):
img.save(fn, "JPEG")
def extract_image(buf, w, h, stride):
img = np.hstack([buf[i * stride:i * stride + 3 * w] for i in range(h)])
b = img[::3].reshape(h, w)
g = img[1::3].reshape(h, w)
r = img[2::3].reshape(h, w)
return np.dstack([r, g, b])
def yuv_to_rgb(y, u, v):
ul = np.repeat(np.repeat(u, 2).reshape(u.shape[0], y.shape[1]), 2, axis=0).reshape(y.shape)
vl = np.repeat(np.repeat(v, 2).reshape(v.shape[0], y.shape[1]), 2, axis=0).reshape(y.shape)
yuv = np.dstack((y, ul, vl)).astype(np.int16)
yuv[:, :, 1:] -= 128
m = np.array([
[1.00000, 1.00000, 1.00000],
[0.00000, -0.39465, 2.03211],
[1.13983, -0.58060, 0.00000],
])
rgb = np.dot(yuv, m)
return rgb.astype(np.uint8)
def extract_image(buf, w, h):
y = np.array(buf[:w*h], dtype=np.uint8).reshape((h, w))
u = np.array(buf[w*h: w*h+(w//2)*(h//2)], dtype=np.uint8).reshape((h//2, w//2))
v = np.array(buf[w*h+(w//2)*(h//2):], dtype=np.uint8).reshape((h//2, w//2))
return yuv_to_rgb(y, u, v)
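# note: yuv_to_rgb above upsamples U and V to full resolution with nearest-neighbor
# np.repeat, then np.dot applies a YUV -> RGB matrix whose columns hold the R, G and B
# weights for [Y, U - 128, V - 128]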
def rois_in_focus(lapres: List[float]) -> float:
@@ -63,10 +79,10 @@ def get_snapshots(frame="roadCameraState", front_frame="driverCameraState", focu
rear, front = None, None
if frame is not None:
c = vipc_clients[frame]
rear = extract_image(c.recv(), c.width, c.height, c.stride)
rear = extract_image(c.recv(), c.width, c.height)
if front_frame is not None:
c = vipc_clients[front_frame]
front = extract_image(c.recv(), c.width, c.height, c.stride)
front = extract_image(c.recv(), c.width, c.height)
return rear, front

@@ -19,7 +19,7 @@ class Proc:
warmup: float = 3.
PROCS = [
Proc('camerad', 2.5),
Proc('camerad', 2.17),
Proc('modeld', 0.95),
Proc('dmonitoringmodeld', 0.25),
Proc('loggerd', 0.45, warmup=10.),

@@ -10,6 +10,7 @@ from selfdrive.hardware import PC, TICI
from common.basedir import BASEDIR
from selfdrive.test.openpilotci import BASE_URL, get_url
from selfdrive.version import get_commit
from selfdrive.camerad.snapshot.snapshot import yuv_to_rgb
from tools.lib.logreader import LogReader
from tools.lib.filereader import FileReader
@@ -68,38 +69,20 @@ def init_kernels(frame_offset=0):
build_args += ' -DHALF_AS_FLOAT=1 -cl-std=CL2.0'
debayer_prg = cl.Program(ctx, f.read()).build(options=build_args)
with open(os.path.join(BASEDIR, 'selfdrive/camerad/transforms/rgb_to_yuv.cl')) as f:
build_args = f' -cl-fast-relaxed-math -cl-denorms-are-zero -DWIDTH={FRAME_WIDTH} -DHEIGHT={FRAME_HEIGHT}' + \
f' -DUV_WIDTH={UV_WIDTH} -DUV_HEIGHT={UV_HEIGHT} -DRGB_STRIDE={FRAME_WIDTH*3}' + \
f' -DRGB_SIZE={FRAME_WIDTH*FRAME_HEIGHT}'
rgb_to_yuv_prg = cl.Program(ctx, f.read()).build(options=build_args)
return ctx, debayer_prg
return ctx, debayer_prg, rgb_to_yuv_prg
def debayer_frame(ctx, debayer_prg, rgb_to_yuv_prg, data, rgb=False):
def debayer_frame(ctx, debayer_prg, data, rgb=False):
q = cl.CommandQueue(ctx)
rgb_buff = np.empty(FRAME_WIDTH * FRAME_HEIGHT * 3, dtype=np.uint8)
yuv_buff = np.empty(FRAME_WIDTH * FRAME_HEIGHT + UV_SIZE * 2, dtype=np.uint8)
cam_g = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
rgb_wg = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, FRAME_WIDTH * FRAME_HEIGHT * 3)
yuv_g = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, FRAME_WIDTH * FRAME_HEIGHT + UV_SIZE * 2)
local_worksize = (16, 16) if TICI else (8, 8)
local_mem = cl.LocalMemory(648 if TICI else 400)
ev1 = debayer_prg.debayer10(q, (FRAME_WIDTH, FRAME_HEIGHT), local_worksize, cam_g, rgb_wg, local_mem, np.float32(42))
cl.enqueue_copy(q, rgb_buff, rgb_wg, wait_for=[ev1]).wait()
cl.enqueue_barrier(q)
rgb_rg = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=rgb_buff)
cl.enqueue_barrier(q)
ev3 = rgb_to_yuv_prg.rgb_to_yuv(q, (FRAME_WIDTH // 4, FRAME_HEIGHT // 4), None, rgb_rg, yuv_g)
cl.enqueue_barrier(q)
cl.enqueue_copy(q, yuv_buff, yuv_g, wait_for=[ev3]).wait()
local_worksize = (20, 20) if TICI else (4, 4)
local_mem = cl.LocalMemory(3528 if TICI else 400)
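# local cache is (2 * worksize + 2)^2 samples: 42 * 42 * sizeof(half) = 3528 bytes on
# device, 10 * 10 * sizeof(float) = 400 bytes on PC, where HALF_AS_FLOAT builds the
# kernel with floats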
ev1 = debayer_prg.debayer10(q, (UV_WIDTH, UV_HEIGHT), local_worksize, cam_g, yuv_g, local_mem, np.float32(42))
cl.enqueue_copy(q, yuv_buff, yuv_g, wait_for=[ev1]).wait()
cl.enqueue_barrier(q)
y = yuv_buff[:FRAME_WIDTH*FRAME_HEIGHT].reshape((FRAME_HEIGHT, FRAME_WIDTH))
@@ -107,13 +90,13 @@ def debayer_frame(ctx, debayer_prg, rgb_to_yuv_prg, data, rgb=False):
v = yuv_buff[FRAME_WIDTH*FRAME_HEIGHT+UV_SIZE:].reshape((UV_HEIGHT, UV_WIDTH))
if rgb:
return rgb_buff.reshape((FRAME_HEIGHT, FRAME_WIDTH, 3))[:, :, (2, 1, 0)]
return yuv_to_rgb(y, u, v)
else:
return y, u, v
def debayer_replay(lr):
ctx, debayer_prg, rgb_to_yuv_prg = init_kernels()
ctx, debayer_prg = init_kernels()
frames = []
for m in lr:
@@ -121,7 +104,7 @@ def debayer_replay(lr):
cs = m.roadCameraState
if cs.image:
data = np.frombuffer(cs.image, dtype=np.uint8)
img = debayer_frame(ctx, debayer_prg, rgb_to_yuv_prg, data)
img = debayer_frame(ctx, debayer_prg, data)
frames.append(img)
@@ -173,7 +156,7 @@ if __name__ == "__main__":
frame_diff = np.abs(np.subtract(fr, cmp_f))
diff_len = len(np.nonzero(frame_diff)[0])
if diff_len > 1000:
if diff_len > 10000:
diff += f'different at a large amount of pixels ({diff_len})\n'
else:
diff += 'different at (frame, yuv, pixel, ref, HEAD):\n'

@@ -23,7 +23,7 @@ from tools.lib.logreader import LogReader
PROCS = {
"selfdrive.controls.controlsd": 31.0,
"./loggerd": 50.0,
"./camerad": 26.0,
"./camerad": 16.5,
"./locationd": 9.1,
"selfdrive.controls.plannerd": 11.7,
"./_ui": 21.0,
@@ -59,7 +59,7 @@ TIMINGS = {
"roadCameraState": [2.5, 0.35],
"driverCameraState": [2.5, 0.35],
"modelV2": [2.5, 0.35],
"driverState": [2.5, 0.35],
"driverState": [2.5, 0.40],
"liveLocationKalman": [2.5, 0.35],
"wideRoadCameraState": [1.5, 0.35],
}
