open source driving agent
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

158 lines
5.9 KiB

#define UV_WIDTH RGB_WIDTH / 2
#define UV_HEIGHT RGB_HEIGHT / 2
#define RGB_TO_Y(r, g, b) ((((mul24(b, 13) + mul24(g, 65) + mul24(r, 33)) + 64) >> 7) + 16)
#define RGB_TO_U(r, g, b) ((mul24(b, 56) - mul24(g, 37) - mul24(r, 19) + 0x8080) >> 8)
#define RGB_TO_V(r, g, b) ((mul24(r, 56) - mul24(g, 47) - mul24(b, 9) + 0x8080) >> 8)
#define AVERAGE(x, y, z, w) ((convert_ushort(x) + convert_ushort(y) + convert_ushort(z) + convert_ushort(w) + 1) >> 1)
float3 color_correct(float3 rgb) {
// color correction
float3 x = rgb.x * (float3)(1.82717181, -0.31231438, 0.07307673);
x += rgb.y * (float3)(-0.5743977, 1.36858544, -0.53183455);
x += rgb.z * (float3)(-0.25277411, -0.05627105, 1.45875782);
// tone mapping params
const float gamma_k = 0.75;
const float gamma_b = 0.125;
const float mp = 0.01; // ideally midpoint should be adaptive
const float rk = 9 - 100*mp;
// poly approximation for s curve
return (x > mp) ?
((rk * (x-mp) * (1-(gamma_k*mp+gamma_b)) * (1+1/(rk*(1-mp))) / (1+rk*(x-mp))) + gamma_k*mp + gamma_b) :
((rk * (x-mp) * (gamma_k*mp+gamma_b) * (1+1/(rk*mp)) / (1-rk*(x-mp))) + gamma_k*mp + gamma_b);
}
float get_vignetting_s(float r) {
if (r < 62500) {
return (1.0f + 0.0000008f*r);
} else if (r < 490000) {
return (0.9625f + 0.0000014f*r);
} else if (r < 1102500) {
return (1.26434f + 0.0000000000016f*r*r);
} else {
return (0.53503625f + 0.0000000000022f*r*r);
}
}
float4 val4_from_12(uchar8 pvs, float gain) {
uint4 parsed = (uint4)(((uint)pvs.s0<<4) + (pvs.s1>>4), // is from the previous 10 bit
((uint)pvs.s2<<4) + (pvs.s4&0xF),
((uint)pvs.s3<<4) + (pvs.s4>>4),
((uint)pvs.s5<<4) + (pvs.s7&0xF));
// normalize and scale
float4 pv = (convert_float4(parsed) - 168.0) / (4096.0 - 168.0);
return clamp(pv*gain, 0.0, 1.0);
}
float get_k(float a, float b, float c, float d) {
return 2.0 - (fabs(a - b) + fabs(c - d));
}
__kernel void debayer10(const __global uchar * in, __global uchar * out)
{
const int gid_x = get_global_id(0);
const int gid_y = get_global_id(1);
const int y_top_mod = (gid_y == 0) ? 2: 0;
const int y_bot_mod = (gid_y == (RGB_HEIGHT/2 - 1)) ? 1: 3;
float3 rgb;
uchar3 rgb_out[4];
int start = (2 * gid_y - 1) * FRAME_STRIDE + (3 * gid_x - 2) + (FRAME_STRIDE * FRAME_OFFSET);
// read in 8x4 chars
uchar8 dat[4];
dat[0] = vload8(0, in + start + FRAME_STRIDE*y_top_mod);
dat[1] = vload8(0, in + start + FRAME_STRIDE*1);
dat[2] = vload8(0, in + start + FRAME_STRIDE*2);
dat[3] = vload8(0, in + start + FRAME_STRIDE*y_bot_mod);
// correct vignetting
#if VIGNETTING
int gx = (gid_x*2 - RGB_WIDTH/2);
int gy = (gid_y*2 - RGB_HEIGHT/2);
const float gain = get_vignetting_s(gx*gx + gy*gy);
#else
const float gain = 1.0;
#endif
// process them to floats
float4 va = val4_from_12(dat[0], gain);
float4 vb = val4_from_12(dat[1], gain);
float4 vc = val4_from_12(dat[2], gain);
float4 vd = val4_from_12(dat[3], gain);
if (gid_x == 0) {
va.s0 = va.s2;
vb.s0 = vb.s2;
vc.s0 = vc.s2;
vd.s0 = vd.s2;
} else if (gid_x == RGB_WIDTH/2 - 1) {
va.s3 = va.s1;
vb.s3 = vb.s1;
vc.s3 = vc.s1;
vd.s3 = vd.s1;
}
// a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf
const float k01 = get_k(va.s0, vb.s1, va.s2, vb.s1);
const float k02 = get_k(va.s2, vb.s1, vc.s2, vb.s1);
const float k03 = get_k(vc.s0, vb.s1, vc.s2, vb.s1);
const float k04 = get_k(va.s0, vb.s1, vc.s0, vb.s1);
rgb.x = (k02*vb.s2+k04*vb.s0)/(k02+k04); // R_G1
rgb.y = vb.s1; // G1(R)
rgb.z = (k01*va.s1+k03*vc.s1)/(k01+k03); // B_G1
rgb_out[0] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
const float k11 = get_k(va.s1, vc.s1, va.s3, vc.s3);
const float k12 = get_k(va.s2, vb.s1, vb.s3, vc.s2);
const float k13 = get_k(va.s1, va.s3, vc.s1, vc.s3);
const float k14 = get_k(va.s2, vb.s3, vc.s2, vb.s1);
rgb.x = vb.s2; // R
rgb.y = (k11*(va.s2+vc.s2)*0.5+k13*(vb.s3+vb.s1)*0.5)/(k11+k13); // G_R
rgb.z = (k12*(va.s3+vc.s1)*0.5+k14*(va.s1+vc.s3)*0.5)/(k12+k14); // B_R
rgb_out[1] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
const float k21 = get_k(vb.s0, vd.s0, vb.s2, vd.s2);
const float k22 = get_k(vb.s1, vc.s0, vc.s2, vd.s1);
const float k23 = get_k(vb.s0, vb.s2, vd.s0, vd.s2);
const float k24 = get_k(vb.s1, vc.s2, vd.s1, vc.s0);
rgb.x = (k22*(vb.s2+vd.s0)*0.5+k24*(vb.s0+vd.s2)*0.5)/(k22+k24); // R_B
rgb.y = (k21*(vb.s1+vd.s1)*0.5+k23*(vc.s2+vc.s0)*0.5)/(k21+k23); // G_B
rgb.z = vc.s1; // B
rgb_out[2] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
const float k31 = get_k(vb.s1, vc.s2, vb.s3, vc.s2);
const float k32 = get_k(vb.s3, vc.s2, vd.s3, vc.s2);
const float k33 = get_k(vd.s1, vc.s2, vd.s3, vc.s2);
const float k34 = get_k(vb.s1, vc.s2, vd.s1, vc.s2);
rgb.x = (k31*vb.s2+k33*vd.s2)/(k31+k33); // R_G2
rgb.y = vc.s2; // G2(B)
rgb.z = (k32*vc.s3+k34*vc.s1)/(k32+k34); // B_G2
rgb_out[3] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
// write ys
uchar2 yy = (uchar2)(
RGB_TO_Y(rgb_out[0].s0, rgb_out[0].s1, rgb_out[0].s2),
RGB_TO_Y(rgb_out[1].s0, rgb_out[1].s1, rgb_out[1].s2)
);
vstore2(yy, 0, out + mad24(gid_y * 2, YUV_STRIDE, gid_x * 2));
yy = (uchar2)(
RGB_TO_Y(rgb_out[2].s0, rgb_out[2].s1, rgb_out[2].s2),
RGB_TO_Y(rgb_out[3].s0, rgb_out[3].s1, rgb_out[3].s2)
);
vstore2(yy, 0, out + mad24(gid_y * 2 + 1, YUV_STRIDE, gid_x * 2));
// write uvs
const short ar = AVERAGE(rgb_out[0].s0, rgb_out[1].s0, rgb_out[2].s0, rgb_out[3].s0);
const short ag = AVERAGE(rgb_out[0].s1, rgb_out[1].s1, rgb_out[2].s1, rgb_out[3].s1);
const short ab = AVERAGE(rgb_out[0].s2, rgb_out[1].s2, rgb_out[2].s2, rgb_out[3].s2);
uchar2 uv = (uchar2)(
RGB_TO_U(ar, ag, ab),
RGB_TO_V(ar, ag, ab)
);
vstore2(uv, 0, out + UV_OFFSET + mad24(gid_y, YUV_STRIDE, gid_x * 2));
}