openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

294 lines
13 KiB

#include "ar0231_cl.h"
#include "ox03c10_cl.h"
#include "os04c10_cl.h"
#define UV_WIDTH RGB_WIDTH / 2
#define UV_HEIGHT RGB_HEIGHT / 2
#define RGB_TO_Y(r, g, b) ((((mul24(b, 13) + mul24(g, 65) + mul24(r, 33)) + 64) >> 7) + 16)
#define RGB_TO_U(r, g, b) ((mul24(b, 56) - mul24(g, 37) - mul24(r, 19) + 0x8080) >> 8)
#define RGB_TO_V(r, g, b) ((mul24(r, 56) - mul24(g, 47) - mul24(b, 9) + 0x8080) >> 8)
#define AVERAGE(x, y, z, w) ((convert_ushort(x) + convert_ushort(y) + convert_ushort(z) + convert_ushort(w) + 1) >> 1)
#if defined(BGGR)
#define ROW_READ_ORDER (int[]){3, 2, 1, 0}
#define RGB_WRITE_ORDER (int[]){2, 3, 0, 1}
#else
#define ROW_READ_ORDER (int[]){0, 1, 2, 3}
#define RGB_WRITE_ORDER (int[]){0, 1, 2, 3}
#endif
float get_vignetting_s(float r) {
#if IS_OS
r = r / 2.2545f;
#endif
if (r < 62500) {
return (1.0f + 0.0000008f*r);
} else if (r < 490000) {
return (0.9625f + 0.0000014f*r);
} else if (r < 1102500) {
return (1.26434f + 0.0000000000016f*r*r);
} else {
return (0.53503625f + 0.0000000000022f*r*r);
}
}
float4 val4_from_12(uchar8 pvs, float gain) {
uint4 parsed = (uint4)(((uint)pvs.s0<<4) + (pvs.s1>>4), // is from the previous 10 bit
((uint)pvs.s2<<4) + (pvs.s4&0xF),
((uint)pvs.s3<<4) + (pvs.s4>>4),
((uint)pvs.s5<<4) + (pvs.s7&0xF));
#if IS_OX
// PWL
//float4 pv = (convert_float4(parsed) - 64.0) / (4096.0 - 64.0);
float4 pv = {ox03c10_lut[parsed.s0], ox03c10_lut[parsed.s1], ox03c10_lut[parsed.s2], ox03c10_lut[parsed.s3]};
// it's a 24 bit signal, center in the middle 8 bits
return clamp(pv*gain*256.0, 0.0, 1.0);
#else // AR
// normalize and scale
float4 pv = (convert_float4(parsed) - 168.0) / (4096.0 - 168.0);
return clamp(pv*gain, 0.0, 1.0);
#endif
}
float4 val4_from_10(uchar8 pvs, uchar ext, bool aligned, float gain) {
uint4 parsed;
if (aligned) {
parsed = (uint4)(((uint)pvs.s0 << 2) + (pvs.s1 & 0b00000011),
((uint)pvs.s2 << 2) + ((pvs.s6 & 0b11000000) / 64),
((uint)pvs.s3 << 2) + ((pvs.s6 & 0b00110000) / 16),
((uint)pvs.s4 << 2) + ((pvs.s6 & 0b00001100) / 4));
} else {
parsed = (uint4)(((uint)pvs.s0 << 2) + ((pvs.s3 & 0b00110000) / 16),
((uint)pvs.s1 << 2) + ((pvs.s3 & 0b00001100) / 4),
((uint)pvs.s2 << 2) + ((pvs.s3 & 0b00000011)),
((uint)pvs.s4 << 2) + ((ext & 0b11000000) / 64));
}
float4 pv = (convert_float4(parsed) - 64.0) / (1024.0 - 64.0);
return clamp(pv*gain, 0.0, 1.0);
}
float combine_pvs(float lv, float sv, int expo) {
float svc = fmax(sv * expo, 61376.0);
float svd = sv * fmin(expo, 8.0) / 8;
if (expo > 64) {
if (lv < 959) {
return lv / (65536.0 - 64.0);
} else {
return (svc / 64) / (65536.0 - 64.0);
}
} else {
if (lv > 32) {
return (lv * 64 / fmax(expo, 8.0)) / (65536.0 - 64.0);
} else {
return svd / (65536.0 - 64.0);
}
}
}
float4 val4_from_10x2(uchar8 long_pvs, uchar long_ext, uchar8 short_pvs, uchar short_ext, bool aligned, float gain, int expo) {
int8 parsed;
if (aligned) {
parsed = (int8)(((int)long_pvs.s0 << 2) + (long_pvs.s1 & 0b00000011),
((int)long_pvs.s2 << 2) + ((long_pvs.s6 & 0b11000000) / 64),
((int)long_pvs.s3 << 2) + ((long_pvs.s6 & 0b00110000) / 16),
((int)long_pvs.s4 << 2) + ((long_pvs.s6 & 0b00001100) / 4),
((int)short_pvs.s0 << 2) + (short_pvs.s1 & 0b00000011),
((int)short_pvs.s2 << 2) + ((short_pvs.s6 & 0b11000000) / 64),
((int)short_pvs.s3 << 2) + ((short_pvs.s6 & 0b00110000) / 16),
((int)short_pvs.s4 << 2) + ((short_pvs.s6 & 0b00001100) / 4));
} else {
parsed = (int8)(((int)long_pvs.s0 << 2) + ((long_pvs.s3 & 0b00110000) / 16),
((int)long_pvs.s1 << 2) + ((long_pvs.s3 & 0b00001100) / 4),
((int)long_pvs.s2 << 2) + ((long_pvs.s3 & 0b00000011)),
((int)long_pvs.s4 << 2) + ((long_ext & 0b11000000) / 64),
((int)short_pvs.s0 << 2) + ((short_pvs.s3 & 0b00110000) / 16),
((int)short_pvs.s1 << 2) + ((short_pvs.s3 & 0b00001100) / 4),
((int)short_pvs.s2 << 2) + ((short_pvs.s3 & 0b00000011)),
((int)short_pvs.s4 << 2) + ((short_ext & 0b11000000) / 64));
}
float8 pf = convert_float8(parsed - 64);
float4 pv;
pv.s0 = combine_pvs(pf.s0, pf.s4, expo);
pv.s1 = combine_pvs(pf.s1, pf.s5, expo);
pv.s2 = combine_pvs(pf.s2, pf.s6, expo);
pv.s3 = combine_pvs(pf.s3, pf.s7, expo);
return clamp(pv*gain, 0.0, 1.0);
}
float get_k(float a, float b, float c, float d) {
return 2.0 - (fabs(a - b) + fabs(c - d));
}
__kernel void process_raw(const __global uchar * in, __global uchar * out, int expo_time)
{
const int gid_x = get_global_id(0);
const int gid_y = get_global_id(1);
const int row_before_offset = (gid_y == 0) ? 2 : 0;
const int row_after_offset = (gid_y == (RGB_HEIGHT/2 - 1)) ? 1 : 3;
float3 rgb;
uchar3 rgb_out[4];
int start_idx;
#if IS_10BIT
bool aligned10;
if (gid_x % 2 == 0) {
aligned10 = true;
start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (5 * gid_x / 2 - 2) + (FRAME_STRIDE * FRAME_OFFSET);
} else {
aligned10 = false;
start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (5 * (gid_x - 1) / 2 + 1) + (FRAME_STRIDE * FRAME_OFFSET);
}
#else
start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (3 * gid_x - 2) + (FRAME_STRIDE * FRAME_OFFSET);
#endif
// read in 8x4 chars
uchar8 dat[4];
dat[0] = vload8(0, in + start_idx + FRAME_STRIDE*row_before_offset);
if (gid_x == 0 && gid_y == 0) {
// this wasn't a problem due to extra rows
dat[1] = vload8(0, in + start_idx + FRAME_STRIDE*1 + 2);
dat[1] = (uchar8)(0, 0, dat[1].s0, dat[1].s1, dat[1].s2, dat[1].s3, dat[1].s4, dat[1].s5);
} else {
dat[1] = vload8(0, in + start_idx + FRAME_STRIDE*1);
}
dat[2] = vload8(0, in + start_idx + FRAME_STRIDE*2);
dat[3] = vload8(0, in + start_idx + FRAME_STRIDE*row_after_offset);
#if IS_HDR
uchar8 short_dat[4];
short_dat[0] = vload8(0, in + start_idx + FRAME_STRIDE*(row_before_offset+HDR_OFFSET/2) + FRAME_STRIDE/2);
short_dat[1] = vload8(0, in + start_idx + FRAME_STRIDE*(1+HDR_OFFSET/2) + FRAME_STRIDE/2);
short_dat[2] = vload8(0, in + start_idx + FRAME_STRIDE*(2+HDR_OFFSET/2) + FRAME_STRIDE/2);
short_dat[3] = vload8(0, in + start_idx + FRAME_STRIDE*(row_after_offset+HDR_OFFSET/2) + FRAME_STRIDE/2);
#endif
// need extra bit for 10-bit
#if IS_10BIT
uchar extra[4];
if (!aligned10) {
extra[0] = in[start_idx + FRAME_STRIDE*row_before_offset + 8];
extra[1] = in[start_idx + FRAME_STRIDE*1 + 8];
extra[2] = in[start_idx + FRAME_STRIDE*2 + 8];
extra[3] = in[start_idx + FRAME_STRIDE*row_after_offset + 8];
}
#if IS_HDR
uchar short_extra[4];
if (!aligned10) {
short_extra[0] = in[start_idx + FRAME_STRIDE*(row_before_offset+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
short_extra[1] = in[start_idx + FRAME_STRIDE*(1+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
short_extra[2] = in[start_idx + FRAME_STRIDE*(2+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
short_extra[3] = in[start_idx + FRAME_STRIDE*(row_after_offset+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
}
#endif
#endif
// correct vignetting
#if VIGNETTING
int gx = (gid_x*2 - RGB_WIDTH/2);
int gy = (gid_y*2 - RGB_HEIGHT/2);
const float gain = get_vignetting_s(gx*gx + gy*gy);
#else
const float gain = 1.0;
#endif
float4 v_rows[4];
// parse into floats
#if IS_10BIT
#if IS_HDR
v_rows[ROW_READ_ORDER[0]] = val4_from_10x2(dat[0], extra[0], short_dat[0], short_extra[0], aligned10, gain, expo_time);
v_rows[ROW_READ_ORDER[1]] = val4_from_10x2(dat[1], extra[1], short_dat[1], short_extra[1], aligned10, gain, expo_time);
v_rows[ROW_READ_ORDER[2]] = val4_from_10x2(dat[2], extra[2], short_dat[2], short_extra[2], aligned10, gain, expo_time);
v_rows[ROW_READ_ORDER[3]] = val4_from_10x2(dat[3], extra[3], short_dat[3], short_extra[3], aligned10, gain, expo_time);
#else
v_rows[ROW_READ_ORDER[0]] = val4_from_10(dat[0], extra[0], aligned10, gain);
v_rows[ROW_READ_ORDER[1]] = val4_from_10(dat[1], extra[1], aligned10, gain);
v_rows[ROW_READ_ORDER[2]] = val4_from_10(dat[2], extra[2], aligned10, gain);
v_rows[ROW_READ_ORDER[3]] = val4_from_10(dat[3], extra[3], aligned10, gain);
#endif
#else
v_rows[ROW_READ_ORDER[0]] = val4_from_12(dat[0], gain);
v_rows[ROW_READ_ORDER[1]] = val4_from_12(dat[1], gain);
v_rows[ROW_READ_ORDER[2]] = val4_from_12(dat[2], gain);
v_rows[ROW_READ_ORDER[3]] = val4_from_12(dat[3], gain);
#endif
// mirror padding
if (gid_x == 0) {
v_rows[0].s0 = v_rows[0].s2;
v_rows[1].s0 = v_rows[1].s2;
v_rows[2].s0 = v_rows[2].s2;
v_rows[3].s0 = v_rows[3].s2;
} else if (gid_x == RGB_WIDTH/2 - 1) {
v_rows[0].s3 = v_rows[0].s1;
v_rows[1].s3 = v_rows[1].s1;
v_rows[2].s3 = v_rows[2].s1;
v_rows[3].s3 = v_rows[3].s1;
}
// a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf
const float k01 = get_k(v_rows[0].s0, v_rows[1].s1, v_rows[0].s2, v_rows[1].s1);
const float k02 = get_k(v_rows[0].s2, v_rows[1].s1, v_rows[2].s2, v_rows[1].s1);
const float k03 = get_k(v_rows[2].s0, v_rows[1].s1, v_rows[2].s2, v_rows[1].s1);
const float k04 = get_k(v_rows[0].s0, v_rows[1].s1, v_rows[2].s0, v_rows[1].s1);
rgb.x = (k02*v_rows[1].s2+k04*v_rows[1].s0)/(k02+k04); // R_G1
rgb.y = v_rows[1].s1; // G1(R)
rgb.z = (k01*v_rows[0].s1+k03*v_rows[2].s1)/(k01+k03); // B_G1
rgb_out[RGB_WRITE_ORDER[0]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb, 0.0, 1.0)), expo_time) * 255.0);
const float k11 = get_k(v_rows[0].s1, v_rows[2].s1, v_rows[0].s3, v_rows[2].s3);
const float k12 = get_k(v_rows[0].s2, v_rows[1].s1, v_rows[1].s3, v_rows[2].s2);
const float k13 = get_k(v_rows[0].s1, v_rows[0].s3, v_rows[2].s1, v_rows[2].s3);
const float k14 = get_k(v_rows[0].s2, v_rows[1].s3, v_rows[2].s2, v_rows[1].s1);
rgb.x = v_rows[1].s2; // R
rgb.y = (k11*(v_rows[0].s2+v_rows[2].s2)*0.5+k13*(v_rows[1].s3+v_rows[1].s1)*0.5)/(k11+k13); // G_R
rgb.z = (k12*(v_rows[0].s3+v_rows[2].s1)*0.5+k14*(v_rows[0].s1+v_rows[2].s3)*0.5)/(k12+k14); // B_R
rgb_out[RGB_WRITE_ORDER[1]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb, 0.0, 1.0)), expo_time) * 255.0);
const float k21 = get_k(v_rows[1].s0, v_rows[3].s0, v_rows[1].s2, v_rows[3].s2);
const float k22 = get_k(v_rows[1].s1, v_rows[2].s0, v_rows[2].s2, v_rows[3].s1);
const float k23 = get_k(v_rows[1].s0, v_rows[1].s2, v_rows[3].s0, v_rows[3].s2);
const float k24 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[3].s1, v_rows[2].s0);
rgb.x = (k22*(v_rows[1].s2+v_rows[3].s0)*0.5+k24*(v_rows[1].s0+v_rows[3].s2)*0.5)/(k22+k24); // R_B
rgb.y = (k21*(v_rows[1].s1+v_rows[3].s1)*0.5+k23*(v_rows[2].s2+v_rows[2].s0)*0.5)/(k21+k23); // G_B
rgb.z = v_rows[2].s1; // B
rgb_out[RGB_WRITE_ORDER[2]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb, 0.0, 1.0)), expo_time) * 255.0);
const float k31 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[1].s3, v_rows[2].s2);
const float k32 = get_k(v_rows[1].s3, v_rows[2].s2, v_rows[3].s3, v_rows[2].s2);
const float k33 = get_k(v_rows[3].s1, v_rows[2].s2, v_rows[3].s3, v_rows[2].s2);
const float k34 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[3].s1, v_rows[2].s2);
rgb.x = (k31*v_rows[1].s2+k33*v_rows[3].s2)/(k31+k33); // R_G2
rgb.y = v_rows[2].s2; // G2(B)
rgb.z = (k32*v_rows[2].s3+k34*v_rows[2].s1)/(k32+k34); // B_G2
rgb_out[RGB_WRITE_ORDER[3]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb, 0.0, 1.0)), expo_time) * 255.0);
// write ys
uchar2 yy = (uchar2)(
RGB_TO_Y(rgb_out[0].s0, rgb_out[0].s1, rgb_out[0].s2),
RGB_TO_Y(rgb_out[1].s0, rgb_out[1].s1, rgb_out[1].s2)
);
vstore2(yy, 0, out + mad24(gid_y * 2, YUV_STRIDE, gid_x * 2));
yy = (uchar2)(
RGB_TO_Y(rgb_out[2].s0, rgb_out[2].s1, rgb_out[2].s2),
RGB_TO_Y(rgb_out[3].s0, rgb_out[3].s1, rgb_out[3].s2)
);
vstore2(yy, 0, out + mad24(gid_y * 2 + 1, YUV_STRIDE, gid_x * 2));
// write uvs
const short ar = AVERAGE(rgb_out[0].s0, rgb_out[1].s0, rgb_out[2].s0, rgb_out[3].s0);
const short ag = AVERAGE(rgb_out[0].s1, rgb_out[1].s1, rgb_out[2].s1, rgb_out[3].s1);
const short ab = AVERAGE(rgb_out[0].s2, rgb_out[1].s2, rgb_out[2].s2, rgb_out[3].s2);
uchar2 uv = (uchar2)(
RGB_TO_U(ar, ag, ab),
RGB_TO_V(ar, ag, ab)
);
vstore2(uv, 0, out + UV_OFFSET + mad24(gid_y, YUV_STRIDE, gid_x * 2));
}