openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

267 lines
12 KiB

1 year ago
#include "ar0231_cl.h"
#include "ox03c10_cl.h"
#include "os04c10_cl.h"
#define UV_WIDTH RGB_WIDTH / 2
#define UV_HEIGHT RGB_HEIGHT / 2
#define RGB_TO_Y(r, g, b) ((((mul24(b, 13) + mul24(g, 65) + mul24(r, 33)) + 64) >> 7) + 16)
#define RGB_TO_U(r, g, b) ((mul24(b, 56) - mul24(g, 37) - mul24(r, 19) + 0x8080) >> 8)
#define RGB_TO_V(r, g, b) ((mul24(r, 56) - mul24(g, 47) - mul24(b, 9) + 0x8080) >> 8)
#define AVERAGE(x, y, z, w) ((convert_ushort(x) + convert_ushort(y) + convert_ushort(z) + convert_ushort(w) + 1) >> 1)
1 year ago
#if defined(BGGR)
#define ROW_READ_ORDER (int[]){3, 2, 1, 0}
#define RGB_WRITE_ORDER (int[]){2, 3, 0, 1}
#else
#define ROW_READ_ORDER (int[]){0, 1, 2, 3}
#define RGB_WRITE_ORDER (int[]){0, 1, 2, 3}
#endif
float get_vignetting_s(float r) {
#if IS_OS
r = r / 2.2545f;
#endif
if (r < 62500) {
return (1.0f + 0.0000008f*r);
} else if (r < 490000) {
return (0.9625f + 0.0000014f*r);
} else if (r < 1102500) {
return (1.26434f + 0.0000000000016f*r*r);
} else {
return (0.53503625f + 0.0000000000022f*r*r);
}
}
float4 val4_from_12(uchar8 pvs, float gain) {
uint4 parsed = (uint4)(((uint)pvs.s0<<4) + (pvs.s1>>4), // is from the previous 10 bit
((uint)pvs.s2<<4) + (pvs.s4&0xF),
((uint)pvs.s3<<4) + (pvs.s4>>4),
((uint)pvs.s5<<4) + (pvs.s7&0xF));
port OX03C10 (#25161) * start porting OS04C10 * something * is the pll why this doesn't work * understand plls, still doesn't work * work * weee something * working? * timing * encoderd works with less cameras * 1928 * flip image * color balance * adjust black level * extra height maybe * cleanups * clean up sensor i2c * sensors * approx 20fps * remove mirror * 50ms, no fsin yet * remove imx390 support * some attempt at AE * linearize the response * OX changes (#25697) * organize * wb * best effort but probably messed up by artifacts * multicam sync Co-authored-by: Comma Device <device@comma.ai> * clean up * more * fix multiexposure * start porting OS04C10 * something * is the pll why this doesn't work * understand plls, still doesn't work * work * weee something * working? * timing * encoderd works with less cameras * 1928 * flip image * color balance * adjust black level * extra height maybe * cleanups * clean up sensor i2c * sensors * approx 20fps * remove mirror * 50ms, no fsin yet * remove imx390 support * some attempt at AE * linearize the response * OX changes (#25697) * organize * wb * best effort but probably messed up by artifacts * multicam sync Co-authored-by: Comma Device <device@comma.ai> * clean up * more * fix multiexposure * unbroken AE * oops * sort of good AE * both should work 1 * open then init * fix debayer * rename * fix test build Co-authored-by: Comma Device <device@comma.ai> Co-authored-by: ZwX1616 <zwx1616@gmail.com> Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
3 years ago
#if IS_OX
// PWL
//float4 pv = (convert_float4(parsed) - 64.0) / (4096.0 - 64.0);
float4 pv = {ox03c10_lut[parsed.s0], ox03c10_lut[parsed.s1], ox03c10_lut[parsed.s2], ox03c10_lut[parsed.s3]};
// it's a 24 bit signal, center in the middle 8 bits
return clamp(pv*gain*256.0, 0.0, 1.0);
port OX03C10 (#25161) * start porting OS04C10 * something * is the pll why this doesn't work * understand plls, still doesn't work * work * weee something * working? * timing * encoderd works with less cameras * 1928 * flip image * color balance * adjust black level * extra height maybe * cleanups * clean up sensor i2c * sensors * approx 20fps * remove mirror * 50ms, no fsin yet * remove imx390 support * some attempt at AE * linearize the response * OX changes (#25697) * organize * wb * best effort but probably messed up by artifacts * multicam sync Co-authored-by: Comma Device <device@comma.ai> * clean up * more * fix multiexposure * start porting OS04C10 * something * is the pll why this doesn't work * understand plls, still doesn't work * work * weee something * working? * timing * encoderd works with less cameras * 1928 * flip image * color balance * adjust black level * extra height maybe * cleanups * clean up sensor i2c * sensors * approx 20fps * remove mirror * 50ms, no fsin yet * remove imx390 support * some attempt at AE * linearize the response * OX changes (#25697) * organize * wb * best effort but probably messed up by artifacts * multicam sync Co-authored-by: Comma Device <device@comma.ai> * clean up * more * fix multiexposure * unbroken AE * oops * sort of good AE * both should work 1 * open then init * fix debayer * rename * fix test build Co-authored-by: Comma Device <device@comma.ai> Co-authored-by: ZwX1616 <zwx1616@gmail.com> Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
3 years ago
#else // AR
// normalize and scale
float4 pv = (convert_float4(parsed) - 168.0) / (4096.0 - 168.0);
return clamp(pv*gain, 0.0, 1.0);
port OX03C10 (#25161) * start porting OS04C10 * something * is the pll why this doesn't work * understand plls, still doesn't work * work * weee something * working? * timing * encoderd works with less cameras * 1928 * flip image * color balance * adjust black level * extra height maybe * cleanups * clean up sensor i2c * sensors * approx 20fps * remove mirror * 50ms, no fsin yet * remove imx390 support * some attempt at AE * linearize the response * OX changes (#25697) * organize * wb * best effort but probably messed up by artifacts * multicam sync Co-authored-by: Comma Device <device@comma.ai> * clean up * more * fix multiexposure * start porting OS04C10 * something * is the pll why this doesn't work * understand plls, still doesn't work * work * weee something * working? * timing * encoderd works with less cameras * 1928 * flip image * color balance * adjust black level * extra height maybe * cleanups * clean up sensor i2c * sensors * approx 20fps * remove mirror * 50ms, no fsin yet * remove imx390 support * some attempt at AE * linearize the response * OX changes (#25697) * organize * wb * best effort but probably messed up by artifacts * multicam sync Co-authored-by: Comma Device <device@comma.ai> * clean up * more * fix multiexposure * unbroken AE * oops * sort of good AE * both should work 1 * open then init * fix debayer * rename * fix test build Co-authored-by: Comma Device <device@comma.ai> Co-authored-by: ZwX1616 <zwx1616@gmail.com> Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
3 years ago
#endif
}
1 year ago
float combine_pvs(float lv, float sv, int expo) {
1 year ago
float svc = fmax(sv * expo, 61376.0);
float svd = sv * fmin(expo, 8.0) / 8;
1 year ago
if (expo > 64) {
1 year ago
if (lv < 959) {
return lv / (65536.0 - 64.0);
1 year ago
} else {
1 year ago
return (svc / 64) / (65536.0 - 64.0);
1 year ago
}
} else {
1 year ago
if (lv > 32) {
return (lv * 64 / fmax(expo, 8.0)) / (65536.0 - 64.0);
1 year ago
} else {
1 year ago
return svd / (65536.0 - 64.0);
1 year ago
}
}
}
1 year ago
float4 val4_from_10x2(uchar8 long_pvs, uchar long_ext, uchar8 short_pvs, uchar short_ext, bool aligned, float gain, int expo) {
1 year ago
int8 parsed;
1 year ago
if (aligned) {
1 year ago
parsed = (int8)(((int)long_pvs.s0 << 2) + (long_pvs.s1 & 0b00000011),
1 year ago
((int)long_pvs.s2 << 2) + ((long_pvs.s6 & 0b11000000) / 64),
((int)long_pvs.s3 << 2) + ((long_pvs.s6 & 0b00110000) / 16),
1 year ago
((int)long_pvs.s4 << 2) + ((long_pvs.s6 & 0b00001100) / 4),
((int)short_pvs.s0 << 2) + (short_pvs.s1 & 0b00000011),
1 year ago
((int)short_pvs.s2 << 2) + ((short_pvs.s6 & 0b11000000) / 64),
((int)short_pvs.s3 << 2) + ((short_pvs.s6 & 0b00110000) / 16),
((int)short_pvs.s4 << 2) + ((short_pvs.s6 & 0b00001100) / 4));
1 year ago
} else {
1 year ago
parsed = (int8)(((int)long_pvs.s0 << 2) + ((long_pvs.s3 & 0b00110000) / 16),
1 year ago
((int)long_pvs.s1 << 2) + ((long_pvs.s3 & 0b00001100) / 4),
((int)long_pvs.s2 << 2) + ((long_pvs.s3 & 0b00000011)),
1 year ago
((int)long_pvs.s4 << 2) + ((long_ext & 0b11000000) / 64),
((int)short_pvs.s0 << 2) + ((short_pvs.s3 & 0b00110000) / 16),
1 year ago
((int)short_pvs.s1 << 2) + ((short_pvs.s3 & 0b00001100) / 4),
((int)short_pvs.s2 << 2) + ((short_pvs.s3 & 0b00000011)),
((int)short_pvs.s4 << 2) + ((short_ext & 0b11000000) / 64));
1 year ago
}
1 year ago
float8 pf = convert_float8(parsed - 64);
1 year ago
float4 pv;
1 year ago
pv.s0 = combine_pvs(pf.s0, pf.s4, expo);
pv.s1 = combine_pvs(pf.s1, pf.s5, expo);
pv.s2 = combine_pvs(pf.s2, pf.s6, expo);
pv.s3 = combine_pvs(pf.s3, pf.s7, expo);
1 year ago
return clamp(pv*gain, 0.0, 1.0);
}
float get_k(float a, float b, float c, float d) {
return 2.0 - (fabs(a - b) + fabs(c - d));
}
__kernel void process_raw(const __global uchar * in, __global uchar * out, int expo_time)
{
const int gid_x = get_global_id(0);
const int gid_y = get_global_id(1);
const int row_before_offset = (gid_y == 0) ? 2 : 0;
const int row_after_offset = (gid_y == (RGB_HEIGHT/2 - 1)) ? 1 : 3;
1 year ago
float3 rgb_tmp;
uchar3 rgb_out[4]; // output is 2x2 window
int start_idx;
1 year ago
#if BIT_DEPTH == 10
bool aligned10;
if (gid_x % 2 == 0) {
aligned10 = true;
start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (5 * gid_x / 2 - 2) + (FRAME_STRIDE * FRAME_OFFSET);
} else {
aligned10 = false;
start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (5 * (gid_x - 1) / 2 + 1) + (FRAME_STRIDE * FRAME_OFFSET);
}
#else
start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (3 * gid_x - 2) + (FRAME_STRIDE * FRAME_OFFSET);
#endif
// read in 8x4 chars
uchar8 dat[4];
dat[0] = vload8(0, in + start_idx + FRAME_STRIDE*row_before_offset);
if (gid_x == 0 && gid_y == 0) {
// this wasn't a problem due to extra rows
dat[1] = vload8(0, in + start_idx + FRAME_STRIDE*1 + 2);
dat[1] = (uchar8)(0, 0, dat[1].s0, dat[1].s1, dat[1].s2, dat[1].s3, dat[1].s4, dat[1].s5);
} else {
dat[1] = vload8(0, in + start_idx + FRAME_STRIDE*1);
}
dat[2] = vload8(0, in + start_idx + FRAME_STRIDE*2);
dat[3] = vload8(0, in + start_idx + FRAME_STRIDE*row_after_offset);
1 year ago
#if HDR_OFFSET > 0
1 year ago
uchar8 short_dat[4];
1 year ago
short_dat[0] = vload8(0, in + start_idx + FRAME_STRIDE*(row_before_offset+HDR_OFFSET/2) + FRAME_STRIDE/2);
short_dat[1] = vload8(0, in + start_idx + FRAME_STRIDE*(1+HDR_OFFSET/2) + FRAME_STRIDE/2);
short_dat[2] = vload8(0, in + start_idx + FRAME_STRIDE*(2+HDR_OFFSET/2) + FRAME_STRIDE/2);
short_dat[3] = vload8(0, in + start_idx + FRAME_STRIDE*(row_after_offset+HDR_OFFSET/2) + FRAME_STRIDE/2);
1 year ago
#endif
// need extra bit for 10-bit
1 year ago
#if BIT_DEPTH == 10
uchar extra[4];
1 year ago
uchar short_extra[4];
if (!aligned10) {
extra[0] = in[start_idx + FRAME_STRIDE*row_before_offset + 8];
extra[1] = in[start_idx + FRAME_STRIDE*1 + 8];
extra[2] = in[start_idx + FRAME_STRIDE*2 + 8];
extra[3] = in[start_idx + FRAME_STRIDE*row_after_offset + 8];
1 year ago
short_extra[0] = in[start_idx + FRAME_STRIDE*(row_before_offset+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
short_extra[1] = in[start_idx + FRAME_STRIDE*(1+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
short_extra[2] = in[start_idx + FRAME_STRIDE*(2+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
short_extra[3] = in[start_idx + FRAME_STRIDE*(row_after_offset+HDR_OFFSET/2) + FRAME_STRIDE/2 + 8];
}
#endif
// correct vignetting
#if VIGNETTING
int gx = (gid_x*2 - RGB_WIDTH/2);
int gy = (gid_y*2 - RGB_HEIGHT/2);
const float gain = get_vignetting_s(gx*gx + gy*gy);
#else
const float gain = 1.0;
#endif
float4 v_rows[4];
// parse into floats
1 year ago
#if BIT_DEPTH == 10
1 year ago
v_rows[ROW_READ_ORDER[0]] = val4_from_10x2(dat[0], extra[0], short_dat[0], short_extra[0], aligned10, gain, expo_time);
v_rows[ROW_READ_ORDER[1]] = val4_from_10x2(dat[1], extra[1], short_dat[1], short_extra[1], aligned10, gain, expo_time);
v_rows[ROW_READ_ORDER[2]] = val4_from_10x2(dat[2], extra[2], short_dat[2], short_extra[2], aligned10, gain, expo_time);
v_rows[ROW_READ_ORDER[3]] = val4_from_10x2(dat[3], extra[3], short_dat[3], short_extra[3], aligned10, gain, expo_time);
#else
1 year ago
v_rows[ROW_READ_ORDER[0]] = val4_from_12(dat[0], gain);
v_rows[ROW_READ_ORDER[1]] = val4_from_12(dat[1], gain);
v_rows[ROW_READ_ORDER[2]] = val4_from_12(dat[2], gain);
v_rows[ROW_READ_ORDER[3]] = val4_from_12(dat[3], gain);
#endif
// mirror padding
if (gid_x == 0) {
v_rows[0].s0 = v_rows[0].s2;
v_rows[1].s0 = v_rows[1].s2;
v_rows[2].s0 = v_rows[2].s2;
v_rows[3].s0 = v_rows[3].s2;
} else if (gid_x == RGB_WIDTH/2 - 1) {
v_rows[0].s3 = v_rows[0].s1;
v_rows[1].s3 = v_rows[1].s1;
v_rows[2].s3 = v_rows[2].s1;
v_rows[3].s3 = v_rows[3].s1;
}
// a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf
const float k01 = get_k(v_rows[0].s0, v_rows[1].s1, v_rows[0].s2, v_rows[1].s1);
const float k02 = get_k(v_rows[0].s2, v_rows[1].s1, v_rows[2].s2, v_rows[1].s1);
const float k03 = get_k(v_rows[2].s0, v_rows[1].s1, v_rows[2].s2, v_rows[1].s1);
const float k04 = get_k(v_rows[0].s0, v_rows[1].s1, v_rows[2].s0, v_rows[1].s1);
1 year ago
rgb_tmp.x = (k02*v_rows[1].s2+k04*v_rows[1].s0)/(k02+k04); // R_G1
rgb_tmp.y = v_rows[1].s1; // G1(R)
rgb_tmp.z = (k01*v_rows[0].s1+k03*v_rows[2].s1)/(k01+k03); // B_G1
rgb_out[RGB_WRITE_ORDER[0]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb_tmp, 0.0, 1.0)), expo_time) * 255.0);
const float k11 = get_k(v_rows[0].s1, v_rows[2].s1, v_rows[0].s3, v_rows[2].s3);
const float k12 = get_k(v_rows[0].s2, v_rows[1].s1, v_rows[1].s3, v_rows[2].s2);
const float k13 = get_k(v_rows[0].s1, v_rows[0].s3, v_rows[2].s1, v_rows[2].s3);
const float k14 = get_k(v_rows[0].s2, v_rows[1].s3, v_rows[2].s2, v_rows[1].s1);
1 year ago
rgb_tmp.x = v_rows[1].s2; // R
rgb_tmp.y = (k11*(v_rows[0].s2+v_rows[2].s2)*0.5+k13*(v_rows[1].s3+v_rows[1].s1)*0.5)/(k11+k13); // G_R
rgb_tmp.z = (k12*(v_rows[0].s3+v_rows[2].s1)*0.5+k14*(v_rows[0].s1+v_rows[2].s3)*0.5)/(k12+k14); // B_R
rgb_out[RGB_WRITE_ORDER[1]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb_tmp, 0.0, 1.0)), expo_time) * 255.0);
const float k21 = get_k(v_rows[1].s0, v_rows[3].s0, v_rows[1].s2, v_rows[3].s2);
const float k22 = get_k(v_rows[1].s1, v_rows[2].s0, v_rows[2].s2, v_rows[3].s1);
const float k23 = get_k(v_rows[1].s0, v_rows[1].s2, v_rows[3].s0, v_rows[3].s2);
const float k24 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[3].s1, v_rows[2].s0);
1 year ago
rgb_tmp.x = (k22*(v_rows[1].s2+v_rows[3].s0)*0.5+k24*(v_rows[1].s0+v_rows[3].s2)*0.5)/(k22+k24); // R_B
rgb_tmp.y = (k21*(v_rows[1].s1+v_rows[3].s1)*0.5+k23*(v_rows[2].s2+v_rows[2].s0)*0.5)/(k21+k23); // G_B
rgb_tmp.z = v_rows[2].s1; // B
rgb_out[RGB_WRITE_ORDER[2]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb_tmp, 0.0, 1.0)), expo_time) * 255.0);
const float k31 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[1].s3, v_rows[2].s2);
const float k32 = get_k(v_rows[1].s3, v_rows[2].s2, v_rows[3].s3, v_rows[2].s2);
const float k33 = get_k(v_rows[3].s1, v_rows[2].s2, v_rows[3].s3, v_rows[2].s2);
const float k34 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[3].s1, v_rows[2].s2);
1 year ago
rgb_tmp.x = (k31*v_rows[1].s2+k33*v_rows[3].s2)/(k31+k33); // R_G2
rgb_tmp.y = v_rows[2].s2; // G2(B)
rgb_tmp.z = (k32*v_rows[2].s3+k34*v_rows[2].s1)/(k32+k34); // B_G2
rgb_out[RGB_WRITE_ORDER[3]] = convert_uchar3_sat(apply_gamma(color_correct(clamp(rgb_tmp, 0.0, 1.0)), expo_time) * 255.0);
// write ys
uchar2 yy = (uchar2)(
RGB_TO_Y(rgb_out[0].s0, rgb_out[0].s1, rgb_out[0].s2),
RGB_TO_Y(rgb_out[1].s0, rgb_out[1].s1, rgb_out[1].s2)
);
vstore2(yy, 0, out + mad24(gid_y * 2, YUV_STRIDE, gid_x * 2));
yy = (uchar2)(
RGB_TO_Y(rgb_out[2].s0, rgb_out[2].s1, rgb_out[2].s2),
RGB_TO_Y(rgb_out[3].s0, rgb_out[3].s1, rgb_out[3].s2)
);
vstore2(yy, 0, out + mad24(gid_y * 2 + 1, YUV_STRIDE, gid_x * 2));
// write uvs
const short ar = AVERAGE(rgb_out[0].s0, rgb_out[1].s0, rgb_out[2].s0, rgb_out[3].s0);
const short ag = AVERAGE(rgb_out[0].s1, rgb_out[1].s1, rgb_out[2].s1, rgb_out[3].s1);
const short ab = AVERAGE(rgb_out[0].s2, rgb_out[1].s2, rgb_out[2].s2, rgb_out[3].s2);
uchar2 uv = (uchar2)(
RGB_TO_U(ar, ag, ab),
RGB_TO_V(ar, ag, ab)
);
vstore2(uv, 0, out + UV_OFFSET + mad24(gid_y, YUV_STRIDE, gid_x * 2));
}