@ -1,12 +1,3 @@
// Half-precision configuration.
// When HALF_AS_FLOAT is defined, `half` and its 2/3/4-wide vector forms are
// textually replaced by the corresponding float types, so any code written
// against `half` is compiled as single precision. Otherwise the cl_khr_fp16
// extension is enabled so that the native `half` types can be used.
# ifdef HALF_AS_FLOAT
# define half float
# define half2 float2
# define half3 float3
# define half4 float4
# else
# pragma OPENCL EXTENSION cl_khr_fp16 : enable
# endif
// Sizes derived from RGB_WIDTH / RGB_HEIGHT, which are not defined in this
// part of the file (presumably supplied as build options -- confirm).
// UV_WIDTH / UV_HEIGHT are half the RGB dimensions; U_OFFSET is the number of
// pixels in one full-resolution plane.
// Each expansion is parenthesized so the macros stay correct inside larger
// expressions such as `n / UV_WIDTH` or `k * U_OFFSET`.
#define UV_WIDTH (RGB_WIDTH / 2)
#define UV_HEIGHT (RGB_HEIGHT / 2)
#define U_OFFSET (RGB_WIDTH * RGB_HEIGHT)
@ -17,171 +8,129 @@
// RGB_TO_V: fixed-point V (chroma) value from integer r, g, b.
// The weighted sum is biased by 0x8080 (128 in the high byte plus 0x80 for
// rounding) and then shifted down by 8 bits, so r = g = b = 0 yields 128.
// No whitespace may separate the macro name from its parameter list,
// otherwise the preprocessor treats it as an object-like macro.
#define RGB_TO_V(r, g, b) ((mul24(r, 56) - mul24(g, 47) - mul24(b, 9) + 0x8080) >> 8)
// AVERAGE: widens four values to ushort, sums them, adds 1 and shifts right
// by one bit. The result is therefore about twice the arithmetic mean of the
// inputs, not the mean itself.
// NOTE(review): presumably paired with halved chroma coefficients such as the
// ones in RGB_TO_V -- confirm against the callers.
#define AVERAGE(x, y, z, w) ((convert_ushort(x) + convert_ushort(y) + convert_ushort(z) + convert_ushort(w) + 1) >> 1)
// Color correction (post wb CCM) followed by tone mapping.
//
// rgb:     input RGB triple; the debayer kernel in this file clamps it to
//          [0, 1] before calling.
// returns: the corrected, tone-mapped triple (the kernel scales it by 255
//          and saturates to uchar).
//
// The three constant vectors are the columns of the 3x3 color correction
// matrix: each input channel scales one column and the three products are
// summed. The matrix has negative entries, so the intermediate value can
// leave [0, 1].
//
// The tone curve is a piecewise rational S-curve split at the midpoint mp.
// With the constants below it maps 0 -> 0, mp -> gamma_k*mp + gamma_b and
// 1 -> 1. The comparison and the ?: operate component-wise on the vector.
float3 color_correct(float3 rgb) {
  // color correction
  float3 x = rgb.x * (float3)(1.82717181, -0.31231438, 0.07307673);
  x += rgb.y * (float3)(-0.5743977, 1.36858544, -0.53183455);
  x += rgb.z * (float3)(-0.25277411, -0.05627105, 1.45875782);

  // tone mapping params
  const float gamma_k = 0.75;
  const float gamma_b = 0.125;
  const float mp = 0.01; // ideally midpoint should be adaptive
  const float rk = 9 - 100*mp;

  // poly approximation for s curve
  return (x > mp) ?
    ((rk * (x-mp) * (1-(gamma_k*mp+gamma_b)) * (1+1/(rk*(1-mp))) / (1+rk*(x-mp))) + gamma_k*mp + gamma_b) :
    ((rk * (x-mp) * (gamma_k*mp+gamma_b) * (1+1/(rk*mp)) / (1-rk*(x-mp))) + gamma_k*mp + gamma_b);
}
// Vignetting gain for a pixel, as a piecewise function of r.
//
// r:       the callers in this file pass gx*gx + gy*gy, i.e. the squared
//          pixel distance from the image center.
// returns: a multiplicative gain; 1.0 at r == 0 and growing with r.
//
// The breakpoints are the squares of 250, 700 and 1050. The first two
// segments are linear in r, the last two are quadratic in r.
float get_vignetting_s(float r) {
  if (r < 62500) {
    return 1.0f + 0.0000008f*r;
  }
  if (r < 490000) {
    return 0.9625f + 0.0000014f*r;
  }
  if (r < 1102500) {
    return 1.26434f + 0.0000000000016f*r*r;
  }
  return 0.53503625f + 0.0000000000022f*r*r;
}
inline half val_from_10 ( const uchar * source, int gx, int gy, half black_level ) {
// parse 12bit
int start = gy * FRAME_STRIDE + ( 3 * ( gx / 2 ) ) + ( FRAME_STRIDE * FRAME_OFFSET ) ;
int offset = gx % 2 ;
uint major = ( uint ) source[start + offset] << 4 ;
uint minor = ( source[start + 2] >> ( 4 * offset ) ) & 0xf ;
half pv = ( ( half ) ( major + minor ) ) / 4.0 ;
// normalize
pv = max ( ( half ) 0.0 , pv - black_level ) ;
pv /= ( 1024.0 - black_level ) ;
// correct vignetting
if ( CAM_NUM == 1 ) { // fcamera
gx = ( gx - RGB_WIDTH/2 ) ;
gy = ( gy - RGB_HEIGHT/2 ) ;
pv *= get_vignetting_s ( gx*gx + gy*gy ) ;
}
pv = clamp ( pv, ( half ) 0.0 , ( half ) 1.0 ) ;
return pv ;
float4 val4_from_12 ( uchar8 pvs, float gain ) {
uint4 parsed = ( uint4 ) ( ( ( uint ) pvs.s0<<4 ) + ( pvs.s1>>4 ) , // is from the previous 10 bit
( ( uint ) pvs.s2<<4 ) + ( pvs.s4&0xF ) ,
( ( uint ) pvs.s3<<4 ) + ( pvs.s4>>4 ) ,
( ( uint ) pvs.s5<<4 ) + ( pvs.s7&0xF ) ) ;
// normalize and scale
float4 pv = ( convert_float4 ( parsed ) - 168.0 ) / ( 4096.0 - 168.0 ) ;
return clamp ( pv*gain, 0.0 , 1.0 ) ;
}
// Interpolation weight built from two sample pairs.
//
// Returns 2 minus the sum of |a - b| and |c - d|. The debayer kernel in this
// file uses the result to weight neighboring samples, so pairs that agree
// closely get a weight near 2 and pairs that differ strongly get a weight
// near 0 (for inputs already clamped to [0, 1]).
// NOTE(review): callers divide by the sum of two such weights; that sum is
// zero only if both weights are exactly 0 -- confirm this cannot occur or is
// acceptable.
float get_k(float a, float b, float c, float d) {
  return 2.0f - (fabs(a - b) + fabs(c - d));
}
__kernel void debayer10 ( const __global uchar * in,
__global uchar * out,
__local half * cached,
float black_level
)
__kernel void debayer10 ( const __global uchar * in, __global uchar * out )
{
const int gid_x = get_global_id ( 0 ) ;
const int gid_y = get_global_id ( 1 ) ;
const int lid_x = get_local_id ( 0 ) ;
const int lid_y = get_local_id ( 1 ) ;
const int localRowLen = mad24 ( get_local_size ( 0 ) , 2 , 2 ) ; // 2 padding
const int localColLen = mad24 ( get_local_size ( 1 ) , 2 , 2 ) ;
const int x_global = mul24 ( gid_x, 2 ) ;
const int y_global = mul24 ( gid_y, 2 ) ;
const int y_top_mod = ( gid_y == 0 ) ? 2: 0 ;
const int y_bot_mod = ( gid_y == ( RGB_HEIGHT/2 - 1 ) ) ? 1: 3 ;
const int x_local = mad24 ( lid_x, 2 , 1 ) ;
const int y_local = mad24 ( lid_y, 2 , 1 ) ;
const int x_global_mod = ( gid_x == 0 | | gid_x == get_global_size ( 0 ) - 1 ) ? -1: 1 ;
const int y_global_mod = ( gid_y == 0 | | gid_y == get_global_size ( 1 ) - 1 ) ? -1: 1 ;
float3 rgb ;
uchar3 rgb_out[4] ;
int localColOffset = 0 ;
int globalColOffset ;
int start = ( 2 * gid_y - 1 ) * FRAME_STRIDE + ( 3 * gid_x - 2 ) + ( FRAME_STRIDE * FRAME_OFFSET ) ;
cached[mad24 ( y_local + 0 , localRowLen, x_local + 0 ) ] = val_from_10 ( in, x_global + 0 , y_global + 0 , black_level ) ;
cached[mad24 ( y_local + 0 , localRowLen, x_local + 1 ) ] = val_from_10 ( in, x_global + 1 , y_global + 0 , black_level ) ;
cached[mad24 ( y_local + 1 , localRowLen, x_local + 0 ) ] = val_from_10 ( in, x_global + 0 , y_global + 1 , black_level ) ;
cached[mad24 ( y_local + 1 , localRowLen, x_local + 1 ) ] = val_from_10 ( in, x_global + 1 , y_global + 1 , black_level ) ;
// read in 8x4 chars
uchar8 dat[4] ;
dat[0] = vload8 ( 0 , in + start + FRAME_STRIDE*y_top_mod ) ;
dat[1] = vload8 ( 0 , in + start + FRAME_STRIDE*1 ) ;
dat[2] = vload8 ( 0 , in + start + FRAME_STRIDE*2 ) ;
dat[3] = vload8 ( 0 , in + start + FRAME_STRIDE*y_bot_mod ) ;
if ( lid_x == 0 ) { // left edge
localColOffset = -1 ;
globalColOffset = -x_global_mod ;
cached[mad24 ( y_local + 0 , localRowLen, x_local - 1 ) ] = val_from_10 ( in, x_global - x_global_mod, y_global + 0 , black_level ) ;
cached[mad24 ( y_local + 1 , localRowLen, x_local - 1 ) ] = val_from_10 ( in, x_global - x_global_mod, y_global + 1 , black_level ) ;
} else if ( lid_x == get_local_size ( 0 ) - 1 ) { // right edge
localColOffset = 2 ;
globalColOffset = x_global_mod + 1 ;
cached[mad24 ( y_local + 0 , localRowLen, x_local + 2 ) ] = val_from_10 ( in, x_global + x_global_mod + 1 , y_global + 0 , black_level ) ;
cached[mad24 ( y_local + 1 , localRowLen, x_local + 2 ) ] = val_from_10 ( in, x_global + x_global_mod + 1 , y_global + 1 , black_level ) ;
}
if ( lid_y == 0 ) { // top row
cached[mad24 ( y_local - 1 , localRowLen, x_local + 0 ) ] = val_from_10 ( in, x_global + 0 , y_global - y_global_mod, black_level ) ;
cached[mad24 ( y_local - 1 , localRowLen, x_local + 1 ) ] = val_from_10 ( in, x_global + 1 , y_global - y_global_mod, black_level ) ;
if ( localColOffset != 0 ) { // cache corners
cached[mad24 ( y_local - 1 , localRowLen, x_local + localColOffset ) ] = val_from_10 ( in, x_global + globalColOffset, y_global - y_global_mod, black_level ) ;
}
} else if ( lid_y == get_local_size ( 1 ) - 1 ) { // bottom row
cached[mad24 ( y_local + 2 , localRowLen, x_local + 0 ) ] = val_from_10 ( in, x_global + 0 , y_global + y_global_mod + 1 , black_level ) ;
cached[mad24 ( y_local + 2 , localRowLen, x_local + 1 ) ] = val_from_10 ( in, x_global + 1 , y_global + y_global_mod + 1 , black_level ) ;
if ( localColOffset != 0 ) { // cache corners
cached[mad24 ( y_local + 2 , localRowLen, x_local + localColOffset ) ] = val_from_10 ( in, x_global + globalColOffset, y_global + y_global_mod + 1 , black_level ) ;
}
// correct vignetting
# if VIGNETTING
int gx = ( gid_x*2 - RGB_WIDTH/2 ) ;
int gy = ( gid_y*2 - RGB_HEIGHT/2 ) ;
const float gain = get_vignetting_s ( gx*gx + gy*gy ) ;
# else
const float gain = 1.0 ;
# endif
// process them to floats
float4 va = val4_from_12 ( dat[0], gain ) ;
float4 vb = val4_from_12 ( dat[1], gain ) ;
float4 vc = val4_from_12 ( dat[2], gain ) ;
float4 vd = val4_from_12 ( dat[3], gain ) ;
if ( gid_x == 0 ) {
va.s0 = va.s2 ;
vb.s0 = vb.s2 ;
vc.s0 = vc.s2 ;
vd.s0 = vd.s2 ;
} else if ( gid_x == RGB_WIDTH/2 - 1 ) {
va.s3 = va.s1 ;
vb.s3 = vb.s1 ;
vc.s3 = vc.s1 ;
vd.s3 = vd.s1 ;
}
// sync
barrier ( CLK_LOCAL_MEM_FENCE ) ;
half3 rgb ;
uchar3 rgb_out[4] ;
const half4 va = vload4 ( 0 , cached + mad24 ( lid_y * 2 + 0 , localRowLen, lid_x * 2 ) ) ;
const half4 vb = vload4 ( 0 , cached + mad24 ( lid_y * 2 + 1 , localRowLen, lid_x * 2 ) ) ;
const half4 vc = vload4 ( 0 , cached + mad24 ( lid_y * 2 + 2 , localRowLen, lid_x * 2 ) ) ;
const half4 vd = vload4 ( 0 , cached + mad24 ( lid_y * 2 + 3 , localRowLen, lid_x * 2 ) ) ;
// a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf
const half k01 = get_k ( va.s0, vb.s1, va.s2, vb.s1 ) ;
const half k02 = get_k ( va.s2, vb.s1, vc.s2, vb.s1 ) ;
const half k03 = get_k ( vc.s0, vb.s1, vc.s2, vb.s1 ) ;
const half k04 = get_k ( va.s0, vb.s1, vc.s0, vb.s1 ) ;
const float k01 = get_k ( va.s0, vb.s1, va.s2, vb.s1 ) ;
const float k02 = get_k ( va.s2, vb.s1, vc.s2, vb.s1 ) ;
const float k03 = get_k ( vc.s0, vb.s1, vc.s2, vb.s1 ) ;
const float k04 = get_k ( va.s0, vb.s1, vc.s0, vb.s1 ) ;
rgb.x = ( k02*vb.s2+k04*vb.s0 ) / ( k02+k04 ) ; // R_G1
rgb.y = vb.s1 ; // G1(R)
rgb.z = ( k01*va.s1+k03*vc.s1 ) / ( k01+k03 ) ; // B_G1
rgb_out[0] = convert_uchar3_sat ( color_correct ( clamp ( rgb, 0.0 , 1.0 ) ) * 255.0 ) ;
const half k11 = get_k ( va.s1, vc.s1, va.s3, vc.s3 ) ;
const half k12 = get_k ( va.s2, vb.s1, vb.s3, vc.s2 ) ;
const half k13 = get_k ( va.s1, va.s3, vc.s1, vc.s3 ) ;
const half k14 = get_k ( va.s2, vb.s3, vc.s2, vb.s1 ) ;
const float k11 = get_k ( va.s1, vc.s1, va.s3, vc.s3 ) ;
const float k12 = get_k ( va.s2, vb.s1, vb.s3, vc.s2 ) ;
const float k13 = get_k ( va.s1, va.s3, vc.s1, vc.s3 ) ;
const float k14 = get_k ( va.s2, vb.s3, vc.s2, vb.s1 ) ;
rgb.x = vb.s2 ; // R
rgb.y = ( k11* ( va.s2+vc.s2 ) *0.5+k13* ( vb.s3+vb.s1 ) *0.5 ) / ( k11+k13 ) ; // G_R
rgb.z = ( k12* ( va.s3+vc.s1 ) *0.5+k14* ( va.s1+vc.s3 ) *0.5 ) / ( k12+k14 ) ; // B_R
rgb_out[1] = convert_uchar3_sat ( color_correct ( clamp ( rgb, 0.0 , 1.0 ) ) * 255.0 ) ;
const half k21 = get_k ( vb.s0, vd.s0, vb.s2, vd.s2 ) ;
const half k22 = get_k ( vb.s1, vc.s0, vc.s2, vd.s1 ) ;
const half k23 = get_k ( vb.s0, vb.s2, vd.s0, vd.s2 ) ;
const half k24 = get_k ( vb.s1, vc.s2, vd.s1, vc.s0 ) ;
const float k21 = get_k ( vb.s0, vd.s0, vb.s2, vd.s2 ) ;
const float k22 = get_k ( vb.s1, vc.s0, vc.s2, vd.s1 ) ;
const float k23 = get_k ( vb.s0, vb.s2, vd.s0, vd.s2 ) ;
const float k24 = get_k ( vb.s1, vc.s2, vd.s1, vc.s0 ) ;
rgb.x = ( k22* ( vb.s2+vd.s0 ) *0.5+k24* ( vb.s0+vd.s2 ) *0.5 ) / ( k22+k24 ) ; // R_B
rgb.y = ( k21* ( vb.s1+vd.s1 ) *0.5+k23* ( vc.s2+vc.s0 ) *0.5 ) / ( k21+k23 ) ; // G_B
rgb.z = vc.s1 ; // B
rgb_out[2] = convert_uchar3_sat ( color_correct ( clamp ( rgb, 0.0 , 1.0 ) ) * 255.0 ) ;
const half k31 = get_k ( vb.s1, vc.s2, vb.s3, vc.s2 ) ;
const half k32 = get_k ( vb.s3, vc.s2, vd.s3, vc.s2 ) ;
const half k33 = get_k ( vd.s1, vc.s2, vd.s3, vc.s2 ) ;
const half k34 = get_k ( vb.s1, vc.s2, vd.s1, vc.s2 ) ;
const float k31 = get_k ( vb.s1, vc.s2, vb.s3, vc.s2 ) ;
const float k32 = get_k ( vb.s3, vc.s2, vd.s3, vc.s2 ) ;
const float k33 = get_k ( vd.s1, vc.s2, vd.s3, vc.s2 ) ;
const float k34 = get_k ( vb.s1, vc.s2, vd.s1, vc.s2 ) ;
rgb.x = ( k31*vb.s2+k33*vd.s2 ) / ( k31+k33 ) ; // R_G2
rgb.y = vc.s2 ; // G2(B)
rgb.z = ( k32*vc.s3+k34*vc.s1 ) / ( k32+k34 ) ; // B_G2