|  |  |  | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const half black_level = 42.0;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const __constant half3 color_correction[3] = {
 | 
					
						
							|  |  |  |   // post wb CCM
 | 
					
						
							|  |  |  |   (half3)(1.82717181, -0.31231438, 0.07307673),
 | 
					
						
							|  |  |  |   (half3)(-0.5743977, 1.36858544, -0.53183455),
 | 
					
						
							|  |  |  |   (half3)(-0.25277411, -0.05627105, 1.45875782),
 | 
					
						
							|  |  |  | };
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // tone mapping params
 | 
					
						
							|  |  |  | const half cpk = 0.75;
 | 
					
						
							|  |  |  | const half cpb = 0.125;
 | 
					
						
							|  |  |  | const half cpxk = 0.0025;
 | 
					
						
							|  |  |  | const half cpxb = 0.01;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | half mf(half x, half cp) {
 | 
					
						
							|  |  |  |   half rk = 9 - 100*cp;
 | 
					
						
							|  |  |  |   if (x > cp) {
 | 
					
						
							|  |  |  |     return (rk * (x-cp) * (1-(cpk*cp+cpb)) * (1+1/(rk*(1-cp))) / (1+rk*(x-cp))) + cpk*cp + cpb;
 | 
					
						
							|  |  |  |   } else if (x < cp) {
 | 
					
						
							|  |  |  |     return (rk * (x-cp) * (cpk*cp+cpb) * (1+1/(rk*cp)) / (1-rk*(x-cp))) + cpk*cp + cpb;
 | 
					
						
							|  |  |  |   } else {
 | 
					
						
							|  |  |  |     return x;
 | 
					
						
							|  |  |  |   }
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | half3 color_correct(half3 rgb) {
 | 
					
						
							|  |  |  |   half3 ret = (0,0,0);
 | 
					
						
							|  |  |  |   half cpx = 0.01;
 | 
					
						
							|  |  |  |   ret += (half)rgb.x * color_correction[0];
 | 
					
						
							|  |  |  |   ret += (half)rgb.y * color_correction[1];
 | 
					
						
							|  |  |  |   ret += (half)rgb.z * color_correction[2];
 | 
					
						
							|  |  |  |   ret.x = mf(ret.x, cpx);
 | 
					
						
							|  |  |  |   ret.y = mf(ret.y, cpx);
 | 
					
						
							|  |  |  |   ret.z = mf(ret.z, cpx);
 | 
					
						
							|  |  |  |   ret = clamp(0.0h, 255.0h, ret*255.0h);
 | 
					
						
							|  |  |  |   return ret;
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | half val_from_10(const uchar * source, int gx, int gy) {
 | 
					
						
							|  |  |  |   // parse 10bit
 | 
					
						
							|  |  |  |   int start = gy * FRAME_STRIDE + (5 * (gx / 4));
 | 
					
						
							|  |  |  |   int offset = gx % 4;
 | 
					
						
							|  |  |  |   uint major = (uint)source[start + offset] << 2;
 | 
					
						
							|  |  |  |   uint minor = (source[start + 4] >> (2 * offset)) & 3;
 | 
					
						
							|  |  |  |   half pv = (half)(major + minor);
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // normalize
 | 
					
						
							|  |  |  |   pv = max(0.0h, pv - black_level);
 | 
					
						
							|  |  |  |   pv *= 0.00101833h; // /= (1024.0f - black_level);
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // correct vignetting
 | 
					
						
							|  |  |  |   if (CAM_NUM == 1) { // fcamera
 | 
					
						
							|  |  |  |     gx = (gx - RGB_WIDTH/2);
 | 
					
						
							|  |  |  |     gy = (gy - RGB_HEIGHT/2);
 | 
					
						
							|  |  |  |     float r = gx*gx + gy*gy;
 | 
					
						
							|  |  |  |     half s;
 | 
					
						
							|  |  |  |     if (r < 62500) {
 | 
					
						
							|  |  |  |       s = (half)(1.0f + 0.0000008f*r);
 | 
					
						
							|  |  |  |     } else if (r < 490000) {
 | 
					
						
							|  |  |  |       s = (half)(0.9625f + 0.0000014f*r);
 | 
					
						
							|  |  |  |     } else if (r < 1102500) {
 | 
					
						
							|  |  |  |       s = (half)(1.26434f + 0.0000000000016f*r*r);
 | 
					
						
							|  |  |  |     } else {
 | 
					
						
							|  |  |  |       s = (half)(0.53503625f + 0.0000000000022f*r*r);
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  |     pv = s * pv;
 | 
					
						
							|  |  |  |   }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   pv = clamp(0.0h, 1.0h, pv);
 | 
					
						
							|  |  |  |   return pv;
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | half fabs_diff(half x, half y) {
 | 
					
						
							|  |  |  |   return fabs(x-y);
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | half phi(half x) {
 | 
					
						
							|  |  |  |   // detection funtion
 | 
					
						
							|  |  |  |   return 2 - x;
 | 
					
						
							|  |  |  |   // if (x > 1) {
 | 
					
						
							|  |  |  |   //   return 1 / x;
 | 
					
						
							|  |  |  |   // } else {
 | 
					
						
							|  |  |  |   //   return 2 - x;
 | 
					
						
							|  |  |  |   // }
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | __kernel void debayer10(const __global uchar * in,
 | 
					
						
							|  |  |  |                         __global uchar * out,
 | 
					
						
							|  |  |  |                         __local half * cached
 | 
					
						
							|  |  |  |                        )
 | 
					
						
							|  |  |  | {
 | 
					
						
							|  |  |  |   const int x_global = get_global_id(0);
 | 
					
						
							|  |  |  |   const int y_global = get_global_id(1);
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   const int localRowLen = 2 + get_local_size(0); // 2 padding
 | 
					
						
							|  |  |  |   const int x_local = get_local_id(0); // 0-15
 | 
					
						
							|  |  |  |   const int y_local = get_local_id(1); // 0-15
 | 
					
						
							|  |  |  |   const int localOffset = (y_local + 1) * localRowLen + x_local + 1; // max 18x18-1
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   int out_idx = 3 * x_global + 3 * y_global * RGB_WIDTH;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   half pv = val_from_10(in, x_global, y_global);
 | 
					
						
							|  |  |  |   cached[localOffset] = pv;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // don't care
 | 
					
						
							|  |  |  |   if (x_global < 1 || x_global >= RGB_WIDTH - 1 || y_global < 1 || y_global >= RGB_HEIGHT - 1) {
 | 
					
						
							|  |  |  |     return;
 | 
					
						
							|  |  |  |   }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // cache padding
 | 
					
						
							|  |  |  |   int localColOffset = -1;
 | 
					
						
							|  |  |  |   int globalColOffset = -1;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // cache padding
 | 
					
						
							|  |  |  |   if (x_local < 1) {
 | 
					
						
							|  |  |  |     localColOffset = x_local;
 | 
					
						
							|  |  |  |     globalColOffset = -1;
 | 
					
						
							|  |  |  |     cached[(y_local + 1) * localRowLen + x_local] = val_from_10(in, x_global-1, y_global);
 | 
					
						
							|  |  |  |   } else if (x_local >= get_local_size(0) - 1) {
 | 
					
						
							|  |  |  |     localColOffset = x_local + 2;
 | 
					
						
							|  |  |  |     globalColOffset = 1;
 | 
					
						
							|  |  |  |     cached[localOffset + 1] = val_from_10(in, x_global+1, y_global);
 | 
					
						
							|  |  |  |   }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (y_local < 1) {
 | 
					
						
							|  |  |  |     cached[y_local * localRowLen + x_local + 1] = val_from_10(in, x_global, y_global-1);
 | 
					
						
							|  |  |  |     if (localColOffset != -1) {
 | 
					
						
							|  |  |  |       cached[y_local * localRowLen + localColOffset] = val_from_10(in, x_global+globalColOffset, y_global-1);
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  |   } else if (y_local >= get_local_size(1) - 1) {
 | 
					
						
							|  |  |  |     cached[(y_local + 2) * localRowLen + x_local + 1] = val_from_10(in, x_global, y_global+1);
 | 
					
						
							|  |  |  |     if (localColOffset != -1) {
 | 
					
						
							|  |  |  |       cached[(y_local + 2) * localRowLen + localColOffset] = val_from_10(in, x_global+globalColOffset, y_global+1);
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  |   }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // sync
 | 
					
						
							|  |  |  |   barrier(CLK_LOCAL_MEM_FENCE);
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   half d1 = cached[localOffset - localRowLen - 1];
 | 
					
						
							|  |  |  |   half d2 = cached[localOffset - localRowLen + 1];
 | 
					
						
							|  |  |  |   half d3 = cached[localOffset + localRowLen - 1];
 | 
					
						
							|  |  |  |   half d4 = cached[localOffset + localRowLen + 1];
 | 
					
						
							|  |  |  |   half n1 = cached[localOffset - localRowLen];
 | 
					
						
							|  |  |  |   half n2 = cached[localOffset + 1];
 | 
					
						
							|  |  |  |   half n3 = cached[localOffset + localRowLen];
 | 
					
						
							|  |  |  |   half n4 = cached[localOffset - 1];
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   half3 rgb;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf
 | 
					
						
							|  |  |  |   if (x_global % 2 == 0) {
 | 
					
						
							|  |  |  |     if (y_global % 2 == 0) {
 | 
					
						
							|  |  |  |       rgb.y = pv; // G1(R)
 | 
					
						
							|  |  |  |       half k1 = phi(fabs_diff(d1, pv) + fabs_diff(d2, pv));
 | 
					
						
							|  |  |  |       half k2 = phi(fabs_diff(d2, pv) + fabs_diff(d4, pv));
 | 
					
						
							|  |  |  |       half k3 = phi(fabs_diff(d3, pv) + fabs_diff(d4, pv));
 | 
					
						
							|  |  |  |       half k4 = phi(fabs_diff(d1, pv) + fabs_diff(d3, pv));
 | 
					
						
							|  |  |  |       // R_G1
 | 
					
						
							|  |  |  |       rgb.x = (k2*n2+k4*n4)/(k2+k4);
 | 
					
						
							|  |  |  |       // B_G1
 | 
					
						
							|  |  |  |       rgb.z = (k1*n1+k3*n3)/(k1+k3);
 | 
					
						
							|  |  |  |     } else {
 | 
					
						
							|  |  |  |       rgb.z = pv; // B
 | 
					
						
							|  |  |  |       half k1 = phi(fabs_diff(d1, d3) + fabs_diff(d2, d4));
 | 
					
						
							|  |  |  |       half k2 = phi(fabs_diff(n1, n4) + fabs_diff(n2, n3));
 | 
					
						
							|  |  |  |       half k3 = phi(fabs_diff(d1, d2) + fabs_diff(d3, d4));
 | 
					
						
							|  |  |  |       half k4 = phi(fabs_diff(n1, n2) + fabs_diff(n3, n4));
 | 
					
						
							|  |  |  |       // G_B
 | 
					
						
							|  |  |  |       rgb.y = (k1*(n1+n3)*0.5+k3*(n2+n4)*0.5)/(k1+k3);
 | 
					
						
							|  |  |  |       // R_B
 | 
					
						
							|  |  |  |       rgb.x = (k2*(d2+d3)*0.5+k4*(d1+d4)*0.5)/(k2+k4);
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  |   } else {
 | 
					
						
							|  |  |  |     if (y_global % 2 == 0) {
 | 
					
						
							|  |  |  |       rgb.x = pv; // R
 | 
					
						
							|  |  |  |       half k1 = phi(fabs_diff(d1, d3) + fabs_diff(d2, d4));
 | 
					
						
							|  |  |  |       half k2 = phi(fabs_diff(n1, n4) + fabs_diff(n2, n3));
 | 
					
						
							|  |  |  |       half k3 = phi(fabs_diff(d1, d2) + fabs_diff(d3, d4));
 | 
					
						
							|  |  |  |       half k4 = phi(fabs_diff(n1, n2) + fabs_diff(n3, n4));
 | 
					
						
							|  |  |  |       // G_R
 | 
					
						
							|  |  |  |       rgb.y = (k1*(n1+n3)*0.5+k3*(n2+n4)*0.5)/(k1+k3);
 | 
					
						
							|  |  |  |       // B_R
 | 
					
						
							|  |  |  |       rgb.z = (k2*(d2+d3)*0.5+k4*(d1+d4)*0.5)/(k2+k4);
 | 
					
						
							|  |  |  |     } else {
 | 
					
						
							|  |  |  |       rgb.y = pv; // G2(B)
 | 
					
						
							|  |  |  |       half k1 = phi(fabs_diff(d1, pv) + fabs_diff(d2, pv));
 | 
					
						
							|  |  |  |       half k2 = phi(fabs_diff(d2, pv) + fabs_diff(d4, pv));
 | 
					
						
							|  |  |  |       half k3 = phi(fabs_diff(d3, pv) + fabs_diff(d4, pv));
 | 
					
						
							|  |  |  |       half k4 = phi(fabs_diff(d1, pv) + fabs_diff(d3, pv));
 | 
					
						
							|  |  |  |       // R_G2
 | 
					
						
							|  |  |  |       rgb.x = (k1*n1+k3*n3)/(k1+k3);
 | 
					
						
							|  |  |  |       // B_G2
 | 
					
						
							|  |  |  |       rgb.z = (k2*n2+k4*n4)/(k2+k4);
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  |   }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   rgb = clamp(0.0h, 1.0h, rgb);
 | 
					
						
							|  |  |  |   rgb = color_correct(rgb);
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   out[out_idx + 0] = (uchar)(rgb.z);
 | 
					
						
							|  |  |  |   out[out_idx + 1] = (uchar)(rgb.y);
 | 
					
						
							|  |  |  |   out[out_idx + 2] = (uchar)(rgb.x);
 | 
					
						
							|  |  |  | }
 |