// Element count of one half-width, half-height plane.
// TRANSFORMED_WIDTH and TRANSFORMED_HEIGHT are not defined in this file;
// presumably they are supplied as build-time definitions (TODO confirm).
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))

// loadys: convert 8 consecutive bytes of Y to float and scatter them into
// four consecutive UV_SIZE-sized planes of `out`, starting at `out_offset`
// (an offset counted in floats).
//
// Plane order is y0, y1, y2, y3. Within each 2x2 group of input samples:
//
//   02
//   13
//
// i.e. even input rows feed y0/y2 and odd input rows feed y1/y3; lanes
// 0,2,4,6 of the loaded vector feed the first plane of the pair and lanes
// 1,3,5,7 feed the second.
//
// Each work-item consumes one uchar8 of Y and writes two float4 vectors.
// NOTE(review): the row/column split assumes the 8 samples of one work-item
// never straddle a row, i.e. TRANSFORMED_WIDTH is a multiple of 8 - confirm.
__kernel void loadys(__global uchar8 const * const Y,
                     __global float * out,
                     int out_offset)
{
    const int gid = get_global_id(0);

    // Position of the first of this work-item's 8 samples in the input image.
    const int first_sample = gid * 8;
    const int row = first_sample / TRANSFORMED_WIDTH;
    const int col = first_sample % TRANSFORMED_WIDTH;

    const float8 luma = convert_float8(Y[gid]);

    // Row parity selects the plane pair: 0 -> (y0, y2), 1 -> (y1, y3).
    const int row_parity = row & 1;
    __global float * const dst_even_lanes = out + out_offset + UV_SIZE * row_parity;
    __global float * const dst_odd_lanes = dst_even_lanes + UV_SIZE * 2;

    // Both planes share the same half-resolution element index.
    const int half_res_index = (row / 2) * (TRANSFORMED_WIDTH / 2) + col / 2;

    vstore4(luma.s0246, 0, dst_even_lanes + half_res_index);
    vstore4(luma.s1357, 0, dst_odd_lanes + half_res_index);
}

// loaduv: convert one uchar8 of `in` to float8 and store it in `out`,
// shifted by `out_offset`.
//
// `out_offset` is divided by 8 before indexing the float8 buffer, so it is
// effectively an offset in floats that is expected to be a multiple of 8
// (any remainder is truncated by the integer division).
__kernel void loaduv(__global uchar8 const * const in,
                     __global float8 * out,
                     int out_offset)
{
    const int gid = get_global_id(0);
    const int dst_index = gid + out_offset / 8;

    out[dst_index] = convert_float8(in[gid]);
}

// copy: move one float8 within `inout` from index gid + in_offset / 8 down
// to index gid.
//
// `in_offset` is divided by 8 before indexing, as in loaduv.
// NOTE(review): source and destination live in the same buffer; this is only
// well-defined if no work-item writes an element another work-item still has
// to read - confirm against the launch size used by the caller.
__kernel void copy(__global float8 * inout, int in_offset)
{
    const int gid = get_global_id(0);
    const int src_index = gid + in_offset / 8;

    inout[gid] = inout[src_index];
}