You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
110 lines
4.7 KiB
110 lines
4.7 KiB
// const __constant float3 rgb_weights = (0.299, 0.587, 0.114); // opencv rgb2gray weights
|
|
// const __constant float3 bgr_weights = (0.114, 0.587, 0.299); // bgr2gray weights
|
|
|
|
// Convert an interleaved 3-channel, 8-bit image to one gray channel and
// convolve it with `filter`. Each work-group first stages its tile, plus a
// halo of HALF_FILTER_SIZE pixels on every side, in local memory.
//
// Arguments:
//   input  - 3 bytes per pixel, IMAGE_W pixels per row
//   output - one short per pixel; pixels closer than HALF_FILTER_SIZE to an
//            image edge are never written
//   filter - (2 * HALF_FILTER_SIZE + 1)^2 taps, stored row by row
//   cached - local scratch, addressed as rows of
//            get_local_size(0) + TWICE_HALF_FILTER_SIZE entries, with
//            get_local_size(1) + TWICE_HALF_FILTER_SIZE rows in use
//
// Build-time macros expected from the host: IMAGE_W, IMAGE_H,
// HALF_FILTER_SIZE, TWICE_HALF_FILTER_SIZE, FLIP_RB and
// HALF_FILTER_SIZE_IMAGE_W (used as the pixel distance spanned by
// HALF_FILTER_SIZE image rows -- presumably HALF_FILTER_SIZE * IMAGE_W;
// confirm against the host code).
//
// Every halo slot is filled by exactly one work-item, chosen with else-if,
// so the work-group has to be at least 2 * HALF_FILTER_SIZE wide and tall.
__kernel void rgb2gray_conv2d(
  const __global uchar * input,
  __global short * output,
  __constant short * filter,
  __local uchar3 * cached
)
{
  const int gx = (int)get_global_id(0);
  const int gy = (int)get_global_id(1);
  const int lx = (int)get_local_id(0);
  const int ly = (int)get_local_id(1);
  const int lw = (int)get_local_size(0);
  const int lh = (int)get_local_size(1);

  // pixel index of this work-item in the image
  const int my = gx + gy * IMAGE_W;

  // position of this work-item inside the halo-padded local tile
  const int localRowLen = TWICE_HALF_FILTER_SIZE + lw;
  const int localRowOffset = ( ly + HALF_FILTER_SIZE ) * localRowLen;
  const int myLocal = localRowOffset + lx + HALF_FILTER_SIZE;

  // Which pixels HALF_FILTER_SIZE away from (gx, gy) exist inside the image.
  // Work-items outside the image (NDRange rounded up past it) load nothing.
  const bool inImage  = gx < IMAGE_W && gy < IMAGE_H;
  const bool hasLeft  = inImage && gx >= HALF_FILTER_SIZE;
  const bool hasRight = inImage && gx + HALF_FILTER_SIZE < IMAGE_W;
  const bool hasAbove = inImage && gy >= HALF_FILTER_SIZE;
  const bool hasBelow = inImage && gy + HALF_FILTER_SIZE < IMAGE_H;

  // cache this work-item's own pixel (all three channels)
  if ( inImage )
  {
    cached[ myLocal ] = vload3( my, input );
  }

  // Column halo: the first / last HALF_FILTER_SIZE tile columns also fetch
  // the pixel HALF_FILTER_SIZE to their left / right.
  int localColOffset = -1;   // tile column of the halo slot, -1 = none
  int globalColOffset = 0;   // matching pixel offset in the image

  if ( lx < HALF_FILTER_SIZE )
  {
    if ( hasLeft )
    {
      localColOffset = lx;
      globalColOffset = -HALF_FILTER_SIZE;
    }
  }
  else if ( lx >= lw - HALF_FILTER_SIZE )
  {
    if ( hasRight )
    {
      localColOffset = lx + TWICE_HALF_FILTER_SIZE;
      globalColOffset = HALF_FILTER_SIZE;
    }
  }

  // 0 is a valid halo column, so the sentinel test has to be >= 0
  if ( localColOffset >= 0 )
  {
    cached[ localRowOffset + localColOffset ] = vload3( my + globalColOffset, input );
  }

  // Row halo: the first / last HALF_FILTER_SIZE tile rows also fetch the
  // pixel HALF_FILTER_SIZE rows above / below, plus the corner slot when
  // this work-item owns a column halo as well.
  int haloRowOffset = -1;    // start of the halo row inside the tile, -1 = none
  int globalRowOffset = 0;   // matching pixel offset in the image

  if ( ly < HALF_FILTER_SIZE )
  {
    if ( hasAbove )
    {
      haloRowOffset = ly * localRowLen;
      globalRowOffset = -(HALF_FILTER_SIZE_IMAGE_W);
    }
  }
  else if ( ly >= lh - HALF_FILTER_SIZE )
  {
    if ( hasBelow )
    {
      haloRowOffset = ( ly + TWICE_HALF_FILTER_SIZE ) * localRowLen;
      globalRowOffset = HALF_FILTER_SIZE_IMAGE_W;
    }
  }

  if ( haloRowOffset >= 0 )
  {
    cached[ haloRowOffset + lx + HALF_FILTER_SIZE ] = vload3( my + globalRowOffset, input );
    if ( localColOffset >= 0 )
    {
      cached[ haloRowOffset + localColOffset ] = vload3( my + globalRowOffset + globalColOffset, input );
    }
  }

  // sync: one barrier that every work-item of the group reaches, so that
  // border work-items no longer synchronise on a different barrier call
  barrier(CLK_LOCAL_MEM_FENCE);

  // pixels without a complete filter neighbourhood produce no output
  if ( !( hasLeft && hasRight && hasAbove && hasBelow ) )
  {
    return;
  }

  // perform convolution
  int fIndex = 0;
  short sum = 0;

  for (int r = -HALF_FILTER_SIZE; r <= HALF_FILTER_SIZE; r++)
  {
    const int rowBase = myLocal + r * localRowLen;
    for (int c = -HALF_FILTER_SIZE; c <= HALF_FILTER_SIZE; c++, fIndex++)
    {
      const uchar3 px = cached[ rowBase + c ];

      // Integer stand-in for the gray weights (~0.299, 0.587, 0.114):
      // 1/3, 1/2 and 1/9, each channel truncated separately. FLIP_RB swaps
      // the weights applied to the first and third channel.
      if (!FLIP_RB)
      {
        sum += (px.x / 3 + px.y / 2 + px.z / 9) * filter[ fIndex ];
      }
      else
      {
        sum += (px.x / 9 + px.y / 2 + px.z / 3) * filter[ fIndex ];
      }
    }
  }

  output[my] = sum;
}