openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

110 lines
4.7 KiB

// Convert an interleaved 3-channel 8-bit image to a single gray channel and
// convolve the result with `filter`, staging each work-group's tile plus a
// HALF_FILTER_SIZE-wide halo in local memory.
//
// Gray weights that the integer divisions in the convolution loop approximate:
//   rgb2gray (opencv): (0.299, 0.587, 0.114)
//   bgr2gray:          (0.114, 0.587, 0.299)
//
// Parameters:
//   input  - interleaved pixels, 3 bytes each, IMAGE_W pixels per row.
//   output - one short per pixel, indexed row-major like `input`. Pixels
//            closer than HALF_FILTER_SIZE to any image edge are NOT written.
//   filter - coefficients, consumed row by row over the
//            [-HALF_FILTER_SIZE, HALF_FILTER_SIZE] window in both axes.
//   cached - work-group scratch tile; the indexing below needs
//            (get_local_size(0) + TWICE_HALF_FILTER_SIZE) cells per row and
//            (get_local_size(1) + TWICE_HALF_FILTER_SIZE) rows.
//
// IMAGE_W, IMAGE_H, HALF_FILTER_SIZE, TWICE_HALF_FILTER_SIZE,
// HALF_FILTER_SIZE_IMAGE_W and FLIP_RB are not defined in this file;
// presumably they are supplied as build options by the host (TODO confirm).
// NOTE(review): the halo indexing assumes TWICE_HALF_FILTER_SIZE is
// 2 * HALF_FILTER_SIZE and HALF_FILTER_SIZE_IMAGE_W is
// HALF_FILTER_SIZE * IMAGE_W - confirm against the host code.
__kernel void rgb2gray_conv2d(
  const __global uchar * input,
  __global short * output,
  __constant short * filter,
  __local uchar3 * cached
)
{
  const int gx = (int)get_global_id(0);
  const int gy = (int)get_global_id(1);
  const int lx = (int)get_local_id(0);
  const int ly = (int)get_local_id(1);
  const int tileW = (int)get_local_size(0);
  const int tileH = (int)get_local_size(1);

  // Pixel index of this work-item in the image and cell index in the tile.
  const int my = gy * IMAGE_W + gx;
  const int localRowLen = TWICE_HALF_FILTER_SIZE + tileW;
  const int localRowOffset = (ly + HALF_FILTER_SIZE) * localRowLen;
  const int myLocal = localRowOffset + lx + HALF_FILTER_SIZE;

  // Work-items on the tile's left/right band also fetch one side-halo cell.
  int haloCol = -1;   // column of that cell inside the tile, -1 = none
  int dx = 0;         // pixel offset from `my` to its source
  if (lx < HALF_FILTER_SIZE)
  {
    haloCol = lx;
    dx = -HALF_FILTER_SIZE;
  }
  else if (lx >= tileW - HALF_FILTER_SIZE)
  {
    haloCol = lx + TWICE_HALF_FILTER_SIZE;
    dx = HALF_FILTER_SIZE;
  }

  // Work-items on the tile's top/bottom band also fetch one vertical-halo cell.
  int haloRow = -1;   // row of that cell inside the tile, -1 = none
  int dy = 0;         // row displacement of its source, used for bounds checks
  int dyPixels = 0;   // same displacement expressed as a pixel offset
  if (ly < HALF_FILTER_SIZE)
  {
    haloRow = ly;
    dy = -HALF_FILTER_SIZE;
    dyPixels = -HALF_FILTER_SIZE_IMAGE_W;
  }
  else if (ly >= tileH - HALF_FILTER_SIZE)
  {
    haloRow = ly + TWICE_HALF_FILTER_SIZE;
    dy = HALF_FILTER_SIZE;
    dyPixels = HALF_FILTER_SIZE_IMAGE_W;
  }

  // Each load is gated on its *source pixel* lying inside the image rather
  // than on whether this work-item is a border pixel: border work-items still
  // have to fetch halo cells that their non-border neighbours read below.
  const bool inCols = gx < IMAGE_W;
  const bool inRows = gy < IMAGE_H;
  const bool sideOk = haloCol >= 0 && gx + dx >= 0 && gx + dx < IMAGE_W;
  const bool vertOk = haloRow >= 0 && gy + dy >= 0 && gy + dy < IMAGE_H;

  // vload3(i, p) reads the three bytes p[3*i], p[3*i+1], p[3*i+2].
  if (inCols && inRows)
  {
    cached[ myLocal ] = vload3(my, input);
  }
  if (sideOk && inRows)
  {
    cached[ localRowOffset + haloCol ] = vload3(my + dx, input);
  }
  if (vertOk && inCols)
  {
    cached[ haloRow * localRowLen + lx + HALF_FILTER_SIZE ] = vload3(my + dyPixels, input);
  }
  if (sideOk && vertOk)
  {
    // corner cell; haloCol == 0 is a valid column, hence the >= 0 test above
    cached[ haloRow * localRowLen + haloCol ] = vload3(my + dyPixels + dx, input);
  }

  // Every work-item of the group must reach this one barrier, so it is kept
  // outside of any branch.
  barrier(CLK_LOCAL_MEM_FENCE);

  // Pixels whose filter window would leave the image produce no output.
  if (
    gx < HALF_FILTER_SIZE ||
    gx > IMAGE_W - HALF_FILTER_SIZE - 1 ||
    gy < HALF_FILTER_SIZE ||
    gy > IMAGE_H - HALF_FILTER_SIZE - 1
  )
  {
    return;
  }

  // perform convolution
  int fIndex = 0;
  short sum = 0;
  for (int r = -HALF_FILTER_SIZE; r <= HALF_FILTER_SIZE; r++)
  {
    const int curRow = r * localRowLen;
    for (int c = -HALF_FILTER_SIZE; c <= HALF_FILTER_SIZE; c++, fIndex++)
    {
      const uchar3 px = cached[ myLocal + curRow + c ];
      if (!FLIP_RB)
      {
        // integer stand-in for dot(rgb_weights, px)
        sum += (px.x / 3 + px.y / 2 + px.z / 9) * filter[ fIndex ];
      }
      else
      {
        // integer stand-in for dot(bgr_weights, px)
        sum += (px.x / 9 + px.y / 2 + px.z / 3) * filter[ fIndex ];
      }
    }
  }
  output[my] = sum;
}