You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
			
				
					110 lines
				
				4.7 KiB
			
		
		
			
		
	
	
					110 lines
				
				4.7 KiB
			| 
								 
											6 years ago
										 
									 | 
							
								// const __constant float3 rgb_weights = (0.299, 0.587, 0.114); // opencv rgb2gray weights
							 | 
						||
| 
								 | 
							
								// const __constant float3 bgr_weights = (0.114, 0.587, 0.299); // bgr2gray weights
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// convert input rgb image to single channel then conv
							 | 
						||
| 
								 | 
							
								// Grayscale + 2D convolution over an interleaved 3-byte-per-pixel image.
								//
								// Each work-item owns one pixel. The work-group first stages its tile of
								// pixels, plus a HALF_FILTER_SIZE-wide halo on every side, into local
								// memory; every interior work-item then convolves the gray value of its
								// (2*HALF_FILTER_SIZE+1) x (2*HALF_FILTER_SIZE+1) neighbourhood with `filter`.
								//
								//   input  : row-major image, IMAGE_W pixels per row, 3 uchar per pixel.
								//   output : row-major, one short per pixel. Pixels closer than
								//            HALF_FILTER_SIZE to any image edge are NOT written.
								//   filter : filter taps in row-major order.
								//   cached : local scratch of at least
								//            (local_size(0) + TWICE_HALF_FILTER_SIZE) *
								//            (local_size(1) + TWICE_HALF_FILTER_SIZE) uchar3 elements.
								//
								// Build-time macros used: IMAGE_W, IMAGE_H, HALF_FILTER_SIZE,
								// TWICE_HALF_FILTER_SIZE, HALF_FILTER_SIZE_IMAGE_W, FLIP_RB.
								// NOTE(review): HALF_FILTER_SIZE_IMAGE_W is presumably
								// HALF_FILTER_SIZE * IMAGE_W (a row displacement in pixels) - confirm
								// against the host code that builds this program.
								__kernel void rgb2gray_conv2d(
								  const __global uchar * input,
								  __global short * output,
								  __constant short * filter,
								  __local uchar3 * cached
								)
								{
								  const int gx = (int)get_global_id(0);
								  const int gy = (int)get_global_id(1);
								  const int lx = (int)get_local_id(0);
								  const int ly = (int)get_local_id(1);
								  const int lw = (int)get_local_size(0);
								  const int lh = (int)get_local_size(1);

								  // Linear pixel index of this work-item in the image.
								  const int my = gy * IMAGE_W + gx;

								  // Geometry of the local tile (work-group size plus halo on both sides).
								  const int localRowLen = TWICE_HALF_FILTER_SIZE + lw;
								  const int localRowOffset = (ly + HALF_FILTER_SIZE) * localRowLen;
								  const int myLocal = localRowOffset + lx + HALF_FILTER_SIZE;

								  // Stage this work-item's own pixel (vload3 reads 3 bytes at input + 3 * my).
								  cached[myLocal] = vload3(my, input);

								  // Work-items in the first/last HALF_FILTER_SIZE columns of the group also
								  // stage one halo column cell; -1 means "no column halo for this item".
								  int localColOffset = -1;
								  int globalColOffset = 0;
								  if (lx < HALF_FILTER_SIZE)
								  {
								    localColOffset = lx;
								    globalColOffset = -HALF_FILTER_SIZE;
								  }
								  else if (lx >= lw - HALF_FILTER_SIZE)
								  {
								    localColOffset = lx + TWICE_HALF_FILTER_SIZE;
								    globalColOffset = HALF_FILTER_SIZE;
								  }
								  // Halo cells whose source lies outside the image are never read by an
								  // interior pixel, so they are simply left unstaged.
								  if (localColOffset >= 0 &&
								      (gx + globalColOffset < 0 || gx + globalColOffset >= IMAGE_W))
								  {
								    localColOffset = -1;
								  }

								  // Same idea for the first/last HALF_FILTER_SIZE rows of the group.
								  int haloRowOffset = -1;   // start of the halo row inside `cached`
								  int globalRowOffset = 0;  // source displacement, in pixels
								  if (ly < HALF_FILTER_SIZE)
								  {
								    if (gy - HALF_FILTER_SIZE >= 0)
								    {
								      haloRowOffset = ly * localRowLen;
								      globalRowOffset = -(HALF_FILTER_SIZE_IMAGE_W);
								    }
								  }
								  else if (ly >= lh - HALF_FILTER_SIZE)
								  {
								    if (gy + HALF_FILTER_SIZE < IMAGE_H)
								    {
								      haloRowOffset = (ly + TWICE_HALF_FILTER_SIZE) * localRowLen;
								      globalRowOffset = HALF_FILTER_SIZE_IMAGE_W;
								    }
								  }

								  // Left / right halo cell on this work-item's own row.
								  if (localColOffset >= 0)
								  {
								    cached[localRowOffset + localColOffset] = vload3(my + globalColOffset, input);
								  }

								  // Top / bottom halo cell in this work-item's own column, plus the corner
								  // cell when this work-item owns both a row halo and a column halo.
								  // The sentinel test is ">= 0": column offset 0 is a valid halo column.
								  if (haloRowOffset >= 0)
								  {
								    cached[haloRowOffset + lx + HALF_FILTER_SIZE] = vload3(my + globalRowOffset, input);
								    if (localColOffset >= 0)
								    {
								      cached[haloRowOffset + localColOffset] =
								        vload3(my + globalRowOffset + globalColOffset, input);
								    }
								  }

								  // Single barrier reached by every work-item of the group. A barrier
								  // placed in divergent branches is undefined behaviour in OpenCL.
								  barrier(CLK_LOCAL_MEM_FENCE);

								  // Pixels without a full neighbourhood inside the image produce no output.
								  if (
								    gx < HALF_FILTER_SIZE                 ||
								    gx > IMAGE_W - HALF_FILTER_SIZE - 1   ||
								    gy < HALF_FILTER_SIZE                 ||
								    gy > IMAGE_H - HALF_FILTER_SIZE - 1
								  )
								  {
								    return;
								  }

								  // Integer-division stand-ins for the gray weights: /3, /2, /9 on the
								  // three channels, with the first and last divisor swapped when FLIP_RB
								  // is set (channel order reversed).
								  const int xDiv = FLIP_RB ? 9 : 3;
								  const int zDiv = FLIP_RB ? 3 : 9;

								  // NOTE(review): the accumulator is a 16-bit short, matching the output
								  // type; large filter taps could overflow it - confirm the tap range.
								  int fIndex = 0;
								  short sum = 0;

								  for (int r = -HALF_FILTER_SIZE; r <= HALF_FILTER_SIZE; r++)
								  {
								    const int curRow = r * localRowLen;
								    for (int c = -HALF_FILTER_SIZE; c <= HALF_FILTER_SIZE; c++, fIndex++)
								    {
								      const uchar3 px = cached[ myLocal + curRow + c ];
								      sum += (px.x / xDiv + px.y / 2 + px.z / zDiv) * filter[ fIndex ];
								    }
								  }

								  output[my] = sum;
								}
							 |