camerad cleanup (#30573)
* misc cleanup
* rm those
* rm utils
* fix build
* rm pool
* little more
* goodbye imgproc
old-commit-hash: e34ee43eea
chrysler-long2
parent
a9f5dcf4cf
commit
5acd765c1e
15 changed files with 56 additions and 367 deletions
@ -1,110 +0,0 @@ |
||||
// const __constant float3 rgb_weights = (0.299, 0.587, 0.114); // opencv rgb2gray weights |
||||
// const __constant float3 bgr_weights = (0.114, 0.587, 0.299); // bgr2gray weights |
||||
|
||||
// convert input rgb image to single channel then conv |
||||
__kernel void rgb2gray_conv2d( |
||||
const __global uchar * input, |
||||
__global short * output, |
||||
__constant short * filter, |
||||
__local uchar3 * cached |
||||
) |
||||
{ |
||||
const int rowOffset = get_global_id(1) * IMAGE_W; |
||||
const int my = get_global_id(0) + rowOffset; |
||||
|
||||
const int localRowLen = TWICE_HALF_FILTER_SIZE + get_local_size(0); |
||||
const int localRowOffset = ( get_local_id(1) + HALF_FILTER_SIZE ) * localRowLen; |
||||
const int myLocal = localRowOffset + get_local_id(0) + HALF_FILTER_SIZE; |
||||
|
||||
// cache local pixels |
||||
cached[ myLocal ].x = input[ my * 3 ]; // r |
||||
cached[ myLocal ].y = input[ my * 3 + 1]; // g |
||||
cached[ myLocal ].z = input[ my * 3 + 2]; // b |
||||
|
||||
// pad |
||||
if ( |
||||
get_global_id(0) < HALF_FILTER_SIZE || |
||||
get_global_id(0) > IMAGE_W - HALF_FILTER_SIZE - 1 || |
||||
get_global_id(1) < HALF_FILTER_SIZE || |
||||
get_global_id(1) > IMAGE_H - HALF_FILTER_SIZE - 1 |
||||
) |
||||
{ |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
return; |
||||
} |
||||
else |
||||
{ |
||||
int localColOffset = -1; |
||||
int globalColOffset = -1; |
||||
|
||||
// cache extra |
||||
if ( get_local_id(0) < HALF_FILTER_SIZE ) |
||||
{ |
||||
localColOffset = get_local_id(0); |
||||
globalColOffset = -HALF_FILTER_SIZE; |
||||
|
||||
cached[ localRowOffset + get_local_id(0) ].x = input[ my * 3 - HALF_FILTER_SIZE * 3 ]; |
||||
cached[ localRowOffset + get_local_id(0) ].y = input[ my * 3 - HALF_FILTER_SIZE * 3 + 1]; |
||||
cached[ localRowOffset + get_local_id(0) ].z = input[ my * 3 - HALF_FILTER_SIZE * 3 + 2]; |
||||
} |
||||
else if ( get_local_id(0) >= get_local_size(0) - HALF_FILTER_SIZE ) |
||||
{ |
||||
localColOffset = get_local_id(0) + TWICE_HALF_FILTER_SIZE; |
||||
globalColOffset = HALF_FILTER_SIZE; |
||||
|
||||
cached[ myLocal + HALF_FILTER_SIZE ].x = input[ my * 3 + HALF_FILTER_SIZE * 3 ]; |
||||
cached[ myLocal + HALF_FILTER_SIZE ].y = input[ my * 3 + HALF_FILTER_SIZE * 3 + 1]; |
||||
cached[ myLocal + HALF_FILTER_SIZE ].z = input[ my * 3 + HALF_FILTER_SIZE * 3 + 2]; |
||||
} |
||||
|
||||
|
||||
if ( get_local_id(1) < HALF_FILTER_SIZE ) |
||||
{ |
||||
cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].x = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 ]; |
||||
cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].y = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + 1]; |
||||
cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].z = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + 2]; |
||||
if (localColOffset > 0) |
||||
{ |
||||
cached[ get_local_id(1) * localRowLen + localColOffset ].x = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3]; |
||||
cached[ get_local_id(1) * localRowLen + localColOffset ].y = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 1]; |
||||
cached[ get_local_id(1) * localRowLen + localColOffset ].z = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 2]; |
||||
} |
||||
} |
||||
else if ( get_local_id(1) >= get_local_size(1) -HALF_FILTER_SIZE ) |
||||
{ |
||||
int offset = ( get_local_id(1) + TWICE_HALF_FILTER_SIZE ) * localRowLen; |
||||
cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].x = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 ]; |
||||
cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].y = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + 1]; |
||||
cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].z = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + 2]; |
||||
if (localColOffset > 0) |
||||
{ |
||||
cached[ offset + localColOffset ].x = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3]; |
||||
cached[ offset + localColOffset ].y = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 1]; |
||||
cached[ offset + localColOffset ].z = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 2]; |
||||
} |
||||
} |
||||
|
||||
// sync |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
// perform convolution |
||||
int fIndex = 0; |
||||
short sum = 0; |
||||
|
||||
for (int r = -HALF_FILTER_SIZE; r <= HALF_FILTER_SIZE; r++) |
||||
{ |
||||
int curRow = r * localRowLen; |
||||
for (int c = -HALF_FILTER_SIZE; c <= HALF_FILTER_SIZE; c++, fIndex++) |
||||
{ |
||||
if (!FLIP_RB){ |
||||
// sum += dot(rgb_weights, cached[ myLocal + curRow + c ]) * filter[ fIndex ]; |
||||
sum += (cached[ myLocal + curRow + c ].x / 3 + cached[ myLocal + curRow + c ].y / 2 + cached[ myLocal + curRow + c ].z / 9) * filter[ fIndex ]; |
||||
} else { |
||||
// sum += dot(bgr_weights, cached[ myLocal + curRow + c ]) * filter[ fIndex ]; |
||||
sum += (cached[ myLocal + curRow + c ].x / 9 + cached[ myLocal + curRow + c ].y / 2 + cached[ myLocal + curRow + c ].z / 3) * filter[ fIndex ]; |
||||
} |
||||
} |
||||
} |
||||
output[my] = sum; |
||||
} |
||||
} |
@ -1,34 +0,0 @@ |
||||
// calculate variance in each subregion |
||||
__kernel void var_pool( |
||||
const __global char * input, |
||||
__global ushort * output // should not be larger than 128*128 so uint16 |
||||
) |
||||
{ |
||||
const int xidx = get_global_id(0) + ROI_X_MIN; |
||||
const int yidx = get_global_id(1) + ROI_Y_MIN; |
||||
|
||||
const int size = X_PITCH * Y_PITCH; |
||||
|
||||
float fsum = 0; |
||||
char mean, max; |
||||
|
||||
for (int i = 0; i < size; i++) { |
||||
int x_offset = i % X_PITCH; |
||||
int y_offset = i / X_PITCH; |
||||
fsum += input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X]; |
||||
max = input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X]>max ? input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X]:max; |
||||
} |
||||
|
||||
mean = convert_char_rte(fsum / size); |
||||
|
||||
float fvar = 0; |
||||
for (int i = 0; i < size; i++) { |
||||
int x_offset = i % X_PITCH; |
||||
int y_offset = i / X_PITCH; |
||||
fvar += (input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X] - mean) * (input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X] - mean); |
||||
} |
||||
|
||||
fvar = fvar / size; |
||||
|
||||
output[(xidx-ROI_X_MIN)+(yidx-ROI_Y_MIN)*(ROI_X_MAX-ROI_X_MIN+1)] = convert_ushort_rte(5 * fvar + convert_float_rte(max)); |
||||
} |
@ -1,106 +0,0 @@ |
||||
#include "system/camerad/imgproc/utils.h" |
||||
|
||||
#include <algorithm> |
||||
#include <cassert> |
||||
#include <cstdio> |
||||
#include <cmath> |
||||
#include <cstring> |
||||
|
||||
// 3x3 Laplacian convolution kernel, stored row by row.
const int16_t lapl_conv_krnl[9] = {
  0,  1, 0,
  1, -4, 1,
  0,  1, 0,
};
||||
|
||||
// Calculate a sharpness score from the Laplacian values of one area.
//
// lap      : x_pitch * y_pitch Laplacian values
// x_pitch  : width of the area in elements
// y_pitch  : height of the area in elements
// returns  : 5 * variance + max(0, largest value), clamped to 65535;
//            0 when the area is empty
uint16_t get_lapmap_one(const int16_t *lap, int x_pitch, int y_pitch) {
  const int size = x_pitch * y_pitch;
  if (size <= 0) {
    return 0;  // nothing to score; also avoids dividing by zero below
  }

  // avg and max of roi
  int16_t max = 0;
  int64_t sum = 0;
  for (int i = 0; i < size; ++i) {
    const int16_t v = lap[i];
    sum += v;
    if (v > max) max = v;
  }

  const int16_t mean = (int16_t)(sum / size);

  // var of roi; 64-bit accumulator because a sum of squared int16 differences
  // can exceed the range of int
  int64_t var = 0;
  for (int i = 0; i < size; ++i) {
    const int64_t diff = lap[i] - mean;
    var += diff * diff;
  }

  const float fvar = (float)var / size;
  const float score = 5 * fvar + max;
  return score < 65535.0f ? (uint16_t)score : (uint16_t)65535;
}
||||
|
||||
bool is_blur(const uint16_t *lapmap, const size_t size) { |
||||
float bad_sum = 0; |
||||
for (int i = 0; i < size; i++) { |
||||
if (lapmap[i] < LM_THRESH) { |
||||
bad_sum += 1 / (float)size; |
||||
} |
||||
} |
||||
return (bad_sum > LM_PREC_THRESH); |
||||
} |
||||
|
||||
// Build the OpenCL program from "imgproc/conv.cl", passing the image and
// filter dimensions to the compiler as -D definitions.
// FLIP_RB is always defined as 1.
static cl_program build_conv_program(cl_device_id device_id, cl_context context, int image_w, int image_h, int filter_size) {
  const int half_filter = filter_size / 2;

  char build_opts[4096];
  snprintf(build_opts, sizeof(build_opts),
           "-cl-fast-relaxed-math -cl-denorms-are-zero "
           "-DIMAGE_W=%d -DIMAGE_H=%d -DFLIP_RB=%d "
           "-DFILTER_SIZE=%d -DHALF_FILTER_SIZE=%d -DTWICE_HALF_FILTER_SIZE=%d -DHALF_FILTER_SIZE_IMAGE_W=%d",
           image_w, image_h, 1,
           filter_size, half_filter, half_filter * 2, half_filter * image_w);

  return cl_program_from_file(context, device_id, "imgproc/conv.cl", build_opts);
}
||||
|
||||
// Set up the convolution for one segment of the frame: the segment is
// rgb_width / NUM_SEGMENTS_X by rgb_height / NUM_SEGMENTS_Y pixels.
// Allocates the host staging buffers, builds the program/kernel and creates
// the device buffers for the ROI, the result and the Laplacian filter.
LapConv::LapConv(cl_device_id device_id, cl_context ctx, int rgb_width, int rgb_height, int rgb_stride, int filter_size)
    : width(rgb_width / NUM_SEGMENTS_X),
      height(rgb_height / NUM_SEGMENTS_Y),
      rgb_stride(rgb_stride),
      roi_buf(width * height * 3),
      result_buf(width * height) {
  prg = build_conv_program(device_id, ctx, width, height, filter_size);
  // NOTE(review): `err` is not declared in this function; presumably the
  // CL_CHECK_ERR macro provides it - confirm in common/clutil.h.
  krnl = CL_CHECK_ERR(clCreateKernel(prg, "rgb2gray_conv2d", &err));

  const size_t roi_bytes = roi_buf.size() * sizeof(roi_buf[0]);
  const size_t result_bytes = result_buf.size() * sizeof(result_buf[0]);

  // TODO: Removed CL_MEM_SVM_FINE_GRAIN_BUFFER, confirm it doesn't matter
  roi_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, roi_bytes, NULL, &err));
  result_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, result_bytes, NULL, &err));
  // the filter coefficients are copied to the device at creation time
  filter_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                          9 * sizeof(int16_t), (void *)&lapl_conv_krnl, &err));
}
||||
|
||||
// Release every OpenCL object created by the constructor:
// the three buffers first, then the kernel, then the program.
LapConv::~LapConv() {
  // device buffers
  CL_CHECK(clReleaseMemObject(roi_cl));
  CL_CHECK(clReleaseMemObject(result_cl));
  CL_CHECK(clReleaseMemObject(filter_cl));

  // kernel and the program it was created from
  CL_CHECK(clReleaseKernel(krnl));
  CL_CHECK(clReleaseProgram(prg));
}
||||
|
||||
// Compute the sharpness score of one region of interest.
//
// q       : command queue used for the transfers and the kernel launch
// rgb_buf : packed RGB frame (3 bytes per pixel, rows rgb_stride bytes apart)
// roi_id  : index of the region, counted row by row inside the
//           [ROI_X_MIN, ROI_X_MAX] column range starting at row ROI_Y_MIN
// returns : score from get_lapmap_one() over the convolved region
uint16_t LapConv::Update(cl_command_queue q, const uint8_t *rgb_buf, const int roi_id) {
  // sharpness scores
  const int rois_per_row = ROI_X_MAX - ROI_X_MIN + 1;
  const int x_offset = ROI_X_MIN + roi_id % rois_per_row;
  const int y_offset = ROI_Y_MIN + roi_id / rois_per_row;

  // copy the region into a tightly packed staging buffer
  const uint8_t *rgb_offset = rgb_buf + y_offset * height * rgb_stride + x_offset * width * 3;
  for (int i = 0; i < height; ++i) {
    memcpy(&roi_buf[i * width * 3], &rgb_offset[i * rgb_stride], width * 3);
  }

  // Local tile: work-group plus a halo of (3 / 2) pixels on each side
  // (3 is the filter size assumed here).
  // The kernel declares the tile as `__local uchar3 *`; in OpenCL C a
  // 3-component vector occupies the storage of a 4-component one, so each
  // entry needs 4 bytes, not 3.
  constexpr size_t kTileSide = CONV_LOCAL_WORKSIZE + 2 * (3 / 2);
  constexpr size_t kLocalPixelBytes = 4 * sizeof(uint8_t);
  constexpr size_t local_mem_size = kTileSide * kTileSide * kLocalPixelBytes;

  const size_t global_work_size[] = {(size_t)width, (size_t)height};
  const size_t local_work_size[] = {CONV_LOCAL_WORKSIZE, CONV_LOCAL_WORKSIZE};

  CL_CHECK(clEnqueueWriteBuffer(q, roi_cl, CL_TRUE, 0, roi_buf.size() * sizeof(roi_buf[0]), roi_buf.data(), 0, 0, 0));
  CL_CHECK(clSetKernelArg(krnl, 0, sizeof(cl_mem), (void *)&roi_cl));
  CL_CHECK(clSetKernelArg(krnl, 1, sizeof(cl_mem), (void *)&result_cl));
  CL_CHECK(clSetKernelArg(krnl, 2, sizeof(cl_mem), (void *)&filter_cl));
  // a NULL value with a non-zero size requests local memory for argument 3
  CL_CHECK(clSetKernelArg(krnl, 3, local_mem_size, 0));

  cl_event conv_event;
  CL_CHECK(clEnqueueNDRangeKernel(q, krnl, 2, NULL, global_work_size, local_work_size, 0, 0, &conv_event));
  CL_CHECK(clWaitForEvents(1, &conv_event));
  CL_CHECK(clReleaseEvent(conv_event));

  // blocking read of the convolved region
  CL_CHECK(clEnqueueReadBuffer(q, result_cl, CL_TRUE, 0,
                               result_buf.size() * sizeof(result_buf[0]), result_buf.data(), 0, 0, 0));

  return get_lapmap_one(result_buf.data(), width, height);
}
@ -1,37 +0,0 @@ |
||||
#pragma once |
||||
|
||||
#include <cstddef> |
||||
#include <cstdint> |
||||
#include <vector> |
||||
|
||||
#include "common/clutil.h" |
||||
|
||||
// Number of segments the frame is divided into along each axis.
#define NUM_SEGMENTS_X 8
#define NUM_SEGMENTS_Y 6

// Inclusive range of segment indices that form the region of interest.
#define ROI_X_MIN 1
#define ROI_X_MAX 6
#define ROI_Y_MIN 2
#define ROI_Y_MAX 3

// A score below LM_THRESH counts as "bad"; the image is considered blurry
// when the fraction of bad scores exceeds LM_PREC_THRESH.
#define LM_THRESH 120
#define LM_PREC_THRESH 0.9  // 90 percent bad scores means blur

// Work-group edge length used for the convolution kernel.
#define CONV_LOCAL_WORKSIZE 16
||||
|
||||
class LapConv { |
||||
public: |
||||
LapConv(cl_device_id device_id, cl_context ctx, int rgb_width, int rgb_height, int rgb_stride, int filter_size); |
||||
~LapConv(); |
||||
uint16_t Update(cl_command_queue q, const uint8_t *rgb_buf, const int roi_id); |
||||
|
||||
private: |
||||
cl_mem roi_cl, result_cl, filter_cl; |
||||
cl_program prg; |
||||
cl_kernel krnl; |
||||
const int width, height; |
||||
const int rgb_stride; |
||||
std::vector<uint8_t> roi_buf; |
||||
std::vector<int16_t> result_buf; |
||||
}; |
||||
|
||||
bool is_blur(const uint16_t *lapmap, const size_t size); |
Loading…
Reference in new issue