From aada0e9a66bfb8fe6c05bfbf3b058baef3bd3881 Mon Sep 17 00:00:00 2001 From: ZwX1616 Date: Sat, 2 May 2020 21:40:43 -0700 Subject: [PATCH] bad AF state logging (#1365) old-commit-hash: 968e2585cc7e2a580fa351a75ceb02c1fe02c409 --- release/files_common | 5 + selfdrive/camerad/SConscript | 1 + selfdrive/camerad/cameras/camera_qcom.c | 2 + selfdrive/camerad/imgproc/conv.cl | 110 +++++++++++++++++++ selfdrive/camerad/imgproc/pool.cl | 34 ++++++ selfdrive/camerad/imgproc/utils.cc | 44 ++++++++ selfdrive/camerad/imgproc/utils.h | 30 ++++++ selfdrive/camerad/main.cc | 136 +++++++++++++++++++++++- 8 files changed, 358 insertions(+), 4 deletions(-) create mode 100644 selfdrive/camerad/imgproc/conv.cl create mode 100644 selfdrive/camerad/imgproc/pool.cl create mode 100644 selfdrive/camerad/imgproc/utils.cc create mode 100644 selfdrive/camerad/imgproc/utils.h diff --git a/release/files_common b/release/files_common index 4a666b6b0b..b2791c4d46 100644 --- a/release/files_common +++ b/release/files_common @@ -355,6 +355,11 @@ selfdrive/camerad/transforms/rgb_to_yuv.h selfdrive/camerad/transforms/rgb_to_yuv.cl selfdrive/camerad/transforms/rgb_to_yuv_test.cc +selfdrive/camerad/imgproc/conv.cl +selfdrive/camerad/imgproc/pool.cl +selfdrive/camerad/imgproc/utils.cc +selfdrive/camerad/imgproc/utils.h + selfdrive/modeld/SConscript selfdrive/modeld/modeld.cc selfdrive/modeld/dmonitoringmodeld.cc diff --git a/selfdrive/camerad/SConscript b/selfdrive/camerad/SConscript index 374ae139c5..405be689f1 100644 --- a/selfdrive/camerad/SConscript +++ b/selfdrive/camerad/SConscript @@ -26,5 +26,6 @@ env.SharedLibrary('snapshot/visionipc', env.Program('camerad', [ 'main.cc', 'transforms/rgb_to_yuv.c', + 'imgproc/utils.cc', cameras, ], LIBS=libs) diff --git a/selfdrive/camerad/cameras/camera_qcom.c b/selfdrive/camerad/cameras/camera_qcom.c index 1d1396d7bf..b7b951f9ca 100644 --- a/selfdrive/camerad/cameras/camera_qcom.c +++ b/selfdrive/camerad/cameras/camera_qcom.c @@ -1719,6 +1719,8 @@ static void parse_autofocus(CameraState *s, uint8_t *d) { for (int i = 0; i < NUM_FOCUS; i++) { int doff = i*5+5; s->confidence[i] = d[doff]; + // this should just be a 10-bit signed int instead of 11 + // TODO: write it in a nicer way int16_t focus_t = (d[doff+1] << 3) | (d[doff+2] >> 5); if (focus_t >= 1024) focus_t = -(2048-focus_t); s->focus[i] = focus_t; diff --git a/selfdrive/camerad/imgproc/conv.cl b/selfdrive/camerad/imgproc/conv.cl new file mode 100644 index 0000000000..f92d356705 --- /dev/null +++ b/selfdrive/camerad/imgproc/conv.cl @@ -0,0 +1,110 @@ +// const __constant float3 rgb_weights = (0.299, 0.587, 0.114); // opencv rgb2gray weights +// const __constant float3 bgr_weights = (0.114, 0.587, 0.299); // bgr2gray weights + +// convert input rgb image to single channel then conv +__kernel void rgb2gray_conv2d( + const __global uchar * input, + __global short * output, + __constant short * filter, + __local uchar3 * cached +) +{ + const int rowOffset = get_global_id(1) * IMAGE_W; + const int my = get_global_id(0) + rowOffset; + + const int localRowLen = TWICE_HALF_FILTER_SIZE + get_local_size(0); + const int localRowOffset = ( get_local_id(1) + HALF_FILTER_SIZE ) * localRowLen; + const int myLocal = localRowOffset + get_local_id(0) + HALF_FILTER_SIZE; + + // cache local pixels + cached[ myLocal ].x = input[ my * 3 ]; // r + cached[ myLocal ].y = input[ my * 3 + 1]; // g + cached[ myLocal ].z = input[ my * 3 + 2]; // b + + // pad + if ( + get_global_id(0) < HALF_FILTER_SIZE || + get_global_id(0) > IMAGE_W - HALF_FILTER_SIZE - 1 || + get_global_id(1) < HALF_FILTER_SIZE || + get_global_id(1) > IMAGE_H - HALF_FILTER_SIZE - 1 + ) + { + barrier(CLK_LOCAL_MEM_FENCE); + return; + } + else + { + int localColOffset = -1; + int globalColOffset = -1; + + // cache extra + if ( get_local_id(0) < HALF_FILTER_SIZE ) + { + localColOffset = get_local_id(0); + globalColOffset = -HALF_FILTER_SIZE; + + cached[ localRowOffset + get_local_id(0) ].x = input[ my * 3 - HALF_FILTER_SIZE * 3 ]; + cached[ localRowOffset + get_local_id(0) ].y = input[ my * 3 - HALF_FILTER_SIZE * 3 + 1]; + cached[ localRowOffset + get_local_id(0) ].z = input[ my * 3 - HALF_FILTER_SIZE * 3 + 2]; + } + else if ( get_local_id(0) >= get_local_size(0) - HALF_FILTER_SIZE ) + { + localColOffset = get_local_id(0) + TWICE_HALF_FILTER_SIZE; + globalColOffset = HALF_FILTER_SIZE; + + cached[ myLocal + HALF_FILTER_SIZE ].x = input[ my * 3 + HALF_FILTER_SIZE * 3 ]; + cached[ myLocal + HALF_FILTER_SIZE ].y = input[ my * 3 + HALF_FILTER_SIZE * 3 + 1]; + cached[ myLocal + HALF_FILTER_SIZE ].z = input[ my * 3 + HALF_FILTER_SIZE * 3 + 2]; + } + + + if ( get_local_id(1) < HALF_FILTER_SIZE ) + { + cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].x = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 ]; + cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].y = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + 1]; + cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].z = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + 2]; + if (localColOffset > 0) + { + cached[ get_local_id(1) * localRowLen + localColOffset ].x = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3]; + cached[ get_local_id(1) * localRowLen + localColOffset ].y = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 1]; + cached[ get_local_id(1) * localRowLen + localColOffset ].z = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 2]; + } + } + else if ( get_local_id(1) >= get_local_size(1) -HALF_FILTER_SIZE ) + { + int offset = ( get_local_id(1) + TWICE_HALF_FILTER_SIZE ) * localRowLen; + cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].x = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 ]; + cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].y = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + 1]; + cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].z = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + 2]; + if (localColOffset > 0) + { + cached[ offset + localColOffset ].x = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3]; + cached[ offset + localColOffset ].y = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 1]; + cached[ offset + localColOffset ].z = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 2]; + } + } + + // sync + barrier(CLK_LOCAL_MEM_FENCE); + + // perform convolution + int fIndex = 0; + short sum = 0; + + for (int r = -HALF_FILTER_SIZE; r <= HALF_FILTER_SIZE; r++) + { + int curRow = r * localRowLen; + for (int c = -HALF_FILTER_SIZE; c <= HALF_FILTER_SIZE; c++, fIndex++) + { + if (!FLIP_RB){ + // sum += dot(rgb_weights, cached[ myLocal + curRow + c ]) * filter[ fIndex ]; + sum += (cached[ myLocal + curRow + c ].x / 3 + cached[ myLocal + curRow + c ].y / 2 + cached[ myLocal + curRow + c ].z / 9) * filter[ fIndex ]; + } else { + // sum += dot(bgr_weights, cached[ myLocal + curRow + c ]) * filter[ fIndex ]; + sum += (cached[ myLocal + curRow + c ].x / 9 + cached[ myLocal + curRow + c ].y / 2 + cached[ myLocal + curRow + c ].z / 3) * filter[ fIndex ]; + } + } + } + output[my] = sum; + } +} \ No newline at end of file diff --git a/selfdrive/camerad/imgproc/pool.cl b/selfdrive/camerad/imgproc/pool.cl new file mode 100644 index 0000000000..3ba86ae24e --- /dev/null +++ b/selfdrive/camerad/imgproc/pool.cl @@ -0,0 +1,34 @@ +// calculate variance in each subregion +__kernel void var_pool( + const __global char * input, + __global ushort * output // should not be larger than 128*128 so uint16 +) +{ + const int xidx = get_global_id(0) + ROI_X_MIN; + const int yidx = get_global_id(1) + ROI_Y_MIN; + + const int size = X_PITCH * Y_PITCH; + + float fsum = 0; + char mean, max; + + for (int i = 0; i < size; i++) { + int x_offset = i % X_PITCH; + int y_offset = i / X_PITCH; + fsum += input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X]; + max = input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X]>max ? input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X]:max; + } + + mean = convert_char_rte(fsum / size); + + float fvar = 0; + for (int i = 0; i < size; i++) { + int x_offset = i % X_PITCH; + int y_offset = i / X_PITCH; + fvar += (input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X] - mean) * (input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X] - mean); + } + + fvar = fvar / size; + + output[(xidx-ROI_X_MIN)+(yidx-ROI_Y_MIN)*(ROI_X_MAX-ROI_X_MIN+1)] = convert_ushort_rte(5 * fvar + convert_float_rte(max)); +} \ No newline at end of file diff --git a/selfdrive/camerad/imgproc/utils.cc b/selfdrive/camerad/imgproc/utils.cc new file mode 100644 index 0000000000..0b36d68637 --- /dev/null +++ b/selfdrive/camerad/imgproc/utils.cc @@ -0,0 +1,44 @@ +#include "utils.h" +#include +#include + +// calculate score based on laplacians in one area +void get_lapmap_one(int16_t *lap, uint16_t *res, int x_pitch, int y_pitch) { + int size = x_pitch * y_pitch; + // avg and max of roi + float fsum = 0; + int16_t mean, max; + max = 0; + + for (int i = 0; i < size; i++) { + int x_offset = i % x_pitch; + int y_offset = i / x_pitch; + fsum += lap[x_offset + y_offset*x_pitch]; + max = std::max(lap[x_offset + y_offset*x_pitch], max); + } + + mean = fsum / size; + + // var of roi + float fvar = 0; + for (int i = 0; i < size; i++) { + int x_offset = i % x_pitch; + int y_offset = i / x_pitch; + fvar += (float)((lap[x_offset + y_offset*x_pitch] - mean) * (lap[x_offset + y_offset*x_pitch] - mean)); + } + + fvar = fvar / size; + + *res = std::min(5 * fvar + max, (float)65535); +} + +bool is_blur(uint16_t *lapmap) { + int n_roi = (ROI_X_MAX - ROI_X_MIN + 1) * (ROI_Y_MAX - ROI_Y_MIN + 1); + float bad_sum = 0; + for (int i = 0; i < n_roi; i++) { + if (*(lapmap + i) < LM_THRESH) { + bad_sum += 1/(float)n_roi; + } + } + return (bad_sum > LM_PREC_THRESH); +} \ No newline at end of file diff --git a/selfdrive/camerad/imgproc/utils.h b/selfdrive/camerad/imgproc/utils.h new file mode 100644 index 0000000000..f77a456912 --- /dev/null +++ b/selfdrive/camerad/imgproc/utils.h @@ -0,0 +1,30 @@ +#ifndef IMGPROC_UTILS +#define IMGPROC_UTILS + +#include + +#define NUM_SEGMENTS_X 8 +#define NUM_SEGMENTS_Y 6 + +#define ROI_X_MIN 1 +#define ROI_X_MAX 6 +#define ROI_Y_MIN 2 +#define ROI_Y_MAX 3 + +#define LM_THRESH 222 +#define LM_PREC_THRESH 0.9 + +// only apply to QCOM +#define FULL_STRIDE_X 1280 +#define FULL_STRIDE_Y 896 + +#define CONV_LOCAL_WORKSIZE 16 + +const int16_t lapl_conv_krnl[9] = {0, 1, 0, + 1, -4, 1, + 0, 1, 0}; + +void get_lapmap_one(int16_t *lap, uint16_t *res, int x_pitch, int y_pitch); +bool is_blur(uint16_t *lapmap); + +#endif \ No newline at end of file diff --git a/selfdrive/camerad/main.cc b/selfdrive/camerad/main.cc index ccbd9874b8..95b57de19e 100644 --- a/selfdrive/camerad/main.cc +++ b/selfdrive/camerad/main.cc @@ -23,6 +23,7 @@ #include "messaging.hpp" #include "transforms/rgb_to_yuv.h" +#include "imgproc/utils.h" #include "clutil.h" #include "bufs.h" @@ -78,6 +79,14 @@ struct VisionState { cl_kernel krnl_debayer_rear; cl_kernel krnl_debayer_front; + cl_program prg_rgb_laplacian; + cl_kernel krnl_rgb_laplacian; + + int conv_cl_localMemSize; + size_t conv_cl_globalWorkSize[2]; + size_t conv_cl_localWorkSize[2]; + size_t pool_cl_globalWorkSize[2]; + // processing TBuffer ui_tb; TBuffer ui_front_tb; @@ -110,6 +119,8 @@ struct VisionState { int rgb_width, rgb_height, rgb_stride; VisionBuf rgb_bufs[UI_BUF_COUNT]; cl_mem rgb_bufs_cl[UI_BUF_COUNT]; + cl_mem rgb_conv_roi_cl, rgb_conv_result_cl, rgb_conv_filter_cl; + uint16_t lapres[(ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1)]; size_t rgb_front_buf_size; int rgb_front_width, rgb_front_height, rgb_front_stride; @@ -177,9 +188,9 @@ void* frontview_thread(void *arg) { cl_event debayer_event; if (s->cameras.front.ci.bayer) { err = clSetKernelArg(s->krnl_debayer_front, 0, sizeof(cl_mem), &s->front_camera_bufs_cl[buf_idx]); - cl_check_error(err); + assert(err == 0); err = clSetKernelArg(s->krnl_debayer_front, 1, sizeof(cl_mem), &s->rgb_front_bufs_cl[rgb_idx]); - cl_check_error(err); + assert(err == 0); float digital_gain = 1.0; err = clSetKernelArg(s->krnl_debayer_front, 2, sizeof(float), &digital_gain); assert(err == 0); @@ -390,9 +401,9 @@ void* processing_thread(void *arg) { cl_event debayer_event; if (s->cameras.rear.ci.bayer) { err = clSetKernelArg(s->krnl_debayer_rear, 0, sizeof(cl_mem), &s->camera_bufs_cl[buf_idx]); - cl_check_error(err); + assert(err == 0); err = clSetKernelArg(s->krnl_debayer_rear, 1, sizeof(cl_mem), &s->rgb_bufs_cl[rgb_idx]); - cl_check_error(err); + assert(err == 0); err = clSetKernelArg(s->krnl_debayer_rear, 2, sizeof(float), &s->cameras.rear.digital_gain); assert(err == 0); @@ -416,6 +427,74 @@ void* processing_thread(void *arg) { visionbuf_sync(&s->rgb_bufs[rgb_idx], VISIONBUF_SYNC_FROM_DEVICE); +#ifdef QCOM + /*FILE *dump_rgb_file = fopen("/tmp/process_dump.rgb", "wb"); + fwrite(s->rgb_bufs[rgb_idx].addr, s->rgb_bufs[rgb_idx].len, sizeof(uint8_t), dump_rgb_file); + fclose(dump_rgb_file); + printf("ORIGINAL SAVED!!\n");*/ + + /*double t10 = millis_since_boot();*/ + + // cache rgb roi and write to cl + uint8_t *rgb_roi_buf = new uint8_t[(s->rgb_width/NUM_SEGMENTS_X)*(s->rgb_height/NUM_SEGMENTS_Y)*3]; + int roi_id = cnt % ((ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1)); // rolling roi + int roi_x_offset = roi_id % (ROI_X_MAX-ROI_X_MIN+1); + int roi_y_offset = roi_id / (ROI_X_MAX-ROI_X_MIN+1); + + for (int r=0;r<(s->rgb_height/NUM_SEGMENTS_Y);r++) { + memcpy(rgb_roi_buf + r * (s->rgb_width/NUM_SEGMENTS_X) * 3, + (uint8_t *) s->rgb_bufs[rgb_idx].addr + \ + (ROI_Y_MIN + roi_y_offset) * s->rgb_height/NUM_SEGMENTS_Y * FULL_STRIDE_X * 3 + \ + (ROI_X_MIN + roi_x_offset) * s->rgb_width/NUM_SEGMENTS_X * 3 + r * FULL_STRIDE_X * 3, + s->rgb_width/NUM_SEGMENTS_X * 3); + } + + err = clEnqueueWriteBuffer (q, s->rgb_conv_roi_cl, true, 0, + s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * 3 * sizeof(uint8_t), rgb_roi_buf, 0, 0, 0); + assert(err == 0); + + /*double t11 = millis_since_boot(); + printf("cache time: %f ms\n", t11 - t10); + t10 = millis_since_boot();*/ + + err = clSetKernelArg(s->krnl_rgb_laplacian, 0, sizeof(cl_mem), (void *) &s->rgb_conv_roi_cl); + assert(err == 0); + err = clSetKernelArg(s->krnl_rgb_laplacian, 1, sizeof(cl_mem), (void *) &s->rgb_conv_result_cl); + assert(err == 0); + err = clSetKernelArg(s->krnl_rgb_laplacian, 2, sizeof(cl_mem), (void *) &s->rgb_conv_filter_cl); + assert(err == 0); + err = clSetKernelArg(s->krnl_rgb_laplacian, 3, s->conv_cl_localMemSize, 0); + assert(err == 0); + + cl_event conv_event; + err = clEnqueueNDRangeKernel(q, s->krnl_rgb_laplacian, 2, NULL, + s->conv_cl_globalWorkSize, s->conv_cl_localWorkSize, 0, 0, &conv_event); + assert(err == 0); + clWaitForEvents(1, &conv_event); + clReleaseEvent(conv_event); + + int16_t *conv_result = new int16_t[(s->rgb_width/NUM_SEGMENTS_X)*(s->rgb_height/NUM_SEGMENTS_Y)]; + err = clEnqueueReadBuffer(q, s->rgb_conv_result_cl, true, 0, + s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * sizeof(int16_t), conv_result, 0, 0, 0); + assert(err == 0); + + /*t11 = millis_since_boot(); + printf("conv time: %f ms\n", t11 - t10); + t10 = millis_since_boot();*/ + + get_lapmap_one(conv_result, &s->lapres[roi_id], s->rgb_width/NUM_SEGMENTS_X, s->rgb_height/NUM_SEGMENTS_Y); + + /*t11 = millis_since_boot(); + printf("pool time: %f ms\n", t11 - t10); + t10 = millis_since_boot();*/ + + delete [] rgb_roi_buf; + delete [] conv_result; + + /*t11 = millis_since_boot(); + printf("process time: %f ms\n ----- \n", t11 - t10); + t10 = millis_since_boot();*/ +#endif double t2 = millis_since_boot(); @@ -465,6 +544,8 @@ void* processing_thread(void *arg) { kj::ArrayPtr focus_confs(&s->cameras.rear.confidence[0], NUM_FOCUS); framed.setFocusVal(focus_vals); framed.setFocusConf(focus_confs); + kj::ArrayPtr sharpness_score(&s->lapres[0], (ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1)); + framed.setSharpnessScore(sharpness_score); #endif // TODO: add this back @@ -907,6 +988,34 @@ cl_program build_debayer_program(VisionState *s, return CLU_LOAD_FROM_FILE(s->context, s->device_id, "cameras/debayer.cl", args); } +cl_program build_conv_program(VisionState *s, + int image_w, int image_h, + int filter_size) { + char args[4096]; + snprintf(args, sizeof(args), + "-cl-fast-relaxed-math -cl-denorms-are-zero " + "-DIMAGE_W=%d -DIMAGE_H=%d -DFLIP_RB=%d " + "-DFILTER_SIZE=%d -DHALF_FILTER_SIZE=%d -DTWICE_HALF_FILTER_SIZE=%d -DHALF_FILTER_SIZE_IMAGE_W=%d", + image_w, image_h, 1, + filter_size, filter_size/2, (filter_size/2)*2, (filter_size/2)*image_w); + return CLU_LOAD_FROM_FILE(s->context, s->device_id, "imgproc/conv.cl", args); +} + +cl_program build_pool_program(VisionState *s, + int full_stride_x, + int x_pitch, int y_pitch, + int roi_x_min, int roi_x_max, + int roi_y_min, int roi_y_max) { + char args[4096]; + snprintf(args, sizeof(args), + "-cl-fast-relaxed-math -cl-denorms-are-zero " + "-DFULL_STRIDE_X=%d -DX_PITCH=%d -DY_PITCH=%d " + "-DROI_X_MIN=%d -DROI_X_MAX=%d -DROI_Y_MIN=%d -DROI_Y_MAX=%d", + full_stride_x, x_pitch, y_pitch, + roi_x_min, roi_x_max, roi_y_min, roi_y_max); + return CLU_LOAD_FROM_FILE(s->context, s->device_id, "imgproc/pool.cl", args); +} + void cl_init(VisionState *s) { int err; cl_platform_id platform_id = NULL; @@ -1049,6 +1158,25 @@ void init_buffers(VisionState *s) { assert(err == 0); } + s->prg_rgb_laplacian = build_conv_program(s, s->rgb_width/NUM_SEGMENTS_X, s->rgb_height/NUM_SEGMENTS_Y, + 3); + s->krnl_rgb_laplacian = clCreateKernel(s->prg_rgb_laplacian, "rgb2gray_conv2d", &err); + assert(err == 0); + s->rgb_conv_roi_cl = clCreateBuffer(s->context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, + s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * 3 * sizeof(uint8_t), NULL, NULL); + s->rgb_conv_result_cl = clCreateBuffer(s->context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, + s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * sizeof(int16_t), NULL, NULL); + s->rgb_conv_filter_cl = clCreateBuffer(s->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + 9 * sizeof(int16_t), (void*)&lapl_conv_krnl, NULL); + s->conv_cl_localMemSize = ( CONV_LOCAL_WORKSIZE + 2 * (3 / 2) ) * ( CONV_LOCAL_WORKSIZE + 2 * (3 / 2) ); + s->conv_cl_localMemSize *= 3 * sizeof(uint8_t); + s->conv_cl_globalWorkSize[0] = s->rgb_width/NUM_SEGMENTS_X; + s->conv_cl_globalWorkSize[1] = s->rgb_height/NUM_SEGMENTS_Y; + s->conv_cl_localWorkSize[0] = CONV_LOCAL_WORKSIZE; + s->conv_cl_localWorkSize[1] = CONV_LOCAL_WORKSIZE; + + for (int i=0; i<(ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1); i++) {s->lapres[i] = 16160;} + rgb_to_yuv_init(&s->rgb_to_yuv_state, s->context, s->device_id, s->yuv_width, s->yuv_height, s->rgb_stride); rgb_to_yuv_init(&s->front_rgb_to_yuv_state, s->context, s->device_id, s->yuv_front_width, s->yuv_front_height, s->rgb_front_stride); }