Error checking macros for opencl (#2615)

* macro CL_CHECK&CL_CHECK_ERR

* trigger checks
old-commit-hash: 42183d913f
commatwo_master
Dean Lee 4 years ago committed by GitHub
parent 111ff3a411
commit 57138a6e0f
  1. 37
      selfdrive/camerad/cameras/camera_common.cc
  2. 48
      selfdrive/camerad/cameras/camera_qcom.cc
  3. 15
      selfdrive/camerad/cameras/camera_webcam.cc
  4. 6
      selfdrive/camerad/main.cc
  5. 25
      selfdrive/camerad/transforms/rgb_to_yuv.c
  6. 7
      selfdrive/camerad/transforms/rgb_to_yuv_test.cc
  7. 66
      selfdrive/common/clutil.c
  8. 13
      selfdrive/common/clutil.h
  9. 20
      selfdrive/common/visionbuf_cl.c
  10. 11
      selfdrive/common/visionbuf_ion.c
  11. 11
      selfdrive/modeld/modeld.cc
  12. 39
      selfdrive/modeld/models/commonmodel.cc
  13. 59
      selfdrive/modeld/transforms/loadyuv.c
  14. 134
      selfdrive/modeld/transforms/transform.cc
  15. 77
      selfdrive/modeld/visiontest.c

@ -103,23 +103,20 @@ void CameraBuf::init(cl_device_id device_id, cl_context context, CameraState *s,
yuv_bufs[i].v = yuv_bufs[i].u + (yuv_width / 2 * yuv_height / 2);
}
int err;
if (ci->bayer) {
cl_program prg_debayer = build_debayer_program(device_id, context, ci, this);
krnl_debayer = clCreateKernel(prg_debayer, "debayer10", &err);
assert(err == 0);
assert(clReleaseProgram(prg_debayer) == 0);
krnl_debayer = CL_CHECK_ERR(clCreateKernel(prg_debayer, "debayer10", &err));
CL_CHECK(clReleaseProgram(prg_debayer));
}
rgb_to_yuv_init(&rgb_to_yuv_state, context, device_id, yuv_width, yuv_height, rgb_stride);
#ifdef __APPLE__
q = clCreateCommandQueue(context, device_id, 0, &err);
q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
#else
const cl_queue_properties props[] = {0}; //CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0};
q = clCreateCommandQueueWithProperties(context, device_id, props, &err);
q = CL_CHECK_ERR(clCreateCommandQueueWithProperties(context, device_id, props, &err));
#endif
assert(err == 0);
}
CameraBuf::~CameraBuf() {
@ -132,8 +129,8 @@ CameraBuf::~CameraBuf() {
for (int i = 0; i < YUV_COUNT; i++) {
visionbuf_free(&yuv_ion[i]);
}
clReleaseKernel(krnl_debayer);
clReleaseCommandQueue(q);
CL_CHECK(clReleaseKernel(krnl_debayer));
CL_CHECK(clReleaseCommandQueue(q));
}
bool CameraBuf::acquire() {
@ -156,32 +153,32 @@ bool CameraBuf::acquire() {
cl_event debayer_event;
cl_mem camrabuf_cl = camera_bufs[buf_idx].buf_cl;
if (camera_state->ci.bayer) {
assert(clSetKernelArg(krnl_debayer, 0, sizeof(cl_mem), &camrabuf_cl) == 0);
assert(clSetKernelArg(krnl_debayer, 1, sizeof(cl_mem), &cur_rgb_buf->buf_cl) == 0);
CL_CHECK(clSetKernelArg(krnl_debayer, 0, sizeof(cl_mem), &camrabuf_cl));
CL_CHECK(clSetKernelArg(krnl_debayer, 1, sizeof(cl_mem), &cur_rgb_buf->buf_cl));
#ifdef QCOM2
assert(clSetKernelArg(krnl_debayer, 2, camera_state->debayer_cl_localMemSize, 0) == 0);
assert(clEnqueueNDRangeKernel(q, krnl_debayer, 2, NULL,
CL_CHECK(clSetKernelArg(krnl_debayer, 2, camera_state->debayer_cl_localMemSize, 0));
CL_CHECK(clEnqueueNDRangeKernel(q, krnl_debayer, 2, NULL,
camera_state->debayer_cl_globalWorkSize, camera_state->debayer_cl_localWorkSize,
0, 0, &debayer_event) == 0);
0, 0, &debayer_event));
#else
float digital_gain = camera_state->digital_gain;
if ((int)digital_gain == 0) {
digital_gain = 1.0;
}
assert(clSetKernelArg(krnl_debayer, 2, sizeof(float), &digital_gain) == 0);
CL_CHECK(clSetKernelArg(krnl_debayer, 2, sizeof(float), &digital_gain));
const size_t debayer_work_size = rgb_height; // doesn't divide evenly, is this okay?
assert(clEnqueueNDRangeKernel(q, krnl_debayer, 1, NULL,
&debayer_work_size, NULL, 0, 0, &debayer_event) == 0);
CL_CHECK(clEnqueueNDRangeKernel(q, krnl_debayer, 1, NULL,
&debayer_work_size, NULL, 0, 0, &debayer_event));
#endif
} else {
assert(cur_rgb_buf->len >= frame_size);
assert(rgb_stride == camera_state->ci.frame_stride);
assert(clEnqueueCopyBuffer(q, camrabuf_cl, cur_rgb_buf->buf_cl, 0, 0,
cur_rgb_buf->len, 0, 0, &debayer_event) == 0);
CL_CHECK(clEnqueueCopyBuffer(q, camrabuf_cl, cur_rgb_buf->buf_cl, 0, 0,
cur_rgb_buf->len, 0, 0, &debayer_event));
}
clWaitForEvents(1, &debayer_event);
clReleaseEvent(debayer_event);
CL_CHECK(clReleaseEvent(debayer_event));
tbuffer_release(&camera_tb, buf_idx);
visionbuf_sync(cur_rgb_buf, VISIONBUF_SYNC_FROM_DEVICE);

@ -336,7 +336,6 @@ void cameras_init(MultiCameraState *s, cl_device_id device_id, cl_context ctx) {
s->sm_rear = new SubMaster({"sensorEvents"});
s->pm = new PubMaster({"frame", "frontFrame", "thumbnail"});
int err;
const int rgb_width = s->rear.buf.rgb_width;
const int rgb_height = s->rear.buf.rgb_height;
for (int i = 0; i < FRAME_BUF_COUNT; i++) {
@ -345,15 +344,14 @@ void cameras_init(MultiCameraState *s, cl_device_id device_id, cl_context ctx) {
s->stats_bufs[i] = visionbuf_allocate(0xb80);
}
s->prg_rgb_laplacian = build_conv_program(device_id, ctx, rgb_width/NUM_SEGMENTS_X, rgb_height/NUM_SEGMENTS_Y, 3);
s->krnl_rgb_laplacian = clCreateKernel(s->prg_rgb_laplacian, "rgb2gray_conv2d", &err);
assert(err == 0);
s->krnl_rgb_laplacian = CL_CHECK_ERR(clCreateKernel(s->prg_rgb_laplacian, "rgb2gray_conv2d", &err));
// TODO: Removed CL_MEM_SVM_FINE_GRAIN_BUFFER, confirm it doesn't matter
s->rgb_conv_roi_cl = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
rgb_width/NUM_SEGMENTS_X * rgb_height/NUM_SEGMENTS_Y * 3 * sizeof(uint8_t), NULL, NULL);
s->rgb_conv_result_cl = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
rgb_width/NUM_SEGMENTS_X * rgb_height/NUM_SEGMENTS_Y * sizeof(int16_t), NULL, NULL);
s->rgb_conv_filter_cl = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
9 * sizeof(int16_t), (void*)&lapl_conv_krnl, NULL);
s->rgb_conv_roi_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE,
rgb_width/NUM_SEGMENTS_X * rgb_height/NUM_SEGMENTS_Y * 3 * sizeof(uint8_t), NULL, &err));
s->rgb_conv_result_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE,
rgb_width/NUM_SEGMENTS_X * rgb_height/NUM_SEGMENTS_Y * sizeof(int16_t), NULL, &err));
s->rgb_conv_filter_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
9 * sizeof(int16_t), (void*)&lapl_conv_krnl, &err));
s->conv_cl_localMemSize = ( CONV_LOCAL_WORKSIZE + 2 * (3 / 2) ) * ( CONV_LOCAL_WORKSIZE + 2 * (3 / 2) );
s->conv_cl_localMemSize *= 3 * sizeof(uint8_t);
s->conv_cl_globalWorkSize[0] = rgb_width/NUM_SEGMENTS_X;
@ -2093,20 +2091,20 @@ void camera_process_frame(MultiCameraState *s, CameraState *c, int cnt) {
b->rgb_width/NUM_SEGMENTS_X * 3);
}
assert(clEnqueueWriteBuffer(b->q, s->rgb_conv_roi_cl, true, 0,
b->rgb_width / NUM_SEGMENTS_X * b->rgb_height / NUM_SEGMENTS_Y * 3 * sizeof(uint8_t), s->rgb_roi_buf.get(), 0, 0, 0) == 0);
assert(clSetKernelArg(s->krnl_rgb_laplacian, 0, sizeof(cl_mem), (void *)&s->rgb_conv_roi_cl) == 0);
assert(clSetKernelArg(s->krnl_rgb_laplacian, 1, sizeof(cl_mem), (void *)&s->rgb_conv_result_cl) == 0);
assert(clSetKernelArg(s->krnl_rgb_laplacian, 2, sizeof(cl_mem), (void *)&s->rgb_conv_filter_cl) == 0);
assert(clSetKernelArg(s->krnl_rgb_laplacian, 3, s->conv_cl_localMemSize, 0) == 0);
CL_CHECK(clEnqueueWriteBuffer(b->q, s->rgb_conv_roi_cl, true, 0,
b->rgb_width / NUM_SEGMENTS_X * b->rgb_height / NUM_SEGMENTS_Y * 3 * sizeof(uint8_t), s->rgb_roi_buf.get(), 0, 0, 0));
CL_CHECK(clSetKernelArg(s->krnl_rgb_laplacian, 0, sizeof(cl_mem), (void *)&s->rgb_conv_roi_cl));
CL_CHECK(clSetKernelArg(s->krnl_rgb_laplacian, 1, sizeof(cl_mem), (void *)&s->rgb_conv_result_cl));
CL_CHECK(clSetKernelArg(s->krnl_rgb_laplacian, 2, sizeof(cl_mem), (void *)&s->rgb_conv_filter_cl));
CL_CHECK(clSetKernelArg(s->krnl_rgb_laplacian, 3, s->conv_cl_localMemSize, 0));
cl_event conv_event;
assert(clEnqueueNDRangeKernel(b->q, s->krnl_rgb_laplacian, 2, NULL,
s->conv_cl_globalWorkSize, s->conv_cl_localWorkSize, 0, 0, &conv_event) == 0);
CL_CHECK(clEnqueueNDRangeKernel(b->q, s->krnl_rgb_laplacian, 2, NULL,
s->conv_cl_globalWorkSize, s->conv_cl_localWorkSize, 0, 0, &conv_event));
clWaitForEvents(1, &conv_event);
clReleaseEvent(conv_event);
CL_CHECK(clReleaseEvent(conv_event));
assert(clEnqueueReadBuffer(b->q, s->rgb_conv_result_cl, true, 0,
b->rgb_width / NUM_SEGMENTS_X * b->rgb_height / NUM_SEGMENTS_Y * sizeof(int16_t), s->conv_result.get(), 0, 0, 0) == 0);
CL_CHECK(clEnqueueReadBuffer(b->q, s->rgb_conv_result_cl, true, 0,
b->rgb_width / NUM_SEGMENTS_X * b->rgb_height / NUM_SEGMENTS_Y * sizeof(int16_t), s->conv_result.get(), 0, 0, 0));
get_lapmap_one(s->conv_result.get(), &s->lapres[roi_id], b->rgb_width / NUM_SEGMENTS_X, b->rgb_height / NUM_SEGMENTS_Y);
@ -2288,12 +2286,12 @@ void cameras_close(MultiCameraState *s) {
visionbuf_free(&s->focus_bufs[i]);
visionbuf_free(&s->stats_bufs[i]);
}
clReleaseMemObject(s->rgb_conv_roi_cl);
clReleaseMemObject(s->rgb_conv_result_cl);
clReleaseMemObject(s->rgb_conv_filter_cl);
CL_CHECK(clReleaseMemObject(s->rgb_conv_roi_cl));
CL_CHECK(clReleaseMemObject(s->rgb_conv_result_cl));
CL_CHECK(clReleaseMemObject(s->rgb_conv_filter_cl));
clReleaseProgram(s->prg_rgb_laplacian);
clReleaseKernel(s->krnl_rgb_laplacian);
CL_CHECK(clReleaseKernel(s->krnl_rgb_laplacian));
CL_CHECK(clReleaseProgram(s->prg_rgb_laplacian));
delete s->sm_front;
delete s->sm_rear;
delete s->pm;

@ -8,6 +8,7 @@
#include "common/util.h"
#include "common/timing.h"
#include "common/clutil.h"
#include "common/swaglog.h"
#pragma clang diagnostic push
@ -101,15 +102,14 @@ static void* rear_thread(void *arg) {
cl_command_queue q = s->buf.camera_bufs[buf_idx].copy_q;
cl_mem yuv_cl = s->buf.camera_bufs[buf_idx].buf_cl;
cl_event map_event;
void *yuv_buf = (void *)clEnqueueMapBuffer(q, yuv_cl, CL_TRUE,
void *yuv_buf = (void *)CL_CHECK_ERR(clEnqueueMapBuffer(q, yuv_cl, CL_TRUE,
CL_MAP_WRITE, 0, transformed_size,
0, NULL, &map_event, &err);
assert(err == 0);
0, NULL, &map_event, &err));
clWaitForEvents(1, &map_event);
clReleaseEvent(map_event);
memcpy(yuv_buf, transformed_mat.data, transformed_size);
clEnqueueUnmapMemObject(q, yuv_cl, yuv_buf, 0, NULL, &map_event);
CL_CHECK(clEnqueueUnmapMemObject(q, yuv_cl, yuv_buf, 0, NULL, &map_event));
clWaitForEvents(1, &map_event);
clReleaseEvent(map_event);
tbuffer_dispatch(tb, buf_idx);
@ -175,15 +175,14 @@ void front_thread(CameraState *s) {
cl_command_queue q = s->buf.camera_bufs[buf_idx].copy_q;
cl_mem yuv_cl = s->buf.camera_bufs[buf_idx].buf_cl;
cl_event map_event;
void *yuv_buf = (void *)clEnqueueMapBuffer(q, yuv_cl, CL_TRUE,
void *yuv_buf = (void *)CL_CHECK_ERR(clEnqueueMapBuffer(q, yuv_cl, CL_TRUE,
CL_MAP_WRITE, 0, transformed_size,
0, NULL, &map_event, &err);
assert(err == 0);
0, NULL, &map_event, &err));
clWaitForEvents(1, &map_event);
clReleaseEvent(map_event);
memcpy(yuv_buf, transformed_mat.data, transformed_size);
clEnqueueUnmapMemObject(q, yuv_cl, yuv_buf, 0, NULL, &map_event);
CL_CHECK(clEnqueueUnmapMemObject(q, yuv_cl, yuv_buf, 0, NULL, &map_event));
clWaitForEvents(1, &map_event);
clReleaseEvent(map_event);
tbuffer_dispatch(tb, buf_idx);

@ -330,13 +330,11 @@ int main(int argc, char *argv[]) {
signal(SIGINT, (sighandler_t)set_do_exit);
signal(SIGTERM, (sighandler_t)set_do_exit);
int err;
clu_init();
cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT);
cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
assert(err == 0);
cl_context context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, NULL, NULL, &err));
party(device_id, context);
clReleaseContext(context);
CL_CHECK(clReleaseContext(context));
}

@ -6,7 +6,6 @@
#include "rgb_to_yuv.h"
void rgb_to_yuv_init(RGBToYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height, int rgb_stride) {
int err = 0;
memset(s, 0, sizeof(*s));
printf("width %d, height %d, rgb_stride %d\n", width, height, rgb_stride);
assert(width % 2 == 0);
@ -23,32 +22,24 @@ void rgb_to_yuv_init(RGBToYUVState* s, cl_context ctx, cl_device_id device_id, i
width, height, width/ 2, height / 2, rgb_stride, width * height);
cl_program prg = CLU_LOAD_FROM_FILE(ctx, device_id, "transforms/rgb_to_yuv.cl", args);
s->rgb_to_yuv_krnl = clCreateKernel(prg, "rgb_to_yuv", &err);
assert(err == 0);
s->rgb_to_yuv_krnl = CL_CHECK_ERR(clCreateKernel(prg, "rgb_to_yuv", &err));
// done with this
err = clReleaseProgram(prg);
assert(err == 0);
CL_CHECK(clReleaseProgram(prg));
}
void rgb_to_yuv_destroy(RGBToYUVState* s) {
int err = 0;
err = clReleaseKernel(s->rgb_to_yuv_krnl);
assert(err == 0);
CL_CHECK(clReleaseKernel(s->rgb_to_yuv_krnl));
}
void rgb_to_yuv_queue(RGBToYUVState* s, cl_command_queue q, cl_mem rgb_cl, cl_mem yuv_cl) {
int err = 0;
err = clSetKernelArg(s->rgb_to_yuv_krnl, 0, sizeof(cl_mem), &rgb_cl);
assert(err == 0);
err = clSetKernelArg(s->rgb_to_yuv_krnl, 1, sizeof(cl_mem), &yuv_cl);
assert(err == 0);
CL_CHECK(clSetKernelArg(s->rgb_to_yuv_krnl, 0, sizeof(cl_mem), &rgb_cl));
CL_CHECK(clSetKernelArg(s->rgb_to_yuv_krnl, 1, sizeof(cl_mem), &yuv_cl));
const size_t work_size[2] = {
(size_t)(s->width + (s->width % 4 == 0 ? 0 : (4 - s->width % 4))) / 4,
(size_t)(s->height + (s->height % 4 == 0 ? 0 : (4 - s->height % 4))) / 4
};
cl_event event;
err = clEnqueueNDRangeKernel(q, s->rgb_to_yuv_krnl, 2, NULL, &work_size[0], NULL, 0, 0, &event);
assert(err == 0);
clWaitForEvents(1, &event);
clReleaseEvent(event);
CL_CHECK(clEnqueueNDRangeKernel(q, s->rgb_to_yuv_krnl, 2, NULL, &work_size[0], NULL, 0, 0, &event));
CL_CHECK(clWaitForEvents(1, &event));
CL_CHECK(clReleaseEvent(event));
}

@ -41,9 +41,8 @@ static inline double millis_since_boot() {
}
void cl_init(cl_device_id &device_id, cl_context &context) {
int err;
device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT);
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, NULL, NULL, &err));
}
@ -137,13 +136,13 @@ int main(int argc, char** argv) {
rgb_to_yuv_init(&rgb_to_yuv_state, context, device_id, width, height, width * 3);
int frame_yuv_buf_size = width * height * 3 / 2;
cl_mem yuv_cl = clCreateBuffer(context, CL_MEM_READ_WRITE, frame_yuv_buf_size, (void*)NULL, &err);
cl_mem yuv_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, frame_yuv_buf_size, (void*)NULL, &err));
uint8_t *frame_yuv_buf = new uint8_t[frame_yuv_buf_size];
uint8_t *frame_yuv_ptr_y = frame_yuv_buf;
uint8_t *frame_yuv_ptr_u = frame_yuv_buf + (width * height);
uint8_t *frame_yuv_ptr_v = frame_yuv_ptr_u + ((width/2) * (height/2));
cl_mem rgb_cl = clCreateBuffer(context, CL_MEM_READ_WRITE, width * height * 3, (void*)NULL, &err);
cl_mem rgb_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, width * height * 3, (void*)NULL, &err));
int mismatched = 0;
int counter = 0;
srand (time(NULL));

@ -41,26 +41,22 @@ cl_device_id cl_get_device_id(cl_device_type device_type) {
cl_device_id device_id = NULL;
cl_uint num_platforms = 0;
int err = clGetPlatformIDs(0, NULL, &num_platforms);
assert(err == 0);
CL_CHECK(clGetPlatformIDs(0, NULL, &num_platforms));
cl_platform_id* platform_ids = malloc(sizeof(cl_platform_id) * num_platforms);
err = clGetPlatformIDs(num_platforms, platform_ids, NULL);
assert(err == 0);
CL_CHECK(clGetPlatformIDs(num_platforms, platform_ids, NULL));
char cBuffer[1024];
for (size_t i = 0; i < num_platforms; i++) {
err = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(cBuffer), &cBuffer, NULL);
assert(err == 0);
CL_CHECK(clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(cBuffer), &cBuffer, NULL));
printf("platform[%zu] CL_PLATFORM_NAME: %s\n", i, cBuffer);
cl_uint num_devices;
err = clGetDeviceIDs(platform_ids[i], device_type, 0, NULL, &num_devices);
int err = clGetDeviceIDs(platform_ids[i], device_type, 0, NULL, &num_devices);
if (err != 0 || !num_devices) {
continue;
}
// Get first device
err = clGetDeviceIDs(platform_ids[i], device_type, 1, &device_id, NULL);
assert(err == 0);
CL_CHECK(clGetDeviceIDs(platform_ids[i], device_type, 1, &device_id, NULL));
cl_print_info(platform_ids[i], device_id);
opencl_platform_found = true;
break;
@ -77,25 +73,17 @@ cl_device_id cl_get_device_id(cl_device_type device_type) {
cl_program cl_create_program_from_file(cl_context ctx, const char* path) {
char* src_buf = read_file(path, NULL);
assert(src_buf);
int err = 0;
cl_program ret = clCreateProgramWithSource(ctx, 1, (const char**)&src_buf, NULL, &err);
assert(err == 0);
cl_program ret = CL_CHECK_ERR(clCreateProgramWithSource(ctx, 1, (const char**)&src_buf, NULL, &err));
free(src_buf);
return ret;
}
static char* get_version_string(cl_platform_id platform) {
size_t size = 0;
int err;
err = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size);
assert(err == 0);
CL_CHECK(clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size));
char *str = malloc(size);
assert(str);
err = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, str, NULL);
assert(err == 0);
CL_CHECK(clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, str, NULL));
return str;
}
@ -191,8 +179,6 @@ uint64_t clu_fnv_hash(const uint8_t *data, size_t len) {
}
cl_program cl_cached_program_from_hash(cl_context ctx, cl_device_id device_id, uint64_t hash) {
int err;
char cache_path[1024];
snprintf(cache_path, sizeof(cache_path), "/tmp/clcache/%016" PRIx64 ".clb", hash);
@ -202,29 +188,22 @@ cl_program cl_cached_program_from_hash(cl_context ctx, cl_device_id device_id, u
return NULL;
}
cl_program prg = clCreateProgramWithBinary(ctx, 1, &device_id, &bin_size, (const uint8_t**)&bin, NULL, &err);
assert(err == 0);
cl_program prg = CL_CHECK_ERR(clCreateProgramWithBinary(ctx, 1, &device_id, &bin_size, (const uint8_t**)&bin, NULL, &err));
free(bin);
err = clBuildProgram(prg, 1, &device_id, NULL, NULL, NULL);
assert(err == 0);
CL_CHECK(clBuildProgram(prg, 1, &device_id, NULL, NULL, NULL));
return prg;
}
#ifndef CLU_NO_CACHE
static uint8_t* get_program_binary(cl_program prg, size_t *out_size) {
int err;
cl_uint num_devices;
err = clGetProgramInfo(prg, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL);
assert(err == 0);
CL_CHECK(clGetProgramInfo(prg, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL));
assert(num_devices == 1);
size_t binary_size = 0;
err = clGetProgramInfo(prg, CL_PROGRAM_BINARY_SIZES, sizeof(binary_size), &binary_size, NULL);
assert(err == 0);
CL_CHECK(clGetProgramInfo(prg, CL_PROGRAM_BINARY_SIZES, sizeof(binary_size), &binary_size, NULL));
assert(binary_size > 0);
uint8_t *binary_buf = malloc(binary_size);
@ -232,8 +211,7 @@ static uint8_t* get_program_binary(cl_program prg, size_t *out_size) {
uint8_t* bufs[1] = { binary_buf, };
err = clGetProgramInfo(prg, CL_PROGRAM_BINARIES, sizeof(bufs), &bufs, NULL);
assert(err == 0);
CL_CHECK(clGetProgramInfo(prg, CL_PROGRAM_BINARIES, sizeof(bufs), &bufs, NULL));
*out_size = binary_size;
return binary_buf;
@ -243,11 +221,8 @@ static uint8_t* get_program_binary(cl_program prg, size_t *out_size) {
cl_program cl_cached_program_from_string(cl_context ctx, cl_device_id device_id,
const char* src, const char* args,
uint64_t *out_hash) {
int err;
cl_platform_id platform;
err = clGetDeviceInfo(device_id, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
assert(err == 0);
CL_CHECK(clGetDeviceInfo(device_id, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL));
const char* platform_version = get_version_string(platform);
@ -266,10 +241,9 @@ cl_program cl_cached_program_from_string(cl_context ctx, cl_device_id device_id,
prg = cl_cached_program_from_hash(ctx, device_id, hash);
#endif
if (prg == NULL) {
prg = clCreateProgramWithSource(ctx, 1, (const char**)&src, NULL, &err);
assert(err == 0);
prg = CL_CHECK_ERR(clCreateProgramWithSource(ctx, 1, (const char**)&src, NULL, &err));
err = clBuildProgram(prg, 1, &device_id, args, NULL, NULL);
int err = clBuildProgram(prg, 1, &device_id, args, NULL, NULL);
if (err != 0) {
cl_print_build_errors(prg, device_id);
}
@ -315,8 +289,6 @@ static void add_index(uint64_t index_hash, uint64_t src_hash) {
#endif
cl_program cl_program_from_index(cl_context ctx, cl_device_id device_id, uint64_t index_hash) {
int err;
int i;
for (i=0; i<ARRAYSIZE(clu_index); i++) {
if (clu_index[i].index_hash == index_hash) {
@ -330,11 +302,9 @@ cl_program cl_program_from_index(cl_context ctx, cl_device_id device_id, uint64_
size_t bin_size = clu_index[i].bin_end - clu_index[i].bin_data;
const uint8_t *bin_data = clu_index[i].bin_data;
cl_program prg = clCreateProgramWithBinary(ctx, 1, &device_id, &bin_size, (const uint8_t**)&bin_data, NULL, &err);
assert(err == 0);
cl_program prg = CL_CHECK_ERR(clCreateProgramWithBinary(ctx, 1, &device_id, &bin_size, (const uint8_t**)&bin_data, NULL, &err));
err = clBuildProgram(prg, 1, &device_id, NULL, NULL, NULL);
assert(err == 0);
CL_CHECK(clBuildProgram(prg, 1, &device_id, NULL, NULL, NULL));
return prg;
}

@ -15,6 +15,19 @@
extern "C" {
#endif
#define CL_CHECK(_expr) \
do { \
assert(CL_SUCCESS == _expr); \
} while (0)
#define CL_CHECK_ERR(_expr) \
({ \
cl_int err = CL_INVALID_VALUE; \
__typeof__(_expr) _ret = _expr; \
assert(_ret&& err == CL_SUCCESS); \
_ret; \
})
void clu_init(void);
cl_device_id cl_get_device_id(cl_device_type device_type);

@ -7,6 +7,7 @@
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include "common/clutil.h"
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#ifdef __APPLE__
@ -46,8 +47,6 @@ VisionBuf visionbuf_allocate(size_t len) {
}
VisionBuf visionbuf_allocate_cl(size_t len, cl_device_id device_id, cl_context ctx) {
int err;
#if __OPENCL_VERSION__ >= 200
void* host_ptr =
clSVMAlloc(ctx, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, len, 0);
@ -56,12 +55,10 @@ VisionBuf visionbuf_allocate_cl(size_t len, cl_device_id device_id, cl_context c
int fd;
void* host_ptr = malloc_with_fd(len, &fd);
cl_command_queue q = clCreateCommandQueue(ctx, device_id, 0, &err);
assert(err == 0);
cl_command_queue q = CL_CHECK_ERR(clCreateCommandQueue(ctx, device_id, 0, &err));
#endif
cl_mem mem = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, len, host_ptr, &err);
assert(err == 0);
cl_mem mem = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, len, host_ptr, &err));
return (VisionBuf){
.len = len, .addr = host_ptr, .handle = 0, .fd = fd,
@ -75,16 +72,14 @@ VisionBuf visionbuf_allocate_cl(size_t len, cl_device_id device_id, cl_context c
}
void visionbuf_sync(const VisionBuf* buf, int dir) {
int err = 0;
if (!buf->buf_cl) return;
#if __OPENCL_VERSION__ < 200
if (dir == VISIONBUF_SYNC_FROM_DEVICE) {
err = clEnqueueReadBuffer(buf->copy_q, buf->buf_cl, CL_FALSE, 0, buf->len, buf->addr, 0, NULL, NULL);
CL_CHECK(clEnqueueReadBuffer(buf->copy_q, buf->buf_cl, CL_FALSE, 0, buf->len, buf->addr, 0, NULL, NULL));
} else {
err = clEnqueueWriteBuffer(buf->copy_q, buf->buf_cl, CL_FALSE, 0, buf->len, buf->addr, 0, NULL, NULL);
CL_CHECK(clEnqueueWriteBuffer(buf->copy_q, buf->buf_cl, CL_FALSE, 0, buf->len, buf->addr, 0, NULL, NULL));
}
assert(err == 0);
clFinish(buf->copy_q);
#endif
}
@ -94,12 +89,11 @@ void visionbuf_free(const VisionBuf* buf) {
munmap(buf->addr, buf->len);
close(buf->fd);
} else {
int err = clReleaseMemObject(buf->buf_cl);
assert(err == 0);
CL_CHECK(clReleaseMemObject(buf->buf_cl));
#if __OPENCL_VERSION__ >= 200
clSVMFree(buf->ctx, buf->addr);
#else
clReleaseCommandQueue(buf->copy_q);
CL_CHECK(clReleaseCommandQueue(buf->copy_q));
munmap(buf->addr, buf->len);
close(buf->fd);
#endif

@ -10,7 +10,7 @@
#include <unistd.h>
#include <linux/ion.h>
#include <CL/cl_ext.h>
#include "common/clutil.h"
#include <msm_ion.h>
#include "visionbuf.h"
@ -73,7 +73,6 @@ VisionBuf visionbuf_allocate(size_t len) {
VisionBuf visionbuf_allocate_cl(size_t len, cl_device_id device_id, cl_context ctx) {
VisionBuf buf = visionbuf_allocate(len);
int err = 0;
assert(((uintptr_t)buf.addr % DEVICE_PAGE_SIZE_CL) == 0);
@ -83,11 +82,9 @@ VisionBuf visionbuf_allocate_cl(size_t len, cl_device_id device_id, cl_context c
ion_cl.ion_filedesc = buf.fd;
ion_cl.ion_hostptr = buf.addr;
buf.buf_cl = clCreateBuffer(ctx,
buf.buf_cl = CL_CHECK_ERR(clCreateBuffer(ctx,
CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
buf.len, &ion_cl, &err);
assert(err == 0);
buf.len, &ion_cl, &err));
return buf;
}
@ -134,7 +131,7 @@ void visionbuf_sync(const VisionBuf* buf, int dir) {
}
void visionbuf_free(const VisionBuf* buf) {
clReleaseMemObject(buf->buf_cl);
CL_CHECK(clReleaseMemObject(buf->buf_cl));
munmap(buf->addr, buf->mmap_len);
close(buf->fd);
struct ion_handle_data handle_data = {

@ -122,11 +122,8 @@ int main(int argc, char **argv) {
// cl init
cl_device_id device_id = cl_get_device_id(device_type);
cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
assert(err == 0);
cl_command_queue q = clCreateCommandQueue(context, device_id, 0, &err);
assert(err == 0);
cl_context context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, NULL, NULL, &err));
cl_command_queue q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
// init the models
ModelState model;
@ -223,8 +220,8 @@ int main(int argc, char **argv) {
LOG("joining live_thread");
err = pthread_join(live_thread_handle, NULL);
assert(err == 0);
clReleaseCommandQueue(q);
clReleaseContext(context);
CL_CHECK(clReleaseContext(context));
CL_CHECK(clReleaseCommandQueue(q));
pthread_mutex_destroy(&transform_lock);
return 0;

@ -1,40 +1,31 @@
#include <assert.h>
#include <math.h>
#include "commonmodel.h"
#include "common/clutil.h"
#include "common/mat.h"
#include "common/timing.h"
void frame_init(ModelFrame* frame, int width, int height,
cl_device_id device_id, cl_context context) {
int err;
transform_init(&frame->transform, context, device_id);
frame->transformed_width = width;
frame->transformed_height = height;
frame->transformed_y_cl = clCreateBuffer(context, CL_MEM_READ_WRITE,
(size_t)frame->transformed_width*frame->transformed_height, NULL, &err);
assert(err == 0);
frame->transformed_u_cl = clCreateBuffer(context, CL_MEM_READ_WRITE,
(size_t)(frame->transformed_width/2)*(frame->transformed_height/2), NULL, &err);
assert(err == 0);
frame->transformed_v_cl = clCreateBuffer(context, CL_MEM_READ_WRITE,
(size_t)(frame->transformed_width/2)*(frame->transformed_height/2), NULL, &err);
assert(err == 0);
frame->transformed_y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE,
(size_t)frame->transformed_width*frame->transformed_height, NULL, &err));
frame->transformed_u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE,
(size_t)(frame->transformed_width/2)*(frame->transformed_height/2), NULL, &err));
frame->transformed_v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE,
(size_t)(frame->transformed_width/2)*(frame->transformed_height/2), NULL, &err));
frame->net_input_size = ((width*height*3)/2)*sizeof(float);
frame->net_input = clCreateBuffer(context, CL_MEM_READ_WRITE,
frame->net_input_size, (void*)NULL, &err);
assert(err == 0);
frame->net_input = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE,
frame->net_input_size, (void*)NULL, &err));
loadyuv_init(&frame->loadyuv, context, device_id, frame->transformed_width, frame->transformed_height);
}
float *frame_prepare(ModelFrame* frame, cl_command_queue q,
cl_mem yuv_cl, int width, int height,
mat3 transform) {
int err;
transform_queue(&frame->transform, q,
yuv_cl, width, height,
frame->transformed_y_cl, frame->transformed_u_cl, frame->transformed_v_cl,
@ -43,9 +34,9 @@ float *frame_prepare(ModelFrame* frame, cl_command_queue q,
loadyuv_queue(&frame->loadyuv, q,
frame->transformed_y_cl, frame->transformed_u_cl, frame->transformed_v_cl,
frame->net_input);
float *net_input_buf = (float *)clEnqueueMapBuffer(q, frame->net_input, CL_TRUE,
float *net_input_buf = (float *)CL_CHECK_ERR(clEnqueueMapBuffer(q, frame->net_input, CL_TRUE,
CL_MAP_READ, 0, frame->net_input_size,
0, NULL, NULL, &err);
0, NULL, NULL, &err));
clFinish(q);
return net_input_buf;
}
@ -53,10 +44,10 @@ float *frame_prepare(ModelFrame* frame, cl_command_queue q,
void frame_free(ModelFrame* frame) {
transform_destroy(&frame->transform);
loadyuv_destroy(&frame->loadyuv);
clReleaseMemObject(frame->net_input);
clReleaseMemObject(frame->transformed_v_cl);
clReleaseMemObject(frame->transformed_u_cl);
clReleaseMemObject(frame->transformed_y_cl);
CL_CHECK(clReleaseMemObject(frame->net_input));
CL_CHECK(clReleaseMemObject(frame->transformed_v_cl));
CL_CHECK(clReleaseMemObject(frame->transformed_u_cl));
CL_CHECK(clReleaseMemObject(frame->transformed_y_cl));
}
void softmax(const float* input, float* output, size_t len) {

@ -6,7 +6,6 @@
#include "loadyuv.h"
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height) {
int err = 0;
memset(s, 0, sizeof(*s));
s->width = width;
@ -19,64 +18,44 @@ void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int w
width, height);
cl_program prg = CLU_LOAD_FROM_FILE(ctx, device_id, "transforms/loadyuv.cl", args);
s->loadys_krnl = clCreateKernel(prg, "loadys", &err);
assert(err == 0);
s->loaduv_krnl = clCreateKernel(prg, "loaduv", &err);
assert(err == 0);
s->loadys_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loadys", &err));
s->loaduv_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loaduv", &err));
// done with this
err = clReleaseProgram(prg);
assert(err == 0);
CL_CHECK(clReleaseProgram(prg));
}
void loadyuv_destroy(LoadYUVState* s) {
int err = 0;
err = clReleaseKernel(s->loadys_krnl);
assert(err == 0);
err = clReleaseKernel(s->loaduv_krnl);
assert(err == 0);
CL_CHECK(clReleaseKernel(s->loadys_krnl));
CL_CHECK(clReleaseKernel(s->loaduv_krnl));
}
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
cl_mem out_cl) {
int err = 0;
err = clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl);
assert(err == 0);
err = clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl);
assert(err == 0);
CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl));
CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl));
const size_t loadys_work_size = (s->width*s->height)/8;
err = clEnqueueNDRangeKernel(q, s->loadys_krnl, 1, NULL,
&loadys_work_size, NULL, 0, 0, NULL);
assert(err == 0);
CL_CHECK(clEnqueueNDRangeKernel(q, s->loadys_krnl, 1, NULL,
&loadys_work_size, NULL, 0, 0, NULL));
const size_t loaduv_work_size = ((s->width/2)*(s->height/2))/8;
cl_int loaduv_out_off = (s->width*s->height);
err = clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &u_cl);
assert(err == 0);
err = clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl);
assert(err == 0);
err = clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &loaduv_out_off);
assert(err == 0);
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &u_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &loaduv_out_off));
err = clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
&loaduv_work_size, NULL, 0, 0, NULL);
assert(err == 0);
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
&loaduv_work_size, NULL, 0, 0, NULL));
loaduv_out_off += (s->width/2)*(s->height/2);
err = clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &v_cl);
assert(err == 0);
err = clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl);
assert(err == 0);
err = clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &loaduv_out_off);
assert(err == 0);
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &v_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &loaduv_out_off));
err = clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
&loaduv_work_size, NULL, 0, 0, NULL);
assert(err == 0);
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
&loaduv_work_size, NULL, 0, 0, NULL));
}

@ -6,35 +6,21 @@
#include "transform.h"
void transform_init(Transform* s, cl_context ctx, cl_device_id device_id) {
int err = 0;
memset(s, 0, sizeof(*s));
cl_program prg = CLU_LOAD_FROM_FILE(ctx, device_id, "transforms/transform.cl", "");
s->krnl = clCreateKernel(prg, "warpPerspective", &err);
assert(err == 0);
s->krnl = CL_CHECK_ERR(clCreateKernel(prg, "warpPerspective", &err));
// done with this
err = clReleaseProgram(prg);
assert(err == 0);
CL_CHECK(clReleaseProgram(prg));
s->m_y_cl = clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err);
assert(err == 0);
s->m_uv_cl = clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err);
assert(err == 0);
s->m_y_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err));
s->m_uv_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err));
}
void transform_destroy(Transform* s) {
int err = 0;
err = clReleaseMemObject(s->m_y_cl);
assert(err == 0);
err = clReleaseMemObject(s->m_uv_cl);
assert(err == 0);
err = clReleaseKernel(s->krnl);
assert(err == 0);
CL_CHECK(clReleaseMemObject(s->m_y_cl));
CL_CHECK(clReleaseMemObject(s->m_uv_cl));
CL_CHECK(clReleaseKernel(s->krnl));
}
void transform_queue(Transform* s,
@ -43,7 +29,6 @@ void transform_queue(Transform* s,
cl_mem out_y, cl_mem out_u, cl_mem out_v,
int out_width, int out_height,
mat3 projection) {
int err = 0;
const int zero = 0;
// sampled using pixel center origin
@ -54,10 +39,8 @@ void transform_queue(Transform* s,
// in and out uv is half the size of y.
mat3 projection_uv = transform_scale_buffer(projection, 0.5);
err = clEnqueueWriteBuffer(q, s->m_y_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_y.v, 0, NULL, NULL);
assert(err == 0);
err = clEnqueueWriteBuffer(q, s->m_uv_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_uv.v, 0, NULL, NULL);
assert(err == 0);
CL_CHECK(clEnqueueWriteBuffer(q, s->m_y_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_y.v, 0, NULL, NULL));
CL_CHECK(clEnqueueWriteBuffer(q, s->m_uv_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_uv.v, 0, NULL, NULL));
const int in_y_width = in_width;
const int in_y_height = in_height;
@ -72,78 +55,41 @@ void transform_queue(Transform* s,
const int out_uv_width = out_width/2;
const int out_uv_height = out_height/2;
err = clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv);
assert(err == 0);
err = clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_y_width);
assert(err == 0);
err = clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_offset);
assert(err == 0);
err = clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_y_height);
assert(err == 0);
err = clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_width);
assert(err == 0);
err = clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_y);
assert(err == 0);
err = clSetKernelArg(s->krnl, 6, sizeof(cl_int), &out_y_width);
assert(err == 0);
err = clSetKernelArg(s->krnl, 7, sizeof(cl_int), &zero);
assert(err == 0);
err = clSetKernelArg(s->krnl, 8, sizeof(cl_int), &out_y_height);
assert(err == 0);
err = clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_width);
assert(err == 0);
err = clSetKernelArg(s->krnl, 10, sizeof(cl_mem), &s->m_y_cl);
assert(err == 0);
CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv));
CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_y_width));
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_offset));
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_y_height));
CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_width));
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_y));
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_int), &out_y_width));
CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &zero));
CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &out_y_height));
CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_width));
CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_mem), &s->m_y_cl));
const size_t work_size_y[2] = {(size_t)out_y_width, (size_t)out_y_height};
err = clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_y, NULL, 0, 0, NULL);
assert(err == 0);
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_y, NULL, 0, 0, NULL));
const size_t work_size_uv[2] = {(size_t)out_uv_width, (size_t)out_uv_height};
err = clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_uv_width);
assert(err == 0);
err = clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_u_offset);
assert(err == 0);
err = clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_uv_height);
assert(err == 0);
err = clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_width);
assert(err == 0);
err = clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_u);
assert(err == 0);
err = clSetKernelArg(s->krnl, 6, sizeof(cl_int), &out_uv_width);
assert(err == 0);
err = clSetKernelArg(s->krnl, 7, sizeof(cl_int), &zero);
assert(err == 0);
err = clSetKernelArg(s->krnl, 8, sizeof(cl_int), &out_uv_height);
assert(err == 0);
err = clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_width);
assert(err == 0);
err = clSetKernelArg(s->krnl, 10, sizeof(cl_mem), &s->m_uv_cl);
assert(err == 0);
err = clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_uv, NULL, 0, 0, NULL);
assert(err == 0);
err = clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_v_offset);
assert(err == 0);
err = clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_v);
assert(err == 0);
err = clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_uv, NULL, 0, 0, NULL);
assert(err == 0);
CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_uv_width));
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_u_offset));
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_uv_height));
CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_width));
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_u));
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_int), &out_uv_width));
CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &zero));
CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &out_uv_height));
CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_width));
CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_mem), &s->m_uv_cl));
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_uv, NULL, 0, 0, NULL));
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_v_offset));
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_v));
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_uv, NULL, 0, 0, NULL));
}

@ -33,19 +33,14 @@ typedef struct {
void initialize_opencl(VisionTest* visiontest) {
// init cl
int err;
cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_CPU);
visiontest->context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
assert(err == 0);
visiontest->context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, NULL, NULL, &err));
visiontest->device_id = device_id;
}
VisionTest* visiontest_create(int temporal_model, int disable_model,
int input_width, int input_height,
int model_input_width, int model_input_height) {
int err = 0;
VisionTest* const vt = calloc(1, sizeof(*vt));
assert(vt);
@ -62,45 +57,27 @@ VisionTest* visiontest_create(int temporal_model, int disable_model,
assert((vt->in_width%2) == 0 && (vt->in_height%2) == 0);
vt->in_yuv_size = vt->in_width*vt->in_height*3/2;
vt->in_yuv_cl = clCreateBuffer(vt->context, CL_MEM_READ_WRITE,
vt->in_yuv_size, NULL, &err);
assert(err == 0);
vt->out_y_cl = clCreateBuffer(vt->context, CL_MEM_READ_WRITE,
vt->out_width*vt->out_width, NULL, &err);
assert(err == 0);
vt->out_u_cl = clCreateBuffer(vt->context, CL_MEM_READ_WRITE,
vt->out_width*vt->out_width/4, NULL, &err);
assert(err == 0);
vt->out_v_cl = clCreateBuffer(vt->context, CL_MEM_READ_WRITE,
vt->out_width*vt->out_width/4, NULL, &err);
assert(err == 0);
vt->command_queue = clCreateCommandQueue(vt->context, vt->device_id, 0, &err);
assert(err == 0);
vt->in_yuv_cl = CL_CHECK_ERR(clCreateBuffer(vt->context, CL_MEM_READ_WRITE,
vt->in_yuv_size, NULL, &err));
vt->out_y_cl = CL_CHECK_ERR(clCreateBuffer(vt->context, CL_MEM_READ_WRITE,
vt->out_width*vt->out_width, NULL, &err));
vt->out_u_cl = CL_CHECK_ERR(clCreateBuffer(vt->context, CL_MEM_READ_WRITE,
vt->out_width*vt->out_width/4, NULL, &err));
vt->out_v_cl = CL_CHECK_ERR(clCreateBuffer(vt->context, CL_MEM_READ_WRITE,
vt->out_width*vt->out_width/4, NULL, &err));
vt->command_queue = CL_CHECK_ERR(clCreateCommandQueue(vt->context, vt->device_id, 0, &err));
return vt;
}
void visiontest_destroy(VisionTest* vt) {
transform_destroy(&vt->transform);
int err = 0;
err = clReleaseMemObject(vt->in_yuv_cl);
assert(err == 0);
err = clReleaseMemObject(vt->out_y_cl);
assert(err == 0);
err = clReleaseMemObject(vt->out_u_cl);
assert(err == 0);
err = clReleaseMemObject(vt->out_v_cl);
assert(err == 0);
err = clReleaseCommandQueue(vt->command_queue);
assert(err == 0);
err = clReleaseContext(vt->context);
assert(err == 0);
CL_CHECK(clReleaseMemObject(vt->in_yuv_cl));
CL_CHECK(clReleaseMemObject(vt->out_y_cl));
CL_CHECK(clReleaseMemObject(vt->out_u_cl));
CL_CHECK(clReleaseMemObject(vt->out_v_cl));
CL_CHECK(clReleaseCommandQueue(vt->command_queue));
CL_CHECK(clReleaseContext(vt->context));
free(vt);
}
@ -108,11 +85,8 @@ void visiontest_destroy(VisionTest* vt) {
void visiontest_transform(VisionTest* vt, const uint8_t* yuv_data,
uint8_t* out_y, uint8_t* out_u, uint8_t* out_v,
const float* transform) {
int err = 0;
err = clEnqueueWriteBuffer(vt->command_queue, vt->in_yuv_cl, CL_FALSE,
0, vt->in_yuv_size, yuv_data, 0, NULL, NULL);
assert(err == 0);
CL_CHECK(clEnqueueWriteBuffer(vt->command_queue, vt->in_yuv_cl, CL_FALSE,
0, vt->in_yuv_size, yuv_data, 0, NULL, NULL));
mat3 transform_m = *(const mat3*)transform;
@ -122,18 +96,15 @@ void visiontest_transform(VisionTest* vt, const uint8_t* yuv_data,
vt->out_width, vt->out_height,
transform_m);
err = clEnqueueReadBuffer(vt->command_queue, vt->out_y_cl, CL_FALSE,
CL_CHECK(clEnqueueReadBuffer(vt->command_queue, vt->out_y_cl, CL_FALSE,
0, vt->out_width*vt->out_height, out_y,
0, NULL, NULL);
assert(err == 0);
err = clEnqueueReadBuffer(vt->command_queue, vt->out_u_cl, CL_FALSE,
0, NULL, NULL));
CL_CHECK(clEnqueueReadBuffer(vt->command_queue, vt->out_u_cl, CL_FALSE,
0, vt->out_width*vt->out_height/4, out_u,
0, NULL, NULL);
assert(err == 0);
err = clEnqueueReadBuffer(vt->command_queue, vt->out_v_cl, CL_FALSE,
0, NULL, NULL));
CL_CHECK(clEnqueueReadBuffer(vt->command_queue, vt->out_v_cl, CL_FALSE,
0, vt->out_width*vt->out_height/4, out_v,
0, NULL, NULL);
assert(err == 0);
0, NULL, NULL));
clFinish(vt->command_queue);
}

Loading…
Cancel
Save