openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

126 lines
3.2 KiB

#include <cstdlib>
#include <cstdio>
#include <cassert>
#include <cstring>
#include <unistd.h>
// #include <opencv2/opencv.hpp>
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include "common/util.h"
#include "common/timing.h"
#include "common/mat.h"
#include "clutil.h"
int main() {
int rgb_width = 1164;
int rgb_height = 874;
int rgb_stride = rgb_width*3;
size_t out_size = rgb_width*rgb_height*3/2;
uint8_t* rgb_buf = (uint8_t*)calloc(1, rgb_width*rgb_height*3);
uint8_t* out = (uint8_t*)calloc(1, out_size);
for (int y=0; y<rgb_height; y++) {
for (int k=0; k<rgb_stride; k++) {
rgb_buf[y*rgb_stride + k] = k ^ y;
}
}
// init cl
int err;
cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT);
cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
assert(err == 0);
cl_program prg = cl_create_program_from_file(context, "yuv.cl");
err = clBuildProgram(prg, 1, &device_id, "", NULL, NULL);
if (err != 0) {
cl_print_build_errors(prg, device_id);
}
cl_check_error(err);
cl_kernel krnl = clCreateKernel(prg, "RGB2YUV_YV12_IYUV", &err);
assert(err == 0);
cl_mem inbuf_cl = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
rgb_width*rgb_height*3, (void*)rgb_buf, &err);
cl_check_error(err);
cl_mem out_cl = clCreateBuffer(context, CL_MEM_READ_WRITE, out_size, NULL, &err);
cl_check_error(err);
// load into net
err = clSetKernelArg(krnl, 0, sizeof(cl_mem), &inbuf_cl); //srcptr
assert(err == 0);
int zero = 0;
err = clSetKernelArg(krnl, 1, sizeof(cl_int), &rgb_stride); //src_step
assert(err == 0);
err = clSetKernelArg(krnl, 2, sizeof(cl_int), &zero); //src_offset
assert(err == 0);
err = clSetKernelArg(krnl, 3, sizeof(cl_mem), &out_cl); //dstptr
assert(err == 0);
err = clSetKernelArg(krnl, 4, sizeof(cl_int), &rgb_width); //dst_step
assert(err == 0);
err = clSetKernelArg(krnl, 5, sizeof(cl_int), &zero); //dst_offset
assert(err == 0);
const int rows = rgb_height * 3 / 2;
err = clSetKernelArg(krnl, 6, sizeof(cl_int), &rows); //rows
assert(err == 0);
err = clSetKernelArg(krnl, 7, sizeof(cl_int), &rgb_width); //cols
assert(err == 0);
cl_command_queue q = clCreateCommandQueue(context, device_id, 0, &err);
assert(err == 0);
const size_t work_size[2] = {rgb_width/2, rows/3};
err = clEnqueueNDRangeKernel(q, krnl, 2, NULL,
(const size_t*)&work_size, NULL, 0, 0, NULL);
cl_check_error(err);
clFinish(q);
double t1 = millis_since_boot();
for (int k=0; k<32; k++) {
err = clEnqueueNDRangeKernel(q, krnl, 2, NULL,
(const size_t*)&work_size, NULL, 0, 0, NULL);
cl_check_error(err);
}
clFinish(q);
double t2 = millis_since_boot();
printf("t: %.2f\n", (t2-t1)/32.);
uint8_t* out_ptr = (uint8_t*)clEnqueueMapBuffer(q, out_cl, CL_FALSE,
CL_MAP_READ, 0, out_size,
0, NULL, NULL, &err);
assert(err == 0);
clFinish(q);
FILE* of = fopen("out_cl.bin", "wb");
fwrite(out_ptr, out_size, 1, of);
fclose(of);
// #endif
return 0;
}