You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
126 lines
3.2 KiB
126 lines
3.2 KiB
#include <cstdlib>
|
|
#include <cstdio>
|
|
#include <cassert>
|
|
|
|
#include <cstring>
|
|
#include <unistd.h>
|
|
|
|
// #include <opencv2/opencv.hpp>
|
|
|
|
#ifdef __APPLE__
|
|
#include <OpenCL/cl.h>
|
|
#else
|
|
#include <CL/cl.h>
|
|
#endif
|
|
|
|
#include "common/util.h"
|
|
#include "common/timing.h"
|
|
#include "common/mat.h"
|
|
#include "clutil.h"
|
|
|
|
int main() {
|
|
|
|
int rgb_width = 1164;
|
|
int rgb_height = 874;
|
|
|
|
int rgb_stride = rgb_width*3;
|
|
|
|
size_t out_size = rgb_width*rgb_height*3/2;
|
|
|
|
uint8_t* rgb_buf = (uint8_t*)calloc(1, rgb_width*rgb_height*3);
|
|
uint8_t* out = (uint8_t*)calloc(1, out_size);
|
|
|
|
for (int y=0; y<rgb_height; y++) {
|
|
for (int k=0; k<rgb_stride; k++) {
|
|
rgb_buf[y*rgb_stride + k] = k ^ y;
|
|
}
|
|
}
|
|
|
|
|
|
// init cl
|
|
int err;
|
|
cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT);
|
|
cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
|
|
assert(err == 0);
|
|
|
|
cl_program prg = cl_create_program_from_file(context, "yuv.cl");
|
|
|
|
err = clBuildProgram(prg, 1, &device_id, "", NULL, NULL);
|
|
if (err != 0) {
|
|
cl_print_build_errors(prg, device_id);
|
|
}
|
|
cl_check_error(err);
|
|
|
|
|
|
cl_kernel krnl = clCreateKernel(prg, "RGB2YUV_YV12_IYUV", &err);
|
|
assert(err == 0);
|
|
|
|
|
|
cl_mem inbuf_cl = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
|
rgb_width*rgb_height*3, (void*)rgb_buf, &err);
|
|
cl_check_error(err);
|
|
cl_mem out_cl = clCreateBuffer(context, CL_MEM_READ_WRITE, out_size, NULL, &err);
|
|
cl_check_error(err);
|
|
|
|
|
|
// load into net
|
|
err = clSetKernelArg(krnl, 0, sizeof(cl_mem), &inbuf_cl); //srcptr
|
|
assert(err == 0);
|
|
|
|
int zero = 0;
|
|
err = clSetKernelArg(krnl, 1, sizeof(cl_int), &rgb_stride); //src_step
|
|
assert(err == 0);
|
|
err = clSetKernelArg(krnl, 2, sizeof(cl_int), &zero); //src_offset
|
|
assert(err == 0);
|
|
|
|
err = clSetKernelArg(krnl, 3, sizeof(cl_mem), &out_cl); //dstptr
|
|
assert(err == 0);
|
|
err = clSetKernelArg(krnl, 4, sizeof(cl_int), &rgb_width); //dst_step
|
|
assert(err == 0);
|
|
err = clSetKernelArg(krnl, 5, sizeof(cl_int), &zero); //dst_offset
|
|
assert(err == 0);
|
|
|
|
const int rows = rgb_height * 3 / 2;
|
|
err = clSetKernelArg(krnl, 6, sizeof(cl_int), &rows); //rows
|
|
assert(err == 0);
|
|
err = clSetKernelArg(krnl, 7, sizeof(cl_int), &rgb_width); //cols
|
|
assert(err == 0);
|
|
|
|
cl_command_queue q = clCreateCommandQueue(context, device_id, 0, &err);
|
|
assert(err == 0);
|
|
const size_t work_size[2] = {rgb_width/2, rows/3};
|
|
|
|
err = clEnqueueNDRangeKernel(q, krnl, 2, NULL,
|
|
(const size_t*)&work_size, NULL, 0, 0, NULL);
|
|
cl_check_error(err);
|
|
clFinish(q);
|
|
|
|
|
|
|
|
double t1 = millis_since_boot();
|
|
for (int k=0; k<32; k++) {
|
|
err = clEnqueueNDRangeKernel(q, krnl, 2, NULL,
|
|
(const size_t*)&work_size, NULL, 0, 0, NULL);
|
|
cl_check_error(err);
|
|
}
|
|
clFinish(q);
|
|
double t2 = millis_since_boot();
|
|
printf("t: %.2f\n", (t2-t1)/32.);
|
|
|
|
uint8_t* out_ptr = (uint8_t*)clEnqueueMapBuffer(q, out_cl, CL_FALSE,
|
|
CL_MAP_READ, 0, out_size,
|
|
0, NULL, NULL, &err);
|
|
assert(err == 0);
|
|
clFinish(q);
|
|
|
|
|
|
FILE* of = fopen("out_cl.bin", "wb");
|
|
fwrite(out_ptr, out_size, 1, of);
|
|
fclose(of);
|
|
|
|
|
|
// #endif
|
|
|
|
|
|
return 0;
|
|
} |