|
|
|
@ -14,11 +14,62 @@ static inline uint64_t nanos_since_boot() { |
|
|
|
|
struct kernel { |
|
|
|
|
cl_kernel k; |
|
|
|
|
const char *name; |
|
|
|
|
cl_program p; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int k_index = 0; |
|
|
|
|
struct kernel kk[0x1000] = {0}; |
|
|
|
|
|
|
|
|
|
FILE *f = NULL; |
|
|
|
|
|
|
|
|
|
cl_program clCreateProgramWithSource(cl_context context, |
|
|
|
|
cl_uint count, |
|
|
|
|
const char **strings, |
|
|
|
|
const size_t *lengths, |
|
|
|
|
cl_int *errcode_ret) { |
|
|
|
|
printf("clCreateProgramWithSource: %d\n", count); |
|
|
|
|
|
|
|
|
|
if (f == NULL) { |
|
|
|
|
f = fopen("/tmp/kernels.cl", "w"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fprintf(f, "/* ************************ PROGRAM BREAK ****************************/\n"); |
|
|
|
|
for (int i = 0; i < count; i++) { |
|
|
|
|
fprintf(f, "%s\n", strings[i]); |
|
|
|
|
if (i != 0) fprintf(f, "/* ************************ SECTION BREAK ****************************/\n"); |
|
|
|
|
} |
|
|
|
|
fflush(f); |
|
|
|
|
|
|
|
|
|
cl_program (*my_clCreateProgramWithSource)(cl_context context, |
|
|
|
|
cl_uint count, |
|
|
|
|
const char **strings, |
|
|
|
|
const size_t *lengths, |
|
|
|
|
cl_int *errcode_ret) = dlsym(RTLD_NEXT, "REAL_clCreateProgramWithSource"); |
|
|
|
|
|
|
|
|
|
return my_clCreateProgramWithSource(context, count, strings, lengths, errcode_ret); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
cl_program clCreateProgramWithBinary(cl_context context, |
|
|
|
|
cl_uint num_devices, |
|
|
|
|
const cl_device_id *device_list, |
|
|
|
|
const size_t *lengths, |
|
|
|
|
const unsigned char **binaries, |
|
|
|
|
cl_int *binary_status, |
|
|
|
|
cl_int *errcode_ret) { |
|
|
|
|
printf("clCreateProgramWithBinary\n"); |
|
|
|
|
|
|
|
|
|
cl_program (*my_clCreateProgramWithBinary)(cl_context context, |
|
|
|
|
cl_uint num_devices, |
|
|
|
|
const cl_device_id *device_list, |
|
|
|
|
const size_t *lengths, |
|
|
|
|
const unsigned char **binaries, |
|
|
|
|
cl_int *binary_status, |
|
|
|
|
cl_int *errcode_ret) = dlsym(RTLD_NEXT, "REAL_clCreateProgramWithBinary"); |
|
|
|
|
|
|
|
|
|
return my_clCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) { |
|
|
|
|
cl_kernel (*my_clCreateKernel)(cl_program program, const char *kernel_name, cl_int *errcode_ret); |
|
|
|
|
my_clCreateKernel = dlsym(RTLD_NEXT, "REAL_clCreateKernel"); |
|
|
|
@ -30,6 +81,7 @@ cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *er |
|
|
|
|
|
|
|
|
|
kk[k_index].k = ret; |
|
|
|
|
kk[k_index].name = tmp; |
|
|
|
|
kk[k_index].p = program; |
|
|
|
|
k_index++; |
|
|
|
|
return ret; |
|
|
|
|
} |
|
|
|
@ -39,14 +91,14 @@ uint64_t start_time = 0; |
|
|
|
|
int cnt = 0; |
|
|
|
|
|
|
|
|
|
cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, |
|
|
|
|
cl_kernel kernel, |
|
|
|
|
cl_uint work_dim, |
|
|
|
|
const size_t *global_work_offset, |
|
|
|
|
const size_t *global_work_size, |
|
|
|
|
const size_t *local_work_size, |
|
|
|
|
cl_uint num_events_in_wait_list, |
|
|
|
|
const cl_event *event_wait_list, |
|
|
|
|
cl_event *event) { |
|
|
|
|
cl_kernel kernel, |
|
|
|
|
cl_uint work_dim, |
|
|
|
|
const size_t *global_work_offset, |
|
|
|
|
const size_t *global_work_size, |
|
|
|
|
const size_t *local_work_size, |
|
|
|
|
cl_uint num_events_in_wait_list, |
|
|
|
|
const cl_event *event_wait_list, |
|
|
|
|
cl_event *event) { |
|
|
|
|
|
|
|
|
|
cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, |
|
|
|
|
const size_t *, const size_t *, const size_t *, |
|
|
|
@ -59,9 +111,11 @@ cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, |
|
|
|
|
|
|
|
|
|
// get kernel name
|
|
|
|
|
const char *name = NULL; |
|
|
|
|
cl_program p; |
|
|
|
|
for (int i = 0; i < k_index; i++) { |
|
|
|
|
if (kk[i].k == kernel) { |
|
|
|
|
name = kk[i].name; |
|
|
|
|
p = kk[i].p; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -77,7 +131,7 @@ cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, |
|
|
|
|
for (int i = 0; i < work_dim; i++) { |
|
|
|
|
printf("%4zu ", global_work_size[i]); |
|
|
|
|
} |
|
|
|
|
printf("%s\n", name); |
|
|
|
|
printf("%p %s\n", p, name); |
|
|
|
|
return ret; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -89,6 +143,10 @@ void *dlsym(void *handle, const char *symbol) { |
|
|
|
|
return clEnqueueNDRangeKernel; |
|
|
|
|
} else if (strcmp("clCreateKernel", symbol) == 0) { |
|
|
|
|
return clCreateKernel; |
|
|
|
|
} else if (strcmp("clCreateProgramWithSource", symbol) == 0) { |
|
|
|
|
return clCreateProgramWithSource; |
|
|
|
|
} else if (strcmp("clCreateProgramWithBinary", symbol) == 0) { |
|
|
|
|
return clCreateProgramWithBinary; |
|
|
|
|
} else { |
|
|
|
|
printf("dlsym %s\n", symbol); |
|
|
|
|
return my_dlsym(handle, symbol); |
|
|
|
|