#include #include #include #include #include #include static inline uint64_t nanos_since_boot() { struct timespec t; clock_gettime(CLOCK_BOOTTIME, &t); return t.tv_sec * 1000000000ULL + t.tv_nsec; } struct kernel { cl_kernel k; const char *name; cl_program p; }; int k_index = 0; struct kernel kk[0x1000] = {0}; FILE *f = NULL; cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) { printf("clCreateProgramWithSource: %d\n", count); if (f == NULL) { f = fopen("/tmp/kernels.cl", "w"); } fprintf(f, "/* ************************ PROGRAM BREAK ****************************/\n"); for (int i = 0; i < count; i++) { fprintf(f, "%s\n", strings[i]); if (i != 0) fprintf(f, "/* ************************ SECTION BREAK ****************************/\n"); } fflush(f); cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = dlsym(RTLD_NEXT, "REAL_clCreateProgramWithSource"); return my_clCreateProgramWithSource(context, count, strings, lengths, errcode_ret); } cl_program clCreateProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) { printf("clCreateProgramWithBinary\n"); cl_program (*my_clCreateProgramWithBinary)(cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) = dlsym(RTLD_NEXT, "REAL_clCreateProgramWithBinary"); return my_clCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret); } cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) { cl_kernel (*my_clCreateKernel)(cl_program program, const char *kernel_name, cl_int *errcode_ret); my_clCreateKernel = dlsym(RTLD_NEXT, "REAL_clCreateKernel"); cl_kernel ret = my_clCreateKernel(program, kernel_name, errcode_ret); //printf("clCreateKernel: %s -> %p\n", kernel_name, ret); char *tmp = (char*)malloc(strlen(kernel_name)+1); strcpy(tmp, kernel_name); kk[k_index].k = ret; kk[k_index].name = tmp; kk[k_index].p = program; k_index++; return ret; } uint64_t start_time = 0; int cnt = 0; cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) = NULL; my_clEnqueueNDRangeKernel = dlsym(RTLD_NEXT, "REAL_clEnqueueNDRangeKernel"); if (start_time == 0) { start_time = nanos_since_boot(); } // get kernel name const char *name = NULL; cl_program p; for (int i = 0; i < k_index; i++) { if (kk[i].k == kernel) { name = kk[i].name; p = kk[i].p; break; } } uint64_t tb = nanos_since_boot(); cl_int ret = my_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, global_work_offset, global_work_size, local_work_size, num_events_in_wait_list, event_wait_list, event); uint64_t te = nanos_since_boot(); printf("%10lu run%8d in %5ld us command_queue:%p work_dim:%d event:%p ", (tb-start_time)/1000, cnt++, (te-tb)/1000, command_queue, work_dim, event); for (int i = 0; i < work_dim; i++) { printf("%4zu ", global_work_size[i]); } printf("%p %s\n", p, name); return ret; } void *dlsym(void *handle, const char *symbol) { void *(*my_dlsym)(void *handle, const char *symbol) = (void*)dlopen-0x2d4; if (memcmp("REAL_", symbol, 5) == 0) { return my_dlsym(handle, symbol+5); } else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) { return clEnqueueNDRangeKernel; } else if (strcmp("clCreateKernel", symbol) == 0) { return clCreateKernel; } else if (strcmp("clCreateProgramWithSource", symbol) == 0) { return clCreateProgramWithSource; } else if (strcmp("clCreateProgramWithBinary", symbol) == 0) { return clCreateProgramWithBinary; } else { printf("dlsym %s\n", symbol); return my_dlsym(handle, symbol); } }