From b8571710e09e58b9e67170a9924eef79f32c480b Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Sun, 24 May 2020 03:33:36 -0700 Subject: [PATCH] remove the clCreateProgramWithSource interceptor (#1559) * remove the clCreateProgramWithSource interceptor * that's old code, thneed is better * label them thneed_, we shouldn't need to touch CL for anything not SNPE related --- selfdrive/modeld/test/opencl_hooks/build.sh | 3 - selfdrive/modeld/test/opencl_hooks/hook.c | 155 -------------------- selfdrive/modeld/thneed/thneed.cc | 22 +-- 3 files changed, 12 insertions(+), 168 deletions(-) delete mode 100755 selfdrive/modeld/test/opencl_hooks/build.sh delete mode 100644 selfdrive/modeld/test/opencl_hooks/hook.c diff --git a/selfdrive/modeld/test/opencl_hooks/build.sh b/selfdrive/modeld/test/opencl_hooks/build.sh deleted file mode 100755 index 03f8115354..0000000000 --- a/selfdrive/modeld/test/opencl_hooks/build.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -gcc -fPIC -I /data/openpilot/phonelibs/opencl/include -shared hook.c - diff --git a/selfdrive/modeld/test/opencl_hooks/hook.c b/selfdrive/modeld/test/opencl_hooks/hook.c deleted file mode 100644 index f2ee2c0d51..0000000000 --- a/selfdrive/modeld/test/opencl_hooks/hook.c +++ /dev/null @@ -1,155 +0,0 @@ -#include -#include -#include -#include -#include -#include - -static inline uint64_t nanos_since_boot() { - struct timespec t; - clock_gettime(CLOCK_BOOTTIME, &t); - return t.tv_sec * 1000000000ULL + t.tv_nsec; -} - -struct kernel { - cl_kernel k; - const char *name; - cl_program p; -}; - - -int k_index = 0; -struct kernel kk[0x1000] = {0}; - -FILE *f = NULL; - -cl_program clCreateProgramWithSource(cl_context context, - cl_uint count, - const char **strings, - const size_t *lengths, - cl_int *errcode_ret) { - printf("clCreateProgramWithSource: %d\n", count); - - if (f == NULL) { - f = fopen("/tmp/kernels.cl", "w"); - } - - fprintf(f, "/* ************************ PROGRAM BREAK ****************************/\n"); - for (int i = 0; i < count; i++) { - fprintf(f, "%s\n", strings[i]); - if (i != 0) fprintf(f, "/* ************************ SECTION BREAK ****************************/\n"); - } - fflush(f); - - cl_program (*my_clCreateProgramWithSource)(cl_context context, - cl_uint count, - const char **strings, - const size_t *lengths, - cl_int *errcode_ret) = dlsym(RTLD_NEXT, "REAL_clCreateProgramWithSource"); - - return my_clCreateProgramWithSource(context, count, strings, lengths, errcode_ret); -} - -cl_program clCreateProgramWithBinary(cl_context context, - cl_uint num_devices, - const cl_device_id *device_list, - const size_t *lengths, - const unsigned char **binaries, - cl_int *binary_status, - cl_int *errcode_ret) { - printf("clCreateProgramWithBinary\n"); - - cl_program (*my_clCreateProgramWithBinary)(cl_context context, - cl_uint num_devices, - const cl_device_id *device_list, - const size_t *lengths, - const unsigned char **binaries, - cl_int *binary_status, - cl_int *errcode_ret) = dlsym(RTLD_NEXT, "REAL_clCreateProgramWithBinary"); - - return my_clCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret); -} - -cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) { - cl_kernel (*my_clCreateKernel)(cl_program program, const char *kernel_name, cl_int *errcode_ret); - my_clCreateKernel = dlsym(RTLD_NEXT, "REAL_clCreateKernel"); - cl_kernel ret = my_clCreateKernel(program, kernel_name, errcode_ret); - //printf("clCreateKernel: %s -> %p\n", kernel_name, ret); - - char *tmp = (char*)malloc(strlen(kernel_name)+1); - strcpy(tmp, kernel_name); - - kk[k_index].k = ret; - kk[k_index].name = tmp; - kk[k_index].p = program; - k_index++; - return ret; -} - - -uint64_t start_time = 0; -int cnt = 0; - -cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) { - - cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, - const size_t *, const size_t *, const size_t *, - cl_uint, const cl_event *, cl_event *) = NULL; - my_clEnqueueNDRangeKernel = dlsym(RTLD_NEXT, "REAL_clEnqueueNDRangeKernel"); - - if (start_time == 0) { - start_time = nanos_since_boot(); - } - - // get kernel name - const char *name = NULL; - cl_program p; - for (int i = 0; i < k_index; i++) { - if (kk[i].k == kernel) { - name = kk[i].name; - p = kk[i].p; - break; - } - } - - uint64_t tb = nanos_since_boot(); - cl_int ret = my_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, - global_work_offset, global_work_size, local_work_size, - num_events_in_wait_list, event_wait_list, event); - uint64_t te = nanos_since_boot(); - - printf("%10lu run%8d in %5ld us command_queue:%p work_dim:%d event:%p ", (tb-start_time)/1000, cnt++, (te-tb)/1000, - command_queue, work_dim, event); - for (int i = 0; i < work_dim; i++) { - printf("%4zu ", global_work_size[i]); - } - printf("%p %s\n", p, name); - return ret; -} - -void *dlsym(void *handle, const char *symbol) { - void *(*my_dlsym)(void *handle, const char *symbol) = (void*)dlopen-0x2d4; - if (memcmp("REAL_", symbol, 5) == 0) { - return my_dlsym(handle, symbol+5); - } else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) { - return clEnqueueNDRangeKernel; - } else if (strcmp("clCreateKernel", symbol) == 0) { - return clCreateKernel; - } else if (strcmp("clCreateProgramWithSource", symbol) == 0) { - return clCreateProgramWithSource; - } else if (strcmp("clCreateProgramWithBinary", symbol) == 0) { - return clCreateProgramWithBinary; - } else { - printf("dlsym %s\n", symbol); - return my_dlsym(handle, symbol); - } -} - diff --git a/selfdrive/modeld/thneed/thneed.cc b/selfdrive/modeld/thneed/thneed.cc index 6bacd5440d..cd1242b086 100644 --- a/selfdrive/modeld/thneed/thneed.cc +++ b/selfdrive/modeld/thneed/thneed.cc @@ -269,8 +269,10 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) { } } +// TODO: with a different way of getting the input and output buffers, we don't have to intercept CL at all + cl_int (*my_clSetKernelArg)(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) = NULL; -cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) { +cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) { if (my_clSetKernelArg == NULL) my_clSetKernelArg = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clSetKernelArg")); if (arg_value != NULL) { g_args[std::make_pair(kernel, arg_index)] = std::string((char*)arg_value, arg_size); @@ -280,7 +282,7 @@ cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, cons } cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) = NULL; -cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, +cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, @@ -403,17 +405,15 @@ cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, //#define SAVE_KERNELS #ifdef SAVE_KERNELS - std::map program_source; -#endif +std::map program_source; cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = NULL; -cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) { +cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) { if (my_clCreateProgramWithSource == NULL) my_clCreateProgramWithSource = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clCreateProgramWithSource")); assert(count == 1); size_t my_lengths[1]; my_lengths[0] = lengths[0]; -#ifdef SAVE_KERNELS char fn[0x100]; snprintf(fn, sizeof(fn), "/tmp/program_%zu.cl", strlen(strings[0])); FILE *f = fopen(fn, "wb"); @@ -433,22 +433,24 @@ cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const ch } program_source[ret] = strings[0]; -#endif cl_program ret = my_clCreateProgramWithSource(context, count, strings, my_lengths, errcode_ret); return ret; } +#endif void *dlsym(void *handle, const char *symbol) { void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen-0x2d4); if (memcmp("REAL_", symbol, 5) == 0) { return my_dlsym(handle, symbol+5); } else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) { - return (void*)clEnqueueNDRangeKernel; + return (void*)thneed_clEnqueueNDRangeKernel; } else if (strcmp("clSetKernelArg", symbol) == 0) { - return (void*)clSetKernelArg; + return (void*)thneed_clSetKernelArg; +#ifdef SAVE_KERNELS } else if (strcmp("clCreateProgramWithSource", symbol) == 0) { - return (void*)clCreateProgramWithSource; + return (void*)thneed_clCreateProgramWithSource; +#endif } else { return my_dlsym(handle, symbol); }