From 774912075af3bb23b6e25e8921ce7da940bcc039 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Tue, 17 Mar 2020 20:04:55 -0700 Subject: [PATCH] hook opencl to profile SNPE (#1249) * hook for snpe speed * hook works * prints kernel names * add timing to hook * clean up printing Co-authored-by: Comma Device --- selfdrive/modeld/test/opencl_hooks/build.sh | 3 + selfdrive/modeld/test/opencl_hooks/hook.c | 97 +++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100755 selfdrive/modeld/test/opencl_hooks/build.sh create mode 100644 selfdrive/modeld/test/opencl_hooks/hook.c diff --git a/selfdrive/modeld/test/opencl_hooks/build.sh b/selfdrive/modeld/test/opencl_hooks/build.sh new file mode 100755 index 0000000000..03f8115354 --- /dev/null +++ b/selfdrive/modeld/test/opencl_hooks/build.sh @@ -0,0 +1,3 @@ +#!/bin/sh +gcc -fPIC -I /data/openpilot/phonelibs/opencl/include -shared hook.c + diff --git a/selfdrive/modeld/test/opencl_hooks/hook.c b/selfdrive/modeld/test/opencl_hooks/hook.c new file mode 100644 index 0000000000..17b8e32353 --- /dev/null +++ b/selfdrive/modeld/test/opencl_hooks/hook.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include + +static inline uint64_t nanos_since_boot() { + struct timespec t; + clock_gettime(CLOCK_BOOTTIME, &t); + return t.tv_sec * 1000000000ULL + t.tv_nsec; +} + +struct kernel { + cl_kernel k; + const char *name; +}; + +int k_index = 0; +struct kernel kk[0x1000] = {0}; + +cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) { + cl_kernel (*my_clCreateKernel)(cl_program program, const char *kernel_name, cl_int *errcode_ret); + my_clCreateKernel = dlsym(RTLD_NEXT, "REAL_clCreateKernel"); + cl_kernel ret = my_clCreateKernel(program, kernel_name, errcode_ret); + //printf("clCreateKernel: %s -> %p\n", kernel_name, ret); + + char *tmp = (char*)malloc(strlen(kernel_name)+1); + strcpy(tmp, kernel_name); + + kk[k_index].k = ret; + kk[k_index].name = tmp; + k_index++; + return ret; +} + + +uint64_t start_time = 0; +int cnt = 0; + +cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t *global_work_offset, + const size_t *global_work_size, + const size_t *local_work_size, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) { + + cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, + const size_t *, const size_t *, const size_t *, + cl_uint, const cl_event *, cl_event *) = NULL; + my_clEnqueueNDRangeKernel = dlsym(RTLD_NEXT, "REAL_clEnqueueNDRangeKernel"); + + if (start_time == 0) { + start_time = nanos_since_boot(); + } + + // get kernel name + const char *name = NULL; + for (int i = 0; i < k_index; i++) { + if (kk[i].k == kernel) { + name = kk[i].name; + break; + } + } + + uint64_t tb = nanos_since_boot(); + cl_int ret = my_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, + global_work_offset, global_work_size, local_work_size, + num_events_in_wait_list, event_wait_list, event); + uint64_t te = nanos_since_boot(); + + printf("%10lu run%8d in %5ld us command_queue:%p work_dim:%d event:%p ", (tb-start_time)/1000, cnt++, (te-tb)/1000, + command_queue, work_dim, event); + for (int i = 0; i < work_dim; i++) { + printf("%4zu ", global_work_size[i]); + } + printf("%s\n", name); + return ret; +} + +void *dlsym(void *handle, const char *symbol) { + void *(*my_dlsym)(void *handle, const char *symbol) = (void*)dlopen-0x2d4; + if (memcmp("REAL_", symbol, 5) == 0) { + return my_dlsym(handle, symbol+5); + } else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) { + return clEnqueueNDRangeKernel; + } else if (strcmp("clCreateKernel", symbol) == 0) { + return clCreateKernel; + } else { + printf("dlsym %s\n", symbol); + return my_dlsym(handle, symbol); + } +} +