diff --git a/selfdrive/modeld/thneed/thneed.cc b/selfdrive/modeld/thneed/thneed.cc index cd1242b086..9a593d9e04 100644 --- a/selfdrive/modeld/thneed/thneed.cc +++ b/selfdrive/modeld/thneed/thneed.cc @@ -8,7 +8,7 @@ Thneed *g_thneed = NULL; int g_fd = -1; -std::map, std::string> g_args; +map, string> g_args; static inline uint64_t nanos_since_boot() { struct timespec t; @@ -43,9 +43,7 @@ int ioctl(int filedes, unsigned long request, void *argp) { if (thneed->record & 1) { thneed->timestamp = cmd->timestamp; thneed->context_id = cmd->context_id; - CachedCommand *ccmd = new CachedCommand(thneed, cmd); - //ccmd->disassemble(); - thneed->cmds.push_back(ccmd); + thneed->cmds.push_back(unique_ptr(new CachedCommand(thneed, cmd))); } if (thneed->record & 2) { printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx context_id: %u timestamp: %u\n", @@ -66,9 +64,7 @@ int ioctl(int filedes, unsigned long request, void *argp) { } if (thneed->record & 1) { - struct kgsl_gpuobj_sync_obj *new_objs = (struct kgsl_gpuobj_sync_obj *)malloc(sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count); - memcpy(new_objs, objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count); - thneed->syncobjs.push_back(std::make_pair(cmd->count, new_objs)); + thneed->syncobjs.push_back(string((char *)objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count)); } } else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) { struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp; @@ -111,6 +107,10 @@ GPUMalloc::GPUMalloc(int size, int fd) { remaining = size; } +GPUMalloc::~GPUMalloc() { + // TODO: free the GPU malloced area +} + void *GPUMalloc::alloc(int size) { if (size > remaining) return NULL; remaining -= size; @@ -170,7 +170,7 @@ void CachedCommand::exec(bool wait) { Thneed::Thneed() { assert(g_fd != -1); fd = g_fd; - ram = new GPUMalloc(0x40000, fd); + ram = make_unique(0x40000, fd); record = 1; timestamp = -1; g_thneed = this; @@ -235,9 +235,9 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) { for (auto it = syncobjs.begin(); it != syncobjs.end(); ++it) { struct kgsl_gpuobj_sync cmd; - cmd.objs = (uint64_t)it->second; - cmd.obj_len = it->first * sizeof(struct kgsl_gpuobj_sync_obj); - cmd.count = it->first; + cmd.objs = (uint64_t)it->data(); + cmd.obj_len = it->length(); + cmd.count = it->length() / sizeof(struct kgsl_gpuobj_sync_obj); ret = ioctl(fd, IOCTL_KGSL_GPUOBJ_SYNC, &cmd); assert(ret == 0); @@ -275,7 +275,7 @@ cl_int (*my_clSetKernelArg)(cl_kernel kernel, cl_uint arg_index, size_t arg_size cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) { if (my_clSetKernelArg == NULL) my_clSetKernelArg = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clSetKernelArg")); if (arg_value != NULL) { - g_args[std::make_pair(kernel, arg_index)] = std::string((char*)arg_value, arg_size); + g_args[make_pair(kernel, arg_index)] = string((char*)arg_value, arg_size); } cl_int ret = my_clSetKernelArg(kernel, arg_index, arg_size, arg_value); return ret; @@ -310,7 +310,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, for (int i = 0; i < num_args; i++) { char arg_name[0x100]; clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL); - std::string arg = g_args[std::make_pair(kernel, i)]; + string arg = g_args[make_pair(kernel, i)]; if (strcmp(arg_name, "input") == 0 && strcmp(name, "zero_pad_image_float") == 0) { cl_mem mem; @@ -343,7 +343,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, char arg_name[0x100]; clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_type), arg_type, NULL); clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL); - std::string arg = g_args[std::make_pair(kernel, i)]; + string arg = g_args[make_pair(kernel, i)]; printf(" %s %s", arg_type, arg_name); void *arg_value = (void*)arg.data(); int arg_size = arg.size(); @@ -405,7 +405,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, //#define SAVE_KERNELS #ifdef SAVE_KERNELS -std::map program_source; +map program_source; cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = NULL; cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) { diff --git a/selfdrive/modeld/thneed/thneed.h b/selfdrive/modeld/thneed/thneed.h index 89e8522570..36f0bfed7f 100644 --- a/selfdrive/modeld/thneed/thneed.h +++ b/selfdrive/modeld/thneed/thneed.h @@ -5,11 +5,14 @@ #include #include +using namespace std; + class Thneed; class GPUMalloc { public: GPUMalloc(int size, int fd); + ~GPUMalloc(); void *alloc(int size); private: uint64_t base; @@ -34,7 +37,7 @@ class Thneed { void stop(); void execute(float **finputs, float *foutput, bool slow=false); - std::vector inputs; + vector inputs; cl_mem output; cl_command_queue command_queue; @@ -43,9 +46,9 @@ class Thneed { // protected? int record; int timestamp; - GPUMalloc *ram; - std::vector cmds; - std::vector > syncobjs; + unique_ptr ram; + vector > cmds; + vector syncobjs; int fd; };