c++ify thneed to remove memory leaks (#1737)

Co-authored-by: Comma Device <device@comma.ai>
5 years ago · 01a486308d
parent 78b428e534
commit 01a486308d
2 changed files with 22 additions and 19 deletions
--- a/selfdrive/modeld/thneed/thneed.cc
+++ b/selfdrive/modeld/thneed/thneed.cc
@ -8,7 +8,7 @@

 Thneed *g_thneed = NULL;
 int g_fd = -1;
-std::map<std::pair<cl_kernel, int>, std::string> g_args;
+map<pair<cl_kernel, int>, string> g_args;

 static inline uint64_t nanos_since_boot() {
  struct timespec t;
@ -43,9 +43,7 @@ int ioctl(int filedes, unsigned long request, void *argp) {
      if (thneed->record & 1) {
        thneed->timestamp = cmd->timestamp;
        thneed->context_id = cmd->context_id;
-        CachedCommand *ccmd = new CachedCommand(thneed, cmd);
-        //ccmd->disassemble();
-        thneed->cmds.push_back(ccmd);
+        thneed->cmds.push_back(unique_ptr<CachedCommand>(new CachedCommand(thneed, cmd)));
      }
      if (thneed->record & 2) {
        printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx    context_id: %u  timestamp: %u\n",
@ -66,9 +64,7 @@ int ioctl(int filedes, unsigned long request, void *argp) {
      }

      if (thneed->record & 1) {
-        struct kgsl_gpuobj_sync_obj *new_objs = (struct kgsl_gpuobj_sync_obj *)malloc(sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count);
-        memcpy(new_objs, objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count);
-        thneed->syncobjs.push_back(std::make_pair(cmd->count, new_objs));
+        thneed->syncobjs.push_back(string((char *)objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count));
      }
    } else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) {
      struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp;
@ -111,6 +107,10 @@ GPUMalloc::GPUMalloc(int size, int fd) {
  remaining = size;
 }

+GPUMalloc::~GPUMalloc() {
+  // TODO: free the GPU malloced area; this destructor body is empty, so nothing is released here yet
+}
+
 void *GPUMalloc::alloc(int size) {
  if (size > remaining) return NULL;
  remaining -= size;
@ -170,7 +170,7 @@ void CachedCommand::exec(bool wait) {
 Thneed::Thneed() {
  assert(g_fd != -1);
  fd = g_fd;
-  ram = new GPUMalloc(0x40000, fd);
+  ram = make_unique<GPUMalloc>(0x40000, fd);
  record = 1;
  timestamp = -1;
  g_thneed = this;
@ -235,9 +235,9 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) {
  for (auto it = syncobjs.begin(); it != syncobjs.end(); ++it) {
    struct kgsl_gpuobj_sync cmd;

-    cmd.objs = (uint64_t)it->second;
-    cmd.obj_len = it->first * sizeof(struct kgsl_gpuobj_sync_obj);
-    cmd.count = it->first;
+    cmd.objs = (uint64_t)it->data();
+    cmd.obj_len = it->length();
+    cmd.count = it->length() / sizeof(struct kgsl_gpuobj_sync_obj);

    ret = ioctl(fd, IOCTL_KGSL_GPUOBJ_SYNC, &cmd);
    assert(ret == 0);
@ -275,7 +275,7 @@ cl_int (*my_clSetKernelArg)(cl_kernel kernel, cl_uint arg_index, size_t arg_size
 cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) {
  if (my_clSetKernelArg == NULL) my_clSetKernelArg = reinterpret_cast<decltype(my_clSetKernelArg)>(dlsym(RTLD_NEXT, "REAL_clSetKernelArg"));
  if (arg_value != NULL) {
-    g_args[std::make_pair(kernel, arg_index)] = std::string((char*)arg_value, arg_size);
+    g_args[make_pair(kernel, arg_index)] = string((char*)arg_value, arg_size);
  }
  cl_int ret = my_clSetKernelArg(kernel, arg_index, arg_size, arg_value);
  return ret;
@ -310,7 +310,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
    for (int i = 0; i < num_args; i++) {
      char arg_name[0x100];
      clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
-      std::string arg = g_args[std::make_pair(kernel, i)];
+      string arg = g_args[make_pair(kernel, i)];

      if (strcmp(arg_name, "input") == 0 && strcmp(name, "zero_pad_image_float") == 0) {
        cl_mem mem;
@ -343,7 +343,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
      char arg_name[0x100];
      clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_type), arg_type, NULL);
      clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
-      std::string arg = g_args[std::make_pair(kernel, i)];
+      string arg = g_args[make_pair(kernel, i)];
      printf("  %s %s", arg_type, arg_name);
      void *arg_value = (void*)arg.data();
      int arg_size = arg.size();
@ -405,7 +405,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
 //#define SAVE_KERNELS

 #ifdef SAVE_KERNELS
-std::map<cl_program, std::string> program_source;
+map<cl_program, string> program_source;

 cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = NULL;
 cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) {
--- a/selfdrive/modeld/thneed/thneed.h
+++ b/selfdrive/modeld/thneed/thneed.h
@ -5,11 +5,14 @@
 #include <vector>
 #include <CL/cl.h>

+using namespace std;
+
 class Thneed;

 class GPUMalloc {
  public:
    GPUMalloc(int size, int fd);
+    ~GPUMalloc();
    void *alloc(int size);
  private:
    uint64_t base;
@ -34,7 +37,7 @@ class Thneed {
    void stop();
    void execute(float **finputs, float *foutput, bool slow=false);

-    std::vector<cl_mem> inputs;
+    vector<cl_mem> inputs;
    cl_mem output;

    cl_command_queue command_queue;
@ -43,9 +46,9 @@ class Thneed {
    // protected?
    int record;
    int timestamp;
-    GPUMalloc *ram;
-    std::vector<CachedCommand *> cmds;
-    std::vector<std::pair<int, struct kgsl_gpuobj_sync_obj *> > syncobjs;
+    unique_ptr<GPUMalloc> ram;
+    vector<unique_ptr<CachedCommand> > cmds;
+    vector<string> syncobjs;
    int fd;
 };