diff --git a/selfdrive/modeld/thneed/thneed.cc b/selfdrive/modeld/thneed/thneed.cc
index cd1242b086..9a593d9e04 100644
--- a/selfdrive/modeld/thneed/thneed.cc
+++ b/selfdrive/modeld/thneed/thneed.cc
@@ -8,7 +8,7 @@
 
 Thneed *g_thneed = NULL;
 int g_fd = -1;
-std::map<std::pair<cl_kernel, int>, std::string> g_args;
+map<pair<cl_kernel, int>, string> g_args;
 
 static inline uint64_t nanos_since_boot() {
   struct timespec t;
@@ -43,9 +43,7 @@ int ioctl(int filedes, unsigned long request, void *argp) {
       if (thneed->record & 1) {
         thneed->timestamp = cmd->timestamp;
         thneed->context_id = cmd->context_id;
-        CachedCommand *ccmd = new CachedCommand(thneed, cmd);
-        //ccmd->disassemble();
-        thneed->cmds.push_back(ccmd);
+        thneed->cmds.push_back(unique_ptr<CachedCommand>(new CachedCommand(thneed, cmd)));
       }
       if (thneed->record & 2) {
         printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx    context_id: %u  timestamp: %u\n",
@@ -66,9 +64,7 @@ int ioctl(int filedes, unsigned long request, void *argp) {
       }
 
       if (thneed->record & 1) {
-        struct kgsl_gpuobj_sync_obj *new_objs = (struct kgsl_gpuobj_sync_obj *)malloc(sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count);
-        memcpy(new_objs, objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count);
-        thneed->syncobjs.push_back(std::make_pair(cmd->count, new_objs));
+        thneed->syncobjs.push_back(string((char *)objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count));
       }
     } else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) {
       struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp;
@@ -111,6 +107,10 @@ GPUMalloc::GPUMalloc(int size, int fd) {
   remaining = size;
 }
 
+GPUMalloc::~GPUMalloc() {
+  // TODO: free the GPU malloced area
+}
+
 void *GPUMalloc::alloc(int size) {
   if (size > remaining) return NULL;
   remaining -= size;
@@ -170,7 +170,7 @@ void CachedCommand::exec(bool wait) {
 Thneed::Thneed() {
   assert(g_fd != -1);
   fd = g_fd;
-  ram = new GPUMalloc(0x40000, fd);
+  ram = make_unique<GPUMalloc>(0x40000, fd);
   record = 1;
   timestamp = -1;
   g_thneed = this;
@@ -235,9 +235,9 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) {
   for (auto it = syncobjs.begin(); it != syncobjs.end(); ++it) {
     struct kgsl_gpuobj_sync cmd;
 
-    cmd.objs = (uint64_t)it->second;
-    cmd.obj_len = it->first * sizeof(struct kgsl_gpuobj_sync_obj);
-    cmd.count = it->first;
+    cmd.objs = (uint64_t)it->data();
+    cmd.obj_len = it->length();
+    cmd.count = it->length() / sizeof(struct kgsl_gpuobj_sync_obj);
 
     ret = ioctl(fd, IOCTL_KGSL_GPUOBJ_SYNC, &cmd);
     assert(ret == 0);
@@ -275,7 +275,7 @@ cl_int (*my_clSetKernelArg)(cl_kernel kernel, cl_uint arg_index, size_t arg_size
 cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) {
   if (my_clSetKernelArg == NULL) my_clSetKernelArg = reinterpret_cast<decltype(my_clSetKernelArg)>(dlsym(RTLD_NEXT, "REAL_clSetKernelArg"));
   if (arg_value != NULL) {
-    g_args[std::make_pair(kernel, arg_index)] = std::string((char*)arg_value, arg_size);
+    g_args[make_pair(kernel, arg_index)] = string((char*)arg_value, arg_size);
   }
   cl_int ret = my_clSetKernelArg(kernel, arg_index, arg_size, arg_value);
   return ret;
@@ -310,7 +310,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
     for (int i = 0; i < num_args; i++) {
       char arg_name[0x100];
       clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
-      std::string arg = g_args[std::make_pair(kernel, i)];
+      string arg = g_args[make_pair(kernel, i)];
 
       if (strcmp(arg_name, "input") == 0 && strcmp(name, "zero_pad_image_float") == 0) {
         cl_mem mem;
@@ -343,7 +343,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
       char arg_name[0x100];
       clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_type), arg_type, NULL);
       clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
-      std::string arg = g_args[std::make_pair(kernel, i)];
+      string arg = g_args[make_pair(kernel, i)];
       printf("  %s %s", arg_type, arg_name);
       void *arg_value = (void*)arg.data();
       int arg_size = arg.size();
@@ -405,7 +405,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
 //#define SAVE_KERNELS
 
 #ifdef SAVE_KERNELS
-std::map<cl_program, std::string> program_source;
+map<cl_program, string> program_source;
 
 cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = NULL;
 cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) {
diff --git a/selfdrive/modeld/thneed/thneed.h b/selfdrive/modeld/thneed/thneed.h
index 89e8522570..36f0bfed7f 100644
--- a/selfdrive/modeld/thneed/thneed.h
+++ b/selfdrive/modeld/thneed/thneed.h
@@ -5,11 +5,14 @@
 #include <vector>
 #include <CL/cl.h>
 
+using namespace std;
+
 class Thneed;
 
 class GPUMalloc {
   public:
     GPUMalloc(int size, int fd);
+    ~GPUMalloc();
     void *alloc(int size);
   private:
     uint64_t base;
@@ -34,7 +37,7 @@ class Thneed {
     void stop();
     void execute(float **finputs, float *foutput, bool slow=false);
 
-    std::vector<cl_mem> inputs;
+    vector<cl_mem> inputs;
     cl_mem output;
 
     cl_command_queue command_queue;
@@ -43,9 +46,9 @@ class Thneed {
     // protected?
     int record;
     int timestamp;
-    GPUMalloc *ram;
-    std::vector<CachedCommand *> cmds;
-    std::vector<std::pair<int, struct kgsl_gpuobj_sync_obj *> > syncobjs;
+    unique_ptr<GPUMalloc> ram;
+    vector<unique_ptr<CachedCommand> > cmds;
+    vector<string> syncobjs;
     int fd;
 };