| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -14,11 +14,62 @@ static inline uint64_t nanos_since_boot() { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					struct kernel { | 
					 | 
					 | 
					 | 
					struct kernel { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  cl_kernel k; | 
					 | 
					 | 
					 | 
					  cl_kernel k; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  const char *name; | 
					 | 
					 | 
					 | 
					  const char *name; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  cl_program p; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					}; | 
					 | 
					 | 
					 | 
					}; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					int k_index = 0; | 
					 | 
					 | 
					 | 
					int k_index = 0; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					struct kernel kk[0x1000] = {0}; | 
					 | 
					 | 
					 | 
					struct kernel kk[0x1000] = {0}; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					FILE *f = NULL; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					cl_program clCreateProgramWithSource(cl_context context, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  cl_uint count, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  const char **strings, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  const size_t *lengths, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  cl_int *errcode_ret) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  printf("clCreateProgramWithSource: %d\n", count); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  if (f == NULL) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    f = fopen("/tmp/kernels.cl", "w"); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  } | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  fprintf(f, "/* ************************ PROGRAM BREAK ****************************/\n"); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  for (int i = 0; i < count; i++) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    fprintf(f, "%s\n", strings[i]); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (i != 0) fprintf(f, "/* ************************ SECTION BREAK ****************************/\n"); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  } | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  fflush(f); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  cl_program (*my_clCreateProgramWithSource)(cl_context context, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    cl_uint count, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    const char **strings, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    const size_t *lengths, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    cl_int *errcode_ret) = dlsym(RTLD_NEXT, "REAL_clCreateProgramWithSource"); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  return my_clCreateProgramWithSource(context, count, strings, lengths, errcode_ret); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					cl_program clCreateProgramWithBinary(cl_context context, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  cl_uint num_devices, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  const cl_device_id *device_list, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  const size_t *lengths, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  const unsigned char **binaries, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  cl_int *binary_status, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  cl_int *errcode_ret) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  printf("clCreateProgramWithBinary\n"); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  cl_program (*my_clCreateProgramWithBinary)(cl_context context, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    cl_uint num_devices, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    const cl_device_id *device_list, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    const size_t *lengths, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    const unsigned char **binaries, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    cl_int *binary_status, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    cl_int *errcode_ret) = dlsym(RTLD_NEXT, "REAL_clCreateProgramWithBinary"); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  return my_clCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) { | 
					 | 
					 | 
					 | 
					cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  cl_kernel (*my_clCreateKernel)(cl_program program, const char *kernel_name, cl_int *errcode_ret); | 
					 | 
					 | 
					 | 
					  cl_kernel (*my_clCreateKernel)(cl_program program, const char *kernel_name, cl_int *errcode_ret); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  my_clCreateKernel = dlsym(RTLD_NEXT, "REAL_clCreateKernel"); | 
					 | 
					 | 
					 | 
					  my_clCreateKernel = dlsym(RTLD_NEXT, "REAL_clCreateKernel"); | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -30,6 +81,7 @@ cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *er | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  kk[k_index].k = ret; | 
					 | 
					 | 
					 | 
					  kk[k_index].k = ret; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  kk[k_index].name = tmp; | 
					 | 
					 | 
					 | 
					  kk[k_index].name = tmp; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  kk[k_index].p = program; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  k_index++; | 
					 | 
					 | 
					 | 
					  k_index++; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  return ret; | 
					 | 
					 | 
					 | 
					  return ret; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					} | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -39,14 +91,14 @@ uint64_t start_time = 0; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					int cnt = 0; | 
					 | 
					 | 
					 | 
					int cnt = 0; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, | 
					 | 
					 | 
					 | 
					cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 	cl_kernel kernel, | 
					 | 
					 | 
					 | 
					  cl_kernel kernel, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 	cl_uint work_dim, | 
					 | 
					 | 
					 | 
					  cl_uint work_dim, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 	const size_t *global_work_offset, | 
					 | 
					 | 
					 | 
					  const size_t *global_work_offset, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 	const size_t *global_work_size, | 
					 | 
					 | 
					 | 
					  const size_t *global_work_size, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 	const size_t *local_work_size, | 
					 | 
					 | 
					 | 
					  const size_t *local_work_size, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 	cl_uint num_events_in_wait_list, | 
					 | 
					 | 
					 | 
					  cl_uint num_events_in_wait_list, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 	const cl_event *event_wait_list, | 
					 | 
					 | 
					 | 
					  const cl_event *event_wait_list, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 	cl_event *event) { | 
					 | 
					 | 
					 | 
					  cl_event *event) { | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, | 
					 | 
					 | 
					 | 
					  cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    const size_t *, const size_t *, const size_t *, | 
					 | 
					 | 
					 | 
					    const size_t *, const size_t *, const size_t *, | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -59,9 +111,11 @@ cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  // get kernel name
 | 
					 | 
					 | 
					 | 
					  // get kernel name
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  const char *name = NULL; | 
					 | 
					 | 
					 | 
					  const char *name = NULL; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  cl_program p; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  for (int i = 0; i < k_index; i++) { | 
					 | 
					 | 
					 | 
					  for (int i = 0; i < k_index; i++) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if (kk[i].k == kernel) { | 
					 | 
					 | 
					 | 
					    if (kk[i].k == kernel) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					      name = kk[i].name; | 
					 | 
					 | 
					 | 
					      name = kk[i].name; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      p = kk[i].p; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					      break; | 
					 | 
					 | 
					 | 
					      break; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    } | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  } | 
					 | 
					 | 
					 | 
					  } | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -77,7 +131,7 @@ cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  for (int i = 0; i < work_dim; i++) { | 
					 | 
					 | 
					 | 
					  for (int i = 0; i < work_dim; i++) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    printf("%4zu ", global_work_size[i]); | 
					 | 
					 | 
					 | 
					    printf("%4zu ", global_work_size[i]); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  } | 
					 | 
					 | 
					 | 
					  } | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  printf("%s\n", name); | 
					 | 
					 | 
					 | 
					  printf("%p %s\n", p, name); | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  return ret; | 
					 | 
					 | 
					 | 
					  return ret; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					} | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -89,6 +143,10 @@ void *dlsym(void *handle, const char *symbol) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    return clEnqueueNDRangeKernel; | 
					 | 
					 | 
					 | 
					    return clEnqueueNDRangeKernel; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  } else if (strcmp("clCreateKernel", symbol) == 0) { | 
					 | 
					 | 
					 | 
					  } else if (strcmp("clCreateKernel", symbol) == 0) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    return clCreateKernel; | 
					 | 
					 | 
					 | 
					    return clCreateKernel; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  } else if (strcmp("clCreateProgramWithSource", symbol) == 0) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    return clCreateProgramWithSource; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  } else if (strcmp("clCreateProgramWithBinary", symbol) == 0) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    return clCreateProgramWithBinary; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  } else { | 
					 | 
					 | 
					 | 
					  } else { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    printf("dlsym %s\n", symbol); | 
					 | 
					 | 
					 | 
					    printf("dlsym %s\n", symbol); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    return my_dlsym(handle, symbol); | 
					 | 
					 | 
					 | 
					    return my_dlsym(handle, symbol); | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
  |