# include <stdio.h>
# include <stdlib.h>
# include <dlfcn.h>
# include <CL/cl.h>
# include <stdint.h>
# include <time.h>
static inline uint64_t nanos_since_boot ( ) {
struct timespec t ;
clock_gettime ( CLOCK_BOOTTIME , & t ) ;
return t . tv_sec * 1000000000ULL + t . tv_nsec ;
}
struct kernel {
cl_kernel k ;
const char * name ;
cl_program p ;
} ;
int k_index = 0 ;
struct kernel kk [ 0x1000 ] = { 0 } ;
FILE * f = NULL ;
cl_program clCreateProgramWithSource ( cl_context context ,
cl_uint count ,
const char * * strings ,
const size_t * lengths ,
cl_int * errcode_ret ) {
printf ( " clCreateProgramWithSource: %d \n " , count ) ;
if ( f = = NULL ) {
f = fopen ( " /tmp/kernels.cl " , " w " ) ;
}
fprintf ( f , " /* ************************ PROGRAM BREAK ****************************/ \n " ) ;
for ( int i = 0 ; i < count ; i + + ) {
fprintf ( f , " %s \n " , strings [ i ] ) ;
if ( i ! = 0 ) fprintf ( f , " /* ************************ SECTION BREAK ****************************/ \n " ) ;
}
fflush ( f ) ;
cl_program ( * my_clCreateProgramWithSource ) ( cl_context context ,
cl_uint count ,
const char * * strings ,
const size_t * lengths ,
cl_int * errcode_ret ) = dlsym ( RTLD_NEXT , " REAL_clCreateProgramWithSource " ) ;
return my_clCreateProgramWithSource ( context , count , strings , lengths , errcode_ret ) ;
}
cl_program clCreateProgramWithBinary ( cl_context context ,
cl_uint num_devices ,
const cl_device_id * device_list ,
const size_t * lengths ,
const unsigned char * * binaries ,
cl_int * binary_status ,
cl_int * errcode_ret ) {
printf ( " clCreateProgramWithBinary \n " ) ;
cl_program ( * my_clCreateProgramWithBinary ) ( cl_context context ,
cl_uint num_devices ,
const cl_device_id * device_list ,
const size_t * lengths ,
const unsigned char * * binaries ,
cl_int * binary_status ,
cl_int * errcode_ret ) = dlsym ( RTLD_NEXT , " REAL_clCreateProgramWithBinary " ) ;
return my_clCreateProgramWithBinary ( context , num_devices , device_list , lengths , binaries , binary_status , errcode_ret ) ;
}
cl_kernel clCreateKernel ( cl_program program , const char * kernel_name , cl_int * errcode_ret ) {
cl_kernel ( * my_clCreateKernel ) ( cl_program program , const char * kernel_name , cl_int * errcode_ret ) ;
my_clCreateKernel = dlsym ( RTLD_NEXT , " REAL_clCreateKernel " ) ;
cl_kernel ret = my_clCreateKernel ( program , kernel_name , errcode_ret ) ;
//printf("clCreateKernel: %s -> %p\n", kernel_name, ret);
char * tmp = ( char * ) malloc ( strlen ( kernel_name ) + 1 ) ;
strcpy ( tmp , kernel_name ) ;
kk [ k_index ] . k = ret ;
kk [ k_index ] . name = tmp ;
kk [ k_index ] . p = program ;
k_index + + ;
return ret ;
}
uint64_t start_time = 0 ;
int cnt = 0 ;
cl_int clEnqueueNDRangeKernel ( cl_command_queue command_queue ,
cl_kernel kernel ,
cl_uint work_dim ,
const size_t * global_work_offset ,
const size_t * global_work_size ,
const size_t * local_work_size ,
cl_uint num_events_in_wait_list ,
const cl_event * event_wait_list ,
cl_event * event ) {
cl_int ( * my_clEnqueueNDRangeKernel ) ( cl_command_queue , cl_kernel , cl_uint ,
const size_t * , const size_t * , const size_t * ,
cl_uint , const cl_event * , cl_event * ) = NULL ;
my_clEnqueueNDRangeKernel = dlsym ( RTLD_NEXT , " REAL_clEnqueueNDRangeKernel " ) ;
if ( start_time = = 0 ) {
start_time = nanos_since_boot ( ) ;
}
// get kernel name
const char * name = NULL ;
cl_program p ;
for ( int i = 0 ; i < k_index ; i + + ) {
if ( kk [ i ] . k = = kernel ) {
name = kk [ i ] . name ;
p = kk [ i ] . p ;
break ;
}
}
uint64_t tb = nanos_since_boot ( ) ;
cl_int ret = my_clEnqueueNDRangeKernel ( command_queue , kernel , work_dim ,
global_work_offset , global_work_size , local_work_size ,
num_events_in_wait_list , event_wait_list , event ) ;
uint64_t te = nanos_since_boot ( ) ;
printf ( " %10lu run%8d in %5ld us command_queue:%p work_dim:%d event:%p " , ( tb - start_time ) / 1000 , cnt + + , ( te - tb ) / 1000 ,
command_queue , work_dim , event ) ;
for ( int i = 0 ; i < work_dim ; i + + ) {
printf ( " %4zu " , global_work_size [ i ] ) ;
}
printf ( " %p %s \n " , p , name ) ;
return ret ;
}
void * dlsym ( void * handle , const char * symbol ) {
void * ( * my_dlsym ) ( void * handle , const char * symbol ) = ( void * ) dlopen - 0x2d4 ;
if ( memcmp ( " REAL_ " , symbol , 5 ) = = 0 ) {
return my_dlsym ( handle , symbol + 5 ) ;
} else if ( strcmp ( " clEnqueueNDRangeKernel " , symbol ) = = 0 ) {
return clEnqueueNDRangeKernel ;
} else if ( strcmp ( " clCreateKernel " , symbol ) = = 0 ) {
return clCreateKernel ;
} else if ( strcmp ( " clCreateProgramWithSource " , symbol ) = = 0 ) {
return clCreateProgramWithSource ;
} else if ( strcmp ( " clCreateProgramWithBinary " , symbol ) = = 0 ) {
return clCreateProgramWithBinary ;
} else {
printf ( " dlsym %s \n " , symbol ) ;
return my_dlsym ( handle , symbol ) ;
}
}