|
|
|
@ -8,7 +8,7 @@ |
|
|
|
|
|
|
|
|
|
Thneed *g_thneed = NULL; |
|
|
|
|
int g_fd = -1; |
|
|
|
|
std::map<std::pair<cl_kernel, int>, std::string> g_args; |
|
|
|
|
map<pair<cl_kernel, int>, string> g_args; |
|
|
|
|
|
|
|
|
|
static inline uint64_t nanos_since_boot() { |
|
|
|
|
struct timespec t; |
|
|
|
@ -43,9 +43,7 @@ int ioctl(int filedes, unsigned long request, void *argp) { |
|
|
|
|
if (thneed->record & 1) { |
|
|
|
|
thneed->timestamp = cmd->timestamp; |
|
|
|
|
thneed->context_id = cmd->context_id; |
|
|
|
|
CachedCommand *ccmd = new CachedCommand(thneed, cmd); |
|
|
|
|
//ccmd->disassemble();
|
|
|
|
|
thneed->cmds.push_back(ccmd); |
|
|
|
|
thneed->cmds.push_back(unique_ptr<CachedCommand>(new CachedCommand(thneed, cmd))); |
|
|
|
|
} |
|
|
|
|
if (thneed->record & 2) { |
|
|
|
|
printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx context_id: %u timestamp: %u\n", |
|
|
|
@ -66,9 +64,7 @@ int ioctl(int filedes, unsigned long request, void *argp) { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (thneed->record & 1) { |
|
|
|
|
struct kgsl_gpuobj_sync_obj *new_objs = (struct kgsl_gpuobj_sync_obj *)malloc(sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count); |
|
|
|
|
memcpy(new_objs, objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count); |
|
|
|
|
thneed->syncobjs.push_back(std::make_pair(cmd->count, new_objs)); |
|
|
|
|
thneed->syncobjs.push_back(string((char *)objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count)); |
|
|
|
|
} |
|
|
|
|
} else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) { |
|
|
|
|
struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp; |
|
|
|
@ -111,6 +107,10 @@ GPUMalloc::GPUMalloc(int size, int fd) { |
|
|
|
|
remaining = size; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
GPUMalloc::~GPUMalloc() { |
|
|
|
|
// TODO: free the GPU malloced area
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void *GPUMalloc::alloc(int size) { |
|
|
|
|
if (size > remaining) return NULL; |
|
|
|
|
remaining -= size; |
|
|
|
@ -170,7 +170,7 @@ void CachedCommand::exec(bool wait) { |
|
|
|
|
Thneed::Thneed() { |
|
|
|
|
assert(g_fd != -1); |
|
|
|
|
fd = g_fd; |
|
|
|
|
ram = new GPUMalloc(0x40000, fd); |
|
|
|
|
ram = make_unique<GPUMalloc>(0x40000, fd); |
|
|
|
|
record = 1; |
|
|
|
|
timestamp = -1; |
|
|
|
|
g_thneed = this; |
|
|
|
@ -235,9 +235,9 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) { |
|
|
|
|
for (auto it = syncobjs.begin(); it != syncobjs.end(); ++it) { |
|
|
|
|
struct kgsl_gpuobj_sync cmd; |
|
|
|
|
|
|
|
|
|
cmd.objs = (uint64_t)it->second; |
|
|
|
|
cmd.obj_len = it->first * sizeof(struct kgsl_gpuobj_sync_obj); |
|
|
|
|
cmd.count = it->first; |
|
|
|
|
cmd.objs = (uint64_t)it->data(); |
|
|
|
|
cmd.obj_len = it->length(); |
|
|
|
|
cmd.count = it->length() / sizeof(struct kgsl_gpuobj_sync_obj); |
|
|
|
|
|
|
|
|
|
ret = ioctl(fd, IOCTL_KGSL_GPUOBJ_SYNC, &cmd); |
|
|
|
|
assert(ret == 0); |
|
|
|
@ -275,7 +275,7 @@ cl_int (*my_clSetKernelArg)(cl_kernel kernel, cl_uint arg_index, size_t arg_size |
|
|
|
|
cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) { |
|
|
|
|
if (my_clSetKernelArg == NULL) my_clSetKernelArg = reinterpret_cast<decltype(my_clSetKernelArg)>(dlsym(RTLD_NEXT, "REAL_clSetKernelArg")); |
|
|
|
|
if (arg_value != NULL) { |
|
|
|
|
g_args[std::make_pair(kernel, arg_index)] = std::string((char*)arg_value, arg_size); |
|
|
|
|
g_args[make_pair(kernel, arg_index)] = string((char*)arg_value, arg_size); |
|
|
|
|
} |
|
|
|
|
cl_int ret = my_clSetKernelArg(kernel, arg_index, arg_size, arg_value); |
|
|
|
|
return ret; |
|
|
|
@ -310,7 +310,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, |
|
|
|
|
for (int i = 0; i < num_args; i++) { |
|
|
|
|
char arg_name[0x100]; |
|
|
|
|
clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL); |
|
|
|
|
std::string arg = g_args[std::make_pair(kernel, i)]; |
|
|
|
|
string arg = g_args[make_pair(kernel, i)]; |
|
|
|
|
|
|
|
|
|
if (strcmp(arg_name, "input") == 0 && strcmp(name, "zero_pad_image_float") == 0) { |
|
|
|
|
cl_mem mem; |
|
|
|
@ -343,7 +343,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, |
|
|
|
|
char arg_name[0x100]; |
|
|
|
|
clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_type), arg_type, NULL); |
|
|
|
|
clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL); |
|
|
|
|
std::string arg = g_args[std::make_pair(kernel, i)]; |
|
|
|
|
string arg = g_args[make_pair(kernel, i)]; |
|
|
|
|
printf(" %s %s", arg_type, arg_name); |
|
|
|
|
void *arg_value = (void*)arg.data(); |
|
|
|
|
int arg_size = arg.size(); |
|
|
|
@ -405,7 +405,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, |
|
|
|
|
//#define SAVE_KERNELS
|
|
|
|
|
|
|
|
|
|
#ifdef SAVE_KERNELS |
|
|
|
|
std::map<cl_program, std::string> program_source; |
|
|
|
|
map<cl_program, string> program_source; |
|
|
|
|
|
|
|
|
|
cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = NULL; |
|
|
|
|
cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) { |
|
|
|
|