You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							133 lines
						
					
					
						
							2.9 KiB
						
					
					
				
			
		
		
	
	
							133 lines
						
					
					
						
							2.9 KiB
						
					
					
				| #pragma once
 | |
| 
 | |
| #ifndef __user  // NOTE(review): presumably required because msm_kgsl.h (included below) marks pointers with the kernel's __user annotation — confirm
 | |
| #define __user __attribute__(())  // empty attribute list: __user expands to an annotation that applies no attributes
 | |
| #endif
 | |
| 
 | |
| #include <cstdint>
 | |
| #include <cstdlib>
 | |
| #include <memory>
 | |
| #include <string>
 | |
| #include <vector>
 | |
| 
 | |
| #include <CL/cl.h>
 | |
| 
 | |
| #include "third_party/linux/include/msm_kgsl.h"
 | |
| 
 | |
| using namespace std;
 | |
| 
 | |
| cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value);
 | |
| 
 | |
// Forward declarations. This header only needs the names: json11::Json is
// used as a declared return type and Thneed is used through pointers before
// its definition, so the complete types are not required at this point.
namespace json11 { class Json; }

class Thneed;
 | |
| 
 | |
// Hands out raw pointers through alloc(). Constructed from a size and a file
// descriptor; every member function is defined outside this header.
// NOTE(review): presumably a simple allocator carving pieces out of a single
// region obtained through `fd` — confirm against the implementation.
class GPUMalloc {
public:
  GPUMalloc(int size, int fd);
  ~GPUMalloc();

  // Returns a pointer for a request of `size`.
  // NOTE(review): units and out-of-space behaviour are not visible here — confirm.
  void *alloc(int size);

private:
  uint64_t base;  // NOTE(review): presumably the current address inside the region — confirm
  int remaining;  // NOTE(review): presumably the amount still available to alloc() — confirm
};
 | |
| 
 | |
| class CLQueuedKernel {
 | |
|   public:
 | |
|     CLQueuedKernel(Thneed *lthneed) { thneed = lthneed; }
 | |
|     CLQueuedKernel(Thneed *lthneed,
 | |
|                    cl_kernel _kernel,
 | |
|                    cl_uint _work_dim,
 | |
|                    const size_t *_global_work_size,
 | |
|                    const size_t *_local_work_size);
 | |
|     cl_int exec();
 | |
|     void debug_print(bool verbose);
 | |
|     int get_arg_num(const char *search_arg_name);
 | |
|     cl_program program;
 | |
|     string name;
 | |
|     cl_uint num_args;
 | |
|     vector<string> arg_names;
 | |
|     vector<string> arg_types;
 | |
|     vector<string> args;
 | |
|     vector<int> args_size;
 | |
|     cl_kernel kernel = NULL;
 | |
|     json11::Json to_json() const;
 | |
| 
 | |
|     cl_uint work_dim;
 | |
|     size_t global_work_size[3] = {0};
 | |
|     size_t local_work_size[3] = {0};
 | |
|   private:
 | |
|     Thneed *thneed;
 | |
| };
 | |
| 
 | |
// Polymorphic base for operations that are run through exec(). CachedSync
// and CachedCommand derive from it, and Thneed keeps such objects in
// unique_ptr<CachedIoctl>, i.e. they are destroyed through a base pointer.
class CachedIoctl {
public:
  // Virtual so that deleting a derived object through a CachedIoctl pointer
  // runs the derived destructor and releases its members; without it that
  // deletion is undefined behaviour.
  virtual ~CachedIoctl() = default;

  // Default implementation does nothing; subclasses provide the real work.
  virtual void exec() {}
};
 | |
| 
 | |
| class CachedSync: public CachedIoctl {
 | |
|   public:
 | |
|     CachedSync(Thneed *lthneed, string ldata) { thneed = lthneed; data = ldata; }
 | |
|     void exec();
 | |
|   private:
 | |
|     Thneed *thneed;
 | |
|     string data;
 | |
| };
 | |
| 
 | |
| class CachedCommand: public CachedIoctl {
 | |
|   public:
 | |
|     CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd);
 | |
|     void exec();
 | |
|   private:
 | |
|     void disassemble(int cmd_index);
 | |
|     struct kgsl_gpu_command cache;
 | |
|     unique_ptr<kgsl_command_object[]> cmds;
 | |
|     unique_ptr<kgsl_command_object[]> objs;
 | |
|     Thneed *thneed;
 | |
|     vector<shared_ptr<CLQueuedKernel> > kq;
 | |
| };
 | |
| 
 | |
| class Thneed {
 | |
|   public:
 | |
|     Thneed(bool do_clinit=false, cl_context _context = NULL);
 | |
|     void stop();
 | |
|     void execute(float **finputs, float *foutput, bool slow=false);
 | |
|     void wait();
 | |
| 
 | |
|     vector<cl_mem> input_clmem;
 | |
|     vector<void *> inputs;
 | |
|     vector<size_t> input_sizes;
 | |
|     cl_mem output = NULL;
 | |
| 
 | |
|     cl_context context = NULL;
 | |
|     cl_command_queue command_queue;
 | |
|     cl_device_id device_id;
 | |
|     int context_id;
 | |
| 
 | |
|     // protected?
 | |
|     bool record = false;
 | |
|     int debug;
 | |
|     int timestamp;
 | |
| 
 | |
| #ifdef QCOM2
 | |
|     unique_ptr<GPUMalloc> ram;
 | |
|     vector<unique_ptr<CachedIoctl> > cmds;
 | |
|     int fd;
 | |
| #endif
 | |
| 
 | |
|     // all CL kernels
 | |
|     void copy_inputs(float **finputs, bool internal=false);
 | |
|     void copy_output(float *foutput);
 | |
|     cl_int clexec();
 | |
|     vector<shared_ptr<CLQueuedKernel> > kq;
 | |
| 
 | |
|     // pending CL kernels
 | |
|     vector<shared_ptr<CLQueuedKernel> > ckq;
 | |
| 
 | |
|     // loading
 | |
|     void load(const char *filename);
 | |
|   private:
 | |
|     void clinit();
 | |
| };
 | |
| 
 | |
| 
 |