You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							133 lines
						
					
					
						
							2.9 KiB
						
					
					
				
			
		
		
	
	
							133 lines
						
					
					
						
							2.9 KiB
						
					
					
				#pragma once
 | 
						|
 | 
						|
#ifndef __user
 | 
						|
#define __user __attribute__(())
 | 
						|
#endif
 | 
						|
 | 
						|
#include <cstdint>
 | 
						|
#include <cstdlib>
 | 
						|
#include <memory>
 | 
						|
#include <string>
 | 
						|
#include <vector>
 | 
						|
 | 
						|
#include <CL/cl.h>
 | 
						|
 | 
						|
#include "third_party/linux/include/msm_kgsl.h"
 | 
						|
 | 
						|
using namespace std;
 | 
						|
 | 
						|
cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value);
 | 
						|
 | 
						|
namespace json11 {
 | 
						|
  class Json;
 | 
						|
}
 | 
						|
class Thneed;
 | 
						|
 | 
						|
class GPUMalloc {
 | 
						|
  public:
 | 
						|
    GPUMalloc(int size, int fd);
 | 
						|
    ~GPUMalloc();
 | 
						|
    void *alloc(int size);
 | 
						|
  private:
 | 
						|
    uint64_t base;
 | 
						|
    int remaining;
 | 
						|
};
 | 
						|
 | 
						|
class CLQueuedKernel {
 | 
						|
  public:
 | 
						|
    CLQueuedKernel(Thneed *lthneed) { thneed = lthneed; }
 | 
						|
    CLQueuedKernel(Thneed *lthneed,
 | 
						|
                   cl_kernel _kernel,
 | 
						|
                   cl_uint _work_dim,
 | 
						|
                   const size_t *_global_work_size,
 | 
						|
                   const size_t *_local_work_size);
 | 
						|
    cl_int exec();
 | 
						|
    void debug_print(bool verbose);
 | 
						|
    int get_arg_num(const char *search_arg_name);
 | 
						|
    cl_program program;
 | 
						|
    string name;
 | 
						|
    cl_uint num_args;
 | 
						|
    vector<string> arg_names;
 | 
						|
    vector<string> arg_types;
 | 
						|
    vector<string> args;
 | 
						|
    vector<int> args_size;
 | 
						|
    cl_kernel kernel = NULL;
 | 
						|
    json11::Json to_json() const;
 | 
						|
 | 
						|
    cl_uint work_dim;
 | 
						|
    size_t global_work_size[3] = {0};
 | 
						|
    size_t local_work_size[3] = {0};
 | 
						|
  private:
 | 
						|
    Thneed *thneed;
 | 
						|
};
 | 
						|
 | 
						|
class CachedIoctl {
 | 
						|
  public:
 | 
						|
    virtual void exec() {}
 | 
						|
};
 | 
						|
 | 
						|
class CachedSync: public CachedIoctl {
 | 
						|
  public:
 | 
						|
    CachedSync(Thneed *lthneed, string ldata) { thneed = lthneed; data = ldata; }
 | 
						|
    void exec();
 | 
						|
  private:
 | 
						|
    Thneed *thneed;
 | 
						|
    string data;
 | 
						|
};
 | 
						|
 | 
						|
class CachedCommand: public CachedIoctl {
 | 
						|
  public:
 | 
						|
    CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd);
 | 
						|
    void exec();
 | 
						|
  private:
 | 
						|
    void disassemble(int cmd_index);
 | 
						|
    struct kgsl_gpu_command cache;
 | 
						|
    unique_ptr<kgsl_command_object[]> cmds;
 | 
						|
    unique_ptr<kgsl_command_object[]> objs;
 | 
						|
    Thneed *thneed;
 | 
						|
    vector<shared_ptr<CLQueuedKernel> > kq;
 | 
						|
};
 | 
						|
 | 
						|
class Thneed {
 | 
						|
  public:
 | 
						|
    Thneed(bool do_clinit=false, cl_context _context = NULL);
 | 
						|
    void stop();
 | 
						|
    void execute(float **finputs, float *foutput, bool slow=false);
 | 
						|
    void wait();
 | 
						|
 | 
						|
    vector<cl_mem> input_clmem;
 | 
						|
    vector<void *> inputs;
 | 
						|
    vector<size_t> input_sizes;
 | 
						|
    cl_mem output = NULL;
 | 
						|
 | 
						|
    cl_context context = NULL;
 | 
						|
    cl_command_queue command_queue;
 | 
						|
    cl_device_id device_id;
 | 
						|
    int context_id;
 | 
						|
 | 
						|
    // protected?
 | 
						|
    bool record = false;
 | 
						|
    int debug;
 | 
						|
    int timestamp;
 | 
						|
 | 
						|
#ifdef QCOM2
 | 
						|
    unique_ptr<GPUMalloc> ram;
 | 
						|
    vector<unique_ptr<CachedIoctl> > cmds;
 | 
						|
    int fd;
 | 
						|
#endif
 | 
						|
 | 
						|
    // all CL kernels
 | 
						|
    void copy_inputs(float **finputs, bool internal=false);
 | 
						|
    void copy_output(float *foutput);
 | 
						|
    cl_int clexec();
 | 
						|
    vector<shared_ptr<CLQueuedKernel> > kq;
 | 
						|
 | 
						|
    // pending CL kernels
 | 
						|
    vector<shared_ptr<CLQueuedKernel> > ckq;
 | 
						|
 | 
						|
    // loading
 | 
						|
    void load(const char *filename);
 | 
						|
  private:
 | 
						|
    void clinit();
 | 
						|
};
 | 
						|
 | 
						|
 |