openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

258 lines
7.9 KiB

#include "selfdrive/modeld/thneed/thneed.h"
#include <dlfcn.h>
#include <sys/mman.h>
#include <cassert>
#include <cerrno>
#include <cstring>
#include <map>
#include <string>
#include "common/clutil.h"
#include "common/timing.h"
Thneed *g_thneed = NULL;
int g_fd = -1;
void hexdump(uint8_t *d, int len) {
assert((len%4) == 0);
printf(" dumping %p len 0x%x\n", d, len);
for (int i = 0; i < len/4; i++) {
if (i != 0 && (i%0x10) == 0) printf("\n");
printf("%8x ", d[i]);
}
printf("\n");
}
// *********** ioctl interceptor ***********
extern "C" {
int (*my_ioctl)(int filedes, unsigned long request, void *argp) = NULL;
#undef ioctl
int ioctl(int filedes, unsigned long request, void *argp) {
request &= 0xFFFFFFFF; // needed on QCOM2
if (my_ioctl == NULL) my_ioctl = reinterpret_cast<decltype(my_ioctl)>(dlsym(RTLD_NEXT, "ioctl"));
Thneed *thneed = g_thneed;
// save the fd
if (request == IOCTL_KGSL_GPUOBJ_ALLOC) g_fd = filedes;
// note that this runs always, even without a thneed object
if (request == IOCTL_KGSL_DRAWCTXT_CREATE) {
struct kgsl_drawctxt_create *create = (struct kgsl_drawctxt_create *)argp;
create->flags &= ~KGSL_CONTEXT_PRIORITY_MASK;
create->flags |= 6 << KGSL_CONTEXT_PRIORITY_SHIFT; // priority from 1-15, 1 is max priority
printf("IOCTL_KGSL_DRAWCTXT_CREATE: creating context with flags 0x%x\n", create->flags);
}
if (thneed != NULL) {
if (request == IOCTL_KGSL_GPU_COMMAND) {
struct kgsl_gpu_command *cmd = (struct kgsl_gpu_command *)argp;
if (thneed->record) {
thneed->timestamp = cmd->timestamp;
thneed->context_id = cmd->context_id;
thneed->cmds.push_back(unique_ptr<CachedCommand>(new CachedCommand(thneed, cmd)));
}
if (thneed->debug >= 1) {
printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx context_id: %u timestamp: %u numcmds: %d numobjs: %d\n",
thneed->cmds.size(),
cmd->flags,
cmd->context_id, cmd->timestamp, cmd->numcmds, cmd->numobjs);
}
} else if (request == IOCTL_KGSL_GPUOBJ_SYNC) {
struct kgsl_gpuobj_sync *cmd = (struct kgsl_gpuobj_sync *)argp;
struct kgsl_gpuobj_sync_obj *objs = (struct kgsl_gpuobj_sync_obj *)(cmd->objs);
if (thneed->debug >= 2) {
printf("IOCTL_KGSL_GPUOBJ_SYNC count:%d ", cmd->count);
for (int i = 0; i < cmd->count; i++) {
printf(" -- offset:0x%lx len:0x%lx id:%d op:%d ", objs[i].offset, objs[i].length, objs[i].id, objs[i].op);
}
printf("\n");
}
if (thneed->record) {
thneed->cmds.push_back(unique_ptr<CachedSync>(new
CachedSync(thneed, string((char *)objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count))));
}
} else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) {
struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp;
if (thneed->debug >= 1) {
printf("IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID: context_id: %d timestamp: %d timeout: %d\n",
cmd->context_id, cmd->timestamp, cmd->timeout);
}
} else if (request == IOCTL_KGSL_SETPROPERTY) {
if (thneed->debug >= 1) {
struct kgsl_device_getproperty *prop = (struct kgsl_device_getproperty *)argp;
printf("IOCTL_KGSL_SETPROPERTY: 0x%x sizebytes:%zu\n", prop->type, prop->sizebytes);
if (thneed->debug >= 2) {
hexdump((uint8_t *)prop->value, prop->sizebytes);
if (prop->type == KGSL_PROP_PWR_CONSTRAINT) {
struct kgsl_device_constraint *constraint = (struct kgsl_device_constraint *)prop->value;
hexdump((uint8_t *)constraint->data, constraint->size);
}
}
}
} else if (request == IOCTL_KGSL_DRAWCTXT_CREATE || request == IOCTL_KGSL_DRAWCTXT_DESTROY) {
// this happens
} else if (request == IOCTL_KGSL_GPUOBJ_ALLOC || request == IOCTL_KGSL_GPUOBJ_FREE) {
// this happens
} else {
if (thneed->debug >= 1) {
printf("other ioctl %lx\n", request);
}
}
}
int ret = my_ioctl(filedes, request, argp);
// NOTE: This error message goes into stdout and messes up pyenv
// if (ret != 0) printf("ioctl returned %d with errno %d\n", ret, errno);
return ret;
}
}
// *********** GPUMalloc ***********
GPUMalloc::GPUMalloc(int size, int fd) {
struct kgsl_gpuobj_alloc alloc;
memset(&alloc, 0, sizeof(alloc));
alloc.size = size;
alloc.flags = 0x10000a00;
ioctl(fd, IOCTL_KGSL_GPUOBJ_ALLOC, &alloc);
void *addr = mmap64(NULL, alloc.mmapsize, 0x3, 0x1, fd, alloc.id*0x1000);
assert(addr != MAP_FAILED);
base = (uint64_t)addr;
remaining = size;
}
GPUMalloc::~GPUMalloc() {
// TODO: free the GPU malloced area
}
void *GPUMalloc::alloc(int size) {
void *ret = (void*)base;
size = (size+0xff) & (~0xFF);
assert(size <= remaining);
remaining -= size;
base += size;
return ret;
}
// *********** CachedSync, at the ioctl layer ***********
void CachedSync::exec() {
struct kgsl_gpuobj_sync cmd;
cmd.objs = (uint64_t)data.data();
cmd.obj_len = data.length();
cmd.count = data.length() / sizeof(struct kgsl_gpuobj_sync_obj);
int ret = ioctl(thneed->fd, IOCTL_KGSL_GPUOBJ_SYNC, &cmd);
assert(ret == 0);
}
// *********** CachedCommand, at the ioctl layer ***********
CachedCommand::CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd) {
thneed = lthneed;
assert(cmd->numsyncs == 0);
memcpy(&cache, cmd, sizeof(cache));
if (cmd->numcmds > 0) {
cmds = make_unique<struct kgsl_command_object[]>(cmd->numcmds);
memcpy(cmds.get(), (void *)cmd->cmdlist, sizeof(struct kgsl_command_object)*cmd->numcmds);
cache.cmdlist = (uint64_t)cmds.get();
for (int i = 0; i < cmd->numcmds; i++) {
void *nn = thneed->ram->alloc(cmds[i].size);
memcpy(nn, (void*)cmds[i].gpuaddr, cmds[i].size);
cmds[i].gpuaddr = (uint64_t)nn;
}
}
if (cmd->numobjs > 0) {
objs = make_unique<struct kgsl_command_object[]>(cmd->numobjs);
memcpy(objs.get(), (void *)cmd->objlist, sizeof(struct kgsl_command_object)*cmd->numobjs);
cache.objlist = (uint64_t)objs.get();
for (int i = 0; i < cmd->numobjs; i++) {
void *nn = thneed->ram->alloc(objs[i].size);
memset(nn, 0, objs[i].size);
objs[i].gpuaddr = (uint64_t)nn;
}
}
kq = thneed->ckq;
thneed->ckq.clear();
}
void CachedCommand::exec() {
cache.timestamp = ++thneed->timestamp;
int ret = ioctl(thneed->fd, IOCTL_KGSL_GPU_COMMAND, &cache);
if (thneed->debug >= 1) printf("CachedCommand::exec got %d\n", ret);
if (thneed->debug >= 2) {
for (auto &it : kq) {
it->debug_print(false);
}
}
assert(ret == 0);
}
// *********** Thneed ***********
Thneed::Thneed(bool do_clinit, cl_context _context) {
// TODO: QCOM2 actually requires a different context
//context = _context;
if (do_clinit) clinit();
assert(g_fd != -1);
fd = g_fd;
ram = make_unique<GPUMalloc>(0x80000, fd);
timestamp = -1;
g_thneed = this;
char *thneed_debug_env = getenv("THNEED_DEBUG");
debug = (thneed_debug_env != NULL) ? atoi(thneed_debug_env) : 0;
}
void Thneed::wait() {
struct kgsl_device_waittimestamp_ctxtid wait;
wait.context_id = context_id;
wait.timestamp = timestamp;
wait.timeout = -1;
uint64_t tb = nanos_since_boot();
int wret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
uint64_t te = nanos_since_boot();
if (debug >= 1) printf("wait %d after %lu us\n", wret, (te-tb)/1000);
}
void Thneed::execute(float **finputs, float *foutput, bool slow) {
uint64_t tb, te;
if (debug >= 1) tb = nanos_since_boot();
// ****** copy inputs
copy_inputs(finputs, true);
// ****** run commands
int i = 0;
for (auto &it : cmds) {
++i;
if (debug >= 1) printf("run %2d @ %7lu us: ", i, (nanos_since_boot()-tb)/1000);
it->exec();
if ((i == cmds.size()) || slow) wait();
}
// ****** copy outputs
copy_output(foutput);
if (debug >= 1) {
te = nanos_since_boot();
printf("model exec in %lu us\n", (te-tb)/1000);
}
}