openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

306 lines
11 KiB

#include <cassert>
#include <set>
#include "json11.hpp"
#include "common/util.h"
#include "common/clutil.h"
#include "selfdrive/modeld/thneed/thneed.h"
using namespace json11;
extern map<cl_program, string> g_program_source;
void Thneed::load(const char *filename) {
printf("Thneed::load: loading from %s\n", filename);
string buf = util::read_file(filename);
int jsz = *(int *)buf.data();
string jsonerr;
string jj(buf.data() + sizeof(int), jsz);
Json jdat = Json::parse(jj, jsonerr);
map<cl_mem, cl_mem> real_mem;
real_mem[NULL] = NULL;
int ptr = sizeof(int)+jsz;
for (auto &obj : jdat["objects"].array_items()) {
auto mobj = obj.object_items();
int sz = mobj["size"].int_value();
cl_mem clbuf = NULL;
if (mobj["buffer_id"].string_value().size() > 0) {
// image buffer must already be allocated
clbuf = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
assert(mobj["needs_load"].bool_value() == false);
} else {
if (mobj["needs_load"].bool_value()) {
clbuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sz, &buf[ptr], NULL);
if (debug >= 1) printf("loading %p %d @ 0x%X\n", clbuf, sz, ptr);
ptr += sz;
} else {
// TODO: is there a faster way to init zeroed out buffers?
void *host_zeros = calloc(sz, 1);
clbuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sz, host_zeros, NULL);
free(host_zeros);
}
}
assert(clbuf != NULL);
if (mobj["arg_type"] == "image2d_t" || mobj["arg_type"] == "image1d_t") {
cl_image_desc desc = {0};
desc.image_type = (mobj["arg_type"] == "image2d_t") ? CL_MEM_OBJECT_IMAGE2D : CL_MEM_OBJECT_IMAGE1D_BUFFER;
desc.image_width = mobj["width"].int_value();
desc.image_height = mobj["height"].int_value();
desc.image_row_pitch = mobj["row_pitch"].int_value();
assert(sz == desc.image_height*desc.image_row_pitch);
#ifdef QCOM2
desc.buffer = clbuf;
#else
// TODO: we are creating unused buffers on PC
clReleaseMemObject(clbuf);
#endif
cl_image_format format = {0};
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = mobj["float32"].bool_value() ? CL_FLOAT : CL_HALF_FLOAT;
cl_int errcode;
#ifndef QCOM2
if (mobj["needs_load"].bool_value()) {
clbuf = clCreateImage(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, &format, &desc, &buf[ptr-sz], &errcode);
} else {
clbuf = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &errcode);
}
#else
clbuf = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &errcode);
#endif
if (clbuf == NULL) {
printf("clError: %s create image %zux%zu rp %zu with buffer %p\n", cl_get_error_string(errcode),
desc.image_width, desc.image_height, desc.image_row_pitch, desc.buffer
);
}
assert(clbuf != NULL);
}
real_mem[*(cl_mem*)(mobj["id"].string_value().data())] = clbuf;
}
map<string, cl_program> g_programs;
for (const auto &[name, source] : jdat["programs"].object_items()) {
if (debug >= 1) printf("building %s with size %zu\n", name.c_str(), source.string_value().size());
g_programs[name] = cl_program_from_source(context, device_id, source.string_value());
}
for (auto &obj : jdat["inputs"].array_items()) {
auto mobj = obj.object_items();
int sz = mobj["size"].int_value();
cl_mem aa = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
input_clmem.push_back(aa);
input_sizes.push_back(sz);
printf("Thneed::load: adding input %s with size %d\n", mobj["name"].string_value().data(), sz);
cl_int cl_err;
void *ret = clEnqueueMapBuffer(command_queue, aa, CL_TRUE, CL_MAP_WRITE, 0, sz, 0, NULL, NULL, &cl_err);
if (cl_err != CL_SUCCESS) printf("clError: %s map %p %d\n", cl_get_error_string(cl_err), aa, sz);
assert(cl_err == CL_SUCCESS);
inputs.push_back(ret);
}
for (auto &obj : jdat["outputs"].array_items()) {
auto mobj = obj.object_items();
int sz = mobj["size"].int_value();
printf("Thneed::save: adding output with size %d\n", sz);
// TODO: support multiple outputs
output = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
assert(output != NULL);
}
for (auto &obj : jdat["binaries"].array_items()) {
string name = obj["name"].string_value();
size_t length = obj["length"].int_value();
if (debug >= 1) printf("binary %s with size %zu\n", name.c_str(), length);
g_programs[name] = cl_program_from_binary(context, device_id, (const uint8_t*)&buf[ptr], length);
ptr += length;
}
for (auto &obj : jdat["kernels"].array_items()) {
auto gws = obj["global_work_size"];
auto lws = obj["local_work_size"];
auto kk = shared_ptr<CLQueuedKernel>(new CLQueuedKernel(this));
kk->name = obj["name"].string_value();
kk->program = g_programs[kk->name];
kk->work_dim = obj["work_dim"].int_value();
for (int i = 0; i < kk->work_dim; i++) {
kk->global_work_size[i] = gws[i].int_value();
kk->local_work_size[i] = lws[i].int_value();
}
kk->num_args = obj["num_args"].int_value();
for (int i = 0; i < kk->num_args; i++) {
string arg = obj["args"].array_items()[i].string_value();
int arg_size = obj["args_size"].array_items()[i].int_value();
kk->args_size.push_back(arg_size);
if (arg_size == 8) {
cl_mem val = *(cl_mem*)(arg.data());
val = real_mem[val];
kk->args.push_back(string((char*)&val, sizeof(val)));
} else {
kk->args.push_back(arg);
}
}
kq.push_back(kk);
}
clFinish(command_queue);
}
void Thneed::save(const char *filename, bool save_binaries) {
printf("Thneed::save: saving to %s\n", filename);
// get kernels
std::vector<Json> kernels;
std::set<string> saved_objects;
std::vector<Json> objects;
std::map<string, string> programs;
std::map<string, string> binaries;
for (auto &k : kq) {
kernels.push_back(k->to_json());
// check args for objects
int i = 0;
for (auto &a : k->args) {
if (a.size() == 8) {
if (saved_objects.find(a) == saved_objects.end()) {
saved_objects.insert(a);
cl_mem val = *(cl_mem*)(a.data());
if (val != NULL) {
bool needs_load = k->arg_names[i] == "weights" || k->arg_names[i] == "biases";
auto jj = Json::object({
{"id", a},
{"arg_type", k->arg_types[i]},
});
if (k->arg_types[i] == "image2d_t" || k->arg_types[i] == "image1d_t") {
cl_mem buf = NULL;
clGetImageInfo(val, CL_IMAGE_BUFFER, sizeof(buf), &buf, NULL);
string aa = string((char *)&buf, sizeof(buf));
jj["buffer_id"] = aa;
size_t width, height, row_pitch;
clGetImageInfo(val, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
clGetImageInfo(val, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
clGetImageInfo(val, CL_IMAGE_ROW_PITCH, sizeof(row_pitch), &row_pitch, NULL);
jj["width"] = (int)width;
jj["height"] = (int)height;
jj["row_pitch"] = (int)row_pitch;
jj["size"] = (int)(height * row_pitch);
jj["needs_load"] = false;
jj["float32"] = false;
if (saved_objects.find(aa) == saved_objects.end()) {
saved_objects.insert(aa);
size_t sz;
clGetMemObjectInfo(buf, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
// save the buffer
objects.push_back(Json::object({
{"id", aa},
{"arg_type", "<image buffer>"},
{"needs_load", needs_load},
{"size", (int)sz}
}));
if (needs_load) assert(sz == height * row_pitch);
}
} else {
size_t sz = 0;
clGetMemObjectInfo(val, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
jj["size"] = (int)sz;
jj["needs_load"] = needs_load;
}
objects.push_back(jj);
}
}
}
i++;
}
if (save_binaries) {
int err;
size_t binary_size = 0;
err = clGetProgramInfo(k->program, CL_PROGRAM_BINARY_SIZES, sizeof(binary_size), &binary_size, NULL);
assert(err == 0);
assert(binary_size > 0);
string sv(binary_size, '\x00');
uint8_t* bufs[1] = { (uint8_t*)sv.data(), };
err = clGetProgramInfo(k->program, CL_PROGRAM_BINARIES, sizeof(bufs), &bufs, NULL);
assert(err == 0);
binaries[k->name] = sv;
} else {
programs[k->name] = g_program_source[k->program];
}
}
vector<string> saved_buffers;
for (auto &obj : objects) {
auto mobj = obj.object_items();
cl_mem val = *(cl_mem*)(mobj["id"].string_value().data());
int sz = mobj["size"].int_value();
if (mobj["needs_load"].bool_value()) {
char *buf = (char *)malloc(sz);
if (mobj["arg_type"] == "image2d_t" || mobj["arg_type"] == "image1d_t") {
assert(false);
} else {
// buffers allocated with CL_MEM_HOST_WRITE_ONLY, hence this hack
//hexdump((uint32_t*)val, 0x100);
// the worst hack in thneed, the flags are at 0x14
((uint32_t*)val)[0x14] &= ~CL_MEM_HOST_WRITE_ONLY;
cl_int ret = clEnqueueReadBuffer(command_queue, val, CL_TRUE, 0, sz, buf, 0, NULL, NULL);
assert(ret == CL_SUCCESS);
}
//printf("saving buffer: %d %p %s\n", sz, buf, mobj["arg_type"].string_value().c_str());
saved_buffers.push_back(string(buf, sz));
free(buf);
}
}
std::vector<Json> jbinaries;
for (auto &obj : binaries) {
jbinaries.push_back(Json::object({{"name", obj.first}, {"length", (int)obj.second.size()}}));
saved_buffers.push_back(obj.second);
}
Json jdat = Json::object({
{"kernels", kernels},
{"objects", objects},
{"programs", programs},
{"binaries", jbinaries},
});
string str = jdat.dump();
int jsz = str.length();
FILE *f = fopen(filename, "wb");
fwrite(&jsz, 1, sizeof(jsz), f);
fwrite(str.data(), 1, jsz, f);
for (auto &s : saved_buffers) {
fwrite(s.data(), 1, s.length(), f);
}
fclose(f);
}
Json CLQueuedKernel::to_json() const {
return Json::object {
{ "name", name },
{ "work_dim", (int)work_dim },
{ "global_work_size", Json::array { (int)global_work_size[0], (int)global_work_size[1], (int)global_work_size[2] } },
{ "local_work_size", Json::array { (int)local_work_size[0], (int)local_work_size[1], (int)local_work_size[2] } },
{ "num_args", (int)num_args },
{ "args", args },
{ "args_size", args_size },
};
}