openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

259 lines
8.6 KiB

#include <cassert>
#include <set>
#include "json11.hpp"
#include "common/util.h"
#include "common/clutil.h"
#include "selfdrive/modeld/thneed/thneed.h"
using namespace json11;
extern map<cl_program, string> g_program_source;
void Thneed::load(const char *filename) {
printf("Thneed::load: loading from %s\n", filename);
string buf = util::read_file(filename);
int jsz = *(int *)buf.data();
string err;
string jj(buf.data() + sizeof(int), jsz);
Json jdat = Json::parse(jj, err);
map<cl_mem, cl_mem> real_mem;
real_mem[NULL] = NULL;
int ptr = sizeof(int)+jsz;
for (auto &obj : jdat["objects"].array_items()) {
auto mobj = obj.object_items();
int sz = mobj["size"].int_value();
cl_mem clbuf = NULL;
if (mobj["buffer_id"].string_value().size() > 0) {
// image buffer must already be allocated
clbuf = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
assert(mobj["needs_load"].bool_value() == false);
} else {
if (mobj["needs_load"].bool_value()) {
//printf("loading %p %d @ 0x%X\n", clbuf, sz, ptr);
clbuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sz, &buf[ptr], NULL);
ptr += sz;
} else {
clbuf = clCreateBuffer(context, CL_MEM_READ_WRITE, sz, NULL, NULL);
}
}
assert(clbuf != NULL);
if (mobj["arg_type"] == "image2d_t" || mobj["arg_type"] == "image1d_t") {
cl_image_desc desc = {0};
desc.image_type = (mobj["arg_type"] == "image2d_t") ? CL_MEM_OBJECT_IMAGE2D : CL_MEM_OBJECT_IMAGE1D_BUFFER;
desc.image_width = mobj["width"].int_value();
desc.image_height = mobj["height"].int_value();
desc.image_row_pitch = mobj["row_pitch"].int_value();
desc.buffer = clbuf;
cl_image_format format;
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = CL_HALF_FLOAT;
clbuf = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, NULL);
assert(clbuf != NULL);
}
real_mem[*(cl_mem*)(mobj["id"].string_value().data())] = clbuf;
}
map<string, cl_program> g_programs;
for (const auto &[name, source] : jdat["programs"].object_items()) {
if (debug >= 1) printf("building %s with size %zu\n", name.c_str(), source.string_value().size());
g_programs[name] = cl_program_from_source(context, device_id, source.string_value());
}
for (auto &obj : jdat["binaries"].array_items()) {
string name = obj["name"].string_value();
size_t length = obj["length"].int_value();
if (debug >= 1) printf("binary %s with size %zu\n", name.c_str(), length);
g_programs[name] = cl_program_from_binary(context, device_id, (const uint8_t*)&buf[ptr], length);
ptr += length;
}
for (auto &obj : jdat["kernels"].array_items()) {
auto gws = obj["global_work_size"];
auto lws = obj["local_work_size"];
auto kk = shared_ptr<CLQueuedKernel>(new CLQueuedKernel(this));
kk->name = obj["name"].string_value();
kk->program = g_programs[kk->name];
kk->work_dim = obj["work_dim"].int_value();
for (int i = 0; i < kk->work_dim; i++) {
kk->global_work_size[i] = gws[i].int_value();
kk->local_work_size[i] = lws[i].int_value();
}
kk->num_args = obj["num_args"].int_value();
for (int i = 0; i < kk->num_args; i++) {
string arg = obj["args"].array_items()[i].string_value();
int arg_size = obj["args_size"].array_items()[i].int_value();
kk->args_size.push_back(arg_size);
if (arg_size == 8) {
cl_mem val = *(cl_mem*)(arg.data());
val = real_mem[val];
kk->args.push_back(string((char*)&val, sizeof(val)));
} else {
kk->args.push_back(arg);
}
}
kq.push_back(kk);
}
clFinish(command_queue);
}
void Thneed::save(const char *filename, bool save_binaries) {
printf("Thneed::save: saving to %s\n", filename);
// get kernels
std::vector<Json> kernels;
std::set<string> saved_objects;
std::vector<Json> objects;
std::map<string, string> programs;
std::map<string, string> binaries;
for (auto &k : kq) {
kernels.push_back(k->to_json());
// check args for objects
int i = 0;
for (auto &a : k->args) {
if (a.size() == 8) {
if (saved_objects.find(a) == saved_objects.end()) {
saved_objects.insert(a);
cl_mem val = *(cl_mem*)(a.data());
if (val != NULL) {
bool needs_load = k->arg_names[i] == "weights" || k->arg_names[i] == "biases";
auto jj = Json::object({
{"id", a},
{"arg_type", k->arg_types[i]},
});
if (k->arg_types[i] == "image2d_t" || k->arg_types[i] == "image1d_t") {
cl_mem buf;
clGetImageInfo(val, CL_IMAGE_BUFFER, sizeof(buf), &buf, NULL);
string aa = string((char *)&buf, sizeof(buf));
jj["buffer_id"] = aa;
size_t width, height, row_pitch;
clGetImageInfo(val, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
clGetImageInfo(val, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
clGetImageInfo(val, CL_IMAGE_ROW_PITCH, sizeof(row_pitch), &row_pitch, NULL);
jj["width"] = (int)width;
jj["height"] = (int)height;
jj["row_pitch"] = (int)row_pitch;
jj["size"] = (int)(height * row_pitch);
jj["needs_load"] = false;
if (saved_objects.find(aa) == saved_objects.end()) {
saved_objects.insert(aa);
size_t sz;
clGetMemObjectInfo(buf, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
// save the buffer
objects.push_back(Json::object({
{"id", aa},
{"arg_type", "<image buffer>"},
{"needs_load", needs_load},
{"size", (int)sz}
}));
if (needs_load) assert(sz == height * row_pitch);
}
} else {
size_t sz = 0;
clGetMemObjectInfo(val, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
jj["size"] = (int)sz;
jj["needs_load"] = needs_load;
}
objects.push_back(jj);
}
}
}
i++;
}
if (save_binaries) {
int err;
size_t binary_size = 0;
err = clGetProgramInfo(k->program, CL_PROGRAM_BINARY_SIZES, sizeof(binary_size), &binary_size, NULL);
assert(err == 0);
assert(binary_size > 0);
string sv(binary_size, '\x00');
uint8_t* bufs[1] = { (uint8_t*)sv.data(), };
err = clGetProgramInfo(k->program, CL_PROGRAM_BINARIES, sizeof(bufs), &bufs, NULL);
assert(err == 0);
binaries[k->name] = sv;
} else {
programs[k->name] = g_program_source[k->program];
}
}
vector<string> saved_buffers;
for (auto &obj : objects) {
auto mobj = obj.object_items();
cl_mem val = *(cl_mem*)(mobj["id"].string_value().data());
int sz = mobj["size"].int_value();
if (mobj["needs_load"].bool_value()) {
char *buf = (char *)malloc(sz);
if (mobj["arg_type"] == "image2d_t" || mobj["arg_type"] == "image1d_t") {
assert(false);
} else {
// buffers allocated with CL_MEM_HOST_WRITE_ONLY, hence this hack
//hexdump((uint32_t*)val, 0x100);
// the worst hack in thneed, the flags are at 0x14
((uint32_t*)val)[0x14] &= ~CL_MEM_HOST_WRITE_ONLY;
cl_int ret = clEnqueueReadBuffer(command_queue, val, CL_TRUE, 0, sz, buf, 0, NULL, NULL);
assert(ret == CL_SUCCESS);
}
//printf("saving buffer: %d %p %s\n", sz, buf, mobj["arg_type"].string_value().c_str());
saved_buffers.push_back(string(buf, sz));
free(buf);
}
}
std::vector<Json> jbinaries;
for (auto &obj : binaries) {
jbinaries.push_back(Json::object({{"name", obj.first}, {"length", (int)obj.second.size()}}));
saved_buffers.push_back(obj.second);
}
Json jdat = Json::object({
{"kernels", kernels},
{"objects", objects},
{"programs", programs},
{"binaries", jbinaries},
});
string str = jdat.dump();
int jsz = str.length();
FILE *f = fopen(filename, "wb");
fwrite(&jsz, 1, sizeof(jsz), f);
fwrite(str.data(), 1, jsz, f);
for (auto &s : saved_buffers) {
fwrite(s.data(), 1, s.length(), f);
}
fclose(f);
}
// Describe this queued kernel as a JSON object with the keys "name",
// "work_dim", "global_work_size", "local_work_size", "num_args", "args" and
// "args_size". Numeric members are narrowed to int before being stored.
Json CLQueuedKernel::to_json() const {
  // both work-size arrays are always emitted with three entries
  Json::array global_dims;
  Json::array local_dims;
  for (int axis = 0; axis < 3; axis++) {
    global_dims.push_back(static_cast<int>(global_work_size[axis]));
    local_dims.push_back(static_cast<int>(local_work_size[axis]));
  }

  Json::object fields;
  fields["name"] = name;
  fields["work_dim"] = static_cast<int>(work_dim);
  fields["global_work_size"] = global_dims;
  fields["local_work_size"] = local_dims;
  fields["num_args"] = static_cast<int>(num_args);
  fields["args"] = args;
  fields["args_size"] = args_size;
  return fields;
}