nv12: encoderd avoids a full frame copy (#24519)

* rgb to nv12

* nv12 works (w/ memcpy)

* correct now

* no copy

* fix nv12 with fast debayer

* reverts of unused stuff

* ui use nv12

* comment out thumbnails for now

* rebase fix

* dm read nv12

* model read nv12

* fix ffmpeg encoder

* thumbnails from nv12

* replay to nv12

* python framereader support nv12

* remove hardcoded frame/buffer sizes

* fix build

* ffmpeg encoder fix buffers

* small cleanup

* reduce power usage test

* fix cpu usage test

* fix snapshot

* fix loggerd test

* bump cereal

Co-authored-by: Comma Device <device@comma.ai>
Co-authored-by: Joost Wooning <jwooning@gmail.com>
Author: George Hotz
Commit: ea5b8cdfb1 (parent 2b9f8662cc)
28 changed files (lines changed):

   2  cereal
  43  selfdrive/camerad/cameras/camera_common.cc
  13  selfdrive/camerad/cameras/real_debayer.cl
  12  selfdrive/camerad/snapshot/snapshot.py
   2  selfdrive/hardware/tici/test_power_draw.py
   3  selfdrive/loggerd/encoder/encoder.h
  31  selfdrive/loggerd/encoder/ffmpeg_encoder.cc
   4  selfdrive/loggerd/encoder/ffmpeg_encoder.h
  35  selfdrive/loggerd/encoder/v4l_encoder.cc
   3  selfdrive/loggerd/encoder/v4l_encoder.h
   3  selfdrive/loggerd/encoderd.cc
  20  selfdrive/loggerd/tests/test_loggerd.py
   2  selfdrive/modeld/dmonitoringmodeld.cc
   4  selfdrive/modeld/models/commonmodel.cc
   2  selfdrive/modeld/models/commonmodel.h
  21  selfdrive/modeld/models/dmonitoring.cc
   2  selfdrive/modeld/models/dmonitoring.h
   4  selfdrive/modeld/models/driving.cc
  56  selfdrive/modeld/transforms/transform.cc
  14  selfdrive/modeld/transforms/transform.cl
   2  selfdrive/modeld/transforms/transform.h
   2  selfdrive/test/process_replay/model_replay.py
   2  selfdrive/test/process_replay/regen.py
   4  selfdrive/test/test_onroad.py
  53  selfdrive/ui/qt/widgets/cameraview.cc
   1  selfdrive/ui/qt/widgets/cameraview.h
  19  selfdrive/ui/replay/framereader.cc
  42  tools/lib/framereader.py
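
Background for the diff below: I420 ("yuv420p") stores three planes, Y then U then V; NV12 keeps the full-resolution Y plane but interleaves U and V into a single half-resolution plane, which is the layout the ISP and the V4L2 hardware encoder produce and consume. A minimal repacking sketch (not part of the commit; assumes a tightly packed buffer with stride == width):

```python
import numpy as np

def i420_to_nv12(buf, w, h):
  y = buf[:w*h]
  u = buf[w*h:w*h + (w//2)*(h//2)]
  v = buf[w*h + (w//2)*(h//2):]
  nv12 = np.empty(w*h + 2*(w//2)*(h//2), dtype=np.uint8)
  nv12[:w*h] = y
  nv12[w*h::2] = u    # even bytes of the UV plane are U
  nv12[w*h+1::2] = v  # odd bytes are V
  return nv12
```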

--- a/cereal
+++ b/cereal
@@ -1 +1 @@
-Subproject commit 93a2560618ef41bc256f7a0331c8b047863d4d69
+Subproject commit bf4960f83ccdefc7e9d2e405878ae6c8efced83b

--- a/selfdrive/camerad/cameras/camera_common.cc
+++ b/selfdrive/camerad/cameras/camera_common.cc
@@ -16,6 +16,7 @@
 #include "common/swaglog.h"
 #include "common/util.h"
 #include "selfdrive/hardware/hw.h"
+#include "msm_media_info.h"

 #ifdef QCOM2
 #include "CL/cl_ext_qcom.h"
@@ -28,17 +29,17 @@ ExitHandler do_exit;
 class Debayer {
 public:
-  Debayer(cl_device_id device_id, cl_context context, const CameraBuf *b, const CameraState *s) {
+  Debayer(cl_device_id device_id, cl_context context, const CameraBuf *b, const CameraState *s, int buf_width, int uv_offset) {
     char args[4096];
     const CameraInfo *ci = &s->ci;
     hdr_ = ci->hdr;
     snprintf(args, sizeof(args),
              "-cl-fast-relaxed-math -cl-denorms-are-zero "
              "-DFRAME_WIDTH=%d -DFRAME_HEIGHT=%d -DFRAME_STRIDE=%d -DFRAME_OFFSET=%d "
-             "-DRGB_WIDTH=%d -DRGB_HEIGHT=%d -DRGB_STRIDE=%d "
+             "-DRGB_WIDTH=%d -DRGB_HEIGHT=%d -DRGB_STRIDE=%d -DYUV_STRIDE=%d -DUV_OFFSET=%d "
              "-DBAYER_FLIP=%d -DHDR=%d -DCAM_NUM=%d%s",
              ci->frame_width, ci->frame_height, ci->frame_stride, ci->frame_offset,
-             b->rgb_width, b->rgb_height, b->rgb_stride,
+             b->rgb_width, b->rgb_height, b->rgb_stride, buf_width, uv_offset,
              ci->bayer_flip, ci->hdr, s->camera_num, s->camera_num==1 ? " -DVIGNETTING" : "");
     const char *cl_file = "cameras/real_debayer.cl";
     cl_program prg_debayer = cl_program_from_file(context, device_id, cl_file, args);
@@ -101,11 +102,17 @@ void CameraBuf::init(cl_device_id device_id, cl_context context, CameraState *s,
   rgb_stride = vipc_server->get_buffer(rgb_type)->stride;
   LOGD("created %d UI vipc buffers with size %dx%d", UI_BUF_COUNT, rgb_width, rgb_height);

-  vipc_server->create_buffers(yuv_type, YUV_BUFFER_COUNT, false, rgb_width, rgb_height);
-  LOGD("created %d YUV vipc buffers with size %dx%d", YUV_BUFFER_COUNT, rgb_width, rgb_height);
+  int nv12_width = VENUS_Y_STRIDE(COLOR_FMT_NV12, rgb_width);
+  int nv12_height = VENUS_Y_SCANLINES(COLOR_FMT_NV12, rgb_height);
+  assert(nv12_width == VENUS_UV_STRIDE(COLOR_FMT_NV12, rgb_width));
+  assert(nv12_height/2 == VENUS_UV_SCANLINES(COLOR_FMT_NV12, rgb_height));
+  size_t nv12_size = 2346 * nv12_width;  // comes from v4l2_format.fmt.pix_mp.plane_fmt[0].sizeimage
+  size_t nv12_uv_offset = nv12_width * nv12_height;
+  vipc_server->create_buffers_with_sizes(yuv_type, YUV_BUFFER_COUNT, false, rgb_width, rgb_height, nv12_size, nv12_width, nv12_uv_offset);
+  LOGD("created %d YUV vipc buffers with size %dx%d", YUV_BUFFER_COUNT, nv12_width, nv12_height);

   if (ci->bayer) {
-    debayer = new Debayer(device_id, context, this, s);
+    debayer = new Debayer(device_id, context, this, s, nv12_width, nv12_uv_offset);
   }

   rgb2yuv = std::make_unique<Rgb2Yuv>(context, device_id, rgb_width, rgb_height, rgb_stride);
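
The VENUS_* macros above come from msm_media_info.h. A sketch of the arithmetic (the 128/32/16 alignments are assumptions based on that macro family, not copied from the header): for the comma three road camera (1928x1208) it reproduces the constants hardcoded in test_loggerd.py further down, namely stride 2048, uv_offset 2048*1216, and buffer size 2048*2346.

```python
def align(x, n):
  return (x + n - 1) & ~(n - 1)

w, h = 1928, 1208
y_stride = align(w, 128)          # 2048, VENUS_Y_STRIDE(COLOR_FMT_NV12, w)
y_scanlines = align(h, 32)        # 1216, VENUS_Y_SCANLINES(COLOR_FMT_NV12, h)
uv_scanlines = align(h // 2, 16)  # 608 == y_scanlines / 2, as the asserts require
uv_offset = y_stride * y_scanlines  # 2048 * 1216
nv12_size = 2346 * y_stride         # sizeimage reported by the encoder driver
```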
@@ -233,20 +240,28 @@ kj::Array<uint8_t> get_raw_frame_image(const CameraBuf *b) {
 }

 static kj::Array<capnp::byte> yuv420_to_jpeg(const CameraBuf *b, int thumbnail_width, int thumbnail_height) {
+  int downscale = b->cur_yuv_buf->width / thumbnail_width;
+  assert(downscale * thumbnail_height == b->cur_yuv_buf->height);
+  int in_stride = b->cur_yuv_buf->stride;

   // make the buffer big enough. jpeg_write_raw_data requires 16-pixels aligned height to be used.
   std::unique_ptr<uint8_t[]> buf(new uint8_t[(thumbnail_width * ((thumbnail_height + 15) & ~15) * 3) / 2]);
   uint8_t *y_plane = buf.get();
   uint8_t *u_plane = y_plane + thumbnail_width * thumbnail_height;
   uint8_t *v_plane = u_plane + (thumbnail_width * thumbnail_height) / 4;
   {
-    int result = libyuv::I420Scale(
-        b->cur_yuv_buf->y, b->rgb_width, b->cur_yuv_buf->u, b->rgb_width / 2, b->cur_yuv_buf->v, b->rgb_width / 2,
-        b->rgb_width, b->rgb_height,
-        y_plane, thumbnail_width, u_plane, thumbnail_width / 2, v_plane, thumbnail_width / 2,
-        thumbnail_width, thumbnail_height, libyuv::kFilterNone);
-    if (result != 0) {
-      LOGE("Generate YUV thumbnail failed.");
-      return {};
-    }
+    // subsampled conversion from nv12 to yuv
+    for (int hy = 0; hy < thumbnail_height/2; hy++) {
+      for (int hx = 0; hx < thumbnail_width/2; hx++) {
+        int ix = hx * downscale + (downscale-1)/2;
+        int iy = hy * downscale + (downscale-1)/2;
+        y_plane[(hy*2 + 0)*thumbnail_width + (hx*2 + 0)] = b->cur_yuv_buf->y[(iy*2 + 0) * in_stride + ix*2 + 0];
+        y_plane[(hy*2 + 0)*thumbnail_width + (hx*2 + 1)] = b->cur_yuv_buf->y[(iy*2 + 0) * in_stride + ix*2 + 1];
+        y_plane[(hy*2 + 1)*thumbnail_width + (hx*2 + 0)] = b->cur_yuv_buf->y[(iy*2 + 1) * in_stride + ix*2 + 0];
+        y_plane[(hy*2 + 1)*thumbnail_width + (hx*2 + 1)] = b->cur_yuv_buf->y[(iy*2 + 1) * in_stride + ix*2 + 1];
+        u_plane[hy*thumbnail_width/2 + hx] = b->cur_yuv_buf->uv[iy*in_stride + ix*2 + 0];
+        v_plane[hy*thumbnail_width/2 + hx] = b->cur_yuv_buf->uv[iy*in_stride + ix*2 + 1];
+      }
+    }
   }
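
The loop above in numpy terms (a sketch, not repo code): treat the Y plane as (scanlines, stride) and the interleaved UV plane likewise, then take every downscale-th sample, copying 2x2 luma blocks so the chroma siting stays aligned with the luma:

```python
import numpy as np

def nv12_thumbnail(y, uv, tw, th, ds):
  # y: (rows, stride) uint8, uv: (rows//2, stride) interleaved UVUV...
  ix = np.arange(tw // 2) * ds + (ds - 1) // 2
  iy = np.arange(th // 2) * ds + (ds - 1) // 2
  ty = np.empty((th, tw), dtype=np.uint8)
  ty[0::2, 0::2] = y[np.ix_(2*iy,     2*ix)]
  ty[0::2, 1::2] = y[np.ix_(2*iy,     2*ix + 1)]
  ty[1::2, 0::2] = y[np.ix_(2*iy + 1, 2*ix)]
  ty[1::2, 1::2] = y[np.ix_(2*iy + 1, 2*ix + 1)]
  tu = uv[np.ix_(iy, 2*ix)]      # even bytes: U
  tv = uv[np.ix_(iy, 2*ix + 1)]  # odd bytes: V
  return ty, tu, tv
```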

--- a/selfdrive/camerad/cameras/real_debayer.cl
+++ b/selfdrive/camerad/cameras/real_debayer.cl
@@ -1,7 +1,5 @@
 #define UV_WIDTH RGB_WIDTH / 2
 #define UV_HEIGHT RGB_HEIGHT / 2
-#define U_OFFSET RGB_WIDTH * RGB_HEIGHT
-#define V_OFFSET RGB_WIDTH * RGB_HEIGHT + UV_WIDTH * UV_HEIGHT

 #define RGB_TO_Y(r, g, b) ((((mul24(b, 13) + mul24(g, 65) + mul24(r, 33)) + 64) >> 7) + 16)
 #define RGB_TO_U(r, g, b) ((mul24(b, 56) - mul24(g, 37) - mul24(r, 19) + 0x8080) >> 8)
@@ -141,17 +139,20 @@ __kernel void debayer10(const __global uchar * in, __global uchar * out)
     RGB_TO_Y(rgb_out[0].s0, rgb_out[0].s1, rgb_out[0].s2),
     RGB_TO_Y(rgb_out[1].s0, rgb_out[1].s1, rgb_out[1].s2)
   );
-  vstore2(yy, 0, out + mad24(gid_y * 2, RGB_WIDTH, gid_x * 2));
+  vstore2(yy, 0, out + mad24(gid_y * 2, YUV_STRIDE, gid_x * 2));

   yy = (uchar2)(
     RGB_TO_Y(rgb_out[2].s0, rgb_out[2].s1, rgb_out[2].s2),
     RGB_TO_Y(rgb_out[3].s0, rgb_out[3].s1, rgb_out[3].s2)
   );
-  vstore2(yy, 0, out + mad24(gid_y * 2 + 1, RGB_WIDTH, gid_x * 2));
+  vstore2(yy, 0, out + mad24(gid_y * 2 + 1, YUV_STRIDE, gid_x * 2));

   // write uvs
   const short ar = AVERAGE(rgb_out[0].s0, rgb_out[1].s0, rgb_out[2].s0, rgb_out[3].s0);
   const short ag = AVERAGE(rgb_out[0].s1, rgb_out[1].s1, rgb_out[2].s1, rgb_out[3].s1);
   const short ab = AVERAGE(rgb_out[0].s2, rgb_out[1].s2, rgb_out[2].s2, rgb_out[3].s2);
-  out[U_OFFSET + mad24(gid_y, UV_WIDTH, gid_x)] = RGB_TO_U(ar, ag, ab);
-  out[V_OFFSET + mad24(gid_y, UV_WIDTH, gid_x)] = RGB_TO_V(ar, ag, ab);
+  uchar2 uv = (uchar2)(
+    RGB_TO_U(ar, ag, ab),
+    RGB_TO_V(ar, ag, ab)
+  );
+  vstore2(uv, 0, out + UV_OFFSET + mad24(gid_y, YUV_STRIDE, gid_x * 2));
 }

--- a/selfdrive/camerad/snapshot/snapshot.py
+++ b/selfdrive/camerad/snapshot/snapshot.py
@@ -44,10 +44,10 @@ def yuv_to_rgb(y, u, v):
   return rgb.astype(np.uint8)


-def extract_image(buf, w, h):
-  y = np.array(buf[:w*h], dtype=np.uint8).reshape((h, w))
-  u = np.array(buf[w*h: w*h+(w//2)*(h//2)], dtype=np.uint8).reshape((h//2, w//2))
-  v = np.array(buf[w*h+(w//2)*(h//2):], dtype=np.uint8).reshape((h//2, w//2))
+def extract_image(buf, w, h, stride, uv_offset):
+  y = np.array(buf[:uv_offset], dtype=np.uint8).reshape((-1, stride))[:h, :w]
+  u = np.array(buf[uv_offset::2], dtype=np.uint8).reshape((-1, stride//2))[:h//2, :w//2]
+  v = np.array(buf[uv_offset+1::2], dtype=np.uint8).reshape((-1, stride//2))[:h//2, :w//2]
   return yuv_to_rgb(y, u, v)
@@ -79,10 +79,10 @@ def get_snapshots(frame="roadCameraState", front_frame="driverCameraState", focu
   rear, front = None, None
   if frame is not None:
     c = vipc_clients[frame]
-    rear = extract_image(c.recv(), c.width, c.height)
+    rear = extract_image(c.recv(), c.width, c.height, c.stride, c.uv_offset)
   if front_frame is not None:
     c = vipc_clients[front_frame]
-    front = extract_image(c.recv(), c.width, c.height)
+    front = extract_image(c.recv(), c.width, c.height, c.stride, c.uv_offset)
   return rear, front
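
A toy call of the new signature (illustrative; real width/stride/uv_offset come from the VisionIPC client as above):

```python
import numpy as np

w, h, stride = 8, 4, 16  # toy dimensions; stride > width, as on device
uv_offset = stride * h
buf = np.zeros(uv_offset + stride * (h // 2), dtype=np.uint8)
buf[:uv_offset].reshape(h, stride)[:, :w] = 100  # Y plane
buf[uv_offset:] = 128                            # neutral chroma (U and V)
rgb = extract_image(buf, w, h, stride, uv_offset)  # uniform gray 4x8 image
```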

--- a/selfdrive/hardware/tici/test_power_draw.py
+++ b/selfdrive/hardware/tici/test_power_draw.py
@@ -22,7 +22,7 @@ PROCS = [
   Proc('camerad', 2.25),
   Proc('modeld', 0.95),
   Proc('dmonitoringmodeld', 0.25),
-  Proc('encoderd', 0.42),
+  Proc('encoderd', 0.23),
 ]

--- a/selfdrive/loggerd/encoder/encoder.h
+++ b/selfdrive/loggerd/encoder/encoder.h
@@ -19,8 +19,7 @@ public:
     : filename(filename), type(type), in_width(in_width), in_height(in_height), fps(fps),
       bitrate(bitrate), codec(codec), out_width(out_width), out_height(out_height), write(write) { }
   virtual ~VideoEncoder();
-  virtual int encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr,
-                           int in_width, int in_height, VisionIpcBufExtra *extra) = 0;
+  virtual int encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra) = 0;
   virtual void encoder_open(const char* path) = 0;
   virtual void encoder_close() = 0;

--- a/selfdrive/loggerd/encoder/ffmpeg_encoder.cc
+++ b/selfdrive/loggerd/encoder/ffmpeg_encoder.cc
@@ -34,6 +34,8 @@ void FfmpegEncoder::encoder_init() {
   frame->linesize[1] = out_width/2;
   frame->linesize[2] = out_width/2;

+  convert_buf.resize(in_width * in_height * 3 / 2);
+
   if (in_width != out_width || in_height != out_height) {
     downscale_buf.resize(out_width * out_height * 3 / 2);
   }
@@ -70,18 +72,27 @@ void FfmpegEncoder::encoder_close() {
   is_open = false;
 }

-int FfmpegEncoder::encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr,
-                                int in_width_, int in_height_, VisionIpcBufExtra *extra) {
-  assert(in_width_ == this->in_width);
-  assert(in_height_ == this->in_height);
+int FfmpegEncoder::encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra) {
+  assert(buf->width == this->in_width);
+  assert(buf->height == this->in_height);
+
+  uint8_t *cy = convert_buf.data();
+  uint8_t *cu = cy + in_width * in_height;
+  uint8_t *cv = cu + (in_width / 2) * (in_height / 2);
+  libyuv::NV12ToI420(buf->y, buf->stride,
+                     buf->uv, buf->stride,
+                     cy, in_width,
+                     cu, in_width/2,
+                     cv, in_width/2,
+                     in_width, in_height);

   if (downscale_buf.size() > 0) {
     uint8_t *out_y = downscale_buf.data();
     uint8_t *out_u = out_y + frame->width * frame->height;
     uint8_t *out_v = out_u + (frame->width / 2) * (frame->height / 2);
-    libyuv::I420Scale(y_ptr, in_width,
-                      u_ptr, in_width/2,
-                      v_ptr, in_width/2,
+    libyuv::I420Scale(cy, in_width,
+                      cu, in_width/2,
+                      cv, in_width/2,
                       in_width, in_height,
                       out_y, frame->width,
                       out_u, frame->width/2,
@@ -92,9 +103,9 @@ int FfmpegEncoder::encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra) {
     frame->data[1] = out_u;
     frame->data[2] = out_v;
   } else {
-    frame->data[0] = (uint8_t*)y_ptr;
-    frame->data[1] = (uint8_t*)u_ptr;
-    frame->data[2] = (uint8_t*)v_ptr;
+    frame->data[0] = cy;
+    frame->data[1] = cu;
+    frame->data[2] = cv;
   }
   frame->pts = counter*50*1000; // 50ms per frame

--- a/selfdrive/loggerd/encoder/ffmpeg_encoder.h
+++ b/selfdrive/loggerd/encoder/ffmpeg_encoder.h
@@ -21,8 +21,7 @@ class FfmpegEncoder : public VideoEncoder {
     VideoEncoder(filename, type, in_width, in_height, fps, bitrate, cereal::EncodeIndex::Type::BIG_BOX_LOSSLESS, out_width, out_height, write) { encoder_init(); }
   ~FfmpegEncoder();
   void encoder_init();
-  int encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr,
-                   int in_width_, int in_height_, VisionIpcBufExtra *extra);
+  int encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra);
  void encoder_open(const char* path);
  void encoder_close();

@@ -33,5 +32,6 @@ private:
   AVCodecContext *codec_ctx;
   AVFrame *frame = NULL;
+  std::vector<uint8_t> convert_buf;
   std::vector<uint8_t> downscale_buf;
 };

--- a/selfdrive/loggerd/encoder/v4l_encoder.cc
+++ b/selfdrive/loggerd/encoder/v4l_encoder.cc
@@ -248,7 +248,6 @@ void V4LEncoder::encoder_init() {
   }

   // queue up input buffers
   for (unsigned int i = 0; i < BUF_IN_COUNT; i++) {
-    buf_in[i].allocate(fmt_in.fmt.pix_mp.plane_fmt[0].sizeimage);
     free_buf_in.push(i);
   }
@@ -262,41 +261,19 @@ void V4LEncoder::encoder_open(const char* path) {
   this->counter = 0;
 }

-int V4LEncoder::encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr,
-                             int in_width_, int in_height_, VisionIpcBufExtra *extra) {
-  assert(in_width == in_width_);
-  assert(in_height == in_height_);
-  assert(is_open);
-
-  // reserve buffer
-  int buffer_in = free_buf_in.pop();
-
-  uint8_t *in_y_ptr = (uint8_t*)buf_in[buffer_in].addr;
-  int in_y_stride = VENUS_Y_STRIDE(COLOR_FMT_NV12, in_width);
-  int in_uv_stride = VENUS_UV_STRIDE(COLOR_FMT_NV12, in_width);
-  uint8_t *in_uv_ptr = in_y_ptr + (in_y_stride * VENUS_Y_SCANLINES(COLOR_FMT_NV12, in_height));
-
-  // GRRR COPY
-  int err = libyuv::I420ToNV12(y_ptr, in_width,
-                               u_ptr, in_width/2,
-                               v_ptr, in_width/2,
-                               in_y_ptr, in_y_stride,
-                               in_uv_ptr, in_uv_stride,
-                               in_width, in_height);
-  assert(err == 0);
-
+int V4LEncoder::encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra) {
   struct timeval timestamp {
     .tv_sec = (long)(extra->timestamp_eof/1000000000),
     .tv_usec = (long)((extra->timestamp_eof/1000) % 1000000),
   };

+  // reserve buffer
+  int buffer_in = free_buf_in.pop();
+
   // push buffer
   extras.push(*extra);
-  buf_in[buffer_in].sync(VISIONBUF_SYNC_TO_DEVICE);
-  int bytesused = VENUS_Y_STRIDE(COLOR_FMT_NV12, in_width) * VENUS_Y_SCANLINES(COLOR_FMT_NV12, in_height) +
-                  VENUS_UV_STRIDE(COLOR_FMT_NV12, in_width) * VENUS_UV_SCANLINES(COLOR_FMT_NV12, in_height);
-  queue_buffer(fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, buffer_in, &buf_in[buffer_in], timestamp, bytesused);
+  //buf->sync(VISIONBUF_SYNC_TO_DEVICE);
+  queue_buffer(fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, buffer_in, buf, timestamp);

   return this->counter++;
 }
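
This hunk is the copy the PR title refers to: the V4L path used to I420ToNV12-convert every frame (the "GRRR COPY" above) into an encoder-owned buffer before queueing it. Now camerad already produces NV12 in a shared VisionBuf, so that buffer is queued to the hardware encoder as-is and the encode path no longer touches pixel data on the CPU.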

--- a/selfdrive/loggerd/encoder/v4l_encoder.h
+++ b/selfdrive/loggerd/encoder/v4l_encoder.h
@@ -13,8 +13,7 @@ public:
     VideoEncoder(filename, type, in_width, in_height, fps, bitrate, codec, out_width, out_height, write) { encoder_init(); }
   ~V4LEncoder();
   void encoder_init();
-  int encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr,
-                   int in_width, int in_height, VisionIpcBufExtra *extra);
+  int encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra);
   void encoder_open(const char* path);
   void encoder_close();
 private:

--- a/selfdrive/loggerd/encoderd.cc
+++ b/selfdrive/loggerd/encoderd.cc
@@ -103,8 +103,7 @@ void encoder_thread(EncoderdState *s, const LogCameraInfo &cam_info) {
       // encode a frame
       for (int i = 0; i < encoders.size(); ++i) {
-        int out_id = encoders[i]->encode_frame(buf->y, buf->u, buf->v,
-                                               buf->width, buf->height, &extra);
+        int out_id = encoders[i]->encode_frame(buf, &extra);

         if (out_id == -1) {
           LOGE("Failed to encode frame. frame_id: %d", extra.frame_id);

--- a/selfdrive/loggerd/tests/test_loggerd.py
+++ b/selfdrive/loggerd/tests/test_loggerd.py
@@ -109,16 +109,19 @@ class TestLoggerd(unittest.TestCase):
     Params().put("RecordFront", "1")

     expected_files = {"rlog", "qlog", "qcamera.ts", "fcamera.hevc", "dcamera.hevc"}
-    streams = [(VisionStreamType.VISION_STREAM_ROAD, tici_f_frame_size if TICI else eon_f_frame_size, "roadCameraState"),
-               (VisionStreamType.VISION_STREAM_DRIVER, tici_d_frame_size if TICI else eon_d_frame_size, "driverCameraState")]
+    streams = [(VisionStreamType.VISION_STREAM_ROAD, (*tici_f_frame_size, 2048*2346, 2048, 2048*1216) if TICI else eon_f_frame_size, "roadCameraState"),
+               (VisionStreamType.VISION_STREAM_DRIVER, (*tici_d_frame_size, 2048*2346, 2048, 2048*1216) if TICI else eon_d_frame_size, "driverCameraState")]
     if TICI:
       expected_files.add("ecamera.hevc")
-      streams.append((VisionStreamType.VISION_STREAM_WIDE_ROAD, tici_e_frame_size, "wideRoadCameraState"))
+      streams.append((VisionStreamType.VISION_STREAM_WIDE_ROAD, (*tici_e_frame_size, 2048*2346, 2048, 2048*1216), "wideRoadCameraState"))

     pm = messaging.PubMaster(["roadCameraState", "driverCameraState", "wideRoadCameraState"])
     vipc_server = VisionIpcServer("camerad")
-    for stream_type, frame_size, _ in streams:
-      vipc_server.create_buffers(stream_type, 40, False, *(frame_size))
+    for stream_type, frame_spec, _ in streams:
+      if TICI:
+        vipc_server.create_buffers_with_sizes(stream_type, 40, False, *(frame_spec))
+      else:
+        vipc_server.create_buffers(stream_type, 40, False, *(frame_spec))
     vipc_server.start_listener()

     for _ in range(5):
for _ in range(5):
@@ -130,8 +133,11 @@ class TestLoggerd(unittest.TestCase):
     fps = 20.0
     for n in range(1, int(num_segs*length*fps)+1):
-      for stream_type, frame_size, state in streams:
-        dat = np.empty(int(frame_size[0]*frame_size[1]*3/2), dtype=np.uint8)
+      for stream_type, frame_spec, state in streams:
+        if TICI:
+          dat = np.empty(frame_spec[2], dtype=np.uint8)
+        else:
+          dat = np.empty(int(frame_spec[0]*frame_spec[1]*3/2), dtype=np.uint8)
         vipc_server.send(stream_type, dat[:].flatten().tobytes(), n, n/fps, n/fps)

         camera_state = messaging.new_message(state)

--- a/selfdrive/modeld/dmonitoringmodeld.cc
+++ b/selfdrive/modeld/dmonitoringmodeld.cc
@@ -31,7 +31,7 @@ void run_model(DMonitoringModelState &model, VisionIpcClient &vipc_client) {
     }

     double t1 = millis_since_boot();
-    DMonitoringResult res = dmonitoring_eval_frame(&model, buf->addr, buf->width, buf->height, calib);
+    DMonitoringResult res = dmonitoring_eval_frame(&model, buf->addr, buf->width, buf->height, buf->stride, buf->uv_offset, calib);
     double t2 = millis_since_boot();

     // send dm packet

--- a/selfdrive/modeld/models/commonmodel.cc
+++ b/selfdrive/modeld/models/commonmodel.cc
@@ -22,9 +22,9 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
   loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
 }

-float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, const mat3 &projection, cl_mem *output) {
+float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
   transform_queue(&this->transform, q,
-                  yuv_cl, frame_width, frame_height,
+                  yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
                   y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);

   if (output == NULL) {

--- a/selfdrive/modeld/models/commonmodel.h
+++ b/selfdrive/modeld/models/commonmodel.h
@@ -25,7 +25,7 @@ class ModelFrame {
 public:
   ModelFrame(cl_device_id device_id, cl_context context);
   ~ModelFrame();
-  float* prepare(cl_mem yuv_cl, int width, int height, const mat3& transform, cl_mem *output);
+  float* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output);

   const int MODEL_WIDTH = 512;
   const int MODEL_HEIGHT = 256;

--- a/selfdrive/modeld/models/dmonitoring.cc
+++ b/selfdrive/modeld/models/dmonitoring.cc
@@ -55,19 +55,20 @@ static inline auto get_yuv_buf(std::vector<uint8_t> &buf, const int width, int h
 }

 struct Rect {int x, y, w, h;};
-void crop_yuv(uint8_t *raw, int width, int height, uint8_t *y, uint8_t *u, uint8_t *v, const Rect &rect) {
+void crop_nv12_to_yuv(uint8_t *raw, int stride, int uv_offset, uint8_t *y, uint8_t *u, uint8_t *v, const Rect &rect) {
   uint8_t *raw_y = raw;
-  uint8_t *raw_u = raw_y + (width * height);
-  uint8_t *raw_v = raw_u + ((width / 2) * (height / 2));
+  uint8_t *raw_uv = raw_y + uv_offset;
   for (int r = 0; r < rect.h / 2; r++) {
-    memcpy(y + 2 * r * rect.w, raw_y + (2 * r + rect.y) * width + rect.x, rect.w);
-    memcpy(y + (2 * r + 1) * rect.w, raw_y + (2 * r + rect.y + 1) * width + rect.x, rect.w);
-    memcpy(u + r * (rect.w / 2), raw_u + (r + (rect.y / 2)) * width / 2 + (rect.x / 2), rect.w / 2);
-    memcpy(v + r * (rect.w / 2), raw_v + (r + (rect.y / 2)) * width / 2 + (rect.x / 2), rect.w / 2);
+    memcpy(y + 2 * r * rect.w, raw_y + (2 * r + rect.y) * stride + rect.x, rect.w);
+    memcpy(y + (2 * r + 1) * rect.w, raw_y + (2 * r + rect.y + 1) * stride + rect.x, rect.w);
+    for (int h = 0; h < rect.w / 2; h++) {
+      u[r * rect.w/2 + h] = raw_uv[(r + (rect.y/2)) * stride + (rect.x/2 + h)*2];
+      v[r * rect.w/2 + h] = raw_uv[(r + (rect.y/2)) * stride + (rect.x/2 + h)*2 + 1];
+    }
   }
 }

-DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, float *calib) {
+DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, int stride, int uv_offset, float *calib) {
   Rect crop_rect;
   if (width == TICI_CAM_WIDTH) {
     const int cropped_height = tici_dm_crop::width / 1.33;
@@ -91,10 +92,10 @@ DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_
   auto [cropped_y, cropped_u, cropped_v] = get_yuv_buf(s->cropped_buf, crop_rect.w, crop_rect.h);
   if (!s->is_rhd) {
-    crop_yuv((uint8_t *)stream_buf, width, height, cropped_y, cropped_u, cropped_v, crop_rect);
+    crop_nv12_to_yuv((uint8_t *)stream_buf, stride, uv_offset, cropped_y, cropped_u, cropped_v, crop_rect);
   } else {
     auto [mirror_y, mirror_u, mirror_v] = get_yuv_buf(s->premirror_cropped_buf, crop_rect.w, crop_rect.h);
-    crop_yuv((uint8_t *)stream_buf, width, height, mirror_y, mirror_u, mirror_v, crop_rect);
+    crop_nv12_to_yuv((uint8_t *)stream_buf, stride, uv_offset, mirror_y, mirror_u, mirror_v, crop_rect);
     libyuv::I420Mirror(mirror_y, crop_rect.w,
                        mirror_u, crop_rect.w / 2,
                        mirror_v, crop_rect.w / 2,
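
A rough numpy equivalent of crop_nv12_to_yuv (illustrative only; like the pointer math above, it assumes rect.x and rect.y are even):

```python
import numpy as np

def crop_nv12_to_yuv_np(y_plane, uv_plane, rx, ry, rw, rh):
  # y_plane: (rows, stride) uint8, uv_plane: (rows//2, stride) interleaved
  y = y_plane[ry:ry + rh, rx:rx + rw]
  u = uv_plane[ry//2:(ry + rh)//2, rx:rx + rw:2]      # even bytes are U
  v = uv_plane[ry//2:(ry + rh)//2, rx + 1:rx + rw:2]  # odd bytes are V
  return y, u, v
```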

--- a/selfdrive/modeld/models/dmonitoring.h
+++ b/selfdrive/modeld/models/dmonitoring.h
@@ -46,7 +46,7 @@ typedef struct DMonitoringModelState {
 } DMonitoringModelState;

 void dmonitoring_init(DMonitoringModelState* s);
-DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, float *calib);
+DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, int stride, int uv_offset, float *calib);
 void dmonitoring_publish(PubMaster &pm, uint32_t frame_id, const DMonitoringResult &res, float execution_time, kj::ArrayPtr<const float> raw_pred);
 void dmonitoring_free(DMonitoringModelState* s);

--- a/selfdrive/modeld/models/driving.cc
+++ b/selfdrive/modeld/models/driving.cc
@@ -73,12 +73,12 @@ ModelOutput* model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* wbuf,
 #endif

   // if getInputBuf is not NULL, net_input_buf will be
-  auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, transform, static_cast<cl_mem*>(s->m->getInputBuf()));
+  auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, buf->stride, buf->uv_offset, transform, static_cast<cl_mem*>(s->m->getInputBuf()));
   s->m->addImage(net_input_buf, s->frame->buf_size);
   LOGT("Image added");

   if (wbuf != nullptr) {
-    auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, transform_wide, static_cast<cl_mem*>(s->m->getExtraBuf()));
+    auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, wbuf->stride, wbuf->uv_offset, transform_wide, static_cast<cl_mem*>(s->m->getExtraBuf()));
     s->m->addExtra(net_extra_buf, s->wide_frame->buf_size);
     LOGT("Extra image added");
   }

--- a/selfdrive/modeld/transforms/transform.cc
+++ b/selfdrive/modeld/transforms/transform.cc
@@ -25,7 +25,7 @@ void transform_destroy(Transform* s) {

 void transform_queue(Transform* s,
                      cl_command_queue q,
-                     cl_mem in_yuv, int in_width, int in_height,
+                     cl_mem in_yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
                      cl_mem out_y, cl_mem out_u, cl_mem out_v,
                      int out_width, int out_height,
                      const mat3& projection) {
@@ -44,28 +44,30 @@ void transform_queue(Transform* s,
   const int in_y_width = in_width;
   const int in_y_height = in_height;
+  const int in_y_px_stride = 1;
   const int in_uv_width = in_width/2;
   const int in_uv_height = in_height/2;
-  const int in_y_offset = 0;
-  const int in_u_offset = in_y_offset + in_y_width*in_y_height;
-  const int in_v_offset = in_u_offset + in_uv_width*in_uv_height;
+  const int in_uv_px_stride = 2;
+  const int in_u_offset = in_uv_offset;
+  const int in_v_offset = in_uv_offset + 1;

   const int out_y_width = out_width;
   const int out_y_height = out_height;
   const int out_uv_width = out_width/2;
   const int out_uv_height = out_height/2;

-  CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv));
-  CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_y_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_offset));
-  CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_y_height));
-  CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_y));
-  CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_int), &out_y_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &zero));
-  CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &out_y_height));
-  CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_mem), &s->m_y_cl));
+  CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv));          // src
+  CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_stride));       // src_row_stride
+  CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_px_stride));  // src_px_stride
+  CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &zero));            // src_offset
+  CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_height));     // src_rows
+  CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_y_width));      // src_cols
+  CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_y));           // dst
+  CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_y_width));     // dst_row_stride
+  CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero));            // dst_offset
+  CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_height));    // dst_rows
+  CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_y_width));    // dst_cols
+  CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_y_cl));      // M

   const size_t work_size_y[2] = {(size_t)out_y_width, (size_t)out_y_height};
@@ -74,21 +76,21 @@ void transform_queue(Transform* s,
   const size_t work_size_uv[2] = {(size_t)out_uv_width, (size_t)out_uv_height};

-  CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_uv_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_u_offset));
-  CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_uv_height));
-  CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_u));
-  CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_int), &out_uv_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &zero));
-  CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &out_uv_height));
-  CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_mem), &s->m_uv_cl));
+  CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_uv_px_stride));  // src_px_stride
+  CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_u_offset));      // src_offset
+  CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_height));     // src_rows
+  CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_uv_width));      // src_cols
+  CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_u));            // dst
+  CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_uv_width));     // dst_row_stride
+  CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero));             // dst_offset
+  CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_height));    // dst_rows
+  CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_uv_width));    // dst_cols
+  CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_uv_cl));      // M
   CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
                                   (const size_t*)&work_size_uv, NULL, 0, 0, NULL));

-  CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_v_offset));
-  CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_v));
+  CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_v_offset));  // src_offset
+  CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_v));        // dst
   CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
                                   (const size_t*)&work_size_uv, NULL, 0, 0, NULL));

--- a/selfdrive/modeld/transforms/transform.cl
+++ b/selfdrive/modeld/transforms/transform.cl
@@ -6,9 +6,9 @@
 #define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS)

 __kernel void warpPerspective(__global const uchar * src,
-                              int src_step, int src_offset, int src_rows, int src_cols,
+                              int src_row_stride, int src_px_stride, int src_offset, int src_rows, int src_cols,
                               __global uchar * dst,
-                              int dst_step, int dst_offset, int dst_rows, int dst_cols,
+                              int dst_row_stride, int dst_offset, int dst_rows, int dst_cols,
                               __constant float * M)
 {
   int dx = get_global_id(0);
int dx = get_global_id(0);
@@ -28,18 +28,18 @@ __kernel void warpPerspective(__global const uchar * src,
     short ax = (short)(X & (INTER_TAB_SIZE - 1));

     int v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ?
-        convert_int(src[mad24(sy, src_step, src_offset + sx)]) : 0;
+        convert_int(src[mad24(sy, src_row_stride, src_offset + sx*src_px_stride)]) : 0;
     int v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ?
-        convert_int(src[mad24(sy, src_step, src_offset + (sx+1))]) : 0;
+        convert_int(src[mad24(sy, src_row_stride, src_offset + (sx+1)*src_px_stride)]) : 0;
     int v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ?
-        convert_int(src[mad24(sy+1, src_step, src_offset + sx)]) : 0;
+        convert_int(src[mad24(sy+1, src_row_stride, src_offset + sx*src_px_stride)]) : 0;
     int v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ?
-        convert_int(src[mad24(sy+1, src_step, src_offset + (sx+1))]) : 0;
+        convert_int(src[mad24(sy+1, src_row_stride, src_offset + (sx+1)*src_px_stride)]) : 0;

     float taby = 1.f/INTER_TAB_SIZE*ay;
     float tabx = 1.f/INTER_TAB_SIZE*ax;

-    int dst_index = mad24(dy, dst_step, dst_offset + dx);
+    int dst_index = mad24(dy, dst_row_stride, dst_offset + dx);

     int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
     int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE );
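
The kernel change in a nutshell: one warpPerspective now serves all three planes by addressing src[sy*src_row_stride + src_offset + sx*src_px_stride]. A sketch of the three (offset, px_stride) configurations that transform.cc sets up, using the NV12 constants from elsewhere in this diff:

```python
stride, uv_offset = 2048, 2048 * 1216  # comma three road camera

def src_index(sy, sx, offset, px_stride, row_stride=stride):
  return sy * row_stride + offset + sx * px_stride

y_idx = src_index(100, 50, 0, 1)              # Y: packed bytes, px_stride 1
u_idx = src_index(100, 50, uv_offset, 2)      # U: even bytes of interleaved UV
v_idx = src_index(100, 50, uv_offset + 1, 2)  # V: odd bytes of interleaved UV
```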

--- a/selfdrive/modeld/transforms/transform.h
+++ b/selfdrive/modeld/transforms/transform.h
@@ -19,7 +19,7 @@ void transform_init(Transform* s, cl_context ctx, cl_device_id device_id);
 void transform_destroy(Transform* transform);

 void transform_queue(Transform* s, cl_command_queue q,
-                     cl_mem yuv, int in_width, int in_height,
+                     cl_mem yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
                      cl_mem out_y, cl_mem out_u, cl_mem out_v,
                      int out_width, int out_height,
                      const mat3& projection);

--- a/selfdrive/test/process_replay/model_replay.py
+++ b/selfdrive/test/process_replay/model_replay.py
@@ -96,7 +96,7 @@ def model_replay(lr, frs):
     if msg.which() in VIPC_STREAM:
       msg = msg.as_builder()
       camera_state = getattr(msg, msg.which())
-      img = frs[msg.which()].get(frame_idxs[msg.which()], pix_fmt="yuv420p")[0]
+      img = frs[msg.which()].get(frame_idxs[msg.which()], pix_fmt="nv12")[0]
       frame_idxs[msg.which()] += 1

       # send camera state and frame

--- a/selfdrive/test/process_replay/regen.py
+++ b/selfdrive/test/process_replay/regen.py
@@ -164,7 +164,7 @@ def replay_cameras(lr, frs):
       print(f"Decompressing frames {s}")
       frames = []
       for i in tqdm(range(fr.frame_count)):
-        img = fr.get(i, pix_fmt='yuv420p')[0]
+        img = fr.get(i, pix_fmt='nv12')[0]
         frames.append(img.flatten().tobytes())

     vs.create_buffers(stream, 40, False, size[0], size[1])

--- a/selfdrive/test/test_onroad.py
+++ b/selfdrive/test/test_onroad.py
@@ -23,11 +23,11 @@ from tools.lib.logreader import LogReader
 PROCS = {
   "selfdrive.controls.controlsd": 35.0,
   "./loggerd": 10.0,
-  "./encoderd": 37.3,
+  "./encoderd": 12.5,
   "./camerad": 16.5,
   "./locationd": 9.1,
   "selfdrive.controls.plannerd": 11.7,
-  "./_ui": 21.0,
+  "./_ui": 26.4,
   "selfdrive.locationd.paramsd": 9.0,
   "./_sensord": 6.17,
   "selfdrive.controls.radard": 4.5,

--- a/selfdrive/ui/qt/widgets/cameraview.cc
+++ b/selfdrive/ui/qt/widgets/cameraview.cc
@@ -34,17 +34,15 @@ const char frame_fragment_shader[] =
   "precision mediump float;\n"
 #endif
   "uniform sampler2D uTextureY;\n"
-  "uniform sampler2D uTextureU;\n"
-  "uniform sampler2D uTextureV;\n"
+  "uniform sampler2D uTextureUV;\n"
   "in vec2 vTexCoord;\n"
   "out vec4 colorOut;\n"
   "void main() {\n"
   "  float y = texture(uTextureY, vTexCoord).r;\n"
-  "  float u = texture(uTextureU, vTexCoord).r - 0.5;\n"
-  "  float v = texture(uTextureV, vTexCoord).r - 0.5;\n"
-  "  float r = y + 1.402 * v;\n"
-  "  float g = y - 0.344 * u - 0.714 * v;\n"
-  "  float b = y + 1.772 * u;\n"
+  "  vec2 uv = texture(uTextureUV, vTexCoord).rg - 0.5;\n"
+  "  float r = y + 1.402 * uv.y;\n"
+  "  float g = y - 0.344 * uv.x - 0.714 * uv.y;\n"
+  "  float b = y + 1.772 * uv.x;\n"
   "  colorOut = vec4(r, g, b, 1.0);\n"
   "}\n";
@@ -158,8 +156,7 @@ void CameraViewWidget::initializeGL() {
   glGenTextures(3, textures);
   glUseProgram(program->programId());
   glUniform1i(program->uniformLocation("uTextureY"), 0);
-  glUniform1i(program->uniformLocation("uTextureU"), 1);
-  glUniform1i(program->uniformLocation("uTextureV"), 2);
+  glUniform1i(program->uniformLocation("uTextureUV"), 1);
 }
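
In plain terms: with I420 the UI uploaded U and V as two single-channel textures; with NV12 the chroma arrives as one two-channel (GL_RG) texture, so uv.x carries U and uv.y carries V. The YUV-to-RGB constants are unchanged; a sketch of the per-pixel math the shader applies:

```python
# y in [0, 1]; u and v already centered around 0 by the "- 0.5"
def yuv_to_rgb_px(y, u, v):
  r = y + 1.402 * v
  g = y - 0.344 * u - 0.714 * v
  b = y + 1.772 * u
  return r, g, b
```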
void CameraViewWidget::showEvent(QShowEvent *event) {
@@ -223,19 +220,23 @@ void CameraViewWidget::paintGL() {
   VisionBuf *frame = frames[frame_idx].second;

   glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, stream_stride);
   glViewport(0, 0, width(), height());

   glBindVertexArray(frame_vao);
   glUseProgram(program->programId());

-  uint8_t *address[3] = {frame->y, frame->u, frame->v};
-  for (int i = 0; i < 3; ++i) {
-    glActiveTexture(GL_TEXTURE0 + i);
-    glBindTexture(GL_TEXTURE_2D, textures[i]);
-    int width = i == 0 ? stream_width : stream_width / 2;
-    int height = i == 0 ? stream_height : stream_height / 2;
-    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RED, GL_UNSIGNED_BYTE, address[i]);
-    assert(glGetError() == GL_NO_ERROR);
-  }
+  glActiveTexture(GL_TEXTURE0);
+  glBindTexture(GL_TEXTURE_2D, textures[0]);
+  glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, stream_width, stream_height, GL_RED, GL_UNSIGNED_BYTE, frame->y);
+  assert(glGetError() == GL_NO_ERROR);
+
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, stream_stride/2);
+  glActiveTexture(GL_TEXTURE0 + 1);
+  glBindTexture(GL_TEXTURE_2D, textures[1]);
+  glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, stream_width/2, stream_height/2, GL_RG, GL_UNSIGNED_BYTE, frame->uv);
+  assert(glGetError() == GL_NO_ERROR);

   glUniformMatrix4fv(program->uniformLocation("uTransform"), 1, GL_TRUE, frame_mat.v);
   assert(glGetError() == GL_NO_ERROR);
@@ -246,6 +247,7 @@ void CameraViewWidget::paintGL() {
   glBindTexture(GL_TEXTURE_2D, 0);
   glActiveTexture(GL_TEXTURE0);
   glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
 }
void CameraViewWidget::vipcConnected(VisionIpcClient *vipc_client) {
@@ -253,18 +255,23 @@ void CameraViewWidget::vipcConnected(VisionIpcClient *vipc_client) {
   frames.clear();
   stream_width = vipc_client->buffers[0].width;
   stream_height = vipc_client->buffers[0].height;
+  stream_stride = vipc_client->buffers[0].stride;

-  for (int i = 0; i < 3; ++i) {
-    glBindTexture(GL_TEXTURE_2D, textures[i]);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    int width = i == 0 ? stream_width : stream_width / 2;
-    int height = i == 0 ? stream_height : stream_height / 2;
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
-    assert(glGetError() == GL_NO_ERROR);
-  }
+  glBindTexture(GL_TEXTURE_2D, textures[0]);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+  glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+  glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+  glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, stream_width, stream_height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
+  assert(glGetError() == GL_NO_ERROR);
+
+  glBindTexture(GL_TEXTURE_2D, textures[1]);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+  glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+  glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+  glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, stream_width/2, stream_height/2, 0, GL_RG, GL_UNSIGNED_BYTE, nullptr);
+  assert(glGetError() == GL_NO_ERROR);

   updateFrameMat(width(), height());
 }

--- a/selfdrive/ui/qt/widgets/cameraview.h
+++ b/selfdrive/ui/qt/widgets/cameraview.h
@@ -49,6 +49,7 @@ protected:
   std::string stream_name;
   int stream_width = 0;
   int stream_height = 0;
+  int stream_stride = 0;
   std::atomic<VisionStreamType> stream_type;
   QThread *vipc_thread = nullptr;

--- a/selfdrive/ui/replay/framereader.cc
+++ b/selfdrive/ui/replay/framereader.cc
@@ -229,17 +229,20 @@ AVFrame *FrameReader::decodeFrame(AVPacket *pkt) {
 bool FrameReader::copyBuffers(AVFrame *f, uint8_t *yuv) {
   if (hw_pix_fmt == HW_PIX_FMT) {
     uint8_t *y = yuv ? yuv : nv12toyuv_buffer.data();
-    uint8_t *u = y + width * height;
-    uint8_t *v = u + (width / 2) * (height / 2);
-    libyuv::NV12ToI420(f->data[0], f->linesize[0], f->data[1], f->linesize[1],
-                       y, width, u, width / 2, v, width / 2, width, height);
+    uint8_t *uv = y + width * height;
+    for (int i = 0; i < height/2; i++) {
+      memcpy(y + (i*2 + 0)*width, f->data[0] + (i*2 + 0)*f->linesize[0], width);
+      memcpy(y + (i*2 + 1)*width, f->data[0] + (i*2 + 1)*f->linesize[0], width);
+      memcpy(uv + i*width, f->data[1] + i*f->linesize[1], width);
+    }
   } else {
-    uint8_t *u = yuv + width * height;
-    uint8_t *v = u + (width / 2) * (height / 2);
-    libyuv::I420Copy(f->data[0], f->linesize[0],
+    uint8_t *y = yuv ? yuv : nv12toyuv_buffer.data();
+    uint8_t *uv = y + width * height;
+    libyuv::I420ToNV12(f->data[0], f->linesize[0],
                        f->data[1], f->linesize[1],
                        f->data[2], f->linesize[2],
-                       yuv, width, u, width / 2, v, width / 2,
+                       y, width,
+                       uv, width / 2,
                        width, height);
   }
   return true;

--- a/tools/lib/framereader.py
+++ b/tools/lib/framereader.py
@@ -185,20 +185,28 @@ def read_file_check_size(f, sz, cookie):
   return buff


-def rgb24toyuv420(rgb):
+def rgb24toyuv(rgb):
   yuv_from_rgb = np.array([[ 0.299     ,  0.587     ,  0.114      ],
                            [-0.14714119, -0.28886916,  0.43601035 ],
                            [ 0.61497538, -0.51496512, -0.10001026 ]])
   img = np.dot(rgb.reshape(-1, 3), yuv_from_rgb.T).reshape(rgb.shape)

-  y_len = img.shape[0] * img.shape[1]
-  uv_len = y_len // 4
-
   ys = img[:, :, 0]
   us = (img[::2, ::2, 1] + img[1::2, ::2, 1] + img[::2, 1::2, 1] + img[1::2, 1::2, 1]) / 4 + 128
   vs = (img[::2, ::2, 2] + img[1::2, ::2, 2] + img[::2, 1::2, 2] + img[1::2, 1::2, 2]) / 4 + 128

-  yuv420 = np.empty(y_len + 2 * uv_len, dtype=img.dtype)
+  return ys, us, vs


+def rgb24toyuv420(rgb):
+  ys, us, vs = rgb24toyuv(rgb)
+
+  y_len = rgb.shape[0] * rgb.shape[1]
+  uv_len = y_len // 4
+
+  yuv420 = np.empty(y_len + 2 * uv_len, dtype=rgb.dtype)
   yuv420[:y_len] = ys.reshape(-1)
   yuv420[y_len:y_len + uv_len] = us.reshape(-1)
   yuv420[y_len + uv_len:y_len + 2 * uv_len] = vs.reshape(-1)

@@ -206,6 +214,20 @@ def rgb24toyuv420(rgb):
   return yuv420.clip(0, 255).astype('uint8')


+def rgb24tonv12(rgb):
+  ys, us, vs = rgb24toyuv(rgb)
+
+  y_len = rgb.shape[0] * rgb.shape[1]
+  uv_len = y_len // 4
+
+  nv12 = np.empty(y_len + 2 * uv_len, dtype=rgb.dtype)
+  nv12[:y_len] = ys.reshape(-1)
+  nv12[y_len::2] = us.reshape(-1)
+  nv12[y_len+1::2] = vs.reshape(-1)
+
+  return nv12.clip(0, 255).astype('uint8')
+
+
 def decompress_video_data(rawdat, vid_fmt, w, h, pix_fmt):
   # using a tempfile is much faster than proc.communicate for some reason
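
A quick consistency check between the two packings above (illustrative): the NV12 buffer holds the same bytes as the I420 one, with U and V interleaved.

```python
import numpy as np

rgb = np.random.randint(0, 256, (8, 8, 3), dtype=np.uint8)
i420 = rgb24toyuv420(rgb)
nv12 = rgb24tonv12(rgb)
y_len, uv_len = 8 * 8, 8 * 8 // 4
assert (nv12[:y_len] == i420[:y_len]).all()                  # Y identical
assert (nv12[y_len::2] == i420[y_len:y_len + uv_len]).all()  # U
assert (nv12[y_len + 1::2] == i420[y_len + uv_len:]).all()   # V
```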
@@ -237,6 +259,8 @@ def decompress_video_data(rawdat, vid_fmt, w, h, pix_fmt):
   if pix_fmt == "rgb24":
     ret = np.frombuffer(dat, dtype=np.uint8).reshape(-1, h, w, 3)
+  elif pix_fmt == "nv12":
+    ret = np.frombuffer(dat, dtype=np.uint8).reshape(-1, (h*w*3//2))
   elif pix_fmt == "yuv420p":
     ret = np.frombuffer(dat, dtype=np.uint8).reshape(-1, (h*w*3//2))
   elif pix_fmt == "yuv444p":
@@ -304,7 +328,7 @@ class RawFrameReader(BaseFrameReader):
     assert self.frame_count is not None
     assert num+count <= self.frame_count

-    if pix_fmt not in ("yuv420p", "rgb24"):
+    if pix_fmt not in ("nv12", "yuv420p", "rgb24"):
       raise ValueError(f"Unsupported pixel format {pix_fmt!r}")

     app = []
@@ -313,6 +337,8 @@ class RawFrameReader(BaseFrameReader):
       rgb_dat = self.load_and_debayer(dat)
       if pix_fmt == "rgb24":
         app.append(rgb_dat)
+      elif pix_fmt == "nv12":
+        app.append(rgb24tonv12(rgb_dat))
       elif pix_fmt == "yuv420p":
         app.append(rgb24toyuv420(rgb_dat))
       else:
@@ -329,7 +355,7 @@ class VideoStreamDecompressor:
     self.h = h
     self.pix_fmt = pix_fmt

-    if pix_fmt == "yuv420p":
+    if pix_fmt in ("nv12", "yuv420p"):
       self.out_size = w*h*3//2 # yuv420p
     elif pix_fmt in ("rgb24", "yuv444p"):
       self.out_size = w*h*3
@@ -387,6 +413,8 @@ class VideoStreamDecompressor:
         ret = np.frombuffer(dat, dtype=np.uint8).reshape((self.h, self.w, 3))
       elif self.pix_fmt == "yuv420p":
         ret = np.frombuffer(dat, dtype=np.uint8)
+      elif self.pix_fmt == "nv12":
+        ret = np.frombuffer(dat, dtype=np.uint8)
       elif self.pix_fmt == "yuv444p":
         ret = np.frombuffer(dat, dtype=np.uint8).reshape((3, self.h, self.w))
       else:
@@ -549,7 +577,7 @@ class GOPFrameReader(BaseFrameReader):
     if num + count > self.frame_count:
       raise ValueError(f"{num + count} > {self.frame_count}")

-    if pix_fmt not in ("yuv420p", "rgb24", "yuv444p"):
+    if pix_fmt not in ("nv12", "yuv420p", "rgb24", "yuv444p"):
       raise ValueError(f"Unsupported pixel format {pix_fmt!r}")

     ret = [self._get_one(num + i, pix_fmt) for i in range(count)]
