diff --git a/cereal b/cereal
index 93a2560618..bf4960f83c 160000
--- a/cereal
+++ b/cereal
@@ -1 +1 @@
-Subproject commit 93a2560618ef41bc256f7a0331c8b047863d4d69
+Subproject commit bf4960f83ccdefc7e9d2e405878ae6c8efced83b
diff --git a/selfdrive/camerad/cameras/camera_common.cc b/selfdrive/camerad/cameras/camera_common.cc
index d3fd34f6e4..40a3b86cdd 100644
--- a/selfdrive/camerad/cameras/camera_common.cc
+++ b/selfdrive/camerad/cameras/camera_common.cc
@@ -16,6 +16,7 @@
 #include "common/swaglog.h"
 #include "common/util.h"
 #include "selfdrive/hardware/hw.h"
+#include "msm_media_info.h"
 
 #ifdef QCOM2
 #include "CL/cl_ext_qcom.h"
@@ -28,17 +29,17 @@ ExitHandler do_exit;
 
 class Debayer {
 public:
-  Debayer(cl_device_id device_id, cl_context context, const CameraBuf *b, const CameraState *s) {
+  Debayer(cl_device_id device_id, cl_context context, const CameraBuf *b, const CameraState *s, int buf_width, int uv_offset) {
     char args[4096];
     const CameraInfo *ci = &s->ci;
     hdr_ = ci->hdr;
     snprintf(args, sizeof(args),
              "-cl-fast-relaxed-math -cl-denorms-are-zero "
              "-DFRAME_WIDTH=%d -DFRAME_HEIGHT=%d -DFRAME_STRIDE=%d -DFRAME_OFFSET=%d "
-             "-DRGB_WIDTH=%d -DRGB_HEIGHT=%d -DRGB_STRIDE=%d "
+             "-DRGB_WIDTH=%d -DRGB_HEIGHT=%d -DRGB_STRIDE=%d -DYUV_STRIDE=%d -DUV_OFFSET=%d "
             "-DBAYER_FLIP=%d -DHDR=%d -DCAM_NUM=%d%s",
              ci->frame_width, ci->frame_height, ci->frame_stride, ci->frame_offset,
-             b->rgb_width, b->rgb_height, b->rgb_stride,
+             b->rgb_width, b->rgb_height, b->rgb_stride, buf_width, uv_offset,
              ci->bayer_flip, ci->hdr, s->camera_num, s->camera_num==1 ? " -DVIGNETTING" : "");
     const char *cl_file = "cameras/real_debayer.cl";
     cl_program prg_debayer = cl_program_from_file(context, device_id, cl_file, args);
@@ -101,11 +102,17 @@ void CameraBuf::init(cl_device_id device_id, cl_context context, CameraState *s,
   rgb_stride = vipc_server->get_buffer(rgb_type)->stride;
   LOGD("created %d UI vipc buffers with size %dx%d", UI_BUF_COUNT, rgb_width, rgb_height);
 
-  vipc_server->create_buffers(yuv_type, YUV_BUFFER_COUNT, false, rgb_width, rgb_height);
-  LOGD("created %d YUV vipc buffers with size %dx%d", YUV_BUFFER_COUNT, rgb_width, rgb_height);
+  int nv12_width = VENUS_Y_STRIDE(COLOR_FMT_NV12, rgb_width);
+  int nv12_height = VENUS_Y_SCANLINES(COLOR_FMT_NV12, rgb_height);
+  assert(nv12_width == VENUS_UV_STRIDE(COLOR_FMT_NV12, rgb_width));
+  assert(nv12_height/2 == VENUS_UV_SCANLINES(COLOR_FMT_NV12, rgb_height));
+  size_t nv12_size = 2346 * nv12_width;  // comes from v4l2_format.fmt.pix_mp.plane_fmt[0].sizeimage
+  size_t nv12_uv_offset = nv12_width * nv12_height;
+  vipc_server->create_buffers_with_sizes(yuv_type, YUV_BUFFER_COUNT, false, rgb_width, rgb_height, nv12_size, nv12_width, nv12_uv_offset);
+  LOGD("created %d YUV vipc buffers with size %dx%d", YUV_BUFFER_COUNT, nv12_width, nv12_height);
 
   if (ci->bayer) {
-    debayer = new Debayer(device_id, context, this, s);
+    debayer = new Debayer(device_id, context, this, s, nv12_width, nv12_uv_offset);
   }
   rgb2yuv = std::make_unique<Rgb2Yuv>(context, device_id, rgb_width, rgb_height, rgb_stride);
@@ -233,20 +240,28 @@ kj::Array<uint8_t> get_raw_frame_image(const CameraBuf *b) {
 }
 
 static kj::Array<capnp::byte> yuv420_to_jpeg(const CameraBuf *b, int thumbnail_width, int thumbnail_height) {
+  int downscale = b->cur_yuv_buf->width / thumbnail_width;
+  assert(downscale * thumbnail_height == b->cur_yuv_buf->height);
+  int in_stride = b->cur_yuv_buf->stride;
+
   // make the buffer big enough. jpeg_write_raw_data requires a 16-pixel aligned height to be used.
   std::unique_ptr<uint8_t[]> buf(new uint8_t[(thumbnail_width * ((thumbnail_height + 15) & ~15) * 3) / 2]);
   uint8_t *y_plane = buf.get();
   uint8_t *u_plane = y_plane + thumbnail_width * thumbnail_height;
   uint8_t *v_plane = u_plane + (thumbnail_width * thumbnail_height) / 4;
   {
-    int result = libyuv::I420Scale(
-        b->cur_yuv_buf->y, b->rgb_width,
-        b->cur_yuv_buf->u, b->rgb_width / 2,
-        b->cur_yuv_buf->v, b->rgb_width / 2,
-        b->rgb_width, b->rgb_height,
-        y_plane, thumbnail_width,
-        u_plane, thumbnail_width / 2,
-        v_plane, thumbnail_width / 2,
-        thumbnail_width, thumbnail_height,
-        libyuv::kFilterNone);
-    if (result != 0) {
-      LOGE("Generate YUV thumbnail failed.");
-      return {};
+    // subsampled conversion from nv12 to yuv
+    for (int hy = 0; hy < thumbnail_height/2; hy++) {
+      for (int hx = 0; hx < thumbnail_width/2; hx++) {
+        int ix = hx * downscale + (downscale-1)/2;
+        int iy = hy * downscale + (downscale-1)/2;
+        y_plane[(hy*2 + 0)*thumbnail_width + (hx*2 + 0)] = b->cur_yuv_buf->y[(iy*2 + 0) * in_stride + ix*2 + 0];
+        y_plane[(hy*2 + 0)*thumbnail_width + (hx*2 + 1)] = b->cur_yuv_buf->y[(iy*2 + 0) * in_stride + ix*2 + 1];
+        y_plane[(hy*2 + 1)*thumbnail_width + (hx*2 + 0)] = b->cur_yuv_buf->y[(iy*2 + 1) * in_stride + ix*2 + 0];
+        y_plane[(hy*2 + 1)*thumbnail_width + (hx*2 + 1)] = b->cur_yuv_buf->y[(iy*2 + 1) * in_stride + ix*2 + 1];
+        u_plane[hy*thumbnail_width/2 + hx] = b->cur_yuv_buf->uv[iy*in_stride + ix*2 + 0];
+        v_plane[hy*thumbnail_width/2 + hx] = b->cur_yuv_buf->uv[iy*in_stride + ix*2 + 1];
+      }
     }
   }
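Note: with the switch from planar yuv420 to NV12, a VisionIPC frame is no longer a tightly packed w*h*3/2 blob; it has a padded row stride and a UV plane offset. A minimal sketch of the geometry, assuming the VENUS_* macros reduce to 128-byte stride and 32-row scanline alignment for COLOR_FMT_NV12 (verify against msm_media_info.h before relying on this):

def align(x, a):
  return (x + a - 1) // a * a

def nv12_spec(width, height):
  stride = align(width, 128)        # VENUS_Y_STRIDE == VENUS_UV_STRIDE here
  y_scanlines = align(height, 32)   # VENUS_Y_SCANLINES
  uv_offset = stride * y_scanlines  # UV rows start after the padded Y plane
  size = 2346 * stride              # sizeimage reported by the V4L2 driver, per the comment above
  return stride, uv_offset, size

# for the 1928x1208 camera streams this yields (2048, 2048*1216, 2048*2346),
# the same constants hardcoded in test_loggerd.py below
print(nv12_spec(1928, 1208))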
diff --git a/selfdrive/camerad/cameras/real_debayer.cl b/selfdrive/camerad/cameras/real_debayer.cl
index dc6044ed53..fc9f410494 100644
--- a/selfdrive/camerad/cameras/real_debayer.cl
+++ b/selfdrive/camerad/cameras/real_debayer.cl
@@ -1,7 +1,5 @@
 #define UV_WIDTH RGB_WIDTH / 2
 #define UV_HEIGHT RGB_HEIGHT / 2
-#define U_OFFSET RGB_WIDTH * RGB_HEIGHT
-#define V_OFFSET RGB_WIDTH * RGB_HEIGHT + UV_WIDTH * UV_HEIGHT
 
 #define RGB_TO_Y(r, g, b) ((((mul24(b, 13) + mul24(g, 65) + mul24(r, 33)) + 64) >> 7) + 16)
 #define RGB_TO_U(r, g, b) ((mul24(b, 56) - mul24(g, 37) - mul24(r, 19) + 0x8080) >> 8)
@@ -141,17 +139,20 @@ __kernel void debayer10(const __global uchar * in, __global uchar * out)
     RGB_TO_Y(rgb_out[0].s0, rgb_out[0].s1, rgb_out[0].s2),
     RGB_TO_Y(rgb_out[1].s0, rgb_out[1].s1, rgb_out[1].s2)
   );
-  vstore2(yy, 0, out + mad24(gid_y * 2, RGB_WIDTH, gid_x * 2));
+  vstore2(yy, 0, out + mad24(gid_y * 2, YUV_STRIDE, gid_x * 2));
   yy = (uchar2)(
     RGB_TO_Y(rgb_out[2].s0, rgb_out[2].s1, rgb_out[2].s2),
     RGB_TO_Y(rgb_out[3].s0, rgb_out[3].s1, rgb_out[3].s2)
   );
-  vstore2(yy, 0, out + mad24(gid_y * 2 + 1, RGB_WIDTH, gid_x * 2));
+  vstore2(yy, 0, out + mad24(gid_y * 2 + 1, YUV_STRIDE, gid_x * 2));
 
   // write uvs
   const short ar = AVERAGE(rgb_out[0].s0, rgb_out[1].s0, rgb_out[2].s0, rgb_out[3].s0);
   const short ag = AVERAGE(rgb_out[0].s1, rgb_out[1].s1, rgb_out[2].s1, rgb_out[3].s1);
   const short ab = AVERAGE(rgb_out[0].s2, rgb_out[1].s2, rgb_out[2].s2, rgb_out[3].s2);
-  out[U_OFFSET + mad24(gid_y, UV_WIDTH, gid_x)] = RGB_TO_U(ar, ag, ab);
-  out[V_OFFSET + mad24(gid_y, UV_WIDTH, gid_x)] = RGB_TO_V(ar, ag, ab);
+  uchar2 uv = (uchar2)(
+    RGB_TO_U(ar, ag, ab),
+    RGB_TO_V(ar, ag, ab)
+  );
+  vstore2(uv, 0, out + UV_OFFSET + mad24(gid_y, YUV_STRIDE, gid_x * 2));
 }
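Note: the debayer kernel now writes straight into the padded NV12 buffer: Y rows advance by YUV_STRIDE rather than RGB_WIDTH, and each work item stores its averaged U/V as one interleaved pair with a single vstore2. The addressing restated in Python for clarity (the names are the -D defines passed in above):

def y_index(px, py, yuv_stride):
  # Y sample (px, py) in the full-resolution image
  return py * yuv_stride + px

def uv_pair_index(gid_x, gid_y, yuv_stride, uv_offset):
  # one 2x2 quad's chroma: U at the returned index, V right after it
  return uv_offset + gid_y * yuv_stride + 2 * gid_x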
diff --git a/selfdrive/camerad/snapshot/snapshot.py b/selfdrive/camerad/snapshot/snapshot.py
index db1342ee7d..bf37ca181e 100755
--- a/selfdrive/camerad/snapshot/snapshot.py
+++ b/selfdrive/camerad/snapshot/snapshot.py
@@ -44,10 +44,10 @@ def yuv_to_rgb(y, u, v):
   return rgb.astype(np.uint8)
 
 
-def extract_image(buf, w, h):
-  y = np.array(buf[:w*h], dtype=np.uint8).reshape((h, w))
-  u = np.array(buf[w*h: w*h+(w//2)*(h//2)], dtype=np.uint8).reshape((h//2, w//2))
-  v = np.array(buf[w*h+(w//2)*(h//2):], dtype=np.uint8).reshape((h//2, w//2))
+def extract_image(buf, w, h, stride, uv_offset):
+  y = np.array(buf[:uv_offset], dtype=np.uint8).reshape((-1, stride))[:h, :w]
+  u = np.array(buf[uv_offset::2], dtype=np.uint8).reshape((-1, stride//2))[:h//2, :w//2]
+  v = np.array(buf[uv_offset+1::2], dtype=np.uint8).reshape((-1, stride//2))[:h//2, :w//2]
   return yuv_to_rgb(y, u, v)
 
 
@@ -79,10 +79,10 @@ def get_snapshots(frame="roadCameraState", front_frame="driverCameraState", focu
     rear, front = None, None
     if frame is not None:
       c = vipc_clients[frame]
-      rear = extract_image(c.recv(), c.width, c.height)
+      rear = extract_image(c.recv(), c.width, c.height, c.stride, c.uv_offset)
     if front_frame is not None:
       c = vipc_clients[front_frame]
-      front = extract_image(c.recv(), c.width, c.height)
+      front = extract_image(c.recv(), c.width, c.height, c.stride, c.uv_offset)
   return rear, front
diff --git a/selfdrive/hardware/tici/test_power_draw.py b/selfdrive/hardware/tici/test_power_draw.py
index d6af25a4e5..934d72e3a1 100755
--- a/selfdrive/hardware/tici/test_power_draw.py
+++ b/selfdrive/hardware/tici/test_power_draw.py
@@ -22,7 +22,7 @@ PROCS = [
   Proc('camerad', 2.25),
   Proc('modeld', 0.95),
   Proc('dmonitoringmodeld', 0.25),
-  Proc('encoderd', 0.42),
+  Proc('encoderd', 0.23),
 ]
diff --git a/selfdrive/loggerd/encoder/encoder.h b/selfdrive/loggerd/encoder/encoder.h
index 9792d61c46..312b68ba19 100644
--- a/selfdrive/loggerd/encoder/encoder.h
+++ b/selfdrive/loggerd/encoder/encoder.h
@@ -19,8 +19,7 @@ public:
     : filename(filename), type(type), in_width(in_width), in_height(in_height), fps(fps),
       bitrate(bitrate), codec(codec), out_width(out_width), out_height(out_height), write(write) { }
   virtual ~VideoEncoder();
-  virtual int encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr,
-                           int in_width, int in_height, VisionIpcBufExtra *extra) = 0;
+  virtual int encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra) = 0;
   virtual void encoder_open(const char* path) = 0;
   virtual void encoder_close() = 0;
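Note: encode_frame() now takes the VisionBuf itself, so every encoder sees the buffer geometry (and, on the V4L path, the underlying buffer) instead of three bare plane pointers. A rough layout model of the fields it consumes; attribute names mirror the C++ struct, but this is only a sketch:

import numpy as np

class VisionBufModel:
  def __init__(self, data, width, height, stride, uv_offset):
    self.width, self.height = width, height
    self.stride, self.uv_offset = stride, uv_offset
    self.y = data[:uv_offset].reshape(-1, stride)   # padded Y rows
    self.uv = data[uv_offset:].reshape(-1, stride)  # interleaved UV rows

buf = VisionBufModel(np.zeros(2048*2346, np.uint8), 1928, 1208, 2048, 2048*1216)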
diff --git a/selfdrive/loggerd/encoder/ffmpeg_encoder.cc b/selfdrive/loggerd/encoder/ffmpeg_encoder.cc
index a2b14ef3fd..22587819a4 100644
--- a/selfdrive/loggerd/encoder/ffmpeg_encoder.cc
+++ b/selfdrive/loggerd/encoder/ffmpeg_encoder.cc
@@ -34,6 +34,8 @@ void FfmpegEncoder::encoder_init() {
   frame->linesize[1] = out_width/2;
   frame->linesize[2] = out_width/2;
 
+  convert_buf.resize(in_width * in_height * 3 / 2);
+
   if (in_width != out_width || in_height != out_height) {
     downscale_buf.resize(out_width * out_height * 3 / 2);
   }
@@ -70,18 +72,27 @@ void FfmpegEncoder::encoder_close() {
   is_open = false;
 }
 
-int FfmpegEncoder::encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr,
-                                int in_width_, int in_height_, VisionIpcBufExtra *extra) {
-  assert(in_width_ == this->in_width);
-  assert(in_height_ == this->in_height);
+int FfmpegEncoder::encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra) {
+  assert(buf->width == this->in_width);
+  assert(buf->height == this->in_height);
+
+  uint8_t *cy = convert_buf.data();
+  uint8_t *cu = cy + in_width * in_height;
+  uint8_t *cv = cu + (in_width / 2) * (in_height / 2);
+  libyuv::NV12ToI420(buf->y, buf->stride,
+                     buf->uv, buf->stride,
+                     cy, in_width,
+                     cu, in_width/2,
+                     cv, in_width/2,
+                     in_width, in_height);
 
   if (downscale_buf.size() > 0) {
     uint8_t *out_y = downscale_buf.data();
     uint8_t *out_u = out_y + frame->width * frame->height;
     uint8_t *out_v = out_u + (frame->width / 2) * (frame->height / 2);
-    libyuv::I420Scale(y_ptr, in_width,
-                      u_ptr, in_width/2,
-                      v_ptr, in_width/2,
+    libyuv::I420Scale(cy, in_width,
+                      cu, in_width/2,
+                      cv, in_width/2,
                       in_width, in_height,
                       out_y, frame->width,
                       out_u, frame->width/2,
@@ -92,9 +103,9 @@ int FfmpegEncoder::encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, cons
     frame->data[1] = out_u;
     frame->data[2] = out_v;
   } else {
-    frame->data[0] = (uint8_t*)y_ptr;
-    frame->data[1] = (uint8_t*)u_ptr;
-    frame->data[2] = (uint8_t*)v_ptr;
+    frame->data[0] = cy;
+    frame->data[1] = cu;
+    frame->data[2] = cv;
   }
 
   frame->pts = counter*50*1000; // 50ms per frame
diff --git a/selfdrive/loggerd/encoder/ffmpeg_encoder.h b/selfdrive/loggerd/encoder/ffmpeg_encoder.h
index ae41abc69f..497a28b651 100644
--- a/selfdrive/loggerd/encoder/ffmpeg_encoder.h
+++ b/selfdrive/loggerd/encoder/ffmpeg_encoder.h
@@ -21,8 +21,7 @@ class FfmpegEncoder : public VideoEncoder {
     VideoEncoder(filename, type, in_width, in_height, fps, bitrate, cereal::EncodeIndex::Type::BIG_BOX_LOSSLESS, out_width, out_height, write) { encoder_init(); }
   ~FfmpegEncoder();
   void encoder_init();
-  int encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr,
-                   int in_width_, int in_height_, VisionIpcBufExtra *extra);
+  int encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra);
   void encoder_open(const char* path);
   void encoder_close();
@@ -33,5 +32,6 @@ private:
   AVCodecContext *codec_ctx;
   AVFrame *frame = NULL;
 
+  std::vector<uint8_t> convert_buf;
   std::vector<uint8_t> downscale_buf;
 };
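Note: the ffmpeg path still wants planar I420, so it now pays one NV12ToI420 conversion into convert_buf per frame. A numpy equivalent of that call, dropping the row padding and deinterleaving the chroma:

import numpy as np

def nv12_to_i420(buf, w, h, stride, uv_offset):
  y = buf[:uv_offset].reshape(-1, stride)[:h, :w]
  uv = buf[uv_offset:].reshape(-1, stride)
  u = uv[:h//2, 0:w:2]  # even bytes of each UV row
  v = uv[:h//2, 1:w:2]  # odd bytes
  return y.copy(), u.copy(), v.copy()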
diff --git a/selfdrive/loggerd/encoder/v4l_encoder.cc b/selfdrive/loggerd/encoder/v4l_encoder.cc
index 5fb9cc8de7..b3bd692b16 100644
--- a/selfdrive/loggerd/encoder/v4l_encoder.cc
+++ b/selfdrive/loggerd/encoder/v4l_encoder.cc
@@ -248,7 +248,6 @@ void V4LEncoder::encoder_init() {
   }
   // queue up input buffers
   for (unsigned int i = 0; i < BUF_IN_COUNT; i++) {
-    buf_in[i].allocate(fmt_in.fmt.pix_mp.plane_fmt[0].sizeimage);
     free_buf_in.push(i);
   }
 
@@ -262,41 +261,19 @@ void V4LEncoder::encoder_open(const char* path) {
   this->counter = 0;
 }
 
-int V4LEncoder::encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr,
-                             int in_width_, int in_height_, VisionIpcBufExtra *extra) {
-  assert(in_width == in_width_);
-  assert(in_height == in_height_);
-  assert(is_open);
-
-  // reserve buffer
-  int buffer_in = free_buf_in.pop();
-
-  uint8_t *in_y_ptr = (uint8_t*)buf_in[buffer_in].addr;
-  int in_y_stride = VENUS_Y_STRIDE(COLOR_FMT_NV12, in_width);
-  int in_uv_stride = VENUS_UV_STRIDE(COLOR_FMT_NV12, in_width);
-  uint8_t *in_uv_ptr = in_y_ptr + (in_y_stride * VENUS_Y_SCANLINES(COLOR_FMT_NV12, in_height));
-
-  // GRRR COPY
-  int err = libyuv::I420ToNV12(y_ptr, in_width,
-                               u_ptr, in_width/2,
-                               v_ptr, in_width/2,
-                               in_y_ptr, in_y_stride,
-                               in_uv_ptr, in_uv_stride,
-                               in_width, in_height);
-  assert(err == 0);
-
+int V4LEncoder::encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra) {
   struct timeval timestamp {
     .tv_sec = (long)(extra->timestamp_eof/1000000000),
     .tv_usec = (long)((extra->timestamp_eof/1000) % 1000000),
   };
 
+  // reserve buffer
+  int buffer_in = free_buf_in.pop();
+
   // push buffer
   extras.push(*extra);
-  buf_in[buffer_in].sync(VISIONBUF_SYNC_TO_DEVICE);
-  int bytesused = VENUS_Y_STRIDE(COLOR_FMT_NV12, in_width) * VENUS_Y_SCANLINES(COLOR_FMT_NV12, in_height) +
-                  VENUS_UV_STRIDE(COLOR_FMT_NV12, in_width) * VENUS_UV_SCANLINES(COLOR_FMT_NV12, in_height);
-  queue_buffer(fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, buffer_in, &buf_in[buffer_in], timestamp, bytesused);
+  //buf->sync(VISIONBUF_SYNC_TO_DEVICE);
+  queue_buffer(fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, buffer_in, buf, timestamp);
 
   return this->counter++;
 }
diff --git a/selfdrive/loggerd/encoder/v4l_encoder.h b/selfdrive/loggerd/encoder/v4l_encoder.h
index 9f267cfb03..b7c378be85 100644
--- a/selfdrive/loggerd/encoder/v4l_encoder.h
+++ b/selfdrive/loggerd/encoder/v4l_encoder.h
@@ -13,8 +13,7 @@ public:
     VideoEncoder(filename, type, in_width, in_height, fps, bitrate, codec, out_width, out_height, write) { encoder_init(); }
   ~V4LEncoder();
   void encoder_init();
-  int encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr,
-                   int in_width, int in_height, VisionIpcBufExtra *extra);
+  int encode_frame(VisionBuf* buf, VisionIpcBufExtra *extra);
   void encoder_open(const char* path);
   void encoder_close();
 private:
diff --git a/selfdrive/loggerd/encoderd.cc b/selfdrive/loggerd/encoderd.cc
index 297eaf88d0..5eb70dad33 100644
--- a/selfdrive/loggerd/encoderd.cc
+++ b/selfdrive/loggerd/encoderd.cc
@@ -103,8 +103,7 @@ void encoder_thread(EncoderdState *s, const LogCameraInfo &cam_info) {
 
       // encode a frame
       for (int i = 0; i < encoders.size(); ++i) {
-        int out_id = encoders[i]->encode_frame(buf->y, buf->u, buf->v,
-                                               buf->width, buf->height, &extra);
+        int out_id = encoders[i]->encode_frame(buf, &extra);
 
         if (out_id == -1) {
          LOGE("Failed to encode frame. frame_id: %d", extra.frame_id);
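Note: the V4L encoder previously allocated its own input buffers and did an I420ToNV12 copy per frame (the "GRRR COPY"); now it queues the shared VisionIPC buffer directly. Rough arithmetic for what that removes, assuming three 1928x1208 streams at 20 fps:

frame_bytes = 2048 * 2346  # per-frame NV12 sizeimage
fps, streams = 20, 3       # road, wide road, driver
print(frame_bytes * fps * streams / 1e6, "MB/s no longer copied")  # ~288 MB/s

which is consistent with the lower encoderd CPU and power expectations in test_onroad.py and test_power_draw.py.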
diff --git a/selfdrive/loggerd/tests/test_loggerd.py b/selfdrive/loggerd/tests/test_loggerd.py
index 89fb0c5838..8eecd834e1 100755
--- a/selfdrive/loggerd/tests/test_loggerd.py
+++ b/selfdrive/loggerd/tests/test_loggerd.py
@@ -109,16 +109,19 @@ class TestLoggerd(unittest.TestCase):
     Params().put("RecordFront", "1")
 
     expected_files = {"rlog", "qlog", "qcamera.ts", "fcamera.hevc", "dcamera.hevc"}
-    streams = [(VisionStreamType.VISION_STREAM_ROAD, tici_f_frame_size if TICI else eon_f_frame_size, "roadCameraState"),
-               (VisionStreamType.VISION_STREAM_DRIVER, tici_d_frame_size if TICI else eon_d_frame_size, "driverCameraState")]
+    streams = [(VisionStreamType.VISION_STREAM_ROAD, (*tici_f_frame_size, 2048*2346, 2048, 2048*1216) if TICI else eon_f_frame_size, "roadCameraState"),
+               (VisionStreamType.VISION_STREAM_DRIVER, (*tici_d_frame_size, 2048*2346, 2048, 2048*1216) if TICI else eon_d_frame_size, "driverCameraState")]
     if TICI:
       expected_files.add("ecamera.hevc")
-      streams.append((VisionStreamType.VISION_STREAM_WIDE_ROAD, tici_e_frame_size, "wideRoadCameraState"))
+      streams.append((VisionStreamType.VISION_STREAM_WIDE_ROAD, (*tici_e_frame_size, 2048*2346, 2048, 2048*1216), "wideRoadCameraState"))
 
     pm = messaging.PubMaster(["roadCameraState", "driverCameraState", "wideRoadCameraState"])
     vipc_server = VisionIpcServer("camerad")
-    for stream_type, frame_size, _ in streams:
-      vipc_server.create_buffers(stream_type, 40, False, *(frame_size))
+    for stream_type, frame_spec, _ in streams:
+      if TICI:
+        vipc_server.create_buffers_with_sizes(stream_type, 40, False, *(frame_spec))
+      else:
+        vipc_server.create_buffers(stream_type, 40, False, *(frame_spec))
     vipc_server.start_listener()
 
     for _ in range(5):
@@ -130,8 +133,11 @@ class TestLoggerd(unittest.TestCase):
 
     fps = 20.0
     for n in range(1, int(num_segs*length*fps)+1):
-      for stream_type, frame_size, state in streams:
-        dat = np.empty(int(frame_size[0]*frame_size[1]*3/2), dtype=np.uint8)
+      for stream_type, frame_spec, state in streams:
+        if TICI:
+          dat = np.empty(frame_spec[2], dtype=np.uint8)
+        else:
+          dat = np.empty(int(frame_spec[0]*frame_spec[1]*3/2), dtype=np.uint8)
         vipc_server.send(stream_type, dat[:].flatten().tobytes(), n, n/fps, n/fps)
 
         camera_state = messaging.new_message(state)
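Note: the TICI tuples above unpack as the extended create_buffers_with_sizes() arguments, i.e. (width, height, buffer size, row stride, UV offset); the constants are the NV12 geometry for the 1928x1208 sensors:

tici_f_frame_size = (1928, 1208)  # as imported by the test
frame_spec = (*tici_f_frame_size, 2048*2346, 2048, 2048*1216)
w, h, size, stride, uv_offset = frame_spec
assert uv_offset == stride * 1216 and size == stride * 2346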
diff --git a/selfdrive/modeld/dmonitoringmodeld.cc b/selfdrive/modeld/dmonitoringmodeld.cc
index 61f3343c8d..68c49572e6 100644
--- a/selfdrive/modeld/dmonitoringmodeld.cc
+++ b/selfdrive/modeld/dmonitoringmodeld.cc
@@ -31,7 +31,7 @@ void run_model(DMonitoringModelState &model, VisionIpcClient &vipc_client) {
     }
 
     double t1 = millis_since_boot();
-    DMonitoringResult res = dmonitoring_eval_frame(&model, buf->addr, buf->width, buf->height, calib);
+    DMonitoringResult res = dmonitoring_eval_frame(&model, buf->addr, buf->width, buf->height, buf->stride, buf->uv_offset, calib);
     double t2 = millis_since_boot();
 
     // send dm packet
diff --git a/selfdrive/modeld/models/commonmodel.cc b/selfdrive/modeld/models/commonmodel.cc
index bc013395df..b7c9051c6e 100644
--- a/selfdrive/modeld/models/commonmodel.cc
+++ b/selfdrive/modeld/models/commonmodel.cc
@@ -22,9 +22,9 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
   loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
 }
 
-float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, const mat3 &projection, cl_mem *output) {
+float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
   transform_queue(&this->transform, q,
-                  yuv_cl, frame_width, frame_height,
+                  yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
                   y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
 
   if (output == NULL) {
diff --git a/selfdrive/modeld/models/commonmodel.h b/selfdrive/modeld/models/commonmodel.h
index 059a85e7dd..40c82a8c21 100644
--- a/selfdrive/modeld/models/commonmodel.h
+++ b/selfdrive/modeld/models/commonmodel.h
@@ -25,7 +25,7 @@ class ModelFrame {
 public:
   ModelFrame(cl_device_id device_id, cl_context context);
   ~ModelFrame();
-  float* prepare(cl_mem yuv_cl, int width, int height, const mat3& transform, cl_mem *output);
+  float* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output);
 
   const int MODEL_WIDTH = 512;
   const int MODEL_HEIGHT = 256;
diff --git a/selfdrive/modeld/models/dmonitoring.cc b/selfdrive/modeld/models/dmonitoring.cc
index 4792806c8b..4cc1cd1229 100644
--- a/selfdrive/modeld/models/dmonitoring.cc
+++ b/selfdrive/modeld/models/dmonitoring.cc
@@ -55,19 +55,20 @@ static inline auto get_yuv_buf(std::vector<uint8_t> &buf, const int width, int h
 }
 
 struct Rect {int x, y, w, h;};
-void crop_yuv(uint8_t *raw, int width, int height, uint8_t *y, uint8_t *u, uint8_t *v, const Rect &rect) {
+void crop_nv12_to_yuv(uint8_t *raw, int stride, int uv_offset, uint8_t *y, uint8_t *u, uint8_t *v, const Rect &rect) {
   uint8_t *raw_y = raw;
-  uint8_t *raw_u = raw_y + (width * height);
-  uint8_t *raw_v = raw_u + ((width / 2) * (height / 2));
+  uint8_t *raw_uv = raw_y + uv_offset;
   for (int r = 0; r < rect.h / 2; r++) {
-    memcpy(y + 2 * r * rect.w, raw_y + (2 * r + rect.y) * width + rect.x, rect.w);
-    memcpy(y + (2 * r + 1) * rect.w, raw_y + (2 * r + rect.y + 1) * width + rect.x, rect.w);
-    memcpy(u + r * (rect.w / 2), raw_u + (r + (rect.y / 2)) * width / 2 + (rect.x / 2), rect.w / 2);
-    memcpy(v + r * (rect.w / 2), raw_v + (r + (rect.y / 2)) * width / 2 + (rect.x / 2), rect.w / 2);
+    memcpy(y + 2 * r * rect.w, raw_y + (2 * r + rect.y) * stride + rect.x, rect.w);
+    memcpy(y + (2 * r + 1) * rect.w, raw_y + (2 * r + rect.y + 1) * stride + rect.x, rect.w);
+    for (int h = 0; h < rect.w / 2; h++) {
+      u[r * rect.w/2 + h] = raw_uv[(r + (rect.y/2)) * stride + (rect.x/2 + h)*2];
+      v[r * rect.w/2 + h] = raw_uv[(r + (rect.y/2)) * stride + (rect.x/2 + h)*2 + 1];
+    }
   }
 }
 
-DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, float *calib) {
+DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, int stride, int uv_offset, float *calib) {
   Rect crop_rect;
   if (width == TICI_CAM_WIDTH) {
     const int cropped_height = tici_dm_crop::width / 1.33;
@@ -91,10 +92,10 @@ DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_
 
   auto [cropped_y, cropped_u, cropped_v] = get_yuv_buf(s->cropped_buf, crop_rect.w, crop_rect.h);
   if (!s->is_rhd) {
-    crop_yuv((uint8_t *)stream_buf, width, height, cropped_y, cropped_u, cropped_v, crop_rect);
+    crop_nv12_to_yuv((uint8_t *)stream_buf, stride, uv_offset, cropped_y, cropped_u, cropped_v, crop_rect);
   } else {
     auto [mirror_y, mirror_u, mirror_v] = get_yuv_buf(s->premirror_cropped_buf, crop_rect.w, crop_rect.h);
-    crop_yuv((uint8_t *)stream_buf, width, height, mirror_y, mirror_u, mirror_v, crop_rect);
+    crop_nv12_to_yuv((uint8_t *)stream_buf, stride, uv_offset, mirror_y, mirror_u, mirror_v, crop_rect);
     libyuv::I420Mirror(mirror_y, crop_rect.w,
                        mirror_u, crop_rect.w / 2,
                        mirror_v, crop_rect.w / 2,
diff --git a/selfdrive/modeld/models/dmonitoring.h b/selfdrive/modeld/models/dmonitoring.h
index db4e2ef72a..a1be91e3bb 100644
--- a/selfdrive/modeld/models/dmonitoring.h
+++ b/selfdrive/modeld/models/dmonitoring.h
@@ -46,7 +46,7 @@ typedef struct DMonitoringModelState {
 } DMonitoringModelState;
 
 void dmonitoring_init(DMonitoringModelState* s);
-DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, float *calib);
+DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, int stride, int uv_offset, float *calib);
 void dmonitoring_publish(PubMaster &pm, uint32_t frame_id, const DMonitoringResult &res, float execution_time, kj::ArrayPtr<const float> raw_pred);
 void dmonitoring_free(DMonitoringModelState* s);
diff --git a/selfdrive/modeld/models/driving.cc b/selfdrive/modeld/models/driving.cc
index b601c4898b..59c0b249d9 100644
--- a/selfdrive/modeld/models/driving.cc
+++ b/selfdrive/modeld/models/driving.cc
@@ -73,12 +73,12 @@ ModelOutput* model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* wbuf,
 #endif
 
   // if getInputBuf is not NULL, net_input_buf will be
-  auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, transform, static_cast<cl_mem*>(s->m->getInputBuf()));
+  auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, buf->stride, buf->uv_offset, transform, static_cast<cl_mem*>(s->m->getInputBuf()));
   s->m->addImage(net_input_buf, s->frame->buf_size);
   LOGT("Image added");
 
   if (wbuf != nullptr) {
-    auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, transform_wide, static_cast<cl_mem*>(s->m->getExtraBuf()));
+    auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, wbuf->stride, wbuf->uv_offset, transform_wide, static_cast<cl_mem*>(s->m->getExtraBuf()));
     s->m->addExtra(net_extra_buf, s->wide_frame->buf_size);
     LOGT("Extra image added");
   }
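Note: crop_nv12_to_yuv above trades plane-relative arithmetic for stride/uv_offset arithmetic: Y rows are stride bytes apart, and the crop's chroma starts at byte column rect.x (not rect.x/2) of the interleaved UV rows. The same cropping as numpy slicing, for reference (assumes the UV tail is a whole number of rows, which holds for the padded buffers above):

import numpy as np

def crop_nv12_to_yuv(raw, stride, uv_offset, x, y, w, h):
  y_plane = raw[:uv_offset].reshape(-1, stride)
  uv = raw[uv_offset:].reshape(-1, stride)
  return (y_plane[y:y+h, x:x+w],         # cropped Y
          uv[y//2:(y+h)//2, x:x+w:2],    # U: even bytes, from UV row y/2
          uv[y//2:(y+h)//2, x+1:x+w:2])  # V: odd bytes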
diff --git a/selfdrive/modeld/transforms/transform.cc b/selfdrive/modeld/transforms/transform.cc
index 8929f56a9e..cabc58a46d 100644
--- a/selfdrive/modeld/transforms/transform.cc
+++ b/selfdrive/modeld/transforms/transform.cc
@@ -25,7 +25,7 @@ void transform_destroy(Transform* s) {
 }
 
 void transform_queue(Transform* s,
                      cl_command_queue q,
-                     cl_mem in_yuv, int in_width, int in_height,
+                     cl_mem in_yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
                      cl_mem out_y, cl_mem out_u, cl_mem out_v,
                      int out_width, int out_height,
                      const mat3& projection) {
@@ -44,28 +44,30 @@ void transform_queue(Transform* s,
 
   const int in_y_width = in_width;
   const int in_y_height = in_height;
+  const int in_y_px_stride = 1;
   const int in_uv_width = in_width/2;
   const int in_uv_height = in_height/2;
-  const int in_y_offset = 0;
-  const int in_u_offset = in_y_offset + in_y_width*in_y_height;
-  const int in_v_offset = in_u_offset + in_uv_width*in_uv_height;
+  const int in_uv_px_stride = 2;
+  const int in_u_offset = in_uv_offset;
+  const int in_v_offset = in_uv_offset + 1;
 
   const int out_y_width = out_width;
   const int out_y_height = out_height;
   const int out_uv_width = out_width/2;
   const int out_uv_height = out_height/2;
 
-  CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv));
-  CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_y_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_offset));
-  CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_y_height));
-  CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_y));
-  CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_int), &out_y_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &zero));
-  CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &out_y_height));
-  CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_mem), &s->m_y_cl));
+  CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv));          // src
+  CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_stride));       // src_row_stride
+  CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_px_stride));  // src_px_stride
+  CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &zero));            // src_offset
+  CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_height));     // src_rows
+  CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_y_width));      // src_cols
+  CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_y));           // dst
+  CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_y_width));     // dst_row_stride
+  CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero));            // dst_offset
+  CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_height));    // dst_rows
+  CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_y_width));    // dst_cols
+  CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_y_cl));      // M
 
   const size_t work_size_y[2] = {(size_t)out_y_width, (size_t)out_y_height};
 
@@ -74,21 +76,21 @@ void transform_queue(Transform* s,
 
   const size_t work_size_uv[2] = {(size_t)out_uv_width, (size_t)out_uv_height};
 
-  CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_uv_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_u_offset));
-  CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_uv_height));
-  CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_u));
-  CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_int), &out_uv_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &zero));
-  CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &out_uv_height));
-  CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_width));
-  CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_mem), &s->m_uv_cl));
-
+  CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_uv_px_stride));  // src_px_stride
+  CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_u_offset));      // src_offset
+  CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_height));     // src_rows
+  CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_uv_width));      // src_cols
+  CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_u));            // dst
+  CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_uv_width));     // dst_row_stride
+  CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero));             // dst_offset
+  CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_height));    // dst_rows
+  CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_uv_width));    // dst_cols
+  CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_uv_cl));      // M
+
   CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
                                   (const size_t*)&work_size_uv, NULL, 0, 0, NULL));
 
-  CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_v_offset));
-  CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_mem), &out_v));
+  CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_v_offset));  // src_offset
+  CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_v));        // dst
 
   CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
                                   (const size_t*)&work_size_uv, NULL, 0, 0, NULL));
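Note: warpPerspective used to get one packed plane per pass; now all three passes address the same NV12 buffer, parameterized by a row stride, a pixel stride, and a base offset. The sample address in plane coordinates (sx, sy), restated in Python:

def sample(buf, sy, sx, row_stride, px_stride, offset):
  return buf[sy * row_stride + offset + sx * px_stride]

# per-pass parameters set above: Y -> (stride, 1, 0),
# U -> (stride, 2, uv_offset), V -> (stride, 2, uv_offset + 1)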
diff --git a/selfdrive/modeld/transforms/transform.cl b/selfdrive/modeld/transforms/transform.cl
index 8ad1869351..357ef87321 100644
--- a/selfdrive/modeld/transforms/transform.cl
+++ b/selfdrive/modeld/transforms/transform.cl
@@ -6,9 +6,9 @@
 #define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS)
 
 __kernel void warpPerspective(__global const uchar * src,
-                              int src_step, int src_offset, int src_rows, int src_cols,
+                              int src_row_stride, int src_px_stride, int src_offset, int src_rows, int src_cols,
                               __global uchar * dst,
-                              int dst_step, int dst_offset, int dst_rows, int dst_cols,
+                              int dst_row_stride, int dst_offset, int dst_rows, int dst_cols,
                               __constant float * M)
 {
   int dx = get_global_id(0);
@@ -28,18 +28,18 @@ __kernel void warpPerspective(__global const uchar * src,
     short ax = (short)(X & (INTER_TAB_SIZE - 1));
 
     int v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ?
-      convert_int(src[mad24(sy, src_step, src_offset + sx)]) : 0;
+      convert_int(src[mad24(sy, src_row_stride, src_offset + sx*src_px_stride)]) : 0;
     int v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ?
-      convert_int(src[mad24(sy, src_step, src_offset + (sx+1))]) : 0;
+      convert_int(src[mad24(sy, src_row_stride, src_offset + (sx+1)*src_px_stride)]) : 0;
    int v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ?
-      convert_int(src[mad24(sy+1, src_step, src_offset + sx)]) : 0;
+      convert_int(src[mad24(sy+1, src_row_stride, src_offset + sx*src_px_stride)]) : 0;
     int v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ?
-      convert_int(src[mad24(sy+1, src_step, src_offset + (sx+1))]) : 0;
+      convert_int(src[mad24(sy+1, src_row_stride, src_offset + (sx+1)*src_px_stride)]) : 0;
 
     float taby = 1.f/INTER_TAB_SIZE*ay;
     float tabx = 1.f/INTER_TAB_SIZE*ax;
 
-    int dst_index = mad24(dy, dst_step, dst_offset + dx);
+    int dst_index = mad24(dy, dst_row_stride, dst_offset + dx);
 
     int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
     int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE );
diff --git a/selfdrive/modeld/transforms/transform.h b/selfdrive/modeld/transforms/transform.h
index a1629a517e..771a7054b3 100644
--- a/selfdrive/modeld/transforms/transform.h
+++ b/selfdrive/modeld/transforms/transform.h
@@ -19,7 +19,7 @@ void transform_init(Transform* s, cl_context ctx, cl_device_id device_id);
 void transform_destroy(Transform* transform);
 
 void transform_queue(Transform* s, cl_command_queue q,
-                     cl_mem yuv, int in_width, int in_height,
+                     cl_mem yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
                      cl_mem out_y, cl_mem out_u, cl_mem out_v,
                      int out_width, int out_height, const mat3& projection);
diff --git a/selfdrive/test/process_replay/model_replay.py b/selfdrive/test/process_replay/model_replay.py
index 20739ae37d..34f35e0ed7 100755
--- a/selfdrive/test/process_replay/model_replay.py
+++ b/selfdrive/test/process_replay/model_replay.py
@@ -96,7 +96,7 @@ def model_replay(lr, frs):
       if msg.which() in VIPC_STREAM:
         msg = msg.as_builder()
         camera_state = getattr(msg, msg.which())
-        img = frs[msg.which()].get(frame_idxs[msg.which()], pix_fmt="yuv420p")[0]
+        img = frs[msg.which()].get(frame_idxs[msg.which()], pix_fmt="nv12")[0]
         frame_idxs[msg.which()] += 1
 
         # send camera state and frame
diff --git a/selfdrive/test/process_replay/regen.py b/selfdrive/test/process_replay/regen.py
index b0bc033fe6..bcc91b3ab6 100755
--- a/selfdrive/test/process_replay/regen.py
+++ b/selfdrive/test/process_replay/regen.py
@@ -164,7 +164,7 @@ def replay_cameras(lr, frs):
       print(f"Decompressing frames {s}")
       frames = []
      for i in tqdm(range(fr.frame_count)):
-        img = fr.get(i, pix_fmt='yuv420p')[0]
+        img = fr.get(i, pix_fmt='nv12')[0]
         frames.append(img.flatten().tobytes())
 
     vs.create_buffers(stream, 40, False, size[0], size[1])
diff --git a/selfdrive/test/test_onroad.py b/selfdrive/test/test_onroad.py
index 725b0dd1ea..6cf53a046e 100755
--- a/selfdrive/test/test_onroad.py
+++ b/selfdrive/test/test_onroad.py
@@ -23,11 +23,11 @@ from tools.lib.logreader import LogReader
 PROCS = {
   "selfdrive.controls.controlsd": 35.0,
   "./loggerd": 10.0,
-  "./encoderd": 37.3,
+  "./encoderd": 12.5,
   "./camerad": 16.5,
   "./locationd": 9.1,
   "selfdrive.controls.plannerd": 11.7,
-  "./_ui": 21.0,
+  "./_ui": 26.4,
   "selfdrive.locationd.paramsd": 9.0,
   "./_sensord": 6.17,
   "selfdrive.controls.radard": 4.5,
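Note: replay and regen now decode straight to NV12 for VisionIPC. Unlike camerad's buffers, FrameReader output is tightly packed (stride == width, uv_offset == width*height), which appears to be why regen.py can keep using plain create_buffers(). Hypothetical usage:

from tools.lib.framereader import FrameReader

fr = FrameReader("fcamera.hevc")     # hypothetical segment video path
nv12 = fr.get(0, pix_fmt="nv12")[0]  # flat uint8 array, w*h*3//2 bytes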
diff --git a/selfdrive/ui/qt/widgets/cameraview.cc b/selfdrive/ui/qt/widgets/cameraview.cc
index a0b3c43cde..eade9cbe30 100644
--- a/selfdrive/ui/qt/widgets/cameraview.cc
+++ b/selfdrive/ui/qt/widgets/cameraview.cc
@@ -34,17 +34,15 @@ const char frame_fragment_shader[] =
   "precision mediump float;\n"
 #endif
   "uniform sampler2D uTextureY;\n"
-  "uniform sampler2D uTextureU;\n"
-  "uniform sampler2D uTextureV;\n"
+  "uniform sampler2D uTextureUV;\n"
   "in vec2 vTexCoord;\n"
   "out vec4 colorOut;\n"
   "void main() {\n"
   "  float y = texture(uTextureY, vTexCoord).r;\n"
-  "  float u = texture(uTextureU, vTexCoord).r - 0.5;\n"
-  "  float v = texture(uTextureV, vTexCoord).r - 0.5;\n"
-  "  float r = y + 1.402 * v;\n"
-  "  float g = y - 0.344 * u - 0.714 * v;\n"
-  "  float b = y + 1.772 * u;\n"
+  "  vec2 uv = texture(uTextureUV, vTexCoord).rg - 0.5;\n"
+  "  float r = y + 1.402 * uv.y;\n"
+  "  float g = y - 0.344 * uv.x - 0.714 * uv.y;\n"
+  "  float b = y + 1.772 * uv.x;\n"
   "  colorOut = vec4(r, g, b, 1.0);\n"
   "}\n";
@@ -158,8 +156,7 @@ void CameraViewWidget::initializeGL() {
   glGenTextures(3, textures);
   glUseProgram(program->programId());
   glUniform1i(program->uniformLocation("uTextureY"), 0);
-  glUniform1i(program->uniformLocation("uTextureU"), 1);
-  glUniform1i(program->uniformLocation("uTextureV"), 2);
+  glUniform1i(program->uniformLocation("uTextureUV"), 1);
 }
 
 void CameraViewWidget::showEvent(QShowEvent *event) {
@@ -223,19 +220,23 @@ void CameraViewWidget::paintGL() {
   VisionBuf *frame = frames[frame_idx].second;
 
   glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, stream_stride);
   glViewport(0, 0, width(), height());
 
   glBindVertexArray(frame_vao);
   glUseProgram(program->programId());
 
-  uint8_t *address[3] = {frame->y, frame->u, frame->v};
-  for (int i = 0; i < 3; ++i) {
-    glActiveTexture(GL_TEXTURE0 + i);
-    glBindTexture(GL_TEXTURE_2D, textures[i]);
-    int width = i == 0 ? stream_width : stream_width / 2;
-    int height = i == 0 ? stream_height : stream_height / 2;
-    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RED, GL_UNSIGNED_BYTE, address[i]);
-    assert(glGetError() == GL_NO_ERROR);
-  }
+  glActiveTexture(GL_TEXTURE0);
+  glBindTexture(GL_TEXTURE_2D, textures[0]);
+  glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, stream_width, stream_height, GL_RED, GL_UNSIGNED_BYTE, frame->y);
+  assert(glGetError() == GL_NO_ERROR);
+
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, stream_stride/2);
+
+  glActiveTexture(GL_TEXTURE0 + 1);
+  glBindTexture(GL_TEXTURE_2D, textures[1]);
+  glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, stream_width/2, stream_height/2, GL_RG, GL_UNSIGNED_BYTE, frame->uv);
+  assert(glGetError() == GL_NO_ERROR);
 
   glUniformMatrix4fv(program->uniformLocation("uTransform"), 1, GL_TRUE, frame_mat.v);
   assert(glGetError() == GL_NO_ERROR);
@@ -246,6 +247,7 @@ void CameraViewWidget::paintGL() {
   glBindTexture(GL_TEXTURE_2D, 0);
   glActiveTexture(GL_TEXTURE0);
   glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
 }
 
 void CameraViewWidget::vipcConnected(VisionIpcClient *vipc_client) {
@@ -253,18 +255,23 @@ void CameraViewWidget::vipcConnected(VisionIpcClient *vipc_client) {
   frames.clear();
   stream_width = vipc_client->buffers[0].width;
   stream_height = vipc_client->buffers[0].height;
+  stream_stride = vipc_client->buffers[0].stride;
+
+  glBindTexture(GL_TEXTURE_2D, textures[0]);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+  glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+  glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+  glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, stream_width, stream_height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
+  assert(glGetError() == GL_NO_ERROR);
 
-  for (int i = 0; i < 3; ++i) {
-    glBindTexture(GL_TEXTURE_2D, textures[i]);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    int width = i == 0 ? stream_width : stream_width / 2;
-    int height = i == 0 ? stream_height : stream_height / 2;
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
-    assert(glGetError() == GL_NO_ERROR);
-  }
+  glBindTexture(GL_TEXTURE_2D, textures[1]);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+  glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+  glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+  glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, stream_width/2, stream_height/2, 0, GL_RG, GL_UNSIGNED_BYTE, nullptr);
+  assert(glGetError() == GL_NO_ERROR);
 
   updateFrameMat(width(), height());
 }
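Note: the UI uploads two textures instead of three: a full-resolution R8 texture for Y, and a half-resolution RG8 texture whose two channels carry the interleaved U/V bytes, with GL_UNPACK_ROW_LENGTH describing the padded stride. The fragment shader keeps the same BT.601 constants, restated as a sketch (y, u, v in [0, 1]; uv.x is u and uv.y is v in the shader):

def yuv_to_rgb_px(y, u, v):
  u, v = u - 0.5, v - 0.5
  return (y + 1.402 * v,
          y - 0.344 * u - 0.714 * v,
          y + 1.772 * u)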
diff --git a/selfdrive/ui/qt/widgets/cameraview.h b/selfdrive/ui/qt/widgets/cameraview.h
index 65d5edc221..953cbed00b 100644
--- a/selfdrive/ui/qt/widgets/cameraview.h
+++ b/selfdrive/ui/qt/widgets/cameraview.h
@@ -49,6 +49,7 @@ protected:
   std::string stream_name;
   int stream_width = 0;
   int stream_height = 0;
+  int stream_stride = 0;
   std::atomic<VisionStreamType> stream_type;
   QThread *vipc_thread = nullptr;
diff --git a/selfdrive/ui/replay/framereader.cc b/selfdrive/ui/replay/framereader.cc
index ca839e6819..8453407ce7 100644
--- a/selfdrive/ui/replay/framereader.cc
+++ b/selfdrive/ui/replay/framereader.cc
@@ -229,18 +229,21 @@ AVFrame *FrameReader::decodeFrame(AVPacket *pkt) {
 bool FrameReader::copyBuffers(AVFrame *f, uint8_t *yuv) {
   if (hw_pix_fmt == HW_PIX_FMT) {
     uint8_t *y = yuv ? yuv : nv12toyuv_buffer.data();
-    uint8_t *u = y + width * height;
-    uint8_t *v = u + (width / 2) * (height / 2);
-    libyuv::NV12ToI420(f->data[0], f->linesize[0], f->data[1], f->linesize[1],
-                       y, width, u, width / 2, v, width / 2, width, height);
+    uint8_t *uv = y + width * height;
+    for (int i = 0; i < height/2; i++) {
+      memcpy(y + (i*2 + 0)*width, f->data[0] + (i*2 + 0)*f->linesize[0], width);
+      memcpy(y + (i*2 + 1)*width, f->data[0] + (i*2 + 1)*f->linesize[0], width);
+      memcpy(uv + i*width, f->data[1] + i*f->linesize[1], width);
+    }
   } else {
-    uint8_t *u = yuv + width * height;
-    uint8_t *v = u + (width / 2) * (height / 2);
-    libyuv::I420Copy(f->data[0], f->linesize[0],
-                     f->data[1], f->linesize[1],
-                     f->data[2], f->linesize[2],
-                     yuv, width, u, width / 2, v, width / 2,
-                     width, height);
+    uint8_t *y = yuv ? yuv : nv12toyuv_buffer.data();
+    uint8_t *uv = y + width * height;
+    libyuv::I420ToNV12(f->data[0], f->linesize[0],
+                       f->data[1], f->linesize[1],
+                       f->data[2], f->linesize[2],
+                       y, width,
+                       uv, width / 2,
+                       width, height);
   }
   return true;
 }
diff --git a/tools/lib/framereader.py b/tools/lib/framereader.py
index e333683b59..d9920ab128 100644
--- a/tools/lib/framereader.py
+++ b/tools/lib/framereader.py
@@ -185,20 +185,28 @@ def read_file_check_size(f, sz, cookie):
   return buff
 
 
-def rgb24toyuv420(rgb):
+def rgb24toyuv(rgb):
   yuv_from_rgb = np.array([[ 0.299     ,  0.587     ,  0.114      ],
                            [-0.14714119, -0.28886916,  0.43601035 ],
                            [ 0.61497538, -0.51496512, -0.10001026 ]])
   img = np.dot(rgb.reshape(-1, 3), yuv_from_rgb.T).reshape(rgb.shape)
 
-  y_len = img.shape[0] * img.shape[1]
-  uv_len = y_len // 4
-
   ys = img[:, :, 0]
   us = (img[::2, ::2, 1] + img[1::2, ::2, 1] + img[::2, 1::2, 1] + img[1::2, 1::2, 1]) / 4 + 128
   vs = (img[::2, ::2, 2] + img[1::2, ::2, 2] + img[::2, 1::2, 2] + img[1::2, 1::2, 2]) / 4 + 128
 
-  yuv420 = np.empty(y_len + 2 * uv_len, dtype=img.dtype)
+  return ys, us, vs
+
+
+def rgb24toyuv420(rgb):
+  ys, us, vs = rgb24toyuv(rgb)
+
+  y_len = rgb.shape[0] * rgb.shape[1]
+  uv_len = y_len // 4
+
+  yuv420 = np.empty(y_len + 2 * uv_len, dtype=rgb.dtype)
   yuv420[:y_len] = ys.reshape(-1)
   yuv420[y_len:y_len + uv_len] = us.reshape(-1)
   yuv420[y_len + uv_len:y_len + 2 * uv_len] = vs.reshape(-1)
@@ -206,6 +214,20 @@ def rgb24toyuv420(rgb):
   return yuv420.clip(0, 255).astype('uint8')
 
 
+def rgb24tonv12(rgb):
+  ys, us, vs = rgb24toyuv(rgb)
+
+  y_len = rgb.shape[0] * rgb.shape[1]
+  uv_len = y_len // 4
+
+  nv12 = np.empty(y_len + 2 * uv_len, dtype=rgb.dtype)
+  nv12[:y_len] = ys.reshape(-1)
+  nv12[y_len::2] = us.reshape(-1)
+  nv12[y_len+1::2] = vs.reshape(-1)
+
+  return nv12.clip(0, 255).astype('uint8')
+
+
 def decompress_video_data(rawdat, vid_fmt, w, h, pix_fmt):
   # using a tempfile is much faster than proc.communicate for some reason
 
@@ -237,6 +259,8 @@ def decompress_video_data(rawdat, vid_fmt, w, h, pix_fmt):
 
   if pix_fmt == "rgb24":
     ret = np.frombuffer(dat, dtype=np.uint8).reshape(-1, h, w, 3)
+  elif pix_fmt == "nv12":
+    ret = np.frombuffer(dat, dtype=np.uint8).reshape(-1, (h*w*3//2))
   elif pix_fmt == "yuv420p":
     ret = np.frombuffer(dat, dtype=np.uint8).reshape(-1, (h*w*3//2))
   elif pix_fmt == "yuv444p":
@@ -304,7 +328,7 @@ class RawFrameReader(BaseFrameReader):
     assert self.frame_count is not None
     assert num+count <= self.frame_count
 
-    if pix_fmt not in ("yuv420p", "rgb24"):
+    if pix_fmt not in ("nv12", "yuv420p", "rgb24"):
       raise ValueError(f"Unsupported pixel format {pix_fmt!r}")
 
     app = []
@@ -313,6 +337,8 @@ class RawFrameReader(BaseFrameReader):
       rgb_dat = self.load_and_debayer(dat)
       if pix_fmt == "rgb24":
         app.append(rgb_dat)
+      elif pix_fmt == "nv12":
+        app.append(rgb24tonv12(rgb_dat))
       elif pix_fmt == "yuv420p":
         app.append(rgb24toyuv420(rgb_dat))
       else:
@@ -329,7 +355,7 @@ class VideoStreamDecompressor:
     self.h = h
     self.pix_fmt = pix_fmt
 
-    if pix_fmt == "yuv420p":
+    if pix_fmt in ("nv12", "yuv420p"):
       self.out_size = w*h*3//2 # yuv420p
     elif pix_fmt in ("rgb24", "yuv444p"):
       self.out_size = w*h*3
@@ -387,6 +413,8 @@ class VideoStreamDecompressor:
         ret = np.frombuffer(dat, dtype=np.uint8).reshape((self.h, self.w, 3))
       elif self.pix_fmt == "yuv420p":
         ret = np.frombuffer(dat, dtype=np.uint8)
+      elif self.pix_fmt == "nv12":
+        ret = np.frombuffer(dat, dtype=np.uint8)
       elif self.pix_fmt == "yuv444p":
         ret = np.frombuffer(dat, dtype=np.uint8).reshape((3, self.h, self.w))
       else:
@@ -549,7 +577,7 @@ class GOPFrameReader(BaseFrameReader):
     if num + count > self.frame_count:
       raise ValueError(f"{num + count} > {self.frame_count}")
 
-    if pix_fmt not in ("yuv420p", "rgb24", "yuv444p"):
+    if pix_fmt not in ("nv12", "yuv420p", "rgb24", "yuv444p"):
       raise ValueError(f"Unsupported pixel format {pix_fmt!r}")
 
     ret = [self._get_one(num + i, pix_fmt) for i in range(count)]
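Note: a quick self-consistency check for rgb24tonv12() against rgb24toyuv420(): both must produce identical planes, just laid out differently (planar U then V versus UV interleaved after Y). A minimal sketch using a random 8x8 input:

import numpy as np
from tools.lib.framereader import rgb24tonv12, rgb24toyuv420

rgb = np.random.randint(0, 256, size=(8, 8, 3)).astype(np.float64)
yuv420, nv12 = rgb24toyuv420(rgb), rgb24tonv12(rgb)
y_len, uv_len = 8 * 8, 8 * 8 // 4
assert (yuv420[:y_len] == nv12[:y_len]).all()                # Y matches
assert (yuv420[y_len:y_len+uv_len] == nv12[y_len::2]).all()  # U at even bytes
assert (yuv420[y_len+uv_len:] == nv12[y_len+1::2]).all()     # V at odd bytes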