framereader: replace swscale with libyuv, reduce cpu usage by half (#22992)

* use libyuv

* cleanup
old-commit-hash: 5ae5174509
commatwo_master
Dean Lee 4 years ago committed by GitHub
parent 89fcaad057
commit e308bc0a3f
  1. 2
      selfdrive/camerad/SConscript
  2. 2
      selfdrive/ui/SConscript
  3. 67
      selfdrive/ui/replay/framereader.cc
  4. 7
      selfdrive/ui/replay/framereader.h

@@ -21,7 +21,7 @@ else:
if USE_FRAME_STREAM:
cameras = ['cameras/camera_frame_stream.cc']
else:
libs += ['avutil', 'avcodec', 'avformat', 'swscale', 'bz2', 'ssl', 'curl', 'crypto']
libs += ['avutil', 'avcodec', 'avformat', 'bz2', 'ssl', 'curl', 'crypto']
# TODO: import replay_lib from root SConstruct
cameras = ['cameras/camera_replay.cc',
env.Object('camera-util', '#/selfdrive/ui/replay/util.cc'),

@@ -116,7 +116,7 @@ if arch in ['x86_64', 'Darwin'] or GetOption('extras'):
replay_lib_src = ["replay/replay.cc", "replay/camera.cc", "replay/filereader.cc", "replay/logreader.cc", "replay/framereader.cc", "replay/route.cc", "replay/util.cc"]
replay_lib = qt_env.Library("qt_replay", replay_lib_src, LIBS=base_libs)
replay_libs = [replay_lib, 'avutil', 'avcodec', 'avformat', 'bz2', 'curl', 'swscale', 'yuv'] + qt_libs
replay_libs = [replay_lib, 'avutil', 'avcodec', 'avformat', 'bz2', 'curl', 'yuv'] + qt_libs
qt_env.Program("replay/replay", ["replay/main.cc"], LIBS=replay_libs)
qt_env.Program("watch3", ["watch3.cc"], LIBS=qt_libs + ['common', 'json11'])

@@ -34,8 +34,6 @@ enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *
} // namespace
FrameReader::FrameReader(bool local_cache, int chunk_size, int retries) : FileReader(local_cache, chunk_size, retries) {
input_ctx = avformat_alloc_context();
sws_frame.reset(av_frame_alloc());
}
FrameReader::~FrameReader() {
@@ -47,9 +45,6 @@ FrameReader::~FrameReader() {
if (input_ctx) avformat_close_input(&input_ctx);
if (hw_device_ctx) av_buffer_unref(&hw_device_ctx);
if (rgb_sws_ctx_) sws_freeContext(rgb_sws_ctx_);
if (yuv_sws_ctx_) sws_freeContext(yuv_sws_ctx_);
if (avio_ctx_) {
av_freep(&avio_ctx_->buffer);
avio_context_free(&avio_ctx_);
@@ -60,6 +55,9 @@ bool FrameReader::load(const std::string &url, bool no_cuda, std::atomic<bool> *
std::string content = read(url, abort);
if (content.empty()) return false;
input_ctx = avformat_alloc_context();
if (!input_ctx) return false;
struct buffer_data bd = {
.data = (uint8_t *)content.data(),
.offset = 0,
@@ -99,18 +97,11 @@ bool FrameReader::load(const std::string &url, bool no_cuda, std::atomic<bool> *
if (!no_cuda) {
if (!initHardwareDecoder(AV_HWDEVICE_TYPE_CUDA)) {
printf("No CUDA capable device was found. fallback to CPU decoding.\n");
} else {
nv12toyuv_buffer.resize(getYUVSize());
}
}
rgb_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format,
width, height, AV_PIX_FMT_BGR24,
SWS_BILINEAR, NULL, NULL, NULL);
if (!rgb_sws_ctx_) return false;
yuv_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format,
width, height, AV_PIX_FMT_YUV420P,
SWS_BILINEAR, NULL, NULL, NULL);
if (!yuv_sws_ctx_) return false;
ret = avcodec_open2(decoder_ctx, decoder, NULL);
if (ret < 0) return false;
@@ -149,17 +140,6 @@ bool FrameReader::initHardwareDecoder(AVHWDeviceType hw_device_type) {
return false;
}
// get sws source format
AVHWFramesConstraints *hw_frames_const = av_hwdevice_get_hwframe_constraints(hw_device_ctx, nullptr);
assert(hw_frames_const != 0);
for (AVPixelFormat *p = hw_frames_const->valid_sw_formats; *p != AV_PIX_FMT_NONE; p++) {
if (sws_isSupportedInput(*p)) {
sws_src_format = *p;
break;
}
}
av_hwframe_constraints_free(&hw_frames_const);
decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
decoder_ctx->opaque = &hw_pix_fmt;
decoder_ctx->get_format = get_hw_format;
@@ -228,27 +208,26 @@ AVFrame *FrameReader::decodeFrame(AVPacket *pkt) {
}
bool FrameReader::copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv) {
if (yuv) {
if (sws_src_format == AV_PIX_FMT_NV12) {
// libswscale crash if height is not 16 bytes aligned for NV12->YUV420 conversion
assert(sws_src_format == AV_PIX_FMT_NV12);
if (hw_pix_fmt == AV_PIX_FMT_CUDA) {
uint8_t *y = yuv ? yuv : nv12toyuv_buffer.data();
uint8_t *u = y + width * height;
uint8_t *v = u + (width / 2) * (height / 2);
libyuv::NV12ToI420(f->data[0], f->linesize[0], f->data[1], f->linesize[1],
y, width, u, width / 2, v, width / 2, width, height);
libyuv::I420ToRGB24(y, width, u, width / 2, v, width / 2,
rgb, width * 3, width, height);
} else {
if (yuv) {
uint8_t *u = yuv + width * height;
uint8_t *v = u + (width / 2) * (height / 2);
libyuv::NV12ToI420(f->data[0], f->linesize[0],
f->data[1], f->linesize[1],
yuv, width,
u, width / 2,
v, width / 2,
width, height);
} else {
av_image_fill_arrays(sws_frame->data, sws_frame->linesize, yuv, AV_PIX_FMT_YUV420P, width, height, 1);
int ret = sws_scale(yuv_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, sws_frame->data, sws_frame->linesize);
if (ret < 0) return false;
memcpy(yuv, f->data[0], width * height);
memcpy(u, f->data[1], width / 2 * height / 2);
memcpy(v, f->data[2], width / 2 * height / 2);
}
libyuv::I420ToRGB24(f->data[0], f->linesize[0],
f->data[1], f->linesize[1],
f->data[2], f->linesize[2],
rgb, width * 3, width, height);
}
// images is going to be written to output buffers, no alignment (align = 1)
av_image_fill_arrays(sws_frame->data, sws_frame->linesize, rgb, AV_PIX_FMT_BGR24, width, height, 1);
int ret = sws_scale(rgb_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, sws_frame->data, sws_frame->linesize);
return ret >= 0;
return true;
}

@@ -9,8 +9,6 @@
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
struct AVFrameDeleter {
@@ -42,9 +40,7 @@ private:
bool failed = false;
};
std::vector<Frame> frames_;
AVPixelFormat sws_src_format = AV_PIX_FMT_YUV420P;
SwsContext *rgb_sws_ctx_ = nullptr, *yuv_sws_ctx_ = nullptr;
std::unique_ptr<AVFrame, AVFrameDeleter>av_frame_, sws_frame, hw_frame;
std::unique_ptr<AVFrame, AVFrameDeleter>av_frame_, hw_frame;
AVFormatContext *input_ctx = nullptr;
AVCodecContext *decoder_ctx = nullptr;
int key_frames_count_ = 0;
@@ -53,4 +49,5 @@ private:
AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE;
AVBufferRef *hw_device_ctx = nullptr;
std::vector<uint8_t> nv12toyuv_buffer;
};

Loading…
Cancel
Save