#pragma clang diagnostic ignored "-Wdeprecated-declarations" #include #include "system/loggerd/video_writer.h" #include "common/swaglog.h" #include "common/util.h" VideoWriter::VideoWriter(const char *path, const char *filename, bool remuxing, int width, int height, int fps, cereal::EncodeIndex::Type codec) : remuxing(remuxing) { vid_path = util::string_format("%s/%s", path, filename); lock_path = util::string_format("%s/%s.lock", path, filename); int lock_fd = HANDLE_EINTR(open(lock_path.c_str(), O_RDWR | O_CREAT, 0664)); assert(lock_fd >= 0); close(lock_fd); LOGD("encoder_open %s remuxing:%d", this->vid_path.c_str(), this->remuxing); if (this->remuxing) { bool raw = (codec == cereal::EncodeIndex::Type::BIG_BOX_LOSSLESS); avformat_alloc_output_context2(&this->ofmt_ctx, NULL, raw ? "matroska" : NULL, this->vid_path.c_str()); assert(this->ofmt_ctx); // set codec correctly. needed? assert(codec != cereal::EncodeIndex::Type::FULL_H_E_V_C); const AVCodec *avcodec = avcodec_find_encoder(raw ? AV_CODEC_ID_FFVHUFF : AV_CODEC_ID_H264); assert(avcodec); this->codec_ctx = avcodec_alloc_context3(avcodec); assert(this->codec_ctx); this->codec_ctx->width = width; this->codec_ctx->height = height; this->codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P; this->codec_ctx->time_base = (AVRational){ 1, fps }; if (codec == cereal::EncodeIndex::Type::BIG_BOX_LOSSLESS) { // without this, there's just noise int err = avcodec_open2(this->codec_ctx, avcodec, NULL); assert(err >= 0); } this->out_stream = avformat_new_stream(this->ofmt_ctx, raw ? avcodec : NULL); assert(this->out_stream); int err = avio_open(&this->ofmt_ctx->pb, this->vid_path.c_str(), AVIO_FLAG_WRITE); assert(err >= 0); } else { this->of = util::safe_fopen(this->vid_path.c_str(), "wb"); assert(this->of); } } void VideoWriter::initialize_audio(int sample_rate) { assert(this->ofmt_ctx->oformat->audio_codec != AV_CODEC_ID_NONE); // check output format supports audio streams const AVCodec *audio_avcodec = avcodec_find_encoder(AV_CODEC_ID_AAC); assert(audio_avcodec); this->audio_codec_ctx = avcodec_alloc_context3(audio_avcodec); assert(this->audio_codec_ctx); this->audio_codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP; this->audio_codec_ctx->sample_rate = sample_rate; #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1+ av_channel_layout_default(&this->audio_codec_ctx->ch_layout, 1); #else this->audio_codec_ctx->channel_layout = AV_CH_LAYOUT_MONO; #endif this->audio_codec_ctx->bit_rate = 32000; this->audio_codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; this->audio_codec_ctx->time_base = (AVRational){1, audio_codec_ctx->sample_rate}; int err = avcodec_open2(this->audio_codec_ctx, audio_avcodec, NULL); assert(err >= 0); av_log_set_level(AV_LOG_WARNING); // hide "QAvg" info msgs at the end of every segment this->audio_stream = avformat_new_stream(this->ofmt_ctx, NULL); assert(this->audio_stream); err = avcodec_parameters_from_context(this->audio_stream->codecpar, this->audio_codec_ctx); assert(err >= 0); this->audio_frame = av_frame_alloc(); assert(this->audio_frame); this->audio_frame->format = this->audio_codec_ctx->sample_fmt; #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1+ av_channel_layout_copy(&this->audio_frame->ch_layout, &this->audio_codec_ctx->ch_layout); #else this->audio_frame->channel_layout = this->audio_codec_ctx->channel_layout; #endif this->audio_frame->sample_rate = this->audio_codec_ctx->sample_rate; this->audio_frame->nb_samples = this->audio_codec_ctx->frame_size; err = av_frame_get_buffer(this->audio_frame, 0); assert(err >= 0); } void VideoWriter::write(uint8_t *data, int len, long long timestamp, bool codecconfig, bool keyframe) { if (of && data) { size_t written = util::safe_fwrite(data, 1, len, of); if (written != len) { LOGE("failed to write file.errno=%d", errno); } } if (remuxing) { if (codecconfig) { if (len > 0) { codec_ctx->extradata = (uint8_t*)av_mallocz(len + AV_INPUT_BUFFER_PADDING_SIZE); codec_ctx->extradata_size = len; memcpy(codec_ctx->extradata, data, len); } int err = avcodec_parameters_from_context(out_stream->codecpar, codec_ctx); assert(err >= 0); // if there is an audio stream, it must be initialized before this point err = avformat_write_header(ofmt_ctx, NULL); assert(err >= 0); header_written = true; } else { // input timestamps are in microseconds AVRational in_timebase = {1, 1000000}; AVPacket pkt; av_init_packet(&pkt); pkt.data = data; pkt.size = len; pkt.stream_index = this->out_stream->index; enum AVRounding rnd = static_cast(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX); pkt.pts = pkt.dts = av_rescale_q_rnd(timestamp, in_timebase, ofmt_ctx->streams[0]->time_base, rnd); pkt.duration = av_rescale_q(50*1000, in_timebase, ofmt_ctx->streams[0]->time_base); if (keyframe) { pkt.flags |= AV_PKT_FLAG_KEY; } // TODO: can use av_write_frame for non raw? int err = av_interleaved_write_frame(ofmt_ctx, &pkt); if (err < 0) { LOGW("ts encoder write issue len: %d ts: %lld", len, timestamp); } av_packet_unref(&pkt); } } } void VideoWriter::write_audio(uint8_t *data, int len, long long timestamp, int sample_rate) { if (!remuxing) return; if (!audio_initialized) { initialize_audio(sample_rate); audio_initialized = true; } if (!audio_codec_ctx) return; // sync logMonoTime of first audio packet with the timestampEof of first video packet if (audio_pts == 0) { audio_pts = (timestamp * audio_codec_ctx->sample_rate) / 1000000ULL; } // convert s16le samples to fltp and add to buffer const int16_t *raw_samples = reinterpret_cast(data); int sample_count = len / sizeof(int16_t); constexpr float normalizer = 1.0f / 32768.0f; const size_t max_buffer_size = sample_rate * 10; // 10 seconds if (audio_buffer.size() + sample_count > max_buffer_size) { size_t samples_to_drop = (audio_buffer.size() + sample_count) - max_buffer_size; LOGE("Audio buffer overflow, dropping %zu oldest samples", samples_to_drop); audio_buffer.erase(audio_buffer.begin(), audio_buffer.begin() + samples_to_drop); audio_pts += samples_to_drop; } // Add new samples to the buffer const size_t original_size = audio_buffer.size(); audio_buffer.resize(original_size + sample_count); std::transform(raw_samples, raw_samples + sample_count, audio_buffer.begin() + original_size, [](int16_t sample) { return sample * normalizer; }); if (!header_written) return; // header not written yet, process audio frame after header is written while (audio_buffer.size() >= audio_codec_ctx->frame_size) { audio_frame->pts = audio_pts; float *f_samples = reinterpret_cast(audio_frame->data[0]); std::copy(audio_buffer.begin(), audio_buffer.begin() + audio_codec_ctx->frame_size, f_samples); audio_buffer.erase(audio_buffer.begin(), audio_buffer.begin() + audio_codec_ctx->frame_size); encode_and_write_audio_frame(audio_frame); } } void VideoWriter::encode_and_write_audio_frame(AVFrame* frame) { if (!remuxing || !audio_codec_ctx) return; int send_result = avcodec_send_frame(audio_codec_ctx, frame); // encode frame if (send_result >= 0) { AVPacket *pkt = av_packet_alloc(); while (avcodec_receive_packet(audio_codec_ctx, pkt) == 0) { av_packet_rescale_ts(pkt, audio_codec_ctx->time_base, audio_stream->time_base); pkt->stream_index = audio_stream->index; int err = av_interleaved_write_frame(ofmt_ctx, pkt); // write encoded frame if (err < 0) { LOGW("AUDIO: Write frame failed - error: %d", err); } av_packet_unref(pkt); } av_packet_free(&pkt); } else { LOGW("AUDIO: Failed to send audio frame to encoder: %d", send_result); } audio_pts += audio_codec_ctx->frame_size; } VideoWriter::~VideoWriter() { if (this->remuxing) { if (this->audio_codec_ctx) { encode_and_write_audio_frame(NULL); // flush encoder avcodec_free_context(&this->audio_codec_ctx); } int err = av_write_trailer(this->ofmt_ctx); if (err != 0) LOGE("av_write_trailer failed %d", err); avcodec_free_context(&this->codec_ctx); if (this->audio_frame) av_frame_free(&this->audio_frame); err = avio_closep(&this->ofmt_ctx->pb); if (err != 0) LOGE("avio_closep failed %d", err); avformat_free_context(this->ofmt_ctx); } else { util::safe_fflush(this->of); fclose(this->of); this->of = nullptr; } unlink(this->lock_path.c_str()); }