ffmpeg 入门（八、音频转码）

在上篇文章中，我们对一个视频进行了解码并重新编码，但是在处理音频时，我们使用了原视频的音频参数，因为对于不同的编码器，需要的音频帧大小不同，而 swr_convert 不会对音频帧的大小进行处理，本文增加了对于音频帧大小变化的情况的处理。代码中分成了两种情况进行处理，如果是支持可变帧大小的编码器，直接把帧发送给编码器，如果是固定帧大小的，则写入 fifo 缓冲区，再从 fifo 读取帧。

梵尔纳多

419人浏览 · 2025-09-02 19:50:47

梵尔纳多 · 2025-09-02 19:50:47 发布

在上篇文章中，我们对一个视频进行了解码并重新编码，但在处理音频时直接沿用了原视频的音频参数。这样做的原因是：不同的编码器要求的音频帧大小（每帧样本数）不同，而 swr_convert 只负责重采样，不会把输出整理成编码器要求的帧大小。本文在此基础上增加了对音频帧大小变化情况的处理。

首先创建一个 AVAudioFifo 对象
AVAudioFifo *av_audio_fifo_alloc(enum AVSampleFormat sample_fmt, int channels,
int nb_samples);
nb_samples（初始分配的样本数）指定FIFO缓冲区初始分配的样本数量（每个声道），FIFO会在需要时自动扩展。如果预先知道需要处理的最大样本量，可以设置一个较大的初始值以避免多次重分配内存。
把重采样后的帧传入 fifo
int av_audio_fifo_write(AVAudioFifo *af, void * const *data, int nb_samples);
判断 fifo 内缓存的样本数是否大于等于编码器所需的帧大小，如果是，则读出数据，并进行编码
int av_audio_fifo_size(AVAudioFifo *af); // 获取 fifo 中当前可读取的样本数
int av_audio_fifo_read(AVAudioFifo *af, void * const *data, int nb_samples); // 从 fifo 读取数据

        if (m_audioCodecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) {
            // The encoder accepts frames of any size: send the resampled frame as it is.
            ret = avcodec_send_frame(m_audioCodecContext, m_audioFrame);
            if (ret < 0) {
                showError(ret);
                return false;
            }

            while (true) {
                ret = avcodec_receive_packet(m_audioCodecContext, m_packet);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                    break;      // the encoder simply wants more input; this is not an error
                }
                if (ret < 0) {
                    showError(ret);
                    return false;
                }
                // Audio packets are stamped in the AUDIO encoder's time base, so that is the
                // base to convert from (using the video encoder's base corrupts the timestamps).
                av_packet_rescale_ts(m_packet, m_audioCodecContext->time_base, m_audioStream->time_base);
                m_packet->stream_index = m_audioStream->index;
                av_interleaved_write_frame(m_formatContext, m_packet);   // write the packet to the output file
                av_packet_unref(m_packet);
            }
        } else {
            // Fixed frame size: queue the resampled samples, then cut frames of exactly
            // frame_size samples out of the FIFO.
            ret = av_audio_fifo_write(m_fifo, (void**)m_audioFrame->data, m_audioFrame->nb_samples);
            if (ret < 0) {
                showError(ret);
                return false;
            }
            while (av_audio_fifo_size(m_fifo) >= m_audioCodecContext->frame_size) {
                AVFrame* enc_frame = av_frame_alloc();
                if (enc_frame == nullptr) {
                    return false;
                }
                enc_frame->nb_samples = m_audioCodecContext->frame_size;
                enc_frame->format = m_audioCodecContext->sample_fmt;
                enc_frame->sample_rate = m_audioCodecContext->sample_rate;
                // m_audioPts counts every sample queued so far; whatever is still in the FIFO
                // has not been encoded yet, so the difference is this frame's first sample.
                enc_frame->pts = m_audioPts - av_audio_fifo_size(m_fifo);

                // Deep-copy the layout: av_frame_free() uninitialises the frame's layout.
                ret = av_channel_layout_copy(&enc_frame->ch_layout, &m_audioCodecContext->ch_layout);
                if (ret >= 0) {
                    ret = av_frame_get_buffer(enc_frame, 0);
                }
                if (ret >= 0) {
                    ret = av_audio_fifo_read(m_fifo, (void**)enc_frame->data, m_audioCodecContext->frame_size);
                }
                if (ret >= 0) {
                    // hand the frame to the encoder
                    ret = avcodec_send_frame(m_audioCodecContext, enc_frame);
                }
                av_frame_free(&enc_frame);
                if (ret < 0) {
                    showError(ret);
                    return false;
                }

                while (true) {
                    ret = avcodec_receive_packet(m_audioCodecContext, m_packet);
                    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                        break;
                    }
                    if (ret < 0) {
                        showError(ret);
                        return false;
                    }
                    av_packet_rescale_ts(m_packet, m_audioCodecContext->time_base, m_audioStream->time_base);
                    m_packet->stream_index = m_audioStream->index;
                    av_interleaved_write_frame(m_formatContext, m_packet);
                    av_packet_unref(m_packet);
                }
            }
        }

代码中分成了两种情况进行处理：如果编码器支持可变帧大小，直接把重采样后的帧发送给编码器；如果编码器要求固定帧大小，则先把样本写入 fifo 缓冲区，再按编码器要求的帧大小从 fifo 中读取帧进行编码。
完整代码如下：
解码器头文件

#ifndef VIDEODECODER_H
#define VIDEODECODER_H
#include <string>
#include <memory>
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavutil/channel_layout.h"
#include "libswresample/swresample.h"
#include "libavutil/opt.h"
}

using namespace std;
/// One decoded frame together with the kind of stream it belongs to.
/// VideoDecoder::read() fills frame_ptr with a pointer to the decoder's own frame
/// object; a null frame_ptr means "no frame" (end of input or error).
struct MFrame {
    AVFrame* frame_ptr = nullptr;               // was uninitialized: a default MFrame held a garbage pointer
    AVMediaType type = AVMEDIA_TYPE_UNKNOWN;    // AVMEDIA_TYPE_VIDEO or AVMEDIA_TYPE_AUDIO once filled in
};

/// Stream parameters collected by VideoDecoder::open().
/// Every field starts at -1 ("unknown"), so fields that open() does not fill in
/// (for example the audio fields of a file without an audio stream) have a
/// well-defined value instead of garbage.
struct VideoInfo {
    int videoBitRate = -1;          // video bit rate as reported by the decoder
    int width = -1;                 // picture width in pixels
    int height = -1;                // picture height in pixels
    int fps = -1;                   // frame rate (integer frames per second)
    int gopSize = -1;               // key-frame interval
    int maxBFrames = -1;            // maximum number of B-frames

    int audioBitRate = -1;          // audio bit rate as reported by the decoder
    int sampleRate = -1;            // audio sample rate
    int sampleFmt = -1;             // enum AVSampleFormat stored as int
    int channels = -1;              // number of audio channels
};

/**
 * Demuxes a media file and decodes its video stream and, when present, its audio stream.
 *
 * Usage: open(), then read() repeatedly until it returns an MFrame without a frame
 * pointer, then close(). The class declares no destructor, so the FFmpeg objects it
 * holds are released only by an explicit close().
 */
class VideoDecoder
{
public:
    VideoDecoder();

    /// Opens @p filePath and prepares the decoders; returns false on failure.
    bool open(const string& filePath);

    /// Returns a copy of the stream parameters gathered by open().
    VideoInfo getVideoInfo() const { return m_videoInfo;}

    /// Releases the demuxer, the decoders, the packet and the frame.
    void close();

    /// Decodes the whole input, discarding the frames; returns false on a decode error.
    bool decode();

    /// Returns the next decoded frame, or an MFrame without a frame pointer when none is available.
    MFrame read();

private:
    /// Prints the FFmpeg error text for @p ret to std::cerr.
    void showError(int ret);

private:
    AVFormatContext* m_formatContext = nullptr;
    int m_videoIndex = -1;                              // index of the video stream
    int m_audioIndex = -1;                              // index of the audio stream

    AVCodecContext* m_videoCodecContext = nullptr;      // video decoder instance
    AVCodecContext* m_audioCodecContext = nullptr;      // audio decoder instance

    AVPacket* m_packet = nullptr;                       // packet currently being decoded
    AVFrame* m_frame = nullptr;                         // frame handed out by read()
    VideoInfo m_videoInfo;

    bool m_nextPacket = true;                           // read() must fetch a new packet before decoding
};

#endif // VIDEODECODER_H



#endif // VIDEOENCODER_H

解码器源文件

#include "VideoDecoder.h"
#include <iostream>
// Nothing to set up here: every member carries an in-class default initializer.
VideoDecoder::VideoDecoder() = default;

bool VideoDecoder::open(const string &filePath)
{
    int ret = avformat_open_input(&m_formatContext, filePath.c_str(), NULL, NULL);
    if (ret < 0) {
        showError(ret);
        close();
        return false;
    }

    // 查找流信息
    ret = avformat_find_stream_info(m_formatContext, NULL);
    if (ret < 0) {
        close();
        return false;
    }

    // 查找视频流
    m_videoIndex = av_find_best_stream(m_formatContext, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
    if (m_videoIndex == -1) {
        close();
        return false;
    }

    // 查找视频解码器
    AVStream *videoStream = m_formatContext->streams[m_videoIndex];
    const AVCodec *videoCodec = avcodec_find_decoder(videoStream->codecpar->codec_id);
    if (videoCodec == nullptr) {
        close();
        return false;
    }

    // 创建视频解码器
    m_videoCodecContext = avcodec_alloc_context3(videoCodec);
    if (m_videoCodecContext == nullptr) {
        close();
        return false;
    }
    // 把视频流中的编解码参数复制给解码器的实例
    avcodec_parameters_to_context(m_videoCodecContext, videoStream->codecpar);
    // 打开视频解码器实例
    ret = avcodec_open2(m_videoCodecContext, NULL, NULL);
    if (ret < 0) {
        close();
        return ret;
    }

    m_videoInfo.videoBitRate = m_videoCodecContext->bit_rate;
    m_videoInfo.width = m_videoCodecContext->width;
    m_videoInfo.height = m_videoCodecContext->height;
    m_videoInfo.fps = m_videoCodecContext->framerate.num / m_videoCodecContext->framerate.den ;
    m_videoInfo.gopSize = m_videoCodecContext->gop_size;
    m_videoInfo.maxBFrames = m_videoCodecContext->max_b_frames;

    // 打开音频解码器实例
    m_audioIndex = av_find_best_stream(m_formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (m_audioIndex >= 0) {
        AVStream *audioStream = m_formatContext->streams[m_audioIndex];
        const AVCodec *audioCodec = avcodec_find_decoder(audioStream->codecpar->codec_id);
        if (audioCodec == nullptr) {
            close();
            return false;
        }
        m_audioCodecContext = avcodec_alloc_context3(audioCodec);
        if (m_audioCodecContext == nullptr) {
            close();
            return false;
        }
        avcodec_parameters_to_context(m_audioCodecContext, audioStream->codecpar);
        ret = avcodec_open2(m_audioCodecContext, audioCodec, NULL);
        if (ret < 0) {
            close();
            return false;
        }

        m_videoInfo.sampleRate = m_audioCodecContext->sample_rate;
        m_videoInfo.sampleFmt = m_audioCodecContext->sample_fmt;
        m_videoInfo.audioBitRate = m_audioCodecContext->bit_rate;
        m_videoInfo.channels = m_audioCodecContext->ch_layout.nb_channels;
    }

    m_packet = av_packet_alloc();
    m_frame  = av_frame_alloc();

    return true;
}

/**
 * Releases the demuxer, both decoder contexts, the packet and the frame.
 *
 * Each FFmpeg release function used here takes the address of the handle,
 * does nothing when the handle is already null and leaves it null afterwards,
 * so no explicit null tests are needed and calling close() twice is harmless.
 */
void VideoDecoder::close() {
    avformat_close_input(&m_formatContext);

    avcodec_free_context(&m_videoCodecContext);
    avcodec_free_context(&m_audioCodecContext);

    av_packet_free(&m_packet);
    av_frame_free(&m_frame);
}

bool VideoDecoder::decode()
{
    while (av_read_frame(m_formatContext, m_packet) >= 0) { // 轮询数据包
        if (m_packet->stream_index == m_videoIndex) {
            int ret = avcodec_send_packet(m_videoCodecContext, m_packet);
            while ( ret >= 0) {
                av_frame_unref(m_frame);
                ret = avcodec_receive_frame(m_videoCodecContext, m_frame);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                    break;
                } else if (ret < 0) {
                    return false;
                }
            }
        } else if (m_packet->stream_index == m_audioIndex) {
            int ret = avcodec_send_packet(m_audioCodecContext, m_packet);
            while ( ret >= 0) {
                av_frame_unref(m_frame);
                ret = avcodec_receive_frame(m_audioCodecContext, m_frame);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                    break;
                } else if (ret < 0) {
                    return false;
                }
            }
        }
        // 清空 packet 内容
        av_packet_unref(m_packet);
    }
    return true;
}

/**
 * Returns the next decoded video or audio frame.
 *
 * A packet the decoder rejects is reported and skipped rather than treated as
 * fatal: some files contain such packets yet decode fine afterwards. Packets
 * of streams other than the selected video/audio stream are skipped too, so
 * they are never fed to the audio decoder. Once the input is exhausted the
 * decoders are drained, so frames still buffered inside them are not lost.
 *
 * @return an MFrame pointing at the decoder's internal frame (valid until the
 *         next read()/decode()/close() call), or an MFrame without a frame
 *         pointer when no more frames are available or an error occurred.
 */
MFrame VideoDecoder::read()
{
    if (m_formatContext == nullptr || m_packet == nullptr || m_frame == nullptr) {
        return MFrame();    // open() has not succeeded
    }

    // Maps a stream index to its decoder; nullptr for streams that are not decoded.
    auto decoderFor = [this](int streamIndex) -> AVCodecContext* {
        if (streamIndex == m_videoIndex) {
            return m_videoCodecContext;
        }
        if (streamIndex == m_audioIndex) {
            return m_audioCodecContext;
        }
        return nullptr;
    };

    // Wraps m_frame in an MFrame tagged with the media type of `decoder`.
    auto wrapFrame = [this](const AVCodecContext* decoder) -> MFrame {
        MFrame out;
        out.frame_ptr = m_frame;
        out.type = (decoder == m_videoCodecContext) ? AVMEDIA_TYPE_VIDEO : AVMEDIA_TYPE_AUDIO;
        return out;
    };

    while (true) {
        if (m_nextPacket) {
            av_packet_unref(m_packet);
            int ret = av_read_frame(m_formatContext, m_packet);
            if (ret < 0) {
                // End of input (or read failure): switch the decoders to draining mode
                // and hand out what they still hold. Sending the null packet again on
                // later calls only yields AVERROR_EOF, which is ignored here.
                AVCodecContext* const decoders[] = { m_videoCodecContext, m_audioCodecContext };
                for (AVCodecContext* decoder : decoders) {
                    if (decoder == nullptr) {
                        continue;
                    }
                    avcodec_send_packet(decoder, nullptr);
                    av_frame_unref(m_frame);
                    if (avcodec_receive_frame(decoder, m_frame) >= 0) {
                        return wrapFrame(decoder);  // m_nextPacket stays true: next call drains again
                    }
                }
                return MFrame();
            }

            AVCodecContext* decoder = decoderFor(m_packet->stream_index);
            if (decoder == nullptr) {
                continue;               // subtitle/data/extra stream: not ours
            }
            ret = avcodec_send_packet(decoder, m_packet);
            if (ret < 0) {
                showError(ret);
                continue;               // skip the bad packet and try the next one
            }
        }

        // Either a packet was just sent, or the previous call returned a frame and the
        // same decoder may still hold more frames produced from that packet.
        AVCodecContext* decoder = decoderFor(m_packet->stream_index);
        m_nextPacket = true;
        if (decoder == nullptr) {
            continue;
        }

        av_frame_unref(m_frame);
        const int ret = avcodec_receive_frame(decoder, m_frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            continue;                   // decoder needs more input
        }
        if (ret < 0) {
            showError(ret);
            return MFrame();
        }

        m_nextPacket = false;           // ask the same decoder again before reading a new packet
        return wrapFrame(decoder);
    }
}

/// Writes the FFmpeg description of error code @p ret to std::cerr.
void VideoDecoder::showError(int ret)
{
    char message[1024];
    av_strerror(ret, message, sizeof message);
    std::cerr << message << std::endl;
}

编码器头文件

#ifndef VIDEOENCODER_H
#define VIDEOENCODER_H
#include <string>
#include <vector>
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavutil/channel_layout.h"
#include "libswresample/swresample.h"
#include "libavutil/opt.h"
#include "libavutil/audio_fifo.h"
}
#include "VideoDecoder.h"

/// Video settings handed to VideoEncoder::setVideoParam().
/// Every field defaults to 0, so an encoder that is opened before the
/// parameters were set reads defined values instead of garbage.
struct VideoParams {
    int width = 0;          // output picture width in pixels
    int height = 0;         // output picture height in pixels
    int fps = 0;            // frames per second
    int bitRate = 0;        // target video bit rate
    int gopSize = 0;        // key-frame interval
    int maxBFrames = 0;     // maximum number of B-frames
};

/// Audio settings handed to VideoEncoder::setAudioParam().
/// The defaults give every field a defined value; sampleFmt starts at -1,
/// the same "unknown" marker VideoInfo uses for its sample format.
struct AudioParams {
    int bitRate = 0;        // target audio bit rate
    int sampleRate = 0;     // samples per second
    int sampleFmt = -1;     // enum AVSampleFormat stored as int
    int channels = 0;       // number of channels
};

using namespace std;
/**
 * Encodes decoded frames and muxes them into an output file.
 *
 * Usage: setVideoParam()/setAudioParam(), open(), write() once per decoded
 * frame, then close(). The class declares no destructor, so nothing is
 * released automatically.
 */
class VideoEncoder
{
public:
    VideoEncoder();
    void setVideoParam(const VideoParams& param) { m_videoParam = param;}
    void setAudioParam(const AudioParams& param) { m_audioParam = param;}

    /// Creates the output file, the encoders and the streams; returns false on failure.
    bool open(const string& filePath);

    /// Converts, encodes and muxes one decoded frame.
    bool write(const MFrame& frame);

    /// Flushes the encoders and finishes the output file.
    void close();

private:
    bool initSwsContext(const AVFrame* frame);
    bool initSwrContext(const AVFrame* frame);
    void clear();
    void showError(int ret);

private:
    // Value-initialised so that open() never reads indeterminate parameters
    // when a setter was not called.
    VideoParams m_videoParam{};
    AudioParams m_audioParam{};

    AVFormatContext* m_formatContext = nullptr;
    AVCodecContext* m_videoCodecContext = nullptr;      // video encoder instance
    AVCodecContext* m_audioCodecContext = nullptr;      // audio encoder instance
    AVStream * m_videoStream = nullptr;
    AVStream* m_audioStream = nullptr;
    AVPacket* m_packet = nullptr;                       // packet reused for every encoder output
    AVFrame* m_audioFrame = nullptr;                    // resampled audio frame
    AVFrame* m_videoFrame = nullptr;                    // converted video frame

    SwsContext* m_swsContext = nullptr;                 // video pixel-format/size converter
    SwrContext* m_swrContext = nullptr;                 // audio resampler

    // 64-bit like AVFrame::pts: a 32-bit sample counter overflows after roughly
    // 12 hours of 48 kHz audio.
    int64_t m_audioPts = 0;
    int64_t m_videoPts = 0;

    AVAudioFifo* m_fifo = nullptr;                      // audio sample buffer
};


#endif // VIDEOENCODER_H

编码器源文件

#include "VideoEncoder.h"
#include <iostream>
// No work needed: the members rely on their in-class initializers.
VideoEncoder::VideoEncoder() = default;

bool VideoEncoder::open(const string &filePath)
{
    m_audioPts = 0;
    m_videoPts = 0;
    int ret = avformat_alloc_output_context2(&m_formatContext, nullptr, nullptr, filePath.c_str());
    if (ret < 0) {
        clear();
        return false;
    }

    // 打开输出文件
    ret = avio_open(&m_formatContext->pb, filePath.c_str(), AVIO_FLAG_WRITE);
    if (ret < 0) {
        clear();
        return false;
    }

    // 查找视频编码器
    const AVCodec* videoCodec = avcodec_find_encoder(m_formatContext->oformat->video_codec);
    if(videoCodec) {
        m_videoCodecContext = avcodec_alloc_context3(videoCodec);
        if(!m_videoCodecContext) {
            clear();
            return false;
        }

        // 设置编码器上下文参数
        m_videoCodecContext->width = m_videoParam.width;
        m_videoCodecContext->height = m_videoParam.height;
        m_videoCodecContext->time_base = {1, m_videoParam.fps};
        m_videoCodecContext->framerate = {m_videoParam.fps, 1};
        m_videoCodecContext->bit_rate = m_videoParam.bitRate;
        m_videoCodecContext->gop_size = m_videoParam.gopSize;
        m_videoCodecContext->max_b_frames = 0;
        m_videoCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;

        ret = avcodec_open2(m_videoCodecContext, nullptr, nullptr);
        if(ret < 0) {
            clear();
            return false;
        }

        m_videoStream = avformat_new_stream(m_formatContext, nullptr);
        // 写入 packet 时有进行时间基转换，因此这里不需要设置，笔者设置了反而导致有的视频时长不对
        //m_videoStream->time_base = m_videoCodecContext->time_base;
        m_videoStream->codecpar->codec_tag = 0;
        if(!m_videoStream) {
            clear();
            return false;
        }
        ret = avcodec_parameters_from_context(m_videoStream->codecpar, m_videoCodecContext);
        if(ret < 0) {
            clear();
            return false;
        }
    }

    // 这里将音频编码器设置成和原视频一样，不同的音频编码器对应的样本数可能不同，因此需要额外处理
    const AVCodec* audioCodec = avcodec_find_encoder(m_formatContext->oformat->audio_codec);
    if(audioCodec) {
        m_audioCodecContext = avcodec_alloc_context3(audioCodec);
        m_audioCodecContext->bit_rate = m_audioParam.bitRate;
        m_audioCodecContext->sample_rate = m_audioParam.sampleRate;
        m_audioCodecContext->sample_fmt = (AVSampleFormat)m_audioParam.sampleFmt;
        m_audioCodecContext->time_base = {1, m_audioParam.sampleRate};
        m_audioCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;

        AVChannelLayout layout = AV_CHANNEL_LAYOUT_STEREO;
        ret = av_channel_layout_copy(&m_audioCodecContext->ch_layout, &layout);
        if (ret < 0) {
            clear();
            return false;
        }
        ret = avcodec_open2(m_audioCodecContext, nullptr, nullptr);
        if(ret < 0) {
            clear();
            return false;
        }

        m_audioStream = avformat_new_stream(m_formatContext, nullptr);
       // m_audioStream->time_base = m_audioCodecContext->time_base;
        m_audioStream->codecpar->codec_tag = 0;
        if(!m_audioStream) {
            clear();
            return false;
        }
        ret = avcodec_parameters_from_context(m_audioStream->codecpar, m_audioCodecContext);
        if(ret < 0) {
            clear();
            return false;
        }

        m_fifo = av_audio_fifo_alloc(m_audioCodecContext->sample_fmt, m_audioCodecContext->ch_layout.nb_channels, 2048);
    }

    ret = avformat_write_header(m_formatContext, nullptr);
    if(ret < 0) {
        clear();
        return false;
    }

    m_packet = av_packet_alloc();
    m_audioFrame = av_frame_alloc();
    m_videoFrame = av_frame_alloc();
    return true;
}

bool VideoEncoder::write(const MFrame& frame)
{
    if (frame.type == AVMEDIA_TYPE_VIDEO) {
        if (m_swsContext == nullptr) {
            initSwsContext(frame.frame_ptr);
        }
        av_frame_unref(m_videoFrame);
        m_videoFrame->format = m_videoCodecContext->pix_fmt;
        m_videoFrame->width = m_videoCodecContext->width;
        m_videoFrame->height = m_videoCodecContext->height;
        m_videoFrame->pts = m_videoPts++;
        int ret = av_frame_get_buffer(m_videoFrame, 0);
        if (ret < 0) {
            showError(ret);
            return false;
        }
        ret = sws_scale(
            m_swsContext,
            frame.frame_ptr->data, frame.frame_ptr->linesize,
            0, frame.frame_ptr->height,
            m_videoFrame->data, m_videoFrame->linesize);

        ret = avcodec_send_frame(m_videoCodecContext, m_videoFrame);
        if (ret < 0) {
            showError(ret);
            return false;
        }

        while (true) {
            int ret = avcodec_receive_packet(m_videoCodecContext, m_packet);
            if(ret < 0) {
                showError(ret);
                break;
            }
            // 将数据包中的有效时间字段（时间戳/持续时间）从一个时基转换为 输出流的时间
            av_packet_rescale_ts(m_packet, m_videoCodecContext->time_base, m_videoStream->time_base);
            // 不设置流索引的话会导致数据异常
            m_packet->stream_index = m_videoStream->index;
            av_interleaved_write_frame(m_formatContext, m_packet);   // 将数据包写入输出媒体文件
            av_packet_unref(m_packet);
        }

    } else if (frame.type == AVMEDIA_TYPE_AUDIO){
        if (m_swrContext == nullptr) {
            initSwrContext(frame.frame_ptr);
        }
        av_frame_unref(m_audioFrame);
        m_audioFrame->format = m_audioCodecContext->sample_fmt;
        int ret = av_channel_layout_copy(&m_audioFrame->ch_layout, &m_audioCodecContext->ch_layout);
        if (ret < 0) {
            showError(ret);
            return false;
        }

        // 计算输出样本数（使用输入帧的样本数）
        int out_nb_samples = swr_get_out_samples(m_swrContext, frame.frame_ptr->nb_samples);
        if (out_nb_samples < 0) {
            av_frame_free(&m_audioFrame);
            return false;
        }
        m_audioFrame->nb_samples = out_nb_samples;

        // 分配缓冲区
        ret = av_frame_get_buffer(m_audioFrame, 0);
        if (ret < 0) {
            showError(ret);
            av_frame_free(&m_audioFrame);
            return false;
        }

        // 进行音频重采样
        out_nb_samples = swr_convert(m_swrContext,
                          m_audioFrame->data, out_nb_samples,
                          (const uint8_t **)frame.frame_ptr->data, frame.frame_ptr->nb_samples);
        if (out_nb_samples < 0) {
            av_frame_free(&m_audioFrame);
            showError(ret);
            return false;
        }
        m_audioFrame->nb_samples = out_nb_samples;
        m_audioFrame->pts = m_audioPts;
        m_audioPts += m_audioFrame->nb_samples;

        if (m_audioCodecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) {
            ret = avcodec_send_frame(m_audioCodecContext, m_audioFrame);
            if (ret < 0) {
                showError(ret);
                return false;
            }

            while (true) {
                int ret = avcodec_receive_packet(m_audioCodecContext, m_packet);
                if(ret < 0) {
                    showError(ret);
                    break;
                }
                // 将数据包中的有效时间字段（时间戳/持续时间）从一个时基转换为 输出流的时间
                av_packet_rescale_ts(m_packet, m_videoCodecContext->time_base, m_audioStream->time_base);
                m_packet->stream_index = m_audioStream->index;
                av_interleaved_write_frame(m_formatContext, m_packet);   // 将数据包写入输出媒体文件
                av_packet_unref(m_packet);
            }
        } else {
            av_audio_fifo_write(m_fifo, (void**)m_audioFrame->data, m_audioFrame->nb_samples);
            while (av_audio_fifo_size(m_fifo) >= m_audioCodecContext->frame_size) {
                AVFrame* enc_frame = av_frame_alloc();
                enc_frame->nb_samples = m_audioCodecContext->frame_size;
                enc_frame->format = m_audioCodecContext->sample_fmt;
                enc_frame->ch_layout = m_audioCodecContext->ch_layout;
                enc_frame->sample_rate = m_audioCodecContext->sample_rate;

                av_frame_get_buffer(enc_frame, 0);
                av_audio_fifo_read(m_fifo, (void**)enc_frame->data, m_audioCodecContext->frame_size);

                // 送编码器处理
                avcodec_send_frame(m_audioCodecContext, enc_frame);
                av_frame_free(&enc_frame);

                while (true) {
                    int ret = avcodec_receive_packet(m_audioCodecContext, m_packet);
                    if(ret < 0) {
                        showError(ret);
                        break;
                    }
                    av_packet_rescale_ts(m_packet, m_audioCodecContext->time_base, m_audioStream->time_base);
                    m_packet->stream_index = m_audioStream->index;
                    av_interleaved_write_frame(m_formatContext, m_packet);
                    av_packet_unref(m_packet);
                }
            }
        }
    }
    return true;
}

void VideoEncoder::close()
{
    if (av_audio_fifo_size(m_fifo) > 0) {
        AVFrame* last_frame = av_frame_alloc();
        last_frame->nb_samples = m_audioCodecContext->frame_size;
        last_frame->format = m_audioCodecContext->sample_fmt;
        last_frame->ch_layout = m_audioCodecContext->ch_layout;  // 关键修改
        av_frame_get_buffer(last_frame, 0);

        // 读取并填充静音
        int read = av_audio_fifo_read(m_fifo, (void**)last_frame->data, av_audio_fifo_size(m_fifo));
        for (int ch = 0; ch < m_audioCodecContext->ch_layout.nb_channels; ch++) {
            int size = av_get_bytes_per_sample(m_audioCodecContext->sample_fmt);
            memset(last_frame->data[ch] + read * size, 0,
                   (m_audioCodecContext->frame_size - read) * size);
        }

        avcodec_send_frame(m_audioCodecContext, last_frame);
        av_frame_free(&last_frame);
        while (avcodec_receive_packet(m_videoCodecContext, m_packet) == 0) {
            av_packet_rescale_ts(m_packet, m_videoCodecContext->time_base, m_videoStream->time_base);
            av_interleaved_write_frame(m_formatContext, m_packet);
            av_packet_unref(m_packet);
        }
    }
    av_audio_fifo_free(m_fifo);

    if (m_videoCodecContext != nullptr) {
        avcodec_send_frame(m_videoCodecContext, nullptr);
        while (avcodec_receive_packet(m_videoCodecContext, m_packet) == 0) {
            av_packet_rescale_ts(m_packet, m_videoCodecContext->time_base, m_videoStream->time_base);
            av_interleaved_write_frame(m_formatContext, m_packet);
            av_packet_unref(m_packet);
        }
    }

    if (m_audioCodecContext != nullptr) {
        avcodec_send_frame(m_audioCodecContext, nullptr);
        while (avcodec_receive_packet(m_audioCodecContext, m_packet) == 0) {
            av_packet_rescale_ts(m_packet, m_audioCodecContext->time_base, m_audioStream->time_base);
            av_interleaved_write_frame(m_formatContext, m_packet);
            av_packet_unref(m_packet);
        }
    }

    if (m_formatContext != nullptr) {
        int ret = av_write_trailer(m_formatContext);
        if(ret < 0) {
            showError(ret);
            return;
        }
    }
}

bool VideoEncoder::initSwsContext(const AVFrame *frame)
{
    m_swsContext = sws_getContext(
        frame->width, frame->height, (AVPixelFormat)frame->format,
        m_videoCodecContext->width, m_videoCodecContext->height, m_videoCodecContext->pix_fmt,
        SWS_BILINEAR, nullptr, nullptr, nullptr);
    return m_swsContext != nullptr;
}

bool VideoEncoder::initSwrContext(const AVFrame *frame)
{
    int ret = swr_alloc_set_opts2(&m_swrContext,
                                  &m_audioCodecContext->ch_layout,
                                  m_audioCodecContext->sample_fmt,
                                  m_audioCodecContext->sample_rate,
                                  &frame->ch_layout,
                                  (AVSampleFormat)frame->format,
                                  frame->sample_rate,
                                  0, NULL);
    if (ret < 0) {
        showError(ret);
        return false;
    }

    ret = swr_init(m_swrContext);
    if (ret < 0) {
        showError(ret);
        swr_free(&m_swrContext);
        m_swrContext = nullptr;
        return false;
    }
    return true;
}

void VideoEncoder::clear()
{
    if (m_formatContext != nullptr) {
        avformat_close_input(&m_formatContext);
    }

    if (m_videoCodecContext != nullptr) {
        avcodec_free_context(&m_videoCodecContext);
    }

    if (m_audioCodecContext != nullptr) {
        avcodec_free_context(&m_audioCodecContext);
    }

    if (m_swsContext != nullptr) {
        sws_free_context(&m_swsContext);
    }

    if (m_swrContext != nullptr) {
        swr_free(&m_swrContext);
    }

    if (m_packet != nullptr) {
        av_packet_free(&m_packet);
    }

    if (m_videoFrame != nullptr) {
        av_frame_free(&m_videoFrame);
    }

    if (m_audioFrame != nullptr) {
        av_frame_free(&m_audioFrame);
    }
}

/// Writes the FFmpeg description of error code @p ret to std::cerr.
void VideoEncoder::showError(int ret)
{
    char message[1024];
    av_strerror(ret, message, sizeof message);
    std::cerr << message << std::endl;
}

魔乐社区

魔乐社区（Modelers.cn) 是一个中立、公益的人工智能社区，提供人工智能工具、模型、数据的托管、展示与应用协同服务，为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作，由全产业链共同建设、共同运营、共同享有，推动国产AI生态繁荣发展。

更多推荐

替你试过了，消费级显卡可以跑的开源文生图SOTA模型，顶级渲染、高密度文本绘图

魔乐社区

量化挑战赛冠军专访：4小时啃下W4A8量化，我靠的是这些经验

魔乐社区

小参数・大码力・易部署 | Qwen3.6-27B上线魔乐社区，基于昇腾的部署教程来了

继一周前模型开源发布后，千问再度开源Qwen3.6-27B —— 一个拥有270亿参数的稠密多模态模型，也是社区呼声最高的模型规格。Qwen3.6-27B 依然支持多模态思考与非思考模式，在智能体编程方面达到了旗舰级表现，全面超越前代开源旗舰 Qwen3.5-397B-A17B（总参数397B / 激活参数17B的MoE模型）。作为稠密架构，它无需MoE路由即可部署，是开发者在实用、可广泛部署规模