Inhaltsverzeichnis

CLI

Stream

Mit VLC kann man auch direkt streamen.

// save to file
ffmpeg.exe -i http://example.com/playlist.m3u8 -vcodec libx264 -acodec copy -f mp4 record.mp4 

// restream to hls
ffmpeg.exe -re -i manifest.m3u8 -f hls -hls_time 1 -hls_list_size 5 -hls_flags delete_segments live.m3u8
ffmpeg -i manifest.m3u8 -f ssegment -strftime 1 -segment_list live.m3u8 -segment_time 10 live_%%Y%%m%%d%%H%%M%%S.ts

// restream to dash
ffmpeg -i manifest.m3u8 -f dash live.mpd

// restream to rtmp
ffmpeg.exe -re -i test.mp4 -vcodec copy -acodec copy -f flv rtmp://localhost:1935/live/test
ffmpeg -re -i file.mp4 -vcodec libx264 -f flv rtmp://live.twitch.tv/app/<STREAM KEY>

//restream to hls
ffmpeg -re -i https://cnn-cnninternational-1-de.samsung.wurl.com/manifest/playlist.m3u8 -hls_time 1 -hls_list_size 3 -hls_flags delete_segments -s 1366x768 -strict -2 -ab 128k -ar 44100 c:\xampp\htdocs\playlist.m3u8

// find recording devices
ffmpeg -list_devices true -f dshow -i dummy
// list recording device options
ffmpeg -f dshow -list_options true -i video="Integrated Camera"
// start recording
ffmpeg -f dshow -s 320x240 -r 30 -vcodec mjpeg -i video="Integrated Camera" output.mp4

// stream desktop
ffmpeg -f gdigrab -r 30 -i desktop -c:v libx264 -g 250 -c:a libfdk_aac -ac 2 -hls_time 1 -hls_list_size 4 -hls_wrap 8 -s 1366x768 -strict -2 -ab 128k -ar 44100 D:/xampp/htdocs/playlist.m3u8

// stream image
ffmpeg.exe -loop 1 -i test.png -vcodec libx264 -acodec copy -vf "drawtext=fontsize=340:fontcolor=white:font=SansSerif:textfile='xxx.txt':reload=1:x=(w-text_w)/2:y=(h-text_h)/2" -f flv rtmp://

Convert

https://bytescout.com/blog/2016/12/ffmpeg-command-lines-convert-various-video-formats.html

ffmpeg -i file.mp4 file.mp3

// mp4 to flv
ffmpeg -i v.mp4 -c:v libx264 -crf 19 v.flv

// avi to gif
ffmpeg -i v.avi v.gif

// mp4 to ts
ffmpeg -i test.mp4 -bsf:v h264_mp4toannexb -codec copy output.ts

// mix png and mp3 to mp4
ffmpeg -loop 1 -framerate 2 -i test.png -i test.mp3 -c:v libx264 -preset medium -tune stillimage -crf 18 -c:a copy -shortest -pix_fmt yuv420p test.mp4

ffmpeg -i video.mp4 -i overlay.png -filter_complex "[0:v][1:v] overlay=0:0" -c:a copy output.mp4

// extract frames
ffmpeg -i test.mp4 frame_%05d.bmp

// extract all frames from between 1 and 5 seconds, and also between 11 and 15 seconds:
ffmpeg -i in.mp4 -vf select='between(t,1,5)+between(t,11,15)' -vsync 0 out%d.png

Modify

// extract metadata
ffmpeg -i in.mp3 -f ffmetadata metadata.txt

// add metadata
ffmpeg -i in.mp3 -acodec copy -metadata title="t" -metadata artist="a" -metadata album="a" out.mp3

// remove audio
ffmpeg -i in.mp4 -an out.mp4

// remove video
ffmpeg -i in.mp4 -vn out.mp3

// increase volume to 150%
ffmpeg -i in.mp4 -filter:a "volume=1.5" out.mp4

// decrease volume by 50%
ffmpeg -i in.mp4 -filter:a "volume=0.5" out.mp4

//deinterlace
ffmpeg -i in.mp4 -vf yadif out.mp4

//replace the first 90 seconds of audio with silence
ffmpeg -i in.mp4 -vcodec copy -af "volume=enable='lte(t,90)':volume=0" out.mp4

//replace all audio between 1'20" and 1'30" with silence:
ffmpeg -i in.mp4 -vcodec copy -af "volume=enable='between(t,80,90)':volume=0" out.mp4

// rotate 2x 90° clockwise (= 180°)
// transpose values:
// 0 = 90° counterclockwise and vertical flip (default)
// 1 = 90° clockwise
// 2 = 90° counterclockwise
// 3 = 90° clockwise and vertical flip
ffmpeg -i in.mp4 -vf "transpose=1, transpose=1" out.mp4

// flip vertical/horizontal
ffmpeg -i in.mp4 -vf "hflip,vflip" out.mp4

//scale
ffmpeg -i in.mp4 -vf scale=1024:789 out.mp4
ffmpeg -i in.mp4 -s 1280x720 -c:a copy out.mp4

// overlay, 1st input on layer 0, 2nd input on layer 1
ffmpeg -i in.mp4 -i overlay.png -filter_complex "[0:v][1:v] overlay=100:100" out.mp4
ffmpeg -loop 1 -i in.png -i in.mp3 -c:v libx264 -c:a aac -b:a 192k -shortest out.mp4
ffmpeg -i in.mp4 -i overlay.png -filter_complex "overlay=x=2160-800:y=3840-400" out.mp4

// cut out a clip, start at 0 seconds and record 3 seconds long
ffmpeg -i in.mp4 -ss 0 -c copy -t 3 out.mp4
ffmpeg -i in.mp4 -ss 00:00:00 -c copy -t 00:00:03 out.mp4

// cut out a clip, start at 00:00:00 seconds and record until 00:01:30
ffmpeg -i in.mp4 -ss 00:00:00 -to 00:01:30 -c:v copy -c:a copy out.mp4

// make transparent background
ffmpeg -i in.png -vf colorkey=white:0.3:0.5 out.png

//change color (lt, lte, eq, gte, gt)
ffmpeg -i logo.png -vf "yadif,format=rgb24,lutrgb=r='if(gt(val,128),255,val)':g='if(gt(val,128),255,val)':b='if(gt(val,128),255,val)'" out.png

// 0:0:0:0 == rgba
ffmpeg -i logo.png -filter_complex "colorchannelmixer=0:0:1:0:0:1:0:0:1:0:0:0" out.png

//Delay video by 3.84 seconds:
ffmpeg -i in.mp4 -itsoffset 3.84 -i in.mp4 -map 1:v -map 0:a -vcodec copy -acodec copy out.mp4

//Delay audio by 3.84 seconds:
ffmpeg -i in.mp4 -itsoffset 3.84 -i in.mp4 -map 0:v -map 1:a -vcodec copy -acodec copy out.mp4

// concatenate files (concat demuxer)
// file.txt example
file 'in1.mp4'
file 'in2.mp4'
file 'in3.mp4'
file 'in4.mp4'
// then run
ffmpeg -f concat -i file.txt -c copy out.mp4

//copy the video from in0.mp4 and audio from in1.mp4:
ffmpeg -i in0.mp4 -i in1.mp4 -c copy -map 0:0 -map 1:1 -shortest out.mp4

Build

Nur MinGW (gcc/g++), kein MSYS2. Du brauchst dafür vorkompilierte FFmpeg-“dev”-Dateien (Headers + *.dll.a Import-Libs) für Win32, plus die shared DLLs zum Ausführen. FFmpeg selbst liefert nur Source; Windows-Binaries kommen von Drittanbietern.

Von diesem Win32-Build-Verzeichnis:

Beide in einen Ordner entpacken. Die DLLs etc. aus dem „shared“ Archiv in einen Ordner /bin legen.

Lege an: C:\ffmpeg\

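Entpacke beide Archive da hinein (Ordnerstruktur soll am Ende ungefähr so sein):

C:\ffmpeg\include\   (Header, z. B. libavformat/avformat.h)
C:\ffmpeg\lib\       (Import-Libs, *.dll.a)
C:\ffmpeg\bin\       (DLLs aus dem „shared“-Archiv)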

Demo Programm

Probe/Demux-Reader

#include <iostream>
#include <string>
 
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
}
 
static std::string fferr(int e) {
    char buf[AV_ERROR_MAX_STRING_SIZE] = {0};
    av_strerror(e, buf, sizeof(buf));
    return buf;
}
 
int main(int argc, char** argv) {
    if (argc < 2) {
        std::cerr << "Usage: probe.exe <mediafile>\n";
        return 1;
    }
 
    AVFormatContext* fmt = nullptr;
 
    int r = avformat_open_input(&fmt, argv[1], nullptr, nullptr);
    if (r < 0) {
        std::cerr << "avformat_open_input failed: " << fferr(r) << "\n";
        return 1;
    }
 
    r = avformat_find_stream_info(fmt, nullptr);
    if (r < 0) {
        std::cerr << "avformat_find_stream_info failed: " << fferr(r) << "\n";
        avformat_close_input(&fmt);
        return 1;
    }
 
    std::cout << "Format: " << (fmt->iformat ? fmt->iformat->name : "unknown") << "\n";
    std::cout << "Streams: " << fmt->nb_streams << "\n";
    std::cout << "Duration (us): " << fmt->duration << "\n";
 
    av_dump_format(fmt, 0, argv[1], 0);
 
    avformat_close_input(&fmt);
    return 0;
}

Kompilieren:

g++ -std=c++11 -O2 main.cpp -o probe.exe -IC:\ffmpeg\include -LC:\ffmpeg\lib -lavformat -lavcodec -lavutil

Test (Die benötigten DLLs neben die EXE legen)

probe.exe test.mp4

Remux

#include <iostream>
#include <vector>
#include <string>
 
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
}
 
static std::string fferr(int e) {
    char buf[AV_ERROR_MAX_STRING_SIZE] = {0};
    av_strerror(e, buf, sizeof(buf));
    return buf;
}
 
int main(int argc, char** argv) {
    if (argc < 3) {
        std::cerr << "Usage: remux.exe <infile> <outfile>\n";
        return 1;
    }
 
    const char* in_filename  = argv[1];
    const char* out_filename = argv[2];
 
    AVFormatContext* ifmt = nullptr;
    AVFormatContext* ofmt = nullptr;
 
    int ret = avformat_open_input(&ifmt, in_filename, nullptr, nullptr);
    if (ret < 0) { std::cerr << "open input failed: " << fferr(ret) << "\n"; return 1; }
 
    ret = avformat_find_stream_info(ifmt, nullptr);
    if (ret < 0) { std::cerr << "find stream info failed: " << fferr(ret) << "\n"; avformat_close_input(&ifmt); return 1; }
 
    ret = avformat_alloc_output_context2(&ofmt, nullptr, nullptr, out_filename);
    if (ret < 0 || !ofmt) { std::cerr << "alloc output failed: " << fferr(ret) << "\n"; avformat_close_input(&ifmt); return 1; }
 
    std::vector<int> stream_mapping(ifmt->nb_streams, -1);
 
    for (unsigned i = 0; i < ifmt->nb_streams; i++) {
        AVStream* in_stream = ifmt->streams[i];
        AVCodecParameters* in_par = in_stream->codecpar;
 
        // nur A/V/Subs remuxen
        if (in_par->codec_type != AVMEDIA_TYPE_AUDIO &&
            in_par->codec_type != AVMEDIA_TYPE_VIDEO &&
            in_par->codec_type != AVMEDIA_TYPE_SUBTITLE) {
            continue;
        }
 
        AVStream* out_stream = avformat_new_stream(ofmt, nullptr);
        if (!out_stream) { std::cerr << "new stream failed\n"; ret = AVERROR(ENOMEM); break; }
 
        ret = avcodec_parameters_copy(out_stream->codecpar, in_par);
        if (ret < 0) { std::cerr << "copy codecpar failed: " << fferr(ret) << "\n"; break; }
 
        out_stream->codecpar->codec_tag = 0;
        out_stream->time_base = in_stream->time_base;
 
        stream_mapping[i] = out_stream->index;
    }
 
    if (ret >= 0 && !(ofmt->oformat->flags & AVFMT_NOFILE)) {
        ret = avio_open(&ofmt->pb, out_filename, AVIO_FLAG_WRITE);
        if (ret < 0) std::cerr << "avio_open failed: " << fferr(ret) << "\n";
    }
 
    if (ret >= 0) {
        ret = avformat_write_header(ofmt, nullptr);
        if (ret < 0) std::cerr << "write_header failed: " << fferr(ret) << "\n";
    }
 
    AVPacket* pkt = av_packet_alloc();
    if (!pkt) ret = AVERROR(ENOMEM);
 
    while (ret >= 0) {
        ret = av_read_frame(ifmt, pkt);
        if (ret < 0) break;
 
        int in_si = pkt->stream_index;
        if (in_si < 0 || in_si >= (int)stream_mapping.size() || stream_mapping[in_si] < 0) {
            av_packet_unref(pkt);
            continue;
        }
 
        AVStream* in_stream  = ifmt->streams[in_si];
        AVStream* out_stream = ofmt->streams[stream_mapping[in_si]];
 
        pkt->stream_index = stream_mapping[in_si];
        av_packet_rescale_ts(pkt, in_stream->time_base, out_stream->time_base);
        pkt->pos = -1;
 
        ret = av_interleaved_write_frame(ofmt, pkt);
        av_packet_unref(pkt);
 
        if (ret < 0) std::cerr << "write_frame failed: " << fferr(ret) << "\n";
    }
 
    // EOF ist ok
    if (ret == AVERROR_EOF) ret = 0;
 
    av_write_trailer(ofmt);
 
    av_packet_free(&pkt);
    avformat_close_input(&ifmt);
    if (ofmt && !(ofmt->oformat->flags & AVFMT_NOFILE)) avio_closep(&ofmt->pb);
    avformat_free_context(ofmt);
 
    return ret < 0 ? 1 : 0;
}

Transcode

Video to Video

#include <iostream>
#include <vector>
#include <string>
 
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
#include <libavutil/frame.h>
}
 
static std::string fferr(int e) {
    char buf[AV_ERROR_MAX_STRING_SIZE] = {0};
    av_strerror(e, buf, sizeof(buf));
    return buf;
}
 
// Feeds one frame to the encoder (frame == nullptr starts a flush) and muxes
// every packet the encoder hands back.
// Returns 0 once the encoder reports EAGAIN or EOF, otherwise the first
// negative FFmpeg error code from sending, receiving or writing.
static int encode_and_write(AVCodecContext* enc,
                            AVFrame* frame,
                            AVFormatContext* ofmt,
                            AVStream* out_stream,
                            AVPacket* pkt)
{
    int status = avcodec_send_frame(enc, frame);

    while (status >= 0) {
        status = avcodec_receive_packet(enc, pkt);
        if (status == AVERROR(EAGAIN) || status == AVERROR_EOF) {
            return 0;   // nothing more to collect for now
        }
        if (status < 0) {
            break;
        }

        // Encoder timestamps use enc->time_base; the muxer expects the stream's.
        pkt->stream_index = out_stream->index;
        av_packet_rescale_ts(pkt, enc->time_base, out_stream->time_base);

        status = av_interleaved_write_frame(ofmt, pkt);
        av_packet_unref(pkt);
    }

    return status;
}
 
int main(int argc, char** argv) {
    if (argc < 3) {
        std::cerr << "Usage: transcode_min.exe <infile> <outfile.mp4>\n";
        return 1;
    }
 
    const char* in_filename  = argv[1];
    const char* out_filename = argv[2];
 
    AVFormatContext* ifmt = nullptr;
    AVFormatContext* ofmt = nullptr;
 
    AVCodecContext* dec_v = nullptr;
    AVCodecContext* enc_v = nullptr;
    SwsContext* sws = nullptr;
 
    AVFrame* dec_frame = av_frame_alloc();
    AVFrame* enc_frame = av_frame_alloc();
    AVPacket* in_pkt   = av_packet_alloc();
    AVPacket* out_pkt  = av_packet_alloc();
 
    if (!dec_frame || !enc_frame || !in_pkt || !out_pkt) {
        std::cerr << "OOM\n";
        return 1;
    }
 
    int ret = 0;
 
    // --- open input ---
    ret = avformat_open_input(&ifmt, in_filename, nullptr, nullptr);
    if (ret < 0) { std::cerr << "open input: " << fferr(ret) << "\n"; return 1; }
 
    ret = avformat_find_stream_info(ifmt, nullptr);
    if (ret < 0) { std::cerr << "stream info: " << fferr(ret) << "\n"; return 1; }
 
    // find best video stream
    int v_si = av_find_best_stream(ifmt, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
    if (v_si < 0) { std::cerr << "No video stream found.\n"; return 1; }
 
    AVStream* in_vst = ifmt->streams[v_si];
 
    // --- open video decoder ---
    const AVCodec* vdec = avcodec_find_decoder(in_vst->codecpar->codec_id);
    if (!vdec) { std::cerr << "No decoder.\n"; return 1; }
 
    dec_v = avcodec_alloc_context3(vdec);
    if (!dec_v) { std::cerr << "alloc dec ctx failed\n"; return 1; }
 
    ret = avcodec_parameters_to_context(dec_v, in_vst->codecpar);
    if (ret < 0) { std::cerr << "par->ctx: " << fferr(ret) << "\n"; return 1; }
 
    ret = avcodec_open2(dec_v, vdec, nullptr);
    if (ret < 0) { std::cerr << "open decoder: " << fferr(ret) << "\n"; return 1; }
 
    // --- open output ---
    ret = avformat_alloc_output_context2(&ofmt, nullptr, nullptr, out_filename);
    if (ret < 0 || !ofmt) { std::cerr << "alloc output: " << fferr(ret) << "\n"; return 1; }
 
    std::vector<int> map(ifmt->nb_streams, -1);
 
    // Create output streams for non-video (copy)
    for (unsigned i = 0; i < ifmt->nb_streams; i++) {
        if ((int)i == v_si) continue;
 
        AVStream* in_st = ifmt->streams[i];
        if (in_st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
            in_st->codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
            continue;
        }
 
        AVStream* out_st = avformat_new_stream(ofmt, nullptr);
        if (!out_st) { std::cerr << "new stream failed\n"; return 1; }
 
        ret = avcodec_parameters_copy(out_st->codecpar, in_st->codecpar);
        if (ret < 0) { std::cerr << "par copy: " << fferr(ret) << "\n"; return 1; }
 
        out_st->codecpar->codec_tag = 0;
        out_st->time_base = in_st->time_base;
        map[i] = out_st->index;
    }
 
    // --- create output video stream (H.264 encode) ---
    AVStream* out_vst = avformat_new_stream(ofmt, nullptr);
    if (!out_vst) { std::cerr << "new video stream failed\n"; return 1; }
    map[v_si] = out_vst->index;
 
    // prefer libx264 if available, fallback to any H.264 encoder
    const AVCodec* venc = avcodec_find_encoder_by_name("libx264");
    if (!venc) venc = avcodec_find_encoder(AV_CODEC_ID_H264);
    if (!venc) { std::cerr << "No H.264 encoder found.\n"; return 1; }
 
    enc_v = avcodec_alloc_context3(venc);
    if (!enc_v) { std::cerr << "alloc enc ctx failed\n"; return 1; }
 
    enc_v->width  = dec_v->width;
    enc_v->height = dec_v->height;
    enc_v->sample_aspect_ratio = dec_v->sample_aspect_ratio;
 
    // MP4 wants yuv420p typically
    enc_v->pix_fmt = AV_PIX_FMT_YUV420P;
 
    AVRational fps = av_guess_frame_rate(ifmt, in_vst, nullptr);
    if (fps.num <= 0 || fps.den <= 0) fps = AVRational{25, 1};
    enc_v->time_base = av_inv_q(fps);
    enc_v->framerate = fps;
 
    enc_v->bit_rate = 2'000'000; // 2 Mbps (einfacher Default)
 
    if (ofmt->oformat->flags & AVFMT_GLOBALHEADER)
        enc_v->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 
    // if libx264: set some nice defaults (ignore errors if unsupported)
    av_opt_set(enc_v->priv_data, "preset", "veryfast", 0);
    av_opt_set(enc_v->priv_data, "crf", "23", 0);
 
    ret = avcodec_open2(enc_v, venc, nullptr);
    if (ret < 0) { std::cerr << "open encoder: " << fferr(ret) << "\n"; return 1; }
 
    ret = avcodec_parameters_from_context(out_vst->codecpar, enc_v);
    if (ret < 0) { std::cerr << "ctx->par: " << fferr(ret) << "\n"; return 1; }
    out_vst->time_base = enc_v->time_base;
 
    // --- open output IO + header ---
    if (!(ofmt->oformat->flags & AVFMT_NOFILE)) {
        ret = avio_open(&ofmt->pb, out_filename, AVIO_FLAG_WRITE);
        if (ret < 0) { std::cerr << "avio_open: " << fferr(ret) << "\n"; return 1; }
    }
 
    ret = avformat_write_header(ofmt, nullptr);
    if (ret < 0) { std::cerr << "write_header: " << fferr(ret) << "\n"; return 1; }
 
    // prepare scaling/conversion frame buffer
    enc_frame->format = enc_v->pix_fmt;
    enc_frame->width  = enc_v->width;
    enc_frame->height = enc_v->height;
 
    ret = av_frame_get_buffer(enc_frame, 32);
    if (ret < 0) { std::cerr << "frame_get_buffer: " << fferr(ret) << "\n"; return 1; }
 
    int64_t fallback_pts = 0;
 
    // --- main loop ---
    while ((ret = av_read_frame(ifmt, in_pkt)) >= 0) {
        int si = in_pkt->stream_index;
 
        if (si != v_si) {
            // remux non-video streams (copy)
            if (si >= 0 && si < (int)map.size() && map[si] >= 0) {
                AVStream* in_st  = ifmt->streams[si];
                AVStream* out_st = ofmt->streams[map[si]];
 
                in_pkt->stream_index = map[si];
                av_packet_rescale_ts(in_pkt, in_st->time_base, out_st->time_base);
                in_pkt->pos = -1;
 
                int wret = av_interleaved_write_frame(ofmt, in_pkt);
                if (wret < 0) std::cerr << "write copy pkt: " << fferr(wret) << "\n";
            }
            av_packet_unref(in_pkt);
            continue;
        }
 
        // --- decode video packet ---
        ret = avcodec_send_packet(dec_v, in_pkt);
        av_packet_unref(in_pkt);
        if (ret < 0) { std::cerr << "send_packet: " << fferr(ret) << "\n"; return 1; }
 
        while (true) {
            ret = avcodec_receive_frame(dec_v, dec_frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
            if (ret < 0) { std::cerr << "receive_frame: " << fferr(ret) << "\n"; return 1; }
 
            // pts handling (best-effort)
            int64_t in_pts = (dec_frame->pts != AV_NOPTS_VALUE)
                               ? dec_frame->pts
                               : dec_frame->best_effort_timestamp;
            if (in_pts == AV_NOPTS_VALUE) in_pts = fallback_pts++;
 
            int64_t enc_pts = av_rescale_q(in_pts, in_vst->time_base, enc_v->time_base);
 
            // convert to encoder format if needed
            if (!sws) {
                sws = sws_getContext(dec_v->width, dec_v->height, (AVPixelFormat)dec_frame->format,
                                     enc_v->width, enc_v->height, enc_v->pix_fmt,
                                     SWS_BILINEAR, nullptr, nullptr, nullptr);
                if (!sws) { std::cerr << "sws_getContext failed\n"; return 1; }
            }
 
            ret = av_frame_make_writable(enc_frame);
            if (ret < 0) { std::cerr << "frame not writable: " << fferr(ret) << "\n"; return 1; }
 
            sws_scale(sws,
                      dec_frame->data, dec_frame->linesize,
                      0, dec_v->height,
                      enc_frame->data, enc_frame->linesize);
 
            enc_frame->pts = enc_pts;
 
            // --- encode + mux ---
            ret = encode_and_write(enc_v, enc_frame, ofmt, out_vst, out_pkt);
            av_frame_unref(dec_frame);
            if (ret < 0) { std::cerr << "encode/write: " << fferr(ret) << "\n"; return 1; }
        }
    }
 
    // Drain decoder
    avcodec_send_packet(dec_v, nullptr);
    while (true) {
        ret = avcodec_receive_frame(dec_v, dec_frame);
        if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN)) break;
        if (ret < 0) break;
 
        int64_t in_pts = (dec_frame->pts != AV_NOPTS_VALUE)
                           ? dec_frame->pts
                           : dec_frame->best_effort_timestamp;
        if (in_pts == AV_NOPTS_VALUE) in_pts = fallback_pts++;
 
        int64_t enc_pts = av_rescale_q(in_pts, in_vst->time_base, enc_v->time_base);
 
        av_frame_make_writable(enc_frame);
        sws_scale(sws,
                  dec_frame->data, dec_frame->linesize,
                  0, dec_v->height,
                  enc_frame->data, enc_frame->linesize);
        enc_frame->pts = enc_pts;
 
        ret = encode_and_write(enc_v, enc_frame, ofmt, out_vst, out_pkt);
        av_frame_unref(dec_frame);
        if (ret < 0) break;
    }
 
    // Flush encoder
    ret = encode_and_write(enc_v, nullptr, ofmt, out_vst, out_pkt);
    if (ret < 0) std::cerr << "flush encoder: " << fferr(ret) << "\n";
 
    av_write_trailer(ofmt);
 
    // cleanup
    sws_freeContext(sws);
    avcodec_free_context(&dec_v);
    avcodec_free_context(&enc_v);
 
    av_frame_free(&dec_frame);
    av_frame_free(&enc_frame);
    av_packet_free(&in_pkt);
    av_packet_free(&out_pkt);
 
    avformat_close_input(&ifmt);
    if (ofmt && !(ofmt->oformat->flags & AVFMT_NOFILE)) avio_closep(&ofmt->pb);
    avformat_free_context(ofmt);
 
    return 0;
}
g++ -std=c++17 -O2 main.cpp -o main.exe -IC:\chroot\ffmpeg\include -LC:\chroot\ffmpeg\lib -lavformat -lavcodec -lavutil -lswscale -lws2_32 -lsecur32 

# optional: -lbcrypt
main.exe test.mp4 test.mkv

Video to Audio

#include <iostream>
#include <string>
#include <algorithm>
 
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
#include <libavutil/audio_fifo.h>
#include <libswresample/swresample.h>
}
 
static std::string fferr(int e) {
    char buf[AV_ERROR_MAX_STRING_SIZE] = {0};
    av_strerror(e, buf, sizeof(buf));
    return buf;
}
 
// Hands one audio frame to the encoder (frame == nullptr starts a flush) and
// writes every packet the encoder produces to the muxer.
// Returns 0 once the encoder reports EAGAIN or EOF, otherwise the first
// negative FFmpeg error code from sending, receiving or writing.
static int encode_and_write_audio(AVCodecContext* enc, AVFrame* frame, AVFormatContext* ofmt,
                                  AVStream* out_st, AVPacket* pkt)
{
    const int send_status = avcodec_send_frame(enc, frame);
    if (send_status < 0) {
        return send_status;
    }

    for (;;) {
        const int recv_status = avcodec_receive_packet(enc, pkt);
        const bool nothing_pending =
            (recv_status == AVERROR(EAGAIN)) || (recv_status == AVERROR_EOF);
        if (nothing_pending) {
            return 0;
        }
        if (recv_status < 0) {
            return recv_status;
        }

        // Move the packet from the encoder time base to the stream time base.
        pkt->stream_index = out_st->index;
        av_packet_rescale_ts(pkt, enc->time_base, out_st->time_base);

        const int write_status = av_interleaved_write_frame(ofmt, pkt);
        av_packet_unref(pkt);
        if (write_status < 0) {
            return write_status;
        }
    }
}
 
int main(int argc, char** argv) {
    if (argc < 3) {
        std::cerr << "Usage: mp4_to_mp3.exe <infile> <outfile.mp3>\n";
        return 1;
    }
 
    const char* in_filename  = argv[1];
    const char* out_filename = argv[2];
 
    AVFormatContext* ifmt = nullptr;
    AVFormatContext* ofmt = nullptr;
 
    AVCodecContext* dec = nullptr;
    AVCodecContext* enc = nullptr;
    SwrContext* swr = nullptr;
    AVAudioFifo* fifo = nullptr;
 
    AVPacket* ipkt = av_packet_alloc();
    AVPacket* opkt = av_packet_alloc();
    AVFrame*  frame = av_frame_alloc();
    AVFrame*  out_frame = av_frame_alloc();
 
    if (!ipkt || !opkt || !frame || !out_frame) {
        std::cerr << "OOM\n";
        return 1;
    }
 
    int ret = avformat_open_input(&ifmt, in_filename, nullptr, nullptr);
    if (ret < 0) { std::cerr << "open input: " << fferr(ret) << "\n"; return 1; }
 
    ret = avformat_find_stream_info(ifmt, nullptr);
    if (ret < 0) { std::cerr << "stream info: " << fferr(ret) << "\n"; return 1; }
 
    int a_si = av_find_best_stream(ifmt, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
    if (a_si < 0) { std::cerr << "No audio stream.\n"; return 1; }
 
    AVStream* in_st = ifmt->streams[a_si];
 
    // --- decoder ---
    const AVCodec* adec = avcodec_find_decoder(in_st->codecpar->codec_id);
    if (!adec) { std::cerr << "No audio decoder.\n"; return 1; }
 
    dec = avcodec_alloc_context3(adec);
    if (!dec) { std::cerr << "alloc dec failed\n"; return 1; }
 
    ret = avcodec_parameters_to_context(dec, in_st->codecpar);
    if (ret < 0) { std::cerr << "par->ctx: " << fferr(ret) << "\n"; return 1; }
 
    ret = avcodec_open2(dec, adec, nullptr);
    if (ret < 0) { std::cerr << "open dec: " << fferr(ret) << "\n"; return 1; }
 
    // --- output context (.mp3) ---
    ret = avformat_alloc_output_context2(&ofmt, nullptr, "mp3", out_filename);
    if (ret < 0 || !ofmt) { std::cerr << "alloc output: " << fferr(ret) << "\n"; return 1; }
 
    AVStream* out_st = avformat_new_stream(ofmt, nullptr);
    if (!out_st) { std::cerr << "new stream failed\n"; return 1; }
 
    // --- encoder (prefer libmp3lame) ---
    const AVCodec* aenc = avcodec_find_encoder_by_name("libmp3lame");
    if (!aenc) aenc = avcodec_find_encoder(AV_CODEC_ID_MP3);
    if (!aenc) {
        std::cerr << "No MP3 encoder found. Your FFmpeg is missing libmp3lame encoder.\n";
        return 1;
    }
 
    enc = avcodec_alloc_context3(aenc);
    if (!enc) { std::cerr << "alloc enc failed\n"; return 1; }
 
    enc->bit_rate = 192000;
    enc->sample_rate = dec->sample_rate > 0 ? dec->sample_rate : 44100;
 
/*
    // channel layout (old-style, works on many FFmpeg builds; may be deprecated)
    uint64_t in_ch_layout = dec->channel_layout;
    if (!in_ch_layout) in_ch_layout = av_get_default_channel_layout(dec->channels);
 
    enc->channel_layout = in_ch_layout;
    enc->channels = av_get_channel_layout_nb_channels(enc->channel_layout);
 */
 AVChannelLayout in_layout;
av_channel_layout_default(&in_layout, 2);              // fallback stereo
if (dec->ch_layout.nb_channels > 0) {
    av_channel_layout_copy(&in_layout, &dec->ch_layout);
}
 
// encoder layout = input layout
av_channel_layout_copy(&enc->ch_layout, &in_layout);
 
// wenn du irgendwo "channels" brauchst:
int out_channels = enc->ch_layout.nb_channels;
 
    // pick a supported sample format
    enc->sample_fmt = (aenc->sample_fmts && aenc->sample_fmts[0] != AV_SAMPLE_FMT_NONE)
                        ? aenc->sample_fmts[0]
                        : AV_SAMPLE_FMT_S16P;
 
    enc->time_base = AVRational{1, enc->sample_rate};
 
    if (ofmt->oformat->flags & AVFMT_GLOBALHEADER)
        enc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 
    ret = avcodec_open2(enc, aenc, nullptr);
    if (ret < 0) { std::cerr << "open enc: " << fferr(ret) << "\n"; return 1; }
 
    ret = avcodec_parameters_from_context(out_st->codecpar, enc);
    if (ret < 0) { std::cerr << "ctx->par: " << fferr(ret) << "\n"; return 1; }
    out_st->time_base = enc->time_base;
 
/* 
    // --- swr (convert input audio to encoder format) ---
    swr = swr_alloc_set_opts(nullptr,
                             (int64_t)enc->channel_layout, enc->sample_fmt, enc->sample_rate,
                             (int64_t)in_ch_layout,       dec->sample_fmt, dec->sample_rate,
                             0, nullptr);
    if (!swr) { std::cerr << "swr_alloc_set_opts failed\n"; return 1; }
*/
ret = swr_alloc_set_opts2(&swr,
                          &enc->ch_layout, enc->sample_fmt, enc->sample_rate,
                          &in_layout,      dec->sample_fmt, dec->sample_rate,
                          0, nullptr);
if (ret < 0) { std::cerr << "swr_alloc_set_opts2: " << fferr(ret) << "\n"; return 1; }
 
    ret = swr_init(swr);
    if (ret < 0) { std::cerr << "swr_init: " << fferr(ret) << "\n"; return 1; }
 
    //fifo = av_audio_fifo_alloc(enc->sample_fmt, enc->channels, 1);
	fifo = av_audio_fifo_alloc(enc->sample_fmt, enc->ch_layout.nb_channels, 1);
    if (!fifo) { std::cerr << "audio fifo alloc failed\n"; return 1; }
 
    // --- write header ---
    if (!(ofmt->oformat->flags & AVFMT_NOFILE)) {
        ret = avio_open(&ofmt->pb, out_filename, AVIO_FLAG_WRITE);
        if (ret < 0) { std::cerr << "avio_open: " << fferr(ret) << "\n"; return 1; }
    }
 
    ret = avformat_write_header(ofmt, nullptr);
    if (ret < 0) { std::cerr << "write_header: " << fferr(ret) << "\n"; return 1; }
 
    int frame_size = (enc->frame_size > 0) ? enc->frame_size : 1152; // MP3 typical
    int64_t samples_written = 0;
 
    auto push_converted_to_fifo = [&](AVFrame* inframe) -> int {
        int out_nb = (int)av_rescale_rnd(
            swr_get_delay(swr, dec->sample_rate) + inframe->nb_samples,
            enc->sample_rate, dec->sample_rate, AV_ROUND_UP);
 
        uint8_t** conv = nullptr;
 
		//int ret2 = av_samples_alloc_array_and_samples(&conv, nullptr, enc->channels, out_nb, enc->sample_fmt, 0);
		int ret2 = av_samples_alloc_array_and_samples(&conv, nullptr, enc->ch_layout.nb_channels, out_nb, enc->sample_fmt, 0);
 
        if (ret2 < 0) return ret2;
 
        int conv_samp = swr_convert(swr, conv, out_nb,
                                   (const uint8_t**)inframe->extended_data, inframe->nb_samples);
        if (conv_samp < 0) { av_freep(&conv[0]); av_freep(&conv); return conv_samp; }
 
        ret2 = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + conv_samp);
        if (ret2 < 0) { av_freep(&conv[0]); av_freep(&conv); return ret2; }
 
        av_audio_fifo_write(fifo, (void**)conv, conv_samp);
 
        av_freep(&conv[0]);
        av_freep(&conv);
        return 0;
    };
 
    auto pop_fifo_encode = [&](int nb) -> int {
        av_frame_unref(out_frame);
        out_frame->nb_samples = nb;
        out_frame->format = enc->sample_fmt;
 
		//out_frame->channel_layout = enc->channel_layout;
        av_channel_layout_uninit(&out_frame->ch_layout);
		av_channel_layout_copy(&out_frame->ch_layout, &enc->ch_layout);
 
		out_frame->sample_rate = enc->sample_rate;
 
        int r2 = av_frame_get_buffer(out_frame, 0);
        if (r2 < 0) return r2;
 
        r2 = av_frame_make_writable(out_frame);
        if (r2 < 0) return r2;
 
        int got = av_audio_fifo_read(fifo, (void**)out_frame->data, nb);
        if (got != nb) return AVERROR(EIO);
 
        out_frame->pts = samples_written;
        samples_written += nb;
 
        return encode_and_write_audio(enc, out_frame, ofmt, out_st, opkt);
    };
 
    // --- main loop ---
    while ((ret = av_read_frame(ifmt, ipkt)) >= 0) {
        if (ipkt->stream_index != a_si) {
            av_packet_unref(ipkt);
            continue;
        }
 
        ret = avcodec_send_packet(dec, ipkt);
        av_packet_unref(ipkt);
        if (ret < 0) { std::cerr << "send_packet: " << fferr(ret) << "\n"; return 1; }
 
        while (true) {
            ret = avcodec_receive_frame(dec, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
            if (ret < 0) { std::cerr << "receive_frame: " << fferr(ret) << "\n"; return 1; }
 
            ret = push_converted_to_fifo(frame);
            av_frame_unref(frame);
            if (ret < 0) { std::cerr << "convert: " << fferr(ret) << "\n"; return 1; }
 
            while (av_audio_fifo_size(fifo) >= frame_size) {
                ret = pop_fifo_encode(frame_size);
                if (ret < 0) { std::cerr << "encode: " << fferr(ret) << "\n"; return 1; }
            }
        }
    }
 
    // flush decoder
    avcodec_send_packet(dec, nullptr);
    while (true) {
        ret = avcodec_receive_frame(dec, frame);
        if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN)) break;
        if (ret < 0) break;
 
        ret = push_converted_to_fifo(frame);
        av_frame_unref(frame);
        if (ret < 0) break;
    }
 
    // drain fifo (last partial)
    while (av_audio_fifo_size(fifo) > 0) {
        int nb = std::min(av_audio_fifo_size(fifo), frame_size);
        ret = pop_fifo_encode(nb);
        if (ret < 0) { std::cerr << "encode tail: " << fferr(ret) << "\n"; break; }
    }
 
    // flush encoder
    ret = encode_and_write_audio(enc, nullptr, ofmt, out_st, opkt);
    if (ret < 0) std::cerr << "flush enc: " << fferr(ret) << "\n";
 
    av_write_trailer(ofmt);
 
    // cleanup
    av_audio_fifo_free(fifo);
    swr_free(&swr);
    avcodec_free_context(&dec);
    avcodec_free_context(&enc);
 
    av_frame_free(&frame);
    av_frame_free(&out_frame);
    av_packet_free(&ipkt);
    av_packet_free(&opkt);
 
    avformat_close_input(&ifmt);
    if (ofmt && !(ofmt->oformat->flags & AVFMT_NOFILE)) avio_closep(&ofmt->pb);
    avformat_free_context(ofmt);
 
    return 0;
}
g++ -std=c++17 -O2 main.cpp -o main.exe -IC:\chroot\ffmpeg\include -LC:\chroot\ffmpeg\lib -lavformat -lavcodec -lavutil -lswresample -lws2_32 -lsecur32 

# optional: -lbcrypt
main.exe test.mp4 test.mp3