feat: GPU-downscale + software H.264 encode pipeline (WIP)
Add SwEncState in avhw.rs: a GPU pipeline that uses scale_vaapi to downscale 4K BGRA -> 2K NV12 on the AMD iGPU, then software-encodes with libopenh264. - import_dma_buf_to_vaapi: av_hwframe_map-based DMA-BUF import - SwEncState: GPU filter graph (scale_vaapi) + NV12->YUV420P + libopenh264 - state_portal.rs: integrated SwEncState, automatic DRM device detection - vaapi_import_bench.rs: CPU vs. GPU pipeline benchmark - sw_encode_bench.rs: software encode benchmark Benchmark results: the GPU pipeline reaches ~91 FPS theoretical (10.95 ms/frame) vs. ~33 FPS (30.21 ms/frame) for the CPU pipeline. Known issue: only 1 frame is encoded in production recording; diagnostic STATS logging was added to debug the frame flow.
This commit is contained in:
545
src/bin/sw_encode_bench.rs
Normal file
545
src/bin/sw_encode_bench.rs
Normal file
@@ -0,0 +1,545 @@
|
||||
// sw_encode_bench.rs — Software encoding pipeline benchmark for screen capture
|
||||
//
|
||||
// Benchmarks: Portal capture -> mmap DMA-BUF -> sws_scale BGR0->YUV420P -> libx264 encode (libopenh264 fallback)
|
||||
//
|
||||
// Usage: cargo run --bin sw_encode_bench -- --output /tmp/bench_test.mp4
|
||||
|
||||
use std::ffi::CString;
|
||||
use std::os::fd::AsRawFd;
|
||||
use std::path::Path;
|
||||
use std::ptr;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use clap::Parser;
|
||||
|
||||
use ffmpeg_next as ff;
|
||||
use ffmpeg_next::ffi;
|
||||
use ffmpeg_next::packet::Mut;
|
||||
|
||||
use wl_webrtc::args::Args;
|
||||
use wl_webrtc::cap_portal::{CapPortal, PwCtrlEvent};
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(
|
||||
name = "sw_encode_bench",
|
||||
about = "Software encoding pipeline benchmark"
|
||||
)]
|
||||
struct BenchArgs {
|
||||
#[arg(short, long)]
|
||||
output: String,
|
||||
|
||||
#[arg(long, default_value_t = 120)]
|
||||
frames: u32,
|
||||
|
||||
#[arg(long, default_value_t = 2560)]
|
||||
enc_width: u32,
|
||||
|
||||
#[arg(long, default_value_t = 1440)]
|
||||
enc_height: u32,
|
||||
}
|
||||
|
||||
/// Per-frame timing samples (in microseconds) gathered while the benchmark runs.
#[derive(Default)]
struct FrameStats {
    /// Time spent mapping each frame's DMA-BUF into the process.
    mmap_us: Vec<u64>,
    /// Time spent in the scale / pixel-format conversion step.
    scale_us: Vec<u64>,
    /// Time spent sending the frame to the encoder and draining packets.
    encode_us: Vec<u64>,
    /// Time for the whole per-frame pipeline.
    total_us: Vec<u64>,
    /// Number of frames whose DMA-BUF could not be mapped.
    mmap_failures: u32,
}

impl FrameStats {
    /// Returns the mean of `data` (microsecond samples) converted to
    /// milliseconds, or `0.0` when there are no samples.
    fn avg_ms(data: &[u64]) -> f64 {
        match data.len() {
            0 => 0.0,
            samples => {
                let total_us: u64 = data.iter().sum();
                total_us as f64 / samples as f64 / 1000.0
            }
        }
    }
}
|
||||
|
||||
/// Converts an `ffmpeg_next` pixel-format value into the raw FFI
/// `AVPixelFormat` value expected by the C-level FFmpeg calls.
fn pix_fmt(p: ff::format::Pixel) -> ffi::AVPixelFormat {
    let raw: ffi::AVPixelFormat = p.into();
    raw
}
|
||||
|
||||
fn receive_first_frame(cap: &CapPortal) -> Result<wl_webrtc::cap_portal::PwDmaBufFrame> {
|
||||
loop {
|
||||
if let Ok(ctrl) = cap.event_receiver().try_recv() {
|
||||
match ctrl {
|
||||
PwCtrlEvent::StreamEnded => bail!("PipeWire stream ended before first frame"),
|
||||
PwCtrlEvent::Error(e) => bail!("PipeWire error: {e}"),
|
||||
}
|
||||
}
|
||||
match cap
|
||||
.frame_receiver()
|
||||
.recv_timeout(std::time::Duration::from_secs(10))
|
||||
{
|
||||
Ok(frame) => return Ok(frame),
|
||||
Err(crossbeam_channel::RecvTimeoutError::Timeout) => {
|
||||
bail!("Timeout waiting for first frame (10s)");
|
||||
}
|
||||
Err(crossbeam_channel::RecvTimeoutError::Disconnected) => {
|
||||
bail!("PipeWire frame channel disconnected");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let bench_args = BenchArgs::parse();
|
||||
|
||||
println!("=== Software Encode Benchmark ===");
|
||||
println!("Output: {}", bench_args.output);
|
||||
println!("Target frames: {}", bench_args.frames);
|
||||
println!(
|
||||
"Encode resolution: {}x{}",
|
||||
bench_args.enc_width, bench_args.enc_height
|
||||
);
|
||||
println!();
|
||||
|
||||
ff::init()?;
|
||||
|
||||
println!("[1/4] Requesting screen capture via XDG Portal...");
|
||||
println!(" (Select a screen to share in the portal dialog)");
|
||||
|
||||
let portal_args = Args {
|
||||
output: bench_args.output.clone(),
|
||||
output_name: None,
|
||||
fps: 60,
|
||||
codec: "h264".to_string(),
|
||||
hw_accel: "vaapi".to_string(),
|
||||
drm_device: None,
|
||||
bitrate: None,
|
||||
gop_size: None,
|
||||
verbose: false,
|
||||
backend: Some("portal".to_string()),
|
||||
port: 0,
|
||||
};
|
||||
|
||||
let cap = CapPortal::new(&portal_args)?;
|
||||
println!("[1/4] Portal connected, PipeWire stream active\n");
|
||||
|
||||
println!("[2/4] Waiting for first frame from PipeWire...");
|
||||
let first_frame = receive_first_frame(&cap)?;
|
||||
|
||||
let src_width = first_frame.width;
|
||||
let src_height = first_frame.height;
|
||||
let src_stride = first_frame.stride;
|
||||
let enc_width = bench_args.enc_width;
|
||||
let enc_height = bench_args.enc_height;
|
||||
|
||||
println!(
|
||||
"[2/4] First frame: {}x{}, stride={}, format=0x{:08X}",
|
||||
src_width, src_height, src_stride, first_frame.format
|
||||
);
|
||||
println!(
|
||||
" Capture: {}x{} Encode: {}x{}\n",
|
||||
src_width, src_height, enc_width, enc_height
|
||||
);
|
||||
|
||||
println!("[3/4] Testing mmap on DMA-BUF...");
|
||||
let mmap_size = (src_stride as usize) * (src_height as usize);
|
||||
let mmap_ptr = unsafe {
|
||||
libc::mmap(
|
||||
ptr::null_mut(),
|
||||
mmap_size,
|
||||
libc::PROT_READ,
|
||||
libc::MAP_SHARED,
|
||||
first_frame.fd.as_raw_fd(),
|
||||
first_frame.offset as i64,
|
||||
)
|
||||
};
|
||||
|
||||
if mmap_ptr == libc::MAP_FAILED {
|
||||
let errno = std::io::Error::last_os_error();
|
||||
bail!(
|
||||
"mmap on DMA-BUF fd FAILED — AMD driver may not support \
|
||||
CPU read of screen capture DMA-BUF buffers.\n\
|
||||
Error: {} (errno={})\n\
|
||||
\n\
|
||||
Workarounds:\n\
|
||||
1. Use VAAPI hardware import (av_hwframe_map) instead of mmap\n\
|
||||
2. Use wlroots compositor with wlr-screencopy (SHM-based)\n\
|
||||
3. Use a virtual display or software renderer",
|
||||
errno,
|
||||
errno.raw_os_error().unwrap_or(-1)
|
||||
);
|
||||
}
|
||||
|
||||
println!(
|
||||
"[3/4] mmap SUCCESS — CPU can read DMA-BUF ({:.1} MB)\n",
|
||||
mmap_size as f64 / 1024.0 / 1024.0
|
||||
);
|
||||
unsafe {
|
||||
libc::munmap(mmap_ptr, mmap_size);
|
||||
}
|
||||
drop(first_frame);
|
||||
|
||||
// Set up libx264 encoder via FFI (same pattern as avhw.rs)
|
||||
println!("[4/4] Setting up libx264 encoder...");
|
||||
let output_path = Path::new(&bench_args.output);
|
||||
let output_cstr = CString::new(output_path.to_str().unwrap())?;
|
||||
|
||||
// Try libx264 first (best quality/speed), fall back to openh264
|
||||
let codec = ff::encoder::find_by_name("libx264")
|
||||
.or_else(|| ff::encoder::find_by_name("libopenh264"))
|
||||
.ok_or_else(|| {
|
||||
anyhow::anyhow!("No H.264 software encoder found (tried libx264, libopenh264)")
|
||||
})?;
|
||||
println!("[4/4] Using encoder: {}\n", codec.name());
|
||||
|
||||
let mut enc = {
|
||||
let ctx = ff::codec::Context::new_with_codec(codec);
|
||||
ctx.encoder().video()?
|
||||
};
|
||||
|
||||
enc.set_width(enc_width);
|
||||
enc.set_height(enc_height);
|
||||
enc.set_format(ff::format::Pixel::YUV420P);
|
||||
enc.set_time_base(ff::Rational::new(1, 60));
|
||||
enc.set_max_b_frames(0);
|
||||
enc.set_gop(60);
|
||||
|
||||
let codec_name = codec.name();
|
||||
if codec_name == "libx264" {
|
||||
unsafe {
|
||||
let key = CString::new("preset").unwrap();
|
||||
let val = CString::new("veryfast").unwrap();
|
||||
ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0);
|
||||
let key = CString::new("tune").unwrap();
|
||||
let val = CString::new("zerolatency").unwrap();
|
||||
ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
let opened = enc.open()?;
|
||||
let mut enc_video = opened.0;
|
||||
|
||||
// Create output format context via FFI
|
||||
let mut fmt_ctx_ptr: *mut ffi::AVFormatContext = ptr::null_mut();
|
||||
let ret = unsafe {
|
||||
ffi::avformat_alloc_output_context2(
|
||||
&mut fmt_ctx_ptr,
|
||||
ptr::null_mut(),
|
||||
ptr::null(),
|
||||
output_cstr.as_ptr(),
|
||||
)
|
||||
};
|
||||
if ret < 0 || fmt_ctx_ptr.is_null() {
|
||||
bail!("Failed to allocate output format context: error {ret}");
|
||||
}
|
||||
|
||||
let stream_ptr = unsafe { ffi::avformat_new_stream(fmt_ctx_ptr, ptr::null()) };
|
||||
if stream_ptr.is_null() {
|
||||
bail!("Failed to create new stream");
|
||||
}
|
||||
|
||||
let ret =
|
||||
unsafe { ffi::avcodec_parameters_from_context((*stream_ptr).codecpar, enc_video.as_ptr()) };
|
||||
if ret < 0 {
|
||||
bail!("Failed to copy encoder parameters: error {ret}");
|
||||
}
|
||||
|
||||
unsafe {
|
||||
(*stream_ptr).time_base = (*enc_video.as_ptr()).time_base;
|
||||
}
|
||||
|
||||
let ret = unsafe {
|
||||
ffi::avio_open(
|
||||
&mut (*fmt_ctx_ptr).pb,
|
||||
output_cstr.as_ptr(),
|
||||
ffi::AVIO_FLAG_WRITE,
|
||||
)
|
||||
};
|
||||
if ret < 0 {
|
||||
bail!(
|
||||
"Failed to open output file '{}': error {ret}",
|
||||
output_path.display()
|
||||
);
|
||||
}
|
||||
|
||||
let ret = unsafe { ffi::avformat_write_header(fmt_ctx_ptr, ptr::null_mut()) };
|
||||
if ret < 0 {
|
||||
bail!("Failed to write header: error {ret}");
|
||||
}
|
||||
|
||||
let mut octx = unsafe { ff::format::context::Output::wrap(fmt_ctx_ptr) };
|
||||
|
||||
// Create sws_scale context: BGRZ (BGR0) -> YUV420P
|
||||
let bgr0_fmt = pix_fmt(ff::format::Pixel::BGRZ);
|
||||
let yuv420p_fmt = pix_fmt(ff::format::Pixel::YUV420P);
|
||||
|
||||
let sws_ctx = unsafe {
|
||||
ffi::sws_getContext(
|
||||
src_width as i32,
|
||||
src_height as i32,
|
||||
bgr0_fmt,
|
||||
enc_width as i32,
|
||||
enc_height as i32,
|
||||
yuv420p_fmt,
|
||||
2,
|
||||
ptr::null_mut(),
|
||||
ptr::null_mut(),
|
||||
ptr::null_mut(),
|
||||
)
|
||||
};
|
||||
if sws_ctx.is_null() {
|
||||
bail!("Failed to create sws_scale context");
|
||||
}
|
||||
|
||||
// Allocate reusable YUV frame
|
||||
let mut yuv_frame = unsafe {
|
||||
let mut f = ffi::av_frame_alloc();
|
||||
if f.is_null() {
|
||||
bail!("av_frame_alloc failed");
|
||||
}
|
||||
(*f).width = enc_width as i32;
|
||||
(*f).height = enc_height as i32;
|
||||
(*f).format = yuv420p_fmt as i32;
|
||||
let ret = ffi::av_frame_get_buffer(f, 0);
|
||||
if ret < 0 {
|
||||
ffi::av_frame_free(&mut f);
|
||||
bail!("av_frame_get_buffer failed: {ret}");
|
||||
}
|
||||
f
|
||||
};
|
||||
|
||||
println!(
|
||||
"[4/4] Encoder ready: {}, {}x{}\n",
|
||||
codec_name, enc_width, enc_height
|
||||
);
|
||||
|
||||
println!("=== Encoding {} frames ===\n", bench_args.frames);
|
||||
|
||||
let mut stats = FrameStats::default();
|
||||
let total_start = Instant::now();
|
||||
let mut frames_encoded: u32 = 0;
|
||||
let mut pts: i64 = 0;
|
||||
|
||||
while frames_encoded < bench_args.frames {
|
||||
if let Ok(ctrl) = cap.event_receiver().try_recv() {
|
||||
match ctrl {
|
||||
PwCtrlEvent::StreamEnded => {
|
||||
eprintln!("PipeWire stream ended after {} frames", frames_encoded);
|
||||
break;
|
||||
}
|
||||
PwCtrlEvent::Error(e) => {
|
||||
eprintln!("PipeWire error after {} frames: {}", frames_encoded, e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let frame = match cap
|
||||
.frame_receiver()
|
||||
.recv_timeout(std::time::Duration::from_secs(5))
|
||||
{
|
||||
Ok(f) => f,
|
||||
Err(_) => {
|
||||
eprintln!("Frame timeout/disconnect after {} frames", frames_encoded);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
let frame_start = Instant::now();
|
||||
|
||||
let mmap_start = Instant::now();
|
||||
let frame_size = (frame.stride as usize) * (frame.height as usize);
|
||||
let mmap_ptr = unsafe {
|
||||
libc::mmap(
|
||||
ptr::null_mut(),
|
||||
frame_size,
|
||||
libc::PROT_READ,
|
||||
libc::MAP_SHARED,
|
||||
frame.fd.as_raw_fd(),
|
||||
frame.offset as i64,
|
||||
)
|
||||
};
|
||||
|
||||
if mmap_ptr == libc::MAP_FAILED {
|
||||
stats.mmap_failures += 1;
|
||||
eprintln!("mmap failed on frame {}", frames_encoded);
|
||||
drop(frame);
|
||||
continue;
|
||||
}
|
||||
stats.mmap_us.push(mmap_start.elapsed().as_micros() as u64);
|
||||
|
||||
let scale_start = Instant::now();
|
||||
let src_data = unsafe { std::slice::from_raw_parts(mmap_ptr as *const u8, frame_size) };
|
||||
|
||||
unsafe {
|
||||
ffi::av_frame_make_writable(yuv_frame);
|
||||
|
||||
let src_ptr = src_data.as_ptr();
|
||||
let src_linesize = frame.stride as i32;
|
||||
|
||||
ffi::sws_scale(
|
||||
sws_ctx,
|
||||
&src_ptr as *const *const u8,
|
||||
&src_linesize as *const i32,
|
||||
0,
|
||||
frame.height as i32,
|
||||
(*yuv_frame).data.as_ptr() as *mut *mut u8,
|
||||
(*yuv_frame).linesize.as_ptr() as *mut i32,
|
||||
);
|
||||
}
|
||||
stats
|
||||
.scale_us
|
||||
.push(scale_start.elapsed().as_micros() as u64);
|
||||
|
||||
unsafe {
|
||||
libc::munmap(mmap_ptr, frame_size);
|
||||
}
|
||||
drop(frame);
|
||||
|
||||
let encode_start = Instant::now();
|
||||
|
||||
unsafe {
|
||||
(*yuv_frame).pts = pts;
|
||||
pts += 1;
|
||||
|
||||
let ret = ffi::avcodec_send_frame(enc_video.as_mut_ptr(), yuv_frame);
|
||||
if ret < 0 {
|
||||
eprintln!("avcodec_send_frame failed: {ret}");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
drain_encoder(&mut enc_video, &mut octx)?;
|
||||
|
||||
stats
|
||||
.encode_us
|
||||
.push(encode_start.elapsed().as_micros() as u64);
|
||||
stats
|
||||
.total_us
|
||||
.push(frame_start.elapsed().as_micros() as u64);
|
||||
|
||||
frames_encoded += 1;
|
||||
if frames_encoded % 30 == 0 {
|
||||
let fps = frames_encoded as f64 / total_start.elapsed().as_secs_f64();
|
||||
println!(
|
||||
" [{}/{}] {:.1} FPS",
|
||||
frames_encoded, bench_args.frames, fps
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let total_elapsed = total_start.elapsed();
|
||||
|
||||
println!("\nFlushing encoder...");
|
||||
unsafe {
|
||||
ffi::avcodec_send_frame(enc_video.as_mut_ptr(), ptr::null());
|
||||
}
|
||||
drain_encoder(&mut enc_video, &mut octx)?;
|
||||
|
||||
octx.write_trailer()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to write trailer: {e}"))?;
|
||||
|
||||
// Cleanup
|
||||
unsafe {
|
||||
ffi::av_frame_free(&mut yuv_frame as *mut _);
|
||||
ffi::sws_freeContext(sws_ctx);
|
||||
}
|
||||
|
||||
drop(cap);
|
||||
|
||||
// Print results
|
||||
let mmap_count = stats.mmap_us.len() as u32;
|
||||
let mmap_success_rate = if mmap_count + stats.mmap_failures > 0 {
|
||||
mmap_count as f64 / (mmap_count + stats.mmap_failures) as f64 * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
let total_fps = frames_encoded as f64 / total_elapsed.as_secs_f64();
|
||||
let avg_total_ms = FrameStats::avg_ms(&stats.total_us);
|
||||
let max_fps = if avg_total_ms > 0.0 {
|
||||
1000.0 / avg_total_ms
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
println!();
|
||||
println!("╔══════════════════════════════════════════════════════════════╗");
|
||||
println!("║ Software Encode Benchmark Results ║");
|
||||
println!("╚══════════════════════════════════════════════════════════════╝");
|
||||
println!();
|
||||
println!("Capture resolution: {}x{}", src_width, src_height);
|
||||
println!("Encode resolution: {}x{}", enc_width, enc_height);
|
||||
println!("Frames encoded: {}", frames_encoded);
|
||||
println!("Total time: {:.2}s", total_elapsed.as_secs_f64());
|
||||
println!();
|
||||
println!("mmap (DMA-BUF -> CPU):");
|
||||
println!(
|
||||
" avg: {:.2} ms/frame",
|
||||
FrameStats::avg_ms(&stats.mmap_us)
|
||||
);
|
||||
println!(
|
||||
" success rate: {:.1}% ({}/{})",
|
||||
mmap_success_rate,
|
||||
mmap_count,
|
||||
mmap_count + stats.mmap_failures
|
||||
);
|
||||
println!();
|
||||
println!("scale (BGR0 -> YUV420P via sws_scale):");
|
||||
println!(
|
||||
" avg: {:.2} ms/frame",
|
||||
FrameStats::avg_ms(&stats.scale_us)
|
||||
);
|
||||
println!();
|
||||
println!("encode ({}):", codec_name);
|
||||
println!(
|
||||
" avg: {:.2} ms/frame",
|
||||
FrameStats::avg_ms(&stats.encode_us)
|
||||
);
|
||||
println!();
|
||||
println!("total pipeline:");
|
||||
println!(" avg: {:.2} ms/frame", avg_total_ms);
|
||||
println!(" achieved FPS: {:.1}", total_fps);
|
||||
println!(" max theoretical: {:.1} FPS", max_fps);
|
||||
println!();
|
||||
|
||||
if mmap_success_rate < 100.0 {
|
||||
println!(
|
||||
"WARNING: Some mmap operations failed ({}/{})",
|
||||
stats.mmap_failures,
|
||||
stats.mmap_failures + mmap_count
|
||||
);
|
||||
}
|
||||
if total_fps < 30.0 {
|
||||
println!(
|
||||
"NOTE: Achieved FPS ({:.1}) is below 30 FPS target.",
|
||||
total_fps
|
||||
);
|
||||
}
|
||||
|
||||
println!("Output written to: {}", bench_args.output);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn drain_encoder(
|
||||
enc_video: &mut ff::encoder::video::Video,
|
||||
octx: &mut ff::format::context::Output,
|
||||
) -> Result<()> {
|
||||
loop {
|
||||
let mut pkt = ff::Packet::empty();
|
||||
let ret = unsafe { ffi::avcodec_receive_packet(enc_video.as_mut_ptr(), pkt.as_mut_ptr()) };
|
||||
if ret < 0 {
|
||||
if ret == ffi::AVERROR(ffi::EAGAIN) || ret == ffi::AVERROR_EOF {
|
||||
break;
|
||||
}
|
||||
eprintln!("avcodec_receive_packet failed: {ret}");
|
||||
break;
|
||||
}
|
||||
|
||||
let enc_tb = enc_video.time_base();
|
||||
let stream_tb = unsafe {
|
||||
let streams = (*octx.as_ptr()).streams;
|
||||
let st = *streams.add(0);
|
||||
ff::Rational::from((*st).time_base)
|
||||
};
|
||||
pkt.rescale_ts(enc_tb, stream_tb);
|
||||
pkt.set_stream(0);
|
||||
pkt.write_interleaved(octx)
|
||||
.map_err(|e| anyhow::anyhow!("write packet failed: {e}"))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
1036
src/bin/vaapi_import_bench.rs
Normal file
1036
src/bin/vaapi_import_bench.rs
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user