Add SwEncState in avhw.rs: GPU pipeline using scale_vaapi to downscale 4K BGRA -> 2K NV12 on the AMD iGPU, followed by software encoding with libopenh264.

- import_dma_buf_to_vaapi: av_hwframe_map-based DMA-BUF import
- SwEncState: GPU filter graph (scale_vaapi) + NV12 -> YUV420P + libopenh264
- state_portal.rs: integrated SwEncState, automatic DRM device detection
- vaapi_import_bench.rs: CPU vs. GPU pipeline benchmark
- sw_encode_bench.rs: software encode benchmark

Benchmark results: the GPU pipeline reaches ~91 FPS theoretical (10.95 ms/frame) vs. ~33 FPS (30.21 ms/frame) for the CPU pipeline.

Known issue: only 1 frame is encoded in production recording; diagnostic STATS logging was added to debug the frame flow.
546 lines
16 KiB
Rust
546 lines
16 KiB
Rust
// sw_encode_bench.rs — Software encoding pipeline benchmark for screen capture
|
|
//
|
|
// Benchmarks: Portal capture -> mmap DMA-BUF -> sws_scale BGR0->YUV420P -> libx264 encode (libopenh264 fallback)
|
|
//
|
|
// Usage: cargo run --bin sw_encode_bench -- --output /tmp/bench_test.mp4
|
|
|
|
use std::ffi::CString;
|
|
use std::os::fd::AsRawFd;
|
|
use std::path::Path;
|
|
use std::ptr;
|
|
use std::time::Instant;
|
|
|
|
use anyhow::{bail, Result};
|
|
use clap::Parser;
|
|
|
|
use ffmpeg_next as ff;
|
|
use ffmpeg_next::ffi;
|
|
use ffmpeg_next::packet::Mut;
|
|
|
|
use wl_webrtc::args::Args;
|
|
use wl_webrtc::cap_portal::{CapPortal, PwCtrlEvent};
|
|
|
|
#[derive(Parser, Debug)]
|
|
#[command(
|
|
name = "sw_encode_bench",
|
|
about = "Software encoding pipeline benchmark"
|
|
)]
|
|
struct BenchArgs {
|
|
#[arg(short, long)]
|
|
output: String,
|
|
|
|
#[arg(long, default_value_t = 120)]
|
|
frames: u32,
|
|
|
|
#[arg(long, default_value_t = 2560)]
|
|
enc_width: u32,
|
|
|
|
#[arg(long, default_value_t = 1440)]
|
|
enc_height: u32,
|
|
}
|
|
|
|
/// Per-frame timing samples collected while the benchmark runs.
///
/// Every `*_us` vector holds one duration in microseconds per processed frame.
#[derive(Default)]
struct FrameStats {
    /// Time spent mapping the DMA-BUF into CPU address space.
    mmap_us: Vec<u64>,
    /// Time spent in the colour conversion / scaling step.
    scale_us: Vec<u64>,
    /// Time spent sending the frame to the encoder and draining its packets.
    encode_us: Vec<u64>,
    /// Wall-clock time for the whole per-frame pipeline.
    total_us: Vec<u64>,
    /// Number of frames whose `mmap` call failed.
    mmap_failures: u32,
}

impl FrameStats {
    /// Returns the arithmetic mean of `data` (microseconds) expressed in
    /// milliseconds, or `0.0` when there are no samples.
    fn avg_ms(data: &[u64]) -> f64 {
        match data.len() {
            0 => 0.0,
            sample_count => {
                let total_us: u64 = data.iter().sum();
                total_us as f64 / sample_count as f64 / 1000.0
            }
        }
    }
}
|
|
|
|
/// Converts the safe `ffmpeg_next` pixel-format enum into the raw
/// `AVPixelFormat` value expected by the FFI functions.
fn pix_fmt(p: ff::format::Pixel) -> ffi::AVPixelFormat {
    // The target type is fixed by the return type, so plain `into()` suffices.
    p.into()
}
|
|
|
|
fn receive_first_frame(cap: &CapPortal) -> Result<wl_webrtc::cap_portal::PwDmaBufFrame> {
|
|
loop {
|
|
if let Ok(ctrl) = cap.event_receiver().try_recv() {
|
|
match ctrl {
|
|
PwCtrlEvent::StreamEnded => bail!("PipeWire stream ended before first frame"),
|
|
PwCtrlEvent::Error(e) => bail!("PipeWire error: {e}"),
|
|
}
|
|
}
|
|
match cap
|
|
.frame_receiver()
|
|
.recv_timeout(std::time::Duration::from_secs(10))
|
|
{
|
|
Ok(frame) => return Ok(frame),
|
|
Err(crossbeam_channel::RecvTimeoutError::Timeout) => {
|
|
bail!("Timeout waiting for first frame (10s)");
|
|
}
|
|
Err(crossbeam_channel::RecvTimeoutError::Disconnected) => {
|
|
bail!("PipeWire frame channel disconnected");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn main() -> Result<()> {
|
|
let bench_args = BenchArgs::parse();
|
|
|
|
println!("=== Software Encode Benchmark ===");
|
|
println!("Output: {}", bench_args.output);
|
|
println!("Target frames: {}", bench_args.frames);
|
|
println!(
|
|
"Encode resolution: {}x{}",
|
|
bench_args.enc_width, bench_args.enc_height
|
|
);
|
|
println!();
|
|
|
|
ff::init()?;
|
|
|
|
println!("[1/4] Requesting screen capture via XDG Portal...");
|
|
println!(" (Select a screen to share in the portal dialog)");
|
|
|
|
let portal_args = Args {
|
|
output: bench_args.output.clone(),
|
|
output_name: None,
|
|
fps: 60,
|
|
codec: "h264".to_string(),
|
|
hw_accel: "vaapi".to_string(),
|
|
drm_device: None,
|
|
bitrate: None,
|
|
gop_size: None,
|
|
verbose: false,
|
|
backend: Some("portal".to_string()),
|
|
port: 0,
|
|
};
|
|
|
|
let cap = CapPortal::new(&portal_args)?;
|
|
println!("[1/4] Portal connected, PipeWire stream active\n");
|
|
|
|
println!("[2/4] Waiting for first frame from PipeWire...");
|
|
let first_frame = receive_first_frame(&cap)?;
|
|
|
|
let src_width = first_frame.width;
|
|
let src_height = first_frame.height;
|
|
let src_stride = first_frame.stride;
|
|
let enc_width = bench_args.enc_width;
|
|
let enc_height = bench_args.enc_height;
|
|
|
|
println!(
|
|
"[2/4] First frame: {}x{}, stride={}, format=0x{:08X}",
|
|
src_width, src_height, src_stride, first_frame.format
|
|
);
|
|
println!(
|
|
" Capture: {}x{} Encode: {}x{}\n",
|
|
src_width, src_height, enc_width, enc_height
|
|
);
|
|
|
|
println!("[3/4] Testing mmap on DMA-BUF...");
|
|
let mmap_size = (src_stride as usize) * (src_height as usize);
|
|
let mmap_ptr = unsafe {
|
|
libc::mmap(
|
|
ptr::null_mut(),
|
|
mmap_size,
|
|
libc::PROT_READ,
|
|
libc::MAP_SHARED,
|
|
first_frame.fd.as_raw_fd(),
|
|
first_frame.offset as i64,
|
|
)
|
|
};
|
|
|
|
if mmap_ptr == libc::MAP_FAILED {
|
|
let errno = std::io::Error::last_os_error();
|
|
bail!(
|
|
"mmap on DMA-BUF fd FAILED — AMD driver may not support \
|
|
CPU read of screen capture DMA-BUF buffers.\n\
|
|
Error: {} (errno={})\n\
|
|
\n\
|
|
Workarounds:\n\
|
|
1. Use VAAPI hardware import (av_hwframe_map) instead of mmap\n\
|
|
2. Use wlroots compositor with wlr-screencopy (SHM-based)\n\
|
|
3. Use a virtual display or software renderer",
|
|
errno,
|
|
errno.raw_os_error().unwrap_or(-1)
|
|
);
|
|
}
|
|
|
|
println!(
|
|
"[3/4] mmap SUCCESS — CPU can read DMA-BUF ({:.1} MB)\n",
|
|
mmap_size as f64 / 1024.0 / 1024.0
|
|
);
|
|
unsafe {
|
|
libc::munmap(mmap_ptr, mmap_size);
|
|
}
|
|
drop(first_frame);
|
|
|
|
// Set up libx264 encoder via FFI (same pattern as avhw.rs)
|
|
println!("[4/4] Setting up libx264 encoder...");
|
|
let output_path = Path::new(&bench_args.output);
|
|
let output_cstr = CString::new(output_path.to_str().unwrap())?;
|
|
|
|
// Try libx264 first (best quality/speed), fall back to openh264
|
|
let codec = ff::encoder::find_by_name("libx264")
|
|
.or_else(|| ff::encoder::find_by_name("libopenh264"))
|
|
.ok_or_else(|| {
|
|
anyhow::anyhow!("No H.264 software encoder found (tried libx264, libopenh264)")
|
|
})?;
|
|
println!("[4/4] Using encoder: {}\n", codec.name());
|
|
|
|
let mut enc = {
|
|
let ctx = ff::codec::Context::new_with_codec(codec);
|
|
ctx.encoder().video()?
|
|
};
|
|
|
|
enc.set_width(enc_width);
|
|
enc.set_height(enc_height);
|
|
enc.set_format(ff::format::Pixel::YUV420P);
|
|
enc.set_time_base(ff::Rational::new(1, 60));
|
|
enc.set_max_b_frames(0);
|
|
enc.set_gop(60);
|
|
|
|
let codec_name = codec.name();
|
|
if codec_name == "libx264" {
|
|
unsafe {
|
|
let key = CString::new("preset").unwrap();
|
|
let val = CString::new("veryfast").unwrap();
|
|
ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0);
|
|
let key = CString::new("tune").unwrap();
|
|
let val = CString::new("zerolatency").unwrap();
|
|
ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0);
|
|
}
|
|
}
|
|
|
|
let opened = enc.open()?;
|
|
let mut enc_video = opened.0;
|
|
|
|
// Create output format context via FFI
|
|
let mut fmt_ctx_ptr: *mut ffi::AVFormatContext = ptr::null_mut();
|
|
let ret = unsafe {
|
|
ffi::avformat_alloc_output_context2(
|
|
&mut fmt_ctx_ptr,
|
|
ptr::null_mut(),
|
|
ptr::null(),
|
|
output_cstr.as_ptr(),
|
|
)
|
|
};
|
|
if ret < 0 || fmt_ctx_ptr.is_null() {
|
|
bail!("Failed to allocate output format context: error {ret}");
|
|
}
|
|
|
|
let stream_ptr = unsafe { ffi::avformat_new_stream(fmt_ctx_ptr, ptr::null()) };
|
|
if stream_ptr.is_null() {
|
|
bail!("Failed to create new stream");
|
|
}
|
|
|
|
let ret =
|
|
unsafe { ffi::avcodec_parameters_from_context((*stream_ptr).codecpar, enc_video.as_ptr()) };
|
|
if ret < 0 {
|
|
bail!("Failed to copy encoder parameters: error {ret}");
|
|
}
|
|
|
|
unsafe {
|
|
(*stream_ptr).time_base = (*enc_video.as_ptr()).time_base;
|
|
}
|
|
|
|
let ret = unsafe {
|
|
ffi::avio_open(
|
|
&mut (*fmt_ctx_ptr).pb,
|
|
output_cstr.as_ptr(),
|
|
ffi::AVIO_FLAG_WRITE,
|
|
)
|
|
};
|
|
if ret < 0 {
|
|
bail!(
|
|
"Failed to open output file '{}': error {ret}",
|
|
output_path.display()
|
|
);
|
|
}
|
|
|
|
let ret = unsafe { ffi::avformat_write_header(fmt_ctx_ptr, ptr::null_mut()) };
|
|
if ret < 0 {
|
|
bail!("Failed to write header: error {ret}");
|
|
}
|
|
|
|
let mut octx = unsafe { ff::format::context::Output::wrap(fmt_ctx_ptr) };
|
|
|
|
// Create sws_scale context: BGRZ (BGR0) -> YUV420P
|
|
let bgr0_fmt = pix_fmt(ff::format::Pixel::BGRZ);
|
|
let yuv420p_fmt = pix_fmt(ff::format::Pixel::YUV420P);
|
|
|
|
let sws_ctx = unsafe {
|
|
ffi::sws_getContext(
|
|
src_width as i32,
|
|
src_height as i32,
|
|
bgr0_fmt,
|
|
enc_width as i32,
|
|
enc_height as i32,
|
|
yuv420p_fmt,
|
|
2,
|
|
ptr::null_mut(),
|
|
ptr::null_mut(),
|
|
ptr::null_mut(),
|
|
)
|
|
};
|
|
if sws_ctx.is_null() {
|
|
bail!("Failed to create sws_scale context");
|
|
}
|
|
|
|
// Allocate reusable YUV frame
|
|
let mut yuv_frame = unsafe {
|
|
let mut f = ffi::av_frame_alloc();
|
|
if f.is_null() {
|
|
bail!("av_frame_alloc failed");
|
|
}
|
|
(*f).width = enc_width as i32;
|
|
(*f).height = enc_height as i32;
|
|
(*f).format = yuv420p_fmt as i32;
|
|
let ret = ffi::av_frame_get_buffer(f, 0);
|
|
if ret < 0 {
|
|
ffi::av_frame_free(&mut f);
|
|
bail!("av_frame_get_buffer failed: {ret}");
|
|
}
|
|
f
|
|
};
|
|
|
|
println!(
|
|
"[4/4] Encoder ready: {}, {}x{}\n",
|
|
codec_name, enc_width, enc_height
|
|
);
|
|
|
|
println!("=== Encoding {} frames ===\n", bench_args.frames);
|
|
|
|
let mut stats = FrameStats::default();
|
|
let total_start = Instant::now();
|
|
let mut frames_encoded: u32 = 0;
|
|
let mut pts: i64 = 0;
|
|
|
|
while frames_encoded < bench_args.frames {
|
|
if let Ok(ctrl) = cap.event_receiver().try_recv() {
|
|
match ctrl {
|
|
PwCtrlEvent::StreamEnded => {
|
|
eprintln!("PipeWire stream ended after {} frames", frames_encoded);
|
|
break;
|
|
}
|
|
PwCtrlEvent::Error(e) => {
|
|
eprintln!("PipeWire error after {} frames: {}", frames_encoded, e);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
let frame = match cap
|
|
.frame_receiver()
|
|
.recv_timeout(std::time::Duration::from_secs(5))
|
|
{
|
|
Ok(f) => f,
|
|
Err(_) => {
|
|
eprintln!("Frame timeout/disconnect after {} frames", frames_encoded);
|
|
break;
|
|
}
|
|
};
|
|
|
|
let frame_start = Instant::now();
|
|
|
|
let mmap_start = Instant::now();
|
|
let frame_size = (frame.stride as usize) * (frame.height as usize);
|
|
let mmap_ptr = unsafe {
|
|
libc::mmap(
|
|
ptr::null_mut(),
|
|
frame_size,
|
|
libc::PROT_READ,
|
|
libc::MAP_SHARED,
|
|
frame.fd.as_raw_fd(),
|
|
frame.offset as i64,
|
|
)
|
|
};
|
|
|
|
if mmap_ptr == libc::MAP_FAILED {
|
|
stats.mmap_failures += 1;
|
|
eprintln!("mmap failed on frame {}", frames_encoded);
|
|
drop(frame);
|
|
continue;
|
|
}
|
|
stats.mmap_us.push(mmap_start.elapsed().as_micros() as u64);
|
|
|
|
let scale_start = Instant::now();
|
|
let src_data = unsafe { std::slice::from_raw_parts(mmap_ptr as *const u8, frame_size) };
|
|
|
|
unsafe {
|
|
ffi::av_frame_make_writable(yuv_frame);
|
|
|
|
let src_ptr = src_data.as_ptr();
|
|
let src_linesize = frame.stride as i32;
|
|
|
|
ffi::sws_scale(
|
|
sws_ctx,
|
|
&src_ptr as *const *const u8,
|
|
&src_linesize as *const i32,
|
|
0,
|
|
frame.height as i32,
|
|
(*yuv_frame).data.as_ptr() as *mut *mut u8,
|
|
(*yuv_frame).linesize.as_ptr() as *mut i32,
|
|
);
|
|
}
|
|
stats
|
|
.scale_us
|
|
.push(scale_start.elapsed().as_micros() as u64);
|
|
|
|
unsafe {
|
|
libc::munmap(mmap_ptr, frame_size);
|
|
}
|
|
drop(frame);
|
|
|
|
let encode_start = Instant::now();
|
|
|
|
unsafe {
|
|
(*yuv_frame).pts = pts;
|
|
pts += 1;
|
|
|
|
let ret = ffi::avcodec_send_frame(enc_video.as_mut_ptr(), yuv_frame);
|
|
if ret < 0 {
|
|
eprintln!("avcodec_send_frame failed: {ret}");
|
|
continue;
|
|
}
|
|
}
|
|
|
|
drain_encoder(&mut enc_video, &mut octx)?;
|
|
|
|
stats
|
|
.encode_us
|
|
.push(encode_start.elapsed().as_micros() as u64);
|
|
stats
|
|
.total_us
|
|
.push(frame_start.elapsed().as_micros() as u64);
|
|
|
|
frames_encoded += 1;
|
|
if frames_encoded % 30 == 0 {
|
|
let fps = frames_encoded as f64 / total_start.elapsed().as_secs_f64();
|
|
println!(
|
|
" [{}/{}] {:.1} FPS",
|
|
frames_encoded, bench_args.frames, fps
|
|
);
|
|
}
|
|
}
|
|
|
|
let total_elapsed = total_start.elapsed();
|
|
|
|
println!("\nFlushing encoder...");
|
|
unsafe {
|
|
ffi::avcodec_send_frame(enc_video.as_mut_ptr(), ptr::null());
|
|
}
|
|
drain_encoder(&mut enc_video, &mut octx)?;
|
|
|
|
octx.write_trailer()
|
|
.map_err(|e| anyhow::anyhow!("Failed to write trailer: {e}"))?;
|
|
|
|
// Cleanup
|
|
unsafe {
|
|
ffi::av_frame_free(&mut yuv_frame as *mut _);
|
|
ffi::sws_freeContext(sws_ctx);
|
|
}
|
|
|
|
drop(cap);
|
|
|
|
// Print results
|
|
let mmap_count = stats.mmap_us.len() as u32;
|
|
let mmap_success_rate = if mmap_count + stats.mmap_failures > 0 {
|
|
mmap_count as f64 / (mmap_count + stats.mmap_failures) as f64 * 100.0
|
|
} else {
|
|
0.0
|
|
};
|
|
let total_fps = frames_encoded as f64 / total_elapsed.as_secs_f64();
|
|
let avg_total_ms = FrameStats::avg_ms(&stats.total_us);
|
|
let max_fps = if avg_total_ms > 0.0 {
|
|
1000.0 / avg_total_ms
|
|
} else {
|
|
0.0
|
|
};
|
|
|
|
println!();
|
|
println!("╔══════════════════════════════════════════════════════════════╗");
|
|
println!("║ Software Encode Benchmark Results ║");
|
|
println!("╚══════════════════════════════════════════════════════════════╝");
|
|
println!();
|
|
println!("Capture resolution: {}x{}", src_width, src_height);
|
|
println!("Encode resolution: {}x{}", enc_width, enc_height);
|
|
println!("Frames encoded: {}", frames_encoded);
|
|
println!("Total time: {:.2}s", total_elapsed.as_secs_f64());
|
|
println!();
|
|
println!("mmap (DMA-BUF -> CPU):");
|
|
println!(
|
|
" avg: {:.2} ms/frame",
|
|
FrameStats::avg_ms(&stats.mmap_us)
|
|
);
|
|
println!(
|
|
" success rate: {:.1}% ({}/{})",
|
|
mmap_success_rate,
|
|
mmap_count,
|
|
mmap_count + stats.mmap_failures
|
|
);
|
|
println!();
|
|
println!("scale (BGR0 -> YUV420P via sws_scale):");
|
|
println!(
|
|
" avg: {:.2} ms/frame",
|
|
FrameStats::avg_ms(&stats.scale_us)
|
|
);
|
|
println!();
|
|
println!("encode ({}):", codec_name);
|
|
println!(
|
|
" avg: {:.2} ms/frame",
|
|
FrameStats::avg_ms(&stats.encode_us)
|
|
);
|
|
println!();
|
|
println!("total pipeline:");
|
|
println!(" avg: {:.2} ms/frame", avg_total_ms);
|
|
println!(" achieved FPS: {:.1}", total_fps);
|
|
println!(" max theoretical: {:.1} FPS", max_fps);
|
|
println!();
|
|
|
|
if mmap_success_rate < 100.0 {
|
|
println!(
|
|
"WARNING: Some mmap operations failed ({}/{})",
|
|
stats.mmap_failures,
|
|
stats.mmap_failures + mmap_count
|
|
);
|
|
}
|
|
if total_fps < 30.0 {
|
|
println!(
|
|
"NOTE: Achieved FPS ({:.1}) is below 30 FPS target.",
|
|
total_fps
|
|
);
|
|
}
|
|
|
|
println!("Output written to: {}", bench_args.output);
|
|
Ok(())
|
|
}
|
|
|
|
fn drain_encoder(
|
|
enc_video: &mut ff::encoder::video::Video,
|
|
octx: &mut ff::format::context::Output,
|
|
) -> Result<()> {
|
|
loop {
|
|
let mut pkt = ff::Packet::empty();
|
|
let ret = unsafe { ffi::avcodec_receive_packet(enc_video.as_mut_ptr(), pkt.as_mut_ptr()) };
|
|
if ret < 0 {
|
|
if ret == ffi::AVERROR(ffi::EAGAIN) || ret == ffi::AVERROR_EOF {
|
|
break;
|
|
}
|
|
eprintln!("avcodec_receive_packet failed: {ret}");
|
|
break;
|
|
}
|
|
|
|
let enc_tb = enc_video.time_base();
|
|
let stream_tb = unsafe {
|
|
let streams = (*octx.as_ptr()).streams;
|
|
let st = *streams.add(0);
|
|
ff::Rational::from((*st).time_base)
|
|
};
|
|
pkt.rescale_ts(enc_tb, stream_tb);
|
|
pkt.set_stream(0);
|
|
pkt.write_interleaved(octx)
|
|
.map_err(|e| anyhow::anyhow!("write packet failed: {e}"))?;
|
|
}
|
|
Ok(())
|
|
}
|