// sw_encode_bench.rs — Software encoding pipeline benchmark for screen capture // // Benchmarks: Portal capture -> mmap DMA-BUF -> sws_scale BGR0->YUV420P -> libx264 encode // // Usage: cargo run --bin sw_encode_bench -- --output /tmp/bench_test.mp4 use std::ffi::CString; use std::os::fd::AsRawFd; use std::path::Path; use std::ptr; use std::time::Instant; use anyhow::{bail, Result}; use clap::Parser; use ffmpeg_next as ff; use ffmpeg_next::ffi; use ffmpeg_next::packet::Mut; use wl_webrtc::args::Args; use wl_webrtc::cap_portal::{CapPortal, PwCtrlEvent}; #[derive(Parser, Debug)] #[command( name = "sw_encode_bench", about = "Software encoding pipeline benchmark" )] struct BenchArgs { #[arg(short, long)] output: String, #[arg(long, default_value_t = 120)] frames: u32, #[arg(long, default_value_t = 2560)] enc_width: u32, #[arg(long, default_value_t = 1440)] enc_height: u32, } #[derive(Default)] struct FrameStats { mmap_us: Vec, scale_us: Vec, encode_us: Vec, total_us: Vec, mmap_failures: u32, } impl FrameStats { fn avg_ms(data: &[u64]) -> f64 { if data.is_empty() { return 0.0; } data.iter().sum::() as f64 / data.len() as f64 / 1000.0 } } fn pix_fmt(p: ff::format::Pixel) -> ffi::AVPixelFormat { Into::::into(p) } fn receive_first_frame(cap: &CapPortal) -> Result { loop { if let Ok(ctrl) = cap.event_receiver().try_recv() { match ctrl { PwCtrlEvent::StreamEnded => bail!("PipeWire stream ended before first frame"), PwCtrlEvent::Error(e) => bail!("PipeWire error: {e}"), } } match cap .frame_receiver() .recv_timeout(std::time::Duration::from_secs(10)) { Ok(frame) => return Ok(frame), Err(crossbeam_channel::RecvTimeoutError::Timeout) => { bail!("Timeout waiting for first frame (10s)"); } Err(crossbeam_channel::RecvTimeoutError::Disconnected) => { bail!("PipeWire frame channel disconnected"); } } } } fn main() -> Result<()> { let bench_args = BenchArgs::parse(); println!("=== Software Encode Benchmark ==="); println!("Output: {}", bench_args.output); println!("Target frames: {}", bench_args.frames); println!( "Encode resolution: {}x{}", bench_args.enc_width, bench_args.enc_height ); println!(); ff::init()?; println!("[1/4] Requesting screen capture via XDG Portal..."); println!(" (Select a screen to share in the portal dialog)"); let portal_args = Args { output: bench_args.output.clone(), output_name: None, fps: 60, codec: "h264".to_string(), hw_accel: "vaapi".to_string(), drm_device: None, bitrate: None, gop_size: None, verbose: false, backend: Some("portal".to_string()), port: 0, }; let cap = CapPortal::new(&portal_args)?; println!("[1/4] Portal connected, PipeWire stream active\n"); println!("[2/4] Waiting for first frame from PipeWire..."); let first_frame = receive_first_frame(&cap)?; let src_width = first_frame.width; let src_height = first_frame.height; let src_stride = first_frame.stride; let enc_width = bench_args.enc_width; let enc_height = bench_args.enc_height; println!( "[2/4] First frame: {}x{}, stride={}, format=0x{:08X}", src_width, src_height, src_stride, first_frame.format ); println!( " Capture: {}x{} Encode: {}x{}\n", src_width, src_height, enc_width, enc_height ); println!("[3/4] Testing mmap on DMA-BUF..."); let mmap_size = (src_stride as usize) * (src_height as usize); let mmap_ptr = unsafe { libc::mmap( ptr::null_mut(), mmap_size, libc::PROT_READ, libc::MAP_SHARED, first_frame.fd.as_raw_fd(), first_frame.offset as i64, ) }; if mmap_ptr == libc::MAP_FAILED { let errno = std::io::Error::last_os_error(); bail!( "mmap on DMA-BUF fd FAILED — AMD driver may not support \ CPU read of screen capture DMA-BUF buffers.\n\ Error: {} (errno={})\n\ \n\ Workarounds:\n\ 1. Use VAAPI hardware import (av_hwframe_map) instead of mmap\n\ 2. Use wlroots compositor with wlr-screencopy (SHM-based)\n\ 3. Use a virtual display or software renderer", errno, errno.raw_os_error().unwrap_or(-1) ); } println!( "[3/4] mmap SUCCESS — CPU can read DMA-BUF ({:.1} MB)\n", mmap_size as f64 / 1024.0 / 1024.0 ); unsafe { libc::munmap(mmap_ptr, mmap_size); } drop(first_frame); // Set up libx264 encoder via FFI (same pattern as avhw.rs) println!("[4/4] Setting up libx264 encoder..."); let output_path = Path::new(&bench_args.output); let output_cstr = CString::new(output_path.to_str().unwrap())?; // Try libx264 first (best quality/speed), fall back to openh264 let codec = ff::encoder::find_by_name("libx264") .or_else(|| ff::encoder::find_by_name("libopenh264")) .ok_or_else(|| { anyhow::anyhow!("No H.264 software encoder found (tried libx264, libopenh264)") })?; println!("[4/4] Using encoder: {}\n", codec.name()); let mut enc = { let ctx = ff::codec::Context::new_with_codec(codec); ctx.encoder().video()? }; enc.set_width(enc_width); enc.set_height(enc_height); enc.set_format(ff::format::Pixel::YUV420P); enc.set_time_base(ff::Rational::new(1, 60)); enc.set_max_b_frames(0); enc.set_gop(60); let codec_name = codec.name(); if codec_name == "libx264" { unsafe { let key = CString::new("preset").unwrap(); let val = CString::new("veryfast").unwrap(); ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0); let key = CString::new("tune").unwrap(); let val = CString::new("zerolatency").unwrap(); ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0); } } let opened = enc.open()?; let mut enc_video = opened.0; // Create output format context via FFI let mut fmt_ctx_ptr: *mut ffi::AVFormatContext = ptr::null_mut(); let ret = unsafe { ffi::avformat_alloc_output_context2( &mut fmt_ctx_ptr, ptr::null_mut(), ptr::null(), output_cstr.as_ptr(), ) }; if ret < 0 || fmt_ctx_ptr.is_null() { bail!("Failed to allocate output format context: error {ret}"); } let stream_ptr = unsafe { ffi::avformat_new_stream(fmt_ctx_ptr, ptr::null()) }; if stream_ptr.is_null() { bail!("Failed to create new stream"); } let ret = unsafe { ffi::avcodec_parameters_from_context((*stream_ptr).codecpar, enc_video.as_ptr()) }; if ret < 0 { bail!("Failed to copy encoder parameters: error {ret}"); } unsafe { (*stream_ptr).time_base = (*enc_video.as_ptr()).time_base; } let ret = unsafe { ffi::avio_open( &mut (*fmt_ctx_ptr).pb, output_cstr.as_ptr(), ffi::AVIO_FLAG_WRITE, ) }; if ret < 0 { bail!( "Failed to open output file '{}': error {ret}", output_path.display() ); } let ret = unsafe { ffi::avformat_write_header(fmt_ctx_ptr, ptr::null_mut()) }; if ret < 0 { bail!("Failed to write header: error {ret}"); } let mut octx = unsafe { ff::format::context::Output::wrap(fmt_ctx_ptr) }; // Create sws_scale context: BGRZ (BGR0) -> YUV420P let bgr0_fmt = pix_fmt(ff::format::Pixel::BGRZ); let yuv420p_fmt = pix_fmt(ff::format::Pixel::YUV420P); let sws_ctx = unsafe { ffi::sws_getContext( src_width as i32, src_height as i32, bgr0_fmt, enc_width as i32, enc_height as i32, yuv420p_fmt, 2, ptr::null_mut(), ptr::null_mut(), ptr::null_mut(), ) }; if sws_ctx.is_null() { bail!("Failed to create sws_scale context"); } // Allocate reusable YUV frame let mut yuv_frame = unsafe { let mut f = ffi::av_frame_alloc(); if f.is_null() { bail!("av_frame_alloc failed"); } (*f).width = enc_width as i32; (*f).height = enc_height as i32; (*f).format = yuv420p_fmt as i32; let ret = ffi::av_frame_get_buffer(f, 0); if ret < 0 { ffi::av_frame_free(&mut f); bail!("av_frame_get_buffer failed: {ret}"); } f }; println!( "[4/4] Encoder ready: {}, {}x{}\n", codec_name, enc_width, enc_height ); println!("=== Encoding {} frames ===\n", bench_args.frames); let mut stats = FrameStats::default(); let total_start = Instant::now(); let mut frames_encoded: u32 = 0; let mut pts: i64 = 0; while frames_encoded < bench_args.frames { if let Ok(ctrl) = cap.event_receiver().try_recv() { match ctrl { PwCtrlEvent::StreamEnded => { eprintln!("PipeWire stream ended after {} frames", frames_encoded); break; } PwCtrlEvent::Error(e) => { eprintln!("PipeWire error after {} frames: {}", frames_encoded, e); break; } } } let frame = match cap .frame_receiver() .recv_timeout(std::time::Duration::from_secs(5)) { Ok(f) => f, Err(_) => { eprintln!("Frame timeout/disconnect after {} frames", frames_encoded); break; } }; let frame_start = Instant::now(); let mmap_start = Instant::now(); let frame_size = (frame.stride as usize) * (frame.height as usize); let mmap_ptr = unsafe { libc::mmap( ptr::null_mut(), frame_size, libc::PROT_READ, libc::MAP_SHARED, frame.fd.as_raw_fd(), frame.offset as i64, ) }; if mmap_ptr == libc::MAP_FAILED { stats.mmap_failures += 1; eprintln!("mmap failed on frame {}", frames_encoded); drop(frame); continue; } stats.mmap_us.push(mmap_start.elapsed().as_micros() as u64); let scale_start = Instant::now(); let src_data = unsafe { std::slice::from_raw_parts(mmap_ptr as *const u8, frame_size) }; unsafe { ffi::av_frame_make_writable(yuv_frame); let src_ptr = src_data.as_ptr(); let src_linesize = frame.stride as i32; ffi::sws_scale( sws_ctx, &src_ptr as *const *const u8, &src_linesize as *const i32, 0, frame.height as i32, (*yuv_frame).data.as_ptr() as *mut *mut u8, (*yuv_frame).linesize.as_ptr() as *mut i32, ); } stats .scale_us .push(scale_start.elapsed().as_micros() as u64); unsafe { libc::munmap(mmap_ptr, frame_size); } drop(frame); let encode_start = Instant::now(); unsafe { (*yuv_frame).pts = pts; pts += 1; let ret = ffi::avcodec_send_frame(enc_video.as_mut_ptr(), yuv_frame); if ret < 0 { eprintln!("avcodec_send_frame failed: {ret}"); continue; } } drain_encoder(&mut enc_video, &mut octx)?; stats .encode_us .push(encode_start.elapsed().as_micros() as u64); stats .total_us .push(frame_start.elapsed().as_micros() as u64); frames_encoded += 1; if frames_encoded % 30 == 0 { let fps = frames_encoded as f64 / total_start.elapsed().as_secs_f64(); println!( " [{}/{}] {:.1} FPS", frames_encoded, bench_args.frames, fps ); } } let total_elapsed = total_start.elapsed(); println!("\nFlushing encoder..."); unsafe { ffi::avcodec_send_frame(enc_video.as_mut_ptr(), ptr::null()); } drain_encoder(&mut enc_video, &mut octx)?; octx.write_trailer() .map_err(|e| anyhow::anyhow!("Failed to write trailer: {e}"))?; // Cleanup unsafe { ffi::av_frame_free(&mut yuv_frame as *mut _); ffi::sws_freeContext(sws_ctx); } drop(cap); // Print results let mmap_count = stats.mmap_us.len() as u32; let mmap_success_rate = if mmap_count + stats.mmap_failures > 0 { mmap_count as f64 / (mmap_count + stats.mmap_failures) as f64 * 100.0 } else { 0.0 }; let total_fps = frames_encoded as f64 / total_elapsed.as_secs_f64(); let avg_total_ms = FrameStats::avg_ms(&stats.total_us); let max_fps = if avg_total_ms > 0.0 { 1000.0 / avg_total_ms } else { 0.0 }; println!(); println!("╔══════════════════════════════════════════════════════════════╗"); println!("║ Software Encode Benchmark Results ║"); println!("╚══════════════════════════════════════════════════════════════╝"); println!(); println!("Capture resolution: {}x{}", src_width, src_height); println!("Encode resolution: {}x{}", enc_width, enc_height); println!("Frames encoded: {}", frames_encoded); println!("Total time: {:.2}s", total_elapsed.as_secs_f64()); println!(); println!("mmap (DMA-BUF -> CPU):"); println!( " avg: {:.2} ms/frame", FrameStats::avg_ms(&stats.mmap_us) ); println!( " success rate: {:.1}% ({}/{})", mmap_success_rate, mmap_count, mmap_count + stats.mmap_failures ); println!(); println!("scale (BGR0 -> YUV420P via sws_scale):"); println!( " avg: {:.2} ms/frame", FrameStats::avg_ms(&stats.scale_us) ); println!(); println!("encode ({}):", codec_name); println!( " avg: {:.2} ms/frame", FrameStats::avg_ms(&stats.encode_us) ); println!(); println!("total pipeline:"); println!(" avg: {:.2} ms/frame", avg_total_ms); println!(" achieved FPS: {:.1}", total_fps); println!(" max theoretical: {:.1} FPS", max_fps); println!(); if mmap_success_rate < 100.0 { println!( "WARNING: Some mmap operations failed ({}/{})", stats.mmap_failures, stats.mmap_failures + mmap_count ); } if total_fps < 30.0 { println!( "NOTE: Achieved FPS ({:.1}) is below 30 FPS target.", total_fps ); } println!("Output written to: {}", bench_args.output); Ok(()) } fn drain_encoder( enc_video: &mut ff::encoder::video::Video, octx: &mut ff::format::context::Output, ) -> Result<()> { loop { let mut pkt = ff::Packet::empty(); let ret = unsafe { ffi::avcodec_receive_packet(enc_video.as_mut_ptr(), pkt.as_mut_ptr()) }; if ret < 0 { if ret == ffi::AVERROR(ffi::EAGAIN) || ret == ffi::AVERROR_EOF { break; } eprintln!("avcodec_receive_packet failed: {ret}"); break; } let enc_tb = enc_video.time_base(); let stream_tb = unsafe { let streams = (*octx.as_ptr()).streams; let st = *streams.add(0); ff::Rational::from((*st).time_base) }; pkt.rescale_ts(enc_tb, stream_tb); pkt.set_stream(0); pkt.write_interleaved(octx) .map_err(|e| anyhow::anyhow!("write packet failed: {e}"))?; } Ok(()) }