// vaapi_import_bench.rs — VAAPI DMA-BUF import + GPU-side downscale benchmark // // Tests: Portal capture -> av_hwframe_map (ARGB sw_format) -> transfer -> sw encode // // Usage: cargo run --bin vaapi_import_bench -- --output /tmp/vaapi_bench.mp4 use std::ffi::CString; use std::os::fd::AsRawFd; use std::path::Path; use std::ptr; use std::time::Instant; use anyhow::{bail, Result}; use clap::{Parser, ValueEnum}; use ffmpeg_next as ff; use ffmpeg_next::ffi; use ffmpeg_next::packet::Mut; use wl_webrtc::args::Args; use wl_webrtc::avhw::{import_dma_buf_to_vaapi, AvHwDevCtx, AvHwFrameCtx}; use wl_webrtc::cap_portal::{CapPortal, PwCtrlEvent}; #[derive(Parser, Debug)] #[command(name = "vaapi_import_bench", about = "VAAPI DMA-BUF import benchmark")] struct BenchArgs { #[arg(short, long)] output: String, #[arg(long, default_value_t = 60)] frames: u32, #[arg(long, default_value_t = 2560)] enc_width: u32, #[arg(long, default_value_t = 1440)] enc_height: u32, #[arg(long, default_value = "/dev/dri/renderD128")] drm_device: String, #[arg(long, value_enum, default_value_t = PipelineMode::Both)] mode: PipelineMode, } #[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] enum PipelineMode { Cpu, Gpu, Both, } #[derive(Default)] struct FrameStats { import_us: Vec, filter_us: Vec, transfer_us: Vec, scale_us: Vec, format_us: Vec, encode_us: Vec, total_us: Vec, import_failures: u32, frames_encoded: u32, elapsed_secs: f64, codec_name: String, output_path: String, } impl FrameStats { fn avg_ms(data: &[u64]) -> f64 { if data.is_empty() { return 0.0; } data.iter().sum::() as f64 / data.len() as f64 / 1000.0 } fn avg_total_ms(&self) -> f64 { Self::avg_ms(&self.total_us) } fn achieved_fps(&self) -> f64 { if self.frames_encoded > 0 && self.elapsed_secs > 0.0 { self.frames_encoded as f64 / self.elapsed_secs } else { 0.0 } } fn theoretical_fps(&self) -> f64 { let avg = self.avg_total_ms(); if avg > 0.0 { 1000.0 / avg } else { 0.0 } } } struct SoftwareEncoder { enc_video: ff::codec::encoder::video::Video, octx: ff::format::context::Output, yuv_frame: *mut ffi::AVFrame, codec_name: String, } impl Drop for SoftwareEncoder { fn drop(&mut self) { // SAFETY: yuv_frame is allocated by av_frame_alloc in create_software_encoder and // owned exclusively by this SoftwareEncoder. unsafe { ffi::av_frame_free(&mut self.yuv_frame); } } } struct SwsContext(*mut ffi::SwsContext); impl Drop for SwsContext { fn drop(&mut self) { // SAFETY: Context is either null or returned by sws_getContext and owned here. unsafe { ffi::sws_freeContext(self.0); } } } fn av_err_to_string(ret: i32) -> String { let mut buf = vec![0u8; 128]; unsafe { ffi::av_strerror(ret, buf.as_mut_ptr() as *mut i8, buf.len()); } let end = buf.iter().position(|&b| b == 0).unwrap_or(buf.len()); String::from_utf8_lossy(&buf[..end]).to_string() } fn receive_first_frame(cap: &CapPortal) -> Result { loop { if let Ok(ctrl) = cap.event_receiver().try_recv() { match ctrl { PwCtrlEvent::StreamEnded => bail!("PipeWire stream ended before first frame"), PwCtrlEvent::Error(e) => bail!("PipeWire error: {e}"), } } match cap .frame_receiver() .recv_timeout(std::time::Duration::from_secs(10)) { Ok(frame) => return Ok(frame), Err(crossbeam_channel::RecvTimeoutError::Timeout) => { bail!("Timeout waiting for first frame (10s)"); } Err(crossbeam_channel::RecvTimeoutError::Disconnected) => { bail!("PipeWire frame channel disconnected"); } } } } fn drain_encoder( enc_video: &mut ff::codec::encoder::video::Video, octx: &mut ff::format::context::Output, ) -> Result<()> { loop { let mut pkt = ff::Packet::empty(); let ret = unsafe { ffi::avcodec_receive_packet(enc_video.as_mut_ptr(), pkt.as_mut_ptr()) }; if ret < 0 { if ret == ffi::AVERROR(ffi::EAGAIN) || ret == ffi::AVERROR_EOF { break; } eprintln!("avcodec_receive_packet failed: {ret}"); break; } let enc_tb = enc_video.time_base(); let stream_tb = unsafe { let streams = (*octx.as_ptr()).streams; let st = *streams.add(0); ff::Rational::from((*st).time_base) }; pkt.rescale_ts(enc_tb, stream_tb); pkt.set_stream(0); pkt.write_interleaved(octx) .map_err(|e| anyhow::anyhow!("write packet failed: {e}"))?; } Ok(()) } fn create_software_encoder(output_path: &Path, width: u32, height: u32) -> Result { let output_cstr = CString::new(output_path.to_str().unwrap())?; let codec = ff::encoder::find_by_name("libx264") .or_else(|| ff::encoder::find_by_name("libopenh264")) .ok_or_else(|| { anyhow::anyhow!("No H.264 software encoder found (tried libx264, libopenh264)") })?; let codec_name = codec.name().to_string(); let mut enc = { let ctx = ff::codec::Context::new_with_codec(codec); ctx.encoder().video()? }; enc.set_width(width); enc.set_height(height); enc.set_format(ff::format::Pixel::YUV420P); enc.set_time_base(ff::Rational::new(1, 60)); enc.set_max_b_frames(0); enc.set_gop(60); if codec_name == "libx264" { // SAFETY: priv_data belongs to the not-yet-opened encoder context. Option strings are // valid NUL-terminated C strings for the duration of each av_opt_set call. unsafe { let key = CString::new("preset").unwrap(); let val = CString::new("veryfast").unwrap(); ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0); let key = CString::new("tune").unwrap(); let val = CString::new("zerolatency").unwrap(); ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0); } } let opened = enc.open()?; let enc_video = opened.0; let use_null_muxer = output_path .to_str() .map(|s| s.contains("null")) .unwrap_or(false); let fmt_name = if use_null_muxer { CString::new("null").unwrap() } else { CString::new("").unwrap() }; let fmt_name_ptr = if use_null_muxer { fmt_name.as_ptr() } else { ptr::null() }; let mut fmt_ctx_ptr: *mut ffi::AVFormatContext = ptr::null_mut(); // SAFETY: fmt_ctx_ptr is an out pointer initialized by FFmpeg; output_cstr and fmt_name live // across the call. let ret = unsafe { ffi::avformat_alloc_output_context2( &mut fmt_ctx_ptr, ptr::null_mut(), fmt_name_ptr, output_cstr.as_ptr(), ) }; if ret < 0 || fmt_ctx_ptr.is_null() { bail!("Failed to allocate output format context: error {ret}"); } // SAFETY: fmt_ctx_ptr is a valid output context allocated above. let stream_ptr = unsafe { ffi::avformat_new_stream(fmt_ctx_ptr, ptr::null()) }; if stream_ptr.is_null() { bail!("Failed to create output stream"); } // SAFETY: stream and codec context pointers are valid; parameters are copied into stream. let ret = unsafe { ffi::avcodec_parameters_from_context((*stream_ptr).codecpar, enc_video.as_ptr()) }; if ret < 0 { bail!("Failed to copy codec parameters: error {ret}"); } // SAFETY: fmt_ctx_ptr is valid; pb is initialized for non-NOFILE muxers. unsafe { if (*(*fmt_ctx_ptr).oformat).flags & ffi::AVFMT_NOFILE == 0 { let ret = ffi::avio_open( &mut (*fmt_ctx_ptr).pb, output_cstr.as_ptr(), ffi::AVIO_FLAG_WRITE, ); if ret < 0 { bail!("Failed to open output file: error {ret}"); } } } // SAFETY: fmt_ctx_ptr is a fully configured output context. let ret = unsafe { ffi::avformat_write_header(fmt_ctx_ptr, ptr::null_mut()) }; if ret < 0 { bail!("Failed to write header: error {ret}"); } // SAFETY: ownership of fmt_ctx_ptr transfers into ffmpeg-next Output wrapper. let octx = unsafe { ff::format::context::Output::wrap(fmt_ctx_ptr) }; // SAFETY: Allocate and configure an owned writable YUV420P frame for encoder input. let yuv_frame = unsafe { let mut f = ffi::av_frame_alloc(); if f.is_null() { bail!("av_frame_alloc failed"); } (*f).width = width as i32; (*f).height = height as i32; (*f).format = ffi::AVPixelFormat::AV_PIX_FMT_YUV420P as i32; let r = ffi::av_frame_get_buffer(f, 0); if r < 0 { ffi::av_frame_free(&mut f); bail!("av_frame_get_buffer failed: {r}"); } f }; Ok(SoftwareEncoder { enc_video, octx, yuv_frame, codec_name, }) } fn output_for_mode(base: &str, mode: PipelineMode, split: bool) -> String { if !split || base.contains("null") { return base.to_string(); } let path = Path::new(base); let suffix = match mode { PipelineMode::Cpu => "cpu", PipelineMode::Gpu => "gpu", PipelineMode::Both => unreachable!(), }; let file_name = path.file_name().and_then(|s| s.to_str()).unwrap_or(base); let split_name = if let Some((stem, ext)) = file_name.rsplit_once('.') { format!("{stem}.{suffix}.{ext}") } else { format!("{file_name}.{suffix}") }; path.with_file_name(split_name) .to_string_lossy() .into_owned() } fn create_sws_context( src_width: u32, src_height: u32, src_fmt: ffi::AVPixelFormat, dst_width: u32, dst_height: u32, ) -> Result { // SAFETY: sws_getContext creates an owned scaler context for the provided dimensions/formats. let ctx = unsafe { ffi::sws_getContext( src_width as i32, src_height as i32, src_fmt, dst_width as i32, dst_height as i32, ffi::AVPixelFormat::AV_PIX_FMT_YUV420P, 2, ptr::null_mut(), ptr::null_mut(), ptr::null_mut(), ) }; if ctx.is_null() { bail!("Failed to create sws_scale context"); } Ok(SwsContext(ctx)) } fn encode_yuv_frame(encoder: &mut SoftwareEncoder, pts: &mut i64) -> Result { let t_encode = Instant::now(); // SAFETY: yuv_frame is allocated, writable, and formatted as the encoder's configured // YUV420P input frame. FFmpeg consumes but does not take ownership. unsafe { (*encoder.yuv_frame).pts = *pts; *pts += 1; let r = ffi::avcodec_send_frame(encoder.enc_video.as_mut_ptr(), encoder.yuv_frame); if r < 0 { bail!("avcodec_send_frame failed: {r}"); } } drain_encoder(&mut encoder.enc_video, &mut encoder.octx)?; Ok(t_encode.elapsed().as_micros() as u64) } fn finish_encoder(mut encoder: SoftwareEncoder) -> Result<()> { // SAFETY: Sending a null frame flushes the encoder; context remains owned by encoder. unsafe { ffi::avcodec_send_frame(encoder.enc_video.as_mut_ptr(), ptr::null()); } drain_encoder(&mut encoder.enc_video, &mut encoder.octx)?; encoder .octx .write_trailer() .map_err(|e| anyhow::anyhow!("Failed to write trailer: {e}"))?; Ok(()) } fn import_frame( frames_ctx: &AvHwFrameCtx, frame: &wl_webrtc::cap_portal::PwDmaBufFrame, ) -> Result { // SAFETY: frames_ctx is a live VAAPI frames context configured for the capture format; frame // carries a valid DMA-BUF fd and metadata from PipeWire for the duration of the call. unsafe { import_dma_buf_to_vaapi( frames_ctx.as_ptr(), frame.fd.as_raw_fd(), frame.width, frame.height, frame.format, frame.modifier, frame.stride, frame.offset, ) } } fn build_gpu_filter_graph( hw_dev: &AvHwDevCtx, frames_rgb: &AvHwFrameCtx, width: u32, height: u32, enc_width: u32, enc_height: u32, ) -> Result { let mut graph = ff::filter::Graph::new(); let buffersrc = ff::filter::find("buffer").ok_or_else(|| anyhow::anyhow!("filter 'buffer' not found"))?; let buffersink = ff::filter::find("buffersink") .ok_or_else(|| anyhow::anyhow!("filter 'buffersink' not found"))?; let scale_vaapi = ff::filter::find("scale_vaapi") .ok_or_else(|| anyhow::anyhow!("filter 'scale_vaapi' not found"))?; // pix_fmt must be set via av_buffersrc_parameters_set (below), not in args — // FFmpeg 8.0+ rejects HW pixel formats during init() if hw_frames_ctx is missing. // Use a placeholder SW format here; it gets overridden by parameters_set below. let args = format!( "video_size={}x{}:pix_fmt=bgra:time_base=1/60:pixel_aspect=1/1", width, height, ); let mut src_ctx = graph.add(&buffersrc, "in", &args)?; // SAFETY: Allocate buffersrc parameters, attach a ref-counted hw_frames_ctx compatible with // imported VAAPI BGRA frames, apply it, then free only the parameter struct (not the ref). let par = unsafe { ffi::av_buffersrc_parameters_alloc() }; if par.is_null() { bail!("av_buffersrc_parameters_alloc returned null"); } // SAFETY: par and src_ctx are valid; frames_rgb.ref_clone returns an owned AVBufferRef. unsafe { (*par).format = Into::::into(ff::format::Pixel::VAAPI) as i32; (*par).width = width as i32; (*par).height = height as i32; (*par).time_base = ffi::AVRational { num: 1, den: 60 }; (*par).hw_frames_ctx = frames_rgb.ref_clone(); let ret = ffi::av_buffersrc_parameters_set(src_ctx.as_mut_ptr(), par); ffi::av_free(par as *mut _); if ret < 0 { bail!("av_buffersrc_parameters_set failed: error {ret}"); } } let mut scale_ctx = graph.add( &scale_vaapi, "scale", &format!("{enc_width}:{enc_height}:format=nv12"), )?; // SAFETY: scale_vaapi uses this ref-counted VAAPI device context while graph is alive. unsafe { (*scale_ctx.as_mut_ptr()).hw_device_ctx = hw_dev.ref_clone(); } let mut sink_ctx = graph.add(&buffersink, "out", "")?; src_ctx.link(0, &mut scale_ctx, 0); scale_ctx.link(0, &mut sink_ctx, 0); graph .validate() .map_err(|e| anyhow::anyhow!("GPU filter graph validation failed: {e}"))?; Ok(graph) } #[allow(clippy::too_many_arguments)] fn run_cpu_pipeline( cap: &CapPortal, frames_ctx: &AvHwFrameCtx, output: &str, frames: u32, src_width: u32, src_height: u32, enc_width: u32, enc_height: u32, ) -> Result { let mut encoder = create_software_encoder(Path::new(output), enc_width, enc_height)?; let sws_ctx = create_sws_context( src_width, src_height, ffi::AVPixelFormat::AV_PIX_FMT_BGRA, enc_width, enc_height, )?; println!( " Encoder: {}, {}x{} YUV420P", encoder.codec_name, enc_width, enc_height ); println!(" Output: {output}"); println!(" CPU Pipeline: DMA-BUF 4K BGRA -> av_hwframe_map -> av_hwframe_transfer_data -> sws_scale -> YUV420P 2K -> encode\n"); let mut stats = FrameStats { codec_name: encoder.codec_name.clone(), output_path: output.to_string(), ..FrameStats::default() }; let total_start = Instant::now(); let mut pts: i64 = 0; while stats.frames_encoded < frames { if let Ok(ctrl) = cap.event_receiver().try_recv() { match ctrl { PwCtrlEvent::StreamEnded => break, PwCtrlEvent::Error(e) => bail!( "PipeWire error after {} CPU frames: {e}", stats.frames_encoded ), } } let frame = match cap .frame_receiver() .recv_timeout(std::time::Duration::from_secs(5)) { Ok(f) => f, Err(_) => break, }; let frame_start = Instant::now(); let t_import = Instant::now(); let vaapi_frame = match import_frame(frames_ctx, &frame) { Ok(f) => f, Err(e) => { stats.import_failures += 1; if stats.import_failures <= 3 { eprintln!("CPU frame {}: import failed: {e}", stats.frames_encoded); } continue; } }; let import_us = t_import.elapsed().as_micros() as u64; let t_transfer = Instant::now(); // SAFETY: sw_frame is allocated by FFmpeg and freed on all paths below. let mut sw_frame = unsafe { ffi::av_frame_alloc() }; if sw_frame.is_null() { bail!("CPU frame {}: av_frame_alloc failed", stats.frames_encoded); } // SAFETY: sw_frame is an allocated destination; vaapi_frame is a valid VAAPI source frame. let transfer_ret = unsafe { ffi::av_hwframe_transfer_data(sw_frame, vaapi_frame.as_ptr(), 0) }; if transfer_ret < 0 { // SAFETY: sw_frame was allocated above and has not been freed yet. unsafe { ffi::av_frame_free(&mut sw_frame) }; bail!( "CPU frame {}: av_hwframe_transfer_data failed: {} ({})", stats.frames_encoded, transfer_ret, av_err_to_string(transfer_ret) ); } let transfer_us = t_transfer.elapsed().as_micros() as u64; let t_scale = Instant::now(); // SAFETY: sw_frame contains transferred BGRA data; encoder.yuv_frame is writable YUV420P // at the configured output dimensions; sws_ctx converts and downscales between them. unsafe { ffi::av_frame_make_writable(encoder.yuv_frame); ffi::sws_scale( sws_ctx.0, (*sw_frame).data.as_ptr() as *const *const u8, (*sw_frame).linesize.as_ptr() as *const i32, 0, (*sw_frame).height, (*encoder.yuv_frame).data.as_ptr() as *mut *mut u8, (*encoder.yuv_frame).linesize.as_ptr() as *const i32, ); } let scale_us = t_scale.elapsed().as_micros() as u64; // SAFETY: sw_frame was allocated above and is no longer needed after scaling. unsafe { ffi::av_frame_free(&mut sw_frame) }; let encode_us = encode_yuv_frame(&mut encoder, &mut pts)?; let total_us = frame_start.elapsed().as_micros() as u64; stats.import_us.push(import_us); stats.transfer_us.push(transfer_us); stats.scale_us.push(scale_us); stats.encode_us.push(encode_us); stats.total_us.push(total_us); stats.frames_encoded += 1; if stats.frames_encoded <= 3 || stats.frames_encoded % 30 == 0 { println!( " CPU frame {:>4}/{frames}: import={:.2}ms transfer={:.2}ms scale={:.2}ms encode={:.2}ms total={:.2}ms", stats.frames_encoded, import_us as f64 / 1000.0, transfer_us as f64 / 1000.0, scale_us as f64 / 1000.0, encode_us as f64 / 1000.0, total_us as f64 / 1000.0, ); } } finish_encoder(encoder)?; stats.elapsed_secs = total_start.elapsed().as_secs_f64(); Ok(stats) } #[allow(clippy::too_many_arguments)] fn run_gpu_pipeline( cap: &CapPortal, hw_dev: &AvHwDevCtx, frames_ctx: &AvHwFrameCtx, output: &str, frames: u32, src_width: u32, src_height: u32, enc_width: u32, enc_height: u32, ) -> Result { let mut encoder = create_software_encoder(Path::new(output), enc_width, enc_height)?; let format_ctx = create_sws_context( enc_width, enc_height, ffi::AVPixelFormat::AV_PIX_FMT_NV12, enc_width, enc_height, )?; let mut graph = build_gpu_filter_graph( hw_dev, frames_ctx, src_width, src_height, enc_width, enc_height, )?; println!( " Encoder: {}, {}x{} YUV420P", encoder.codec_name, enc_width, enc_height ); println!(" Output: {output}"); println!(" GPU Pipeline: DMA-BUF 4K BGRA -> av_hwframe_map -> scale_vaapi 2K NV12 -> transfer small NV12 -> sws_scale format-only -> encode\n"); let mut stats = FrameStats { codec_name: encoder.codec_name.clone(), output_path: output.to_string(), ..FrameStats::default() }; let total_start = Instant::now(); let mut pts: i64 = 0; while stats.frames_encoded < frames { if let Ok(ctrl) = cap.event_receiver().try_recv() { match ctrl { PwCtrlEvent::StreamEnded => break, PwCtrlEvent::Error(e) => bail!( "PipeWire error after {} GPU frames: {e}", stats.frames_encoded ), } } let frame = match cap .frame_receiver() .recv_timeout(std::time::Duration::from_secs(5)) { Ok(f) => f, Err(_) => break, }; let frame_start = Instant::now(); let t_import = Instant::now(); let vaapi_frame = match import_frame(frames_ctx, &frame) { Ok(f) => f, Err(e) => { stats.import_failures += 1; if stats.import_failures <= 3 { eprintln!("GPU frame {}: import failed: {e}", stats.frames_encoded); } continue; } }; let import_us = t_import.elapsed().as_micros() as u64; let t_filter = Instant::now(); let mut filter_src_ctx = graph.get("in").unwrap(); let mut filter_src = filter_src_ctx.source(); let mut filter_sink_ctx = graph.get("out").unwrap(); let mut filter_sink = filter_sink_ctx.sink(); filter_src .add(&vaapi_frame) .map_err(|e| anyhow::anyhow!("GPU filter source add failed: {e}"))?; let mut filtered = ff::frame::Video::empty(); match filter_sink.frame(&mut filtered) { Ok(()) => {} Err(ff::Error::Other { errno }) if errno == ffi::EAGAIN => continue, Err(e) => bail!("GPU filter sink get frame failed: {e}"), } let filter_us = t_filter.elapsed().as_micros() as u64; let t_transfer = Instant::now(); // SAFETY: sw_nv12 is allocated by FFmpeg and freed after format conversion. let mut sw_nv12 = unsafe { ffi::av_frame_alloc() }; if sw_nv12.is_null() { bail!("GPU frame {}: av_frame_alloc failed", stats.frames_encoded); } // SAFETY: sw_nv12 is an allocated destination; filtered is a valid 2K NV12 VAAPI frame. let transfer_ret = unsafe { ffi::av_hwframe_transfer_data(sw_nv12, filtered.as_ptr(), 0) }; if transfer_ret < 0 { // SAFETY: sw_nv12 was allocated above and has not been freed yet. unsafe { ffi::av_frame_free(&mut sw_nv12) }; bail!( "GPU frame {}: av_hwframe_transfer_data failed: {} ({})", stats.frames_encoded, transfer_ret, av_err_to_string(transfer_ret) ); } let transfer_us = t_transfer.elapsed().as_micros() as u64; let t_format = Instant::now(); // SAFETY: sw_nv12 contains CPU-side NV12 at enc dimensions; encoder.yuv_frame is writable // YUV420P at the same dimensions, so sws_scale performs only chroma deinterleave/format conversion. unsafe { ffi::av_frame_make_writable(encoder.yuv_frame); ffi::sws_scale( format_ctx.0, (*sw_nv12).data.as_ptr() as *const *const u8, (*sw_nv12).linesize.as_ptr() as *const i32, 0, (*sw_nv12).height, (*encoder.yuv_frame).data.as_ptr() as *mut *mut u8, (*encoder.yuv_frame).linesize.as_ptr() as *const i32, ); } let format_us = t_format.elapsed().as_micros() as u64; // SAFETY: sw_nv12 was allocated above and is no longer needed. unsafe { ffi::av_frame_free(&mut sw_nv12) }; let encode_us = encode_yuv_frame(&mut encoder, &mut pts)?; let total_us = frame_start.elapsed().as_micros() as u64; stats.import_us.push(import_us); stats.filter_us.push(filter_us); stats.transfer_us.push(transfer_us); stats.format_us.push(format_us); stats.encode_us.push(encode_us); stats.total_us.push(total_us); stats.frames_encoded += 1; if stats.frames_encoded <= 3 || stats.frames_encoded % 30 == 0 { println!( " GPU frame {:>4}/{frames}: import={:.2}ms filter={:.2}ms transfer={:.2}ms format={:.2}ms encode={:.2}ms total={:.2}ms", stats.frames_encoded, import_us as f64 / 1000.0, filter_us as f64 / 1000.0, transfer_us as f64 / 1000.0, format_us as f64 / 1000.0, encode_us as f64 / 1000.0, total_us as f64 / 1000.0, ); } } finish_encoder(encoder)?; stats.elapsed_secs = total_start.elapsed().as_secs_f64(); Ok(stats) } fn print_detailed_results( label: &str, stats: &FrameStats, src_width: u32, src_height: u32, enc_width: u32, enc_height: u32, ) { println!(); println!("=== {label} Pipeline Results ==="); println!("Capture resolution: {}x{}", src_width, src_height); println!("Encode resolution: {}x{}", enc_width, enc_height); println!("Frames encoded: {}", stats.frames_encoded); println!("Total time: {:.2}s", stats.elapsed_secs); println!("Output: {}", stats.output_path); if stats.import_failures > 0 { println!("Import failures: {}", stats.import_failures); } println!( "import avg: {:.2} ms/frame", FrameStats::avg_ms(&stats.import_us) ); if !stats.filter_us.is_empty() { println!( "filter avg: {:.2} ms/frame", FrameStats::avg_ms(&stats.filter_us) ); } println!( "transfer avg: {:.2} ms/frame", FrameStats::avg_ms(&stats.transfer_us) ); if !stats.scale_us.is_empty() { println!( "scale avg: {:.2} ms/frame", FrameStats::avg_ms(&stats.scale_us) ); } if !stats.format_us.is_empty() { println!( "format avg: {:.2} ms/frame", FrameStats::avg_ms(&stats.format_us) ); } println!( "encode ({}): {:.2} ms/frame", stats.codec_name, FrameStats::avg_ms(&stats.encode_us) ); println!("total avg: {:.2} ms/frame", stats.avg_total_ms()); println!("achieved FPS: {:.1}", stats.achieved_fps()); println!("max theoretical: {:.1} FPS", stats.theoretical_fps()); } fn print_comparison(cpu: Option<&FrameStats>, gpu: Option<&FrameStats>) { println!(); println!("=== Pipeline Comparison ==="); if let Some(s) = cpu { println!( "CPU: import={:.2}ms transfer={:.2}ms scale={:.2}ms encode={:.2}ms total={:.2}ms ({:.1} FPS)", FrameStats::avg_ms(&s.import_us), FrameStats::avg_ms(&s.transfer_us), FrameStats::avg_ms(&s.scale_us), FrameStats::avg_ms(&s.encode_us), s.avg_total_ms(), s.theoretical_fps(), ); } if let Some(s) = gpu { println!( "GPU: import={:.2}ms filter={:.2}ms transfer={:.2}ms format={:.2}ms encode={:.2}ms total={:.2}ms ({:.1} FPS)", FrameStats::avg_ms(&s.import_us), FrameStats::avg_ms(&s.filter_us), FrameStats::avg_ms(&s.transfer_us), FrameStats::avg_ms(&s.format_us), FrameStats::avg_ms(&s.encode_us), s.avg_total_ms(), s.theoretical_fps(), ); } } fn main() -> Result<()> { let bench_args = BenchArgs::parse(); println!("=== VAAPI Import Benchmark ==="); println!("Output: {}", bench_args.output); println!("Target frames: {}", bench_args.frames); println!( "Encode resolution: {}x{}", bench_args.enc_width, bench_args.enc_height ); println!("DRM device: {}", bench_args.drm_device); println!(); ff::init()?; println!("[1/3] Requesting screen capture via XDG Portal..."); println!(" (Select a screen to share in the portal dialog)"); let portal_args = Args { output: Some(bench_args.output.clone()), output_name: None, fps: 60, codec: "h264".to_string(), hw_accel: "vaapi".to_string(), drm_device: None, bitrate: None, gop_size: None, verbose: false, backend: Some("portal".to_string()), port: 0, no_persist: false, }; let cap = CapPortal::new(&portal_args)?; println!("[1/3] Portal connected, PipeWire stream active\n"); println!("[2/3] Waiting for first frame from PipeWire..."); let first_frame = receive_first_frame(&cap)?; let src_width = first_frame.width; let src_height = first_frame.height; let src_format = first_frame.format; println!( "[2/3] First frame: {}x{}, format=0x{:08X}, stride={}, modifier=0x{:X}", src_width, src_height, src_format, first_frame.stride, first_frame.modifier ); println!("\n[2/3] Testing av_hwframe_map with sw_format=BGRA..."); println!( " DRM format chain: PipeWire BGRA -> DRM_FORMAT_ARGB8888 (0x{:08X}) -> VA_FOURCC_BGRA -> AV_PIX_FMT_BGRA", src_format ); let drm_device = Path::new(&bench_args.drm_device); let hw_dev = AvHwDevCtx::new_vaapi(drm_device)?; println!(" VAAPI device context created OK"); let frames_ctx = AvHwFrameCtx::for_capture(&hw_dev, src_width, src_height, ff::format::Pixel::BGRA)?; println!(" VAAPI frames context created OK (sw_format=BGRA)"); let vaapi_frame = unsafe { import_dma_buf_to_vaapi( frames_ctx.as_ptr(), first_frame.fd.as_raw_fd(), first_frame.width, first_frame.height, first_frame.format, first_frame.modifier, first_frame.stride, first_frame.offset, ) }; match &vaapi_frame { Ok(_) => { println!(" Result: SUCCESS — av_hwframe_map imported DMA-BUF to VAAPI surface!"); } Err(e) => { println!(" Result: FAILED"); println!(" Error: {e}"); println!(); println!(" Possible causes:"); println!(" - sw_format mismatch (current: BGRA)"); println!(" - DRM format modifier not supported by VAAPI"); println!(" - VAAPI driver doesn't support DMA-BUF import for this format"); println!(); println!(" Falling back to mmap readback test for comparison..."); let mmap_size = (first_frame.stride as usize) * (first_frame.height as usize); let mmap_start = Instant::now(); let mmap_ptr = unsafe { libc::mmap( ptr::null_mut(), mmap_size, libc::PROT_READ, libc::MAP_SHARED, first_frame.fd.as_raw_fd(), first_frame.offset as i64, ) }; let mmap_elapsed = mmap_start.elapsed(); if mmap_ptr == libc::MAP_FAILED { let errno = std::io::Error::last_os_error(); println!(" mmap also FAILED: {errno}"); } else { println!( " mmap SUCCESS: {:.1} MB, setup in {:.2}ms", mmap_size as f64 / 1024.0 / 1024.0, mmap_elapsed.as_secs_f64() * 1000.0 ); unsafe { libc::munmap(mmap_ptr, mmap_size); } } println!(); println!("=== Benchmark ended: av_hwframe_map import FAILED ==="); println!("Fix the import issue before proceeding to GPU downscale tests."); return Ok(()); } } drop(vaapi_frame); drop(first_frame); println!("\n[3/3] Benchmarking selected pipeline(s)..."); let enc_width = bench_args.enc_width; let enc_height = bench_args.enc_height; let split_outputs = bench_args.mode == PipelineMode::Both; let mut cpu_stats = None; let mut gpu_stats = None; if matches!(bench_args.mode, PipelineMode::Cpu | PipelineMode::Both) { let output = output_for_mode(&bench_args.output, PipelineMode::Cpu, split_outputs); cpu_stats = Some(run_cpu_pipeline( &cap, &frames_ctx, &output, bench_args.frames, src_width, src_height, enc_width, enc_height, )?); } if matches!(bench_args.mode, PipelineMode::Gpu | PipelineMode::Both) { let output = output_for_mode(&bench_args.output, PipelineMode::Gpu, split_outputs); gpu_stats = Some(run_gpu_pipeline( &cap, &hw_dev, &frames_ctx, &output, bench_args.frames, src_width, src_height, enc_width, enc_height, )?); } if let Some(stats) = cpu_stats.as_ref() { print_detailed_results("CPU", stats, src_width, src_height, enc_width, enc_height); } if let Some(stats) = gpu_stats.as_ref() { print_detailed_results("GPU", stats, src_width, src_height, enc_width, enc_height); } print_comparison(cpu_stats.as_ref(), gpu_stats.as_ref()); if cpu_stats .as_ref() .into_iter() .chain(gpu_stats.as_ref()) .any(|stats| stats.achieved_fps() < 30.0 && stats.frames_encoded > 0) { println!("NOTE: At least one achieved FPS result is below 30 FPS target."); } Ok(()) }