Fixes #1: `--port` mode with the wlr-screencopy backend panicked in negotiate_format() because self.args.output is None and .expect() was called unconditionally.

Changes:
- Introduce a StreamingEncoder enum wrapping EncState (MP4) and SwEncState (WebRTC) with a unified frames_rgb/encode_frame/flush API.
- Add WebRTC fields to State<S> (webrtc, webrtc_tx, webrtc_rx, webrtc_frames_sent), matching the Portal backend pattern.
- State::new() now returns Result<Self> so a WebRtcState init failure is reported cleanly.
- negotiate_format() branches on webrtc_tx: the WebRTC path uses SwEncState::new_webrtc(); the MP4 path is unchanged (hardware VAAPI).
- Add a poll_webrtc() method to drive signaling and drain the channel.
- The event loop calls poll_webrtc() on each iteration.
- Fix pre-existing test/bench Args construction (Option<String> output, missing no_persist field).
1038 lines
34 KiB
Rust
1038 lines
34 KiB
Rust
// vaapi_import_bench.rs — VAAPI DMA-BUF import + GPU-side downscale benchmark
|
|
//
|
|
// Tests: Portal capture -> av_hwframe_map (BGRA sw_format, i.e. DRM_FORMAT_ARGB8888) -> transfer -> sw encode
|
|
//
|
|
// Usage: cargo run --bin vaapi_import_bench -- --output /tmp/vaapi_bench.mp4
|
|
|
|
use std::ffi::CString;
|
|
use std::os::fd::AsRawFd;
|
|
use std::path::Path;
|
|
use std::ptr;
|
|
use std::time::Instant;
|
|
|
|
use anyhow::{bail, Result};
|
|
use clap::{Parser, ValueEnum};
|
|
|
|
use ffmpeg_next as ff;
|
|
use ffmpeg_next::ffi;
|
|
use ffmpeg_next::packet::Mut;
|
|
|
|
use wl_webrtc::args::Args;
|
|
use wl_webrtc::avhw::{import_dma_buf_to_vaapi, AvHwDevCtx, AvHwFrameCtx};
|
|
use wl_webrtc::cap_portal::{CapPortal, PwCtrlEvent};
|
|
|
|
#[derive(Parser, Debug)]
|
|
#[command(name = "vaapi_import_bench", about = "VAAPI DMA-BUF import benchmark")]
|
|
struct BenchArgs {
|
|
#[arg(short, long)]
|
|
output: String,
|
|
|
|
#[arg(long, default_value_t = 60)]
|
|
frames: u32,
|
|
|
|
#[arg(long, default_value_t = 2560)]
|
|
enc_width: u32,
|
|
|
|
#[arg(long, default_value_t = 1440)]
|
|
enc_height: u32,
|
|
|
|
#[arg(long, default_value = "/dev/dri/renderD128")]
|
|
drm_device: String,
|
|
|
|
#[arg(long, value_enum, default_value_t = PipelineMode::Both)]
|
|
mode: PipelineMode,
|
|
}
|
|
|
|
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
|
|
enum PipelineMode {
|
|
Cpu,
|
|
Gpu,
|
|
Both,
|
|
}
|
|
|
|
/// Timing samples and counters gathered by one benchmark pipeline run.
///
/// Each `*_us` vector receives one sample (in microseconds) per successfully encoded frame.
/// Stages a pipeline does not have simply stay empty.
#[derive(Default)]
struct FrameStats {
    /// Time spent importing the DMA-BUF into a VAAPI frame.
    import_us: Vec<u64>,
    /// Time spent in the GPU filter graph (GPU pipeline only).
    filter_us: Vec<u64>,
    /// Time spent transferring the hardware frame to system memory.
    transfer_us: Vec<u64>,
    /// Time spent in software scaling (CPU pipeline only).
    scale_us: Vec<u64>,
    /// Time spent in the format-only conversion (GPU pipeline only).
    format_us: Vec<u64>,
    /// Time spent sending the frame to the encoder and draining packets.
    encode_us: Vec<u64>,
    /// Wall time of the whole per-frame iteration.
    total_us: Vec<u64>,
    /// Number of frames whose DMA-BUF import failed and were skipped.
    import_failures: u32,
    /// Number of frames that made it through the encoder.
    frames_encoded: u32,
    /// Wall time of the whole run, in seconds.
    elapsed_secs: f64,
    /// Name of the encoder that was used.
    codec_name: String,
    /// Path the run wrote its output to.
    output_path: String,
}

impl FrameStats {
    /// Mean of `data` (microsecond samples) converted to milliseconds; `0.0` without samples.
    fn avg_ms(data: &[u64]) -> f64 {
        match data.len() {
            0 => 0.0,
            count => {
                let sum_us: u64 = data.iter().sum();
                sum_us as f64 / count as f64 / 1000.0
            }
        }
    }

    /// Mean total per-frame time in milliseconds.
    fn avg_total_ms(&self) -> f64 {
        Self::avg_ms(self.total_us.as_slice())
    }

    /// Frames per second actually achieved over the run, or `0.0` if nothing was measured.
    fn achieved_fps(&self) -> f64 {
        let measurable = self.frames_encoded > 0 && self.elapsed_secs > 0.0;
        if !measurable {
            return 0.0;
        }
        f64::from(self.frames_encoded) / self.elapsed_secs
    }

    /// Frame rate implied by the mean per-frame time alone, or `0.0` without samples.
    fn theoretical_fps(&self) -> f64 {
        let per_frame_ms = self.avg_total_ms();
        (per_frame_ms > 0.0)
            .then(|| 1000.0 / per_frame_ms)
            .unwrap_or(0.0)
    }
}
|
|
|
|
struct SoftwareEncoder {
|
|
enc_video: ff::codec::encoder::video::Video,
|
|
octx: ff::format::context::Output,
|
|
yuv_frame: *mut ffi::AVFrame,
|
|
codec_name: String,
|
|
}
|
|
|
|
impl Drop for SoftwareEncoder {
|
|
fn drop(&mut self) {
|
|
// SAFETY: yuv_frame is allocated by av_frame_alloc in create_software_encoder and
|
|
// owned exclusively by this SoftwareEncoder.
|
|
unsafe {
|
|
ffi::av_frame_free(&mut self.yuv_frame);
|
|
}
|
|
}
|
|
}
|
|
|
|
struct SwsContext(*mut ffi::SwsContext);
|
|
|
|
impl Drop for SwsContext {
|
|
fn drop(&mut self) {
|
|
// SAFETY: Context is either null or returned by sws_getContext and owned here.
|
|
unsafe {
|
|
ffi::sws_freeContext(self.0);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn av_err_to_string(ret: i32) -> String {
|
|
let mut buf = vec![0u8; 128];
|
|
unsafe {
|
|
ffi::av_strerror(ret, buf.as_mut_ptr() as *mut i8, buf.len());
|
|
}
|
|
let end = buf.iter().position(|&b| b == 0).unwrap_or(buf.len());
|
|
String::from_utf8_lossy(&buf[..end]).to_string()
|
|
}
|
|
|
|
fn receive_first_frame(cap: &CapPortal) -> Result<wl_webrtc::cap_portal::PwDmaBufFrame> {
|
|
loop {
|
|
if let Ok(ctrl) = cap.event_receiver().try_recv() {
|
|
match ctrl {
|
|
PwCtrlEvent::StreamEnded => bail!("PipeWire stream ended before first frame"),
|
|
PwCtrlEvent::Error(e) => bail!("PipeWire error: {e}"),
|
|
}
|
|
}
|
|
match cap
|
|
.frame_receiver()
|
|
.recv_timeout(std::time::Duration::from_secs(10))
|
|
{
|
|
Ok(frame) => return Ok(frame),
|
|
Err(crossbeam_channel::RecvTimeoutError::Timeout) => {
|
|
bail!("Timeout waiting for first frame (10s)");
|
|
}
|
|
Err(crossbeam_channel::RecvTimeoutError::Disconnected) => {
|
|
bail!("PipeWire frame channel disconnected");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn drain_encoder(
|
|
enc_video: &mut ff::codec::encoder::video::Video,
|
|
octx: &mut ff::format::context::Output,
|
|
) -> Result<()> {
|
|
loop {
|
|
let mut pkt = ff::Packet::empty();
|
|
let ret = unsafe { ffi::avcodec_receive_packet(enc_video.as_mut_ptr(), pkt.as_mut_ptr()) };
|
|
if ret < 0 {
|
|
if ret == ffi::AVERROR(ffi::EAGAIN) || ret == ffi::AVERROR_EOF {
|
|
break;
|
|
}
|
|
eprintln!("avcodec_receive_packet failed: {ret}");
|
|
break;
|
|
}
|
|
let enc_tb = enc_video.time_base();
|
|
let stream_tb = unsafe {
|
|
let streams = (*octx.as_ptr()).streams;
|
|
let st = *streams.add(0);
|
|
ff::Rational::from((*st).time_base)
|
|
};
|
|
pkt.rescale_ts(enc_tb, stream_tb);
|
|
pkt.set_stream(0);
|
|
pkt.write_interleaved(octx)
|
|
.map_err(|e| anyhow::anyhow!("write packet failed: {e}"))?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn create_software_encoder(output_path: &Path, width: u32, height: u32) -> Result<SoftwareEncoder> {
|
|
let output_cstr = CString::new(output_path.to_str().unwrap())?;
|
|
let codec = ff::encoder::find_by_name("libx264")
|
|
.or_else(|| ff::encoder::find_by_name("libopenh264"))
|
|
.ok_or_else(|| {
|
|
anyhow::anyhow!("No H.264 software encoder found (tried libx264, libopenh264)")
|
|
})?;
|
|
|
|
let codec_name = codec.name().to_string();
|
|
let mut enc = {
|
|
let ctx = ff::codec::Context::new_with_codec(codec);
|
|
ctx.encoder().video()?
|
|
};
|
|
|
|
enc.set_width(width);
|
|
enc.set_height(height);
|
|
enc.set_format(ff::format::Pixel::YUV420P);
|
|
enc.set_time_base(ff::Rational::new(1, 60));
|
|
enc.set_max_b_frames(0);
|
|
enc.set_gop(60);
|
|
|
|
if codec_name == "libx264" {
|
|
// SAFETY: priv_data belongs to the not-yet-opened encoder context. Option strings are
|
|
// valid NUL-terminated C strings for the duration of each av_opt_set call.
|
|
unsafe {
|
|
let key = CString::new("preset").unwrap();
|
|
let val = CString::new("veryfast").unwrap();
|
|
ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0);
|
|
let key = CString::new("tune").unwrap();
|
|
let val = CString::new("zerolatency").unwrap();
|
|
ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0);
|
|
}
|
|
}
|
|
|
|
let opened = enc.open()?;
|
|
let enc_video = opened.0;
|
|
|
|
let use_null_muxer = output_path
|
|
.to_str()
|
|
.map(|s| s.contains("null"))
|
|
.unwrap_or(false);
|
|
let fmt_name = if use_null_muxer {
|
|
CString::new("null").unwrap()
|
|
} else {
|
|
CString::new("").unwrap()
|
|
};
|
|
let fmt_name_ptr = if use_null_muxer {
|
|
fmt_name.as_ptr()
|
|
} else {
|
|
ptr::null()
|
|
};
|
|
|
|
let mut fmt_ctx_ptr: *mut ffi::AVFormatContext = ptr::null_mut();
|
|
// SAFETY: fmt_ctx_ptr is an out pointer initialized by FFmpeg; output_cstr and fmt_name live
|
|
// across the call.
|
|
let ret = unsafe {
|
|
ffi::avformat_alloc_output_context2(
|
|
&mut fmt_ctx_ptr,
|
|
ptr::null_mut(),
|
|
fmt_name_ptr,
|
|
output_cstr.as_ptr(),
|
|
)
|
|
};
|
|
if ret < 0 || fmt_ctx_ptr.is_null() {
|
|
bail!("Failed to allocate output format context: error {ret}");
|
|
}
|
|
|
|
// SAFETY: fmt_ctx_ptr is a valid output context allocated above.
|
|
let stream_ptr = unsafe { ffi::avformat_new_stream(fmt_ctx_ptr, ptr::null()) };
|
|
if stream_ptr.is_null() {
|
|
bail!("Failed to create output stream");
|
|
}
|
|
|
|
// SAFETY: stream and codec context pointers are valid; parameters are copied into stream.
|
|
let ret =
|
|
unsafe { ffi::avcodec_parameters_from_context((*stream_ptr).codecpar, enc_video.as_ptr()) };
|
|
if ret < 0 {
|
|
bail!("Failed to copy codec parameters: error {ret}");
|
|
}
|
|
|
|
// SAFETY: fmt_ctx_ptr is valid; pb is initialized for non-NOFILE muxers.
|
|
unsafe {
|
|
if (*(*fmt_ctx_ptr).oformat).flags & ffi::AVFMT_NOFILE == 0 {
|
|
let ret = ffi::avio_open(
|
|
&mut (*fmt_ctx_ptr).pb,
|
|
output_cstr.as_ptr(),
|
|
ffi::AVIO_FLAG_WRITE,
|
|
);
|
|
if ret < 0 {
|
|
bail!("Failed to open output file: error {ret}");
|
|
}
|
|
}
|
|
}
|
|
|
|
// SAFETY: fmt_ctx_ptr is a fully configured output context.
|
|
let ret = unsafe { ffi::avformat_write_header(fmt_ctx_ptr, ptr::null_mut()) };
|
|
if ret < 0 {
|
|
bail!("Failed to write header: error {ret}");
|
|
}
|
|
|
|
// SAFETY: ownership of fmt_ctx_ptr transfers into ffmpeg-next Output wrapper.
|
|
let octx = unsafe { ff::format::context::Output::wrap(fmt_ctx_ptr) };
|
|
|
|
// SAFETY: Allocate and configure an owned writable YUV420P frame for encoder input.
|
|
let yuv_frame = unsafe {
|
|
let mut f = ffi::av_frame_alloc();
|
|
if f.is_null() {
|
|
bail!("av_frame_alloc failed");
|
|
}
|
|
(*f).width = width as i32;
|
|
(*f).height = height as i32;
|
|
(*f).format = ffi::AVPixelFormat::AV_PIX_FMT_YUV420P as i32;
|
|
let r = ffi::av_frame_get_buffer(f, 0);
|
|
if r < 0 {
|
|
ffi::av_frame_free(&mut f);
|
|
bail!("av_frame_get_buffer failed: {r}");
|
|
}
|
|
f
|
|
};
|
|
|
|
Ok(SoftwareEncoder {
|
|
enc_video,
|
|
octx,
|
|
yuv_frame,
|
|
codec_name,
|
|
})
|
|
}
|
|
|
|
fn output_for_mode(base: &str, mode: PipelineMode, split: bool) -> String {
|
|
if !split || base.contains("null") {
|
|
return base.to_string();
|
|
}
|
|
|
|
let path = Path::new(base);
|
|
let suffix = match mode {
|
|
PipelineMode::Cpu => "cpu",
|
|
PipelineMode::Gpu => "gpu",
|
|
PipelineMode::Both => unreachable!(),
|
|
};
|
|
let file_name = path.file_name().and_then(|s| s.to_str()).unwrap_or(base);
|
|
let split_name = if let Some((stem, ext)) = file_name.rsplit_once('.') {
|
|
format!("{stem}.{suffix}.{ext}")
|
|
} else {
|
|
format!("{file_name}.{suffix}")
|
|
};
|
|
path.with_file_name(split_name)
|
|
.to_string_lossy()
|
|
.into_owned()
|
|
}
|
|
|
|
fn create_sws_context(
|
|
src_width: u32,
|
|
src_height: u32,
|
|
src_fmt: ffi::AVPixelFormat,
|
|
dst_width: u32,
|
|
dst_height: u32,
|
|
) -> Result<SwsContext> {
|
|
// SAFETY: sws_getContext creates an owned scaler context for the provided dimensions/formats.
|
|
let ctx = unsafe {
|
|
ffi::sws_getContext(
|
|
src_width as i32,
|
|
src_height as i32,
|
|
src_fmt,
|
|
dst_width as i32,
|
|
dst_height as i32,
|
|
ffi::AVPixelFormat::AV_PIX_FMT_YUV420P,
|
|
2,
|
|
ptr::null_mut(),
|
|
ptr::null_mut(),
|
|
ptr::null_mut(),
|
|
)
|
|
};
|
|
if ctx.is_null() {
|
|
bail!("Failed to create sws_scale context");
|
|
}
|
|
Ok(SwsContext(ctx))
|
|
}
|
|
|
|
fn encode_yuv_frame(encoder: &mut SoftwareEncoder, pts: &mut i64) -> Result<u64> {
|
|
let t_encode = Instant::now();
|
|
// SAFETY: yuv_frame is allocated, writable, and formatted as the encoder's configured
|
|
// YUV420P input frame. FFmpeg consumes but does not take ownership.
|
|
unsafe {
|
|
(*encoder.yuv_frame).pts = *pts;
|
|
*pts += 1;
|
|
let r = ffi::avcodec_send_frame(encoder.enc_video.as_mut_ptr(), encoder.yuv_frame);
|
|
if r < 0 {
|
|
bail!("avcodec_send_frame failed: {r}");
|
|
}
|
|
}
|
|
drain_encoder(&mut encoder.enc_video, &mut encoder.octx)?;
|
|
Ok(t_encode.elapsed().as_micros() as u64)
|
|
}
|
|
|
|
fn finish_encoder(mut encoder: SoftwareEncoder) -> Result<()> {
|
|
// SAFETY: Sending a null frame flushes the encoder; context remains owned by encoder.
|
|
unsafe {
|
|
ffi::avcodec_send_frame(encoder.enc_video.as_mut_ptr(), ptr::null());
|
|
}
|
|
drain_encoder(&mut encoder.enc_video, &mut encoder.octx)?;
|
|
encoder
|
|
.octx
|
|
.write_trailer()
|
|
.map_err(|e| anyhow::anyhow!("Failed to write trailer: {e}"))?;
|
|
Ok(())
|
|
}
|
|
|
|
fn import_frame(
|
|
frames_ctx: &AvHwFrameCtx,
|
|
frame: &wl_webrtc::cap_portal::PwDmaBufFrame,
|
|
) -> Result<ff::frame::Video> {
|
|
// SAFETY: frames_ctx is a live VAAPI frames context configured for the capture format; frame
|
|
// carries a valid DMA-BUF fd and metadata from PipeWire for the duration of the call.
|
|
unsafe {
|
|
import_dma_buf_to_vaapi(
|
|
frames_ctx.as_ptr(),
|
|
frame.fd.as_raw_fd(),
|
|
frame.width,
|
|
frame.height,
|
|
frame.format,
|
|
frame.modifier,
|
|
frame.stride,
|
|
frame.offset,
|
|
)
|
|
}
|
|
}
|
|
|
|
fn build_gpu_filter_graph(
|
|
hw_dev: &AvHwDevCtx,
|
|
frames_rgb: &AvHwFrameCtx,
|
|
width: u32,
|
|
height: u32,
|
|
enc_width: u32,
|
|
enc_height: u32,
|
|
) -> Result<ff::filter::Graph> {
|
|
let mut graph = ff::filter::Graph::new();
|
|
let buffersrc =
|
|
ff::filter::find("buffer").ok_or_else(|| anyhow::anyhow!("filter 'buffer' not found"))?;
|
|
let buffersink = ff::filter::find("buffersink")
|
|
.ok_or_else(|| anyhow::anyhow!("filter 'buffersink' not found"))?;
|
|
let scale_vaapi = ff::filter::find("scale_vaapi")
|
|
.ok_or_else(|| anyhow::anyhow!("filter 'scale_vaapi' not found"))?;
|
|
|
|
// pix_fmt must be set via av_buffersrc_parameters_set (below), not in args —
|
|
// FFmpeg 8.0+ rejects HW pixel formats during init() if hw_frames_ctx is missing.
|
|
// Use a placeholder SW format here; it gets overridden by parameters_set below.
|
|
let args = format!(
|
|
"video_size={}x{}:pix_fmt=bgra:time_base=1/60:pixel_aspect=1/1",
|
|
width, height,
|
|
);
|
|
let mut src_ctx = graph.add(&buffersrc, "in", &args)?;
|
|
|
|
// SAFETY: Allocate buffersrc parameters, attach a ref-counted hw_frames_ctx compatible with
|
|
// imported VAAPI BGRA frames, apply it, then free only the parameter struct (not the ref).
|
|
let par = unsafe { ffi::av_buffersrc_parameters_alloc() };
|
|
if par.is_null() {
|
|
bail!("av_buffersrc_parameters_alloc returned null");
|
|
}
|
|
// SAFETY: par and src_ctx are valid; frames_rgb.ref_clone returns an owned AVBufferRef.
|
|
unsafe {
|
|
(*par).format = Into::<ffi::AVPixelFormat>::into(ff::format::Pixel::VAAPI) as i32;
|
|
(*par).width = width as i32;
|
|
(*par).height = height as i32;
|
|
(*par).time_base = ffi::AVRational { num: 1, den: 60 };
|
|
(*par).hw_frames_ctx = frames_rgb.ref_clone();
|
|
let ret = ffi::av_buffersrc_parameters_set(src_ctx.as_mut_ptr(), par);
|
|
ffi::av_free(par as *mut _);
|
|
if ret < 0 {
|
|
bail!("av_buffersrc_parameters_set failed: error {ret}");
|
|
}
|
|
}
|
|
|
|
let mut scale_ctx = graph.add(
|
|
&scale_vaapi,
|
|
"scale",
|
|
&format!("{enc_width}:{enc_height}:format=nv12"),
|
|
)?;
|
|
// SAFETY: scale_vaapi uses this ref-counted VAAPI device context while graph is alive.
|
|
unsafe {
|
|
(*scale_ctx.as_mut_ptr()).hw_device_ctx = hw_dev.ref_clone();
|
|
}
|
|
|
|
let mut sink_ctx = graph.add(&buffersink, "out", "")?;
|
|
src_ctx.link(0, &mut scale_ctx, 0);
|
|
scale_ctx.link(0, &mut sink_ctx, 0);
|
|
graph
|
|
.validate()
|
|
.map_err(|e| anyhow::anyhow!("GPU filter graph validation failed: {e}"))?;
|
|
|
|
Ok(graph)
|
|
}
|
|
|
|
#[allow(clippy::too_many_arguments)]
|
|
fn run_cpu_pipeline(
|
|
cap: &CapPortal,
|
|
frames_ctx: &AvHwFrameCtx,
|
|
output: &str,
|
|
frames: u32,
|
|
src_width: u32,
|
|
src_height: u32,
|
|
enc_width: u32,
|
|
enc_height: u32,
|
|
) -> Result<FrameStats> {
|
|
let mut encoder = create_software_encoder(Path::new(output), enc_width, enc_height)?;
|
|
let sws_ctx = create_sws_context(
|
|
src_width,
|
|
src_height,
|
|
ffi::AVPixelFormat::AV_PIX_FMT_BGRA,
|
|
enc_width,
|
|
enc_height,
|
|
)?;
|
|
|
|
println!(
|
|
" Encoder: {}, {}x{} YUV420P",
|
|
encoder.codec_name, enc_width, enc_height
|
|
);
|
|
println!(" Output: {output}");
|
|
println!(" CPU Pipeline: DMA-BUF 4K BGRA -> av_hwframe_map -> av_hwframe_transfer_data -> sws_scale -> YUV420P 2K -> encode\n");
|
|
|
|
let mut stats = FrameStats {
|
|
codec_name: encoder.codec_name.clone(),
|
|
output_path: output.to_string(),
|
|
..FrameStats::default()
|
|
};
|
|
let total_start = Instant::now();
|
|
let mut pts: i64 = 0;
|
|
|
|
while stats.frames_encoded < frames {
|
|
if let Ok(ctrl) = cap.event_receiver().try_recv() {
|
|
match ctrl {
|
|
PwCtrlEvent::StreamEnded => break,
|
|
PwCtrlEvent::Error(e) => bail!(
|
|
"PipeWire error after {} CPU frames: {e}",
|
|
stats.frames_encoded
|
|
),
|
|
}
|
|
}
|
|
|
|
let frame = match cap
|
|
.frame_receiver()
|
|
.recv_timeout(std::time::Duration::from_secs(5))
|
|
{
|
|
Ok(f) => f,
|
|
Err(_) => break,
|
|
};
|
|
|
|
let frame_start = Instant::now();
|
|
let t_import = Instant::now();
|
|
let vaapi_frame = match import_frame(frames_ctx, &frame) {
|
|
Ok(f) => f,
|
|
Err(e) => {
|
|
stats.import_failures += 1;
|
|
if stats.import_failures <= 3 {
|
|
eprintln!("CPU frame {}: import failed: {e}", stats.frames_encoded);
|
|
}
|
|
continue;
|
|
}
|
|
};
|
|
let import_us = t_import.elapsed().as_micros() as u64;
|
|
|
|
let t_transfer = Instant::now();
|
|
// SAFETY: sw_frame is allocated by FFmpeg and freed on all paths below.
|
|
let mut sw_frame = unsafe { ffi::av_frame_alloc() };
|
|
if sw_frame.is_null() {
|
|
bail!("CPU frame {}: av_frame_alloc failed", stats.frames_encoded);
|
|
}
|
|
// SAFETY: sw_frame is an allocated destination; vaapi_frame is a valid VAAPI source frame.
|
|
let transfer_ret =
|
|
unsafe { ffi::av_hwframe_transfer_data(sw_frame, vaapi_frame.as_ptr(), 0) };
|
|
if transfer_ret < 0 {
|
|
// SAFETY: sw_frame was allocated above and has not been freed yet.
|
|
unsafe { ffi::av_frame_free(&mut sw_frame) };
|
|
bail!(
|
|
"CPU frame {}: av_hwframe_transfer_data failed: {} ({})",
|
|
stats.frames_encoded,
|
|
transfer_ret,
|
|
av_err_to_string(transfer_ret)
|
|
);
|
|
}
|
|
let transfer_us = t_transfer.elapsed().as_micros() as u64;
|
|
|
|
let t_scale = Instant::now();
|
|
// SAFETY: sw_frame contains transferred BGRA data; encoder.yuv_frame is writable YUV420P
|
|
// at the configured output dimensions; sws_ctx converts and downscales between them.
|
|
unsafe {
|
|
ffi::av_frame_make_writable(encoder.yuv_frame);
|
|
ffi::sws_scale(
|
|
sws_ctx.0,
|
|
(*sw_frame).data.as_ptr() as *const *const u8,
|
|
(*sw_frame).linesize.as_ptr() as *const i32,
|
|
0,
|
|
(*sw_frame).height,
|
|
(*encoder.yuv_frame).data.as_ptr() as *mut *mut u8,
|
|
(*encoder.yuv_frame).linesize.as_ptr() as *const i32,
|
|
);
|
|
}
|
|
let scale_us = t_scale.elapsed().as_micros() as u64;
|
|
// SAFETY: sw_frame was allocated above and is no longer needed after scaling.
|
|
unsafe { ffi::av_frame_free(&mut sw_frame) };
|
|
|
|
let encode_us = encode_yuv_frame(&mut encoder, &mut pts)?;
|
|
let total_us = frame_start.elapsed().as_micros() as u64;
|
|
|
|
stats.import_us.push(import_us);
|
|
stats.transfer_us.push(transfer_us);
|
|
stats.scale_us.push(scale_us);
|
|
stats.encode_us.push(encode_us);
|
|
stats.total_us.push(total_us);
|
|
stats.frames_encoded += 1;
|
|
|
|
if stats.frames_encoded <= 3 || stats.frames_encoded % 30 == 0 {
|
|
println!(
|
|
" CPU frame {:>4}/{frames}: import={:.2}ms transfer={:.2}ms scale={:.2}ms encode={:.2}ms total={:.2}ms",
|
|
stats.frames_encoded,
|
|
import_us as f64 / 1000.0,
|
|
transfer_us as f64 / 1000.0,
|
|
scale_us as f64 / 1000.0,
|
|
encode_us as f64 / 1000.0,
|
|
total_us as f64 / 1000.0,
|
|
);
|
|
}
|
|
}
|
|
|
|
finish_encoder(encoder)?;
|
|
stats.elapsed_secs = total_start.elapsed().as_secs_f64();
|
|
Ok(stats)
|
|
}
|
|
|
|
#[allow(clippy::too_many_arguments)]
|
|
fn run_gpu_pipeline(
|
|
cap: &CapPortal,
|
|
hw_dev: &AvHwDevCtx,
|
|
frames_ctx: &AvHwFrameCtx,
|
|
output: &str,
|
|
frames: u32,
|
|
src_width: u32,
|
|
src_height: u32,
|
|
enc_width: u32,
|
|
enc_height: u32,
|
|
) -> Result<FrameStats> {
|
|
let mut encoder = create_software_encoder(Path::new(output), enc_width, enc_height)?;
|
|
let format_ctx = create_sws_context(
|
|
enc_width,
|
|
enc_height,
|
|
ffi::AVPixelFormat::AV_PIX_FMT_NV12,
|
|
enc_width,
|
|
enc_height,
|
|
)?;
|
|
let mut graph = build_gpu_filter_graph(
|
|
hw_dev, frames_ctx, src_width, src_height, enc_width, enc_height,
|
|
)?;
|
|
|
|
println!(
|
|
" Encoder: {}, {}x{} YUV420P",
|
|
encoder.codec_name, enc_width, enc_height
|
|
);
|
|
println!(" Output: {output}");
|
|
println!(" GPU Pipeline: DMA-BUF 4K BGRA -> av_hwframe_map -> scale_vaapi 2K NV12 -> transfer small NV12 -> sws_scale format-only -> encode\n");
|
|
|
|
let mut stats = FrameStats {
|
|
codec_name: encoder.codec_name.clone(),
|
|
output_path: output.to_string(),
|
|
..FrameStats::default()
|
|
};
|
|
let total_start = Instant::now();
|
|
let mut pts: i64 = 0;
|
|
|
|
while stats.frames_encoded < frames {
|
|
if let Ok(ctrl) = cap.event_receiver().try_recv() {
|
|
match ctrl {
|
|
PwCtrlEvent::StreamEnded => break,
|
|
PwCtrlEvent::Error(e) => bail!(
|
|
"PipeWire error after {} GPU frames: {e}",
|
|
stats.frames_encoded
|
|
),
|
|
}
|
|
}
|
|
|
|
let frame = match cap
|
|
.frame_receiver()
|
|
.recv_timeout(std::time::Duration::from_secs(5))
|
|
{
|
|
Ok(f) => f,
|
|
Err(_) => break,
|
|
};
|
|
|
|
let frame_start = Instant::now();
|
|
let t_import = Instant::now();
|
|
let vaapi_frame = match import_frame(frames_ctx, &frame) {
|
|
Ok(f) => f,
|
|
Err(e) => {
|
|
stats.import_failures += 1;
|
|
if stats.import_failures <= 3 {
|
|
eprintln!("GPU frame {}: import failed: {e}", stats.frames_encoded);
|
|
}
|
|
continue;
|
|
}
|
|
};
|
|
let import_us = t_import.elapsed().as_micros() as u64;
|
|
|
|
let t_filter = Instant::now();
|
|
let mut filter_src_ctx = graph.get("in").unwrap();
|
|
let mut filter_src = filter_src_ctx.source();
|
|
let mut filter_sink_ctx = graph.get("out").unwrap();
|
|
let mut filter_sink = filter_sink_ctx.sink();
|
|
filter_src
|
|
.add(&vaapi_frame)
|
|
.map_err(|e| anyhow::anyhow!("GPU filter source add failed: {e}"))?;
|
|
|
|
let mut filtered = ff::frame::Video::empty();
|
|
match filter_sink.frame(&mut filtered) {
|
|
Ok(()) => {}
|
|
Err(ff::Error::Other { errno }) if errno == ffi::EAGAIN => continue,
|
|
Err(e) => bail!("GPU filter sink get frame failed: {e}"),
|
|
}
|
|
let filter_us = t_filter.elapsed().as_micros() as u64;
|
|
|
|
let t_transfer = Instant::now();
|
|
// SAFETY: sw_nv12 is allocated by FFmpeg and freed after format conversion.
|
|
let mut sw_nv12 = unsafe { ffi::av_frame_alloc() };
|
|
if sw_nv12.is_null() {
|
|
bail!("GPU frame {}: av_frame_alloc failed", stats.frames_encoded);
|
|
}
|
|
// SAFETY: sw_nv12 is an allocated destination; filtered is a valid 2K NV12 VAAPI frame.
|
|
let transfer_ret = unsafe { ffi::av_hwframe_transfer_data(sw_nv12, filtered.as_ptr(), 0) };
|
|
if transfer_ret < 0 {
|
|
// SAFETY: sw_nv12 was allocated above and has not been freed yet.
|
|
unsafe { ffi::av_frame_free(&mut sw_nv12) };
|
|
bail!(
|
|
"GPU frame {}: av_hwframe_transfer_data failed: {} ({})",
|
|
stats.frames_encoded,
|
|
transfer_ret,
|
|
av_err_to_string(transfer_ret)
|
|
);
|
|
}
|
|
let transfer_us = t_transfer.elapsed().as_micros() as u64;
|
|
|
|
let t_format = Instant::now();
|
|
// SAFETY: sw_nv12 contains CPU-side NV12 at enc dimensions; encoder.yuv_frame is writable
|
|
// YUV420P at the same dimensions, so sws_scale performs only chroma deinterleave/format conversion.
|
|
unsafe {
|
|
ffi::av_frame_make_writable(encoder.yuv_frame);
|
|
ffi::sws_scale(
|
|
format_ctx.0,
|
|
(*sw_nv12).data.as_ptr() as *const *const u8,
|
|
(*sw_nv12).linesize.as_ptr() as *const i32,
|
|
0,
|
|
(*sw_nv12).height,
|
|
(*encoder.yuv_frame).data.as_ptr() as *mut *mut u8,
|
|
(*encoder.yuv_frame).linesize.as_ptr() as *const i32,
|
|
);
|
|
}
|
|
let format_us = t_format.elapsed().as_micros() as u64;
|
|
// SAFETY: sw_nv12 was allocated above and is no longer needed.
|
|
unsafe { ffi::av_frame_free(&mut sw_nv12) };
|
|
|
|
let encode_us = encode_yuv_frame(&mut encoder, &mut pts)?;
|
|
let total_us = frame_start.elapsed().as_micros() as u64;
|
|
|
|
stats.import_us.push(import_us);
|
|
stats.filter_us.push(filter_us);
|
|
stats.transfer_us.push(transfer_us);
|
|
stats.format_us.push(format_us);
|
|
stats.encode_us.push(encode_us);
|
|
stats.total_us.push(total_us);
|
|
stats.frames_encoded += 1;
|
|
|
|
if stats.frames_encoded <= 3 || stats.frames_encoded % 30 == 0 {
|
|
println!(
|
|
" GPU frame {:>4}/{frames}: import={:.2}ms filter={:.2}ms transfer={:.2}ms format={:.2}ms encode={:.2}ms total={:.2}ms",
|
|
stats.frames_encoded,
|
|
import_us as f64 / 1000.0,
|
|
filter_us as f64 / 1000.0,
|
|
transfer_us as f64 / 1000.0,
|
|
format_us as f64 / 1000.0,
|
|
encode_us as f64 / 1000.0,
|
|
total_us as f64 / 1000.0,
|
|
);
|
|
}
|
|
}
|
|
|
|
finish_encoder(encoder)?;
|
|
stats.elapsed_secs = total_start.elapsed().as_secs_f64();
|
|
Ok(stats)
|
|
}
|
|
|
|
fn print_detailed_results(
|
|
label: &str,
|
|
stats: &FrameStats,
|
|
src_width: u32,
|
|
src_height: u32,
|
|
enc_width: u32,
|
|
enc_height: u32,
|
|
) {
|
|
println!();
|
|
println!("=== {label} Pipeline Results ===");
|
|
println!("Capture resolution: {}x{}", src_width, src_height);
|
|
println!("Encode resolution: {}x{}", enc_width, enc_height);
|
|
println!("Frames encoded: {}", stats.frames_encoded);
|
|
println!("Total time: {:.2}s", stats.elapsed_secs);
|
|
println!("Output: {}", stats.output_path);
|
|
if stats.import_failures > 0 {
|
|
println!("Import failures: {}", stats.import_failures);
|
|
}
|
|
println!(
|
|
"import avg: {:.2} ms/frame",
|
|
FrameStats::avg_ms(&stats.import_us)
|
|
);
|
|
if !stats.filter_us.is_empty() {
|
|
println!(
|
|
"filter avg: {:.2} ms/frame",
|
|
FrameStats::avg_ms(&stats.filter_us)
|
|
);
|
|
}
|
|
println!(
|
|
"transfer avg: {:.2} ms/frame",
|
|
FrameStats::avg_ms(&stats.transfer_us)
|
|
);
|
|
if !stats.scale_us.is_empty() {
|
|
println!(
|
|
"scale avg: {:.2} ms/frame",
|
|
FrameStats::avg_ms(&stats.scale_us)
|
|
);
|
|
}
|
|
if !stats.format_us.is_empty() {
|
|
println!(
|
|
"format avg: {:.2} ms/frame",
|
|
FrameStats::avg_ms(&stats.format_us)
|
|
);
|
|
}
|
|
println!(
|
|
"encode ({}): {:.2} ms/frame",
|
|
stats.codec_name,
|
|
FrameStats::avg_ms(&stats.encode_us)
|
|
);
|
|
println!("total avg: {:.2} ms/frame", stats.avg_total_ms());
|
|
println!("achieved FPS: {:.1}", stats.achieved_fps());
|
|
println!("max theoretical: {:.1} FPS", stats.theoretical_fps());
|
|
}
|
|
|
|
fn print_comparison(cpu: Option<&FrameStats>, gpu: Option<&FrameStats>) {
|
|
println!();
|
|
println!("=== Pipeline Comparison ===");
|
|
if let Some(s) = cpu {
|
|
println!(
|
|
"CPU: import={:.2}ms transfer={:.2}ms scale={:.2}ms encode={:.2}ms total={:.2}ms ({:.1} FPS)",
|
|
FrameStats::avg_ms(&s.import_us),
|
|
FrameStats::avg_ms(&s.transfer_us),
|
|
FrameStats::avg_ms(&s.scale_us),
|
|
FrameStats::avg_ms(&s.encode_us),
|
|
s.avg_total_ms(),
|
|
s.theoretical_fps(),
|
|
);
|
|
}
|
|
if let Some(s) = gpu {
|
|
println!(
|
|
"GPU: import={:.2}ms filter={:.2}ms transfer={:.2}ms format={:.2}ms encode={:.2}ms total={:.2}ms ({:.1} FPS)",
|
|
FrameStats::avg_ms(&s.import_us),
|
|
FrameStats::avg_ms(&s.filter_us),
|
|
FrameStats::avg_ms(&s.transfer_us),
|
|
FrameStats::avg_ms(&s.format_us),
|
|
FrameStats::avg_ms(&s.encode_us),
|
|
s.avg_total_ms(),
|
|
s.theoretical_fps(),
|
|
);
|
|
}
|
|
}
|
|
|
|
fn main() -> Result<()> {
|
|
let bench_args = BenchArgs::parse();
|
|
|
|
println!("=== VAAPI Import Benchmark ===");
|
|
println!("Output: {}", bench_args.output);
|
|
println!("Target frames: {}", bench_args.frames);
|
|
println!(
|
|
"Encode resolution: {}x{}",
|
|
bench_args.enc_width, bench_args.enc_height
|
|
);
|
|
println!("DRM device: {}", bench_args.drm_device);
|
|
println!();
|
|
|
|
ff::init()?;
|
|
|
|
println!("[1/3] Requesting screen capture via XDG Portal...");
|
|
println!(" (Select a screen to share in the portal dialog)");
|
|
|
|
let portal_args = Args {
|
|
output: Some(bench_args.output.clone()),
|
|
output_name: None,
|
|
fps: 60,
|
|
codec: "h264".to_string(),
|
|
hw_accel: "vaapi".to_string(),
|
|
drm_device: None,
|
|
bitrate: None,
|
|
gop_size: None,
|
|
verbose: false,
|
|
backend: Some("portal".to_string()),
|
|
port: 0,
|
|
no_persist: false,
|
|
};
|
|
|
|
let cap = CapPortal::new(&portal_args)?;
|
|
println!("[1/3] Portal connected, PipeWire stream active\n");
|
|
|
|
println!("[2/3] Waiting for first frame from PipeWire...");
|
|
let first_frame = receive_first_frame(&cap)?;
|
|
|
|
let src_width = first_frame.width;
|
|
let src_height = first_frame.height;
|
|
let src_format = first_frame.format;
|
|
|
|
println!(
|
|
"[2/3] First frame: {}x{}, format=0x{:08X}, stride={}, modifier=0x{:X}",
|
|
src_width, src_height, src_format, first_frame.stride, first_frame.modifier
|
|
);
|
|
|
|
println!("\n[2/3] Testing av_hwframe_map with sw_format=BGRA...");
|
|
println!(
|
|
" DRM format chain: PipeWire BGRA -> DRM_FORMAT_ARGB8888 (0x{:08X}) -> VA_FOURCC_BGRA -> AV_PIX_FMT_BGRA",
|
|
src_format
|
|
);
|
|
|
|
let drm_device = Path::new(&bench_args.drm_device);
|
|
let hw_dev = AvHwDevCtx::new_vaapi(drm_device)?;
|
|
println!(" VAAPI device context created OK");
|
|
|
|
let frames_ctx =
|
|
AvHwFrameCtx::for_capture(&hw_dev, src_width, src_height, ff::format::Pixel::BGRA)?;
|
|
println!(" VAAPI frames context created OK (sw_format=BGRA)");
|
|
|
|
let vaapi_frame = unsafe {
|
|
import_dma_buf_to_vaapi(
|
|
frames_ctx.as_ptr(),
|
|
first_frame.fd.as_raw_fd(),
|
|
first_frame.width,
|
|
first_frame.height,
|
|
first_frame.format,
|
|
first_frame.modifier,
|
|
first_frame.stride,
|
|
first_frame.offset,
|
|
)
|
|
};
|
|
|
|
match &vaapi_frame {
|
|
Ok(_) => {
|
|
println!(" Result: SUCCESS — av_hwframe_map imported DMA-BUF to VAAPI surface!");
|
|
}
|
|
Err(e) => {
|
|
println!(" Result: FAILED");
|
|
println!(" Error: {e}");
|
|
println!();
|
|
println!(" Possible causes:");
|
|
println!(" - sw_format mismatch (current: BGRA)");
|
|
println!(" - DRM format modifier not supported by VAAPI");
|
|
println!(" - VAAPI driver doesn't support DMA-BUF import for this format");
|
|
println!();
|
|
println!(" Falling back to mmap readback test for comparison...");
|
|
|
|
let mmap_size = (first_frame.stride as usize) * (first_frame.height as usize);
|
|
let mmap_start = Instant::now();
|
|
let mmap_ptr = unsafe {
|
|
libc::mmap(
|
|
ptr::null_mut(),
|
|
mmap_size,
|
|
libc::PROT_READ,
|
|
libc::MAP_SHARED,
|
|
first_frame.fd.as_raw_fd(),
|
|
first_frame.offset as i64,
|
|
)
|
|
};
|
|
let mmap_elapsed = mmap_start.elapsed();
|
|
|
|
if mmap_ptr == libc::MAP_FAILED {
|
|
let errno = std::io::Error::last_os_error();
|
|
println!(" mmap also FAILED: {errno}");
|
|
} else {
|
|
println!(
|
|
" mmap SUCCESS: {:.1} MB, setup in {:.2}ms",
|
|
mmap_size as f64 / 1024.0 / 1024.0,
|
|
mmap_elapsed.as_secs_f64() * 1000.0
|
|
);
|
|
unsafe {
|
|
libc::munmap(mmap_ptr, mmap_size);
|
|
}
|
|
}
|
|
|
|
println!();
|
|
println!("=== Benchmark ended: av_hwframe_map import FAILED ===");
|
|
println!("Fix the import issue before proceeding to GPU downscale tests.");
|
|
return Ok(());
|
|
}
|
|
}
|
|
|
|
drop(vaapi_frame);
|
|
drop(first_frame);
|
|
|
|
println!("\n[3/3] Benchmarking selected pipeline(s)...");
|
|
|
|
let enc_width = bench_args.enc_width;
|
|
let enc_height = bench_args.enc_height;
|
|
let split_outputs = bench_args.mode == PipelineMode::Both;
|
|
let mut cpu_stats = None;
|
|
let mut gpu_stats = None;
|
|
|
|
if matches!(bench_args.mode, PipelineMode::Cpu | PipelineMode::Both) {
|
|
let output = output_for_mode(&bench_args.output, PipelineMode::Cpu, split_outputs);
|
|
cpu_stats = Some(run_cpu_pipeline(
|
|
&cap,
|
|
&frames_ctx,
|
|
&output,
|
|
bench_args.frames,
|
|
src_width,
|
|
src_height,
|
|
enc_width,
|
|
enc_height,
|
|
)?);
|
|
}
|
|
|
|
if matches!(bench_args.mode, PipelineMode::Gpu | PipelineMode::Both) {
|
|
let output = output_for_mode(&bench_args.output, PipelineMode::Gpu, split_outputs);
|
|
gpu_stats = Some(run_gpu_pipeline(
|
|
&cap,
|
|
&hw_dev,
|
|
&frames_ctx,
|
|
&output,
|
|
bench_args.frames,
|
|
src_width,
|
|
src_height,
|
|
enc_width,
|
|
enc_height,
|
|
)?);
|
|
}
|
|
|
|
if let Some(stats) = cpu_stats.as_ref() {
|
|
print_detailed_results("CPU", stats, src_width, src_height, enc_width, enc_height);
|
|
}
|
|
if let Some(stats) = gpu_stats.as_ref() {
|
|
print_detailed_results("GPU", stats, src_width, src_height, enc_width, enc_height);
|
|
}
|
|
print_comparison(cpu_stats.as_ref(), gpu_stats.as_ref());
|
|
|
|
if cpu_stats
|
|
.as_ref()
|
|
.into_iter()
|
|
.chain(gpu_stats.as_ref())
|
|
.any(|stats| stats.achieved_fps() < 30.0 && stats.frames_encoded > 0)
|
|
{
|
|
println!("NOTE: At least one achieved FPS result is below 30 FPS target.");
|
|
}
|
|
Ok(())
|
|
}
|