From d80b34f44f3d83f9a709409729a1c16b2e120c73 Mon Sep 17 00:00:00 2001 From: dailz Date: Fri, 29 May 2026 22:04:12 +0800 Subject: [PATCH] feat: GPU-downscale + software H.264 encode pipeline (WIP) Add SwEncState in avhw.rs: GPU pipeline using scale_vaapi to downscale 4K BGRA -> 2K NV12 on AMD iGPU, then software encode with libopenh264. - import_dma_buf_to_vaapi: av_hwframe_map based DMA-BUF import - SwEncState: GPU filter graph (scale_vaapi) + NV12->YUV420P + libopenh264 - state_portal.rs: integrated SwEncState, auto DRM device detection - vaapi_import_bench.rs: CPU vs GPU pipeline benchmark - sw_encode_bench.rs: software encode benchmark Benchmark results: GPU pipeline ~91 FPS theoretical (10.95ms/frame) vs CPU pipeline ~33 FPS (30.21ms/frame). Known issue: only 1 frame encoded in production recording, diagnostic STATS logging added to debug frame flow. --- src/avhw.rs | 669 ++++++++++++++++++--- src/backend_detect.rs | 5 +- src/bin/sw_encode_bench.rs | 545 +++++++++++++++++ src/bin/vaapi_import_bench.rs | 1036 +++++++++++++++++++++++++++++++++ src/cap_portal.rs | 104 ++-- src/lib.rs | 9 + src/main.rs | 39 +- src/state.rs | 32 +- src/state_portal.rs | 282 +++++---- 9 files changed, 2416 insertions(+), 305 deletions(-) create mode 100644 src/bin/sw_encode_bench.rs create mode 100644 src/bin/vaapi_import_bench.rs create mode 100644 src/lib.rs diff --git a/src/avhw.rs b/src/avhw.rs index d123e31..8198b7d 100644 --- a/src/avhw.rs +++ b/src/avhw.rs @@ -1,6 +1,7 @@ use std::ffi::CString; use std::mem; -use std::os::fd::AsRawFd; +use std::os::fd::{AsRawFd, RawFd}; +use std::os::raw::c_void; use std::path::Path; use std::ptr; @@ -129,71 +130,125 @@ impl Drop for AvHwFrameCtx { /// Test whether `drm_device` can import the PipeWire DMA-BUF frame via VAAPI. pub fn test_dma_buf_import(drm_device: &Path, frame: &PwDmaBufFrame) -> Result<()> { let hw_dev = AvHwDevCtx::new_vaapi(drm_device)?; - let frames = AvHwFrameCtx::for_capture( - &hw_dev, - frame.width, - frame.height, - ff::format::Pixel::RGBZ, - )?; + let frames = + AvHwFrameCtx::for_capture(&hw_dev, frame.width, frame.height, ff::format::Pixel::BGRA)?; - // SAFETY: AVDRMFrameDescriptor is a C POD struct. Zero-initialization is the - // expected FFmpeg setup before filling the fields used below. - let mut desc: ffi::AVDRMFrameDescriptor = unsafe { mem::zeroed() }; - desc.nb_objects = 1; - desc.objects[0].fd = frame.fd.as_raw_fd(); - desc.objects[0].size = 0; - desc.objects[0].format_modifier = frame.modifier; - desc.nb_layers = 1; - desc.layers[0].format = frame.format; - desc.layers[0].nb_planes = 1; - desc.layers[0].planes[0].object_index = 0; - desc.layers[0].planes[0].offset = frame.offset as isize; - desc.layers[0].planes[0].pitch = frame.stride as isize; - - let desc_box = Box::new(desc); - let mut raw_frame = ff::frame::Video::empty(); - // SAFETY: raw_frame owns a valid AVFrame. data[0] is used by FFmpeg's - // DRM_PRIME frame convention to point at an AVDRMFrameDescriptor. The Box is - // recovered before every return path below. + // SAFETY: frames is a live VAAPI frames context; frame carries valid DMA-BUF metadata. unsafe { - let raw_ptr = raw_frame.as_mut_ptr(); - (*raw_ptr).data[0] = Box::into_raw(desc_box) as *mut u8; - (*raw_ptr).format = ffi::AVPixelFormat::AV_PIX_FMT_DRM_PRIME as i32; - (*raw_ptr).width = frame.width as i32; - (*raw_ptr).height = frame.height as i32; - } - - let mut hw_frame = ff::frame::Video::empty(); - // SAFETY: frames is an initialized AVHWFramesContext and hw_frame is a valid - // writable AVFrame wrapper. - let ret = unsafe { ffi::av_hwframe_get_buffer(frames.as_ptr(), hw_frame.as_mut_ptr(), 0) }; - if ret < 0 { - // SAFETY: data[0] still contains the Box pointer installed above. - unsafe { - let _ = Box::from_raw((*raw_frame.as_ptr()).data[0] as *mut ffi::AVDRMFrameDescriptor); - (*raw_frame.as_mut_ptr()).data[0] = ptr::null_mut(); - } - bail!("av_hwframe_get_buffer failed: error {ret}"); - } - - // SAFETY: hw_frame is a valid VAAPI frame allocated from `frames`; raw_frame - // is a DRM_PRIME source frame whose descriptor describes `frame`'s DMA-BUF. - let ret = unsafe { ffi::av_hwframe_transfer_data(hw_frame.as_mut_ptr(), raw_frame.as_ptr(), 0) }; - - // SAFETY: data[0] still contains the Box pointer installed above. Recover it - // before checking the transfer result so all paths clean up the descriptor. - unsafe { - let _ = Box::from_raw((*raw_frame.as_ptr()).data[0] as *mut ffi::AVDRMFrameDescriptor); - (*raw_frame.as_mut_ptr()).data[0] = ptr::null_mut(); - } - - if ret < 0 { - bail!("av_hwframe_transfer_data failed: error {ret}"); - } + import_dma_buf_to_vaapi( + frames.as_ptr(), + frame.fd.as_raw_fd(), + frame.width, + frame.height, + frame.format, + frame.modifier, + frame.stride, + frame.offset, + ) + }?; Ok(()) } +/// Import a DMA-BUF into a VAAPI hardware frame via zero-copy `av_hwframe_map`. +/// +/// # Safety +/// - `frames_ctx` must point to an initialized AVHWCramesContext for VAAPI +/// - `raw_fd` must be a valid DMA-BUF file descriptor +pub unsafe fn import_dma_buf_to_vaapi( + frames_ctx: *mut ffi::AVBufferRef, + raw_fd: RawFd, + width: u32, + height: u32, + drm_format: u32, + modifier: u64, + stride: u32, + offset: u64, +) -> Result { + let duped_fd = libc::dup(raw_fd); + if duped_fd < 0 { + bail!("dup(fd) failed: {}", std::io::Error::last_os_error()); + } + + let mut desc: ffi::AVDRMFrameDescriptor = mem::zeroed(); + desc.nb_objects = 1; + desc.objects[0].fd = duped_fd; + desc.objects[0].size = (height as usize) * (stride as usize); + desc.objects[0].format_modifier = modifier; + desc.nb_layers = 1; + desc.layers[0].format = drm_format; + desc.layers[0].nb_planes = 1; + desc.layers[0].planes[0].object_index = 0; + desc.layers[0].planes[0].offset = offset as isize; + desc.layers[0].planes[0].pitch = stride as isize; + + let desc_box = Box::new(desc); + let desc_ptr = Box::into_raw(desc_box); + + let buf_ref = ffi::av_buffer_create( + desc_ptr as *mut u8, + std::mem::size_of::(), + Some(cleanup_drm_descriptor), + ptr::null_mut(), + 0, + ); + if buf_ref.is_null() { + let desc_box = Box::from_raw(desc_ptr); + libc::close(desc_box.objects[0].fd); + bail!("av_buffer_create returned null for DRM descriptor"); + } + + let mut src = ff::frame::Video::empty(); + { + let sp = src.as_mut_ptr(); + (*sp).format = ffi::AVPixelFormat::AV_PIX_FMT_DRM_PRIME as i32; + (*sp).width = width as i32; + (*sp).height = height as i32; + (*sp).data[0] = (*buf_ref).data; + (*sp).buf[0] = buf_ref; + } + + let mut dst = ff::frame::Video::empty(); + unsafe { + let dp = dst.as_mut_ptr(); + (*dp).format = ffi::AVPixelFormat::AV_PIX_FMT_VAAPI as i32; + (*dp).hw_frames_ctx = ffi::av_buffer_ref(frames_ctx); + if (*dp).hw_frames_ctx.is_null() { + bail!("av_buffer_ref(frames_ctx) returned null"); + } + } + let ret = unsafe { + ffi::av_hwframe_map( + dst.as_mut_ptr(), + src.as_ptr(), + ffi::AV_HWFRAME_MAP_READ as i32, + ) + }; + if ret < 0 { + let err_str = av_err_to_string(ret); + bail!("av_hwframe_map failed: error {ret} ({err_str})"); + } + + Ok(dst) +} + +unsafe extern "C" fn cleanup_drm_descriptor(_opaque: *mut c_void, data: *mut u8) { + let desc = data as *mut ffi::AVDRMFrameDescriptor; + if !desc.is_null() && (*desc).nb_objects > 0 && (*desc).objects[0].fd >= 0 { + libc::close((*desc).objects[0].fd); + } + let _ = Box::from_raw(data as *mut ffi::AVDRMFrameDescriptor); +} + +fn av_err_to_string(err: i32) -> String { + let mut buf = vec![0u8; 128]; + unsafe { + ffi::av_strerror(err, buf.as_mut_ptr() as *mut i8, buf.len()); + } + String::from_utf8_lossy(&buf) + .trim_end_matches('\0') + .to_string() +} // --------------------------------------------------------------------------- // EncState // --------------------------------------------------------------------------- @@ -234,9 +289,8 @@ impl EncState { None => AvHwDevCtx::new_vaapi(drm_device)?, }; - // 2. Frame context for capture (XRGB/RGBZ) let frames_rgb = - AvHwFrameCtx::for_capture(&hw_device_ctx, width, height, ff::format::Pixel::RGBZ)?; + AvHwFrameCtx::for_capture(&hw_device_ctx, width, height, ff::format::Pixel::BGRA)?; // 3. Filter graph — must be built BEFORE encoder config so we can derive // hw_frames_ctx from the buffersink output (correct surface pool dimensions). @@ -538,6 +592,245 @@ impl EncState { } } +// --------------------------------------------------------------------------- +// SwEncState - VAAPI GPU downscale + software H.264 encode +// --------------------------------------------------------------------------- + +pub struct SwEncState { + hw_dev: AvHwDevCtx, + frames_rgb: AvHwFrameCtx, + filter_graph: ff::filter::Graph, + sws_ctx: *mut ffi::SwsContext, + enc_video: ff::codec::encoder::video::Video, + octx: ff::format::context::Output, + yuv_frame: *mut ffi::AVFrame, + starting_timestamp: Option, + frames_written: bool, +} + +unsafe impl Send for SwEncState {} + +impl SwEncState { + #[allow(clippy::too_many_arguments)] + pub fn new( + drm_device: &Path, + output_path: &Path, + width: u32, + height: u32, + enc_width: u32, + enc_height: u32, + fps: u32, + bitrate: u64, + gop_size: u32, + ) -> Result { + tracing::info!( + "SwEncState::new: GPU downscale {width}x{height} BGRA -> {enc_width}x{enc_height} NV12, software H.264" + ); + + let hw_dev = AvHwDevCtx::new_vaapi(drm_device)?; + let frames_rgb = + AvHwFrameCtx::for_capture(&hw_dev, width, height, ff::format::Pixel::BGRA)?; + let filter_graph = build_swenc_filter_graph( + &hw_dev, + &frames_rgb, + width, + height, + enc_width, + enc_height, + fps, + )?; + + let sws_ctx = create_nv12_to_yuv420p_sws(enc_width, enc_height)?; + let (enc_video, octx) = + create_software_h264_muxer(output_path, enc_width, enc_height, fps, bitrate, gop_size)?; + let yuv_frame = alloc_yuv420p_frame(enc_width, enc_height)?; + + Ok(Self { + hw_dev, + frames_rgb, + filter_graph, + sws_ctx, + enc_video, + octx, + yuv_frame, + starting_timestamp: None, + frames_written: false, + }) + } + + pub fn frames_rgb(&self) -> &AvHwFrameCtx { + &self.frames_rgb + } + + pub fn encode_frame(&mut self, hw_frame: &ff::frame::Video) -> Result<()> { + let mut filter_src_ctx = self.filter_graph.get("in").unwrap(); + let mut filter_src = filter_src_ctx.source(); + let mut filter_sink_ctx = self.filter_graph.get("out").unwrap(); + let mut filter_sink = filter_sink_ctx.sink(); + + filter_src + .add(hw_frame) + .map_err(|e| anyhow::anyhow!("software pipeline filter source add failed: {e}"))?; + + loop { + let mut filtered = ff::frame::Video::empty(); + match filter_sink.frame(&mut filtered) { + Ok(()) => { + if filtered.pts().is_none() { + filtered.set_pts(hw_frame.pts()); + } + self.encode_filtered_frame(&filtered)?; + } + Err(ff::Error::Other { errno }) if errno == ffi::EAGAIN => break, + Err(e) => bail!("software pipeline filter sink get frame failed: {e}"), + } + } + + Ok(()) + } + + pub fn flush(&mut self) -> Result<()> { + let mut filter_src_ctx = self.filter_graph.get("in").unwrap(); + let mut filter_src = filter_src_ctx.source(); + let _ = filter_src.flush(); + + let mut filter_sink_ctx = self.filter_graph.get("out").unwrap(); + let mut filter_sink = filter_sink_ctx.sink(); + loop { + let mut filtered = ff::frame::Video::empty(); + match filter_sink.frame(&mut filtered) { + Ok(()) => self.encode_filtered_frame(&filtered)?, + Err(_) => break, + } + } + + // SAFETY: Sending a null frame flushes the encoder without transferring ownership. + unsafe { + let ret = ffi::avcodec_send_frame(self.enc_video.as_mut_ptr(), ptr::null()); + if ret < 0 && ret != ffi::AVERROR_EOF { + bail!("software encoder flush send failed: error {ret}"); + } + } + let start_ts = self.starting_timestamp.unwrap_or(0); + self.drain_encoder(start_ts)?; + + if self.frames_written { + self.octx + .write_trailer() + .map_err(|e| anyhow::anyhow!("Failed to write trailer: {e}"))?; + } + + Ok(()) + } + + fn encode_filtered_frame(&mut self, filtered: &ff::frame::Video) -> Result<()> { + let mut sw_nv12 = unsafe { ffi::av_frame_alloc() }; + if sw_nv12.is_null() { + bail!("av_frame_alloc failed for NV12 transfer frame"); + } + + // SAFETY: sw_nv12 is an allocated destination frame; filtered is a valid VAAPI NV12 + // surface produced by scale_vaapi at encoder dimensions. + let transfer_ret = unsafe { ffi::av_hwframe_transfer_data(sw_nv12, filtered.as_ptr(), 0) }; + if transfer_ret < 0 { + // SAFETY: sw_nv12 was allocated above and has not been freed yet. + unsafe { ffi::av_frame_free(&mut sw_nv12) }; + bail!( + "av_hwframe_transfer_data failed for GPU-downscaled frame: error {transfer_ret} ({})", + av_err_to_string(transfer_ret) + ); + } + + // SAFETY: yuv_frame is an owned reusable YUV420P frame at the same dimensions as sw_nv12; + // sws_ctx was created for NV12 -> YUV420P with no resize, so sws_scale only converts format. + unsafe { + let ret = ffi::av_frame_make_writable(self.yuv_frame); + if ret < 0 { + ffi::av_frame_free(&mut sw_nv12); + bail!("av_frame_make_writable failed: error {ret}"); + } + ffi::sws_scale( + self.sws_ctx, + (*sw_nv12).data.as_ptr() as *const *const u8, + (*sw_nv12).linesize.as_ptr() as *const i32, + 0, + (*sw_nv12).height, + (*self.yuv_frame).data.as_ptr() as *mut *mut u8, + (*self.yuv_frame).linesize.as_ptr() as *const i32, + ); + ffi::av_frame_free(&mut sw_nv12); + } + + let pts = filtered.pts().unwrap_or(0); + if self.starting_timestamp.is_none() { + self.starting_timestamp = Some(pts); + } + let start_ts = self.starting_timestamp.unwrap_or(0); + + // SAFETY: yuv_frame is initialized, writable, and matches the opened encoder format. + unsafe { + (*self.yuv_frame).pts = pts; + let ret = ffi::avcodec_send_frame(self.enc_video.as_mut_ptr(), self.yuv_frame); + if ret < 0 { + bail!("avcodec_send_frame failed for software encoder: error {ret}"); + } + } + + self.drain_encoder(start_ts) + } + + fn drain_encoder(&mut self, start_ts: i64) -> Result<()> { + loop { + let mut pkt = ff::Packet::empty(); + // SAFETY: enc_video is an open encoder; pkt is writable packet storage. + let ret = unsafe { + ffi::avcodec_receive_packet(self.enc_video.as_mut_ptr(), pkt.as_mut_ptr()) + }; + if ret < 0 { + if ret == ffi::AVERROR(ffi::EAGAIN) || ret == ffi::AVERROR_EOF { + break; + } + bail!("avcodec_receive_packet failed: error {ret}"); + } + + let enc_tb = self.enc_video.time_base(); + let stream_tb = unsafe { + let streams = (*self.octx.as_ptr()).streams; + let st = *streams.add(0); + ff::Rational::from((*st).time_base) + }; + pkt.rescale_ts(enc_tb, stream_tb); + + if let Some(pts) = pkt.pts() { + pkt.set_pts(Some(pts - start_ts)); + } + if let Some(dts) = pkt.dts() { + pkt.set_dts(Some(dts - start_ts)); + } + + pkt.set_stream(0); + pkt.write_interleaved(&mut self.octx) + .map_err(|e| anyhow::anyhow!("Failed to write packet: {e}"))?; + self.frames_written = true; + } + Ok(()) + } +} + +impl Drop for SwEncState { + fn drop(&mut self) { + if !self.sws_ctx.is_null() { + // SAFETY: sws_ctx is owned by this state and was returned by sws_getContext. + unsafe { ffi::sws_freeContext(self.sws_ctx) }; + self.sws_ctx = ptr::null_mut(); + } + if !self.yuv_frame.is_null() { + // SAFETY: yuv_frame is owned by this state and was allocated by av_frame_alloc. + unsafe { ffi::av_frame_free(&mut self.yuv_frame) }; + } + } +} + // --------------------------------------------------------------------------- // Shared encoder creation (used by both wlr-screencopy and portal paths) // --------------------------------------------------------------------------- @@ -559,11 +852,9 @@ pub fn create_encoder( gop_size: Option, existing_hw_ctx: Option, ) -> Result { - let (enc_w, enc_h) = - transpose_if_transform_transposed(transform, width as i32, height as i32); - let actual_bitrate = bitrate.unwrap_or_else(|| { - 2 * (width as u64) * (height as u64) * (fps as u64) / 100 - }); + let (enc_w, enc_h) = transpose_if_transform_transposed(transform, width as i32, height as i32); + let actual_bitrate = + bitrate.unwrap_or_else(|| 2 * (width as u64) * (height as u64) * (fps as u64) / 100); let actual_gop_size = gop_size.unwrap_or(fps); EncState::new( drm_device, @@ -580,6 +871,247 @@ pub fn create_encoder( ) } +// --------------------------------------------------------------------------- +// Software-encode GPU-downscale helpers +// --------------------------------------------------------------------------- + +#[allow(clippy::too_many_arguments)] +fn build_swenc_filter_graph( + hw_dev: &AvHwDevCtx, + frames_rgb: &AvHwFrameCtx, + width: u32, + height: u32, + enc_width: u32, + enc_height: u32, + fps: u32, +) -> Result { + let mut graph = ff::filter::Graph::new(); + let buffersrc = + ff::filter::find("buffer").ok_or_else(|| anyhow::anyhow!("filter 'buffer' not found"))?; + let buffersink = ff::filter::find("buffersink") + .ok_or_else(|| anyhow::anyhow!("filter 'buffersink' not found"))?; + let scale_vaapi = ff::filter::find("scale_vaapi") + .ok_or_else(|| anyhow::anyhow!("filter 'scale_vaapi' not found"))?; + + // FFmpeg 8.0+ rejects VAAPI pix_fmt in buffer args before hw_frames_ctx is attached. + // Use a SW placeholder, then override format/hw_frames_ctx with av_buffersrc_parameters_set. + let args = format!( + "video_size={}x{}:pix_fmt=bgra:time_base=1/{fps}:pixel_aspect=1/1", + width, height, + ); + let mut src_ctx = graph.add(&buffersrc, "in", &args)?; + + let par = unsafe { ffi::av_buffersrc_parameters_alloc() }; + if par.is_null() { + bail!("av_buffersrc_parameters_alloc returned null"); + } + // SAFETY: par and src_ctx are valid; frames_rgb.ref_clone returns an owned hw_frames_ctx ref + // that buffersrc consumes on successful parameter set. + unsafe { + (*par).format = Into::::into(ff::format::Pixel::VAAPI) as i32; + (*par).width = width as i32; + (*par).height = height as i32; + (*par).time_base = ffi::AVRational { + num: 1, + den: fps as i32, + }; + (*par).hw_frames_ctx = frames_rgb.ref_clone(); + let ret = ffi::av_buffersrc_parameters_set(src_ctx.as_mut_ptr(), par); + ffi::av_free(par as *mut _); + if ret < 0 { + bail!("av_buffersrc_parameters_set failed: error {ret}"); + } + } + + let mut scale_ctx = graph.add( + &scale_vaapi, + "scale", + &format!("{enc_width}:{enc_height}:format=nv12"), + )?; + // SAFETY: scale_vaapi keeps a ref-counted device context while the graph is alive. + unsafe { + (*scale_ctx.as_mut_ptr()).hw_device_ctx = hw_dev.ref_clone(); + } + + let mut sink_ctx = graph.add(&buffersink, "out", "")?; + src_ctx.link(0, &mut scale_ctx, 0); + scale_ctx.link(0, &mut sink_ctx, 0); + graph + .validate() + .map_err(|e| anyhow::anyhow!("software GPU filter graph validation failed: {e}"))?; + + Ok(graph) +} + +fn create_nv12_to_yuv420p_sws(width: u32, height: u32) -> Result<*mut ffi::SwsContext> { + // SAFETY: sws_getContext creates an owned scaler context for same-size NV12 -> YUV420P. + let ctx = unsafe { + ffi::sws_getContext( + width as i32, + height as i32, + ffi::AVPixelFormat::AV_PIX_FMT_NV12, + width as i32, + height as i32, + ffi::AVPixelFormat::AV_PIX_FMT_YUV420P, + 2, + ptr::null_mut(), + ptr::null_mut(), + ptr::null_mut(), + ) + }; + if ctx.is_null() { + bail!("Failed to create NV12 -> YUV420P sws_scale context"); + } + Ok(ctx) +} + +fn alloc_yuv420p_frame(width: u32, height: u32) -> Result<*mut ffi::AVFrame> { + // SAFETY: Allocate an AVFrame, configure format/dimensions, then allocate writable buffers. + unsafe { + let mut frame = ffi::av_frame_alloc(); + if frame.is_null() { + bail!("av_frame_alloc failed"); + } + (*frame).width = width as i32; + (*frame).height = height as i32; + (*frame).format = ffi::AVPixelFormat::AV_PIX_FMT_YUV420P as i32; + let ret = ffi::av_frame_get_buffer(frame, 0); + if ret < 0 { + ffi::av_frame_free(&mut frame); + bail!("av_frame_get_buffer failed: error {ret}"); + } + Ok(frame) + } +} + +fn create_software_h264_muxer( + output_path: &Path, + width: u32, + height: u32, + fps: u32, + bitrate: u64, + gop_size: u32, +) -> Result<( + ff::codec::encoder::video::Video, + ff::format::context::Output, +)> { + let output_cstr = CString::new(output_path.to_str().unwrap())?; + let codec = ff::encoder::find_by_name("libopenh264") + .or_else(|| ff::encoder::find_by_name("libx264")) + .ok_or_else(|| { + anyhow::anyhow!("No H.264 software encoder found (tried libopenh264, libx264)") + })?; + let codec_name = codec.name().to_string(); + + let mut enc = { + let ctx = ff::codec::Context::new_with_codec(codec); + ctx.encoder().video()? + }; + enc.set_width(width); + enc.set_height(height); + enc.set_format(ff::format::Pixel::YUV420P); + enc.set_bit_rate(bitrate as usize); + enc.set_gop(gop_size); + enc.set_time_base(ff::Rational::new(1, fps as i32)); + enc.set_max_b_frames(0); + + // SAFETY: global headers are needed by MP4 and harmless for other common muxers. + unsafe { + (*enc.as_mut_ptr()).flags |= ffi::AV_CODEC_FLAG_GLOBAL_HEADER as i32; + } + + if codec_name == "libx264" { + // SAFETY: priv_data belongs to the unopened encoder; strings live for each call. + unsafe { + let key = CString::new("preset").unwrap(); + let val = CString::new("veryfast").unwrap(); + ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0); + let key = CString::new("tune").unwrap(); + let val = CString::new("zerolatency").unwrap(); + ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0); + } + } + + let opened = enc + .open() + .map_err(|e| anyhow::anyhow!("Failed to open {codec_name} encoder: {e}"))?; + let enc_video = opened.0; + + let use_null = output_path + .to_str() + .map(|s| s.contains("null")) + .unwrap_or(false); + let fmt_name = if use_null { + CString::new("null").unwrap() + } else { + CString::new("").unwrap() + }; + let fmt_name_ptr = if use_null { + fmt_name.as_ptr() + } else { + ptr::null() + }; + + let mut fmt_ctx_ptr: *mut ffi::AVFormatContext = ptr::null_mut(); + // SAFETY: fmt_ctx_ptr is initialized by FFmpeg; C strings live across the call. + let ret = unsafe { + ffi::avformat_alloc_output_context2( + &mut fmt_ctx_ptr, + ptr::null_mut(), + fmt_name_ptr, + output_cstr.as_ptr(), + ) + }; + if ret < 0 || fmt_ctx_ptr.is_null() { + bail!("Failed to allocate output format context: error {ret}"); + } + + // SAFETY: fmt_ctx_ptr is valid; stream and codec parameters are owned by the format context. + let stream_ptr = unsafe { ffi::avformat_new_stream(fmt_ctx_ptr, ptr::null()) }; + if stream_ptr.is_null() { + bail!("Failed to create output stream"); + } + + // SAFETY: stream_ptr and encoder context are valid; parameters are copied into stream. + let ret = + unsafe { ffi::avcodec_parameters_from_context((*stream_ptr).codecpar, enc_video.as_ptr()) }; + if ret < 0 { + bail!("Failed to copy codec parameters to stream: error {ret}"); + } + // SAFETY: stream_ptr is valid and writable during muxer setup. + unsafe { + (*stream_ptr).time_base = (*enc_video.as_ptr()).time_base; + } + + // SAFETY: open an AVIO only for muxers that require files; null muxer advertises NOFILE. + unsafe { + if (*(*fmt_ctx_ptr).oformat).flags & ffi::AVFMT_NOFILE == 0 { + let ret = ffi::avio_open( + &mut (*fmt_ctx_ptr).pb, + output_cstr.as_ptr(), + ffi::AVIO_FLAG_WRITE, + ); + if ret < 0 { + bail!( + "Failed to open output file '{}': error {ret}", + output_path.display() + ); + } + } + } + + // SAFETY: fmt_ctx_ptr is fully configured. + let ret = unsafe { ffi::avformat_write_header(fmt_ctx_ptr, ptr::null_mut()) }; + if ret < 0 { + bail!("Failed to write output header: error {ret}"); + } + + // SAFETY: ownership of fmt_ctx_ptr transfers to ffmpeg-next Output wrapper. + let octx = unsafe { ff::format::context::Output::wrap(fmt_ctx_ptr) }; + tracing::info!("Using software H.264 encoder: {codec_name}"); + Ok((enc_video, octx)) +} + // --------------------------------------------------------------------------- // Filter graph (inline) // --------------------------------------------------------------------------- @@ -668,8 +1200,7 @@ fn build_filter_graph( Transform::Flipped270 => "0", Transform::Normal => unreachable!(), }; - let mut trans_ctx = - graph.add(&transpose, "transpose", &format!("dir={dir_val}"))?; + let mut trans_ctx = graph.add(&transpose, "transpose", &format!("dir={dir_val}"))?; unsafe { (*trans_ctx.as_mut_ptr()).hw_device_ctx = hw_dev.ref_clone(); } diff --git a/src/backend_detect.rs b/src/backend_detect.rs index eb31996..609c373 100644 --- a/src/backend_detect.rs +++ b/src/backend_detect.rs @@ -137,10 +137,7 @@ pub fn detect_backend(args: &Args) -> Result { } other => { // 未知后端名称,返回错误 - anyhow::bail!( - "Unknown backend '{}'. Use 'screencopy' or 'portal'.", - other - ); + anyhow::bail!("Unknown backend '{}'. Use 'screencopy' or 'portal'.", other); } }; } diff --git a/src/bin/sw_encode_bench.rs b/src/bin/sw_encode_bench.rs new file mode 100644 index 0000000..d421224 --- /dev/null +++ b/src/bin/sw_encode_bench.rs @@ -0,0 +1,545 @@ +// sw_encode_bench.rs — Software encoding pipeline benchmark for screen capture +// +// Benchmarks: Portal capture -> mmap DMA-BUF -> sws_scale BGR0->YUV420P -> libx264 encode +// +// Usage: cargo run --bin sw_encode_bench -- --output /tmp/bench_test.mp4 + +use std::ffi::CString; +use std::os::fd::AsRawFd; +use std::path::Path; +use std::ptr; +use std::time::Instant; + +use anyhow::{bail, Result}; +use clap::Parser; + +use ffmpeg_next as ff; +use ffmpeg_next::ffi; +use ffmpeg_next::packet::Mut; + +use wl_webrtc::args::Args; +use wl_webrtc::cap_portal::{CapPortal, PwCtrlEvent}; + +#[derive(Parser, Debug)] +#[command( + name = "sw_encode_bench", + about = "Software encoding pipeline benchmark" +)] +struct BenchArgs { + #[arg(short, long)] + output: String, + + #[arg(long, default_value_t = 120)] + frames: u32, + + #[arg(long, default_value_t = 2560)] + enc_width: u32, + + #[arg(long, default_value_t = 1440)] + enc_height: u32, +} + +#[derive(Default)] +struct FrameStats { + mmap_us: Vec, + scale_us: Vec, + encode_us: Vec, + total_us: Vec, + mmap_failures: u32, +} + +impl FrameStats { + fn avg_ms(data: &[u64]) -> f64 { + if data.is_empty() { + return 0.0; + } + data.iter().sum::() as f64 / data.len() as f64 / 1000.0 + } +} + +fn pix_fmt(p: ff::format::Pixel) -> ffi::AVPixelFormat { + Into::::into(p) +} + +fn receive_first_frame(cap: &CapPortal) -> Result { + loop { + if let Ok(ctrl) = cap.event_receiver().try_recv() { + match ctrl { + PwCtrlEvent::StreamEnded => bail!("PipeWire stream ended before first frame"), + PwCtrlEvent::Error(e) => bail!("PipeWire error: {e}"), + } + } + match cap + .frame_receiver() + .recv_timeout(std::time::Duration::from_secs(10)) + { + Ok(frame) => return Ok(frame), + Err(crossbeam_channel::RecvTimeoutError::Timeout) => { + bail!("Timeout waiting for first frame (10s)"); + } + Err(crossbeam_channel::RecvTimeoutError::Disconnected) => { + bail!("PipeWire frame channel disconnected"); + } + } + } +} + +fn main() -> Result<()> { + let bench_args = BenchArgs::parse(); + + println!("=== Software Encode Benchmark ==="); + println!("Output: {}", bench_args.output); + println!("Target frames: {}", bench_args.frames); + println!( + "Encode resolution: {}x{}", + bench_args.enc_width, bench_args.enc_height + ); + println!(); + + ff::init()?; + + println!("[1/4] Requesting screen capture via XDG Portal..."); + println!(" (Select a screen to share in the portal dialog)"); + + let portal_args = Args { + output: bench_args.output.clone(), + output_name: None, + fps: 60, + codec: "h264".to_string(), + hw_accel: "vaapi".to_string(), + drm_device: None, + bitrate: None, + gop_size: None, + verbose: false, + backend: Some("portal".to_string()), + port: 0, + }; + + let cap = CapPortal::new(&portal_args)?; + println!("[1/4] Portal connected, PipeWire stream active\n"); + + println!("[2/4] Waiting for first frame from PipeWire..."); + let first_frame = receive_first_frame(&cap)?; + + let src_width = first_frame.width; + let src_height = first_frame.height; + let src_stride = first_frame.stride; + let enc_width = bench_args.enc_width; + let enc_height = bench_args.enc_height; + + println!( + "[2/4] First frame: {}x{}, stride={}, format=0x{:08X}", + src_width, src_height, src_stride, first_frame.format + ); + println!( + " Capture: {}x{} Encode: {}x{}\n", + src_width, src_height, enc_width, enc_height + ); + + println!("[3/4] Testing mmap on DMA-BUF..."); + let mmap_size = (src_stride as usize) * (src_height as usize); + let mmap_ptr = unsafe { + libc::mmap( + ptr::null_mut(), + mmap_size, + libc::PROT_READ, + libc::MAP_SHARED, + first_frame.fd.as_raw_fd(), + first_frame.offset as i64, + ) + }; + + if mmap_ptr == libc::MAP_FAILED { + let errno = std::io::Error::last_os_error(); + bail!( + "mmap on DMA-BUF fd FAILED — AMD driver may not support \ + CPU read of screen capture DMA-BUF buffers.\n\ + Error: {} (errno={})\n\ + \n\ + Workarounds:\n\ + 1. Use VAAPI hardware import (av_hwframe_map) instead of mmap\n\ + 2. Use wlroots compositor with wlr-screencopy (SHM-based)\n\ + 3. Use a virtual display or software renderer", + errno, + errno.raw_os_error().unwrap_or(-1) + ); + } + + println!( + "[3/4] mmap SUCCESS — CPU can read DMA-BUF ({:.1} MB)\n", + mmap_size as f64 / 1024.0 / 1024.0 + ); + unsafe { + libc::munmap(mmap_ptr, mmap_size); + } + drop(first_frame); + + // Set up libx264 encoder via FFI (same pattern as avhw.rs) + println!("[4/4] Setting up libx264 encoder..."); + let output_path = Path::new(&bench_args.output); + let output_cstr = CString::new(output_path.to_str().unwrap())?; + + // Try libx264 first (best quality/speed), fall back to openh264 + let codec = ff::encoder::find_by_name("libx264") + .or_else(|| ff::encoder::find_by_name("libopenh264")) + .ok_or_else(|| { + anyhow::anyhow!("No H.264 software encoder found (tried libx264, libopenh264)") + })?; + println!("[4/4] Using encoder: {}\n", codec.name()); + + let mut enc = { + let ctx = ff::codec::Context::new_with_codec(codec); + ctx.encoder().video()? + }; + + enc.set_width(enc_width); + enc.set_height(enc_height); + enc.set_format(ff::format::Pixel::YUV420P); + enc.set_time_base(ff::Rational::new(1, 60)); + enc.set_max_b_frames(0); + enc.set_gop(60); + + let codec_name = codec.name(); + if codec_name == "libx264" { + unsafe { + let key = CString::new("preset").unwrap(); + let val = CString::new("veryfast").unwrap(); + ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0); + let key = CString::new("tune").unwrap(); + let val = CString::new("zerolatency").unwrap(); + ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0); + } + } + + let opened = enc.open()?; + let mut enc_video = opened.0; + + // Create output format context via FFI + let mut fmt_ctx_ptr: *mut ffi::AVFormatContext = ptr::null_mut(); + let ret = unsafe { + ffi::avformat_alloc_output_context2( + &mut fmt_ctx_ptr, + ptr::null_mut(), + ptr::null(), + output_cstr.as_ptr(), + ) + }; + if ret < 0 || fmt_ctx_ptr.is_null() { + bail!("Failed to allocate output format context: error {ret}"); + } + + let stream_ptr = unsafe { ffi::avformat_new_stream(fmt_ctx_ptr, ptr::null()) }; + if stream_ptr.is_null() { + bail!("Failed to create new stream"); + } + + let ret = + unsafe { ffi::avcodec_parameters_from_context((*stream_ptr).codecpar, enc_video.as_ptr()) }; + if ret < 0 { + bail!("Failed to copy encoder parameters: error {ret}"); + } + + unsafe { + (*stream_ptr).time_base = (*enc_video.as_ptr()).time_base; + } + + let ret = unsafe { + ffi::avio_open( + &mut (*fmt_ctx_ptr).pb, + output_cstr.as_ptr(), + ffi::AVIO_FLAG_WRITE, + ) + }; + if ret < 0 { + bail!( + "Failed to open output file '{}': error {ret}", + output_path.display() + ); + } + + let ret = unsafe { ffi::avformat_write_header(fmt_ctx_ptr, ptr::null_mut()) }; + if ret < 0 { + bail!("Failed to write header: error {ret}"); + } + + let mut octx = unsafe { ff::format::context::Output::wrap(fmt_ctx_ptr) }; + + // Create sws_scale context: BGRZ (BGR0) -> YUV420P + let bgr0_fmt = pix_fmt(ff::format::Pixel::BGRZ); + let yuv420p_fmt = pix_fmt(ff::format::Pixel::YUV420P); + + let sws_ctx = unsafe { + ffi::sws_getContext( + src_width as i32, + src_height as i32, + bgr0_fmt, + enc_width as i32, + enc_height as i32, + yuv420p_fmt, + 2, + ptr::null_mut(), + ptr::null_mut(), + ptr::null_mut(), + ) + }; + if sws_ctx.is_null() { + bail!("Failed to create sws_scale context"); + } + + // Allocate reusable YUV frame + let mut yuv_frame = unsafe { + let mut f = ffi::av_frame_alloc(); + if f.is_null() { + bail!("av_frame_alloc failed"); + } + (*f).width = enc_width as i32; + (*f).height = enc_height as i32; + (*f).format = yuv420p_fmt as i32; + let ret = ffi::av_frame_get_buffer(f, 0); + if ret < 0 { + ffi::av_frame_free(&mut f); + bail!("av_frame_get_buffer failed: {ret}"); + } + f + }; + + println!( + "[4/4] Encoder ready: {}, {}x{}\n", + codec_name, enc_width, enc_height + ); + + println!("=== Encoding {} frames ===\n", bench_args.frames); + + let mut stats = FrameStats::default(); + let total_start = Instant::now(); + let mut frames_encoded: u32 = 0; + let mut pts: i64 = 0; + + while frames_encoded < bench_args.frames { + if let Ok(ctrl) = cap.event_receiver().try_recv() { + match ctrl { + PwCtrlEvent::StreamEnded => { + eprintln!("PipeWire stream ended after {} frames", frames_encoded); + break; + } + PwCtrlEvent::Error(e) => { + eprintln!("PipeWire error after {} frames: {}", frames_encoded, e); + break; + } + } + } + + let frame = match cap + .frame_receiver() + .recv_timeout(std::time::Duration::from_secs(5)) + { + Ok(f) => f, + Err(_) => { + eprintln!("Frame timeout/disconnect after {} frames", frames_encoded); + break; + } + }; + + let frame_start = Instant::now(); + + let mmap_start = Instant::now(); + let frame_size = (frame.stride as usize) * (frame.height as usize); + let mmap_ptr = unsafe { + libc::mmap( + ptr::null_mut(), + frame_size, + libc::PROT_READ, + libc::MAP_SHARED, + frame.fd.as_raw_fd(), + frame.offset as i64, + ) + }; + + if mmap_ptr == libc::MAP_FAILED { + stats.mmap_failures += 1; + eprintln!("mmap failed on frame {}", frames_encoded); + drop(frame); + continue; + } + stats.mmap_us.push(mmap_start.elapsed().as_micros() as u64); + + let scale_start = Instant::now(); + let src_data = unsafe { std::slice::from_raw_parts(mmap_ptr as *const u8, frame_size) }; + + unsafe { + ffi::av_frame_make_writable(yuv_frame); + + let src_ptr = src_data.as_ptr(); + let src_linesize = frame.stride as i32; + + ffi::sws_scale( + sws_ctx, + &src_ptr as *const *const u8, + &src_linesize as *const i32, + 0, + frame.height as i32, + (*yuv_frame).data.as_ptr() as *mut *mut u8, + (*yuv_frame).linesize.as_ptr() as *mut i32, + ); + } + stats + .scale_us + .push(scale_start.elapsed().as_micros() as u64); + + unsafe { + libc::munmap(mmap_ptr, frame_size); + } + drop(frame); + + let encode_start = Instant::now(); + + unsafe { + (*yuv_frame).pts = pts; + pts += 1; + + let ret = ffi::avcodec_send_frame(enc_video.as_mut_ptr(), yuv_frame); + if ret < 0 { + eprintln!("avcodec_send_frame failed: {ret}"); + continue; + } + } + + drain_encoder(&mut enc_video, &mut octx)?; + + stats + .encode_us + .push(encode_start.elapsed().as_micros() as u64); + stats + .total_us + .push(frame_start.elapsed().as_micros() as u64); + + frames_encoded += 1; + if frames_encoded % 30 == 0 { + let fps = frames_encoded as f64 / total_start.elapsed().as_secs_f64(); + println!( + " [{}/{}] {:.1} FPS", + frames_encoded, bench_args.frames, fps + ); + } + } + + let total_elapsed = total_start.elapsed(); + + println!("\nFlushing encoder..."); + unsafe { + ffi::avcodec_send_frame(enc_video.as_mut_ptr(), ptr::null()); + } + drain_encoder(&mut enc_video, &mut octx)?; + + octx.write_trailer() + .map_err(|e| anyhow::anyhow!("Failed to write trailer: {e}"))?; + + // Cleanup + unsafe { + ffi::av_frame_free(&mut yuv_frame as *mut _); + ffi::sws_freeContext(sws_ctx); + } + + drop(cap); + + // Print results + let mmap_count = stats.mmap_us.len() as u32; + let mmap_success_rate = if mmap_count + stats.mmap_failures > 0 { + mmap_count as f64 / (mmap_count + stats.mmap_failures) as f64 * 100.0 + } else { + 0.0 + }; + let total_fps = frames_encoded as f64 / total_elapsed.as_secs_f64(); + let avg_total_ms = FrameStats::avg_ms(&stats.total_us); + let max_fps = if avg_total_ms > 0.0 { + 1000.0 / avg_total_ms + } else { + 0.0 + }; + + println!(); + println!("╔══════════════════════════════════════════════════════════════╗"); + println!("║ Software Encode Benchmark Results ║"); + println!("╚══════════════════════════════════════════════════════════════╝"); + println!(); + println!("Capture resolution: {}x{}", src_width, src_height); + println!("Encode resolution: {}x{}", enc_width, enc_height); + println!("Frames encoded: {}", frames_encoded); + println!("Total time: {:.2}s", total_elapsed.as_secs_f64()); + println!(); + println!("mmap (DMA-BUF -> CPU):"); + println!( + " avg: {:.2} ms/frame", + FrameStats::avg_ms(&stats.mmap_us) + ); + println!( + " success rate: {:.1}% ({}/{})", + mmap_success_rate, + mmap_count, + mmap_count + stats.mmap_failures + ); + println!(); + println!("scale (BGR0 -> YUV420P via sws_scale):"); + println!( + " avg: {:.2} ms/frame", + FrameStats::avg_ms(&stats.scale_us) + ); + println!(); + println!("encode ({}):", codec_name); + println!( + " avg: {:.2} ms/frame", + FrameStats::avg_ms(&stats.encode_us) + ); + println!(); + println!("total pipeline:"); + println!(" avg: {:.2} ms/frame", avg_total_ms); + println!(" achieved FPS: {:.1}", total_fps); + println!(" max theoretical: {:.1} FPS", max_fps); + println!(); + + if mmap_success_rate < 100.0 { + println!( + "WARNING: Some mmap operations failed ({}/{})", + stats.mmap_failures, + stats.mmap_failures + mmap_count + ); + } + if total_fps < 30.0 { + println!( + "NOTE: Achieved FPS ({:.1}) is below 30 FPS target.", + total_fps + ); + } + + println!("Output written to: {}", bench_args.output); + Ok(()) +} + +fn drain_encoder( + enc_video: &mut ff::encoder::video::Video, + octx: &mut ff::format::context::Output, +) -> Result<()> { + loop { + let mut pkt = ff::Packet::empty(); + let ret = unsafe { ffi::avcodec_receive_packet(enc_video.as_mut_ptr(), pkt.as_mut_ptr()) }; + if ret < 0 { + if ret == ffi::AVERROR(ffi::EAGAIN) || ret == ffi::AVERROR_EOF { + break; + } + eprintln!("avcodec_receive_packet failed: {ret}"); + break; + } + + let enc_tb = enc_video.time_base(); + let stream_tb = unsafe { + let streams = (*octx.as_ptr()).streams; + let st = *streams.add(0); + ff::Rational::from((*st).time_base) + }; + pkt.rescale_ts(enc_tb, stream_tb); + pkt.set_stream(0); + pkt.write_interleaved(octx) + .map_err(|e| anyhow::anyhow!("write packet failed: {e}"))?; + } + Ok(()) +} diff --git a/src/bin/vaapi_import_bench.rs b/src/bin/vaapi_import_bench.rs new file mode 100644 index 0000000..ba2a898 --- /dev/null +++ b/src/bin/vaapi_import_bench.rs @@ -0,0 +1,1036 @@ +// vaapi_import_bench.rs — VAAPI DMA-BUF import + GPU-side downscale benchmark +// +// Tests: Portal capture -> av_hwframe_map (ARGB sw_format) -> transfer -> sw encode +// +// Usage: cargo run --bin vaapi_import_bench -- --output /tmp/vaapi_bench.mp4 + +use std::ffi::CString; +use std::os::fd::AsRawFd; +use std::path::Path; +use std::ptr; +use std::time::Instant; + +use anyhow::{bail, Result}; +use clap::{Parser, ValueEnum}; + +use ffmpeg_next as ff; +use ffmpeg_next::ffi; +use ffmpeg_next::packet::Mut; + +use wl_webrtc::args::Args; +use wl_webrtc::avhw::{import_dma_buf_to_vaapi, AvHwDevCtx, AvHwFrameCtx}; +use wl_webrtc::cap_portal::{CapPortal, PwCtrlEvent}; + +#[derive(Parser, Debug)] +#[command(name = "vaapi_import_bench", about = "VAAPI DMA-BUF import benchmark")] +struct BenchArgs { + #[arg(short, long)] + output: String, + + #[arg(long, default_value_t = 60)] + frames: u32, + + #[arg(long, default_value_t = 2560)] + enc_width: u32, + + #[arg(long, default_value_t = 1440)] + enc_height: u32, + + #[arg(long, default_value = "/dev/dri/renderD128")] + drm_device: String, + + #[arg(long, value_enum, default_value_t = PipelineMode::Both)] + mode: PipelineMode, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] +enum PipelineMode { + Cpu, + Gpu, + Both, +} + +#[derive(Default)] +struct FrameStats { + import_us: Vec, + filter_us: Vec, + transfer_us: Vec, + scale_us: Vec, + format_us: Vec, + encode_us: Vec, + total_us: Vec, + import_failures: u32, + frames_encoded: u32, + elapsed_secs: f64, + codec_name: String, + output_path: String, +} + +impl FrameStats { + fn avg_ms(data: &[u64]) -> f64 { + if data.is_empty() { + return 0.0; + } + data.iter().sum::() as f64 / data.len() as f64 / 1000.0 + } + + fn avg_total_ms(&self) -> f64 { + Self::avg_ms(&self.total_us) + } + + fn achieved_fps(&self) -> f64 { + if self.frames_encoded > 0 && self.elapsed_secs > 0.0 { + self.frames_encoded as f64 / self.elapsed_secs + } else { + 0.0 + } + } + + fn theoretical_fps(&self) -> f64 { + let avg = self.avg_total_ms(); + if avg > 0.0 { + 1000.0 / avg + } else { + 0.0 + } + } +} + +struct SoftwareEncoder { + enc_video: ff::codec::encoder::video::Video, + octx: ff::format::context::Output, + yuv_frame: *mut ffi::AVFrame, + codec_name: String, +} + +impl Drop for SoftwareEncoder { + fn drop(&mut self) { + // SAFETY: yuv_frame is allocated by av_frame_alloc in create_software_encoder and + // owned exclusively by this SoftwareEncoder. + unsafe { + ffi::av_frame_free(&mut self.yuv_frame); + } + } +} + +struct SwsContext(*mut ffi::SwsContext); + +impl Drop for SwsContext { + fn drop(&mut self) { + // SAFETY: Context is either null or returned by sws_getContext and owned here. + unsafe { + ffi::sws_freeContext(self.0); + } + } +} + +fn av_err_to_string(ret: i32) -> String { + let mut buf = vec![0u8; 128]; + unsafe { + ffi::av_strerror(ret, buf.as_mut_ptr() as *mut i8, buf.len()); + } + let end = buf.iter().position(|&b| b == 0).unwrap_or(buf.len()); + String::from_utf8_lossy(&buf[..end]).to_string() +} + +fn receive_first_frame(cap: &CapPortal) -> Result { + loop { + if let Ok(ctrl) = cap.event_receiver().try_recv() { + match ctrl { + PwCtrlEvent::StreamEnded => bail!("PipeWire stream ended before first frame"), + PwCtrlEvent::Error(e) => bail!("PipeWire error: {e}"), + } + } + match cap + .frame_receiver() + .recv_timeout(std::time::Duration::from_secs(10)) + { + Ok(frame) => return Ok(frame), + Err(crossbeam_channel::RecvTimeoutError::Timeout) => { + bail!("Timeout waiting for first frame (10s)"); + } + Err(crossbeam_channel::RecvTimeoutError::Disconnected) => { + bail!("PipeWire frame channel disconnected"); + } + } + } +} + +fn drain_encoder( + enc_video: &mut ff::codec::encoder::video::Video, + octx: &mut ff::format::context::Output, +) -> Result<()> { + loop { + let mut pkt = ff::Packet::empty(); + let ret = unsafe { ffi::avcodec_receive_packet(enc_video.as_mut_ptr(), pkt.as_mut_ptr()) }; + if ret < 0 { + if ret == ffi::AVERROR(ffi::EAGAIN) || ret == ffi::AVERROR_EOF { + break; + } + eprintln!("avcodec_receive_packet failed: {ret}"); + break; + } + let enc_tb = enc_video.time_base(); + let stream_tb = unsafe { + let streams = (*octx.as_ptr()).streams; + let st = *streams.add(0); + ff::Rational::from((*st).time_base) + }; + pkt.rescale_ts(enc_tb, stream_tb); + pkt.set_stream(0); + pkt.write_interleaved(octx) + .map_err(|e| anyhow::anyhow!("write packet failed: {e}"))?; + } + Ok(()) +} + +fn create_software_encoder(output_path: &Path, width: u32, height: u32) -> Result { + let output_cstr = CString::new(output_path.to_str().unwrap())?; + let codec = ff::encoder::find_by_name("libopenh264") + .or_else(|| ff::encoder::find_by_name("libx264")) + .ok_or_else(|| { + anyhow::anyhow!("No H.264 software encoder found (tried libopenh264, libx264)") + })?; + + let codec_name = codec.name().to_string(); + let mut enc = { + let ctx = ff::codec::Context::new_with_codec(codec); + ctx.encoder().video()? + }; + + enc.set_width(width); + enc.set_height(height); + enc.set_format(ff::format::Pixel::YUV420P); + enc.set_time_base(ff::Rational::new(1, 60)); + enc.set_max_b_frames(0); + enc.set_gop(60); + + if codec_name == "libx264" { + // SAFETY: priv_data belongs to the not-yet-opened encoder context. Option strings are + // valid NUL-terminated C strings for the duration of each av_opt_set call. + unsafe { + let key = CString::new("preset").unwrap(); + let val = CString::new("veryfast").unwrap(); + ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0); + let key = CString::new("tune").unwrap(); + let val = CString::new("zerolatency").unwrap(); + ffi::av_opt_set((*enc.as_mut_ptr()).priv_data, key.as_ptr(), val.as_ptr(), 0); + } + } + + let opened = enc.open()?; + let enc_video = opened.0; + + let use_null_muxer = output_path + .to_str() + .map(|s| s.contains("null")) + .unwrap_or(false); + let fmt_name = if use_null_muxer { + CString::new("null").unwrap() + } else { + CString::new("").unwrap() + }; + let fmt_name_ptr = if use_null_muxer { + fmt_name.as_ptr() + } else { + ptr::null() + }; + + let mut fmt_ctx_ptr: *mut ffi::AVFormatContext = ptr::null_mut(); + // SAFETY: fmt_ctx_ptr is an out pointer initialized by FFmpeg; output_cstr and fmt_name live + // across the call. + let ret = unsafe { + ffi::avformat_alloc_output_context2( + &mut fmt_ctx_ptr, + ptr::null_mut(), + fmt_name_ptr, + output_cstr.as_ptr(), + ) + }; + if ret < 0 || fmt_ctx_ptr.is_null() { + bail!("Failed to allocate output format context: error {ret}"); + } + + // SAFETY: fmt_ctx_ptr is a valid output context allocated above. + let stream_ptr = unsafe { ffi::avformat_new_stream(fmt_ctx_ptr, ptr::null()) }; + if stream_ptr.is_null() { + bail!("Failed to create output stream"); + } + + // SAFETY: stream and codec context pointers are valid; parameters are copied into stream. + let ret = + unsafe { ffi::avcodec_parameters_from_context((*stream_ptr).codecpar, enc_video.as_ptr()) }; + if ret < 0 { + bail!("Failed to copy codec parameters: error {ret}"); + } + + // SAFETY: fmt_ctx_ptr is valid; pb is initialized for non-NOFILE muxers. + unsafe { + if (*(*fmt_ctx_ptr).oformat).flags & ffi::AVFMT_NOFILE == 0 { + let ret = ffi::avio_open( + &mut (*fmt_ctx_ptr).pb, + output_cstr.as_ptr(), + ffi::AVIO_FLAG_WRITE, + ); + if ret < 0 { + bail!("Failed to open output file: error {ret}"); + } + } + } + + // SAFETY: fmt_ctx_ptr is a fully configured output context. + let ret = unsafe { ffi::avformat_write_header(fmt_ctx_ptr, ptr::null_mut()) }; + if ret < 0 { + bail!("Failed to write header: error {ret}"); + } + + // SAFETY: ownership of fmt_ctx_ptr transfers into ffmpeg-next Output wrapper. + let octx = unsafe { ff::format::context::Output::wrap(fmt_ctx_ptr) }; + + // SAFETY: Allocate and configure an owned writable YUV420P frame for encoder input. + let yuv_frame = unsafe { + let mut f = ffi::av_frame_alloc(); + if f.is_null() { + bail!("av_frame_alloc failed"); + } + (*f).width = width as i32; + (*f).height = height as i32; + (*f).format = ffi::AVPixelFormat::AV_PIX_FMT_YUV420P as i32; + let r = ffi::av_frame_get_buffer(f, 0); + if r < 0 { + ffi::av_frame_free(&mut f); + bail!("av_frame_get_buffer failed: {r}"); + } + f + }; + + Ok(SoftwareEncoder { + enc_video, + octx, + yuv_frame, + codec_name, + }) +} + +fn output_for_mode(base: &str, mode: PipelineMode, split: bool) -> String { + if !split || base.contains("null") { + return base.to_string(); + } + + let path = Path::new(base); + let suffix = match mode { + PipelineMode::Cpu => "cpu", + PipelineMode::Gpu => "gpu", + PipelineMode::Both => unreachable!(), + }; + let file_name = path.file_name().and_then(|s| s.to_str()).unwrap_or(base); + let split_name = if let Some((stem, ext)) = file_name.rsplit_once('.') { + format!("{stem}.{suffix}.{ext}") + } else { + format!("{file_name}.{suffix}") + }; + path.with_file_name(split_name) + .to_string_lossy() + .into_owned() +} + +fn create_sws_context( + src_width: u32, + src_height: u32, + src_fmt: ffi::AVPixelFormat, + dst_width: u32, + dst_height: u32, +) -> Result { + // SAFETY: sws_getContext creates an owned scaler context for the provided dimensions/formats. + let ctx = unsafe { + ffi::sws_getContext( + src_width as i32, + src_height as i32, + src_fmt, + dst_width as i32, + dst_height as i32, + ffi::AVPixelFormat::AV_PIX_FMT_YUV420P, + 2, + ptr::null_mut(), + ptr::null_mut(), + ptr::null_mut(), + ) + }; + if ctx.is_null() { + bail!("Failed to create sws_scale context"); + } + Ok(SwsContext(ctx)) +} + +fn encode_yuv_frame(encoder: &mut SoftwareEncoder, pts: &mut i64) -> Result { + let t_encode = Instant::now(); + // SAFETY: yuv_frame is allocated, writable, and formatted as the encoder's configured + // YUV420P input frame. FFmpeg consumes but does not take ownership. + unsafe { + (*encoder.yuv_frame).pts = *pts; + *pts += 1; + let r = ffi::avcodec_send_frame(encoder.enc_video.as_mut_ptr(), encoder.yuv_frame); + if r < 0 { + bail!("avcodec_send_frame failed: {r}"); + } + } + drain_encoder(&mut encoder.enc_video, &mut encoder.octx)?; + Ok(t_encode.elapsed().as_micros() as u64) +} + +fn finish_encoder(mut encoder: SoftwareEncoder) -> Result<()> { + // SAFETY: Sending a null frame flushes the encoder; context remains owned by encoder. + unsafe { + ffi::avcodec_send_frame(encoder.enc_video.as_mut_ptr(), ptr::null()); + } + drain_encoder(&mut encoder.enc_video, &mut encoder.octx)?; + encoder + .octx + .write_trailer() + .map_err(|e| anyhow::anyhow!("Failed to write trailer: {e}"))?; + Ok(()) +} + +fn import_frame( + frames_ctx: &AvHwFrameCtx, + frame: &wl_webrtc::cap_portal::PwDmaBufFrame, +) -> Result { + // SAFETY: frames_ctx is a live VAAPI frames context configured for the capture format; frame + // carries a valid DMA-BUF fd and metadata from PipeWire for the duration of the call. + unsafe { + import_dma_buf_to_vaapi( + frames_ctx.as_ptr(), + frame.fd.as_raw_fd(), + frame.width, + frame.height, + frame.format, + frame.modifier, + frame.stride, + frame.offset, + ) + } +} + +fn build_gpu_filter_graph( + hw_dev: &AvHwDevCtx, + frames_rgb: &AvHwFrameCtx, + width: u32, + height: u32, + enc_width: u32, + enc_height: u32, +) -> Result { + let mut graph = ff::filter::Graph::new(); + let buffersrc = + ff::filter::find("buffer").ok_or_else(|| anyhow::anyhow!("filter 'buffer' not found"))?; + let buffersink = ff::filter::find("buffersink") + .ok_or_else(|| anyhow::anyhow!("filter 'buffersink' not found"))?; + let scale_vaapi = ff::filter::find("scale_vaapi") + .ok_or_else(|| anyhow::anyhow!("filter 'scale_vaapi' not found"))?; + + // pix_fmt must be set via av_buffersrc_parameters_set (below), not in args — + // FFmpeg 8.0+ rejects HW pixel formats during init() if hw_frames_ctx is missing. + // Use a placeholder SW format here; it gets overridden by parameters_set below. + let args = format!( + "video_size={}x{}:pix_fmt=bgra:time_base=1/60:pixel_aspect=1/1", + width, height, + ); + let mut src_ctx = graph.add(&buffersrc, "in", &args)?; + + // SAFETY: Allocate buffersrc parameters, attach a ref-counted hw_frames_ctx compatible with + // imported VAAPI BGRA frames, apply it, then free only the parameter struct (not the ref). + let par = unsafe { ffi::av_buffersrc_parameters_alloc() }; + if par.is_null() { + bail!("av_buffersrc_parameters_alloc returned null"); + } + // SAFETY: par and src_ctx are valid; frames_rgb.ref_clone returns an owned AVBufferRef. + unsafe { + (*par).format = Into::::into(ff::format::Pixel::VAAPI) as i32; + (*par).width = width as i32; + (*par).height = height as i32; + (*par).time_base = ffi::AVRational { num: 1, den: 60 }; + (*par).hw_frames_ctx = frames_rgb.ref_clone(); + let ret = ffi::av_buffersrc_parameters_set(src_ctx.as_mut_ptr(), par); + ffi::av_free(par as *mut _); + if ret < 0 { + bail!("av_buffersrc_parameters_set failed: error {ret}"); + } + } + + let mut scale_ctx = graph.add( + &scale_vaapi, + "scale", + &format!("{enc_width}:{enc_height}:format=nv12"), + )?; + // SAFETY: scale_vaapi uses this ref-counted VAAPI device context while graph is alive. + unsafe { + (*scale_ctx.as_mut_ptr()).hw_device_ctx = hw_dev.ref_clone(); + } + + let mut sink_ctx = graph.add(&buffersink, "out", "")?; + src_ctx.link(0, &mut scale_ctx, 0); + scale_ctx.link(0, &mut sink_ctx, 0); + graph + .validate() + .map_err(|e| anyhow::anyhow!("GPU filter graph validation failed: {e}"))?; + + Ok(graph) +} + +#[allow(clippy::too_many_arguments)] +fn run_cpu_pipeline( + cap: &CapPortal, + frames_ctx: &AvHwFrameCtx, + output: &str, + frames: u32, + src_width: u32, + src_height: u32, + enc_width: u32, + enc_height: u32, +) -> Result { + let mut encoder = create_software_encoder(Path::new(output), enc_width, enc_height)?; + let sws_ctx = create_sws_context( + src_width, + src_height, + ffi::AVPixelFormat::AV_PIX_FMT_BGRA, + enc_width, + enc_height, + )?; + + println!( + " Encoder: {}, {}x{} YUV420P", + encoder.codec_name, enc_width, enc_height + ); + println!(" Output: {output}"); + println!(" CPU Pipeline: DMA-BUF 4K BGRA -> av_hwframe_map -> av_hwframe_transfer_data -> sws_scale -> YUV420P 2K -> encode\n"); + + let mut stats = FrameStats { + codec_name: encoder.codec_name.clone(), + output_path: output.to_string(), + ..FrameStats::default() + }; + let total_start = Instant::now(); + let mut pts: i64 = 0; + + while stats.frames_encoded < frames { + if let Ok(ctrl) = cap.event_receiver().try_recv() { + match ctrl { + PwCtrlEvent::StreamEnded => break, + PwCtrlEvent::Error(e) => bail!( + "PipeWire error after {} CPU frames: {e}", + stats.frames_encoded + ), + } + } + + let frame = match cap + .frame_receiver() + .recv_timeout(std::time::Duration::from_secs(5)) + { + Ok(f) => f, + Err(_) => break, + }; + + let frame_start = Instant::now(); + let t_import = Instant::now(); + let vaapi_frame = match import_frame(frames_ctx, &frame) { + Ok(f) => f, + Err(e) => { + stats.import_failures += 1; + if stats.import_failures <= 3 { + eprintln!("CPU frame {}: import failed: {e}", stats.frames_encoded); + } + continue; + } + }; + let import_us = t_import.elapsed().as_micros() as u64; + + let t_transfer = Instant::now(); + // SAFETY: sw_frame is allocated by FFmpeg and freed on all paths below. + let mut sw_frame = unsafe { ffi::av_frame_alloc() }; + if sw_frame.is_null() { + bail!("CPU frame {}: av_frame_alloc failed", stats.frames_encoded); + } + // SAFETY: sw_frame is an allocated destination; vaapi_frame is a valid VAAPI source frame. + let transfer_ret = + unsafe { ffi::av_hwframe_transfer_data(sw_frame, vaapi_frame.as_ptr(), 0) }; + if transfer_ret < 0 { + // SAFETY: sw_frame was allocated above and has not been freed yet. + unsafe { ffi::av_frame_free(&mut sw_frame) }; + bail!( + "CPU frame {}: av_hwframe_transfer_data failed: {} ({})", + stats.frames_encoded, + transfer_ret, + av_err_to_string(transfer_ret) + ); + } + let transfer_us = t_transfer.elapsed().as_micros() as u64; + + let t_scale = Instant::now(); + // SAFETY: sw_frame contains transferred BGRA data; encoder.yuv_frame is writable YUV420P + // at the configured output dimensions; sws_ctx converts and downscales between them. + unsafe { + ffi::av_frame_make_writable(encoder.yuv_frame); + ffi::sws_scale( + sws_ctx.0, + (*sw_frame).data.as_ptr() as *const *const u8, + (*sw_frame).linesize.as_ptr() as *const i32, + 0, + (*sw_frame).height, + (*encoder.yuv_frame).data.as_ptr() as *mut *mut u8, + (*encoder.yuv_frame).linesize.as_ptr() as *const i32, + ); + } + let scale_us = t_scale.elapsed().as_micros() as u64; + // SAFETY: sw_frame was allocated above and is no longer needed after scaling. + unsafe { ffi::av_frame_free(&mut sw_frame) }; + + let encode_us = encode_yuv_frame(&mut encoder, &mut pts)?; + let total_us = frame_start.elapsed().as_micros() as u64; + + stats.import_us.push(import_us); + stats.transfer_us.push(transfer_us); + stats.scale_us.push(scale_us); + stats.encode_us.push(encode_us); + stats.total_us.push(total_us); + stats.frames_encoded += 1; + + if stats.frames_encoded <= 3 || stats.frames_encoded % 30 == 0 { + println!( + " CPU frame {:>4}/{frames}: import={:.2}ms transfer={:.2}ms scale={:.2}ms encode={:.2}ms total={:.2}ms", + stats.frames_encoded, + import_us as f64 / 1000.0, + transfer_us as f64 / 1000.0, + scale_us as f64 / 1000.0, + encode_us as f64 / 1000.0, + total_us as f64 / 1000.0, + ); + } + } + + finish_encoder(encoder)?; + stats.elapsed_secs = total_start.elapsed().as_secs_f64(); + Ok(stats) +} + +#[allow(clippy::too_many_arguments)] +fn run_gpu_pipeline( + cap: &CapPortal, + hw_dev: &AvHwDevCtx, + frames_ctx: &AvHwFrameCtx, + output: &str, + frames: u32, + src_width: u32, + src_height: u32, + enc_width: u32, + enc_height: u32, +) -> Result { + let mut encoder = create_software_encoder(Path::new(output), enc_width, enc_height)?; + let format_ctx = create_sws_context( + enc_width, + enc_height, + ffi::AVPixelFormat::AV_PIX_FMT_NV12, + enc_width, + enc_height, + )?; + let mut graph = build_gpu_filter_graph( + hw_dev, frames_ctx, src_width, src_height, enc_width, enc_height, + )?; + + println!( + " Encoder: {}, {}x{} YUV420P", + encoder.codec_name, enc_width, enc_height + ); + println!(" Output: {output}"); + println!(" GPU Pipeline: DMA-BUF 4K BGRA -> av_hwframe_map -> scale_vaapi 2K NV12 -> transfer small NV12 -> sws_scale format-only -> encode\n"); + + let mut stats = FrameStats { + codec_name: encoder.codec_name.clone(), + output_path: output.to_string(), + ..FrameStats::default() + }; + let total_start = Instant::now(); + let mut pts: i64 = 0; + + while stats.frames_encoded < frames { + if let Ok(ctrl) = cap.event_receiver().try_recv() { + match ctrl { + PwCtrlEvent::StreamEnded => break, + PwCtrlEvent::Error(e) => bail!( + "PipeWire error after {} GPU frames: {e}", + stats.frames_encoded + ), + } + } + + let frame = match cap + .frame_receiver() + .recv_timeout(std::time::Duration::from_secs(5)) + { + Ok(f) => f, + Err(_) => break, + }; + + let frame_start = Instant::now(); + let t_import = Instant::now(); + let vaapi_frame = match import_frame(frames_ctx, &frame) { + Ok(f) => f, + Err(e) => { + stats.import_failures += 1; + if stats.import_failures <= 3 { + eprintln!("GPU frame {}: import failed: {e}", stats.frames_encoded); + } + continue; + } + }; + let import_us = t_import.elapsed().as_micros() as u64; + + let t_filter = Instant::now(); + let mut filter_src_ctx = graph.get("in").unwrap(); + let mut filter_src = filter_src_ctx.source(); + let mut filter_sink_ctx = graph.get("out").unwrap(); + let mut filter_sink = filter_sink_ctx.sink(); + filter_src + .add(&vaapi_frame) + .map_err(|e| anyhow::anyhow!("GPU filter source add failed: {e}"))?; + + let mut filtered = ff::frame::Video::empty(); + match filter_sink.frame(&mut filtered) { + Ok(()) => {} + Err(ff::Error::Other { errno }) if errno == ffi::EAGAIN => continue, + Err(e) => bail!("GPU filter sink get frame failed: {e}"), + } + let filter_us = t_filter.elapsed().as_micros() as u64; + + let t_transfer = Instant::now(); + // SAFETY: sw_nv12 is allocated by FFmpeg and freed after format conversion. + let mut sw_nv12 = unsafe { ffi::av_frame_alloc() }; + if sw_nv12.is_null() { + bail!("GPU frame {}: av_frame_alloc failed", stats.frames_encoded); + } + // SAFETY: sw_nv12 is an allocated destination; filtered is a valid 2K NV12 VAAPI frame. + let transfer_ret = unsafe { ffi::av_hwframe_transfer_data(sw_nv12, filtered.as_ptr(), 0) }; + if transfer_ret < 0 { + // SAFETY: sw_nv12 was allocated above and has not been freed yet. + unsafe { ffi::av_frame_free(&mut sw_nv12) }; + bail!( + "GPU frame {}: av_hwframe_transfer_data failed: {} ({})", + stats.frames_encoded, + transfer_ret, + av_err_to_string(transfer_ret) + ); + } + let transfer_us = t_transfer.elapsed().as_micros() as u64; + + let t_format = Instant::now(); + // SAFETY: sw_nv12 contains CPU-side NV12 at enc dimensions; encoder.yuv_frame is writable + // YUV420P at the same dimensions, so sws_scale performs only chroma deinterleave/format conversion. + unsafe { + ffi::av_frame_make_writable(encoder.yuv_frame); + ffi::sws_scale( + format_ctx.0, + (*sw_nv12).data.as_ptr() as *const *const u8, + (*sw_nv12).linesize.as_ptr() as *const i32, + 0, + (*sw_nv12).height, + (*encoder.yuv_frame).data.as_ptr() as *mut *mut u8, + (*encoder.yuv_frame).linesize.as_ptr() as *const i32, + ); + } + let format_us = t_format.elapsed().as_micros() as u64; + // SAFETY: sw_nv12 was allocated above and is no longer needed. + unsafe { ffi::av_frame_free(&mut sw_nv12) }; + + let encode_us = encode_yuv_frame(&mut encoder, &mut pts)?; + let total_us = frame_start.elapsed().as_micros() as u64; + + stats.import_us.push(import_us); + stats.filter_us.push(filter_us); + stats.transfer_us.push(transfer_us); + stats.format_us.push(format_us); + stats.encode_us.push(encode_us); + stats.total_us.push(total_us); + stats.frames_encoded += 1; + + if stats.frames_encoded <= 3 || stats.frames_encoded % 30 == 0 { + println!( + " GPU frame {:>4}/{frames}: import={:.2}ms filter={:.2}ms transfer={:.2}ms format={:.2}ms encode={:.2}ms total={:.2}ms", + stats.frames_encoded, + import_us as f64 / 1000.0, + filter_us as f64 / 1000.0, + transfer_us as f64 / 1000.0, + format_us as f64 / 1000.0, + encode_us as f64 / 1000.0, + total_us as f64 / 1000.0, + ); + } + } + + finish_encoder(encoder)?; + stats.elapsed_secs = total_start.elapsed().as_secs_f64(); + Ok(stats) +} + +fn print_detailed_results( + label: &str, + stats: &FrameStats, + src_width: u32, + src_height: u32, + enc_width: u32, + enc_height: u32, +) { + println!(); + println!("=== {label} Pipeline Results ==="); + println!("Capture resolution: {}x{}", src_width, src_height); + println!("Encode resolution: {}x{}", enc_width, enc_height); + println!("Frames encoded: {}", stats.frames_encoded); + println!("Total time: {:.2}s", stats.elapsed_secs); + println!("Output: {}", stats.output_path); + if stats.import_failures > 0 { + println!("Import failures: {}", stats.import_failures); + } + println!( + "import avg: {:.2} ms/frame", + FrameStats::avg_ms(&stats.import_us) + ); + if !stats.filter_us.is_empty() { + println!( + "filter avg: {:.2} ms/frame", + FrameStats::avg_ms(&stats.filter_us) + ); + } + println!( + "transfer avg: {:.2} ms/frame", + FrameStats::avg_ms(&stats.transfer_us) + ); + if !stats.scale_us.is_empty() { + println!( + "scale avg: {:.2} ms/frame", + FrameStats::avg_ms(&stats.scale_us) + ); + } + if !stats.format_us.is_empty() { + println!( + "format avg: {:.2} ms/frame", + FrameStats::avg_ms(&stats.format_us) + ); + } + println!( + "encode ({}): {:.2} ms/frame", + stats.codec_name, + FrameStats::avg_ms(&stats.encode_us) + ); + println!("total avg: {:.2} ms/frame", stats.avg_total_ms()); + println!("achieved FPS: {:.1}", stats.achieved_fps()); + println!("max theoretical: {:.1} FPS", stats.theoretical_fps()); +} + +fn print_comparison(cpu: Option<&FrameStats>, gpu: Option<&FrameStats>) { + println!(); + println!("=== Pipeline Comparison ==="); + if let Some(s) = cpu { + println!( + "CPU: import={:.2}ms transfer={:.2}ms scale={:.2}ms encode={:.2}ms total={:.2}ms ({:.1} FPS)", + FrameStats::avg_ms(&s.import_us), + FrameStats::avg_ms(&s.transfer_us), + FrameStats::avg_ms(&s.scale_us), + FrameStats::avg_ms(&s.encode_us), + s.avg_total_ms(), + s.theoretical_fps(), + ); + } + if let Some(s) = gpu { + println!( + "GPU: import={:.2}ms filter={:.2}ms transfer={:.2}ms format={:.2}ms encode={:.2}ms total={:.2}ms ({:.1} FPS)", + FrameStats::avg_ms(&s.import_us), + FrameStats::avg_ms(&s.filter_us), + FrameStats::avg_ms(&s.transfer_us), + FrameStats::avg_ms(&s.format_us), + FrameStats::avg_ms(&s.encode_us), + s.avg_total_ms(), + s.theoretical_fps(), + ); + } +} + +fn main() -> Result<()> { + let bench_args = BenchArgs::parse(); + + println!("=== VAAPI Import Benchmark ==="); + println!("Output: {}", bench_args.output); + println!("Target frames: {}", bench_args.frames); + println!( + "Encode resolution: {}x{}", + bench_args.enc_width, bench_args.enc_height + ); + println!("DRM device: {}", bench_args.drm_device); + println!(); + + ff::init()?; + + println!("[1/3] Requesting screen capture via XDG Portal..."); + println!(" (Select a screen to share in the portal dialog)"); + + let portal_args = Args { + output: bench_args.output.clone(), + output_name: None, + fps: 60, + codec: "h264".to_string(), + hw_accel: "vaapi".to_string(), + drm_device: None, + bitrate: None, + gop_size: None, + verbose: false, + backend: Some("portal".to_string()), + port: 0, + }; + + let cap = CapPortal::new(&portal_args)?; + println!("[1/3] Portal connected, PipeWire stream active\n"); + + println!("[2/3] Waiting for first frame from PipeWire..."); + let first_frame = receive_first_frame(&cap)?; + + let src_width = first_frame.width; + let src_height = first_frame.height; + let src_format = first_frame.format; + + println!( + "[2/3] First frame: {}x{}, format=0x{:08X}, stride={}, modifier=0x{:X}", + src_width, src_height, src_format, first_frame.stride, first_frame.modifier + ); + + println!("\n[2/3] Testing av_hwframe_map with sw_format=BGRA..."); + println!( + " DRM format chain: PipeWire BGRA -> DRM_FORMAT_ARGB8888 (0x{:08X}) -> VA_FOURCC_BGRA -> AV_PIX_FMT_BGRA", + src_format + ); + + let drm_device = Path::new(&bench_args.drm_device); + let hw_dev = AvHwDevCtx::new_vaapi(drm_device)?; + println!(" VAAPI device context created OK"); + + let frames_ctx = + AvHwFrameCtx::for_capture(&hw_dev, src_width, src_height, ff::format::Pixel::BGRA)?; + println!(" VAAPI frames context created OK (sw_format=BGRA)"); + + let vaapi_frame = unsafe { + import_dma_buf_to_vaapi( + frames_ctx.as_ptr(), + first_frame.fd.as_raw_fd(), + first_frame.width, + first_frame.height, + first_frame.format, + first_frame.modifier, + first_frame.stride, + first_frame.offset, + ) + }; + + match &vaapi_frame { + Ok(_) => { + println!(" Result: SUCCESS — av_hwframe_map imported DMA-BUF to VAAPI surface!"); + } + Err(e) => { + println!(" Result: FAILED"); + println!(" Error: {e}"); + println!(); + println!(" Possible causes:"); + println!(" - sw_format mismatch (current: BGRA)"); + println!(" - DRM format modifier not supported by VAAPI"); + println!(" - VAAPI driver doesn't support DMA-BUF import for this format"); + println!(); + println!(" Falling back to mmap readback test for comparison..."); + + let mmap_size = (first_frame.stride as usize) * (first_frame.height as usize); + let mmap_start = Instant::now(); + let mmap_ptr = unsafe { + libc::mmap( + ptr::null_mut(), + mmap_size, + libc::PROT_READ, + libc::MAP_SHARED, + first_frame.fd.as_raw_fd(), + first_frame.offset as i64, + ) + }; + let mmap_elapsed = mmap_start.elapsed(); + + if mmap_ptr == libc::MAP_FAILED { + let errno = std::io::Error::last_os_error(); + println!(" mmap also FAILED: {errno}"); + } else { + println!( + " mmap SUCCESS: {:.1} MB, setup in {:.2}ms", + mmap_size as f64 / 1024.0 / 1024.0, + mmap_elapsed.as_secs_f64() * 1000.0 + ); + unsafe { + libc::munmap(mmap_ptr, mmap_size); + } + } + + println!(); + println!("=== Benchmark ended: av_hwframe_map import FAILED ==="); + println!("Fix the import issue before proceeding to GPU downscale tests."); + return Ok(()); + } + } + + drop(vaapi_frame); + drop(first_frame); + + println!("\n[3/3] Benchmarking selected pipeline(s)..."); + + let enc_width = bench_args.enc_width; + let enc_height = bench_args.enc_height; + let split_outputs = bench_args.mode == PipelineMode::Both; + let mut cpu_stats = None; + let mut gpu_stats = None; + + if matches!(bench_args.mode, PipelineMode::Cpu | PipelineMode::Both) { + let output = output_for_mode(&bench_args.output, PipelineMode::Cpu, split_outputs); + cpu_stats = Some(run_cpu_pipeline( + &cap, + &frames_ctx, + &output, + bench_args.frames, + src_width, + src_height, + enc_width, + enc_height, + )?); + } + + if matches!(bench_args.mode, PipelineMode::Gpu | PipelineMode::Both) { + let output = output_for_mode(&bench_args.output, PipelineMode::Gpu, split_outputs); + gpu_stats = Some(run_gpu_pipeline( + &cap, + &hw_dev, + &frames_ctx, + &output, + bench_args.frames, + src_width, + src_height, + enc_width, + enc_height, + )?); + } + + if let Some(stats) = cpu_stats.as_ref() { + print_detailed_results("CPU", stats, src_width, src_height, enc_width, enc_height); + } + if let Some(stats) = gpu_stats.as_ref() { + print_detailed_results("GPU", stats, src_width, src_height, enc_width, enc_height); + } + print_comparison(cpu_stats.as_ref(), gpu_stats.as_ref()); + + if cpu_stats + .as_ref() + .into_iter() + .chain(gpu_stats.as_ref()) + .any(|stats| stats.achieved_fps() < 30.0 && stats.frames_encoded > 0) + { + println!("NOTE: At least one achieved FPS result is below 30 FPS target."); + } + Ok(()) +} diff --git a/src/cap_portal.rs b/src/cap_portal.rs index cb1495c..2112e21 100644 --- a/src/cap_portal.rs +++ b/src/cap_portal.rs @@ -16,7 +16,7 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::thread::{self, JoinHandle}; use anyhow::Result; -use crossbeam_channel::{Receiver, Sender, bounded}; +use crossbeam_channel::{bounded, Receiver, Sender}; use tokio::runtime::Runtime; use crate::args::Args; @@ -105,9 +105,7 @@ impl CapPortal { // 通过 Portal 获取 PipeWire 连接 fd 和节点 ID // block_on 在此处同步等待异步 Portal 调用完成 - let (pw_fd, node_id) = rt.block_on(async { - Self::setup_portal().await - })?; + let (pw_fd, node_id) = rt.block_on(async { Self::setup_portal().await })?; let (frame_tx, frame_rx) = bounded(3); let (event_tx, event_rx) = bounded(8); @@ -181,9 +179,9 @@ impl CapPortal { use ashpd::desktop::PersistMode; // 创建 Screencast D-Bus 代理,与桌面环境的 Portal 服务通信 - let proxy = Screencast::new().await.map_err(|e| { - anyhow::anyhow!("Failed to create Screencast proxy: {e}") - })?; + let proxy = Screencast::new() + .await + .map_err(|e| anyhow::anyhow!("Failed to create Screencast proxy: {e}"))?; // 创建 ScreenCast 会话(每个会话对应一次屏幕录制请求) let session = proxy @@ -287,10 +285,10 @@ impl Drop for CapPortal { fn pipewire_thread(ctx: PwThreadCtx) { use pipewire as pw; use pw::properties::properties; + use pw::spa::param::video::VideoInfoRaw; use pw::stream::{StreamBox, StreamFlags}; use std::cell::Cell; use std::rc::Rc; - use pw::spa::param::video::VideoInfoRaw; // 初始化 PipeWire 进程全局库。 // @@ -329,9 +327,7 @@ fn pipewire_thread(ctx: PwThreadCtx) { let core = match context.connect_fd(pw_fd, None) { Ok(c) => c, Err(e) => { - let _ = event_tx.try_send(PwCtrlEvent::Error(format!( - "connect_fd failed: {e}" - ))); + let _ = event_tx.try_send(PwCtrlEvent::Error(format!("connect_fd failed: {e}"))); return; } }; @@ -357,8 +353,7 @@ fn pipewire_thread(ctx: PwThreadCtx) { } }; - let format_info: Rc>> = - Rc::new(Cell::new(None)); + let format_info: Rc>> = Rc::new(Cell::new(None)); let event_tx_state = event_tx.clone(); let _listener = stream @@ -366,8 +361,7 @@ fn pipewire_thread(ctx: PwThreadCtx) { .state_changed(move |_, _, old, new| { tracing::debug!("PipeWire stream state: {old:?} -> {new:?}"); match new { - pw::stream::StreamState::Error(_) - | pw::stream::StreamState::Unconnected => { + pw::stream::StreamState::Error(_) | pw::stream::StreamState::Unconnected => { let _ = event_tx_state.try_send(PwCtrlEvent::StreamEnded); } _ => {} @@ -436,7 +430,8 @@ fn pipewire_thread(ctx: PwThreadCtx) { // 从第一个数据项中获取 DMA-BUF 文件描述符 // 通过 libspa 的 Data 包装类型安全地访问 SPA 数据结构 - let data_ref: &pw::spa::buffer::Data = unsafe { &*(datas_ptr as *const pw::spa::buffer::Data) }; + let data_ref: &pw::spa::buffer::Data = + unsafe { &*(datas_ptr as *const pw::spa::buffer::Data) }; let fd = data_ref.fd(); if fd < 0 { unsafe { stream.queue_raw_buffer(raw_buf) }; @@ -462,7 +457,8 @@ fn pipewire_thread(ctx: PwThreadCtx) { for i in 0..n_metas { let meta = &*metas.add(i as usize); if meta.type_ == libspa::sys::SPA_META_Header - && meta.size as usize >= std::mem::size_of::() + && meta.size as usize + >= std::mem::size_of::() && !meta.data.is_null() { let header = &*(meta.data as *const libspa::sys::spa_meta_header); @@ -505,9 +501,7 @@ fn pipewire_thread(ctx: PwThreadCtx) { pts, }; - if let Err(crossbeam_channel::TrySendError::Full(_)) = - frame_tx.try_send(frame) - { + if let Err(crossbeam_channel::TrySendError::Full(_)) = frame_tx.try_send(frame) { let prev = dropped.fetch_add(1, Ordering::Relaxed); if prev > 0 && prev % 30 == 0 { tracing::warn!("dropped {prev} frames total: encoder backlog"); @@ -593,35 +587,65 @@ const fn fourcc(a: u8, b: u8, c: u8, d: u8) -> u32 { /// 此函数建立了两者之间的映射关系。 /// /// 支持的格式: -/// - BGRA/BGRx: 蓝绿红(Alpha/X) 32位格式 -/// - RGBA/RGBx: 红绿蓝(Alpha/X) 32位格式 -/// - ARGB/xRGB: Alpha/X-红绿蓝 32位格式 (映射为 AR24/XR24) -/// - ABGR/xBGR: Alpha/X-蓝绿红 32位格式 (映射为 AB24/XB24) -/// /// 不支持的格式返回 0 +/// DRM 格式名描述像素值位布局(大端序),而非内存字节序。 +/// 例如 DRM_FORMAT_ARGB8888 在小端 x86 上内存为 [B,G,R,A] = PipeWire BGRA。 fn spa_to_drm_fourcc(format: libspa::param::video::VideoFormat) -> u32 { + use drm_fourcc::DrmFourcc; use libspa::param::video::VideoFormat; match format { - VideoFormat::BGRA => fourcc(b'B', b'G', b'R', b'A'), - VideoFormat::BGRx => fourcc(b'B', b'G', b'R', b'X'), - VideoFormat::RGBA => fourcc(b'R', b'G', b'B', b'A'), - VideoFormat::RGBx => fourcc(b'R', b'G', b'B', b'X'), - VideoFormat::ARGB => fourcc(b'A', b'R', b'2', b'4'), - VideoFormat::xRGB => fourcc(b'X', b'R', b'2', b'4'), - VideoFormat::ABGR => fourcc(b'A', b'B', b'2', b'4'), - VideoFormat::xBGR => fourcc(b'X', b'B', b'2', b'4'), - // 不支持的格式返回 0,调用者应检查此值 - _ => 0, } + VideoFormat::BGRA => DrmFourcc::Argb8888 as u32, + VideoFormat::BGRx => DrmFourcc::Xrgb8888 as u32, + VideoFormat::RGBA => DrmFourcc::Abgr8888 as u32, + VideoFormat::RGBx => DrmFourcc::Xbgr8888 as u32, + VideoFormat::ARGB => DrmFourcc::Bgra8888 as u32, + VideoFormat::xRGB => DrmFourcc::Bgrx8888 as u32, + VideoFormat::ABGR => DrmFourcc::Rgba8888 as u32, + VideoFormat::xBGR => DrmFourcc::Rgbx8888 as u32, + _ => 0, + } } #[cfg(test)] mod tests { use super::*; + use drm_fourcc::DrmFourcc; #[test] - fn spa_to_drm_fourcc_bgra() { + fn spa_to_drm_fourcc_all_32bit() { use libspa::param::video::VideoFormat; - assert_eq!(spa_to_drm_fourcc(VideoFormat::BGRA), fourcc(b'B', b'G', b'R', b'A')); + assert_eq!( + spa_to_drm_fourcc(VideoFormat::BGRA), + DrmFourcc::Argb8888 as u32 + ); + assert_eq!( + spa_to_drm_fourcc(VideoFormat::BGRx), + DrmFourcc::Xrgb8888 as u32 + ); + assert_eq!( + spa_to_drm_fourcc(VideoFormat::RGBA), + DrmFourcc::Abgr8888 as u32 + ); + assert_eq!( + spa_to_drm_fourcc(VideoFormat::RGBx), + DrmFourcc::Xbgr8888 as u32 + ); + assert_eq!( + spa_to_drm_fourcc(VideoFormat::ARGB), + DrmFourcc::Bgra8888 as u32 + ); + assert_eq!( + spa_to_drm_fourcc(VideoFormat::xRGB), + DrmFourcc::Bgrx8888 as u32 + ); + assert_eq!( + spa_to_drm_fourcc(VideoFormat::ABGR), + DrmFourcc::Rgba8888 as u32 + ); + assert_eq!( + spa_to_drm_fourcc(VideoFormat::xBGR), + DrmFourcc::Rgbx8888 as u32 + ); } #[test] @@ -629,10 +653,4 @@ mod tests { use libspa::param::video::VideoFormat; assert_eq!(spa_to_drm_fourcc(VideoFormat::NV12), 0); } - - #[test] - fn fourcc_values() { - assert_eq!(fourcc(b'B', b'G', b'R', b'A'), 0x41524742); - assert_eq!(fourcc(b'R', b'G', b'B', b'A'), 0x41424752); - } } diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..f71fb0c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,9 @@ +pub mod args; +pub mod avhw; +pub mod backend_detect; +pub mod cap_portal; +pub mod cap_wlr_screencopy; +pub mod fps_limit; +pub mod state; +pub mod state_portal; +pub mod transform; diff --git a/src/main.rs b/src/main.rs index 8209b13..2990b12 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,15 +9,15 @@ use wayland_client::globals::registry_queue_init; use wayland_client::Connection; // 各功能模块声明 -mod args; // 命令行参数解析 -mod avhw; // 音视频硬件加速 -mod backend_detect; // 截屏后端自动检测(wlroots vs Portal/PipeWire) -mod cap_portal; // XDG Portal 屏幕捕获 -mod cap_wlr_screencopy; // wlroots wlr-screencopy 截屏协议 -mod fps_limit; // 帧率限制器 -mod state; // wlr-screencopy 后端的主状态机 -mod state_portal; // Portal/PipeWire 后端的主状态机 -mod transform; // 图像变换(旋转/翻转) +mod args; // 命令行参数解析 +mod avhw; // 音视频硬件加速 +mod backend_detect; // 截屏后端自动检测(wlroots vs Portal/PipeWire) +mod cap_portal; // XDG Portal 屏幕捕获 +mod cap_wlr_screencopy; // wlroots wlr-screencopy 截屏协议 +mod fps_limit; // 帧率限制器 +mod state; // wlr-screencopy 后端的主状态机 +mod state_portal; // Portal/PipeWire 后端的主状态机 +mod transform; // 图像变换(旋转/翻转) use crate::args::Args; use crate::cap_wlr_screencopy::CapWlrScreencopy; @@ -65,12 +65,8 @@ fn main() -> Result<()> { // 根据检测结果进入对应的事件循环 match backend { - crate::backend_detect::CaptureBackend::WlrScreencopy => { - run_wlr_screencopy(args) - } - crate::backend_detect::CaptureBackend::PortalPipeWire => { - run_portal_pipewire(args) - } + crate::backend_detect::CaptureBackend::WlrScreencopy => run_wlr_screencopy(args), + crate::backend_detect::CaptureBackend::PortalPipeWire => run_portal_pipewire(args), } } @@ -130,7 +126,7 @@ fn run_wlr_screencopy(args: Args) -> Result<()> { { let mut pfd = libc::pollfd { fd: wayland_fd, - events: libc::POLLIN, // 监听可读事件 + events: libc::POLLIN, // 监听可读事件 revents: 0, }; // timeout=0 表示非阻塞,立即返回当前 fd 状态 @@ -160,8 +156,8 @@ fn run_wlr_screencopy(args: Args) -> Result<()> { // signal_hook_mio 将 Unix 信号转换为 fd 可读事件, // 这样信号也可以通过 epoll 统一监听,不需要单独的信号处理器 let mut signals = signal_hook_mio::v1_0::Signals::new(&[ - signal_hook::consts::SIGINT, // Ctrl+C - signal_hook::consts::SIGTERM, // kill 命令默认信号 + signal_hook::consts::SIGINT, // Ctrl+C + signal_hook::consts::SIGTERM, // kill 命令默认信号 ])?; poll.registry() .register(&mut signals, TOKEN_QUIT, Interest::READABLE)?; @@ -305,11 +301,8 @@ fn run_portal_pipewire(args: Args) -> Result<()> { // 只注册信号 fd,没有 Wayland fd // 所以 poll.poll 在这里只负责检测 SIGINT/SIGTERM // 实际的帧采集完全依赖 poll_and_encode 的轮询 - poll.registry().register( - &mut signals, - mio::Token(1), - mio::Interest::READABLE, - )?; + poll.registry() + .register(&mut signals, mio::Token(1), mio::Interest::READABLE)?; // 主事件循环(超时 10ms,比 wlr-screencopy 更短,因为不依赖 Wayland fd 唤醒) // 10ms 超时的作用是让循环高频转动,以便及时处理 PipeWire 投递的帧 diff --git a/src/state.rs b/src/state.rs index 5e428c0..c2ab75a 100644 --- a/src/state.rs +++ b/src/state.rs @@ -568,11 +568,7 @@ impl State { tracing::error!("compositor copy failed"); let taken = mem::replace(&mut self.in_flight_surface, InFlightSurface::None); match taken { - InFlightSurface::CopyQueued { - buffer, - frame, - .. - } => { + InFlightSurface::CopyQueued { buffer, frame, .. } => { drop(buffer); if let EncConstructionStage::Streaming { cap, .. } = &mut self.stage { cap.on_done_with_frame(frame); @@ -594,7 +590,14 @@ impl State { cap, screencopy_manager, dmabuf, - } => (output_info, output, hw_device_ctx, cap, screencopy_manager, dmabuf), + } => ( + output_info, + output, + hw_device_ctx, + cap, + screencopy_manager, + dmabuf, + ), other => { tracing::warn!("negotiate_format: not in EverythingButFmt stage"); self.stage = other; @@ -604,9 +607,10 @@ impl State { let (output_info, output, hw_device_ctx, cap, screencopy_manager, dmabuf) = stage_data; let drm_path = self.resolve_drm_path(); let fps = self.args.fps; - let bitrate = self.args.bitrate.unwrap_or_else(|| { - 2 * (width as u64) * (height as u64) * (fps as u64) / 100 - }); + let bitrate = self + .args + .bitrate + .unwrap_or_else(|| 2 * (width as u64) * (height as u64) * (fps as u64) / 100); let enc = match crate::avhw::create_encoder( &drm_path, Path::new(&self.args.output), @@ -1199,11 +1203,7 @@ impl Dispatch for State { tracing::error!("DMA-BUF buffer creation failed"); let taken = mem::replace(&mut state.in_flight_surface, InFlightSurface::None); match taken { - InFlightSurface::CopyQueued { - buffer, - frame, - .. - } => { + InFlightSurface::CopyQueued { buffer, frame, .. } => { drop(buffer); if let EncConstructionStage::Streaming { cap, .. } = &mut state.stage { cap.on_done_with_frame(frame); @@ -1239,9 +1239,7 @@ impl Dispatch for State { // types (buffer and/or linux_dmabuf) before buffer_done. We only // support DMA-BUF, so just log and wait for linux_dmabuf / buffer_done. ScreencopyFrameEvent::Buffer { .. } => { - tracing::debug!( - "Received SHM Buffer offer — only DMA-BUF capture is supported" - ); + tracing::debug!("Received SHM Buffer offer — only DMA-BUF capture is supported"); } ScreencopyFrameEvent::LinuxDmabuf { format, diff --git a/src/state_portal.rs b/src/state_portal.rs index 8cdc686..4206092 100644 --- a/src/state_portal.rs +++ b/src/state_portal.rs @@ -1,17 +1,13 @@ // 采集门户状态模块 —— 通过 PipeWire/DMA-BUF 进行屏幕采集并编码 -use std::mem; use std::os::fd::AsRawFd; use std::path::PathBuf; use anyhow::{bail, Result}; -use ffmpeg_next as ff; -use ffmpeg_next::ffi; use crate::args::Args; -use crate::avhw::{self, EncState}; +use crate::avhw::{self, SwEncState}; use crate::cap_portal::{CapPortal, PwCtrlEvent, PwDmaBufFrame}; use crate::fps_limit::FpsLimit; -use crate::transform::Transform; /// 门户采集的阶段状态 /// - WaitingForFormat: 等待接收到第一帧 DMA-BUF 以确定视频格式参数 @@ -28,8 +24,8 @@ enum PortalStage { pub struct StatePortal { /// 当前采集阶段 stage: PortalStage, - /// 硬件编码器状态(第一帧到达后才初始化) - enc: Option, + /// GPU 缩放 + 软件编码器状态(第一帧到达后才初始化) + enc: Option, /// 帧率限制器 fps_limit: FpsLimit<()>, /// PipeWire 屏幕采集端点 @@ -44,6 +40,14 @@ pub struct StatePortal { drm_device: Option, /// 第一帧的时间戳(纳秒),用于计算相对 PTS first_pts_ns: Option, + /// Diagnostic: frames received from PipeWire channel + frames_received: u64, + /// Diagnostic: frames dropped by FPS limiter + frames_fps_dropped: u64, + /// Diagnostic: frames successfully encoded + frames_encoded: u64, + /// Diagnostic: last time we printed stats + last_stats_time: Option, } impl StatePortal { @@ -70,6 +74,10 @@ impl StatePortal { first_frame: true, drm_device, first_pts_ns: None, + frames_received: 0, + frames_fps_dropped: 0, + frames_encoded: 0, + last_stats_time: None, }) } @@ -94,7 +102,11 @@ impl StatePortal { } let frame = match self.cap.frame_receiver().try_recv() { - Ok(frame) => frame, + Ok(frame) => { + self.frames_received += 1; + tracing::debug!("poll_and_encode: got frame #{} from channel", self.frames_received); + frame + } Err(_) => return Ok(false), }; @@ -110,20 +122,35 @@ impl StatePortal { ); let drm_path = self.resolve_drm_device_for_frame(&frame)?; - let enc = avhw::create_encoder( + let (enc_width, enc_height) = portal_encode_dimensions(frame.width, frame.height); + tracing::info!( + "Portal software encode target: {}x{} -> {}x{} @ {} fps", + frame.width, + frame.height, + enc_width, + enc_height, + self.args.fps, + ); + let actual_bitrate = self.args.bitrate.unwrap_or_else(|| { + 2 * (enc_width as u64) * (enc_height as u64) * (self.args.fps as u64) / 100 + }); + let actual_gop_size = self.args.gop_size.unwrap_or(self.args.fps); + + let enc = avhw::SwEncState::new( &drm_path, self.args.output.as_ref(), frame.width, frame.height, + enc_width, + enc_height, self.args.fps, - Transform::Normal, - self.args.bitrate, - self.args.gop_size, - None, + actual_bitrate, + actual_gop_size, )?; self.enc = Some(enc); self.stage = PortalStage::Streaming; + tracing::info!("First frame processed, encoder initialized, transitioning to Streaming"); drop(frame); } PortalStage::Streaming => { @@ -149,160 +176,105 @@ impl StatePortal { match crate::avhw::test_dma_buf_import(candidate, frame) { Ok(()) => { tracing::info!( - "Auto-selected DRM device: {} (can import PipeWire DMA-BUF)", - candidate.display() + "Auto-detected DRM device: {} (tested {} candidates)", + candidate.display(), + candidates.len(), ); self.drm_device = Some(candidate.clone()); return Ok(candidate.clone()); } - Err(err) => { + Err(e) => { tracing::debug!( - "DRM device {} cannot import frame: {err:#}", - candidate.display() + "DRM device {} cannot import DMA-BUF: {e}", + candidate.display(), ); - failures.push(format!("{}: {err:#}", candidate.display())); + failures.push((candidate, e)); } } } bail!( - "No DRM render device can import the PipeWire DMA-BUF frame. \ - Specify --drm-device. Tried: {}", - failures.join("; ") - ) + "No DRM render device can import the DMA-BUF frame. Tried: {}", + failures + .into_iter() + .map(|(p, e)| format!("{} ({e})", p.display())) + .collect::>() + .join(", ") + ); } /// 处理单帧 DMA-BUF 数据 /// - /// 完整的帧处理流水线: - /// 1. 帧率限制(首帧跳过) - /// 2. 构建 DRM 描述符 - /// 3. 分配 DRM_PRIME 源帧 - /// 4. 分配 VAAPI 硬件目标帧 - /// 5. 通过 DMA-BUF 导入将帧数据导入 VAAPI - /// 6. 计算 PTS 时间戳 - /// 7. 回收 DRM 描述符内存 - /// 8. 编码输出 + /// 通过 `av_hwframe_map` 零拷贝导入 VAAPI,然后交给 SwEncState 完成: + /// scale_vaapi GPU 缩放、2K NV12 回读、YUV420P 格式转换、软件 H.264 编码。 fn handle_pw_frame(&mut self, frame: PwDmaBufFrame) -> Result<()> { - // 1. FPS limiting (first frame bypasses) - // 帧率限制(首帧跳过限制,确保立即编码) if self.first_frame { self.first_frame = false; } else { let now = std::time::Instant::now(); if self.fps_limit.on_new_frame((), now).is_none() { + self.frames_fps_dropped += 1; + tracing::debug!("handle_pw_frame: FPS limit, dropping frame (#{})", self.frames_fps_dropped); + self.maybe_print_stats(now); return Ok(()); } } - // 2. Build DRM descriptor for DMA-BUF import - // 根据 DMA-BUF 帧信息构建 FFmpeg DRM 描述符 - let desc = build_drm_descriptor(&frame); - let desc_box = Box::new(desc); + tracing::debug!("handle_pw_frame: processing frame, pts={}", frame.pts); - // 3. Allocate raw DRM_PRIME source frame using Video wrapper - // 分配 DRM_PRIME 格式的源帧,将描述符指针挂载到 data[0] - let mut raw_frame = ff::frame::Video::empty(); - unsafe { - let raw_ptr = raw_frame.as_mut_ptr(); - (*raw_ptr).data[0] = Box::into_raw(desc_box) as *mut u8; - (*raw_ptr).format = ffi::AVPixelFormat::AV_PIX_FMT_DRM_PRIME as i32; - (*raw_ptr).width = frame.width as i32; - (*raw_ptr).height = frame.height as i32; - } - - // 4. Get encoder reference - // 获取编码器引用 let enc = match self.enc.as_mut() { - Some(e) => e, - None => { - // Recover the Box to prevent memory leak of the descriptor - // 编码器未初始化时回收描述符以防止内存泄漏 - unsafe { - let desc_ptr = (*raw_frame.as_ptr()).data[0] as *mut ffi::AVDRMFrameDescriptor; - if !desc_ptr.is_null() { - let _ = Box::from_raw(desc_ptr); - } - (*raw_frame.as_mut_ptr()).data[0] = std::ptr::null_mut(); - } - bail!("encoder not initialized"); - } + Some(enc) => enc, + None => bail!("encoder not initialized"), }; - // 5. Allocate VAAPI hardware target frame - // 分配 VAAPI 硬件帧缓冲区 - let mut hw_frame = ff::frame::Video::empty(); - let ret = unsafe { - ffi::av_hwframe_get_buffer(enc.frames_rgb().as_ptr(), hw_frame.as_mut_ptr(), 0) - }; - if ret < 0 { - // Recover the Box to prevent memory leak of the descriptor - // 分配失败时回收描述符防止内存泄漏 - unsafe { - let desc_ptr = (*raw_frame.as_ptr()).data[0] as *mut ffi::AVDRMFrameDescriptor; - if !desc_ptr.is_null() { - let _ = Box::from_raw(desc_ptr); - } - (*raw_frame.as_mut_ptr()).data[0] = std::ptr::null_mut(); - } - bail!("av_hwframe_get_buffer failed: error {ret}"); - } + // SAFETY: frames_rgb is a live VAAPI frames context configured for capture; frame carries + // valid DMA-BUF fd/format/modifier/stride/offset metadata for the duration of this call. + let mut vaapi_frame = unsafe { + avhw::import_dma_buf_to_vaapi( + enc.frames_rgb().as_ptr(), + frame.fd.as_raw_fd(), + frame.width, + frame.height, + frame.format, + frame.modifier, + frame.stride, + frame.offset, + ) + }?; - // 6. Import DMA-BUF into VAAPI via transfer_data - // 通过 DMA-BUF 导入将帧数据从 DRM 传输到 VAAPI 硬件表面 - let ret = unsafe { - ffi::av_hwframe_transfer_data(hw_frame.as_mut_ptr(), raw_frame.as_ptr(), 0) - }; - if ret < 0 { - // 传输失败时回收描述符防止内存泄漏 - unsafe { - let desc_ptr = (*raw_frame.as_ptr()).data[0] as *mut ffi::AVDRMFrameDescriptor; - if !desc_ptr.is_null() { - let _ = Box::from_raw(desc_ptr); - } - (*raw_frame.as_mut_ptr()).data[0] = std::ptr::null_mut(); - } - if ret == -(ffi::EINVAL as i32) { - bail!( - "VAAPI does not support DMA-BUF modifier 0x{:X}", - frame.modifier - ); - } - bail!("av_hwframe_transfer_data failed: error {ret}"); - } + tracing::debug!("handle_pw_frame: DMA-BUF import OK"); - // 7. Set PTS — convert PipeWire nanoseconds to encoder frame-number units let pts = compute_pts(&mut self.first_pts_ns, frame.pts, self.args.fps); unsafe { - (*hw_frame.as_mut_ptr()).pts = pts; + (*vaapi_frame.as_mut_ptr()).pts = pts; } - // 8. Recover the Boxed descriptor from raw_frame *before* encoding. - // av_hwframe_transfer_data has already imported the DMA-BUF into the - // VAAPI surface, so FFmpeg no longer references the descriptor struct. - // Doing this before encode_frame ensures the descriptor is reclaimed - // even if encode_frame returns early via `?`. - // - // 在编码前回收描述符内存。 - // 此时 DMA-BUF 数据已导入 VAAPI 表面,FFmpeg 不再引用描述符结构体。 - // 在 encode_frame 之前回收确保即使编码返回错误也能正确释放内存。 - unsafe { - let desc_ptr = (*raw_frame.as_ptr()).data[0] as *mut ffi::AVDRMFrameDescriptor; - if !desc_ptr.is_null() { - let _ = Box::from_raw(desc_ptr); - } - (*raw_frame.as_mut_ptr()).data[0] = std::ptr::null_mut(); - } + enc.encode_frame(&vaapi_frame)?; + self.frames_encoded += 1; + tracing::info!("handle_pw_frame: frame #{} encoded OK, pts={}", self.frames_encoded, pts); + + let now = std::time::Instant::now(); + self.maybe_print_stats(now); - // 9. Encode — safe to early-return via `?` now that descriptor is recovered. - // 编码帧数据(此时描述符已回收,可安全通过 `?` 提前返回) - enc.encode_frame(&hw_frame)?; - - // raw_frame and hw_frame drop here via Video::drop → av_frame_free - // raw_frame 和 hw_frame 在此处通过 Video::drop → av_frame_free 释放 Ok(()) } + fn maybe_print_stats(&mut self, now: std::time::Instant) { + let should_print = match self.last_stats_time { + None => true, + Some(last) => now.duration_since(last) >= std::time::Duration::from_secs(2), + }; + if should_print { + self.last_stats_time = Some(now); + tracing::info!( + "STATS: received={}, fps_dropped={}, encoded={}", + self.frames_received, + self.frames_fps_dropped, + self.frames_encoded, + ); + } + } + /// 关闭状态:刷新编码器并清理资源 /// /// 使用 `enc.take()` 确保编码器只被 flush 一次,即使多次调用也安全(幂等)。 @@ -327,28 +299,21 @@ impl Drop for StatePortal { } } -/// 根据 DMA-BUF 帧信息构建 FFmpeg DRM 帧描述符 -/// -/// 将 PipeWire 提供的 DMA-BUF 参数(fd、偏移量、步长、修饰符等) -/// 转换为 FFmpeg 的 AVDRMFrameDescriptor 结构体,用于零拷贝硬件导入。 -fn build_drm_descriptor(frame: &PwDmaBufFrame) -> ffi::AVDRMFrameDescriptor { - let mut desc: ffi::AVDRMFrameDescriptor = unsafe { mem::zeroed() }; +fn portal_encode_dimensions(width: u32, height: u32) -> (u32, u32) { + const TARGET_W: u32 = 2560; + const TARGET_H: u32 = 1440; - // DMA-BUF 对象层:一个 fd 对应一个内存对象 - desc.nb_objects = 1; - desc.objects[0].fd = frame.fd.as_raw_fd(); - desc.objects[0].size = 0; // 大小为 0 表示整个 fd - desc.objects[0].format_modifier = frame.modifier; + if width <= TARGET_W && height <= TARGET_H { + return (width & !1, height & !1); + } - // 像素格式层:单层单平面布局(如 XR24 格式) - desc.nb_layers = 1; - desc.layers[0].format = frame.format; - desc.layers[0].nb_planes = 1; - desc.layers[0].planes[0].object_index = 0; - desc.layers[0].planes[0].offset = frame.offset as isize; - desc.layers[0].planes[0].pitch = frame.stride as isize; - - desc + let width_limited_h = ((height as u64) * (TARGET_W as u64) / (width as u64)) as u32; + if width_limited_h <= TARGET_H { + (TARGET_W & !1, width_limited_h & !1) + } else { + let height_limited_w = ((width as u64) * (TARGET_H as u64) / (height as u64)) as u32; + (height_limited_w & !1, TARGET_H & !1) + } } /// Convert PipeWire nanosecond PTS to encoder frame-number units. @@ -372,6 +337,22 @@ fn resolve_drm_device(args: &Args) -> Result> { Ok(None) } +#[cfg(test)] +fn build_drm_descriptor(frame: &PwDmaBufFrame) -> ffmpeg_next::ffi::AVDRMFrameDescriptor { + let mut desc: ffmpeg_next::ffi::AVDRMFrameDescriptor = unsafe { std::mem::zeroed() }; + desc.nb_objects = 1; + desc.objects[0].fd = frame.fd.as_raw_fd(); + desc.objects[0].size = 0; + desc.objects[0].format_modifier = frame.modifier; + desc.nb_layers = 1; + desc.layers[0].format = frame.format; + desc.layers[0].nb_planes = 1; + desc.layers[0].planes[0].object_index = 0; + desc.layers[0].planes[0].offset = frame.offset as isize; + desc.layers[0].planes[0].pitch = frame.stride as isize; + desc +} + #[cfg(test)] mod tests { use super::*; @@ -426,7 +407,10 @@ mod tests { port: 0, }; let result = resolve_drm_device(&args).unwrap(); - assert_eq!(result, Some(std::path::PathBuf::from("/dev/dri/renderD128"))); + assert_eq!( + result, + Some(std::path::PathBuf::from("/dev/dri/renderD128")) + ); } #[test]