feat: GPU-downscale + software H.264 encode pipeline (WIP)

Add SwEncState in avhw.rs: GPU pipeline using scale_vaapi to downscale
4K BGRA -> 2K NV12 on AMD iGPU, then software encode with libopenh264.

- import_dma_buf_to_vaapi: av_hwframe_map based DMA-BUF import
- SwEncState: GPU filter graph (scale_vaapi) + NV12->YUV420P + libopenh264
- state_portal.rs: integrated SwEncState, auto DRM device detection
- vaapi_import_bench.rs: CPU vs GPU pipeline benchmark
- sw_encode_bench.rs: software encode benchmark

Benchmark results: GPU pipeline ~91 FPS theoretical (10.95ms/frame)
vs CPU pipeline ~33 FPS (30.21ms/frame).

Known issue: only 1 frame is encoded in production recordings;
diagnostic STATS logging has been added to debug the frame flow.
This commit is contained in:
dailz
2026-05-29 22:04:12 +08:00
parent 55abb5e56d
commit d80b34f44f
9 changed files with 2416 additions and 305 deletions

View File

@@ -9,15 +9,15 @@ use wayland_client::globals::registry_queue_init;
use wayland_client::Connection;
// 各功能模块声明
mod args; // 命令行参数解析
mod avhw; // 音视频硬件加速
mod backend_detect; // 截屏后端自动检测wlroots vs Portal/PipeWire
mod cap_portal; // XDG Portal 屏幕捕获
mod cap_wlr_screencopy; // wlroots wlr-screencopy 截屏协议
mod fps_limit; // 帧率限制器
mod state; // wlr-screencopy 后端的主状态机
mod state_portal; // Portal/PipeWire 后端的主状态机
mod transform; // 图像变换(旋转/翻转)
mod args; // 命令行参数解析
mod avhw; // 音视频硬件加速
mod backend_detect; // 截屏后端自动检测wlroots vs Portal/PipeWire
mod cap_portal; // XDG Portal 屏幕捕获
mod cap_wlr_screencopy; // wlroots wlr-screencopy 截屏协议
mod fps_limit; // 帧率限制器
mod state; // wlr-screencopy 后端的主状态机
mod state_portal; // Portal/PipeWire 后端的主状态机
mod transform; // 图像变换(旋转/翻转)
use crate::args::Args;
use crate::cap_wlr_screencopy::CapWlrScreencopy;
@@ -65,12 +65,8 @@ fn main() -> Result<()> {
// 根据检测结果进入对应的事件循环
match backend {
crate::backend_detect::CaptureBackend::WlrScreencopy => {
run_wlr_screencopy(args)
}
crate::backend_detect::CaptureBackend::PortalPipeWire => {
run_portal_pipewire(args)
}
crate::backend_detect::CaptureBackend::WlrScreencopy => run_wlr_screencopy(args),
crate::backend_detect::CaptureBackend::PortalPipeWire => run_portal_pipewire(args),
}
}
@@ -130,7 +126,7 @@ fn run_wlr_screencopy(args: Args) -> Result<()> {
{
let mut pfd = libc::pollfd {
fd: wayland_fd,
events: libc::POLLIN, // 监听可读事件
events: libc::POLLIN, // 监听可读事件
revents: 0,
};
// timeout=0 表示非阻塞,立即返回当前 fd 状态
@@ -160,8 +156,8 @@ fn run_wlr_screencopy(args: Args) -> Result<()> {
// signal_hook_mio 将 Unix 信号转换为 fd 可读事件,
// 这样信号也可以通过 epoll 统一监听,不需要单独的信号处理器
let mut signals = signal_hook_mio::v1_0::Signals::new(&[
signal_hook::consts::SIGINT, // Ctrl+C
signal_hook::consts::SIGTERM, // kill 命令默认信号
signal_hook::consts::SIGINT, // Ctrl+C
signal_hook::consts::SIGTERM, // kill 命令默认信号
])?;
poll.registry()
.register(&mut signals, TOKEN_QUIT, Interest::READABLE)?;
@@ -305,11 +301,8 @@ fn run_portal_pipewire(args: Args) -> Result<()> {
// 只注册信号 fd,没有 Wayland fd
// 所以 poll.poll 在这里只负责检测 SIGINT/SIGTERM
// 实际的帧采集完全依赖 poll_and_encode 的轮询
poll.registry().register(
&mut signals,
mio::Token(1),
mio::Interest::READABLE,
)?;
poll.registry()
.register(&mut signals, mio::Token(1), mio::Interest::READABLE)?;
// 主事件循环(超时 10ms,比 wlr-screencopy 更短,因为不依赖 Wayland fd 唤醒)
// 10ms 超时的作用是让循环高频转动,以便及时处理 PipeWire 投递的帧