feat: GPU-downscale + software H.264 encode pipeline (WIP)

Add SwEncState in avhw.rs: GPU pipeline using scale_vaapi to downscale
4K BGRA -> 2K NV12 on AMD iGPU, then software encode with libopenh264.

- import_dma_buf_to_vaapi: av_hwframe_map based DMA-BUF import
- SwEncState: GPU filter graph (scale_vaapi) + NV12->YUV420P + libopenh264
- state_portal.rs: integrated SwEncState, auto DRM device detection
- vaapi_import_bench.rs: CPU vs GPU pipeline benchmark
- sw_encode_bench.rs: software encode benchmark

Benchmark results: GPU pipeline ~91 FPS theoretical (10.95ms/frame)
vs CPU pipeline ~33 FPS (30.21ms/frame).

Known issue: only 1 frame is encoded in production recordings;
diagnostic STATS logging has been added to debug the frame flow.
This commit is contained in:
dailz
2026-05-29 22:04:12 +08:00
parent 55abb5e56d
commit d80b34f44f
9 changed files with 2416 additions and 305 deletions

View File

@@ -568,11 +568,7 @@ impl<S: CaptureSource> State<S> {
tracing::error!("compositor copy failed");
let taken = mem::replace(&mut self.in_flight_surface, InFlightSurface::None);
match taken {
InFlightSurface::CopyQueued {
buffer,
frame,
..
} => {
InFlightSurface::CopyQueued { buffer, frame, .. } => {
drop(buffer);
if let EncConstructionStage::Streaming { cap, .. } = &mut self.stage {
cap.on_done_with_frame(frame);
@@ -594,7 +590,14 @@ impl<S: CaptureSource> State<S> {
cap,
screencopy_manager,
dmabuf,
} => (output_info, output, hw_device_ctx, cap, screencopy_manager, dmabuf),
} => (
output_info,
output,
hw_device_ctx,
cap,
screencopy_manager,
dmabuf,
),
other => {
tracing::warn!("negotiate_format: not in EverythingButFmt stage");
self.stage = other;
@@ -604,9 +607,10 @@ impl<S: CaptureSource> State<S> {
let (output_info, output, hw_device_ctx, cap, screencopy_manager, dmabuf) = stage_data;
let drm_path = self.resolve_drm_path();
let fps = self.args.fps;
let bitrate = self.args.bitrate.unwrap_or_else(|| {
2 * (width as u64) * (height as u64) * (fps as u64) / 100
});
let bitrate = self
.args
.bitrate
.unwrap_or_else(|| 2 * (width as u64) * (height as u64) * (fps as u64) / 100);
let enc = match crate::avhw::create_encoder(
&drm_path,
Path::new(&self.args.output),
@@ -1199,11 +1203,7 @@ impl<S: CaptureSource> Dispatch<ZwpLinuxBufferParamsV1, ()> for State<S> {
tracing::error!("DMA-BUF buffer creation failed");
let taken = mem::replace(&mut state.in_flight_surface, InFlightSurface::None);
match taken {
InFlightSurface::CopyQueued {
buffer,
frame,
..
} => {
InFlightSurface::CopyQueued { buffer, frame, .. } => {
drop(buffer);
if let EncConstructionStage::Streaming { cap, .. } = &mut state.stage {
cap.on_done_with_frame(frame);
@@ -1239,9 +1239,7 @@ impl Dispatch<ZwlrScreencopyFrameV1, ()> for State<CapWlrScreencopy> {
// types (buffer and/or linux_dmabuf) before buffer_done. We only
// support DMA-BUF, so just log and wait for linux_dmabuf / buffer_done.
ScreencopyFrameEvent::Buffer { .. } => {
tracing::debug!(
"Received SHM Buffer offer — only DMA-BUF capture is supported"
);
tracing::debug!("Received SHM Buffer offer — only DMA-BUF capture is supported");
}
ScreencopyFrameEvent::LinuxDmabuf {
format,