diff --git a/Cargo.lock b/Cargo.lock index e8926df..95b68a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -63,8 +63,8 @@ dependencies = [ [[package]] name = "ash" -version = "0.38.0+1.3.296" -source = "git+https://github.com/ash-rs/ash?branch=master#c9292cf1b3fb70e4416fe9c47d6d938bd80d52b4" +version = "0.38.0+1.4.329" +source = "git+https://github.com/ash-rs/ash?rev=55dd56906bbb5760e9e9e6c56f45be67f67e0649#55dd56906bbb5760e9e9e6c56f45be67f67e0649" dependencies = [ "libloading", ] diff --git a/Cargo.toml b/Cargo.toml index fb60865..6f6b80a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ default = [] dmabuf = [] [dependencies] -ash = { git = "https://github.com/ash-rs/ash", branch = "master", features = ["loaded"] } +ash = { git = "https://github.com/ash-rs/ash", rev = "55dd56906bbb5760e9e9e6c56f45be67f67e0649", features = ["loaded"] } thiserror = "1.0" tracing = "0.1" shaderc = "0.8" diff --git a/README.md b/README.md index bc6ee41..fb24367 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ # PixelForge -A Vulkan-based video encoding library for Rust, supporting H.264 and H.265 codecs. +A Vulkan-based video encoding library for Rust, supporting H.264, H.265, and AV1 codecs. > ⚠️ **Disclaimer**: This library was developed using AI ("vibe-coding") - partly to > see if it could be done, partly because I have practically zero experience with Vulkan. @@ -14,7 +14,7 @@ A Vulkan-based video encoding library for Rust, supporting H.264 and H.265 codec ## Features - **Hardware-accelerated** video encoding using Vulkan Video extensions. -- **Multiple codec support**: H.264/AVC, H.265/HEVC. +- **Multiple codec support**: H.264/AVC, H.265/HEVC, AV1. - **GPU-native API**: Encode directly from Vulkan images (`vk::Image`). - **Flexible configuration**: Rate control (CBR, VBR, CQP), quality levels, GOP settings. - **Utility helpers**: [`InputImage`] for easy YUV data upload to GPU. 
@@ -28,6 +28,12 @@ A Vulkan-based video encoding library for Rust, supporting H.264 and H.265 codec |-------|--------| | H.264/AVC | ✓ | | H.265/HEVC | ✓ | +| AV1 | ✓ (experimental) | + +> ⚠️ **AV1 Warning**: AV1 encoding is experimental. On NVIDIA GPUs, P-frames cannot +> reference other P-frames, causing all P-frames to reference the I-frame instead. This +> leads to progressively larger frame sizes over time. Consider using H.264 or HEVC +> until this is resolved. ## Requirements @@ -67,7 +73,7 @@ fn main() -> Result<(), Box> { .app_name("My App") .build()?; - for codec in [Codec::H264, Codec::H265] { + for codec in [Codec::H264, Codec::H265, Codec::AV1] { println!("{:?}: encode={}", codec, context.supports_encode(codec) @@ -130,13 +136,15 @@ cargo run --example encode_h264 # H.265 encoding example cargo run --example encode_h265 + +# AV1 encoding example +cargo run --example encode_av1 ``` ## TODO's 1. [] Decoding. 1. [] B-frames support. -1. [] AV1 support (depends on a new version of ash with more up-to-date Vulkan support). ## Contributing diff --git a/examples/encode_av1.rs b/examples/encode_av1.rs new file mode 100644 index 0000000..2e08bca --- /dev/null +++ b/examples/encode_av1.rs @@ -0,0 +1,130 @@ +//! Example: AV1 Video Encoding +//! +//! Demonstrates AV1 video encoding using PixelForge with Vulkan Video. +//! Loads raw YUV420 frames from `testdata/test_frames.yuv`. + +use pixelforge::{ + Codec, EncodeBitDepth, EncodeConfig, Encoder, InputImage, PixelFormat, RateControlMode, + VideoContextBuilder, +}; +use std::fs::File; +use std::io::{Read, Write}; +use std::path::Path; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, Layer}; + +const TEST_FRAMES_PATH: &str = "testdata/test_frames.yuv"; +const WIDTH: u32 = 320; +const HEIGHT: u32 = 240; + +fn main() -> Result<(), Box> { + // Initialize tracing with RUST_LOG support. 
+ tracing_subscriber::registry() + .with( + tracing_subscriber::fmt::layer().with_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("warn")), + ), + ) + .init(); + + println!("PixelForge AV1 Encode Example\n"); + + // Load test frames. + let test_path = Path::new(TEST_FRAMES_PATH); + if !test_path.exists() { + eprintln!("Test frames not found at '{TEST_FRAMES_PATH}'"); + eprintln!("Generate with: ffmpeg -f lavfi -i testsrc=duration=0.5:size=320x240:rate=30 -pix_fmt yuv420p -f rawvideo testdata/test_frames.yuv"); + return Ok(()); + } + + let mut yuv_data = Vec::new(); + File::open(test_path)?.read_to_end(&mut yuv_data)?; + + let frame_size = (WIDTH * HEIGHT * 3 / 2) as usize; + let num_frames = yuv_data.len() / frame_size; + println!( + "Input: {num_frames} frames, {WIDTH}x{HEIGHT} YUV420, {} bytes", + yuv_data.len() + ); + + // Create video context. + let context = VideoContextBuilder::new() + .app_name("AV1 Encode Example") + .enable_validation(cfg!(debug_assertions)) + .require_encode(Codec::AV1) + .build()?; + + if !context.supports_encode(Codec::AV1) { + eprintln!("AV1 encode not supported"); + return Ok(()); + } + + // Configure encoder. + let config = EncodeConfig::av1(WIDTH, HEIGHT) + .with_rate_control(RateControlMode::Cqp) + .with_quality_level(26) + .with_frame_rate(30, 1) + .with_gop_size(30) + .with_b_frames(0); + + println!( + "Config: {:?}, QP={}, GOP={}, B-frames={}\n", + config.rate_control_mode, config.quality_level, config.gop_size, config.b_frame_count + ); + + // Create input image for uploading frames. + let mut input_image = InputImage::new( + context.clone(), + Codec::AV1, + WIDTH, + HEIGHT, + EncodeBitDepth::Eight, + PixelFormat::Yuv420, + )?; + let mut encoder = Encoder::new(context, config)?; + let mut output = File::create("output.av1")?; + let mut total_bytes = 0; + + // Encode frames. 
+ for i in 0..num_frames { + let frame = &yuv_data[i * frame_size..(i + 1) * frame_size]; + + // Upload YUV420 data to the input image. + input_image.upload_yuv420(frame)?; + + // Encode the image (passing InputImage's image, which triggers + // an internal copy to the encoder's input image with proper + // layout transitions). + for packet in encoder.encode(input_image.image())? { + total_bytes += packet.data.len(); + output.write_all(&packet.data)?; + println!( + " pts={:<2} dts={:<2}: {:>5} bytes, {:?}{}", + packet.pts, + packet.dts, + packet.data.len(), + packet.frame_type, + if packet.is_key_frame { " [KEY]" } else { "" } + ); + } + } + + // Flush remaining frames. + for packet in encoder.flush()? { + total_bytes += packet.data.len(); + output.write_all(&packet.data)?; + println!( + " pts={:<2} dts={:<2}: {:>5} bytes, {:?} (flushed)", + packet.pts, + packet.dts, + packet.data.len(), + packet.frame_type + ); + } + + let ratio = (num_frames * frame_size) as f64 / total_bytes as f64; + println!("\nEncoded {num_frames} frames, {total_bytes} bytes, {ratio:.1}:1 compression"); + println!("Output: output.av1"); + + Ok(()) +} diff --git a/examples/encode_h264.rs b/examples/encode_h264.rs index b742b8a..64bfc24 100644 --- a/examples/encode_h264.rs +++ b/examples/encode_h264.rs @@ -20,8 +20,10 @@ fn main() -> Result<(), Box> { // Initialize tracing. tracing_subscriber::registry() .with( - tracing_subscriber::fmt::layer() - .with_filter(tracing_subscriber::filter::LevelFilter::INFO), + tracing_subscriber::fmt::layer().with_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("warn")), + ), ) .init(); diff --git a/examples/encode_h265.rs b/examples/encode_h265.rs index 2a5f468..1ac971c 100644 --- a/examples/encode_h265.rs +++ b/examples/encode_h265.rs @@ -20,8 +20,10 @@ fn main() -> Result<(), Box> { // Initialize tracing. 
tracing_subscriber::registry() .with( - tracing_subscriber::fmt::layer() - .with_filter(tracing_subscriber::filter::LevelFilter::INFO), + tracing_subscriber::fmt::layer().with_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("warn")), + ), ) .init(); diff --git a/examples/query_capabilities.rs b/examples/query_capabilities.rs index 8f9409f..8a26427 100644 --- a/examples/query_capabilities.rs +++ b/examples/query_capabilities.rs @@ -5,9 +5,19 @@ use ash::vk; use pixelforge::{Codec, VideoContextBuilder}; -use std::ffi::CStr; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, Layer}; fn main() -> Result<(), Box> { + // Initialize tracing. + tracing_subscriber::registry() + .with( + tracing_subscriber::fmt::layer().with_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("warn")), + ), + ) + .init(); + println!("PixelForge Codec Capabilities Example"); println!("======================================\n"); diff --git a/examples/verify_all.rs b/examples/verify_all.rs index 9eefaec..dd80a63 100644 --- a/examples/verify_all.rs +++ b/examples/verify_all.rs @@ -1,6 +1,6 @@ //! Example: Verify all encoding combinations //! -//! Verifies H.264/H.265, 8-bit/10-bit, YUV420/YUV444 combinations. +//! Verifies H.264/H.265/AV1, 8-bit/10-bit, YUV420/YUV444 combinations. //! Runs PSNR analysis for each combination. use pixelforge::{ @@ -21,14 +21,19 @@ fn main() -> Result<(), Box> { // Initialize tracing. 
tracing_subscriber::registry() .with( - tracing_subscriber::fmt::layer() - .with_filter(tracing_subscriber::EnvFilter::from_default_env()), + tracing_subscriber::fmt::layer().with_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("warn")), + ), ) .init(); - // Ensure test data exists - ensure_test_data("yuv420p", "testdata/test_frames_yuv420p.yuv")?; - ensure_test_data("yuv444p", "testdata/test_frames_yuv444p.yuv")?; + // Ensure test data exists (dimensions encoded in filename to avoid stale data + // when switching between branches with different WIDTH/HEIGHT constants). + let yuv420_path = format!("testdata/test_frames_{}x{}_yuv420p.yuv", WIDTH, HEIGHT); + let yuv444_path = format!("testdata/test_frames_{}x{}_yuv444p.yuv", WIDTH, HEIGHT); + ensure_test_data("yuv420p", &yuv420_path)?; + ensure_test_data("yuv444p", &yuv444_path)?; let combinations = [ (Codec::H264, EncodeBitDepth::Eight, PixelFormat::Yuv420), @@ -39,6 +44,10 @@ fn main() -> Result<(), Box> { (Codec::H265, EncodeBitDepth::Eight, PixelFormat::Yuv444), (Codec::H265, EncodeBitDepth::Ten, PixelFormat::Yuv420), (Codec::H265, EncodeBitDepth::Ten, PixelFormat::Yuv444), + (Codec::AV1, EncodeBitDepth::Eight, PixelFormat::Yuv420), + (Codec::AV1, EncodeBitDepth::Eight, PixelFormat::Yuv444), + (Codec::AV1, EncodeBitDepth::Ten, PixelFormat::Yuv420), + (Codec::AV1, EncodeBitDepth::Ten, PixelFormat::Yuv444), ]; let context = VideoContextBuilder::new() @@ -101,7 +110,10 @@ fn run_test( depth: EncodeBitDepth, format: PixelFormat, ) -> Result> { - let output_filename = format!("output_{:?}_{:?}_{:?}.bin", codec, depth, format); + // AV1 uses .obu extension for raw OBU streams (with temporal delimiters). + // H.264/H.265 use .bin for raw Annex B bitstreams. 
+ let output_ext = if codec == Codec::AV1 { "obu" } else { "bin" }; + let output_filename = format!("output_{:?}_{:?}_{:?}.{}", codec, depth, format, output_ext); let decoded_filename = format!("decoded_{:?}_{:?}_{:?}.yuv", codec, depth, format); // 1. Encode @@ -109,7 +121,7 @@ fn run_test( let config = match codec { Codec::H264 => EncodeConfig::h264(WIDTH, HEIGHT), Codec::H265 => EncodeConfig::h265(WIDTH, HEIGHT), - _ => return Err("Unsupported codec".into()), + Codec::AV1 => EncodeConfig::av1(WIDTH, HEIGHT), } .with_rate_control(RateControlMode::Cqp) .with_quality_level(10) @@ -125,13 +137,13 @@ fn run_test( InputImage::new(context.clone(), codec, WIDTH, HEIGHT, depth, format)?; let input_path = match format { - PixelFormat::Yuv420 => "testdata/test_frames_yuv420p.yuv", - PixelFormat::Yuv444 => "testdata/test_frames_yuv444p.yuv", + PixelFormat::Yuv420 => format!("testdata/test_frames_{}x{}_yuv420p.yuv", WIDTH, HEIGHT), + PixelFormat::Yuv444 => format!("testdata/test_frames_{}x{}_yuv444p.yuv", WIDTH, HEIGHT), _ => return Err("Unsupported format".into()), }; let mut yuv_data = Vec::new(); - File::open(input_path)?.read_to_end(&mut yuv_data)?; + File::open(&input_path)?.read_to_end(&mut yuv_data)?; let frame_size = match format { PixelFormat::Yuv420 => (WIDTH * HEIGHT * 3 / 2) as usize, @@ -149,31 +161,20 @@ fn run_test( } let frame = &yuv_data[start..end]; + // Upload directly to encoder's input image to avoid cross-queue + // copy issues (InputImage uses the transfer queue, encoder uses the + // video encode queue which doesn't support transfer ops). + let encoder_image = encoder.input_image(); match format { - PixelFormat::Yuv420 => input_image.upload_yuv420(frame)?, - PixelFormat::Yuv444 => { - // Bypass InputImage's internal image and upload directly to encoder's image - // to avoid potential issues with vkCmdCopyImage between images. 
- let encoder_image = encoder.input_image(); - input_image.upload_yuv444_to(encoder_image, frame)?; - } + PixelFormat::Yuv420 => input_image.upload_yuv420_to(encoder_image, frame)?, + PixelFormat::Yuv444 => input_image.upload_yuv444_to(encoder_image, frame)?, _ => return Err("Unsupported format".into()), } - // For YUV444, we uploaded directly to encoder image. - // For YUV420, we uploaded to input_image.image(). - let src_image = match format { - PixelFormat::Yuv420 => input_image.image(), - PixelFormat::Yuv444 => encoder.input_image(), - _ => return Err("Unsupported format".into()), - }; - - for packet in encoder.encode(src_image)? { + for packet in encoder.encode(encoder_image)? { output_file.write_all(&packet.data)?; } } - // Flush? The encoder doesn't seem to have a flush method in the example, - // but usually we just stop feeding frames. } // 2. Decode to raw YUV @@ -191,8 +192,14 @@ fn run_test( // Let's decode to the input format (8-bit). let (input_pix_fmt, input_path) = match format { - PixelFormat::Yuv420 => ("yuv420p", "testdata/test_frames_yuv420p.yuv"), - PixelFormat::Yuv444 => ("yuv444p", "testdata/test_frames_yuv444p.yuv"), + PixelFormat::Yuv420 => ( + "yuv420p", + format!("testdata/test_frames_{}x{}_yuv420p.yuv", WIDTH, HEIGHT), + ), + PixelFormat::Yuv444 => ( + "yuv444p", + format!("testdata/test_frames_{}x{}_yuv444p.yuv", WIDTH, HEIGHT), + ), _ => return Err("Unsupported format".into()), }; @@ -233,7 +240,7 @@ fn run_test( "-f", "rawvideo", "-i", - input_path, + &input_path, "-s", &format!("{}x{}", WIDTH, HEIGHT), "-pix_fmt", diff --git a/src/encoder/av1/api.rs b/src/encoder/av1/api.rs new file mode 100644 index 0000000..8dca6fe --- /dev/null +++ b/src/encoder/av1/api.rs @@ -0,0 +1,210 @@ +use super::AV1Encoder; + +use crate::encoder::gop::{GopFrameType, GopPosition}; +use crate::encoder::EncodedPacket; +use crate::error::{PixelForgeError, Result}; +use ash::vk; +use tracing::debug; + +impl AV1Encoder { + /// Get the internal input image. 
+ /// + /// This image can be used as a target for `ColorConverter::convert` to avoid + /// an intermediate copy. + pub fn input_image(&self) -> vk::Image { + self.input_image + } + + /// Encode a frame from a GPU image. + /// + /// This accepts a source image on the GPU and encodes it directly without + /// any CPU-side data copies. The source image must be in the correct format + /// with the same dimensions as the encoder configuration, and should be in GENERAL layout. + /// + /// # Panics + /// + /// The encoder will panic at creation time if B-frames are enabled (b_frame_count > 0), + /// as B-frame encoding is not yet supported. + pub fn encode(&mut self, src_image: vk::Image) -> Result> { + let gop_position = self.gop.get_next_frame(); + let display_order = self.input_frame_num; + self.input_frame_num += 1; + + debug!( + "AV1 encode: frame {} from GPU image, type={:?}", + display_order, gop_position.frame_type + ); + + // Upload from GPU image. + self.upload_from_image(src_image)?; + + // Encode immediately. + let packet = self.encode_current_frame(&gop_position, display_order)?; + + Ok(vec![packet]) + } + + /// Internal method to encode the current frame already uploaded to input_image. + fn encode_current_frame( + &mut self, + gop_position: &GopPosition, + display_order: u64, + ) -> Result { + let is_key_frame = + gop_position.frame_type.is_idr() || gop_position.frame_type == GopFrameType::I; + let is_reference = gop_position.is_reference; + let frame_type = match gop_position.frame_type { + GopFrameType::Idr | GopFrameType::I => crate::encoder::FrameType::I, + GopFrameType::P => crate::encoder::FrameType::P, + GopFrameType::B => crate::encoder::FrameType::B, + }; + + debug!( + "Encoding frame: display_order={}, type={:?}, key={}, ref={}", + display_order, frame_type, is_key_frame, is_reference + ); + + if is_key_frame { + self.frame_num = 0; + self.order_hint = 0; + // Reset references for key frames. 
+ self.references.clear(); + // Reset DPB slot activation tracking on key frame - all slots become inactive. + for active in &mut self.dpb_slot_active { + *active = false; + } + } + + let mut encoded_data = Vec::new(); + + // AV1 Temporal Delimiter OBU: type=2, has_size=1, size=0. + // Required as the first OBU in each temporal unit for conformant bitstreams. + // This enables ffmpeg's AV1 demuxer to detect frame boundaries in raw OBU streams. + encoded_data.extend_from_slice(&[0x12, 0x00]); + + // For key frames, prepend the AV1 Sequence Header OBU. + // This is required for AV1 decoders to initialize (equivalent to H.265 VPS/SPS/PPS). + if is_key_frame { + if self.header_data.is_none() { + let header = self.get_av1_sequence_header()?; + debug!( + "AV1 sequence header ({} bytes): {:02X?}", + header.len(), + &header[..std::cmp::min(32, header.len())] + ); + self.header_data = Some(header); + } + if let Some(ref header) = self.header_data { + encoded_data.extend_from_slice(header); + } + } + + encoded_data.extend_from_slice(&self.encode_frame_internal(gop_position, is_key_frame)?); + + // Save the order_hint used during encoding BEFORE incrementing. + let encoded_order_hint = self.order_hint; + self.encode_frame_num += 1; + self.frame_num += 1; + self.order_hint = (self.order_hint + 1) & 0xFF; // 8-bit order hint + + // Only KEY frames are stored as references. P frames all reference the KEY frame + // and don't update any reference buffer, avoiding P→P which produces corrupt output + // on NVIDIA AV1 encoders. + if is_key_frame { + let ref_info = super::ReferenceInfo { + dpb_slot: self.current_dpb_slot, + order_hint: encoded_order_hint, + frame_type: ash::vk::native::StdVideoAV1FrameType_STD_VIDEO_AV1_FRAME_TYPE_KEY, + }; + self.references.clear(); + self.references.push(ref_info); + + // KEY frame uses the current DPB slot; pick a different one for P frames. 
+ let used_slots: Vec = self.references.iter().map(|r| r.dpb_slot).collect(); + for i in 0..self.dpb_slot_count as u8 { + if !used_slots.contains(&i) { + self.current_dpb_slot = i; + break; + } + } + } + // P frames reuse the same scratch DPB slot (current_dpb_slot stays unchanged + // between P frames since it's always different from the KEY frame's slot). + + Ok(EncodedPacket { + data: encoded_data, + frame_type, + is_key_frame, + pts: display_order, + dts: self.encode_frame_num - 1, + }) + } + + /// Flush the encoder and get any remaining packets. + pub fn flush(&mut self) -> Result> { + // No buffered frames in the current implementation. + Ok(Vec::new()) + } + + /// Request that the next frame be an IDR/key frame. + pub fn request_idr(&mut self) { + self.gop.request_idr(); + } + + /// Retrieve encoded AV1 Sequence Header OBU from video session parameters. + /// + /// Uses vkGetEncodedVideoSessionParametersKHR to get the driver-generated OBU. + /// The driver's sequence header must be used because the frame OBUs it produces + /// reference values from its internal sequence header (not ours). 
+ fn get_av1_sequence_header(&self) -> Result> { + let get_info = vk::VideoEncodeSessionParametersGetInfoKHR { + video_session_parameters: self.session_params, + ..Default::default() + }; + + let mut data = vec![0u8; 4096]; + let mut data_size: usize = data.len(); + let mut feedback = vk::VideoEncodeSessionParametersFeedbackInfoKHR::default(); + + let mut attempts = 0; + loop { + attempts += 1; + let result = unsafe { + (self + .video_encode_fn + .fp() + .get_encoded_video_session_parameters_khr)( + self.context.device().handle(), + &get_info, + &mut feedback, + &mut data_size, + data.as_mut_ptr() as *mut std::ffi::c_void, + ) + }; + + match result { + vk::Result::SUCCESS => { + if data_size == 0 { + return Err(PixelForgeError::SessionParametersCreation( + "AV1 sequence header size is 0".to_string(), + )); + } + data.truncate(data_size); + debug!("Retrieved AV1 sequence header: {} bytes", data.len()); + return Ok(data); + } + vk::Result::INCOMPLETE if attempts < 3 => { + let new_size = data_size.max(data.len() * 2).max(1); + data.resize(new_size, 0); + data_size = data.len(); + } + err => { + return Err(PixelForgeError::SessionParametersCreation(format!( + "Failed to get AV1 sequence header: {:?}", + err + ))); + } + } + } + } +} diff --git a/src/encoder/av1/encode.rs b/src/encoder/av1/encode.rs new file mode 100644 index 0000000..9ac5fa1 --- /dev/null +++ b/src/encoder/av1/encode.rs @@ -0,0 +1,552 @@ +use super::AV1Encoder; + +use crate::encoder::gop::GopPosition; +use crate::encoder::resources::{ + prepare_encode_command_buffer, record_dpb_barriers, record_post_encode_dpb_barrier, + submit_encode_and_read_bitstream, +}; +use crate::error::{PixelForgeError, Result}; +use ash::vk; +use tracing::debug; + +impl AV1Encoder { + pub(super) fn encode_frame_internal( + &mut self, + _gop_position: &GopPosition, + is_key_frame: bool, + ) -> Result> { + // All frames need a setup reference slot (DPB write) per Vulkan spec when maxDpbSlots > 0. 
+ let is_reference = true; + + debug!( + "encode_frame_internal: key={}, ref={}, refs_len={}, dpb_slot={}", + is_key_frame, + is_reference, + self.references.len(), + self.current_dpb_slot + ); + + // Rate control setup (matches H265 pattern: CQP/Disabled uses DISABLED mode). + let (rc_mode, average_bitrate, max_bitrate, qp) = match self.config.rate_control_mode { + crate::encoder::RateControlMode::Cqp | crate::encoder::RateControlMode::Disabled => ( + vk::VideoEncodeRateControlModeFlagsKHR::DISABLED, + 0, + 0, + self.config.quality_level, + ), + crate::encoder::RateControlMode::Cbr => ( + vk::VideoEncodeRateControlModeFlagsKHR::CBR, + self.config.target_bitrate, + self.config.target_bitrate, + 128u32, + ), + crate::encoder::RateControlMode::Vbr => ( + vk::VideoEncodeRateControlModeFlagsKHR::VBR, + self.config.target_bitrate, + self.config.max_bitrate, + 128u32, + ), + }; + + // Prepare command buffer for recording. + unsafe { + prepare_encode_command_buffer( + self.context.device(), + self.encode_command_buffer, + self.query_pool, + )?; + } + + // Transition DPB images for encode. + let ref_dpb_slots: Vec = self.references.iter().map(|r| r.dpb_slot).collect(); + unsafe { + record_dpb_barriers( + self.context.device(), + self.encode_command_buffer, + &self.dpb_images, + false, // AV1 does not use layered DPB + self.current_dpb_slot, + &ref_dpb_slots, + self.dpb_slot_active[self.current_dpb_slot as usize], + ); + } + + // AV1 frame type. + let frame_type = if is_key_frame { + ash::vk::native::StdVideoAV1FrameType_STD_VIDEO_AV1_FRAME_TYPE_KEY + } else { + ash::vk::native::StdVideoAV1FrameType_STD_VIDEO_AV1_FRAME_TYPE_INTER + }; + + // Build picture info flags using ash's accessor methods. + // show_frame must be set for all frames; error_resilient_mode for key frames (match FFmpeg). 
+ let mut picture_info_flags = ash::vk::native::StdVideoEncodeAV1PictureInfoFlags { + _bitfield_align_1: [], + _bitfield_1: Default::default(), + }; + picture_info_flags.set_show_frame(1); + if is_key_frame { + picture_info_flags.set_error_resilient_mode(1); + } else { + picture_info_flags.set_showable_frame(1); + } + + // Frame extent uses display dimensions for all picture resources. + // Per Vulkan spec, without MOTION_VECTOR_SCALING support, all picture resource + // codedExtent values must match, and srcPictureResource.codedExtent must equal + // the sequence header's max_frame_width/height. + let frame_extent = vk::Extent2D { + width: self.config.dimensions.width, + height: self.config.dimensions.height, + }; + + // Setup reconstructed picture (DPB slot for output). + let setup_picture_resource = vk::VideoPictureResourceInfoKHR::default() + .coded_offset(vk::Offset2D { x: 0, y: 0 }) + .coded_extent(frame_extent) + .base_array_layer(0) + .image_view_binding(self.dpb_image_views[self.current_dpb_slot as usize]); + // AV1 reference info for the setup slot. + let reference_info_flags = ash::vk::native::StdVideoEncodeAV1ReferenceInfoFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoEncodeAV1ReferenceInfoFlags::new_bitfield_1( + 0, 0, 0, + ), + }; + + let std_reference_info = ash::vk::native::StdVideoEncodeAV1ReferenceInfo { + flags: reference_info_flags, + frame_type: if is_key_frame { + ash::vk::native::StdVideoAV1FrameType_STD_VIDEO_AV1_FRAME_TYPE_KEY + } else { + ash::vk::native::StdVideoAV1FrameType_STD_VIDEO_AV1_FRAME_TYPE_INTER + }, + RefFrameId: self.current_dpb_slot as u32, + OrderHint: self.order_hint as u8, + reserved1: [0; 3], + pExtensionHeader: std::ptr::null(), + }; + + // AV1 DPB slot info for the setup reference slot (the slot being written). 
+ let setup_av1_dpb_info = + vk::VideoEncodeAV1DpbSlotInfoKHR::default().std_reference_info(&std_reference_info); + + let mut setup_reference_slot = vk::VideoReferenceSlotInfoKHR::default() + .slot_index(self.current_dpb_slot as i32) + .picture_resource(&setup_picture_resource); + + // Attach AV1 DPB info to setup slot's pNext chain. + setup_reference_slot.p_next = + (&setup_av1_dpb_info as *const vk::VideoEncodeAV1DpbSlotInfoKHR).cast(); + + // Reference frames for inter frames. + let mut reference_slots = Vec::new(); + let mut av1_reference_infos = Vec::new(); + let mut ref_picture_resources = Vec::new(); + let mut ref_std_infos = Vec::new(); // Store std info to keep it alive + + if !is_key_frame && !self.references.is_empty() { + // Use the most recent reference frame. + let ref_info = &self.references[0]; + + // Create StdVideoEncodeAV1ReferenceInfo for the reference slot. + let ref_std_info = ash::vk::native::StdVideoEncodeAV1ReferenceInfo { + flags: reference_info_flags, + frame_type: ref_info.frame_type, + RefFrameId: ref_info.dpb_slot as u32, + OrderHint: ref_info.order_hint as u8, + reserved1: [0; 3], + pExtensionHeader: std::ptr::null(), + }; + ref_std_infos.push(ref_std_info); + + // Create AV1 DPB slot info for the reference (without pointer first). + let av1_ref_info = vk::VideoEncodeAV1DpbSlotInfoKHR::default(); + av1_reference_infos.push(av1_ref_info); + // Now set the pointer after it's in the vector at its final location. + av1_reference_infos[0] = av1_reference_infos[0].std_reference_info(&ref_std_infos[0]); + + let ref_picture_resource = vk::VideoPictureResourceInfoKHR::default() + .coded_offset(vk::Offset2D { x: 0, y: 0 }) + .coded_extent(frame_extent) + .base_array_layer(0) + .image_view_binding(self.dpb_image_views[ref_info.dpb_slot as usize]); + ref_picture_resources.push(ref_picture_resource); + + // Create reference slot (without pNext first). 
+ let ref_slot = vk::VideoReferenceSlotInfoKHR::default() + .slot_index(ref_info.dpb_slot as i32) + .picture_resource(&ref_picture_resources[0]); + reference_slots.push(ref_slot); + // Now set pNext after it's in the vector at its final location. + reference_slots[0].p_next = + (&av1_reference_infos[0] as *const vk::VideoEncodeAV1DpbSlotInfoKHR).cast(); + } + + // AV1 quantization parameters - required structure. + // Start with a moderate QP that the rate controller can adjust. + let quantization_flags = ash::vk::native::StdVideoAV1QuantizationFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoAV1QuantizationFlags::new_bitfield_1( + 0, // using_qmatrix + 0, // diff_uv_delta + 0, // reserved + ), + }; + + let quantization = ash::vk::native::StdVideoAV1Quantization { + flags: quantization_flags, + base_q_idx: qp as u8, // Use the same QP as constant_q_index + DeltaQYDc: 0, + DeltaQUDc: 0, + DeltaQUAc: 0, + DeltaQVDc: 0, + DeltaQVAc: 0, + qm_y: 0, + qm_u: 0, + qm_v: 0, + }; + + // CDEF (Constrained Directional Enhancement Filter) - required since we enabled it in sequence header. + // Match FFmpeg's default initialization (all zeros). + let cdef = ash::vk::native::StdVideoAV1CDEF { + cdef_damping_minus_3: 0, // Match FFmpeg: damping = 3 + cdef_bits: 0, // 1 CDEF strength combination (2^0) + cdef_y_pri_strength: [0, 0, 0, 0, 0, 0, 0, 0], // Match FFmpeg: all zeros + cdef_y_sec_strength: [0, 0, 0, 0, 0, 0, 0, 0], + cdef_uv_pri_strength: [0, 0, 0, 0, 0, 0, 0, 0], + cdef_uv_sec_strength: [0, 0, 0, 0, 0, 0, 0, 0], + }; + + // Loop filter - deblocking filter parameters. + // Match FFmpeg's default initialization. 
+ let loop_filter_flags = ash::vk::native::StdVideoAV1LoopFilterFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoAV1LoopFilterFlags::new_bitfield_1( + 0, // loop_filter_delta_enabled + 0, // loop_filter_delta_update + 0, // reserved + ), + }; + + let loop_filter = ash::vk::native::StdVideoAV1LoopFilter { + flags: loop_filter_flags, + loop_filter_level: [0, 0, 0, 0], // Match FFmpeg: disable filter initially + loop_filter_sharpness: 0, + update_ref_delta: 0, + // Match FFmpeg's default_loop_filter_ref_deltas: { 1, 0, 0, 0, -1, 0, -1, -1 } + loop_filter_ref_deltas: [1, 0, 0, 0, -1, 0, -1, -1], + update_mode_delta: 1, // Match FFmpeg: set to 1 + loop_filter_mode_deltas: [0; 2], + }; + + // Tile info - FFmpeg has this commented out with "TODO FIX" at line 340. + // Match FFmpeg: don't provide tile info (set to null). + // Note: If this causes issues, we may need to re-enable it with proper values. + + // Build ref_frame_idx, ref_order_hint, refresh_frame_flags, and primary_ref_frame. + // + // All P frames reference only the KEY frame (stored in buffer 0). + // P frames set refresh_frame_flags=0x00 (don't update any reference buffer). + // This avoids P→P references which produce corrupt output on NVIDIA AV1 encoders. + let (ref_frame_idx, ref_order_hint, primary_ref_frame, refresh_frame_flags) = + if !is_key_frame && !self.references.is_empty() { + // All reference names point to buffer 0 (where KEY frame lives). + let ref_idx = [0i8; 7]; + let ref_info = &self.references[0]; + let mut order_hints = [0u8; 8]; + order_hints[0] = ref_info.order_hint as u8; + + // P frames don't refresh any reference buffer. + (ref_idx, order_hints, 0u8, 0x00u8) + } else { + // KEY frame: refresh all buffers. + ([0i8; 7], [0u8; 8], 7u8, 0xFFu8) + }; + + // AV1 encode picture info. 
+ let std_picture_info = ash::vk::native::StdVideoEncodeAV1PictureInfo { + flags: picture_info_flags, + frame_type, + frame_presentation_time: self.frame_num, + current_frame_id: self.current_dpb_slot as u32, // Match FFmpeg: slot index + order_hint: self.order_hint as u8, + primary_ref_frame, + refresh_frame_flags, + coded_denom: 0, + render_width_minus_1: (self.config.dimensions.width - 1) as u16, + render_height_minus_1: (self.config.dimensions.height - 1) as u16, + interpolation_filter: ash::vk::native::StdVideoAV1InterpolationFilter_STD_VIDEO_AV1_INTERPOLATION_FILTER_EIGHTTAP, + TxMode: ash::vk::native::StdVideoAV1TxMode_STD_VIDEO_AV1_TX_MODE_SELECT, + delta_q_res: 0, + delta_lf_res: 0, + ref_order_hint, + ref_frame_idx, + reserved1: [0; 3], + delta_frame_id_minus_1: [0; 7], + pTileInfo: std::ptr::null(), + pQuantization: &quantization, + pSegmentation: std::ptr::null(), + pLoopFilter: &loop_filter, + pCDEF: &cdef, + pLoopRestoration: std::ptr::null(), + pGlobalMotion: std::ptr::null(), + pExtensionHeader: std::ptr::null(), + pBufferRemovalTimes: std::ptr::null(), + }; + + // Reference name slot indices - maps AV1 reference names to Vulkan DPB slot indices. + // Only set entries for reference names that appear in pReferenceSlots. + // For SINGLE_REFERENCE mode, only LAST_FRAME (index 0) is used. + let mut reference_name_slot_indices = [-1i32; 7]; + + if !is_key_frame && !self.references.is_empty() { + // Map LAST_FRAME to the reference's DPB slot. + let ref_info = &self.references[0]; + reference_name_slot_indices[0] = ref_info.dpb_slot as i32; + } + + // Set prediction mode and rate control group based on frame type. 
+ let (prediction_mode, rate_control_group) = if is_key_frame { + ( + vk::VideoEncodeAV1PredictionModeKHR::INTRA_ONLY, + vk::VideoEncodeAV1RateControlGroupKHR::INTRA, + ) + } else { + ( + vk::VideoEncodeAV1PredictionModeKHR::SINGLE_REFERENCE, + vk::VideoEncodeAV1RateControlGroupKHR::PREDICTIVE, + ) + }; + + let mut av1_picture_info = vk::VideoEncodeAV1PictureInfoKHR::default() + .std_picture_info(&std_picture_info) + .prediction_mode(prediction_mode) + .rate_control_group(rate_control_group) + .reference_name_slot_indices(reference_name_slot_indices); + + // For DISABLED rate control mode, set constant_q_index on the picture info. + if rc_mode == vk::VideoEncodeRateControlModeFlagsKHR::DISABLED { + av1_picture_info = av1_picture_info.constant_q_index(qp); + } + + // AV1-specific rate control layer info. + let min_q_index = vk::VideoEncodeAV1QIndexKHR { + intra_q_index: qp, + predictive_q_index: qp, + bipredictive_q_index: qp, + }; + let max_q_index = vk::VideoEncodeAV1QIndexKHR { + intra_q_index: qp, + predictive_q_index: qp, + bipredictive_q_index: qp, + }; + let mut av1_rc_layer_info = vk::VideoEncodeAV1RateControlLayerInfoKHR::default() + .use_min_q_index(true) + .min_q_index(min_q_index) + .use_max_q_index(true) + .max_q_index(max_q_index); + + let mut rc_layer_info = vk::VideoEncodeRateControlLayerInfoKHR::default() + .average_bitrate(average_bitrate as u64) + .max_bitrate(max_bitrate as u64) + .frame_rate_numerator(self.config.frame_rate_numerator) + .frame_rate_denominator(self.config.frame_rate_denominator); + rc_layer_info.p_next = + (&mut av1_rc_layer_info as *mut vk::VideoEncodeAV1RateControlLayerInfoKHR).cast(); + let rc_layers = [rc_layer_info]; + + // AV1-specific rate control info. 
+ let mut av1_rc_info = vk::VideoEncodeAV1RateControlInfoKHR::default() + .gop_frame_count(self.config.gop_size) + .key_frame_period(self.config.gop_size) + .consecutive_bipredictive_frame_count(0) + .temporal_layer_count(1); + + // Rate control info (matches H265 pattern: only add layers/buffer for non-DISABLED modes). + let mut rc_info = vk::VideoEncodeRateControlInfoKHR::default().rate_control_mode(rc_mode); + + if rc_mode != vk::VideoEncodeRateControlModeFlagsKHR::DISABLED { + rc_info = rc_info + .layers(&rc_layers) + .virtual_buffer_size_in_ms(self.config.virtual_buffer_size_ms) + .initial_virtual_buffer_size_in_ms(self.config.initial_virtual_buffer_size_ms); + rc_info.p_next = (&mut av1_rc_info as *mut vk::VideoEncodeAV1RateControlInfoKHR).cast(); + } + + // Video begin coding info. + // Include the setup slot (with slot_index -1 to indicate it's not yet active) + // and any reference slots that will be used for reading during encoding. + let mut all_reference_slots = Vec::new(); + + if is_reference { + // Build a separate setup slot for begin coding with slot_index = -1. + // This tells the implementation the slot is being set up, not yet active. + let mut setup_slot_for_begin = vk::VideoReferenceSlotInfoKHR::default() + .slot_index(-1) + .picture_resource(&setup_picture_resource); + setup_slot_for_begin.p_next = + (&setup_av1_dpb_info as *const vk::VideoEncodeAV1DpbSlotInfoKHR).cast(); + all_reference_slots.push(setup_slot_for_begin); + } + + // Add reference slots (already active slots we're reading from) + all_reference_slots.extend_from_slice(&reference_slots); + + debug!( + "Begin coding: {} reference slots (setup={}, refs={})", + all_reference_slots.len(), + if is_reference { 1 } else { 0 }, + reference_slots.len() + ); + + // Begin video coding with rate control info for non-first frames. 
+ let is_first_frame = self.encode_frame_num == 0; + + let begin_coding_info = if is_first_frame { + vk::VideoBeginCodingInfoKHR::default() + .video_session(self.session) + .video_session_parameters(self.session_params) + .reference_slots(&all_reference_slots) + } else { + let mut info = vk::VideoBeginCodingInfoKHR::default() + .video_session(self.session) + .video_session_parameters(self.session_params) + .reference_slots(&all_reference_slots); + info.p_next = (&rc_info as *const vk::VideoEncodeRateControlInfoKHR).cast(); + info + }; + + unsafe { + self.video_queue_fn + .cmd_begin_video_coding(self.encode_command_buffer, &begin_coding_info); + } + + // Reset video coding state for the first frame. + // Combine RESET + RATE_CONTROL + QUALITY_LEVEL into a single control command. + // This matches the H265 approach and is required for AMD RADV. + if is_first_frame { + let mut quality_level_info = + vk::VideoEncodeQualityLevelInfoKHR::default().quality_level(0); + quality_level_info.p_next = + (&rc_info as *const vk::VideoEncodeRateControlInfoKHR).cast(); + + let mut control_info = vk::VideoCodingControlInfoKHR::default().flags( + vk::VideoCodingControlFlagsKHR::RESET + | vk::VideoCodingControlFlagsKHR::ENCODE_RATE_CONTROL + | vk::VideoCodingControlFlagsKHR::ENCODE_QUALITY_LEVEL, + ); + control_info.p_next = + (&quality_level_info as *const vk::VideoEncodeQualityLevelInfoKHR).cast(); + + unsafe { + self.video_queue_fn + .cmd_control_video_coding(self.encode_command_buffer, &control_info); + } + } + + // Encode info. 
+ let src_picture_resource = vk::VideoPictureResourceInfoKHR::default() + .coded_offset(vk::Offset2D { x: 0, y: 0 }) + .coded_extent(frame_extent) + .base_array_layer(0) + .image_view_binding(self.input_image_view); + + let mut encode_info = vk::VideoEncodeInfoKHR::default() + .src_picture_resource(src_picture_resource) + .dst_buffer(self.bitstream_buffer) + .dst_buffer_offset(0) + .dst_buffer_range(self.bitstream_buffer_size as u64); + + if is_reference { + encode_info = encode_info.setup_reference_slot(&setup_reference_slot); + } + + if !reference_slots.is_empty() { + encode_info = encode_info.reference_slots(&reference_slots); + } + + encode_info.p_next = (&av1_picture_info as *const vk::VideoEncodeAV1PictureInfoKHR).cast(); + + // Begin query to capture encode feedback (bitstream size, status). + unsafe { + self.context.device().cmd_begin_query( + self.encode_command_buffer, + self.query_pool, + 0, + vk::QueryControlFlags::empty(), + ); + } + + unsafe { + self.video_encode_fn + .cmd_encode_video(self.encode_command_buffer, &encode_info); + } + + // End query. + unsafe { + self.context + .device() + .cmd_end_query(self.encode_command_buffer, self.query_pool, 0); + } + + // Add DPB synchronization barrier after encoding. + unsafe { + record_post_encode_dpb_barrier( + self.context.device(), + self.encode_command_buffer, + &self.dpb_images, + false, // AV1 does not use layered DPB + self.current_dpb_slot, + ); + } + + // End video coding. + let end_coding_info = vk::VideoEndCodingInfoKHR::default(); + unsafe { + self.video_queue_fn + .cmd_end_video_coding(self.encode_command_buffer, &end_coding_info); + } + + // End command buffer. + unsafe { + self.context + .device() + .end_command_buffer(self.encode_command_buffer) + } + .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; + + // Submit, wait, and read bitstream. 
+ let encode_queue = self.context.video_encode_queue().ok_or_else(|| { + PixelForgeError::NoSuitableDevice("No video encode queue available".to_string()) + })?; + + debug!( + "Submitting frame {} to GPU: key={}, num_refs={}, cur_slot={}", + self.encode_frame_num, + is_key_frame, + self.references.len(), + self.current_dpb_slot + ); + + let gpu_start = std::time::Instant::now(); + + let encoded_data = unsafe { + submit_encode_and_read_bitstream( + self.context.device(), + self.encode_command_buffer, + self.encode_fence, + encode_queue, + self.query_pool, + self.bitstream_buffer_ptr, + )? + }; + + debug!("GPU encode took {:?}", gpu_start.elapsed()); + + // Mark current DPB slot as active. + self.dpb_slot_active[self.current_dpb_slot as usize] = true; + + Ok(encoded_data) + } +} diff --git a/src/encoder/av1/init.rs b/src/encoder/av1/init.rs new file mode 100644 index 0000000..1e12b90 --- /dev/null +++ b/src/encoder/av1/init.rs @@ -0,0 +1,545 @@ +use super::{AV1Encoder, MIN_BITSTREAM_BUFFER_SIZE, SUPERBLOCK_SIZE}; + +use crate::encoder::gop::GopStructure; +use crate::encoder::resources::{ + allocate_session_memory, clear_input_image, create_bitstream_buffer, create_command_resources, + create_dpb_images, create_image, get_video_format, make_codec_name, map_bitstream_buffer, + query_supported_video_formats, ClearImageParams, +}; +use crate::encoder::PixelFormat; +use crate::error::{PixelForgeError, Result}; +use crate::vulkan::VideoContext; +use ash::vk; +use std::ptr; +use tracing::{debug, info, warn}; + +impl AV1Encoder { + /// Create a new AV1 encoder. + pub fn new(context: VideoContext, config: crate::encoder::EncodeConfig) -> Result { + // B-frames are not yet supported. + if config.b_frame_count > 0 { + panic!( + "B-frame encoding is not yet supported. Set b_frame_count=0 in encoder config. 
\
+ Got b_frame_count={}",
+ config.b_frame_count
+ );
+ }
+
+ let width = config.dimensions.width;
+ let height = config.dimensions.height;
+
+ warn!(
+ "AV1 encoding is experimental. On NVIDIA GPUs, P-frames cannot reference other \
+ P-frames, causing all P-frames to reference the I-frame instead. This leads to \
+ progressively larger frame sizes over time. Consider using H.264 or HEVC until \
+ this is resolved."
+ );
+
+ info!(
+ "Creating AV1 encoder: requested {}x{}, pixel_format={:?}",
+ width, height, config.pixel_format
+ );
+
+ // Load video queue extension functions.
+ let video_queue_fn =
+ ash::khr::video_queue::Device::load(context.instance(), context.device());
+ let video_encode_fn =
+ ash::khr::video_encode_queue::Device::load(context.instance(), context.device());
+
+ // Get chroma subsampling from pixel format.
+ let chroma_subsampling: vk::VideoChromaSubsamplingFlagsKHR = config.pixel_format.into();
+ let luma_bit_depth: vk::VideoComponentBitDepthFlagsKHR = config.bit_depth.into();
+ let chroma_bit_depth: vk::VideoComponentBitDepthFlagsKHR = config.bit_depth.into();
+
+ // AV1 profile selection based on chroma subsampling (not bit depth).
+ // Main profile (seq_profile 0): 8/10-bit, 4:2:0 (or monochrome) only.
+ // High profile (seq_profile 1): 8/10-bit, 4:4:4 only — NOTE(review): 4:2:2 needs Professional.
+ let profile = match config.pixel_format {
+ PixelFormat::Yuv420 => ash::vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_MAIN,
+ _ => ash::vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_HIGH,
+ };
+
+ // Preferred input format based on pixel format and bit depth.
+ let preferred_src_format = get_video_format(config.pixel_format, config.bit_depth);
+
+ // Create AV1 encode profile.
+ let mut av1_profile_info = vk::VideoEncodeAV1ProfileInfoKHR::default().std_profile(profile); + + let mut profile_info = vk::VideoProfileInfoKHR::default() + .video_codec_operation(vk::VideoCodecOperationFlagsKHR::ENCODE_AV1) + .chroma_subsampling(chroma_subsampling) + .luma_bit_depth(luma_bit_depth) + .chroma_bit_depth(chroma_bit_depth); + profile_info.p_next = + (&mut av1_profile_info as *mut vk::VideoEncodeAV1ProfileInfoKHR).cast(); + + // Query encode capabilities. + let video_queue_instance = + ash::khr::video_queue::Instance::load(context.entry(), context.instance()); + let mut av1_capabilities = vk::VideoEncodeAV1CapabilitiesKHR::default(); + let mut encode_capabilities = vk::VideoEncodeCapabilitiesKHR { + p_next: (&mut av1_capabilities as *mut vk::VideoEncodeAV1CapabilitiesKHR).cast(), + ..Default::default() + }; + let mut capabilities = vk::VideoCapabilitiesKHR { + p_next: (&mut encode_capabilities as *mut vk::VideoEncodeCapabilitiesKHR).cast(), + ..Default::default() + }; + + let result = unsafe { + (video_queue_instance + .fp() + .get_physical_device_video_capabilities_khr)( + context.physical_device(), + &profile_info, + &mut capabilities, + ) + }; + if result != vk::Result::SUCCESS { + return Err(PixelForgeError::NoSuitableDevice(format!( + "Failed to query Vulkan Video encode capabilities for AV1: {:?}", + result + ))); + } + + // Helper functions for alignment calculations. 
+ let gcd = |mut a: u32, mut b: u32| { + while b != 0 { + let tmp = a % b; + a = b; + b = tmp; + } + a + }; + let lcm = |a: u32, b: u32| { + if a == 0 || b == 0 { + 0 + } else { + a / gcd(a, b) * b + } + }; + let align_up = |value: u32, alignment: u32| { + if alignment <= 1 { + value + } else { + value.div_ceil(alignment) * alignment + } + }; + + let gran_w = capabilities.picture_access_granularity.width.max(1); + let gran_h = capabilities.picture_access_granularity.height.max(1); + let align_w = lcm(SUPERBLOCK_SIZE, gran_w); + let align_h = lcm(SUPERBLOCK_SIZE, gran_h); + + let mut aligned_width = align_up(width, align_w); + let mut aligned_height = align_up(height, align_h); + + aligned_width = aligned_width.max(capabilities.min_coded_extent.width); + aligned_height = aligned_height.max(capabilities.min_coded_extent.height); + + if aligned_width > capabilities.max_coded_extent.width + || aligned_height > capabilities.max_coded_extent.height + { + return Err(PixelForgeError::InvalidInput(format!( + "Requested coded extent {}x{} (aligned to {}x{}) exceeds device max {}x{}", + width, + height, + aligned_width, + aligned_height, + capabilities.max_coded_extent.width, + capabilities.max_coded_extent.height + ))); + } + + info!( + "Using coded extent {}x{} (granularity {}x{})", + aligned_width, aligned_height, gran_w, gran_h + ); + + // Query supported formats. 
+ let supported_src_formats = query_supported_video_formats( + &context, + &profile_info, + vk::ImageUsageFlags::VIDEO_ENCODE_SRC_KHR, + )?; + let supported_dpb_formats = query_supported_video_formats( + &context, + &profile_info, + vk::ImageUsageFlags::VIDEO_ENCODE_DPB_KHR, + )?; + + if supported_src_formats.is_empty() { + return Err(PixelForgeError::NoSuitableDevice( + "No supported Vulkan Video SRC formats for AV1".to_string(), + )); + } + if supported_dpb_formats.is_empty() { + return Err(PixelForgeError::NoSuitableDevice( + "No supported Vulkan Video DPB formats for AV1".to_string(), + )); + } + + info!("Supported SRC formats: {:?}", supported_src_formats); + info!("Supported DPB formats: {:?}", supported_dpb_formats); + + let picture_format = if supported_src_formats.contains(&preferred_src_format) { + preferred_src_format + } else { + return Err(PixelForgeError::NoSuitableDevice(format!( + "Preferred input format {:?} not supported for AV1", + preferred_src_format + ))); + }; + + let reference_picture_format = supported_dpb_formats + .iter() + .copied() + .find(|f| *f == picture_format) + .unwrap_or(supported_dpb_formats[0]); + + debug!( + "Selected formats: picture={:?}, reference={:?}", + picture_format, reference_picture_format + ); + + // Get encode queue family. + let encode_queue_family = context.video_encode_queue_family().ok_or_else(|| { + PixelForgeError::NoSuitableDevice("No video encode queue family available".to_string()) + })?; + + // Create video session. + let std_header_version = vk::ExtensionProperties { + extension_name: make_codec_name(b"VK_STD_vulkan_video_codec_av1_encode"), + spec_version: vk::make_api_version(0, 1, 0, 0), + }; + + // Calculate DPB slots and active references. 
+ let max_dpb_slots_supported = capabilities.max_dpb_slots as usize;
+ let max_active_reference_pictures_supported =
+ capabilities.max_active_reference_pictures as usize;
+
+ if max_dpb_slots_supported < 2 {
+ return Err(PixelForgeError::NoSuitableDevice(format!(
+ "Device reports max_dpb_slots={}, need at least 2",
+ max_dpb_slots_supported
+ )));
+ }
+
+ let mut target_active_refs = (config.max_reference_frames as usize)
+ .min(max_active_reference_pictures_supported)
+ .min(8); // AV1 keeps 8 reference buffers, but only 7 named refs are usable per frame
+
+ if target_active_refs < 1 && max_active_reference_pictures_supported >= 1 {
+ target_active_refs = 1;
+ }
+
+ // AV1 typically needs: active refs + 1 for current frame being setup
+ let requested_dpb_slots = (target_active_refs + 1).min(max_dpb_slots_supported);
+
+ info!(
+ "DPB configuration: slots={}, active_refs={} (max_supported: slots={}, refs={})",
+ requested_dpb_slots,
+ target_active_refs,
+ max_dpb_slots_supported,
+ max_active_reference_pictures_supported
+ );
+
+ let session_create_info = vk::VideoSessionCreateInfoKHR::default()
+ .queue_family_index(encode_queue_family)
+ .video_profile(&profile_info)
+ .picture_format(picture_format)
+ .max_coded_extent(vk::Extent2D {
+ width: aligned_width,
+ height: aligned_height,
+ })
+ .reference_picture_format(reference_picture_format)
+ .max_dpb_slots(requested_dpb_slots as u32)
+ .max_active_reference_pictures(target_active_refs as u32)
+ .std_header_version(&std_header_version);
+
+ let mut session = vk::VideoSessionKHR::null();
+ let result = unsafe {
+ (video_queue_fn.fp().create_video_session_khr)(
+ context.device().handle(),
+ &session_create_info,
+ ptr::null(),
+ &mut session,
+ )
+ };
+ if result != vk::Result::SUCCESS {
+ return Err(PixelForgeError::VideoSessionCreation(format!(
+ "{:?}",
+ result
+ )));
+ }
+
+ // Allocate session memory.
+ let session_memory = allocate_session_memory(&context, session, &video_queue_fn)?;
+
+ // Create AV1 sequence header - similar to H.265 VPS/SPS/PPS but for AV1.
+ // Calculate frame dimension bits. NOTE(review): width/height of 1 yields 0 here and underflows the -1 below — confirm min extent >= 2.
+ // Use actual display dimensions for sequence header (not coded extent).
+ // The video session's max_coded_extent is the upper bound for alignment,
+ // but the sequence header and per-frame coded extents use display dimensions.
+ let frame_width_bits = 32 - (width - 1).leading_zeros();
+ let frame_height_bits = 32 - (height - 1).leading_zeros();
+
+ // AV1 color configuration.
+ let color_config_flags = ash::vk::native::StdVideoAV1ColorConfigFlags {
+ _bitfield_align_1: [],
+ _bitfield_1: ash::vk::native::StdVideoAV1ColorConfigFlags::new_bitfield_1(
+ 0, // mono_chrome
+ 1, // color_range (full range)
+ 0, // separate_uv_delta_q
+ 1, // color_description_present_flag (we provide color primaries/transfer/matrix)
+ 0, // reserved
+ ),
+ };
+
+ // Bit depth: 8 for Eight, 10 for Ten
+ let bit_depth = match config.bit_depth {
+ crate::encoder::BitDepth::Eight => 8,
+ crate::encoder::BitDepth::Ten => 10,
+ };
+
+ // Chroma subsampling based on pixel format.
+ let (subsampling_x, subsampling_y) = match config.pixel_format { + PixelFormat::Yuv420 => (1u8, 1u8), // 4:2:0 + PixelFormat::Yuv444 => (0u8, 0u8), // 4:4:4 + _ => (1u8, 1u8), // Default to 4:2:0 + }; + + let color_config = ash::vk::native::StdVideoAV1ColorConfig { + flags: color_config_flags, + BitDepth: bit_depth, + subsampling_x, + subsampling_y, + reserved1: 0, + color_primaries: ash::vk::native::StdVideoAV1ColorPrimaries_STD_VIDEO_AV1_COLOR_PRIMARIES_BT_709, + transfer_characteristics: ash::vk::native::StdVideoAV1TransferCharacteristics_STD_VIDEO_AV1_TRANSFER_CHARACTERISTICS_BT_709, + matrix_coefficients: ash::vk::native::StdVideoAV1MatrixCoefficients_STD_VIDEO_AV1_MATRIX_COEFFICIENTS_BT_709, + chroma_sample_position: ash::vk::native::StdVideoAV1ChromaSamplePosition_STD_VIDEO_AV1_CHROMA_SAMPLE_POSITION_UNKNOWN, + }; + + // AV1 sequence header flags - use minimal set to avoid driver issues. + // Disable features we're not providing data for (restoration, most inter-frame features). 
+ let seq_flags = ash::vk::native::StdVideoAV1SequenceHeaderFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoAV1SequenceHeaderFlags::new_bitfield_1( + 0, // still_picture + 0, // reduced_still_picture_header + 0, // use_128x128_superblock (use 64x64 superblocks) + 0, // enable_filter_intra - disable for simplicity + 0, // enable_intra_edge_filter - disable for simplicity + 0, // enable_interintra_compound + 0, // enable_masked_compound + 0, // enable_warped_motion - disable for simplicity + 0, // enable_dual_filter - disable for simplicity + 1, // enable_order_hint - keep for reference frames + 0, // enable_jnt_comp + 0, // enable_ref_frame_mvs + 0, // frame_id_numbers_present_flag + 0, // enable_superres + 1, // enable_cdef - keep enabled + 0, // enable_restoration - DISABLE (we don't provide restoration data) + 0, // film_grain_params_present + 0, // timing_info_present_flag + 0, // initial_display_delay_present_flag + 0, // reserved + ), + }; + + let av1_sequence_header = ash::vk::native::StdVideoAV1SequenceHeader { + flags: seq_flags, + seq_profile: profile, + frame_width_bits_minus_1: (frame_width_bits - 1) as u8, + frame_height_bits_minus_1: (frame_height_bits - 1) as u8, + max_frame_width_minus_1: (width - 1) as u16, + max_frame_height_minus_1: (height - 1) as u16, + delta_frame_id_length_minus_2: 0, + additional_frame_id_length_minus_1: 0, + order_hint_bits_minus_1: 7, // 8 bits for order hint + seq_force_integer_mv: 0, + seq_force_screen_content_tools: 0, + reserved1: [0; 5], + pColorConfig: &color_config, + pTimingInfo: ptr::null(), // No timing info + }; + + // Create decoder model info (zero-initialized like FFmpeg). 
+ let decoder_model_info = ash::vk::native::StdVideoEncodeAV1DecoderModelInfo { + buffer_delay_length_minus_1: 0, + buffer_removal_time_length_minus_1: 0, + frame_presentation_time_length_minus_1: 0, + reserved1: 0, + num_units_in_decoding_tick: 0, + }; + + // Create operating point info (single operating point like FFmpeg). + let operating_point = ash::vk::native::StdVideoEncodeAV1OperatingPointInfo { + flags: ash::vk::native::StdVideoEncodeAV1OperatingPointInfoFlags { + _bitfield_align_1: [], + _bitfield_1: + ash::vk::native::StdVideoEncodeAV1OperatingPointInfoFlags::new_bitfield_1( + 0, // decoder_model_present_for_this_op + 0, // low_delay_mode_flag + 0, // initial_display_delay_present_for_this_op + 0, // reserved + ), + }, + operating_point_idc: 0, + seq_level_idx: 5, // Level 3.1 (encoded as: 2.0=0, 2.1=1, ... 3.0=4, 3.1=5) + seq_tier: 0, + initial_display_delay_minus_1: 0, + decoder_buffer_delay: 0, + encoder_buffer_delay: 0, + }; + + // Create session parameters with all required structures (matching FFmpeg). + let mut av1_session_params_create_info = + vk::VideoEncodeAV1SessionParametersCreateInfoKHR::default() + .std_sequence_header(&av1_sequence_header) + .std_decoder_model_info(&decoder_model_info) + .std_operating_points(std::slice::from_ref(&operating_point)); + + // Add quality level info to pNext chain (matching FFmpeg). 
+ // Chain: SessionParametersCreateInfo -> QualityLevelInfo -> AV1SessionParametersCreateInfo + let mut quality_info = vk::VideoEncodeQualityLevelInfoKHR::default().quality_level(0); // Default quality level + + quality_info.p_next = (&mut av1_session_params_create_info + as *mut vk::VideoEncodeAV1SessionParametersCreateInfoKHR) + .cast(); + + let session_params_create_info = vk::VideoSessionParametersCreateInfoKHR { + video_session: session, + p_next: (&mut quality_info as *mut vk::VideoEncodeQualityLevelInfoKHR).cast(), + ..Default::default() + }; + + let session_params = unsafe { + video_queue_fn + .create_video_session_parameters(&session_params_create_info, None) + .map_err(|e| { + PixelForgeError::SessionParametersCreation(format!( + "Failed to create AV1 session parameters: {:?}", + e + )) + })? + }; + // Create input image. + let (input_image, input_image_memory, input_image_view) = create_image( + &context, + aligned_width, + aligned_height, + picture_format, + false, // is_dpb + &profile_info, + )?; + let input_image_layout = vk::ImageLayout::UNDEFINED; + + // Create DPB images. + let (dpb_images, dpb_image_memories, dpb_image_views) = create_dpb_images( + &context, + aligned_width, + aligned_height, + reference_picture_format, + requested_dpb_slots, + &profile_info, + false, + )?; + // Create bitstream buffer. + let bitstream_buffer_size = MIN_BITSTREAM_BUFFER_SIZE.max(width as usize * height as usize); + let (bitstream_buffer, bitstream_buffer_memory) = + create_bitstream_buffer(&context, bitstream_buffer_size, &profile_info)?; + // Map bitstream buffer persistently. + let bitstream_buffer_ptr = + map_bitstream_buffer(&context, bitstream_buffer_memory, bitstream_buffer_size)?; + // Create command resources. 
+ let upload_queue_family = context.transfer_queue_family(); + let cmd_resources = + create_command_resources(&context, encode_queue_family, upload_queue_family)?; + let command_pool = cmd_resources.command_pool; + let upload_command_buffer = cmd_resources.upload_command_buffer; + let upload_fence = cmd_resources.upload_fence; + let encode_command_buffer = cmd_resources.encode_command_buffer; + let encode_fence = cmd_resources.encode_fence; + // Clear the input image so padding between user dimensions and the + // aligned coded extent is zero-initialized. + clear_input_image( + &context, + &ClearImageParams { + command_buffer: upload_command_buffer, + fence: upload_fence, + queue: context.transfer_queue(), + image: input_image, + width: aligned_width, + height: aligned_height, + pixel_format: config.pixel_format, + bit_depth: config.bit_depth, + }, + )?; + // Create query pool for bitstream size queries. + // Need 1 query to capture bitstream offset and size. + // Need to provide profile info and feedback flags in pNext chain. + let mut query_feedback_info = vk::QueryPoolVideoEncodeFeedbackCreateInfoKHR::default() + .encode_feedback_flags( + vk::VideoEncodeFeedbackFlagsKHR::BITSTREAM_BUFFER_OFFSET + | vk::VideoEncodeFeedbackFlagsKHR::BITSTREAM_BYTES_WRITTEN, + ); + query_feedback_info.p_next = (&profile_info as *const vk::VideoProfileInfoKHR).cast(); + + let mut query_pool_create_info = vk::QueryPoolCreateInfo::default() + .query_type(vk::QueryType::VIDEO_ENCODE_FEEDBACK_KHR) + .query_count(1); + query_pool_create_info.p_next = + (&query_feedback_info as *const vk::QueryPoolVideoEncodeFeedbackCreateInfoKHR).cast(); + + let query_pool = unsafe { + context + .device() + .create_query_pool(&query_pool_create_info, None) + .map_err(|e| PixelForgeError::QueryPool(e.to_string()))? + }; + + // Initialize GOP structure. 
+ let gop = GopStructure::new(config.gop_size, config.b_frame_count, config.gop_size); + + Ok(Self { + context, + config, + gop, + video_queue_fn, + video_encode_fn, + session, + session_params, + session_memory, + input_frame_num: 0, + encode_frame_num: 0, + frame_num: 0, + order_hint: 0, + input_image, + input_image_memory, + input_image_view, + input_image_layout, + dpb_images, + dpb_image_memories, + dpb_image_views, + dpb_slot_count: requested_dpb_slots, + dpb_slot_active: vec![false; requested_dpb_slots], + bitstream_buffer, + bitstream_buffer_memory, + bitstream_buffer_size, + bitstream_buffer_ptr, + command_pool, + upload_command_pool: cmd_resources.upload_command_pool, + upload_command_buffer, + upload_fence, + encode_command_buffer, + encode_fence, + query_pool, + header_data: None, + current_dpb_slot: 0, + references: Vec::new(), + }) + } +} diff --git a/src/encoder/av1/mod.rs b/src/encoder/av1/mod.rs new file mode 100644 index 0000000..2a5f322 --- /dev/null +++ b/src/encoder/av1/mod.rs @@ -0,0 +1,525 @@ +//! AV1 encoder implementation using Vulkan Video. +//! +//! This module implements AV1 video encoding using Vulkan Video extensions. + +mod api; +mod encode; +mod init; + +use ash::vk; +use tracing::debug; + +use crate::encoder::resources::{upload_image_to_input, UploadParams}; +use crate::error::Result; + +use crate::encoder::gop::GopStructure; +use crate::encoder::EncodeConfig; +use crate::vulkan::VideoContext; + +/// Minimum bitstream buffer size. +const MIN_BITSTREAM_BUFFER_SIZE: usize = 2 * 1024 * 1024; + +/// AV1 superblock size in pixels (64x64, matching use_128x128_superblock=0 in the sequence header). +pub const SUPERBLOCK_SIZE: u32 = 64; + +#[derive(Clone, Copy, Debug)] +pub(crate) struct ReferenceInfo { + pub dpb_slot: u8, + pub order_hint: u32, + pub frame_type: u32, +} + +/// AV1 encoder. +pub struct AV1Encoder { + context: VideoContext, + config: EncodeConfig, + gop: GopStructure, + + // Video session. 
+ video_queue_fn: ash::khr::video_queue::Device, + video_encode_fn: ash::khr::video_encode_queue::Device, + session: vk::VideoSessionKHR, + session_params: vk::VideoSessionParametersKHR, + session_memory: Vec, + + // Frame counters. + input_frame_num: u64, + encode_frame_num: u64, + frame_num: u32, + order_hint: u32, + + // Resources + input_image: vk::Image, + input_image_memory: vk::DeviceMemory, + input_image_view: vk::ImageView, + /// Current Vulkan image layout of `input_image` (tracked to avoid UB when transitioning). + input_image_layout: vk::ImageLayout, + /// DPB images for reference frames. + dpb_images: Vec, + dpb_image_memories: Vec, + dpb_image_views: Vec, + /// Number of DPB slots allocated. + dpb_slot_count: usize, + /// Whether each DPB slot has been activated (written to at least once). + dpb_slot_active: Vec, + bitstream_buffer: vk::Buffer, + bitstream_buffer_memory: vk::DeviceMemory, + /// Size of the allocated bitstream buffer in bytes. + bitstream_buffer_size: usize, + /// Persistently mapped pointer to the bitstream buffer (avoids per-frame map/unmap). + bitstream_buffer_ptr: *mut u8, + + // Command resources. + command_pool: vk::CommandPool, + upload_command_pool: vk::CommandPool, + upload_command_buffer: vk::CommandBuffer, + upload_fence: vk::Fence, + encode_command_buffer: vk::CommandBuffer, + encode_fence: vk::Fence, + query_pool: vk::QueryPool, + + // Cached AV1 sequence header OBU (retrieved from session parameters). + header_data: Option>, + + // Reference picture tracking. + /// Current DPB slot to use for setup (the reconstructed picture). + current_dpb_slot: u8, + /// Active reference pictures. Ordered from most recent to oldest. + references: Vec, +} + +impl AV1Encoder { + /// Upload input frame from a GPU image. + /// + /// This copies from a source image directly to the encoder's input image, + /// avoiding any CPU-side data copies. 
The source image must match the + /// encoder's configured pixel format and dimensions, and should be in + /// GENERAL layout. + fn upload_from_image(&mut self, src_image: vk::Image) -> Result<()> { + if src_image == self.input_image { + debug!("Source image is the encoder's input image, skipping upload copy"); + return Ok(()); + } + + let params = UploadParams { + upload_command_buffer: self.upload_command_buffer, + upload_fence: self.upload_fence, + src_image, + dst_image: self.input_image, + width: self.config.dimensions.width, + height: self.config.dimensions.height, + pixel_format: self.config.pixel_format, + input_image_layout: self.input_image_layout, + upload_queue: self.context.transfer_queue(), + }; + + upload_image_to_input(&self.context, ¶ms)?; + + // Update tracked layout. + self.input_image_layout = vk::ImageLayout::VIDEO_ENCODE_SRC_KHR; + + Ok(()) + } +} + +// SAFETY: The raw pointer bitstream_buffer_ptr is only used within the encoder's +// thread and is properly synchronized via Vulkan fences before access +unsafe impl Send for AV1Encoder {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_superblock_size() { + assert_eq!(SUPERBLOCK_SIZE, 64); + } + + #[test] + fn test_superblock_alignment() { + // Dimensions should be aligned up to superblock boundaries. + let align = |v: u32| (v + SUPERBLOCK_SIZE - 1) & !(SUPERBLOCK_SIZE - 1); + + assert_eq!(align(1920), 1920); // Already aligned + assert_eq!(align(1080), 1088); // 1080 rounds up to 1088 + assert_eq!(align(2560), 2560); // Already aligned + assert_eq!(align(1440), 1472); // 1440 rounds up to 1472 + assert_eq!(align(1), 64); // Minimum is one superblock + } + + #[test] + fn test_reference_info() { + let ref_info = ReferenceInfo { + dpb_slot: 2, + order_hint: 42, + frame_type: 0, + }; + + assert_eq!(ref_info.dpb_slot, 2); + assert_eq!(ref_info.order_hint, 42); + + // Should be Copy + Clone. 
+ let copied = ref_info; + assert_eq!(copied.dpb_slot, ref_info.dpb_slot); + assert_eq!(copied.order_hint, ref_info.order_hint); + } + + #[test] + fn test_order_hint_wrapping() { + // AV1 order hints are 8-bit, wrapping at 256. + let mut order_hint: u32 = 254; + for _ in 0..4 { + order_hint = (order_hint + 1) & 0xFF; + } + // 254 -> 255 -> 0 -> 1 -> 2 + assert_eq!(order_hint, 2); + } + + #[test] + fn test_reference_tracking() { + // Simulate building up a reference list like the encoder does. + let mut references: Vec = Vec::new(); + let max_refs = 4usize; + + for i in 0..6u8 { + let ref_info = ReferenceInfo { + dpb_slot: i % max_refs as u8, + order_hint: i as u32, + frame_type: 0, + }; + references.insert(0, ref_info); + while references.len() > max_refs { + references.pop(); + } + } + + // Should have exactly max_refs entries. + assert_eq!(references.len(), max_refs); + // Most recent should be first. + assert_eq!(references[0].order_hint, 5); + assert_eq!(references[max_refs - 1].order_hint, 2); + } + + #[test] + fn test_key_frame_clears_references() { + // Simulate the encoder's key frame behavior: references.clear() on IDR. + let mut references: Vec = Vec::new(); + + // Build up some references (like a sequence of P-frames). + for i in 0..3u8 { + references.insert( + 0, + ReferenceInfo { + dpb_slot: i, + order_hint: i as u32, + frame_type: 0, + }, + ); + } + assert_eq!(references.len(), 3); + + // Key frame resets everything. + references.clear(); + assert!(references.is_empty()); + + // First frame after key should start fresh at slot 0. + references.insert( + 0, + ReferenceInfo { + dpb_slot: 0, + order_hint: 0, + frame_type: 0, + }, + ); + assert_eq!(references.len(), 1); + assert_eq!(references[0].dpb_slot, 0); + assert_eq!(references[0].order_hint, 0); + } + + #[test] + fn test_dpb_slot_reuse() { + // Simulate the encoder's DPB slot allocation: after encoding a reference + // frame, find the first slot not used by any active reference. 
+ let max_refs = 2usize; + let dpb_slot_count = 3u8; // active_refs + 1 + let mut references: Vec = Vec::new(); + let mut current_dpb_slot: u8 = 0; + + // Helper: find next free slot (mirrors api.rs logic). + let find_free_slot = |refs: &[ReferenceInfo], slot_count: u8| -> u8 { + let used: Vec = refs.iter().map(|r| r.dpb_slot).collect(); + for i in 0..slot_count { + if !used.contains(&i) { + return i; + } + } + 0 // fallback (shouldn't happen with correct slot_count) + }; + + // Frame 0 (IDR): uses slot 0. + assert_eq!(current_dpb_slot, 0); + references.insert( + 0, + ReferenceInfo { + dpb_slot: 0, + order_hint: 0, + frame_type: 0, + }, + ); + while references.len() > max_refs { + references.pop(); + } + current_dpb_slot = find_free_slot(&references, dpb_slot_count); + assert_eq!(current_dpb_slot, 1); // slot 0 is used, next free is 1 + + // Frame 1 (P): uses slot 1. + references.insert( + 0, + ReferenceInfo { + dpb_slot: 1, + order_hint: 1, + frame_type: 0, + }, + ); + while references.len() > max_refs { + references.pop(); + } + current_dpb_slot = find_free_slot(&references, dpb_slot_count); + assert_eq!(current_dpb_slot, 2); // slots 0,1 used, next free is 2 + + // Frame 2 (P): uses slot 2. Now all 3 slots have been touched, + // but max_refs=2 means the oldest reference (slot 0) gets evicted. + references.insert( + 0, + ReferenceInfo { + dpb_slot: 2, + order_hint: 2, + frame_type: 0, + }, + ); + while references.len() > max_refs { + references.pop(); + } + // references = [{slot:2, hint:2}, {slot:1, hint:1}] - slot 0 evicted + assert_eq!(references.len(), 2); + assert_eq!(references[0].dpb_slot, 2); + assert_eq!(references[1].dpb_slot, 1); + current_dpb_slot = find_free_slot(&references, dpb_slot_count); + assert_eq!(current_dpb_slot, 0); // slot 0 is now free again (reuse!) + + // Frame 3 (P): uses recycled slot 0. 
+ references.insert( + 0, + ReferenceInfo { + dpb_slot: 0, + order_hint: 3, + frame_type: 0, + }, + ); + while references.len() > max_refs { + references.pop(); + } + // references = [{slot:0, hint:3}, {slot:2, hint:2}] - slot 1 evicted + current_dpb_slot = find_free_slot(&references, dpb_slot_count); + assert_eq!(current_dpb_slot, 1); // slot 1 recycled + } + + #[test] + fn test_idr_p_p_idr_cycle() { + // Full GOP cycle: IDR -> P -> P -> IDR, verifying DPB slot allocation + // and reference list state at each step. + let max_refs = 2usize; + let dpb_slot_count = 3u8; + let mut references: Vec = Vec::new(); + let mut current_dpb_slot: u8 = 0; + + let find_free_slot = |refs: &[ReferenceInfo], slot_count: u8| -> u8 { + let used: Vec = refs.iter().map(|r| r.dpb_slot).collect(); + for i in 0..slot_count { + if !used.contains(&i) { + return i; + } + } + 0 + }; + + // IDR frame: clears refs, writes to slot 0. + references.clear(); + references.insert( + 0, + ReferenceInfo { + dpb_slot: current_dpb_slot, + order_hint: 0, + frame_type: 0, + }, + ); + current_dpb_slot = find_free_slot(&references, dpb_slot_count); + + assert_eq!(references.len(), 1); + assert_eq!(references[0].order_hint, 0); + assert_eq!(current_dpb_slot, 1); + + // P frame 1: writes to slot 1. + references.insert( + 0, + ReferenceInfo { + dpb_slot: current_dpb_slot, + order_hint: 1, + frame_type: 0, + }, + ); + while references.len() > max_refs { + references.pop(); + } + current_dpb_slot = find_free_slot(&references, dpb_slot_count); + + assert_eq!(references.len(), 2); + assert_eq!(references[0].order_hint, 1); + assert_eq!(current_dpb_slot, 2); + + // P frame 2: writes to slot 2, evicts oldest ref (slot 0). 
+ references.insert( + 0, + ReferenceInfo { + dpb_slot: current_dpb_slot, + order_hint: 2, + frame_type: 0, + }, + ); + while references.len() > max_refs { + references.pop(); + } + current_dpb_slot = find_free_slot(&references, dpb_slot_count); + + assert_eq!(references.len(), 2); + assert_eq!(references[0].order_hint, 2); + assert_eq!(current_dpb_slot, 0); // slot 0 recycled + + // Second IDR: everything resets. + references.clear(); + assert!(references.is_empty()); + } + + #[test] + fn test_single_reference_slot() { + // Edge case: only 1 active reference with 2 DPB slots (minimum viable). + let max_refs = 1usize; + let dpb_slot_count = 2u8; + let mut references: Vec = Vec::new(); + + let find_free_slot = |refs: &[ReferenceInfo], slot_count: u8| -> u8 { + let used: Vec = refs.iter().map(|r| r.dpb_slot).collect(); + for i in 0..slot_count { + if !used.contains(&i) { + return i; + } + } + 0 + }; + + // Frame 0: slot 0. + references.insert( + 0, + ReferenceInfo { + dpb_slot: 0, + order_hint: 0, + frame_type: 0, + }, + ); + while references.len() > max_refs { + references.pop(); + } + let mut current_dpb_slot = find_free_slot(&references, dpb_slot_count); + assert_eq!(current_dpb_slot, 1); + + // Frame 1: slot 1. Old ref (slot 0) evicted since max_refs=1. + references.insert( + 0, + ReferenceInfo { + dpb_slot: 1, + order_hint: 1, + frame_type: 0, + }, + ); + while references.len() > max_refs { + references.pop(); + } + assert_eq!(references.len(), 1); + assert_eq!(references[0].dpb_slot, 1); + current_dpb_slot = find_free_slot(&references, dpb_slot_count); + assert_eq!(current_dpb_slot, 0); // ping-pong between 0 and 1 + + // Frame 2: slot 0 again. 
+ references.insert( + 0, + ReferenceInfo { + dpb_slot: 0, + order_hint: 2, + frame_type: 0, + }, + ); + while references.len() > max_refs { + references.pop(); + } + current_dpb_slot = find_free_slot(&references, dpb_slot_count); + assert_eq!(current_dpb_slot, 1); // ping-pong back + } +} + +impl Drop for AV1Encoder { + fn drop(&mut self) { + unsafe { + let _ = self.context.device().device_wait_idle(); + self.context + .device() + .destroy_query_pool(self.query_pool, None); + self.context.device().destroy_fence(self.upload_fence, None); + self.context.device().destroy_fence(self.encode_fence, None); + self.context + .device() + .destroy_command_pool(self.command_pool, None); + if self.upload_command_pool != self.command_pool { + self.context + .device() + .destroy_command_pool(self.upload_command_pool, None); + } + self.context + .device() + .destroy_buffer(self.bitstream_buffer, None); + // Unmap the persistently mapped bitstream buffer before freeing memory. + self.context + .device() + .unmap_memory(self.bitstream_buffer_memory); + self.context + .device() + .free_memory(self.bitstream_buffer_memory, None); + self.context + .device() + .destroy_image_view(self.input_image_view, None); + self.context.device().destroy_image(self.input_image, None); + self.context + .device() + .free_memory(self.input_image_memory, None); + + for i in 0..self.dpb_images.len() { + self.context + .device() + .destroy_image_view(self.dpb_image_views[i], None); + self.context + .device() + .destroy_image(self.dpb_images[i], None); + self.context + .device() + .free_memory(self.dpb_image_memories[i], None); + } + + self.video_queue_fn + .destroy_video_session_parameters(self.session_params, None); + self.video_queue_fn + .destroy_video_session(self.session, None); + for mem in &self.session_memory { + self.context.device().free_memory(*mem, None); + } + } + } +} diff --git a/src/encoder/h264/encode.rs b/src/encoder/h264/encode.rs index 3cc19a4..5f3f3d3 100644 --- 
a/src/encoder/h264/encode.rs +++ b/src/encoder/h264/encode.rs @@ -1,7 +1,10 @@ use super::H264Encoder; use crate::encoder::gop::{GopFrameType, GopPosition}; -use crate::encoder::resources::{record_dpb_barriers, MIN_BITSTREAM_BUFFER_SIZE}; +use crate::encoder::resources::{ + prepare_encode_command_buffer, record_dpb_barriers, submit_encode_and_read_bitstream, + MIN_BITSTREAM_BUFFER_SIZE, +}; use crate::error::{PixelForgeError, Result}; use ash::vk; use tracing::debug; @@ -48,34 +51,13 @@ impl H264Encoder { ), }; - // Reset command buffer before recording. + // Prepare command buffer for recording. unsafe { - self.context.device().reset_command_buffer( - self.encode_command_buffer, - vk::CommandBufferResetFlags::empty(), - ) - } - .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - - // Begin command buffer. - let begin_info = vk::CommandBufferBeginInfo::default() - .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT); - - unsafe { - self.context - .device() - .begin_command_buffer(self.encode_command_buffer, &begin_info) - } - .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - - // Reset query pool. - unsafe { - self.context.device().cmd_reset_query_pool( + prepare_encode_command_buffer( + self.context.device(), self.encode_command_buffer, self.query_pool, - 0, - 1, - ); + )?; } // Transition DPB images for encode. @@ -633,77 +615,25 @@ impl H264Encoder { } .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - // Submit - let submit_info = vk::SubmitInfo::default() - .command_buffers(std::slice::from_ref(&self.encode_command_buffer)); - + // Submit, wait, and read bitstream. let encode_queue = self.context.video_encode_queue().ok_or_else(|| { PixelForgeError::NoSuitableDevice("No video encode queue available".to_string()) })?; - unsafe { - self.context - .device() - .queue_submit(encode_queue, &[submit_info], self.encode_fence) - } - .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - - // Wait for encode to complete. 
- unsafe { - self.context - .device() - .wait_for_fences(&[self.encode_fence], true, u64::MAX) - } - .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - - unsafe { self.context.device().reset_fences(&[self.encode_fence]) } - .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - - // Read back query results to get actual encoded size. - #[repr(C)] - struct QueryResult { - offset: u32, - bytes_written: u32, - } - - let mut query_results = [QueryResult { - offset: 0, - bytes_written: 0, - }]; - - unsafe { - self.context.device().get_query_pool_results( + let encoded_data = unsafe { + submit_encode_and_read_bitstream( + self.context.device(), + self.encode_command_buffer, + self.encode_fence, + encode_queue, self.query_pool, - 0, // first_query - &mut query_results, - vk::QueryResultFlags::WAIT, - ) - } - .map_err(|e| PixelForgeError::QueryPool(e.to_string()))?; - - let query_result = &query_results[0]; - - debug!( - "Encode complete: offset={}, bytes_written={}", - query_result.offset, query_result.bytes_written - ); + self.bitstream_buffer_ptr, + )? + }; // Mark DPB slot as active. self.dpb_slot_active[self.current_dpb_slot as usize] = true; - // Read back the bitstream data using the persistently mapped buffer pointer. 
- // This avoids per-frame map/unmap overhead (the buffer is mapped once at init) - // Note: The Vulkan encoder output already includes NAL start codes (Annex B format) - let mut encoded_data = Vec::with_capacity(query_result.bytes_written as usize); - - unsafe { - let src = std::slice::from_raw_parts( - self.bitstream_buffer_ptr.add(query_result.offset as usize), - query_result.bytes_written as usize, - ); - encoded_data.extend_from_slice(src); - } - Ok(encoded_data) } } diff --git a/src/encoder/h265/encode.rs b/src/encoder/h265/encode.rs index ca442f6..b7a8ce0 100644 --- a/src/encoder/h265/encode.rs +++ b/src/encoder/h265/encode.rs @@ -5,7 +5,10 @@ use super::H265Encoder; use crate::encoder::gop::{GopFrameType, GopPosition}; -use crate::encoder::resources::{record_dpb_barriers, MIN_BITSTREAM_BUFFER_SIZE}; +use crate::encoder::resources::{ + prepare_encode_command_buffer, record_dpb_barriers, record_post_encode_dpb_barrier, + submit_encode_and_read_bitstream, MIN_BITSTREAM_BUFFER_SIZE, +}; use crate::error::{PixelForgeError, Result}; use ash::vk; use tracing::debug; @@ -24,34 +27,13 @@ impl H265Encoder { pic_order_cnt: i32, is_idr: bool, ) -> Result> { - // Reset command buffer. + // Prepare command buffer for recording. unsafe { - self.context.device().reset_command_buffer( - self.encode_command_buffer, - vk::CommandBufferResetFlags::empty(), - ) - } - .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - - // Begin command buffer. 
- let begin_info = vk::CommandBufferBeginInfo::default() - .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT); - - unsafe { - self.context - .device() - .begin_command_buffer(self.encode_command_buffer, &begin_info) - } - .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - - // Reset query pool before beginning queries (required by validation layers) - unsafe { - self.context.device().cmd_reset_query_pool( + prepare_encode_command_buffer( + self.context.device(), self.encode_command_buffer, self.query_pool, - 0, - 1, - ); + )?; } // Transition DPB images for encode. @@ -649,45 +631,14 @@ impl H265Encoder { } // Add DPB synchronization barrier after encoding. - { - let post_dpb_image = if self.use_layered_dpb { - self.dpb_images[0] - } else { - self.dpb_images[self.current_dpb_slot as usize] - }; - let post_dpb_layer = if self.use_layered_dpb { - self.current_dpb_slot as u32 - } else { - 0 - }; - - let dpb_sync_barrier = vk::ImageMemoryBarrier::default() - .old_layout(vk::ImageLayout::VIDEO_ENCODE_DPB_KHR) - .new_layout(vk::ImageLayout::VIDEO_ENCODE_DPB_KHR) - .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .image(post_dpb_image) - .subresource_range(vk::ImageSubresourceRange { - aspect_mask: vk::ImageAspectFlags::COLOR, - base_mip_level: 0, - level_count: 1, - base_array_layer: post_dpb_layer, - layer_count: 1, - }) - .src_access_mask(vk::AccessFlags::MEMORY_WRITE) - .dst_access_mask(vk::AccessFlags::MEMORY_READ); - - unsafe { - self.context.device().cmd_pipeline_barrier( - self.encode_command_buffer, - vk::PipelineStageFlags::ALL_COMMANDS, - vk::PipelineStageFlags::ALL_COMMANDS, - vk::DependencyFlags::empty(), - &[], - &[], - &[dpb_sync_barrier], - ); - } + unsafe { + record_post_encode_dpb_barrier( + self.context.device(), + self.encode_command_buffer, + &self.dpb_images, + self.use_layered_dpb, + self.current_dpb_slot, + ); } // End video coding. 
@@ -707,10 +658,7 @@ impl H265Encoder { } .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - // Submit encode command. - let submit_info = vk::SubmitInfo::default() - .command_buffers(std::slice::from_ref(&self.encode_command_buffer)); - + // Submit, wait, and read bitstream. let encode_queue = self.context.video_encode_queue().ok_or_else(|| { PixelForgeError::NoSuitableDevice("No video encode queue available".to_string()) })?; @@ -725,68 +673,22 @@ impl H265Encoder { let gpu_start = std::time::Instant::now(); - unsafe { - self.context - .device() - .queue_submit(encode_queue, &[submit_info], self.encode_fence) - } - .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - - // Wait for completion. - unsafe { - self.context - .device() - .wait_for_fences(&[self.encode_fence], true, u64::MAX) - } - .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - - let gpu_elapsed = gpu_start.elapsed(); - debug!("GPU encode took {:?}", gpu_elapsed); - - unsafe { self.context.device().reset_fences(&[self.encode_fence]) } - .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - - // Get query results. - #[repr(C)] - struct VideoEncodeFeedbackResult { - offset: u32, - bytes_written: u32, - } - let mut query_results = [VideoEncodeFeedbackResult { - offset: 0, - bytes_written: 0, - }]; - unsafe { - self.context.device().get_query_pool_results( + let encoded_data = unsafe { + submit_encode_and_read_bitstream( + self.context.device(), + self.encode_command_buffer, + self.encode_fence, + encode_queue, self.query_pool, - 0, - &mut query_results, - vk::QueryResultFlags::WAIT, - ) - } - .map_err(|e| PixelForgeError::QueryPool(e.to_string()))?; - - let offset = query_results[0].offset as usize; - let size = query_results[0].bytes_written as usize; - - if size == 0 { - return Err(PixelForgeError::QueryPool( - "Encoder produced 0 bytes".to_string(), - )); - } + self.bitstream_buffer_ptr, + )? 
+ }; - debug!("Encoded frame: offset={}, size={}", offset, size); + debug!("GPU encode took {:?}", gpu_start.elapsed()); // Mark DPB slot as active. self.dpb_slot_active[self.current_dpb_slot as usize] = true; - // Copy data from bitstream buffer. - let mut encoded_data = vec![0u8; size]; - unsafe { - let src = std::slice::from_raw_parts(self.bitstream_buffer_ptr.add(offset), size); - encoded_data.copy_from_slice(src); - } - Ok(encoded_data) } } diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs index 45e22fb..06e211d 100644 --- a/src/encoder/mod.rs +++ b/src/encoder/mod.rs @@ -5,6 +5,7 @@ //! - GOP structure management (`gop` module) - reusable for H.264/H.265. //! - Frame reordering for B-frame support (`reorder` module) - reusable for H.264/H.265. +pub mod av1; pub mod bitwriter; pub mod dpb; pub mod gop; @@ -38,7 +39,7 @@ pub const DEFAULT_H265_QP: u32 = 28; /// Default maximum number of reference frames. pub const DEFAULT_MAX_REFERENCE_FRAMES: u32 = 4; -use crate::error::{PixelForgeError, Result}; +use crate::error::Result; use crate::vulkan::VideoContext; /// Video codec types. @@ -233,6 +234,30 @@ impl EncodeConfig { } } + /// Create a new AV1 encode configuration with default settings. + pub fn av1(width: u32, height: u32) -> Self { + assert!(width > 0, "width must be non-zero"); + assert!(height > 0, "height must be non-zero"); + + Self { + codec: Codec::AV1, + dimensions: Dimensions { width, height }, + pixel_format: PixelFormat::Yuv420, + bit_depth: BitDepth::Eight, + rate_control_mode: RateControlMode::Disabled, + target_bitrate: DEFAULT_TARGET_BITRATE, + max_bitrate: DEFAULT_MAX_BITRATE, + quality_level: 128, // AV1 uses 0-255 QP range + frame_rate_numerator: DEFAULT_FRAME_RATE, + frame_rate_denominator: 1, + gop_size: DEFAULT_GOP_SIZE, + b_frame_count: 0, // Start without B-frames for simplicity. 
+ max_reference_frames: DEFAULT_MAX_REFERENCE_FRAMES, + virtual_buffer_size_ms: 1000, + initial_virtual_buffer_size_ms: 1000, + } + } + /// Set the rate control mode. pub fn with_rate_control(mut self, mode: RateControlMode) -> Self { self.rate_control_mode = mode; @@ -336,6 +361,8 @@ pub enum Encoder { H264(self::h264::H264Encoder), /// H.265/HEVC encoder. H265(self::h265::H265Encoder), + /// AV1 encoder. + AV1(self::av1::AV1Encoder), } impl Encoder { @@ -347,6 +374,7 @@ impl Encoder { match self { Encoder::H264(encoder) => encoder.input_image(), Encoder::H265(encoder) => encoder.input_image(), + Encoder::AV1(encoder) => encoder.input_image(), } } @@ -359,9 +387,7 @@ impl Encoder { Codec::H265 => Ok(Encoder::H265(self::h265::H265Encoder::new( context, config, )?)), - Codec::AV1 => Err(PixelForgeError::CodecNotSupported( - "AV1 encoding not yet implemented".to_string(), - )), + Codec::AV1 => Ok(Encoder::AV1(self::av1::AV1Encoder::new(context, config)?)), } } @@ -399,6 +425,7 @@ impl Encoder { match self { Encoder::H264(encoder) => encoder.encode(src_image), Encoder::H265(encoder) => encoder.encode(src_image), + Encoder::AV1(encoder) => encoder.encode(src_image), } } @@ -407,6 +434,7 @@ impl Encoder { match self { Encoder::H264(encoder) => encoder.flush(), Encoder::H265(encoder) => encoder.flush(), + Encoder::AV1(encoder) => encoder.flush(), } } @@ -415,6 +443,7 @@ impl Encoder { match self { Encoder::H264(encoder) => encoder.request_idr(), Encoder::H265(encoder) => encoder.request_idr(), + Encoder::AV1(encoder) => encoder.request_idr(), } } } @@ -616,6 +645,44 @@ mod tests { assert_eq!(config.max_bitrate, 12_000_000); } + #[test] + fn test_av1_defaults() { + let config = EncodeConfig::av1(2560, 1440); + + assert_eq!(config.codec, Codec::AV1); + assert_eq!(config.dimensions.width, 2560); + assert_eq!(config.dimensions.height, 1440); + assert_eq!(config.pixel_format, PixelFormat::Yuv420); + assert_eq!(config.bit_depth, BitDepth::Eight); + 
assert_eq!(config.rate_control_mode, RateControlMode::Disabled); + assert_eq!(config.quality_level, 128); // AV1 uses 0-255 QP range + assert_eq!(config.gop_size, 30); + assert_eq!(config.b_frame_count, 0); + assert_eq!(config.frame_rate_numerator, 30); + assert_eq!(config.frame_rate_denominator, 1); + } + + #[test] + fn test_av1_builder_chaining() { + let config = EncodeConfig::av1(1920, 1080) + .with_rate_control(RateControlMode::Vbr) + .with_target_bitrate(8_000_000) + .with_max_bitrate(12_000_000) + .with_gop_size(60) + .with_frame_rate(60, 1) + .with_quality_level(100) + .with_max_reference_frames(2); + + assert_eq!(config.codec, Codec::AV1); + assert_eq!(config.rate_control_mode, RateControlMode::Vbr); + assert_eq!(config.target_bitrate, 8_000_000); + assert_eq!(config.max_bitrate, 12_000_000); + assert_eq!(config.gop_size, 60); + assert_eq!(config.frame_rate_numerator, 60); + assert_eq!(config.quality_level, 100); + assert_eq!(config.max_reference_frames, 2); + } + #[test] fn test_builder_chaining() { let config = EncodeConfig::h264(1920, 1080) diff --git a/src/encoder/resources.rs b/src/encoder/resources.rs index ab71254..d97f0f6 100644 --- a/src/encoder/resources.rs +++ b/src/encoder/resources.rs @@ -1234,15 +1234,15 @@ pub(crate) unsafe fn record_dpb_barriers( base_array_layer: ref_layer, layer_count: 1, }) - .src_access_mask(vk::AccessFlags::empty()) - .dst_access_mask(vk::AccessFlags::empty()), + .src_access_mask(vk::AccessFlags::MEMORY_WRITE) + .dst_access_mask(vk::AccessFlags::MEMORY_READ), ); } device.cmd_pipeline_barrier( command_buffer, - vk::PipelineStageFlags::TOP_OF_PIPE, - vk::PipelineStageFlags::BOTTOM_OF_PIPE, + vk::PipelineStageFlags::ALL_COMMANDS, + vk::PipelineStageFlags::ALL_COMMANDS, vk::DependencyFlags::empty(), &[], &[], @@ -1250,6 +1250,153 @@ pub(crate) unsafe fn record_dpb_barriers( ); } +/// Prepare an encode command buffer for recording. 
+/// +/// Resets the command buffer, begins recording with ONE_TIME_SUBMIT, and resets +/// the query pool. This is the common preamble for all encode operations. +/// +/// # Safety +/// +/// The command buffer must not be in use by the GPU. +pub(crate) unsafe fn prepare_encode_command_buffer( + device: &ash::Device, + command_buffer: vk::CommandBuffer, + query_pool: vk::QueryPool, +) -> Result<()> { + device + .reset_command_buffer(command_buffer, vk::CommandBufferResetFlags::empty()) + .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; + + let begin_info = + vk::CommandBufferBeginInfo::default().flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT); + device + .begin_command_buffer(command_buffer, &begin_info) + .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; + + device.cmd_reset_query_pool(command_buffer, query_pool, 0, 1); + + Ok(()) +} + +/// Record a post-encode DPB synchronization barrier. +/// +/// Ensures the DPB image write from the encode operation is visible to subsequent +/// reads (e.g. as a reference frame for the next encode). +/// +/// # Safety +/// +/// The command buffer must be in recording state. 
+pub(crate) unsafe fn record_post_encode_dpb_barrier( + device: &ash::Device, + command_buffer: vk::CommandBuffer, + dpb_images: &[vk::Image], + use_layered_dpb: bool, + current_dpb_slot: u8, +) { + let (post_dpb_image, post_dpb_layer) = if use_layered_dpb { + (dpb_images[0], current_dpb_slot as u32) + } else { + (dpb_images[current_dpb_slot as usize], 0) + }; + + let dpb_sync_barrier = vk::ImageMemoryBarrier::default() + .old_layout(vk::ImageLayout::VIDEO_ENCODE_DPB_KHR) + .new_layout(vk::ImageLayout::VIDEO_ENCODE_DPB_KHR) + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(post_dpb_image) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: post_dpb_layer, + layer_count: 1, + }) + .src_access_mask(vk::AccessFlags::MEMORY_WRITE) + .dst_access_mask(vk::AccessFlags::MEMORY_READ); + + device.cmd_pipeline_barrier( + command_buffer, + vk::PipelineStageFlags::ALL_COMMANDS, + vk::PipelineStageFlags::ALL_COMMANDS, + vk::DependencyFlags::empty(), + &[], + &[], + &[dpb_sync_barrier], + ); +} + +/// Submit an encode command buffer and wait for completion. +/// +/// Submits the command buffer to the encode queue, waits for the fence, resets it, +/// then reads query results and copies the encoded bitstream data. +/// +/// # Safety +/// +/// The command buffer must have been ended. The fence must be in the unsignaled state. +/// The bitstream buffer pointer must be valid and the buffer must be persistently mapped. 
+pub(crate) unsafe fn submit_encode_and_read_bitstream( + device: &ash::Device, + command_buffer: vk::CommandBuffer, + fence: vk::Fence, + encode_queue: vk::Queue, + query_pool: vk::QueryPool, + bitstream_buffer_ptr: *const u8, +) -> Result> { + let submit_info = + vk::SubmitInfo::default().command_buffers(std::slice::from_ref(&command_buffer)); + + device + .queue_submit(encode_queue, &[submit_info], fence) + .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; + + device + .wait_for_fences(&[fence], true, u64::MAX) + .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; + + device + .reset_fences(&[fence]) + .map_err(|e| PixelForgeError::Synchronization(e.to_string()))?; + + // Read query results (offset + bytes_written). + #[repr(C)] + struct QueryResult { + offset: u32, + bytes_written: u32, + } + + let mut query_results = [QueryResult { + offset: 0, + bytes_written: 0, + }]; + + device + .get_query_pool_results( + query_pool, + 0, + &mut query_results, + vk::QueryResultFlags::WAIT, + ) + .map_err(|e| PixelForgeError::QueryPool(e.to_string()))?; + + let offset = query_results[0].offset as usize; + let size = query_results[0].bytes_written as usize; + + if size == 0 { + return Err(PixelForgeError::QueryPool( + "Encoder produced 0 bytes".to_string(), + )); + } + + tracing::debug!("Encoded frame: offset={}, size={}", offset, size); + + let mut encoded_data = vec![0u8; size]; + let src = std::slice::from_raw_parts(bitstream_buffer_ptr.add(offset), size); + encoded_data.copy_from_slice(src); + + Ok(encoded_data) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/image.rs b/src/image.rs index 6e08d5d..13cc03c 100644 --- a/src/image.rs +++ b/src/image.rs @@ -1,21 +1,38 @@ //! Image utilities for creating and uploading video frames. //! -//! This module provides utilities for creating Vulkan images suitable for video encoding. -//! and uploading YUV data to them. +//! 
This module provides [`InputImage`], a helper for uploading YUV data from the CPU +//! to the GPU. The image it creates is a transfer-only staging image (not a Vulkan Video +//! image) and must be copied into an encoder's input image before encoding. +//! Use [`InputImage::upload_yuv420`] to upload to the internal image, then pass +//! `input_image.image()` to [`Encoder::encode`](crate::encoder::Encoder::encode). +//! Alternatively, use [`InputImage::upload_yuv420_to`] / [`InputImage::upload_yuv444_to`] +//! to upload directly into the encoder's input image (obtained via +//! [`Encoder::input_image`](crate::encoder::Encoder::input_image)). use crate::encoder::{BitDepth, Codec, PixelFormat}; use crate::error::{PixelForgeError, Result}; use crate::vulkan::VideoContext; use ash::vk; -/// An image on the GPU ready for video encoding. +/// A GPU image for staging YUV frame data before encoding. /// -/// This struct owns a Vulkan image with NV12/P010 format (YUV420) or -/// 2-plane semi-planar YUV444 format and provides methods to upload YUV data to it. -/// The image can be passed directly to the encoder. +/// This struct owns a transfer-only Vulkan image (no video profile, no +/// `VIDEO_ENCODE_SRC_KHR` usage) with NV12/P010 format (YUV420) or 2-plane +/// semi-planar YUV444 format. It provides methods to upload YUV data from the +/// CPU, which can then be copied into an encoder's input image for encoding. +/// +/// The image is **not** directly usable as a Vulkan Video encode source. To +/// encode, either: +/// - Upload to this image and pass `self.image()` to [`Encoder::encode`](crate::encoder::Encoder::encode), which +/// will copy it into the encoder's internal input image, or +/// - Use [`upload_yuv420_to`](Self::upload_yuv420_to) / +/// [`upload_yuv444_to`](Self::upload_yuv444_to) to upload directly into the +/// encoder's input image. pub struct InputImage { context: VideoContext, image: vk::Image, + /// Current layout of `self.image`. 
+ image_layout: vk::ImageLayout, memory: vk::DeviceMemory, staging_buffer: vk::Buffer, staging_memory: vk::DeviceMemory, @@ -32,23 +49,22 @@ pub struct InputImage { } impl InputImage { - /// Create a new input image for video encoding. + /// Create a new staging image for uploading YUV frame data. /// - /// Creates an image suitable for use as input to the video encoder. - /// For YUV420: NV12 (8-bit) or P010 (10-bit) format. - /// For YUV444: 2-plane semi-planar format (8-bit or 10-bit). - /// The image is allocated in device-local memory for optimal performance. + /// Creates a transfer-only image suitable for staging YUV data before + /// copying it into an encoder's input image. The image has no video + /// profile and uses `TRANSFER_DST | TRANSFER_SRC` usage flags. /// /// # Arguments /// * `context` - The Vulkan video context - /// * `codec` - The video codec (H.264/H.265) + /// * `codec` - Unused. Kept for API compatibility. /// * `width` - Image width in pixels /// * `height` - Image height in pixels /// * `bit_depth` - Bit depth for the image (8-bit or 10-bit) /// * `pixel_format` - Pixel format (YUV420 or YUV444) pub fn new( context: VideoContext, - codec: Codec, + _codec: Codec, width: u32, height: u32, bit_depth: BitDepth, @@ -56,46 +72,6 @@ impl InputImage { ) -> Result { let device = context.device(); - // Determine H.264 profile IDC based on pixel format. - let h264_profile_idc = if pixel_format == PixelFormat::Yuv444 { - ash::vk::native::StdVideoH264ProfileIdc_STD_VIDEO_H264_PROFILE_IDC_HIGH_444_PREDICTIVE - } else { - ash::vk::native::StdVideoH264ProfileIdc_STD_VIDEO_H264_PROFILE_IDC_HIGH - }; - - // Create video profile info for the image creation. 
- let mut h264_profile = - vk::VideoEncodeH264ProfileInfoKHR::default().std_profile_idc(h264_profile_idc); - let mut h265_profile = vk::VideoEncodeH265ProfileInfoKHR::default().std_profile_idc( - ash::vk::native::StdVideoH265ProfileIdc_STD_VIDEO_H265_PROFILE_IDC_MAIN, - ); - - let mut profile_info = vk::VideoProfileInfoKHR::default() - .chroma_subsampling(pixel_format.into()) - .luma_bit_depth(bit_depth.into()) - .chroma_bit_depth(bit_depth.into()); - - match codec { - Codec::H264 => { - profile_info = profile_info - .video_codec_operation(vk::VideoCodecOperationFlagsKHR::ENCODE_H264); - profile_info.p_next = &mut h264_profile as *mut _ as *mut std::ffi::c_void; - } - Codec::H265 => { - profile_info = profile_info - .video_codec_operation(vk::VideoCodecOperationFlagsKHR::ENCODE_H265); - profile_info.p_next = &mut h265_profile as *mut _ as *mut std::ffi::c_void; - } - _ => { - return Err(PixelForgeError::InvalidInput( - "Unsupported codec".to_string(), - )) - } - } - - let mut profile_list = - vk::VideoProfileListInfoKHR::default().profiles(std::slice::from_ref(&profile_info)); - // Select format based on pixel format and bit depth. // Use 2-plane semi-planar formats for both YUV420 and YUV444. let format = match (pixel_format, bit_depth) { @@ -115,22 +91,10 @@ impl InputImage { } }; - // Determine sharing mode and queue families. - let mut queue_families = vec![context.transfer_queue_family()]; - if let Some(encode_family) = context.video_encode_queue_family() { - if encode_family != context.transfer_queue_family() { - queue_families.push(encode_family); - } - } - - let sharing_mode = if queue_families.len() > 1 { - vk::SharingMode::CONCURRENT - } else { - vk::SharingMode::EXCLUSIVE - }; - // Create the image. - let mut image_create_info = vk::ImageCreateInfo::default() + // This image is used purely for staging (buffer→image copy on the transfer queue), + // so it only needs TRANSFER_DST | TRANSFER_SRC and no video profile pNext. 
+ let image_create_info = vk::ImageCreateInfo::default() .image_type(vk::ImageType::TYPE_2D) .format(format) .extent(vk::Extent3D { @@ -142,18 +106,10 @@ impl InputImage { .array_layers(1) .samples(vk::SampleCountFlags::TYPE_1) .tiling(vk::ImageTiling::OPTIMAL) - .usage( - vk::ImageUsageFlags::TRANSFER_DST - | vk::ImageUsageFlags::TRANSFER_SRC - | vk::ImageUsageFlags::SAMPLED - | vk::ImageUsageFlags::VIDEO_ENCODE_SRC_KHR, - ) - .sharing_mode(sharing_mode) - .queue_family_indices(&queue_families) + .usage(vk::ImageUsageFlags::TRANSFER_DST | vk::ImageUsageFlags::TRANSFER_SRC) + .sharing_mode(vk::SharingMode::EXCLUSIVE) .initial_layout(vk::ImageLayout::UNDEFINED); - image_create_info.p_next = &mut profile_list as *mut _ as *mut std::ffi::c_void; - let image = unsafe { device.create_image(&image_create_info, None) } .map_err(|e| PixelForgeError::ResourceCreation(format!("image creation: {}", e)))?; @@ -243,6 +199,7 @@ impl InputImage { Ok(Self { context, image, + image_layout: vk::ImageLayout::UNDEFINED, memory, staging_buffer, staging_memory, @@ -468,6 +425,87 @@ impl InputImage { Ok(()) } + /// Upload YUV420 (I420) data to an external image. + /// + /// Same as `upload_yuv420()` but copies the data to the specified target image + /// instead of this InputImage's own image. Useful for uploading directly to an + /// encoder's input image to avoid cross-queue copy issues. 
+ pub fn upload_yuv420_to(&mut self, target_image: vk::Image, yuv_data: &[u8]) -> Result<()> { + if self.pixel_format != PixelFormat::Yuv420 { + return Err(PixelForgeError::InvalidInput( + "upload_yuv420_to can only be used with YUV420 InputImage".to_string(), + )); + } + + let expected_size = (self.width * self.height * 3 / 2) as usize; + if yuv_data.len() < expected_size { + return Err(PixelForgeError::InvalidInput(format!( + "YUV data too small: expected {} bytes, got {}", + expected_size, + yuv_data.len() + ))); + } + + let device = self.context.device(); + let width = self.width as usize; + let height = self.height as usize; + let y_size = width * height; + + // Map staging buffer and convert I420 to NV12/P010. + let data_ptr = unsafe { + device.map_memory( + self.staging_memory, + 0, + self.staging_size as vk::DeviceSize, + vk::MemoryMapFlags::empty(), + ) + } + .map_err(|e| PixelForgeError::MemoryAllocation(e.to_string()))?; + + unsafe { + match self.bit_depth { + BitDepth::Eight => { + let dst = + std::slice::from_raw_parts_mut(data_ptr as *mut u8, self.staging_size); + dst[..y_size].copy_from_slice(&yuv_data[..y_size]); + let u_plane = &yuv_data[y_size..y_size + y_size / 4]; + let v_plane = &yuv_data[y_size + y_size / 4..]; + for i in 0..y_size / 4 { + dst[y_size + i * 2] = u_plane[i]; + dst[y_size + i * 2 + 1] = v_plane[i]; + } + } + BitDepth::Ten => { + let dst = + std::slice::from_raw_parts_mut(data_ptr as *mut u16, self.staging_size / 2); + for i in 0..y_size { + let val = yuv_data[i] as u16; + dst[i] = val << 8; + } + let u_plane = &yuv_data[y_size..y_size + y_size / 4]; + let v_plane = &yuv_data[y_size + y_size / 4..]; + for i in 0..y_size / 4 { + let u_val = u_plane[i] as u16; + let v_val = v_plane[i] as u16; + dst[y_size + i * 2] = u_val << 8; + dst[y_size + i * 2 + 1] = v_val << 8; + } + } + } + + device.unmap_memory(self.staging_memory); + } + + // Record and submit copy commands to the target image. 
+ self.copy_staging_to_image_internal( + target_image, + vk::ImageLayout::UNDEFINED, + vk::ImageLayout::VIDEO_ENCODE_SRC_KHR, + )?; + + Ok(()) + } + /// Upload YUV444 (planar) data to an external image. pub fn upload_yuv444_to(&mut self, target_image: vk::Image, yuv_data: &[u8]) -> Result<()> { if self.pixel_format != PixelFormat::Yuv444 { @@ -540,16 +578,28 @@ impl InputImage { } // Record and submit copy commands. - self.copy_staging_to_image_internal(target_image)?; + self.copy_staging_to_image_internal( + target_image, + vk::ImageLayout::UNDEFINED, + vk::ImageLayout::VIDEO_ENCODE_SRC_KHR, + )?; Ok(()) } fn copy_staging_to_image(&mut self) -> Result<()> { - self.copy_staging_to_image_internal(self.image) + let old_layout = self.image_layout; + self.copy_staging_to_image_internal(self.image, old_layout, vk::ImageLayout::GENERAL)?; + self.image_layout = vk::ImageLayout::GENERAL; + Ok(()) } - fn copy_staging_to_image_internal(&mut self, target_image: vk::Image) -> Result<()> { + fn copy_staging_to_image_internal( + &mut self, + target_image: vk::Image, + old_layout: vk::ImageLayout, + final_layout: vk::ImageLayout, + ) -> Result<()> { let device = self.context.device(); let width = self.width; let height = self.height; @@ -575,7 +625,7 @@ impl InputImage { // Transition image to transfer destination. 
let barrier = vk::ImageMemoryBarrier::default() - .old_layout(vk::ImageLayout::UNDEFINED) + .old_layout(old_layout) .new_layout(vk::ImageLayout::TRANSFER_DST_OPTIMAL) .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) @@ -700,10 +750,10 @@ impl InputImage { ); } - // Transition image to GENERAL (ready for encoding) + // Transition image to final layout (ready for its intended use) let barrier = vk::ImageMemoryBarrier::default() .old_layout(vk::ImageLayout::TRANSFER_DST_OPTIMAL) - .new_layout(vk::ImageLayout::GENERAL) + .new_layout(final_layout) .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) .image(target_image) diff --git a/src/lib.rs b/src/lib.rs index ffe3868..5fc76bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ //! # PixelForge //! -//! A Vulkan-based video encoding library for Rust, supporting H.264 and H.265 codecs. +//! A Vulkan-based video encoding library for Rust, supporting H.264, H.265, and AV1 codecs. //! //! > ⚠️ **Disclaimer**: This library was developed using AI ("vibe-coding") - partly to //! > see if it could be done, partly because I have practically zero experience with Vulkan. @@ -10,7 +10,7 @@ //! ## Features //! //! - **Hardware-accelerated** video encoding using Vulkan Video extensions. -//! - **Multiple codec support**: H.264/AVC, H.265/HEVC. +//! - **Multiple codec support**: H.264/AVC, H.265/HEVC, AV1. //! - **GPU-native API**: Encode directly from Vulkan images (`vk::Image`). //! - **Flexible configuration**: Rate control (CBR, VBR, CQP), quality levels, GOP settings. //! - **Utility helpers**: [`InputImage`] for easy YUV data upload to GPU. @@ -24,6 +24,12 @@ //! |-------|--------| //! | H.264/AVC | ✓ | //! | H.265/HEVC | ✓ | +//! | AV1 | ✓ (experimental) | +//! +//! > ⚠️ **AV1 Warning**: AV1 encoding is experimental. On NVIDIA GPUs, P-frames cannot +//! 
> reference other P-frames, causing all P-frames to reference the I-frame instead. This +//! > leads to progressively larger frame sizes over time. Consider using H.264 or HEVC +//! > until this is resolved. //! //! ## Requirements //! @@ -63,7 +69,7 @@ //! .app_name("My App") //! .build()?; //! -//! for codec in [Codec::H264, Codec::H265] { +//! for codec in [Codec::H264, Codec::H265, Codec::AV1] { //! println!("{:?}: encode={}", //! codec, //! context.supports_encode(codec) @@ -126,13 +132,15 @@ //! //! # H.265 encoding example //! cargo run --example encode_h265 +//! +//! # AV1 encoding example +//! cargo run --example encode_av1 //! ``` //! //! ## TODO's //! //! 1. [] Decoding. //! 1. [] B-frames support. -//! 1. [] AV1 support (depends on a new version of ash with more up-to-date Vulkan support). //! //! ## Contributing //! diff --git a/src/vulkan.rs b/src/vulkan.rs index 34a76a7..1e195b4 100644 --- a/src/vulkan.rs +++ b/src/vulkan.rs @@ -285,14 +285,47 @@ impl VideoContext { // Check codec support for encoding. let mut encode_codecs = Vec::new(); if let Some(eq) = encode_queue { - if Self::check_h264_encode_support(&entry, &instance, physical_device, eq) { + // Get list of available device extensions + let available_extensions = match unsafe { + instance.enumerate_device_extension_properties(physical_device) + } { + Ok(exts) => exts, + Err(e) => { + warn!( + "Failed to enumerate device extension properties for {}: {}. 
Skipping device.", + device_name, e + ); + continue; + } + }; + + let has_extension = |name: &std::ffi::CStr| -> bool { + available_extensions.iter().any(|ext| { + let ext_name = + unsafe { std::ffi::CStr::from_ptr(ext.extension_name.as_ptr()) }; + ext_name == name + }) + }; + + // Only check codec support if the extension exists + if has_extension(ash::khr::video_encode_h264::NAME) + && Self::check_h264_encode_support(&entry, &instance, physical_device, eq) + { encode_codecs.push(Codec::H264); debug!("Device {} supports H.264 encode", device_name); } - if Self::check_h265_encode_support(&entry, &instance, physical_device, eq) { + if has_extension(ash::khr::video_encode_h265::NAME) + && Self::check_h265_encode_support(&entry, &instance, physical_device, eq) + { encode_codecs.push(Codec::H265); debug!("Device {} supports H.265 encode", device_name); } + if has_extension(ash::khr::video_encode_av1::NAME) + && Self::check_av1_encode_support(&entry, &instance, physical_device, eq) + { + encode_codecs.push(Codec::AV1); + debug!("Device {} supports AV1 encode", device_name); + } } // Check if all required encode codecs are supported. @@ -384,6 +417,9 @@ impl VideoContext { if supported_encode_codecs.contains(&Codec::H265) { push_ext(ash::khr::video_encode_h265::NAME.as_ptr()); } + if supported_encode_codecs.contains(&Codec::AV1) { + push_ext(ash::khr::video_encode_av1::NAME.as_ptr()); + } } // Enable synchronization2 feature. @@ -402,13 +438,32 @@ impl VideoContext { // Add the 2-plane 444 formats extension. push_ext(ash::ext::ycbcr_2plane_444_formats::NAME.as_ptr()); - // Manually chain p_next pointers: sync2_features -> ycbcr_features -> ycbcr_2plane_444_features. + // Enable AV1 video encode feature only if AV1 is supported. + // Only include AV1 features in the pNext chain when AV1 is actually supported, + // to avoid chaining unknown feature structs on devices without AV1. 
+ let mut av1_encode_features = + vk::PhysicalDeviceVideoEncodeAV1FeaturesKHR::default().video_encode_av1(true); + + if supported_encode_codecs.contains(&Codec::AV1) { + ycbcr_2plane_444_features.p_next = (&mut av1_encode_features + as *mut vk::PhysicalDeviceVideoEncodeAV1FeaturesKHR) + .cast(); + } + + // Chain: sync2_features -> ycbcr_features -> ycbcr_2plane_444_features (-> av1 if supported) ycbcr_features.p_next = (&mut ycbcr_2plane_444_features as *mut vk::PhysicalDeviceYcbcr2Plane444FormatsFeaturesEXT) .cast(); sync2_features.p_next = (&mut ycbcr_features as *mut vk::PhysicalDeviceSamplerYcbcrConversionFeatures).cast(); + // Log all extensions being enabled + debug!("Enabling {} device extensions:", extension_names.len()); + for ext_name_ptr in &extension_names { + let ext_name = unsafe { std::ffi::CStr::from_ptr(*ext_name_ptr) }; + debug!(" - {}", ext_name.to_string_lossy()); + } + let mut device_create_info = vk::DeviceCreateInfo::default() .queue_create_infos(&queue_create_infos) .enabled_extension_names(&extension_names); @@ -578,6 +633,68 @@ impl VideoContext { } } + fn check_av1_encode_support( + entry: &ash::Entry, + instance: &ash::Instance, + physical_device: vk::PhysicalDevice, + _queue_family: u32, + ) -> bool { + // Create video queue instance extension. + let video_queue = ash::khr::video_queue::Instance::load(entry, instance); + + // Create AV1 encode profile info (must stay alive during the call) + let mut av1_profile = vk::VideoEncodeAV1ProfileInfoKHR::default() + .std_profile(ash::vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_MAIN); + + // Create video profile info for AV1 encode with typical 8-bit 4:2:0. 
+ let mut profile_info = vk::VideoProfileInfoKHR::default() + .video_codec_operation(vk::VideoCodecOperationFlagsKHR::ENCODE_AV1) + .chroma_subsampling(vk::VideoChromaSubsamplingFlagsKHR::TYPE_420) + .luma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + .chroma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8); + + // Chain the codec-specific profile into profile_info. + profile_info.p_next = (&mut av1_profile as *mut vk::VideoEncodeAV1ProfileInfoKHR).cast(); + + // Create capabilities structures. + let mut encode_capabilities = vk::VideoEncodeCapabilitiesKHR::default(); + let mut av1_capabilities = vk::VideoEncodeAV1CapabilitiesKHR::default(); + encode_capabilities.p_next = + &mut av1_capabilities as *mut vk::VideoEncodeAV1CapabilitiesKHR as *mut _; + let mut capabilities = vk::VideoCapabilitiesKHR::default(); + capabilities.p_next = + &mut encode_capabilities as *mut vk::VideoEncodeCapabilitiesKHR as *mut _; + + // Query capabilities. + let result = unsafe { + (video_queue.fp().get_physical_device_video_capabilities_khr)( + physical_device, + &profile_info, + &mut capabilities, + ) + }; + + match result { + vk::Result::SUCCESS => { + debug!( + "AV1 encode supported: max {}x{}, {} DPB slots", + capabilities.max_coded_extent.width, + capabilities.max_coded_extent.height, + capabilities.max_dpb_slots + ); + true + } + vk::Result::ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR => { + debug!("AV1 encode not supported on this device"); + false + } + err => { + warn!("Failed to query AV1 encode capabilities: {:?}", err); + false + } + } + } + /// Check if a codec is supported for encoding. pub fn supports_encode(&self, codec: Codec) -> bool { self.inner.supported_encode_codecs.contains(&codec)