From 5fd82bd8c847b481f6588dcd7cc10d9195101ee6 Mon Sep 17 00:00:00 2001 From: Colin Marc Date: Tue, 30 Apr 2024 22:54:05 +0200 Subject: [PATCH] feat(mmclient): support playing HDR streams --- mm-client/src/audio.rs | 1 + mm-client/src/bin/mmclient.rs | 21 ++- mm-client/src/render.rs | 145 ++++++++++++----- mm-client/src/render.slang | 87 ++++++++--- mm-client/src/video.rs | 287 +++++++++++++++++++++++----------- mm-client/src/vulkan.rs | 31 ++-- 6 files changed, 396 insertions(+), 176 deletions(-) diff --git a/mm-client/src/audio.rs b/mm-client/src/audio.rs index 599faf8..2795ab4 100644 --- a/mm-client/src/audio.rs +++ b/mm-client/src/audio.rs @@ -146,6 +146,7 @@ where if frames_remaining < frames_needed { out.fill(Default::default()); + trace!("audio buffer underrun"); return; } diff --git a/mm-client/src/bin/mmclient.rs b/mm-client/src/bin/mmclient.rs index 9721109..dd36ae1 100644 --- a/mm-client/src/bin/mmclient.rs +++ b/mm-client/src/bin/mmclient.rs @@ -96,9 +96,14 @@ struct Cli { /// resizes. #[arg(long, required = false, default_value = "auto")] resolution: Resolution, + /// Request 10-bit video output from the server. This will only work if + /// both your display and the application in question support rendering + /// HDR color. + #[arg(long, required = false)] + hdr: bool, /// The UI scale to communicate to the server. If not specified, this will /// be determined from the client-side window scale factor. - #[arg(long)] + #[arg(long, required = false)] ui_scale: Option, /// Video codec to use. #[arg(long, default_value = "h265")] @@ -121,6 +126,7 @@ struct MainLoop { struct App { configured_resolution: Resolution, configured_codec: protocol::VideoCodec, + configured_profile: protocol::VideoProfile, configured_framerate: u32, window: Arc, @@ -621,7 +627,7 @@ impl App { session_id: self.session_id, streaming_resolution: self.remote_display_params.resolution.clone(), video_codec: self.configured_codec.into(), - video_profile: protocol::VideoProfile::Hd.into(), + video_profile: self.configured_profile.into(), ..Default::default() }, None, @@ -816,6 +822,12 @@ fn main() -> Result<()> { Some(v) => bail!("invalid codec: {:?}", v), }; + let configured_profile = if args.hdr { + protocol::VideoProfile::Hdr10 + } else { + protocol::VideoProfile::Hd + }; + // TODO: anyhow errors are garbage for end-users. debug!("establishing connection to {:}", &args.host); let mut conn = Conn::new(&args.host).context("failed to establish connection")?; @@ -928,7 +940,7 @@ fn main() -> Result<()> { window.clone(), cfg!(debug_assertions), )?); - let renderer = Renderer::new(vk.clone(), window.clone())?; + let renderer = Renderer::new(vk.clone(), window.clone(), args.hdr)?; debug!("attaching session {:?}", session.session_id); let attachment_sid = conn.send( @@ -938,7 +950,7 @@ fn main() -> Result<()> { session_id: session.session_id, streaming_resolution: Some(streaming_resolution), video_codec: configured_codec.into(), - video_profile: protocol::VideoProfile::Hd.into(), + video_profile: configured_profile.into(), ..Default::default() }, None, @@ -966,6 +978,7 @@ fn main() -> Result<()> { configured_codec, configured_framerate: args.framerate, configured_resolution: args.resolution, + configured_profile, window, _proxy: proxy.clone(), diff --git a/mm-client/src/render.rs b/mm-client/src/render.rs index 8c7cedb..847c4c2 100644 --- a/mm-client/src/render.rs +++ b/mm-client/src/render.rs @@ -23,16 +23,36 @@ use crate::vulkan::*; const FONT_SIZE: f32 = 8.0; +// Matches the definition in render.slang. 
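+// The discriminants are copied into the push constants verbatim as u32
+// values and read back by the shader, so the two definitions have to stay in
+// lockstep. A compile-time guard along these lines (a sketch only, not part
+// of the existing code) could pin the values down:
+//
+//     const _: () = assert!(TextureColorSpace::Bt709 as u32 == 0);
+//     const _: () = assert!(TextureColorSpace::Bt2020Pq as u32 == 1);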
+#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[repr(u32)] +enum TextureColorSpace { + Bt709 = 0, + Bt2020Pq = 1, +} + +impl From for TextureColorSpace { + fn from(cs: crate::video::ColorSpace) -> Self { + match cs { + crate::video::ColorSpace::Bt709 => TextureColorSpace::Bt709, + crate::video::ColorSpace::Bt2020Pq => TextureColorSpace::Bt2020Pq, + } + } +} + #[derive(Copy, Clone, Debug)] #[repr(C)] struct PushConstants { aspect: glam::Vec2, + texture_color_space: TextureColorSpace, + output_color_space: vk::ColorSpaceKHR, } pub struct Renderer { width: u32, height: u32, scale_factor: f64, + hdr_mode: bool, imgui: imgui::Context, imgui_platform: imgui_winit_support::WinitPlatform, @@ -43,12 +63,18 @@ pub struct Renderer { swapchain: Option, swapchain_dirty: bool, - video_texture: Option, + new_video_texture: Option<(Arc, VideoStreamParams)>, vk: Arc, window: Arc, } +struct VideoTexture { + image: Arc, + view: vk::ImageView, + color_space: TextureColorSpace, +} + struct Swapchain { swapchain: vk::SwapchainKHR, frames: Vec, @@ -57,11 +83,13 @@ struct Swapchain { sampler_conversion: vk::SamplerYcbcrConversion, sampler: vk::Sampler, - bound_video_texture: Option<(Arc, vk::ImageView)>, + bound_video_texture: Option, + /// The normalized relationship between the output and the video texture, /// after scaling. For example, a 500x500 video texture in a 1000x500 /// swapchain would have the aspect (2.0, 1.0), as would a 250x250 texture. aspect: (f64, f64), + surface_format: vk::SurfaceFormatKHR, descriptor_set_layout: vk::DescriptorSetLayout, descriptor_pool: vk::DescriptorPool, pipeline_layout: vk::PipelineLayout, @@ -85,13 +113,12 @@ struct SwapImage { view: vk::ImageView, } -struct VideoTexture { - params: VideoStreamParams, - texture: Arc, -} - impl Renderer { - pub fn new(vk: Arc, window: Arc) -> Result { + pub fn new( + vk: Arc, + window: Arc, + hdr_mode: bool, + ) -> Result { let window_size = window.inner_size(); let scale_factor = window.scale_factor(); @@ -112,6 +139,7 @@ impl Renderer { width: window_size.width, height: window_size.height, scale_factor, + hdr_mode, window, imgui, imgui_platform, @@ -120,7 +148,7 @@ impl Renderer { imgui_time: time::Instant::now(), swapchain: None, swapchain_dirty: false, - video_texture: None, + new_video_texture: None, vk, }; @@ -134,8 +162,7 @@ impl Renderer { let start = time::Instant::now(); let device = &self.vk.device; - let surface_format = select_surface_format(self.vk.clone())?; - trace!(?surface_format, "surface format"); + let surface_format = select_surface_format(self.vk.clone(), self.hdr_mode)?; let surface_capabilities = self .vk @@ -219,12 +246,16 @@ impl Renderer { // We need to create a sampler, even if we don't have a video stream yet // and don't know what the fields should be. - let video_params = match self.video_texture.as_ref() { - Some(tex) => tex.params, - None => VideoStreamParams::default(), + let (video_texture_format, video_params) = match self.new_video_texture.as_ref() { + Some((tex, params)) => (tex.format, *params), + None => ( + vk::Format::G8_B8_R8_3PLANE_420_UNORM, + VideoStreamParams::default(), + ), }; - let sampler_conversion = sampler_conversion(device, &video_params)?; + let sampler_conversion = + create_ycbcr_sampler_conversion(device, video_texture_format, &video_params)?; let sampler = { let mut conversion_info = vk::SamplerYcbcrConversionInfo::builder() @@ -243,22 +274,26 @@ impl Renderer { unsafe { device.create_sampler(&create_info, None)? 
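            // The YCbCr conversion above is baked into this sampler (and the
            // sampler is presumably used as an immutable sampler in the
            // descriptor set layout, as sampler YCbCr conversion requires), so
            // it can't be swapped at draw time. When the stream format or
            // color description changes, attach_video_texture() just marks the
            // swapchain dirty and the whole chain, sampler included, is
            // rebuilt.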
} }; - let bound_video_texture = if let Some(tex) = self.video_texture.as_ref() { + let bound_video_texture = if let Some((tex, params)) = self.new_video_texture.as_ref() { let view = create_image_view( &self.vk.device, - tex.texture.image, - tex.texture.format, + tex.image, + tex.format, Some(sampler_conversion), )?; // Increment the reference count on the texture. - Some((tex.texture.clone(), view)) + Some(VideoTexture { + image: tex.clone(), + view, + color_space: params.color_space.into(), + }) } else { None }; - let aspect = if let Some((tex, _)) = bound_video_texture.as_ref() { - calculate_aspect(self.width, self.height, tex.width, tex.height) + let aspect = if let Some(tex) = bound_video_texture.as_ref() { + calculate_aspect(self.width, self.height, tex.image.width, tex.image.height) } else { (1.0, 1.0) }; @@ -294,7 +329,7 @@ impl Renderer { let pipeline_layout = { let pc_ranges = [vk::PushConstantRange::builder() - .stage_flags(vk::ShaderStageFlags::VERTEX) + .stage_flags(vk::ShaderStageFlags::VERTEX | vk::ShaderStageFlags::FRAGMENT) .offset(0) .size(std::mem::size_of::() as u32) .build()]; @@ -435,10 +470,10 @@ impl Renderer { .unwrap(); // TODO: do the write in bind_video_texture? - if let Some((_, view)) = bound_video_texture.as_ref() { + if let Some(tex) = bound_video_texture.as_ref() { let info = vk::DescriptorImageInfo::builder() .image_layout(vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL) - .image_view(*view); + .image_view(tex.view); let image_info = &[info.build()]; let sampler_write = vk::WriteDescriptorSet::builder() @@ -522,6 +557,7 @@ impl Renderer { sampler, bound_video_texture, aspect, + surface_format, pipeline_layout, pipeline, @@ -592,7 +628,7 @@ impl Renderer { params: VideoStreamParams, ) -> Result<()> { // TODO: no need to recreate the sampler if the params match. - self.video_texture = Some(VideoTexture { params, texture }); + self.new_video_texture = Some((texture, params)); self.swapchain_dirty = true; Ok(()) } @@ -603,7 +639,7 @@ impl Renderer { // 1.0). pub fn get_texture_aspect(&self) -> Option<(f64, f64)> { if let Some(Swapchain { - bound_video_texture: Some((_, _)), + bound_video_texture: Some(_), aspect, .. }) = self.swapchain.as_ref() @@ -738,21 +774,23 @@ impl Renderer { ); } - if self.video_texture.is_none() || swapchain.aspect != (1.0, 1.0) { + if self.new_video_texture.is_none() || swapchain.aspect != (1.0, 1.0) { // TODO Draw the background // https://www.toptal.com/designers/subtlepatterns/prism/ } // Draw the video texture. - if let Some((_texture, _)) = &swapchain.bound_video_texture { + if let Some(tex) = &swapchain.bound_video_texture { let pc = PushConstants { aspect: glam::Vec2::new(swapchain.aspect.0 as f32, swapchain.aspect.1 as f32), + texture_color_space: tex.color_space, + output_color_space: swapchain.surface_format.color_space, }; device.cmd_push_constants( frame.render_cb, swapchain.pipeline_layout, - vk::ShaderStageFlags::VERTEX, + vk::ShaderStageFlags::VERTEX | vk::ShaderStageFlags::FRAGMENT, 0, std::slice::from_raw_parts( &pc as *const _ as *const u8, @@ -899,8 +937,8 @@ impl Renderer { device.destroy_sampler(swapchain.sampler, None); device.destroy_sampler_ycbcr_conversion(swapchain.sampler_conversion, None); - if let Some((_img, view)) = swapchain.bound_video_texture.take() { - device.destroy_image_view(view, None); + if let Some(tex) = swapchain.bound_video_texture.take() { + device.destroy_image_view(tex.view, None); // We probably drop the last reference to the image here, which then // gets destroyed. 
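            // (The decoder may still hold its own Arc clone of the texture, in
            // which case the underlying image outlives this swapchain and is
            // only freed once that clone is dropped as well, assuming the
            // wrapper releases its Vulkan resources in its Drop impl.)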
} @@ -912,8 +950,11 @@ impl Renderer { } } -fn select_surface_format(vk: Arc) -> Result { - let surface_formats = unsafe { +fn select_surface_format( + vk: Arc, + hdr_mode: bool, +) -> Result { + let mut surface_formats = unsafe { vk.surface_loader .get_physical_device_surface_formats(vk.pdevice, vk.surface)? }; @@ -924,16 +965,36 @@ fn select_surface_format(vk: Arc) -> Result= 0.0031) - return 1.055 * pow(s, 1.0 / 2.4) - 0.055; - else - return s * 12.92; + if (vk_color_space == VK_COLOR_SPACE_BT709_NONLINEAR_EXT) + { + return color; + } + + let linear = bt709_eotf(color); + switch (vk_color_space) + { + case VK_COLOR_SPACE_SRGB_NONLINEAR_EXT: + return srgb_inverse_eotf(linear); + case VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT: + return linear; + // case VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT: + // return srgb_inverse_eotf(transform(color, Primaries::BT709, Primaries::P3)) + case VK_COLOR_SPACE_HDR10_ST2084_EXT: + return pq_inverse_eotf(transform(linear, Primaries::BT709, Primaries::BT2020)); + default: + return srgb_inverse_eotf(linear); + } } -float bt709_linearize(float s) +float3 bt2020_pq_to_display(float3 color, int vk_color_space) { - if (s > 0.081) - return pow((s + 0.099) / 1.099, 1.0 / 0.45); - else - return s / 4.5; + if (vk_color_space == VK_COLOR_SPACE_HDR10_ST2084_EXT) + { + return color; + } + + let linear = transform(pq_eotf(color) * PQ_MAX_WHITE / SDR_REFERENCE_WHITE, Primaries::BT2020, Primaries::BT709); + switch (vk_color_space) + { + case VK_COLOR_SPACE_SRGB_NONLINEAR_EXT: + return srgb_inverse_eotf(linear); + case VK_COLOR_SPACE_BT709_NONLINEAR_EXT: + return bt709_inverse_eotf(clamp(linear, 0.0, 1.0)); + case VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT: + return linear; + // case VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT: + // return srgb_inverse_eotf(transform(color, Primaries::BT2020, Primaries::P3)) + default: + return srgb_inverse_eotf(linear); + } } [shader("fragment")] float4 frag(float2 uv: TextureCoord) : SV_Target { - float4 color = texture.Sample(uv); + float4 color = clamp(texture.Sample(uv), 0.0, 1.0); // When sampling the video texture, vulkan does the matrix multiplication - // for us, but doesn't apply any transfer function. So we need to convert - // from the BT.709 transfer function to the sRGB one. - return float4( - srgb_unlinear(bt709_linearize(color.r)), - srgb_unlinear(bt709_linearize(color.g)), - srgb_unlinear(bt709_linearize(color.b)), - color.a); + // for us, but doesn't apply any transfer function, so the values are + // still nonlinear in either BT.709 or BT.2020/ST2048. 
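+    // Both helpers above follow the same pattern: undo the source transfer
+    // function to get linear light, convert primaries if the source and
+    // destination gamuts differ, then re-encode with whatever transfer
+    // function the swapchain's VkColorSpaceKHR expects. For example, a BT.709
+    // frame presented on an HDR10 swapchain goes bt709_eotf, then a
+    // BT.709-to-BT.2020 primary conversion, then pq_inverse_eotf, while the
+    // pass-through branches avoid re-applying a transfer function when the
+    // texture and swapchain already agree.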
+ switch (pc.texture_color_space) + { + case TextureColorSpace::Bt709: + return float4(bt709_to_display(color.rgb, pc.vk_color_space), 1.0); + case TextureColorSpace::Bt2020Pq: + return float4(bt2020_pq_to_display(color.rgb, pc.vk_color_space), 1.0); + default: + return float4(0.0, 0.5, 1.0, 1.0); + } } diff --git a/mm-client/src/video.rs b/mm-client/src/video.rs index 1bc38f4..7015794 100644 --- a/mm-client/src/video.rs +++ b/mm-client/src/video.rs @@ -7,7 +7,7 @@ use std::{ time, }; -use anyhow::{anyhow, Context}; +use anyhow::{anyhow, bail, Context}; use ash::vk; use bytes::{Bytes, BytesMut}; use ffmpeg_next as ffmpeg; @@ -31,20 +31,25 @@ pub struct FrameMetadata { pub pts: u64, } +#[derive(Debug, Clone)] struct YUVPicture { - y: Bytes, - u: Bytes, - v: Bytes, + planes: [Bytes; 3], + num_planes: usize, info: FrameMetadata, } +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ColorSpace { + Bt709, + Bt2020Pq, +} + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct VideoStreamParams { pub width: u32, pub height: u32, - pub pixel_format: ffmpeg::format::Pixel, - pub color_space: ffmpeg::color::Space, - pub color_range: ffmpeg::color::Range, + pub color_space: ColorSpace, + pub color_full_range: bool, } impl Default for VideoStreamParams { @@ -52,9 +57,8 @@ impl Default for VideoStreamParams { Self { width: 0, height: 0, - pixel_format: ffmpeg::format::Pixel::YUV420P, - color_space: ffmpeg::color::Space::BT709, - color_range: ffmpeg::color::Range::MPEG, + color_space: ColorSpace::Bt709, + color_full_range: false, } } } @@ -387,10 +391,29 @@ impl DecoderInit { match self.decoder.receive_frame(&mut frame) { Ok(()) => { self.first_frame = match frame.format() { - ffmpeg::format::Pixel::YUV420P => Some((frame, info)), ffmpeg::format::Pixel::VIDEOTOOLBOX => { + let sw_format = unsafe { + let ctx_ref = (*self.decoder.as_ptr()).hw_frames_ctx; + assert!(!ctx_ref.is_null()); + + let mut transfer_fmt_list = std::ptr::null_mut(); + if ffmpeg_sys::av_hwframe_transfer_get_formats( + ctx_ref, + ffmpeg_sys::AVHWFrameTransferDirection::AV_HWFRAME_TRANSFER_DIRECTION_FROM, + &mut transfer_fmt_list, + 0) < 0 + { + bail!("call to av_hwframe_transfer_get_formats failed"); + }; + + let transfer_formats = read_format_list(transfer_fmt_list); + assert!(!transfer_formats.is_empty()); + + transfer_formats[0] + }; + let mut sw_frame = ffmpeg::frame::Video::new( - ffmpeg::format::Pixel::YUV420P, + sw_format, self.decoder.width(), self.decoder.height(), ); @@ -409,7 +432,7 @@ impl DecoderInit { Some((sw_frame, info)) } } - f => return Err(anyhow!("unexpected stream format: {:?}", f)), + _ => Some((frame, info)), }; Ok(true) @@ -439,25 +462,31 @@ impl DecoderInit { None => return Err(anyhow!("no frames received yet")), }; - // debug_assert_eq!(self.decoder.color_space(), ffmpeg::color::Space::BT709); - - let output_format = first_frame.0.format(); - assert_eq!(output_format, ffmpeg::format::Pixel::YUV420P); - // If we're using VideoToolbox, create a "hardware" frame to use with // receive_frame. 
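        // Whatever software pixel format we ended up with above (either
        // straight from the decoder, or the first format reported by
        // av_hwframe_transfer_get_formats on the VideoToolbox path) drives the
        // choice of Vulkan texture format below: 8-bit 4:2:0 maps onto a
        // 3-plane image directly, while 10-bit 4:2:0 is converted to P010LE
        // first so it can live in a 2-plane, 16-bit-per-component image.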
+ let output_format = first_frame.0.format(); + let ((mut frame, info), mut hw_frame) = match decoder_format { - ffmpeg::format::Pixel::YUV420P => (first_frame, None), ffmpeg::format::Pixel::VIDEOTOOLBOX => { let hw_frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::VIDEOTOOLBOX, width, height); (first_frame, Some(hw_frame)) } - _ => return Err(anyhow!("unexpected stream format: {:?}", decoder_format)), + _ => (first_frame, None), }; - let texture_format = vk::Format::G8_B8_R8_3PLANE_420_UNORM; + // For 10-bit textures, we need to end up in on the GPU in P010LE, + // because that's better supported. To make the copy easier, we'll use + // swscale to convert to a matching intermediate format. + let (intermediate_format, texture_format) = match output_format { + ffmpeg::format::Pixel::YUV420P => (None, vk::Format::G8_B8_R8_3PLANE_420_UNORM), + ffmpeg::format::Pixel::YUV420P10 | ffmpeg::format::Pixel::YUV420P10LE => ( + Some(ffmpeg::format::Pixel::P010LE), + vk::Format::G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, + ), + _ => return Err(anyhow!("unexpected pixel format: {:?}", output_format)), + }; debug_assert_eq!(frame.width(), width); debug_assert_eq!(frame.height(), height); @@ -472,22 +501,26 @@ impl DecoderInit { )); } - let y_stride = frame.stride(0); - let u_stride = frame.stride(1); - let v_stride = frame.stride(2); - - let y_len = y_stride * frame.plane_height(0) as usize; - let u_len = u_stride * frame.plane_height(1) as usize; - let v_len = v_stride * frame.plane_height(2) as usize; - - // This is vanishingly unlikely, and I have no idea how the pixels - // would be layed out in that case. - debug_assert_eq!(y_len % 4, 0); - - // Precalculate the offsets into the buffer for each plane. - let buffer_size = y_len + u_len + v_len; - let buffer_offsets = [0, y_len, y_len + u_len]; - let buffer_strides = [y_stride, u_stride, v_stride]; + let mut intermediate_frame = + intermediate_format.map(|fmt| ffmpeg::frame::Video::new(fmt, width, height)); + + // For the purposes of determining the size of and offsets into the + // staging buffer, we use the intermediate frame if it exists, otherwise + // the output frame. + let model_frame = intermediate_frame.as_ref().unwrap_or(&frame); + + // Precalculate the layout of the staging buffer. 
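+        // As an illustration (numbers assume tightly packed strides; the real
+        // strides come from ffmpeg and may include padding), a 1920x1080
+        // P010LE intermediate frame would lay out as:
+        //   plane 0 (Y):  3840-byte stride x 1080 rows -> offset 0
+        //   plane 1 (UV): 3840-byte stride x  540 rows -> offset 4147200
+        // for a staging buffer of 6220800 bytes in total.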
+ let mut buffer_strides = [0; 3]; + let mut buffer_offsets = [0; 3]; + let mut buffer_size = 0; + for plane in 0..model_frame.planes() { + let stride = model_frame.stride(plane); + let len = stride * model_frame.plane_height(plane) as usize; + + buffer_strides[plane] = stride; + buffer_offsets[plane] = buffer_size; + buffer_size += len; + } let staging_buffer = create_host_buffer( &self.vk.device, @@ -496,21 +529,32 @@ impl DecoderInit { buffer_size, )?; - let color_space = match self.decoder.color_space() { - ffmpeg::color::Space::BT709 => ffmpeg::color::Space::BT709, - ffmpeg::color::Space::BT2020NCL => ffmpeg::color::Space::BT2020NCL, - cs => { - warn!("unexpected color space: {:?}", cs); - ffmpeg::color::Space::BT709 + let color_space = match ( + self.decoder.color_space(), + self.decoder.color_transfer_characteristic(), + ) { + (ffmpeg::color::Space::BT709, ffmpeg::color::TransferCharacteristic::BT709) => { + ColorSpace::Bt709 + } + (ffmpeg::color::Space::BT2020NCL, ffmpeg::color::TransferCharacteristic::SMPTE2084) => { + ColorSpace::Bt2020Pq } + ( + ffmpeg::color::Space::Unspecified, + ffmpeg::color::TransferCharacteristic::Unspecified, + ) => { + warn!("video stream has unspecified color primaries or transfer function"); + ColorSpace::Bt709 + } + (cs, ctrc) => bail!("unexpected color description: {:?} / {:?}", cs, ctrc), }; - let color_range = match self.decoder.color_range() { - ffmpeg::color::Range::MPEG => ffmpeg::color::Range::MPEG, - ffmpeg::color::Range::JPEG => ffmpeg::color::Range::JPEG, + let color_full_range = match self.decoder.color_range() { + ffmpeg::color::Range::MPEG => false, + ffmpeg::color::Range::JPEG => true, cr => { warn!("unexpected color range: {:?}", cr); - ffmpeg::color::Range::MPEG + false } }; @@ -536,7 +580,12 @@ impl DecoderInit { // Send the frame we have from before. decoded_send - .send(copy_frame(&mut frame, &mut BytesMut::new(), info)) + .send(copy_frame( + &mut frame, + intermediate_frame.as_mut(), + &mut BytesMut::new(), + info, + )) .unwrap(); // Spawn another thread that receives packets on one channel and sends @@ -580,11 +629,12 @@ impl DecoderInit { loop { match receive_frame(&mut decoder, &mut frame, hw_frame.as_mut()) { Ok(()) => { - let pic = copy_frame(&mut frame, &mut scratch, info); - - debug_assert_eq!(pic.y.len(), y_len); - debug_assert_eq!(pic.u.len(), u_len); - debug_assert_eq!(pic.v.len(), v_len); + let pic = copy_frame( + &mut frame, + intermediate_frame.as_mut(), + &mut scratch, + info, + ); let span = trace_span!("send"); let _guard = span.enter(); @@ -640,9 +690,8 @@ impl DecoderInit { let params = VideoStreamParams { width, height, - pixel_format: output_format, color_space, - color_range, + color_full_range, }; Ok((dec, video_texture, params)) @@ -715,14 +764,15 @@ impl CPUDecoder { // Copy data into the staging buffer. self.yuv_buffer_offsets .iter() - .zip([pic.y, pic.u, pic.v]) + .zip(pic.planes.iter()) + .take(pic.num_planes) .for_each(|(offset, src)| { let dst = std::slice::from_raw_parts_mut( (self.staging_buffer.access as *mut u8).add(*offset), src.len(), ); - dst.copy_from_slice(&src); + dst.copy_from_slice(src); }); // Trace the upload, including loading timestamps for the previous upload. @@ -793,6 +843,12 @@ impl CPUDecoder { // Upload from the staging buffer to the texture. 
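        // Note that buffer_row_length below is specified in texels, while the
        // strides we recorded are in bytes. For the 3-plane 8-bit format the
        // two coincide (1 byte per texel on every plane), but for the 2-plane
        // 10-bit format a Y texel is 2 bytes and a UV texel (a B+R pair) is 4
        // bytes, hence the division by texel_width: a 3840-byte Y stride
        // becomes a row length of 1920 texels.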
{ + let num_planes = match self.video_texture.format { + vk::Format::G8_B8_R8_3PLANE_420_UNORM => 3, + vk::Format::G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 => 2, + _ => unreachable!(), + }; + let regions = [ vk::ImageAspectFlags::PLANE_0, vk::ImageAspectFlags::PLANE_1, @@ -800,6 +856,7 @@ impl CPUDecoder { ] .into_iter() .enumerate() + .take(num_planes) .map(|(plane, plane_aspect_mask)| { // Vulkan considers the image width/height to be 1/2 the size // for the U and V planes. @@ -809,10 +866,21 @@ impl CPUDecoder { (self.texture_width / 2, self.texture_height / 2) }; + let texel_width = match self.video_texture.format { + vk::Format::G8_B8_R8_3PLANE_420_UNORM => 1, + vk::Format::G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 => { + if plane == 0 { + 2 + } else { + 4 + } + } + _ => unreachable!(), + }; + vk::BufferImageCopy::builder() .buffer_offset(self.yuv_buffer_offsets[plane] as u64) - // This is actually in texels, but each plane uses 1bpp. - .buffer_row_length(self.yuv_buffer_strides[plane] as u32) + .buffer_row_length((self.yuv_buffer_strides[plane] / texel_width) as u32) // In texels. .image_subresource(vk::ImageSubresourceLayers { aspect_mask: plane_aspect_mask, mip_level: 0, @@ -935,58 +1003,93 @@ fn copy_packet(pkt: &mut ffmpeg::Packet, buf: Undecoded) -> anyhow::Result<()> { #[instrument(skip_all)] fn copy_frame( frame: &mut ffmpeg::frame::Video, + intermediate_frame: Option<&mut ffmpeg::frame::Video>, scratch: &mut BytesMut, info: FrameMetadata, ) -> YUVPicture { - scratch.truncate(0); - - scratch.extend_from_slice(frame.data(0)); - let y = scratch.split().freeze(); + let transfer_src = if let Some(intermediate) = intermediate_frame { + // TODO reuse + let mut ctx = ffmpeg::software::scaling::Context::get( + frame.format(), + frame.width(), + frame.height(), + intermediate.format(), + intermediate.width(), + intermediate.height(), + ffmpeg::software::scaling::Flags::empty(), + ) + .expect("failed to create sws ctx"); + + ctx.run(frame, intermediate).expect("failed to convert"); + + intermediate + } else { + frame + }; - scratch.extend_from_slice(frame.data(1)); - let u = scratch.split().freeze(); + let mut pic = YUVPicture { + planes: [Bytes::new(), Bytes::new(), Bytes::new()], + num_planes: transfer_src.planes(), + info, + }; - scratch.extend_from_slice(frame.data(2)); - let v = scratch.split().freeze(); + scratch.truncate(0); + for plane in 0..transfer_src.planes() { + scratch.extend_from_slice(transfer_src.data(plane)); + pic.planes[plane] = scratch.split().freeze(); + } - YUVPicture { y, u, v, info } + pic } #[no_mangle] unsafe extern "C" fn get_hw_format_videotoolbox( ctx: *mut ffmpeg_sys::AVCodecContext, - mut formats: *const ffmpeg_sys::AVPixelFormat, + list: *const ffmpeg_sys::AVPixelFormat, ) -> ffmpeg_sys::AVPixelFormat { use ffmpeg_sys::AVPixelFormat::*; - while *formats != AV_PIX_FMT_NONE { - if *formats == AV_PIX_FMT_VIDEOTOOLBOX { - let frames_ctx_ref = ffmpeg_sys::av_hwframe_ctx_alloc((*ctx).hw_device_ctx); - if frames_ctx_ref.is_null() { - error!("call to av_hwframe_ctx_alloc failed"); - break; - } + let sw_pix_fmt = (*ctx).sw_pix_fmt; + let formats = read_format_list(list); - let frames_ctx = (*frames_ctx_ref).data as *mut ffmpeg_sys::AVHWFramesContext; - (*frames_ctx).width = (*ctx).width; - (*frames_ctx).height = (*ctx).height; - (*frames_ctx).format = AV_PIX_FMT_VIDEOTOOLBOX; - (*frames_ctx).sw_format = AV_PIX_FMT_YUV420P; + if formats.contains(&ffmpeg::format::Pixel::VIDEOTOOLBOX) { + let frames_ctx_ref = ffmpeg_sys::av_hwframe_ctx_alloc((*ctx).hw_device_ctx); 
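+        // From here on, any failure falls back to returning sw_pix_fmt, which
+        // tells ffmpeg to skip VideoToolbox and decode in software instead.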
+ if frames_ctx_ref.is_null() { + error!("call to av_hwframe_ctx_alloc failed"); + return sw_pix_fmt; + } - let res = ffmpeg_sys::av_hwframe_ctx_init(frames_ctx_ref); - if res < 0 { - error!("call to av_hwframe_ctx_init failed"); - break; - } + debug!(?formats, sw_pix_fmt = ?sw_pix_fmt, "get_hw_format_videotoolbox"); - debug!("using VideoToolbox hardware encoder"); - (*ctx).hw_frames_ctx = frames_ctx_ref; - return *formats; + let frames_ctx = (*frames_ctx_ref).data as *mut ffmpeg_sys::AVHWFramesContext; + (*frames_ctx).width = (*ctx).width; + (*frames_ctx).height = (*ctx).height; + (*frames_ctx).format = AV_PIX_FMT_VIDEOTOOLBOX; + (*frames_ctx).sw_format = AV_PIX_FMT_YUV420P; + + let res = ffmpeg_sys::av_hwframe_ctx_init(frames_ctx_ref); + if res < 0 { + error!("call to av_hwframe_ctx_init failed"); + return sw_pix_fmt; } - formats = formats.add(1); + debug!("using VideoToolbox hardware encoder"); + (*ctx).hw_frames_ctx = frames_ctx_ref; + return AV_PIX_FMT_VIDEOTOOLBOX; + } + + warn!("unable to determine ffmpeg hw format"); + sw_pix_fmt +} + +unsafe fn read_format_list( + mut ptr: *const ffmpeg_sys::AVPixelFormat, +) -> Vec { + let mut formats = Vec::new(); + while !ptr.is_null() && *ptr != ffmpeg_sys::AVPixelFormat::AV_PIX_FMT_NONE { + formats.push((*ptr).into()); + ptr = ptr.add(1); } - warn!("VideoToolbox setup failed, falling back to CPU decoder"); - AV_PIX_FMT_YUV420P + formats } diff --git a/mm-client/src/vulkan.rs b/mm-client/src/vulkan.rs index 417e785..32a734c 100644 --- a/mm-client/src/vulkan.rs +++ b/mm-client/src/vulkan.rs @@ -21,10 +21,11 @@ use ash::{ vk, }; use cstr::cstr; -use ffmpeg_next as ffmpeg; use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle}; use tracing::{debug, error, info, warn}; +use crate::video::ColorSpace; + pub struct VkDebugContext { debug: DebugUtils, messenger: vk::DebugUtilsMessengerEXT, @@ -957,34 +958,24 @@ pub fn load_shader(device: &ash::Device, bytes: &[u8]) -> anyhow::Result anyhow::Result { let ycbcr_model = match params.color_space { - ffmpeg::color::Space::BT709 => vk::SamplerYcbcrModelConversion::YCBCR_709, - ffmpeg::color::Space::BT2020NCL => vk::SamplerYcbcrModelConversion::YCBCR_2020, - _ => return Err(anyhow!("unsupported color space: {:?}", params.color_space)), - }; - - let ycbcr_range = match params.color_range { - ffmpeg::color::Range::MPEG => vk::SamplerYcbcrRange::ITU_NARROW, - ffmpeg::color::Range::JPEG => vk::SamplerYcbcrRange::ITU_FULL, - _ => return Err(anyhow!("unsupported color range: {:?}", params.color_range)), + ColorSpace::Bt709 => vk::SamplerYcbcrModelConversion::YCBCR_709, + ColorSpace::Bt2020Pq => vk::SamplerYcbcrModelConversion::YCBCR_2020, }; - let texture_format = match params.pixel_format { - ffmpeg::format::Pixel::YUV420P => vk::Format::G8_B8_R8_3PLANE_420_UNORM, - _ => { - return Err(anyhow!( - "unsupported pixel format: {:?}", - params.pixel_format - )) - } + let ycbcr_range = if params.color_full_range { + vk::SamplerYcbcrRange::ITU_FULL + } else { + vk::SamplerYcbcrRange::ITU_NARROW }; let create_info = vk::SamplerYcbcrConversionCreateInfo::builder() - .format(texture_format) + .format(format) .ycbcr_model(ycbcr_model) .ycbcr_range(ycbcr_range) .chroma_filter(vk::Filter::LINEAR)