Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

renderer_vulkan: Handle depth-stencil copies through depth render overrides. #2134

Merged
merged 1 commit into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/core/devtools/widget/reg_popup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) {
"DEPTH_SLICE.TILE_MAX", depth_buffer.depth_slice.tile_max,
"Pitch()", depth_buffer.Pitch(),
"Height()", depth_buffer.Height(),
"Address()", depth_buffer.Address(),
"DepthAddress()", depth_buffer.DepthAddress(),
"StencilAddress()", depth_buffer.StencilAddress(),
"NumSamples()", depth_buffer.NumSamples(),
"NumBits()", depth_buffer.NumBits(),
"GetDepthSliceSize()", depth_buffer.GetDepthSliceSize()
Expand Down
4 changes: 2 additions & 2 deletions src/core/devtools/widget/reg_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ void RegView::DrawGraphicsRegs() {
TableNextColumn();
TextUnformatted("Depth buffer");
TableNextColumn();
if (regs.depth_buffer.Address() == 0 || !regs.depth_control.depth_enable) {
if (regs.depth_buffer.DepthAddress() == 0 || !regs.depth_control.depth_enable) {
TextUnformatted("N/A");
} else {
const char* text = last_selected_cb == depth_id && default_reg_popup.open ? "x" : "->";
Expand Down Expand Up @@ -241,7 +241,7 @@ void RegView::SetData(DebugStateType::RegDump _data, const std::string& base_tit
default_reg_popup.open = false;
if (last_selected_cb == depth_id) {
const auto& has_depth =
regs.depth_buffer.Address() != 0 && regs.depth_control.depth_enable;
regs.depth_buffer.DepthAddress() != 0 && regs.depth_control.depth_enable;
if (has_depth) {
default_reg_popup.SetData(title, regs.depth_buffer, regs.depth_control);
default_reg_popup.open = true;
Expand Down
65 changes: 61 additions & 4 deletions src/video_core/amdgpu/liverpool.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,11 +429,19 @@ struct Liverpool {
} depth_slice;

bool DepthValid() const {
return Address() != 0 && z_info.format != ZFormat::Invalid;
return DepthAddress() != 0 && z_info.format != ZFormat::Invalid;
}

bool StencilValid() const {
return Address() != 0 && stencil_info.format != StencilFormat::Invalid;
return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
}

/// Reports whether the depth write surface can be used: DepthWriteAddress() must be
/// non-zero and the Z format must not be ZFormat::Invalid.
bool DepthWriteValid() const {
    const bool has_address = DepthWriteAddress() != 0;
    const bool has_format = z_info.format != ZFormat::Invalid;
    return has_address && has_format;
}

/// Reports whether the stencil write surface can be used: StencilWriteAddress() must be
/// non-zero and the stencil format must not be StencilFormat::Invalid.
bool StencilWriteValid() const {
    const bool has_address = StencilWriteAddress() != 0;
    const bool has_format = stencil_info.format != StencilFormat::Invalid;
    return has_address && has_format;
}

u32 Pitch() const {
Expand All @@ -444,14 +452,22 @@ struct Liverpool {
return (depth_size.height_tile_max + 1) << 3;
}

u64 Address() const {
u64 DepthAddress() const {
return u64(z_read_base) << 8;
}

/// Address of the stencil surface used for reads.
/// The stored base is widened to 64 bits and shifted left by 8 (i.e. multiplied by 256).
u64 StencilAddress() const {
    const u64 base = stencil_read_base;
    return base << 8;
}

/// Address of the depth surface used for writes.
/// The stored base is widened to 64 bits and shifted left by 8 (i.e. multiplied by 256).
u64 DepthWriteAddress() const {
    const u64 base = z_write_base;
    return base << 8;
}

/// Address of the stencil surface used for writes.
/// The stored base is widened to 64 bits and shifted left by 8 (i.e. multiplied by 256).
u64 StencilWriteAddress() const {
    const u64 base = stencil_write_base;
    return base << 8;
}

/// Sample count of the depth surface, computed as 1 shifted left by z_info.num_samples
/// (the field is treated as a log2 value).
u32 NumSamples() const {
return 1u << z_info.num_samples; // spec doesn't say it is a log2
}
Expand Down Expand Up @@ -1008,6 +1024,46 @@ struct Liverpool {
}
};

/// Three-state selector stored in the 2-bit force_* fields of DepthRenderOverride
/// (force_hiz_enable, force_his_enable0/1, force_full_z_range).
/// The encoding 3 has no enumerator here.
enum class ForceEnable : u32 {
Off = 0,
Enable = 1,
Disable = 2,
};

/// Value type of the 2-bit DepthRenderOverride::force_z_limit_summ field.
/// NOTE(review): MinZ/MaxZ/Both presumably select which Z limits are forced — confirm
/// against the hardware register reference.
enum class ForceSumm : u32 {
Off = 0,
MinZ = 1,
MaxZ = 2,
Both = 3,
};

/// 32-bit depth render override register, exposed in the register block as
/// `depth_render_override`.
///
/// `raw` aliases the whole word. The BitField arguments read as
/// <first bit, bit count, value type>; with that reading the fields below tile
/// bits 0-31 with no gaps or overlaps.
union DepthRenderOverride {
u32 raw;
BitField<0, 2, ForceEnable> force_hiz_enable;
BitField<2, 2, ForceEnable> force_his_enable0;
BitField<4, 2, ForceEnable> force_his_enable1;
BitField<6, 1, u32> force_shader_z_order;
BitField<7, 1, u32> fast_z_disable;
BitField<8, 1, u32> fast_stencil_disable;
BitField<9, 1, u32> noop_cull_disable;
BitField<10, 1, u32> force_color_kill;
BitField<11, 1, u32> force_z_read;
BitField<12, 1, u32> force_stencil_read;
BitField<13, 2, ForceEnable> force_full_z_range;
BitField<15, 1, u32> force_qc_smask_conflict;
BitField<16, 1, u32> disable_viewport_clamp;
BitField<17, 1, u32> ignore_sc_zrange;
BitField<18, 1, u32> disable_fully_covered;
BitField<19, 2, ForceSumm> force_z_limit_summ;
BitField<21, 5, u32> max_tiles_in_dtt;
BitField<26, 1, u32> disable_tile_rate_tiles;
// The four dirty/valid bits below are what Rasterizer::FilterDraw tests (dirty && valid,
// per aspect) to recognise a depth-stencil copy between surfaces.
BitField<27, 1, u32> force_z_dirty;
BitField<28, 1, u32> force_stencil_dirty;
BitField<29, 1, u32> force_z_valid;
BitField<30, 1, u32> force_stencil_valid;
BitField<31, 1, u32> preserve_compression;
};

union AaConfig {
BitField<0, 3, u32> msaa_num_samples;
BitField<4, 1, u32> aa_mask_centroid_dtmn;
Expand Down Expand Up @@ -1209,7 +1265,8 @@ struct Liverpool {
DepthRenderControl depth_render_control;
INSERT_PADDING_WORDS(1);
DepthView depth_view;
INSERT_PADDING_WORDS(2);
DepthRenderOverride depth_render_override;
INSERT_PADDING_WORDS(1);
Address depth_htile_data_base;
INSERT_PADDING_WORDS(2);
float depth_bounds_min;
Expand Down
73 changes: 73 additions & 0 deletions src/video_core/renderer_vulkan/vk_rasterizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,26 @@ bool Rasterizer::FilterDraw() {
return false;
}

const bool cb_disabled =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
const auto depth_copy =
regs.depth_render_override.force_z_dirty && regs.depth_render_override.force_z_valid &&
regs.depth_buffer.DepthValid() && regs.depth_buffer.DepthWriteValid() &&
regs.depth_buffer.DepthAddress() != regs.depth_buffer.DepthWriteAddress();
const auto stencil_copy =
regs.depth_render_override.force_stencil_dirty &&
regs.depth_render_override.force_stencil_valid && regs.depth_buffer.StencilValid() &&
regs.depth_buffer.StencilWriteValid() &&
regs.depth_buffer.StencilAddress() != regs.depth_buffer.StencilWriteAddress();
if (cb_disabled && (depth_copy || stencil_copy)) {
// Games may disable color buffer and enable force depth/stencil dirty and valid to
// do a copy from one depth-stencil surface to another, without a pixel shader.
// We need to detect this case and perform the copy, otherwise it will have no effect.
LOG_TRACE(Render_Vulkan, "Performing depth-stencil override copy");
DepthStencilCopy(depth_copy, stencil_copy);
return false;
}

return true;
}

Expand Down Expand Up @@ -899,6 +919,59 @@ void Rasterizer::Resolve() {
}
}

void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
auto& regs = liverpool->regs;

auto read_desc = VideoCore::TextureCache::DepthTargetDesc(
regs.depth_buffer, regs.depth_view, regs.depth_control,
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, false);
auto write_desc = VideoCore::TextureCache::DepthTargetDesc(
regs.depth_buffer, regs.depth_view, regs.depth_control,
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, true);

auto& read_image = texture_cache.GetImage(texture_cache.FindImage(read_desc));
auto& write_image = texture_cache.GetImage(texture_cache.FindImage(write_desc));

VideoCore::SubresourceRange sub_range;
sub_range.base.layer = liverpool->regs.depth_view.slice_start;
sub_range.extent.layers = liverpool->regs.depth_view.NumSlices() - sub_range.base.layer;

read_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
sub_range);
write_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
sub_range);

auto aspect_mask = vk::ImageAspectFlags(0);
if (is_depth) {
aspect_mask |= vk::ImageAspectFlagBits::eDepth;
}
if (is_stencil) {
aspect_mask |= vk::ImageAspectFlagBits::eStencil;
}
vk::ImageCopy region = {
.srcSubresource =
{
.aspectMask = aspect_mask,
.mipLevel = 0,
.baseArrayLayer = sub_range.base.layer,
.layerCount = sub_range.extent.layers,
},
.srcOffset = {0, 0, 0},
.dstSubresource =
{
.aspectMask = aspect_mask,
.mipLevel = 0,
.baseArrayLayer = sub_range.base.layer,
.layerCount = sub_range.extent.layers,
},
.dstOffset = {0, 0, 0},
.extent = {write_image.info.size.width, write_image.info.size.height, 1},
};
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.copyImage(read_image.image, vk::ImageLayout::eTransferSrcOptimal, write_image.image,
vk::ImageLayout::eTransferDstOptimal, region);
}

/// Forwards an inline data write to the buffer cache; all arguments are passed through
/// unchanged to BufferCache::InlineData.
/// @param address   Destination address of the write.
/// @param value     Pointer to the source bytes.
/// @param num_bytes Number of bytes to write.
/// @param is_gds    Passed through as-is. NOTE(review): presumably selects GDS rather than
///                  regular memory as the destination — confirm in BufferCache.
void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
buffer_cache.InlineData(address, value, num_bytes, is_gds);
}
Expand Down
1 change: 1 addition & 0 deletions src/video_core/renderer_vulkan/vk_rasterizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class Rasterizer {
RenderState PrepareRenderState(u32 mrt_mask);
void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state);
void Resolve();
void DepthStencilCopy(bool is_depth, bool is_stencil);
void EliminateFastClear();

void UpdateDynamicState(const GraphicsPipeline& pipeline);
Expand Down
7 changes: 4 additions & 3 deletions src/video_core/texture_cache/image_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
}

ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint,
bool write_buffer) noexcept {
props.is_tiled = false;
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
type = vk::ImageType::e2D;
Expand All @@ -111,10 +112,10 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
resources.layers = num_slices;
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;

stencil_addr = buffer.StencilAddress();
stencil_addr = write_buffer ? buffer.StencilWriteAddress() : buffer.StencilAddress();
stencil_size = pitch * size.height * sizeof(u8);

guest_address = buffer.Address();
guest_address = write_buffer ? buffer.DepthWriteAddress() : buffer.DepthAddress();
const auto depth_slice_sz = buffer.GetDepthSliceSize();
guest_size = depth_slice_sz * num_slices;
mips_layout.emplace_back(depth_slice_sz, pitch, 0);
Expand Down
2 changes: 1 addition & 1 deletion src/video_core/texture_cache/image_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ struct ImageInfo {
ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false) noexcept;
ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;

bool IsTiled() const {
Expand Down
4 changes: 2 additions & 2 deletions src/video_core/texture_cache/texture_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ class TextureCache {
DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::DepthView& view,
const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint = {})
const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false)
: BaseDesc{BindingType::DepthTarget,
ImageInfo{buffer, view.NumSlices(), htile_address, hint},
ImageInfo{buffer, view.NumSlices(), htile_address, hint, write_buffer},
ImageViewInfo{buffer, view, ctl}} {}
};

Expand Down