forked from shadps4-emu/shadPS4
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
texture_cache: 32bpp and 64bpp macro detilers (shadps4-emu#1852)
* added 32bpp macro detiler
* added 64bpp macro detiler
* consider 3d depth alignment in size calculations
- Loading branch information
Showing
12 changed files
with
236 additions
and
173 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project | ||
// SPDX-License-Identifier: GPL-2.0-or-later | ||
|
||
#version 450 | ||
|
||
// One-dimensional workgroup: 64 invocations along x, 1 along y and 1 along z. | ||
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; | ||
|
||
// Source buffer viewed as 32-bit words; main() reads one word per invocation from it. | ||
// NOTE(review): presumably the tiled image data supplied by the host - confirm against the caller. | ||
layout(std430, binding = 0) buffer input_buf { | ||
uint in_data[]; | ||
}; | ||
// Destination buffer viewed as 32-bit words; main() writes the word at index | ||
// gl_GlobalInvocationID.x. | ||
layout(std430, binding = 1) buffer output_buf { | ||
uint out_data[]; | ||
}; | ||
|
||
// Push constants consumed by main(). | ||
layout(push_constant) uniform image_info { | ||
// Not referenced anywhere in the shader code shown here. | ||
uint num_levels; | ||
// Divisor/modulus used to split the flat invocation index into x (and, with height, y and z). | ||
uint pitch; | ||
// Modulus used to derive y; pitch * height is the divisor used to derive z. | ||
uint height; | ||
// Multiplier applied to the micro-tile row index when computing tile_offs. | ||
// NOTE(review): presumably the number of micro tiles per tile row - confirm. | ||
uint c0; | ||
// Multiplier applied to (z >> 2) when computing slice_offs. | ||
// NOTE(review): presumably the number of micro tiles per group of four slices - confirm. | ||
uint c1; | ||
} info; | ||
|
||
// Lookup tables indexed as lut_32bpp[z & 3][col + row * MICRO_TILE_DIM], where col and row | ||
// are the low 3 bits of x and y. Each entry is an element index in the range 0x00-0xff; | ||
// main() converts it to a byte offset inside a micro tile with (idx * BPP / 8). | ||
// Tables 1, 2 and 3 hold the values of table 0 plus 0x08, 0x20 and 0x28 respectively, | ||
// so the four tables together contain every value 0x00-0xff exactly once. | ||
// Original note: each LUT is 64 bytes, so should fit into K$ given tiled slices locality. | ||
// NOTE(review): as declared, each table is 64 uint entries; the 64-byte figure presumably | ||
// assumes byte-sized storage after compilation - confirm. | ||
const uint lut_32bpp[][64] = { | ||
{ | ||
0x00, 0x01, 0x04, 0x05, 0x40, 0x41, 0x44, 0x45, | ||
0x02, 0x03, 0x06, 0x07, 0x42, 0x43, 0x46, 0x47, | ||
0x10, 0x11, 0x14, 0x15, 0x50, 0x51, 0x54, 0x55, | ||
0x12, 0x13, 0x16, 0x17, 0x52, 0x53, 0x56, 0x57, | ||
0x80, 0x81, 0x84, 0x85, 0xc0, 0xc1, 0xc4, 0xc5, | ||
0x82, 0x83, 0x86, 0x87, 0xc2, 0xc3, 0xc6, 0xc7, | ||
0x90, 0x91, 0x94, 0x95, 0xd0, 0xd1, 0xd4, 0xd5, | ||
0x92, 0x93, 0x96, 0x97, 0xd2, 0xd3, 0xd6, 0xd7, | ||
}, | ||
{ | ||
0x08, 0x09, 0x0c, 0x0d, 0x48, 0x49, 0x4c, 0x4d, | ||
0x0a, 0x0b, 0x0e, 0x0f, 0x4a, 0x4b, 0x4e, 0x4f, | ||
0x18, 0x19, 0x1c, 0x1d, 0x58, 0x59, 0x5c, 0x5d, | ||
0x1a, 0x1b, 0x1e, 0x1f, 0x5a, 0x5b, 0x5e, 0x5f, | ||
0x88, 0x89, 0x8c, 0x8d, 0xc8, 0xc9, 0xcc, 0xcd, | ||
0x8a, 0x8b, 0x8e, 0x8f, 0xca, 0xcb, 0xce, 0xcf, | ||
0x98, 0x99, 0x9c, 0x9d, 0xd8, 0xd9, 0xdc, 0xdd, | ||
0x9a, 0x9b, 0x9e, 0x9f, 0xda, 0xdb, 0xde, 0xdf, | ||
}, | ||
{ | ||
0x20, 0x21, 0x24, 0x25, 0x60, 0x61, 0x64, 0x65, | ||
0x22, 0x23, 0x26, 0x27, 0x62, 0x63, 0x66, 0x67, | ||
0x30, 0x31, 0x34, 0x35, 0x70, 0x71, 0x74, 0x75, | ||
0x32, 0x33, 0x36, 0x37, 0x72, 0x73, 0x76, 0x77, | ||
0xa0, 0xa1, 0xa4, 0xa5, 0xe0, 0xe1, 0xe4, 0xe5, | ||
0xa2, 0xa3, 0xa6, 0xa7, 0xe2, 0xe3, 0xe6, 0xe7, | ||
0xb0, 0xb1, 0xb4, 0xb5, 0xf0, 0xf1, 0xf4, 0xf5, | ||
0xb2, 0xb3, 0xb6, 0xb7, 0xf2, 0xf3, 0xf6, 0xf7, | ||
}, | ||
{ | ||
0x28, 0x29, 0x2c, 0x2d, 0x68, 0x69, 0x6c, 0x6d, | ||
0x2a, 0x2b, 0x2e, 0x2f, 0x6a, 0x6b, 0x6e, 0x6f, | ||
0x38, 0x39, 0x3c, 0x3d, 0x78, 0x79, 0x7c, 0x7d, | ||
0x3a, 0x3b, 0x3e, 0x3f, 0x7a, 0x7b, 0x7e, 0x7f, | ||
0xa8, 0xa9, 0xac, 0xad, 0xe8, 0xe9, 0xec, 0xed, | ||
0xaa, 0xab, 0xae, 0xaf, 0xea, 0xeb, 0xee, 0xef, | ||
0xb8, 0xb9, 0xbc, 0xbd, 0xf8, 0xf9, 0xfc, 0xfd, | ||
0xba, 0xbb, 0xbe, 0xbf, 0xfa, 0xfb, 0xfe, 0xff, | ||
} | ||
}; | ||
|
||
// Width and height of a micro tile in elements; also the row stride of each LUT. | ||
#define MICRO_TILE_DIM (8) | ||
// Byte stride between consecutive micro tiles in the source (256 LUT indices * BPP / 8). | ||
#define MICRO_TILE_SZ (1024) | ||
// Not referenced anywhere in the shader code shown here. | ||
#define TEXELS_PER_ELEMENT (1) | ||
// Bits per element; main() uses BPP / 8 to turn a LUT index into a byte offset. | ||
#define BPP (32) | ||
|
||
// Copies one 32-bit element per invocation. gl_GlobalInvocationID.x is interpreted as the | ||
// linear index x + y * pitch + z * pitch * height; the element is fetched from its computed | ||
// tiled location in in_data and stored at that same linear index in out_data. | ||
// NOTE(review): there is no guard for invocation ids past the end of the image, and a zero | ||
// pitch or height would divide by zero; presumably the host rules both out - confirm. | ||
void main() { | ||
// Split the flat invocation index into element coordinates. | ||
uint x = gl_GlobalInvocationID.x % info.pitch; | ||
uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height; | ||
uint z = gl_GlobalInvocationID.x / (info.pitch * info.height); | ||
|
||
// Position inside the 8x8 micro tile (low 3 bits of x and y) and table choice (low 2 bits of z). | ||
uint col = bitfieldExtract(x, 0, 3); | ||
uint row = bitfieldExtract(y, 0, 3); | ||
uint lut = bitfieldExtract(z, 0, 2); | ||
// Element index of this texel within its micro tile. | ||
uint idx = lut_32bpp[lut][col + row * MICRO_TILE_DIM]; | ||
|
||
// Byte offset of the group of four slices containing z: info.c1 micro tiles per group. | ||
uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; | ||
// Micro-tile coordinates within the slice group. | ||
uint tile_row = y / MICRO_TILE_DIM; | ||
uint tile_column = x / MICRO_TILE_DIM; | ||
// Byte offset of the micro tile: info.c0 micro tiles per tile row. | ||
uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ; | ||
// Final byte offset; idx * BPP / 8 is the byte position of the element inside the tile. | ||
uint offs = slice_offs + tile_offs + (idx * BPP / 8); | ||
|
||
// in_data is an array of 32-bit words, so the byte offset is divided by 4 to index it. | ||
uint p0 = in_data[offs >> 2u]; | ||
out_data[gl_GlobalInvocationID.x] = p0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project | ||
// SPDX-License-Identifier: GPL-2.0-or-later | ||
|
||
#version 450 | ||
|
||
// One-dimensional workgroup: 64 invocations along x, 1 along y and 1 along z. | ||
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; | ||
|
||
// Source buffer viewed as 32-bit words; main() reads two consecutive words per invocation. | ||
// NOTE(review): presumably the tiled image data supplied by the host - confirm against the caller. | ||
layout(std430, binding = 0) buffer input_buf { | ||
uint in_data[]; | ||
}; | ||
// Destination buffer viewed as 32-bit words; main() writes the two words at indices | ||
// 2 * gl_GlobalInvocationID.x and 2 * gl_GlobalInvocationID.x + 1. | ||
layout(std430, binding = 1) buffer output_buf { | ||
uint out_data[]; | ||
}; | ||
|
||
// Push constants consumed by main(). | ||
layout(push_constant) uniform image_info { | ||
// Not referenced anywhere in the shader code shown here. | ||
uint num_levels; | ||
// Divisor/modulus used to split the flat invocation index into x (and, with height, y and z). | ||
uint pitch; | ||
// Modulus used to derive y; pitch * height is the divisor used to derive z. | ||
uint height; | ||
// Multiplier applied to the micro-tile row index when computing tile_offs. | ||
// NOTE(review): presumably the number of micro tiles per tile row - confirm. | ||
uint c0; | ||
// Multiplier applied to (z >> 2) when computing slice_offs. | ||
// NOTE(review): presumably the number of micro tiles per group of four slices - confirm. | ||
uint c1; | ||
} info; | ||
|
||
// Lookup tables indexed as lut_64bpp[z & 3][col + row * MICRO_TILE_DIM], where col and row | ||
// are the low 3 bits of x and y. Each entry is an element index in the range 0x00-0xff; | ||
// main() converts it to a byte offset inside a micro tile with (idx * BPP / 8). | ||
// Tables 1, 2 and 3 hold the values of table 0 plus 0x04, 0x20 and 0x24 respectively, | ||
// so the four tables together contain every value 0x00-0xff exactly once. | ||
const uint lut_64bpp[][64] = { | ||
{ | ||
0x00, 0x01, 0x08, 0x09, 0x40, 0x41, 0x48, 0x49, | ||
0x02, 0x03, 0x0a, 0x0b, 0x42, 0x43, 0x4a, 0x4b, | ||
0x10, 0x11, 0x18, 0x19, 0x50, 0x51, 0x58, 0x59, | ||
0x12, 0x13, 0x1a, 0x1b, 0x52, 0x53, 0x5a, 0x5b, | ||
0x80, 0x81, 0x88, 0x89, 0xc0, 0xc1, 0xc8, 0xc9, | ||
0x82, 0x83, 0x8a, 0x8b, 0xc2, 0xc3, 0xca, 0xcb, | ||
0x90, 0x91, 0x98, 0x99, 0xd0, 0xd1, 0xd8, 0xd9, | ||
0x92, 0x93, 0x9a, 0x9b, 0xd2, 0xd3, 0xda, 0xdb, | ||
}, | ||
{ | ||
0x04, 0x05, 0x0c, 0x0d, 0x44, 0x45, 0x4c, 0x4d, | ||
0x06, 0x07, 0x0e, 0x0f, 0x46, 0x47, 0x4e, 0x4f, | ||
0x14, 0x15, 0x1c, 0x1d, 0x54, 0x55, 0x5c, 0x5d, | ||
0x16, 0x17, 0x1e, 0x1f, 0x56, 0x57, 0x5e, 0x5f, | ||
0x84, 0x85, 0x8c, 0x8d, 0xc4, 0xc5, 0xcc, 0xcd, | ||
0x86, 0x87, 0x8e, 0x8f, 0xc6, 0xc7, 0xce, 0xcf, | ||
0x94, 0x95, 0x9c, 0x9d, 0xd4, 0xd5, 0xdc, 0xdd, | ||
0x96, 0x97, 0x9e, 0x9f, 0xd6, 0xd7, 0xde, 0xdf, | ||
}, | ||
{ | ||
0x20, 0x21, 0x28, 0x29, 0x60, 0x61, 0x68, 0x69, | ||
0x22, 0x23, 0x2a, 0x2b, 0x62, 0x63, 0x6a, 0x6b, | ||
0x30, 0x31, 0x38, 0x39, 0x70, 0x71, 0x78, 0x79, | ||
0x32, 0x33, 0x3a, 0x3b, 0x72, 0x73, 0x7a, 0x7b, | ||
0xa0, 0xa1, 0xa8, 0xa9, 0xe0, 0xe1, 0xe8, 0xe9, | ||
0xa2, 0xa3, 0xaa, 0xab, 0xe2, 0xe3, 0xea, 0xeb, | ||
0xb0, 0xb1, 0xb8, 0xb9, 0xf0, 0xf1, 0xf8, 0xf9, | ||
0xb2, 0xb3, 0xba, 0xbb, 0xf2, 0xf3, 0xfa, 0xfb, | ||
}, | ||
{ | ||
0x24, 0x25, 0x2c, 0x2d, 0x64, 0x65, 0x6c, 0x6d, | ||
0x26, 0x27, 0x2e, 0x2f, 0x66, 0x67, 0x6e, 0x6f, | ||
0x34, 0x35, 0x3c, 0x3d, 0x74, 0x75, 0x7c, 0x7d, | ||
0x36, 0x37, 0x3e, 0x3f, 0x76, 0x77, 0x7e, 0x7f, | ||
0xa4, 0xa5, 0xac, 0xad, 0xe4, 0xe5, 0xec, 0xed, | ||
0xa6, 0xa7, 0xae, 0xaf, 0xe6, 0xe7, 0xee, 0xef, | ||
0xb4, 0xb5, 0xbc, 0xbd, 0xf4, 0xf5, 0xfc, 0xfd, | ||
0xb6, 0xb7, 0xbe, 0xbf, 0xf6, 0xf7, 0xfe, 0xff, | ||
}, | ||
}; | ||
|
||
// Width and height of a micro tile in elements; also the row stride of each LUT. | ||
#define MICRO_TILE_DIM (8) | ||
// Byte stride between consecutive micro tiles in the source (256 LUT indices * BPP / 8). | ||
#define MICRO_TILE_SZ (2048) | ||
// Not referenced anywhere in the shader code shown here. | ||
#define TEXELS_PER_ELEMENT (1) | ||
// Bits per element; main() uses BPP / 8 to turn a LUT index into a byte offset. | ||
#define BPP (64) | ||
|
||
// Copies one 64-bit element (two 32-bit words) per invocation. gl_GlobalInvocationID.x is | ||
// interpreted as the linear index x + y * pitch + z * pitch * height; the element is fetched | ||
// from its computed tiled location in in_data and stored at that linear element index in out_data. | ||
// NOTE(review): there is no guard for invocation ids past the end of the image, and a zero | ||
// pitch or height would divide by zero; presumably the host rules both out - confirm. | ||
void main() { | ||
// Split the flat invocation index into element coordinates. | ||
uint x = gl_GlobalInvocationID.x % info.pitch; | ||
uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height; | ||
uint z = gl_GlobalInvocationID.x / (info.pitch * info.height); | ||
|
||
// Position inside the 8x8 micro tile (low 3 bits of x and y) and table choice (low 2 bits of z). | ||
uint col = bitfieldExtract(x, 0, 3); | ||
uint row = bitfieldExtract(y, 0, 3); | ||
uint lut = bitfieldExtract(z, 0, 2); | ||
// Element index of this texel within its micro tile. | ||
uint idx = lut_64bpp[lut][col + row * MICRO_TILE_DIM]; | ||
|
||
// Byte offset of the group of four slices containing z: info.c1 micro tiles per group. | ||
uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; | ||
// Micro-tile coordinates within the slice group. | ||
uint tile_row = y / MICRO_TILE_DIM; | ||
uint tile_column = x / MICRO_TILE_DIM; | ||
// Byte offset of the micro tile: info.c0 micro tiles per tile row. | ||
uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ; | ||
// Final byte offset; idx * BPP / 8 is the byte position of the element inside the tile. | ||
uint offs = slice_offs + tile_offs + (idx * BPP / 8); | ||
|
||
// in_data is an array of 32-bit words, so the byte offset is divided by 4; the element | ||
// spans two consecutive words, which are written to two consecutive output words. | ||
uint p0 = in_data[(offs >> 2) + 0]; | ||
uint p1 = in_data[(offs >> 2) + 1]; | ||
out_data[2 * gl_GlobalInvocationID.x + 0] = p0; | ||
out_data[2 * gl_GlobalInvocationID.x + 1] = p1; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.