From 14ba30acc73f6e32d7098855dbea091cdc54e1db Mon Sep 17 00:00:00 2001 From: Alex Gunter Date: Mon, 9 Jan 2023 23:17:15 -0600 Subject: [PATCH] - Fixed [branch] annotations breaking tex2D() calls in DX9 - Swapped uncropping back to old algorithm - Added advanced preprocessor setting to swap uncropping to the faster algorithm, just in case --- Review note (not part of the commit message): the non-advanced branch of bind-shader-params.fxh now provides USE_VERTEX_UNCROPPING as a guarded macro instead of the unused, misnamed constant USE_VERTEX_CROPPING, because content-box.fxh selects the uncropping algorithm with "#if USE_VERTEX_UNCROPPING". The hunk header and diffstat counts for that file were adjusted to match. In content-box.fxh, is_in_boundary is declared float to agree with its float() initializer and its use as a multiplicative mask. .../crt-royale/lib/bind-shader-params.fxh | 10 ++ .../crt-royale/lib/downsampling-functions.fxh | 4 +- .../crt-royale/lib/gamma-management.fxh | 5 +- .../lib/helper-functions-and-macros.fxh | 5 + .../crt-royale/shaders/content-box.fxh | 119 ++++++++++++++---- .../crt-royale/shaders/deinterlace.fxh | 10 +- .../crt-royale/shaders/electron-beams.fxh | 32 ++--- .../shaders/geometry-aa-last-pass.fxh | 1 - .../Shaders/crt-royale/version-number.fxh | 2 +- 9 files changed, 138 insertions(+), 50 deletions(-) diff --git a/reshade-shaders/Shaders/crt-royale/lib/bind-shader-params.fxh b/reshade-shaders/Shaders/crt-royale/lib/bind-shader-params.fxh index 3256d9c..9e61381 100644 --- a/reshade-shaders/Shaders/crt-royale/lib/bind-shader-params.fxh +++ b/reshade-shaders/Shaders/crt-royale/lib/bind-shader-params.fxh @@ -66,6 +66,12 @@ #endif #if ADVANCED_SETTINGS == 1 + // Using vertex uncropping is marginally faster, but vulnerable to DX9 weirdness. + // Most users will likely prefer the slower algorithm. 
+ #ifndef USE_VERTEX_UNCROPPING + #define USE_VERTEX_UNCROPPING 0 + #endif + #ifndef NUM_BEAMDIST_COLOR_SAMPLES #define NUM_BEAMDIST_COLOR_SAMPLES 1024 #endif @@ -85,6 +91,10 @@ static const uint num_beamdist_dist_samples = uint(NUM_BEAMDIST_DIST_SAMPLES); static const float bloomapprox_downsizing_factor = float(BLOOMAPPROX_DOWNSIZING_FACTOR); #else + // Default for the "#if USE_VERTEX_UNCROPPING" test in content-box.fxh; an existing definition is left untouched. + #ifndef USE_VERTEX_UNCROPPING + #define USE_VERTEX_UNCROPPING 0 + #endif static const uint num_beamdist_color_samples = 1024; static const uint num_beamdist_dist_samples = 120; static const float bloomapprox_downsizing_factor = 4.0; diff --git a/reshade-shaders/Shaders/crt-royale/lib/downsampling-functions.fxh b/reshade-shaders/Shaders/crt-royale/lib/downsampling-functions.fxh index d36ef55..be39d2d 100644 --- a/reshade-shaders/Shaders/crt-royale/lib/downsampling-functions.fxh +++ b/reshade-shaders/Shaders/crt-royale/lib/downsampling-functions.fxh @@ -35,7 +35,7 @@ float3 opaque_linear_downsample( float3 acc = 0; for(int i = 0; i < total_num_samples; i++) { const float2 coord = coord_left + i * delta_uv; - acc += tex2Dlod(tex, float4(coord, 0, 0)).rgb; + acc += tex2D_nograd(tex, coord).rgb; } return acc / total_num_samples; @@ -64,7 +64,7 @@ float3 opaque_lanczos_downsample( num_sinc_lobes * sin(pi*sinc_x) * sin(pi*sinc_x/num_sinc_lobes) / (pi*pi * sinc_x*sinc_x) : weight_at_center; - acc += weight * tex2Dlod(tex, float4(coord, 0, 0)).rgb; + acc += weight * tex2D_nograd(tex, coord).rgb; w_sum += weight; } diff --git a/reshade-shaders/Shaders/crt-royale/lib/gamma-management.fxh b/reshade-shaders/Shaders/crt-royale/lib/gamma-management.fxh index 2a89e7e..ec41f49 100644 --- a/reshade-shaders/Shaders/crt-royale/lib/gamma-management.fxh +++ b/reshade-shaders/Shaders/crt-royale/lib/gamma-management.fxh @@ -213,8 +213,11 @@ float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float // { return 
decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), texel_off), gamma); } // tex2Dlod: +float4 tex2Dlod_linearize(const 
sampler2D tex, const float2 tex_coords, const float gamma) +{ return decode_input(tex2Dlod(tex, float4(tex_coords, 0, 0), 0.0), gamma); } + float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const float gamma) -{ return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), 0.0), gamma); } +{ return decode_input(tex2Dlod(tex, float4(tex_coords.xy, 0, 0), 0.0), gamma); } // float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off, const float gamma) // { return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), texel_off), gamma); } diff --git a/reshade-shaders/Shaders/crt-royale/lib/helper-functions-and-macros.fxh b/reshade-shaders/Shaders/crt-royale/lib/helper-functions-and-macros.fxh index 5ac2902..18de77a 100644 --- a/reshade-shaders/Shaders/crt-royale/lib/helper-functions-and-macros.fxh +++ b/reshade-shaders/Shaders/crt-royale/lib/helper-functions-and-macros.fxh @@ -24,6 +24,11 @@ // IN THE SOFTWARE. +float4 tex2D_nograd(sampler2D tex, float2 tex_coords) +{ + return tex2Dlod(tex, float4(tex_coords, 0, 0), 0.0); +} + // ReShade 4 does not permit the use of functions or the ternary operator // outside of a function definition. This is a problem for this port // because the original crt-royale shader makes heavy use of these diff --git a/reshade-shaders/Shaders/crt-royale/shaders/content-box.fxh b/reshade-shaders/Shaders/crt-royale/shaders/content-box.fxh index ff2ab6c..7f0f6bd 100644 --- a/reshade-shaders/Shaders/crt-royale/shaders/content-box.fxh +++ b/reshade-shaders/Shaders/crt-royale/shaders/content-box.fxh @@ -33,36 +33,107 @@ void contentCropVS( out float4 position : SV_Position, out float2 texcoord : TEXCOORD0 ) { - texcoord.x = (id & 1) ? content_right : content_left; - texcoord.y = (id & 2) ? content_lower : content_upper; + #if _DX9_ACTIVE + texcoord.x = (id == 1 || id == 3) ? content_right : content_left; + texcoord.y = (id > 1) ? 
content_lower : content_upper; - position.x = (id & 1) ? 1 : -1; - position.y = (id & 2) ? -1 : 1; - position.zw = 1; + position.x = (id == 1 || id == 3) ? 1 : -1; + position.y = (id > 1) ? -1 : 1; + position.zw = 1; + #else + texcoord.x = (id & 1) ? content_right : content_left; + texcoord.y = (id & 2) ? content_lower : content_upper; + + position.x = (id & 1) ? 1 : -1; + position.y = (id & 2) ? -1 : 1; + position.zw = 1; + #endif } -void contentUncropVS( - in uint id : SV_VertexID, +#if USE_VERTEX_UNCROPPING +/* + * Using the vertex shader for uncropping can save about 0.1ms in some apps. + * However, some apps like SNES9X w/ DX9 don't trigger a refresh of the entire screen, + * which in turn causes the ReShade UI to "stick around" after it's closed. + * + * The slower algorithm forces the entire screen to refresh, which forces the + * area outside the content box to be black. I assume most users will prefer + * the results of the slower algorithm and won't notice the 0.1ms. Users who + * need that 0.1ms can use a preprocessor def to recover that time. + */ + void contentUncropVS( + in uint id : SV_VertexID, + + out float4 position : SV_Position, + out float2 texcoord : TEXCOORD0 + ) { + #if _DX9_ACTIVE + texcoord.x = id == 1 || id == 3; + texcoord.y = id < 2; + + position.x = (id == 1 || id == 3) ? content_scale.x : -content_scale.x; + position.y = (id > 1) ? content_scale.y : -content_scale.y; + position.zw = 1; + #else + texcoord.x = id & 1; + texcoord.y = !(id & 2); + + position.x = (id & 1) ? content_scale.x : -content_scale.x; + position.y = (id & 2) ? content_scale.y : -content_scale.y; + position.zw = 1; + #endif + } - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0 -) { - texcoord.x = id & 1; - texcoord.y = !(id & 2); - - position.x = (id & 1) ? content_scale.x : -content_scale.x; - position.y = (id & 2) ? 
content_scale.y : -content_scale.y; - position.zw = 1; -} + void uncropContentPixelShader( + in float4 pos : SV_Position, + in float2 texcoord : TEXCOORD0, + + out float4 color : SV_Target + ) { + color = tex2D(samplerGeometry, texcoord); + } +#else + void contentUncropVS( + in uint id : SV_VertexID, -void uncropContentPixelShader( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, + out float4 position : SV_Position, + out float2 texcoord : TEXCOORD0 + ) { + // TODO: There's probably a better way to code this. + // I'll figure it out later. + #if _DX9_ACTIVE + texcoord.x = id == 1 || id == 3; + texcoord.y = id < 2; + + position.x = (id == 1 || id == 3) ? 1 : -1; + position.y = (id > 1) ? 1 : -1; + position.zw = 1; + #else + texcoord.x = id & 1; + texcoord.y = !(id & 2); + + position.x = (id & 1) ? 1 : -1; + position.y = (id & 2) ? 1 : -1; + position.zw = 1; + #endif + } - out float4 color : SV_Target -) { - color = tex2D(samplerGeometry, texcoord); -} + void uncropContentPixelShader( + in float4 pos : SV_Position, + in float2 texcoord : TEXCOORD0, + + out float4 color : SV_Target + ) { + const float is_in_boundary = float( + texcoord.x >= content_left && texcoord.x <= content_right && + texcoord.y >= content_upper && texcoord.y <= content_lower + ); + const float2 texcoord_uncropped = ((texcoord - content_offset) * buffer_size + 0) / content_size; + + const float4 raw_color = tex2D(samplerGeometry, texcoord_uncropped); + color = float4(is_in_boundary * raw_color.rgb, raw_color.a); + } +#endif #if CONTENT_BOX_VISIBLE diff --git a/reshade-shaders/Shaders/crt-royale/shaders/deinterlace.fxh b/reshade-shaders/Shaders/crt-royale/shaders/deinterlace.fxh index a699814..25807fb 100644 --- a/reshade-shaders/Shaders/crt-royale/shaders/deinterlace.fxh +++ b/reshade-shaders/Shaders/crt-royale/shaders/deinterlace.fxh @@ -100,8 +100,8 @@ void deinterlacePS( // const float cur_scanline_idx = get_curr_scanline_idx(texcoord.y, content_size.y); // const float 
wrong_field = curr_line_is_wrong_field(cur_scanline_idx); - const float4 cur_line_color = tex2D(samplerBeamConvergence, texcoord); - const float4 cur_line_prev_color = tex2D(samplerFreezeFrame, texcoord); + const float4 cur_line_color = tex2D_nograd(samplerBeamConvergence, texcoord); + const float4 cur_line_prev_color = tex2D_nograd(samplerFreezeFrame, texcoord); const float4 avg_color = (cur_line_color + cur_line_prev_color) / 2.0; @@ -121,8 +121,8 @@ void deinterlacePS( const float2 curr_offset = lerp(0, raw_offset, interpolation_data.wrong_field); const float2 prev_offset = lerp(raw_offset, 0, interpolation_data.wrong_field); - const float4 cur_line_color = tex2D(samplerBeamConvergence, texcoord + curr_offset); - const float4 prev_line_color = tex2D(samplerFreezeFrame, texcoord + prev_offset); + const float4 cur_line_color = tex2D_nograd(samplerBeamConvergence, texcoord + curr_offset); + const float4 prev_line_color = tex2D_nograd(samplerFreezeFrame, texcoord + prev_offset); const float4 avg_color = (cur_line_color + prev_line_color) / 2.0; const float4 raw_out_color = lerp(cur_line_color, avg_color, interpolation_data.wrong_field); @@ -130,7 +130,7 @@ void deinterlacePS( } // No temporal blending else { - color = tex2D(samplerBeamConvergence, texcoord); + color = tex2D_nograd(samplerBeamConvergence, texcoord); } } diff --git a/reshade-shaders/Shaders/crt-royale/shaders/electron-beams.fxh b/reshade-shaders/Shaders/crt-royale/shaders/electron-beams.fxh index c9036d2..2137d26 100644 --- a/reshade-shaders/Shaders/crt-royale/shaders/electron-beams.fxh +++ b/reshade-shaders/Shaders/crt-royale/shaders/electron-beams.fxh @@ -257,15 +257,15 @@ void simulateEletronBeamsPS( [branch] if (beam_shape_mode < 3) { - const float4 scanline_color = tex2D_linearize( + const float4 scanline_color = tex2Dlod_linearize( source_sampler, texcoord_uncropped, get_input_gamma() ); - const float beam_strength_r = tex2D(samplerBeamDist, float2(scanline_color.r, ypos)).x; - const float 
beam_strength_g = tex2D(samplerBeamDist, float2(scanline_color.g, ypos)).x; - const float beam_strength_b = tex2D(samplerBeamDist, float2(scanline_color.b, ypos)).x; + const float beam_strength_r = tex2D_nograd(samplerBeamDist, float2(scanline_color.r, ypos)).x; + const float beam_strength_g = tex2D_nograd(samplerBeamDist, float2(scanline_color.g, ypos)).x; + const float beam_strength_b = tex2D_nograd(samplerBeamDist, float2(scanline_color.b, ypos)).x; const float4 beam_strength = float4(beam_strength_r, beam_strength_g, beam_strength_b, 1); color = beam_strength; @@ -273,33 +273,33 @@ void simulateEletronBeamsPS( else { const float2 offset = float2(0, scanline_thickness) * (1 + enable_interlacing) * rcp(content_size); - const float4 curr_scanline_color = tex2D_linearize( + const float4 curr_scanline_color = tex2Dlod_linearize( source_sampler, texcoord_uncropped, get_input_gamma() ); - const float4 upper_scanline_color = tex2D_linearize( + const float4 upper_scanline_color = tex2Dlod_linearize( source_sampler, texcoord_uncropped - offset, get_input_gamma() ); - const float4 lower_scanline_color = tex2D_linearize( + const float4 lower_scanline_color = tex2Dlod_linearize( source_sampler, texcoord_uncropped + offset, get_input_gamma() ); - const float curr_beam_strength_r = tex2D(samplerBeamDist, float2(curr_scanline_color.r, ypos)).x; - const float curr_beam_strength_g = tex2D(samplerBeamDist, float2(curr_scanline_color.g, ypos)).x; - const float curr_beam_strength_b = tex2D(samplerBeamDist, float2(curr_scanline_color.b, ypos)).x; + const float curr_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.r, ypos)).x; + const float curr_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.g, ypos)).x; + const float curr_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.b, ypos)).x; - const float upper_beam_strength_r = tex2D(samplerBeamDist, float2(upper_scanline_color.r, ypos)).y; - const float 
upper_beam_strength_g = tex2D(samplerBeamDist, float2(upper_scanline_color.g, ypos)).y; - const float upper_beam_strength_b = tex2D(samplerBeamDist, float2(upper_scanline_color.b, ypos)).y; + const float upper_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.r, ypos)).y; + const float upper_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.g, ypos)).y; + const float upper_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.b, ypos)).y; - const float lower_beam_strength_r = tex2D(samplerBeamDist, float2(lower_scanline_color.r, ypos)).z; - const float lower_beam_strength_g = tex2D(samplerBeamDist, float2(lower_scanline_color.g, ypos)).z; - const float lower_beam_strength_b = tex2D(samplerBeamDist, float2(lower_scanline_color.b, ypos)).z; + const float lower_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.r, ypos)).z; + const float lower_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.g, ypos)).z; + const float lower_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.b, ypos)).z; color = float4( curr_beam_strength_r + upper_beam_strength_r + lower_beam_strength_r, diff --git a/reshade-shaders/Shaders/crt-royale/shaders/geometry-aa-last-pass.fxh b/reshade-shaders/Shaders/crt-royale/shaders/geometry-aa-last-pass.fxh index a7e8cdc..e167411 100644 --- a/reshade-shaders/Shaders/crt-royale/shaders/geometry-aa-last-pass.fxh +++ b/reshade-shaders/Shaders/crt-royale/shaders/geometry-aa-last-pass.fxh @@ -199,7 +199,6 @@ void geometryPS( // Sample the input with antialiasing (due to sharp phosphors, etc.): raw_color = tex2Daa(samplerBloomHorizontal, tex_uv, pixel_to_tex_uv, float(frame_count), get_intermediate_gamma()); } - else if(antialias_level > 0.5 && need_subpixel_aa) { // Sample at each subpixel location: diff --git a/reshade-shaders/Shaders/crt-royale/version-number.fxh 
b/reshade-shaders/Shaders/crt-royale/version-number.fxh index 5f220f3..34106ba 100644 --- a/reshade-shaders/Shaders/crt-royale/version-number.fxh +++ b/reshade-shaders/Shaders/crt-royale/version-number.fxh @@ -26,7 +26,7 @@ #define MAJOR_VERSION 2 #define MINOR_VERSION 0 -#define PATCH_VERSION 0 +#define PATCH_VERSION 1 // Yes, both sibling preprocessor functions are necessary. // Don't "simplify" this, or the substitution won't work.