Skip to content

Commit

Permalink
- Fixed [branch] annotations breaking tex2D() calls in DX9
Browse files Browse the repository at this point in the history
- Swapped uncropping back to old algorithm
- Added advanced preprocessor setting to swap uncropping to the faster algorithm, just in case
  • Loading branch information
akgunter committed Jan 10, 2023
1 parent faf4ee7 commit 14ba30a
Show file tree
Hide file tree
Showing 9 changed files with 135 additions and 50 deletions.
7 changes: 7 additions & 0 deletions reshade-shaders/Shaders/crt-royale/lib/bind-shader-params.fxh
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@
#endif

#if ADVANCED_SETTINGS == 1
// Using vertex uncropping is marginally faster, but vulnerable to DX9 weirdness.
// Most users will likely prefer the slower algorithm.
#ifndef USE_VERTEX_UNCROPPING
#define USE_VERTEX_UNCROPPING 0
#endif

#ifndef NUM_BEAMDIST_COLOR_SAMPLES
#define NUM_BEAMDIST_COLOR_SAMPLES 1024
#endif
Expand All @@ -85,6 +91,7 @@
static const uint num_beamdist_dist_samples = uint(NUM_BEAMDIST_DIST_SAMPLES);
static const float bloomapprox_downsizing_factor = float(BLOOMAPPROX_DOWNSIZING_FACTOR);
#else
// Non-advanced-settings value for the vertex-uncropping switch (0).
// NOTE(review): this is spelled USE_VERTEX_CROPPING, while the setting defined
// in the ADVANCED_SETTINGS branch and tested with `#if USE_VERTEX_UNCROPPING`
// in content-box.fxh is USE_VERTEX_UNCROPPING. A static const is also not
// visible to the preprocessor, so this constant presumably has no effect on
// that #if -- confirm the intended name and whether it should be a #define.
static const uint USE_VERTEX_CROPPING = 0;
static const uint num_beamdist_color_samples = 1024;
static const uint num_beamdist_dist_samples = 120;
static const float bloomapprox_downsizing_factor = 4.0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ float3 opaque_linear_downsample(
float3 acc = 0;
for(int i = 0; i < total_num_samples; i++) {
const float2 coord = coord_left + i * delta_uv;
acc += tex2Dlod(tex, float4(coord, 0, 0)).rgb;
acc += tex2D_nograd(tex, coord).rgb;
}

return acc / total_num_samples;
Expand Down Expand Up @@ -64,7 +64,7 @@ float3 opaque_lanczos_downsample(
num_sinc_lobes * sin(pi*sinc_x) * sin(pi*sinc_x/num_sinc_lobes) / (pi*pi * sinc_x*sinc_x) :
weight_at_center;

acc += weight * tex2Dlod(tex, float4(coord, 0, 0)).rgb;
acc += weight * tex2D_nograd(tex, coord).rgb;
w_sum += weight;
}

Expand Down
5 changes: 4 additions & 1 deletion reshade-shaders/Shaders/crt-royale/lib/gamma-management.fxh
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,11 @@ float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float
// { return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), texel_off), gamma); }

// tex2Dlod:
// float2 convenience overload: builds the float4 coordinate tex2Dlod expects
// (zw = 0), fetches the texel, and runs the result through decode_input()
// with the supplied gamma.
float4 tex2Dlod_linearize(const sampler2D tex, const float2 tex_coords, const float gamma)
{
    const float4 lod_coords = float4(tex_coords, 0, 0);
    const float4 encoded_color = tex2Dlod(tex, lod_coords, 0.0);
    return decode_input(encoded_color, gamma);
}

float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const float gamma)
{ return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), 0.0), gamma); }
{ return decode_input(tex2Dlod(tex, float4(tex_coords.xy, 0, 0), 0.0), gamma); }

// float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off, const float gamma)
// { return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), texel_off), gamma); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
// IN THE SOFTWARE.


// Samples tex at tex_coords through tex2Dlod with the coordinate's zw
// components set to 0, instead of through tex2D. Used by the shaders as a
// stand-in for tex2D() where gradient-based sampling is a problem
// (presumably inside [branch] blocks on DX9 -- see the commit notes).
float4 tex2D_nograd(sampler2D tex, float2 tex_coords)
{
    const float4 lod_coords = float4(tex_coords, 0, 0);
    return tex2Dlod(tex, lod_coords, 0.0);
}

// ReShade 4 does not permit the use of functions or the ternary operator
// outside of a function definition. This is a problem for this port
// because the original crt-royale shader makes heavy use of these
Expand Down
119 changes: 95 additions & 24 deletions reshade-shaders/Shaders/crt-royale/shaders/content-box.fxh
Original file line number Diff line number Diff line change
Expand Up @@ -33,36 +33,107 @@ void contentCropVS(
out float4 position : SV_Position,
out float2 texcoord : TEXCOORD0
) {
texcoord.x = (id & 1) ? content_right : content_left;
texcoord.y = (id & 2) ? content_lower : content_upper;
#if _DX9_ACTIVE
texcoord.x = (id == 1 || id == 3) ? content_right : content_left;
texcoord.y = (id > 1) ? content_lower : content_upper;

position.x = (id & 1) ? 1 : -1;
position.y = (id & 2) ? -1 : 1;
position.zw = 1;
position.x = (id == 1 || id == 3) ? 1 : -1;
position.y = (id > 1) ? -1 : 1;
position.zw = 1;
#else
texcoord.x = (id & 1) ? content_right : content_left;
texcoord.y = (id & 2) ? content_lower : content_upper;

position.x = (id & 1) ? 1 : -1;
position.y = (id & 2) ? -1 : 1;
position.zw = 1;
#endif
}

void contentUncropVS(
in uint id : SV_VertexID,
#if USE_VERTEX_UNCROPPING
/*
* Using the vertex shader for uncropping can save about 0.1ms in some apps.
* However, some apps like SNES9X w/ DX9 don't trigger a refresh of the entire screen,
* which in turn causes the ReShade UI to "stick around" after it's closed.
*
* The slower algorithm forces the entire screen to refresh, which forces the
* area outside the content box to be black. I assume most users will prefer
* the results of the slower algorithm and won't notice the 0.1ms. Users who
* need that 0.1ms can use a preprocessor def to recover that time.
*/
// Vertex shader for the vertex-uncropping path. Each vertex id selects a
// corner: texcoord is set to 0 or 1 per axis, and position is placed at
// +/- content_scale on each axis with z = w = 1.
//   id       - vertex index (SV_VertexID)
//   position - output clip-space position
//   texcoord - output texture coordinate
void contentUncropVS(
    in uint id : SV_VertexID,

    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0
) {
    // Decide once which side of each axis this vertex sits on. The DX9 path
    // uses comparisons only; the other path tests bits 0 and 1 of the id.
#if _DX9_ACTIVE
    const bool is_pos_x = (id == 1 || id == 3);
    const bool is_pos_y = (id > 1);
#else
    const bool is_pos_x = (id & 1) != 0;
    const bool is_pos_y = (id & 2) != 0;
#endif

    // texcoord.y runs opposite to position.y: the +y vertices get v = 0.
    texcoord.x = is_pos_x ? 1.0 : 0.0;
    texcoord.y = is_pos_y ? 0.0 : 1.0;

    position.x = is_pos_x ? content_scale.x : -content_scale.x;
    position.y = is_pos_y ? content_scale.y : -content_scale.y;
    position.zw = 1;
}

out float4 position : SV_Position,
out float2 texcoord : TEXCOORD0
) {
texcoord.x = id & 1;
texcoord.y = !(id & 2);

position.x = (id & 1) ? content_scale.x : -content_scale.x;
position.y = (id & 2) ? content_scale.y : -content_scale.y;
position.zw = 1;
}
// Pixel shader for the vertex-uncropping path: writes the samplerGeometry
// texel at the interpolated texcoord straight to the render target.
//   pos      - pixel position (unused)
//   texcoord - interpolated texture coordinate from the vertex shader
//   color    - output color
void uncropContentPixelShader(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,

    out float4 color : SV_Target
) {
    const float4 geometry_color = tex2D(samplerGeometry, texcoord);
    color = geometry_color;
}
#else
void contentUncropVS(
in uint id : SV_VertexID,

void uncropContentPixelShader(
in float4 pos : SV_Position,
in float2 texcoord : TEXCOORD0,
out float4 position : SV_Position,
out float2 texcoord : TEXCOORD0
) {
// TODO: There's probably a better way to code this.
// I'll figure it out later.
#if _DX9_ACTIVE
texcoord.x = id == 1 || id == 3;
texcoord.y = id < 2;

position.x = (id == 1 || id == 3) ? 1 : -1;
position.y = (id > 1) ? 1 : -1;
position.zw = 1;
#else
texcoord.x = id & 1;
texcoord.y = !(id & 2);

position.x = (id & 1) ? 1 : -1;
position.y = (id & 2) ? 1 : -1;
position.zw = 1;
#endif
}

out float4 color : SV_Target
) {
color = tex2D(samplerGeometry, texcoord);
}
// Pixel shader for the pixel-uncropping path. For every output pixel it
// remaps texcoord into the cropped content's coordinates, samples
// samplerGeometry there, and zeroes the RGB of pixels that fall outside the
// content box. Alpha is passed through unmasked.
//   pos      - pixel position (unused)
//   texcoord - interpolated texture coordinate from the vertex shader
//   color    - output color
void uncropContentPixelShader(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,

    out float4 color : SV_Target
) {
    // 1.0 inside the content box (edges inclusive), 0.0 outside. Declared as
    // float because it is only used as a multiplicative mask below; the
    // previous bool declaration discarded the explicit float() conversion.
    const float is_in_boundary = float(
        texcoord.x >= content_left && texcoord.x <= content_right &&
        texcoord.y >= content_upper && texcoord.y <= content_lower
    );

    // Shift by the content offset, then rescale by buffer_size / content_size.
    const float2 texcoord_uncropped = ((texcoord - content_offset) * buffer_size) / content_size;

    const float4 raw_color = tex2D(samplerGeometry, texcoord_uncropped);
    color = float4(is_in_boundary * raw_color.rgb, raw_color.a);
}
#endif


#if CONTENT_BOX_VISIBLE
Expand Down
10 changes: 5 additions & 5 deletions reshade-shaders/Shaders/crt-royale/shaders/deinterlace.fxh
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ void deinterlacePS(
// const float cur_scanline_idx = get_curr_scanline_idx(texcoord.y, content_size.y);
// const float wrong_field = curr_line_is_wrong_field(cur_scanline_idx);

const float4 cur_line_color = tex2D(samplerBeamConvergence, texcoord);
const float4 cur_line_prev_color = tex2D(samplerFreezeFrame, texcoord);
const float4 cur_line_color = tex2D_nograd(samplerBeamConvergence, texcoord);
const float4 cur_line_prev_color = tex2D_nograd(samplerFreezeFrame, texcoord);

const float4 avg_color = (cur_line_color + cur_line_prev_color) / 2.0;

Expand All @@ -121,16 +121,16 @@ void deinterlacePS(
const float2 curr_offset = lerp(0, raw_offset, interpolation_data.wrong_field);
const float2 prev_offset = lerp(raw_offset, 0, interpolation_data.wrong_field);

const float4 cur_line_color = tex2D(samplerBeamConvergence, texcoord + curr_offset);
const float4 prev_line_color = tex2D(samplerFreezeFrame, texcoord + prev_offset);
const float4 cur_line_color = tex2D_nograd(samplerBeamConvergence, texcoord + curr_offset);
const float4 prev_line_color = tex2D_nograd(samplerFreezeFrame, texcoord + prev_offset);

const float4 avg_color = (cur_line_color + prev_line_color) / 2.0;
const float4 raw_out_color = lerp(cur_line_color, avg_color, interpolation_data.wrong_field);
color = encode_output(raw_out_color, deinterlacing_blend_gamma);
}
// No temporal blending
else {
color = tex2D(samplerBeamConvergence, texcoord);
color = tex2D_nograd(samplerBeamConvergence, texcoord);
}
}

Expand Down
32 changes: 16 additions & 16 deletions reshade-shaders/Shaders/crt-royale/shaders/electron-beams.fxh
Original file line number Diff line number Diff line change
Expand Up @@ -257,49 +257,49 @@ void simulateEletronBeamsPS(

[branch]
if (beam_shape_mode < 3) {
const float4 scanline_color = tex2D_linearize(
const float4 scanline_color = tex2Dlod_linearize(
source_sampler,
texcoord_uncropped,
get_input_gamma()
);

const float beam_strength_r = tex2D(samplerBeamDist, float2(scanline_color.r, ypos)).x;
const float beam_strength_g = tex2D(samplerBeamDist, float2(scanline_color.g, ypos)).x;
const float beam_strength_b = tex2D(samplerBeamDist, float2(scanline_color.b, ypos)).x;
const float beam_strength_r = tex2D_nograd(samplerBeamDist, float2(scanline_color.r, ypos)).x;
const float beam_strength_g = tex2D_nograd(samplerBeamDist, float2(scanline_color.g, ypos)).x;
const float beam_strength_b = tex2D_nograd(samplerBeamDist, float2(scanline_color.b, ypos)).x;
const float4 beam_strength = float4(beam_strength_r, beam_strength_g, beam_strength_b, 1);

color = beam_strength;
}
else {
const float2 offset = float2(0, scanline_thickness) * (1 + enable_interlacing) * rcp(content_size);

const float4 curr_scanline_color = tex2D_linearize(
const float4 curr_scanline_color = tex2Dlod_linearize(
source_sampler,
texcoord_uncropped,
get_input_gamma()
);
const float4 upper_scanline_color = tex2D_linearize(
const float4 upper_scanline_color = tex2Dlod_linearize(
source_sampler,
texcoord_uncropped - offset,
get_input_gamma()
);
const float4 lower_scanline_color = tex2D_linearize(
const float4 lower_scanline_color = tex2Dlod_linearize(
source_sampler,
texcoord_uncropped + offset,
get_input_gamma()
);

const float curr_beam_strength_r = tex2D(samplerBeamDist, float2(curr_scanline_color.r, ypos)).x;
const float curr_beam_strength_g = tex2D(samplerBeamDist, float2(curr_scanline_color.g, ypos)).x;
const float curr_beam_strength_b = tex2D(samplerBeamDist, float2(curr_scanline_color.b, ypos)).x;
const float curr_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.r, ypos)).x;
const float curr_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.g, ypos)).x;
const float curr_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.b, ypos)).x;

const float upper_beam_strength_r = tex2D(samplerBeamDist, float2(upper_scanline_color.r, ypos)).y;
const float upper_beam_strength_g = tex2D(samplerBeamDist, float2(upper_scanline_color.g, ypos)).y;
const float upper_beam_strength_b = tex2D(samplerBeamDist, float2(upper_scanline_color.b, ypos)).y;
const float upper_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.r, ypos)).y;
const float upper_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.g, ypos)).y;
const float upper_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.b, ypos)).y;

const float lower_beam_strength_r = tex2D(samplerBeamDist, float2(lower_scanline_color.r, ypos)).z;
const float lower_beam_strength_g = tex2D(samplerBeamDist, float2(lower_scanline_color.g, ypos)).z;
const float lower_beam_strength_b = tex2D(samplerBeamDist, float2(lower_scanline_color.b, ypos)).z;
const float lower_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.r, ypos)).z;
const float lower_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.g, ypos)).z;
const float lower_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.b, ypos)).z;

color = float4(
curr_beam_strength_r + upper_beam_strength_r + lower_beam_strength_r,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,6 @@ void geometryPS(
// Sample the input with antialiasing (due to sharp phosphors, etc.):
raw_color = tex2Daa(samplerBloomHorizontal, tex_uv, pixel_to_tex_uv, float(frame_count), get_intermediate_gamma());
}

else if(antialias_level > 0.5 && need_subpixel_aa)
{
// Sample at each subpixel location:
Expand Down
2 changes: 1 addition & 1 deletion reshade-shaders/Shaders/crt-royale/version-number.fxh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

#define MAJOR_VERSION 2
#define MINOR_VERSION 0
#define PATCH_VERSION 0
#define PATCH_VERSION 1

// Yes, both sibling preprocessor functions are necessary.
// Don't "simplify" this, or the substitution won't work.
Expand Down

0 comments on commit 14ba30a

Please sign in to comment.