Skip to content

Commit 01842a3

Browse files
GS/HW: Allow blending on normal shuffles
1 parent 4cd385d commit 01842a3

File tree

6 files changed

+271
-151
lines changed

6 files changed

+271
-151
lines changed

bin/resources/shaders/dx11/tfx.fx

+76-42
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,25 @@ float4 ps_color(PS_INPUT input)
742742
float4 T = sample_color(st, input.t.w);
743743
#endif
744744

745+
if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC)
746+
{
747+
uint4 denorm_c_before = uint4(T);
748+
if (PS_READ_BA)
749+
{
750+
T.r = float((denorm_c_before.b << 3) & 0xF8);
751+
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
752+
T.b = float((denorm_c_before.a << 1) & 0xF8);
753+
T.a = float(denorm_c_before.a & 0x80);
754+
}
755+
else
756+
{
757+
T.r = float((denorm_c_before.r << 3) & 0xF8);
758+
T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0));
759+
T.b = float((denorm_c_before.g << 1) & 0xF8);
760+
T.a = float(denorm_c_before.g & 0x80);
761+
}
762+
}
763+
745764
float4 C = tfx(T, input.c);
746765

747766
atst(C);
@@ -925,48 +944,6 @@ PS_OUTPUT ps_main(PS_INPUT input)
925944
discard;
926945
}
927946

928-
if (PS_SHUFFLE)
929-
{
930-
uint4 denorm_c = uint4(C);
931-
uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f);
932-
933-
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
934-
if (PS_SHUFFLE_SAME)
935-
{
936-
if (PS_READ_BA)
937-
C = (float4)(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
938-
else
939-
C.ga = C.rg;
940-
}
941-
// Copy of a 16bit source in to this target
942-
else if (PS_READ16_SRC)
943-
{
944-
C.rb = (float2)float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5));
945-
if (denorm_c.a & 0x80u)
946-
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u));
947-
else
948-
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u));
949-
}
950-
// Write RB part. Mask will take care of the correct destination
951-
else if (PS_READ_BA)
952-
{
953-
C.rb = C.bb;
954-
if (denorm_c.a & 0x80u)
955-
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
956-
else
957-
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
958-
}
959-
else
960-
{
961-
C.rb = C.rr;
962-
if (denorm_c.g & 0x80u)
963-
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
964-
965-
else
966-
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
967-
}
968-
}
969-
970947
// Must be done before alpha correction
971948

972949
// AA (Fixed one) will output a coverage of 1.0 as alpha
@@ -1023,6 +1000,63 @@ PS_OUTPUT ps_main(PS_INPUT input)
10231000

10241001
ps_blend(C, alpha_blend, input.p.xy);
10251002

1003+
if (PS_SHUFFLE)
1004+
{
1005+
if (!PS_SHUFFLE_SAME && !PS_READ16_SRC)
1006+
{
1007+
uint4 denorm_c_after = uint4(C);
1008+
if (PS_READ_BA)
1009+
{
1010+
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
1011+
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
1012+
}
1013+
else
1014+
{
1015+
C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
1016+
C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
1017+
}
1018+
}
1019+
1020+
uint4 denorm_c = uint4(C);
1021+
uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f);
1022+
1023+
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
1024+
if (PS_SHUFFLE_SAME)
1025+
{
1026+
if (PS_READ_BA)
1027+
C = (float4)(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
1028+
else
1029+
C.ga = C.rg;
1030+
}
1031+
// Copy of a 16bit source in to this target
1032+
else if (PS_READ16_SRC)
1033+
{
1034+
C.rb = (float2)float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5));
1035+
if (denorm_c.a & 0x80u)
1036+
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u));
1037+
else
1038+
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u));
1039+
}
1040+
// Write RB part. Mask will take care of the correct destination
1041+
else if (PS_READ_BA)
1042+
{
1043+
C.rb = C.bb;
1044+
if (denorm_c.a & 0x80u)
1045+
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
1046+
else
1047+
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
1048+
}
1049+
else
1050+
{
1051+
C.rb = C.rr;
1052+
if (denorm_c.g & 0x80u)
1053+
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
1054+
1055+
else
1056+
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
1057+
}
1058+
}
1059+
10261060
ps_dither(C.rgb, input.p.xy);
10271061

10281062
// Color clamp/wrap needs to be done after sw blending and dithering

bin/resources/shaders/opengl/tfx_fs.glsl

+64-37
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,21 @@ vec4 ps_color()
687687
vec4 T = sample_color(st);
688688
#endif
689689

690+
#if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC
691+
uvec4 denorm_c_before = uvec4(T);
692+
#if PS_READ_BA
693+
T.r = float((denorm_c_before.b << 3) & 0xF8);
694+
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
695+
T.b = float((denorm_c_before.a << 1) & 0xF8);
696+
T.a = float(denorm_c_before.a & 0x80);
697+
#else
698+
T.r = float((denorm_c_before.r << 3) & 0xF8);
699+
T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0));
700+
T.b = float((denorm_c_before.g << 1) & 0xF8);
701+
T.a = float(denorm_c_before.g & 0x80);
702+
#endif
703+
#endif
704+
690705
vec4 C = tfx(T, PSin.c);
691706

692707
atst(C);
@@ -937,7 +952,56 @@ void ps_main()
937952

938953
vec4 C = ps_color();
939954

955+
// Must be done before alpha correction
956+
957+
// AA (Fixed one) will output a coverage of 1.0 as alpha
958+
#if PS_FIXED_ONE_A
959+
C.a = 128.0f;
960+
#endif
961+
962+
#if SW_AD_TO_HW
963+
vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f);
964+
vec4 alpha_blend = vec4(RT.a / 128.0f);
965+
#else
966+
vec4 alpha_blend = vec4(C.a / 128.0f);
967+
#endif
968+
969+
// Correct the ALPHA value based on the output format
970+
#if (PS_DST_FMT == FMT_16)
971+
float A_one = 128.0f; // alpha output will be 0x80
972+
C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;
973+
#elif (PS_DST_FMT == FMT_32) && (PS_FBA != 0)
974+
if(C.a < 128.0f) C.a += 128.0f;
975+
#endif
976+
977+
// Get first primitive that will write a failing alpha value
978+
#if PS_DATE == 1
979+
// DATM == 0
980+
// A pixel with alpha equal to 1 will fail (128-255)
981+
SV_Target0 = (C.a > 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
982+
return;
983+
#elif PS_DATE == 2
984+
// DATM == 1
985+
// A pixel with alpha equal to 0 will fail (0-127)
986+
SV_Target0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
987+
return;
988+
#endif
989+
990+
ps_blend(C, alpha_blend);
991+
992+
940993
#if PS_SHUFFLE
994+
#if !PS_SHUFFLE_SAME && !PS_READ16_SRC
995+
uvec4 denorm_c_after = uvec4(C);
996+
#if PS_READ_BA
997+
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
998+
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
999+
#else
1000+
C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
1001+
C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
1002+
#endif
1003+
#endif
1004+
9411005
uvec4 denorm_c = uvec4(C);
9421006
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
9431007

@@ -991,43 +1055,6 @@ void ps_main()
9911055
#endif // PS_SHUFFLE_SAME
9921056
#endif // PS_SHUFFLE
9931057

994-
// Must be done before alpha correction
995-
996-
// AA (Fixed one) will output a coverage of 1.0 as alpha
997-
#if PS_FIXED_ONE_A
998-
C.a = 128.0f;
999-
#endif
1000-
1001-
#if SW_AD_TO_HW
1002-
vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f);
1003-
vec4 alpha_blend = vec4(RT.a / 128.0f);
1004-
#else
1005-
vec4 alpha_blend = vec4(C.a / 128.0f);
1006-
#endif
1007-
1008-
// Correct the ALPHA value based on the output format
1009-
#if (PS_DST_FMT == FMT_16)
1010-
float A_one = 128.0f; // alpha output will be 0x80
1011-
C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;
1012-
#elif (PS_DST_FMT == FMT_32) && (PS_FBA != 0)
1013-
if(C.a < 128.0f) C.a += 128.0f;
1014-
#endif
1015-
1016-
// Get first primitive that will write a failing alpha value
1017-
#if PS_DATE == 1
1018-
// DATM == 0
1019-
// A pixel with alpha equal to 1 will fail (128-255)
1020-
SV_Target0 = (C.a > 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
1021-
return;
1022-
#elif PS_DATE == 2
1023-
// DATM == 1
1024-
// A pixel with alpha equal to 0 will fail (0-127)
1025-
SV_Target0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
1026-
return;
1027-
#endif
1028-
1029-
ps_blend(C, alpha_blend);
1030-
10311058
ps_dither(C.rgb);
10321059

10331060
// Color clamp/wrap needs to be done after sw blending and dithering

bin/resources/shaders/vulkan/tfx.glsl

+60-35
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,21 @@ vec4 ps_color()
933933
vec4 T = sample_color(st);
934934
#endif
935935

936+
#if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC
937+
uvec4 denorm_c_before = uvec4(T);
938+
#if PS_READ_BA
939+
T.r = float((denorm_c_before.b << 3) & 0xF8);
940+
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
941+
T.b = float((denorm_c_before.a << 1) & 0xF8);
942+
T.a = float(denorm_c_before.a & 0x80);
943+
#else
944+
T.r = float((denorm_c_before.r << 3) & 0xF8);
945+
T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0));
946+
T.b = float((denorm_c_before.g << 1) & 0xF8);
947+
T.a = float(denorm_c_before.g & 0x80);
948+
#endif
949+
#endif
950+
936951
vec4 C = tfx(T, vsIn.c);
937952

938953
atst(C);
@@ -1184,40 +1199,6 @@ void main()
11841199

11851200
vec4 C = ps_color();
11861201

1187-
#if PS_SHUFFLE
1188-
uvec4 denorm_c = uvec4(C);
1189-
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
1190-
1191-
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
1192-
#if PS_SHUFFLE_SAME
1193-
#if (PS_READ_BA)
1194-
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
1195-
#else
1196-
C.ga = C.rg;
1197-
#endif
1198-
// Copy of a 16bit source in to this target
1199-
#elif PS_READ16_SRC
1200-
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
1201-
if ((denorm_c.a & 0x80u) != 0u)
1202-
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
1203-
else
1204-
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
1205-
// Write RB part. Mask will take care of the correct destination
1206-
#elif PS_READ_BA
1207-
C.rb = C.bb;
1208-
if ((denorm_c.a & 0x80u) != 0u)
1209-
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
1210-
else
1211-
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
1212-
#else
1213-
C.rb = C.rr;
1214-
if ((denorm_c.g & 0x80u) != 0u)
1215-
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
1216-
else
1217-
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
1218-
#endif // PS_SHUFFLE_SAME
1219-
#endif // PS_SHUFFLE
1220-
12211202
// Must be done before alpha correction
12221203

12231204
// AA (Fixed one) will output a coverage of 1.0 as alpha
@@ -1254,9 +1235,53 @@ void main()
12541235
o_col0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
12551236

12561237
#else
1257-
12581238
ps_blend(C, alpha_blend);
12591239

1240+
#if PS_SHUFFLE
1241+
#if !PS_SHUFFLE_SAME && !PS_READ16_SRC
1242+
uvec4 denorm_c_after = uvec4(C);
1243+
#if PS_READ_BA
1244+
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
1245+
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
1246+
#else
1247+
C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
1248+
C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
1249+
#endif
1250+
#endif
1251+
1252+
uvec4 denorm_c = uvec4(C);
1253+
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
1254+
1255+
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
1256+
#if PS_SHUFFLE_SAME
1257+
#if (PS_READ_BA)
1258+
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
1259+
#else
1260+
C.ga = C.rg;
1261+
#endif
1262+
// Copy of a 16bit source in to this target
1263+
#elif PS_READ16_SRC
1264+
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
1265+
if ((denorm_c.a & 0x80u) != 0u)
1266+
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
1267+
else
1268+
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
1269+
// Write RB part. Mask will take care of the correct destination
1270+
#elif PS_READ_BA
1271+
C.rb = C.bb;
1272+
if ((denorm_c.a & 0x80u) != 0u)
1273+
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
1274+
else
1275+
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
1276+
#else
1277+
C.rb = C.rr;
1278+
if ((denorm_c.g & 0x80u) != 0u)
1279+
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
1280+
else
1281+
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
1282+
#endif // PS_SHUFFLE_SAME
1283+
#endif // PS_SHUFFLE
1284+
12601285
ps_dither(C.rgb);
12611286

12621287
// Color clamp/wrap needs to be done after sw blending and dithering

pcsx2/GS/Renderers/HW/GSRendererHW.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -5169,7 +5169,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
51695169
}
51705170

51715171
bool blending_alpha_pass = false;
5172-
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && (m_conf.colormask.wrgba & 0x7))
5172+
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle)))
51735173
{
51745174
EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass);
51755175
}

0 commit comments

Comments
 (0)