From 0a790c3019f24dcf905c1c6245827525a11f2a44 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 11 Jul 2019 15:31:50 +0100
Subject: [PATCH] nir/algebraic: add a few masking-before-unpack optimizations

Helps some Dawn of War 3 and F1 2017 shaders with ACO:

Totals from affected shaders:
SGPRS: 2136 -> 2128 (-0.37 %)
VGPRS: 1624 -> 1628 (0.25 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 168068 -> 164332 (-2.22 %) bytes
LDS: 44 -> 44 (0.00 %) blocks
Max Waves: 222 -> 221 (-0.45 %)
Wait states: 0 -> 0 (0.00 %)

Signed-off-by: Rhys Perry
Reviewed-by: Ian Romanick
Reviewed-by: Eric Anholt
---
 src/compiler/nir/nir_opt_algebraic.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 40f718e..26e2fc3 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -882,7 +882,15 @@ optimizations.extend([
     (('ishr', 'a@16', 8), ('extract_i8', a, 1), '!options->lower_extract_byte'),
     (('ishr', 'a@32', 24), ('extract_i8', a, 3), '!options->lower_extract_byte'),
     (('ishr', 'a@64', 56), ('extract_i8', a, 7), '!options->lower_extract_byte'),
-    (('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte')
+    (('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte'),
+
+    # Useless masking before unpacking
+    (('unpack_half_2x16_split_x', ('iand', a, 0xffff)), ('unpack_half_2x16_split_x', a)),
+    (('unpack_32_2x16_split_x', ('iand', a, 0xffff)), ('unpack_32_2x16_split_x', a)),
+    (('unpack_64_2x32_split_x', ('iand', a, 0xffffffff)), ('unpack_64_2x32_split_x', a)),
+    (('unpack_half_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_half_2x16_split_y', a)),
+    (('unpack_32_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_32_2x16_split_y', a)),
+    (('unpack_64_2x32_split_y', ('iand', a, 0xffffffff00000000)), ('unpack_64_2x32_split_y', a)),
 ])
 
 # After the ('extract_u8', a, 0) pattern, above, triggers, there will be
-- 
2.7.4