From 2255375c4db6efb0024b06c14c4024c1ab76d0f5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marcin=20=C5=9Alusarz?= Date: Wed, 7 Dec 2022 15:31:58 +0100 Subject: [PATCH] nir: add nir_mod_analysis & its tests Reviewed-by: Caio Oliveira Part-of: --- src/compiler/nir/meson.build | 2 + src/compiler/nir/nir.h | 2 + src/compiler/nir/nir_mod_analysis.c | 184 ++++++++++++++ src/compiler/nir/tests/mod_analysis_tests.cpp | 345 ++++++++++++++++++++++++++ 4 files changed, 533 insertions(+) create mode 100644 src/compiler/nir/nir_mod_analysis.c create mode 100644 src/compiler/nir/tests/mod_analysis_tests.cpp diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index ab69747..4752728 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -221,6 +221,7 @@ files_libnir = files( 'nir_lower_uniforms_to_ubo.c', 'nir_lower_sysvals_to_varyings.c', 'nir_metadata.c', + 'nir_mod_analysis.c', 'nir_move_vec_src_uses_to_dest.c', 'nir_normalize_cubemap_coords.c', 'nir_opt_access.c', @@ -401,6 +402,7 @@ if with_tests 'tests/core_tests.cpp', 'tests/loop_analyze_tests.cpp', 'tests/lower_returns_tests.cpp', + 'tests/mod_analysis_tests.cpp', 'tests/negative_equal_tests.cpp', 'tests/opt_if_tests.cpp', 'tests/opt_shrink_vectors_tests.cpp', diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index c7559f9..f821efc 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -5837,6 +5837,8 @@ nir_function_impl *nir_shader_get_preamble(nir_shader *shader); bool nir_lower_point_smooth(nir_shader *shader); bool nir_lower_poly_line_smooth(nir_shader *shader, unsigned num_smooth_aa_sample); +bool nir_mod_analysis(nir_ssa_scalar val, nir_alu_type val_type, unsigned div, unsigned *mod); + #include "nir_inline_helpers.h" #ifdef __cplusplus diff --git a/src/compiler/nir/nir_mod_analysis.c b/src/compiler/nir/nir_mod_analysis.c new file mode 100644 index 0000000..680de3b --- /dev/null +++ b/src/compiler/nir/nir_mod_analysis.c @@ -0,0 +1,184 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "nir.h" + +static nir_alu_type +nir_alu_src_type(const nir_alu_instr *instr, unsigned src) +{ + return nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[src]) | + nir_src_bit_size(instr->src[src].src); +} + +static nir_ssa_scalar +nir_alu_arg(const nir_alu_instr *alu, unsigned arg, unsigned comp) +{ + const nir_alu_src *src = &alu->src[arg]; + return nir_get_ssa_scalar(src->src.ssa, src->swizzle[comp]); +} + +/* Tries to determine the value of expression "val % div", assuming that val + * is interpreted as value of type "val_type". "div" must be a power of two. + * Returns true if it can statically tell the value of "val % div", false if not. + * Value of *mod is undefined if this function returned false. + * + * Tests are in mod_analysis_tests.cpp. + */ +bool +nir_mod_analysis(nir_ssa_scalar val, nir_alu_type val_type, unsigned div, unsigned *mod) +{ + if (div == 1) { + *mod = 0; + return true; + } + + assert(util_is_power_of_two_nonzero(div)); + + switch (val.def->parent_instr->type) { + case nir_instr_type_load_const: { + nir_load_const_instr *load = + nir_instr_as_load_const(val.def->parent_instr); + nir_alu_type base_type = nir_alu_type_get_base_type(val_type); + + if (base_type == nir_type_uint) { + assert(val.comp < load->def.num_components); + uint64_t ival = nir_const_value_as_uint(load->value[val.comp], + load->def.bit_size); + *mod = ival % div; + return true; + } else if (base_type == nir_type_int) { + assert(val.comp < load->def.num_components); + int64_t ival = nir_const_value_as_int(load->value[val.comp], + load->def.bit_size); + + /* whole analysis collapses the moment we allow negative values */ + if (ival < 0) + return false; + + *mod = ((uint64_t)ival) % div; + return true; + } + + break; + } + + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(val.def->parent_instr); + + if (alu->dest.dest.ssa.num_components != 1) + return false; + + switch (alu->op) { + case nir_op_ishr: { + if (nir_src_is_const(alu->src[1].src)) { + assert(alu->src[1].src.ssa->num_components == 1); + uint64_t shift = nir_src_as_uint(alu->src[1].src); + + if (util_last_bit(div) + shift > 32) + break; + + nir_alu_type type0 = nir_alu_src_type(alu, 0); + if (!nir_mod_analysis(nir_alu_arg(alu, 0, val.comp), type0, div << shift, mod)) + return false; + + *mod >>= shift; + return true; + } + break; + } + + case nir_op_iadd: { + unsigned mod0; + nir_alu_type type0 = nir_alu_src_type(alu, 0); + if (!nir_mod_analysis(nir_alu_arg(alu, 0, val.comp), type0, div, &mod0)) + return false; + + unsigned mod1; + nir_alu_type type1 = nir_alu_src_type(alu, 1); + if (!nir_mod_analysis(nir_alu_arg(alu, 1, val.comp), type1, div, &mod1)) + return false; + + *mod = (mod0 + mod1) % div; + return true; + } + + case nir_op_ishl: { + if (nir_src_is_const(alu->src[1].src)) { + assert(alu->src[1].src.ssa->num_components == 1); + uint64_t shift = nir_src_as_uint(alu->src[1].src); + + if ((div >> shift) == 0) { + *mod = 0; + return true; + } + nir_alu_type type0 = nir_alu_src_type(alu, 0); + return nir_mod_analysis(nir_alu_arg(alu, 0, val.comp), type0, div >> shift, mod); + } + break; + } + + case nir_op_imul_32x16: /* multiply 32-bits with low 16-bits */ + case nir_op_imul: { + unsigned mod0; + nir_alu_type type0 = nir_alu_src_type(alu, 0); + bool s1 = nir_mod_analysis(nir_alu_arg(alu, 0, val.comp), type0, div, &mod0); + + if (s1 && (mod0 == 0)) { + *mod = 0; + return true; + } + + /* if divider is larger than 2nd source max (interpreted) value + * then modulo of multiplication is 
unknown + */ + if (alu->op == nir_op_imul_32x16 && div > (1u << 16)) + return false; + + unsigned mod1; + nir_alu_type type1 = nir_alu_src_type(alu, 1); + bool s2 = nir_mod_analysis(nir_alu_arg(alu, 1, val.comp), type1, div, &mod1); + + if (s2 && (mod1 == 0)) { + *mod = 0; + return true; + } + + if (!s1 || !s2) + return false; + + *mod = (mod0 * mod1) % div; + return true; + } + + default: + break; + } + break; + } + + default: + break; + } + + return false; +} diff --git a/src/compiler/nir/tests/mod_analysis_tests.cpp b/src/compiler/nir/tests/mod_analysis_tests.cpp new file mode 100644 index 0000000..7f3f308 --- /dev/null +++ b/src/compiler/nir/tests/mod_analysis_tests.cpp @@ -0,0 +1,345 @@ +/* + * Copyright © 2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include + +#include "nir.h" +#include "nir_builder.h" +#include "util/u_math.h" + +static inline bool +nir_mod_analysis_comp0(nir_ssa_def *val, nir_alu_type val_type, unsigned div, unsigned *mod) +{ + return nir_mod_analysis(nir_get_ssa_scalar(val, 0), val_type, div, mod); +} + +class nir_mod_analysis_test : public ::testing::Test { +protected: + nir_mod_analysis_test(); + ~nir_mod_analysis_test(); + nir_ssa_def *nir_imul_vec2y(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1); + + nir_builder b; + nir_ssa_def *v[50]; + nir_ssa_def *invocation; +}; + +nir_mod_analysis_test::nir_mod_analysis_test() +{ + static const nir_shader_compiler_options options = { }; + b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, &options, + "mod analysis"); + for (int i = 0; i < 50; ++i) + v[i] = nir_imm_int(&b, i); + invocation = nir_load_local_invocation_index(&b); +} + +nir_mod_analysis_test::~nir_mod_analysis_test() +{ + ralloc_free(b.shader); +} + +/* returns src0 * src1.y */ +nir_ssa_def * +nir_mod_analysis_test::nir_imul_vec2y(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1) +{ + nir_alu_instr *instr = nir_alu_instr_create(b->shader, nir_op_imul); + + instr->src[0].src = nir_src_for_ssa(src0); + instr->src[1].src = nir_src_for_ssa(src1); + instr->src[1].swizzle[0] = 1; + + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, 1, 32, NULL); + instr->dest.write_mask = 1; + + nir_builder_instr_insert(b, &instr->instr); + return &instr->dest.dest.ssa; +} + +TEST_F(nir_mod_analysis_test, const_val) +{ + /* const % const_mod should be always known */ + for (unsigned const_mod = 1; const_mod <= 1024; const_mod *= 2) { + for (int cnst = 0; cnst < 10; ++cnst) { + unsigned mod = INT32_MAX; + + EXPECT_TRUE(nir_mod_analysis_comp0(v[cnst], nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, cnst % const_mod); + } + } +} + +TEST_F(nir_mod_analysis_test, dynamic) +{ + /* invocation % const_mod should never be known unless const_mod is 1 */ + + unsigned mod = INT32_MAX; + + EXPECT_TRUE(nir_mod_analysis_comp0(invocation, nir_type_uint, 1, &mod)); + EXPECT_EQ(mod, 0); + + for (unsigned const_mod = 2; const_mod <= 1024; const_mod *= 2) + EXPECT_FALSE(nir_mod_analysis_comp0(invocation, nir_type_uint, const_mod, &mod)); +} + +TEST_F(nir_mod_analysis_test, const_plus_const) +{ + /* (const1 + const2) % const_mod should always be known */ + for (unsigned const_mod = 1; const_mod <= 1024; const_mod *= 2) { + for (unsigned c1 = 0; c1 < 10; ++c1) { + for (unsigned c2 = 0; c2 < 10; ++c2) { + nir_ssa_def *sum = nir_iadd(&b, v[c1], v[c2]); + + unsigned mod = INT32_MAX; + + EXPECT_TRUE(nir_mod_analysis_comp0(sum, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, (c1 + c2) % const_mod); + } + } + } +} + +TEST_F(nir_mod_analysis_test, dynamic_plus_const) +{ + /* (invocation + const) % const_mod should never be known unless const_mod is 1 */ + for (unsigned const_mod = 1; const_mod <= 1024; const_mod *= 2) { + for (unsigned c = 0; c < 10; ++c) { + nir_ssa_def *sum = nir_iadd(&b, invocation, v[c]); + + unsigned mod = INT32_MAX; + + if (const_mod == 1) { + EXPECT_TRUE(nir_mod_analysis_comp0(sum, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, 0); + } else { + EXPECT_FALSE(nir_mod_analysis_comp0(sum, nir_type_uint, const_mod, &mod)); + } + } + } +} + +TEST_F(nir_mod_analysis_test, const_mul_const) +{ + /* (const1 * const2) % const_mod should always be known */ + for (unsigned const_mod = 1; const_mod <= 1024; const_mod *= 2) { + for (unsigned c1 = 0; c1 < 10; ++c1) { + for (unsigned c2 = 0; c2 < 10; ++c2) { + 
nir_ssa_def *mul = nir_imul(&b, v[c1], v[c2]); + + unsigned mod = INT32_MAX; + + EXPECT_TRUE(nir_mod_analysis_comp0(mul, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, (c1 * c2) % const_mod); + } + } + } +} + +TEST_F(nir_mod_analysis_test, dynamic_mul_const) +{ + /* (invocation * const) % const_mod == 0 only if const % const_mod == 0, unknown otherwise */ + for (unsigned const_mod = 2; const_mod <= 1024; const_mod *= 2) { + for (unsigned c = 0; c < 10; ++c) { + nir_ssa_def *mul = nir_imul(&b, invocation, v[c]); + + unsigned mod = INT32_MAX; + + if (c % const_mod == 0) { + EXPECT_TRUE(nir_mod_analysis_comp0(mul, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, 0); + } else { + EXPECT_FALSE(nir_mod_analysis_comp0(mul, nir_type_uint, const_mod, &mod)); + } + } + } +} + +TEST_F(nir_mod_analysis_test, dynamic_mul_const_swizzled) +{ + /* (invocation * const.y) % const_mod == 0 only if const.y % const_mod == 0, unknown otherwise */ + for (unsigned const_mod = 2; const_mod <= 1024; const_mod *= 2) { + for (unsigned c = 0; c < 10; ++c) { + nir_ssa_def *vec2 = nir_imm_ivec2(&b, 10 - c, c); + nir_ssa_def *mul = nir_imul_vec2y(&b, invocation, vec2); + + unsigned mod = INT32_MAX; + + if (c % const_mod == 0) { + EXPECT_TRUE(nir_mod_analysis_comp0(mul, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, 0); + } else { + EXPECT_FALSE(nir_mod_analysis_comp0(mul, nir_type_uint, const_mod, &mod)); + } + } + } +} + +TEST_F(nir_mod_analysis_test, dynamic_mul32x16_const) +{ + /* (invocation mul32x16 const) % const_mod == 0 only if const % const_mod == 0 + * and const_mod <= 2^16, unknown otherwise + */ + for (unsigned const_mod = 1; const_mod <= (1u << 24); const_mod *= 2) { + for (unsigned c = 0; c < 10; ++c) { + nir_ssa_def *mul = nir_imul_32x16(&b, invocation, v[c]); + + unsigned mod = INT32_MAX; + + if (c % const_mod == 0 && const_mod <= (1u << 16)) { + EXPECT_TRUE(nir_mod_analysis_comp0(mul, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, 0); + } else { + EXPECT_FALSE(nir_mod_analysis_comp0(mul, nir_type_uint, const_mod, &mod)); + } + } + } +} + +TEST_F(nir_mod_analysis_test, dynamic_shl_const) +{ + /* (invocation << const) % const_mod == 0 only if const >= log2(const_mod), unknown otherwise */ + for (unsigned const_mod = 1; const_mod <= 1024; const_mod *= 2) { + for (unsigned c = 0; c < 10; ++c) { + nir_ssa_def *shl = nir_ishl(&b, invocation, v[c]); + + unsigned mod = INT32_MAX; + + if (c >= util_logbase2(const_mod)) { + EXPECT_TRUE(nir_mod_analysis_comp0(shl, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, 0); + } else { + EXPECT_FALSE(nir_mod_analysis_comp0(shl, nir_type_uint, const_mod, &mod)); + } + } + } +} + +TEST_F(nir_mod_analysis_test, dynamic_shr_const) +{ + /* (invocation >> const) % const_mod should never be known, unless const_mod is 1 */ + for (unsigned const_mod = 1; const_mod <= 1024; const_mod *= 2) { + for (unsigned i = 0; i < 10; ++i) { + nir_ssa_def *shr = nir_ishr(&b, invocation, v[i]); + + unsigned mod = INT32_MAX; + + if (const_mod == 1) { + EXPECT_TRUE(nir_mod_analysis_comp0(shr, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, 0); + } else { + EXPECT_FALSE(nir_mod_analysis_comp0(shr, nir_type_uint, const_mod, &mod)); + } + } + } +} + +TEST_F(nir_mod_analysis_test, dynamic_mul_const_shr_const) +{ + /* ((invocation * 32) >> const) % const_mod == 0 only if + * const_mod is 1 or + * (32 >> const) is not 0 and (32 >> const) % const_mod == 0 + * + */ + nir_ssa_def *inv_mul_32 = nir_imul(&b, invocation, v[32]); + for (unsigned const_mod = 1; const_mod <= 1024; const_mod *= 
2) { + for (unsigned c = 0; c < 8; ++c) { + nir_ssa_def *shr = nir_ishr(&b, inv_mul_32, v[c]); + + unsigned mod = INT32_MAX; + + if (const_mod == 1 || ((32 >> c) > 0 && (32 >> c) % const_mod == 0)) { + EXPECT_TRUE(nir_mod_analysis_comp0(shr, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, 0); + } else { + EXPECT_FALSE(nir_mod_analysis_comp0(shr, nir_type_uint, const_mod, &mod)); + } + } + } +} + +TEST_F(nir_mod_analysis_test, dynamic_mul_const_swizzled_shr_const) +{ + /* ((invocation * ivec2(31, 32).y) >> const) % const_mod == 0 only if + * const_mod is 1 or + * (32 >> const) is not 0 and (32 >> const) % const_mod == 0 + * + */ + nir_ssa_def *vec2 = nir_imm_ivec2(&b, 31, 32); + nir_ssa_def *inv_mul_32 = nir_imul_vec2y(&b, invocation, vec2); + + for (unsigned const_mod = 1; const_mod <= 1024; const_mod *= 2) { + for (unsigned c = 0; c < 8; ++c) { + nir_ssa_def *shr = nir_ishr(&b, inv_mul_32, v[c]); + + unsigned mod = INT32_MAX; + + if (const_mod == 1 || ((32 >> c) > 0 && (32 >> c) % const_mod == 0)) { + EXPECT_TRUE(nir_mod_analysis_comp0(shr, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, 0); + } else { + EXPECT_FALSE(nir_mod_analysis_comp0(shr, nir_type_uint, const_mod, &mod)); + } + } + } +} + +TEST_F(nir_mod_analysis_test, const_shr_const) +{ + /* (const >> const) % const_mod should always be known */ + for (unsigned const_mod = 1; const_mod <= 1024; const_mod *= 2) { + for (unsigned i = 0; i < 50; ++i) { + for (unsigned j = 0; j < 6; ++j) { + nir_ssa_def *shr = nir_ishr(&b, v[i], v[j]); + + unsigned mod = INT32_MAX; + + EXPECT_TRUE(nir_mod_analysis_comp0(shr, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, (i >> j) % const_mod); + } + } + } +} + +TEST_F(nir_mod_analysis_test, const_shr_const_overflow) +{ + /* (large_const >> const_shr) % const_mod should be known if + * const_mod << const_shr is still below UINT32_MAX. + */ + unsigned large_const_int = 0x12345678; + nir_ssa_def *large_const = nir_imm_int(&b, large_const_int); + + for (unsigned shift = 0; shift < 30; ++shift) { + nir_ssa_def *shr = nir_ishr(&b, large_const, v[shift]); + + for (unsigned const_mod = 1; const_mod <= 1024; const_mod *= 2) { + unsigned mod = INT32_MAX; + + if ((((uint64_t)const_mod) << shift) > UINT32_MAX) { + EXPECT_FALSE(nir_mod_analysis_comp0(shr, nir_type_uint, const_mod, &mod)); + } else { + EXPECT_TRUE(nir_mod_analysis_comp0(shr, nir_type_uint, const_mod, &mod)); + EXPECT_EQ(mod, (large_const_int >> shift) % const_mod); + } + } + } +} -- 2.7.4
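
Usage sketch (not part of the patch): the helper name and parameters below are hypothetical; only nir_mod_analysis() and nir_get_ssa_scalar() come from the change above. A pass that wants to prove that an SSA value, interpreted as an unsigned integer, is a multiple of a power-of-two alignment could call the analysis roughly like this:

    /* Hypothetical caller: returns true if "def" (treated as uint) is provably
     * a multiple of "align". "align" must be a power of two, as required by
     * nir_mod_analysis().
     */
    static bool
    ssa_def_is_aligned(nir_ssa_def *def, unsigned align)
    {
       unsigned mod;
       if (!nir_mod_analysis(nir_get_ssa_scalar(def, 0), nir_type_uint, align, &mod))
          return false; /* remainder could not be proven */
       return mod == 0;
    }

Note that a false return only means the remainder could not be determined statically (per the function's doc comment, *mod is undefined in that case); it does not mean the value is misaligned.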