From ba7519ca36ce0de74657b01fe4fa2c97aace538e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 15 Feb 2019 11:00:39 -0800 Subject: [PATCH] radeonsi: Go back to using llvm.pow intrinsic for nir_op_fpow ARB_vertex_program and ARB_fragment_program define 0^0 = 1 (while GLSL leaves it undefined). Performing fpow lowering in NIR would break this behavior, preventing us from using prog_to_nir. According to llvm/lib/Target/AMDGPU/SIInstructions.td, POW_common expands to , which presumably does a zero-wins multiply. Lowering in NIR results in a non-legacy multiply, where: pow(0, 0) = 2^(log2(0) * 0) = 2^(-INF * 0) = 2^(-NaN) = -NaN which isn't the desired result. This reverts: - commit d6b75392067712908bdc372f1007e085439bf9f5 (ac/nir: remove emission of nir_op_fpow) - commit 22430224fec31591432d4a3e65c6f457ba1c1653 (radeonsi/nir: enable lowering of fpow) and prevents a regression in gl-1.0-spot-light with AMD_DEBUG=nir after enabling prog_to_nir in st/mesa later in this series. Reviewed-by: Timothy Arceri --- src/amd/common/ac_nir_to_llvm.c | 4 ++++ src/gallium/drivers/radeonsi/si_get.c | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 8fafe76..40a35c3 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -801,6 +801,10 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.mant.f64", ctx->ac.f64, src, 1, AC_FUNC_ATTR_READNONE); break; + case nir_op_fpow: + result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", + ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); + break; case nir_op_fmax: result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index f8ca02d..a5cb209 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -486,7 +486,6 @@ static const struct nir_shader_compiler_options nir_options = { .lower_scmp = true, .lower_flrp32 = true, .lower_flrp64 = true, - .lower_fpow = true, .lower_fsat = true, .lower_fdiv = true, .lower_sub = true, -- 2.7.4