From 31a0574b968b8dbb2b024fb332bcba87a02bef46 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Wed, 30 Sep 2020 10:48:29 +0200
Subject: [PATCH] ac/nir: implement nir_op_fsat

With fmed3 if available, otherwise fall back to fmin/fmax.

ac_build_fsat() uses fmin(fmax(src, 0.0), 1.0) for 64-bit values and for
16-bit values on GFX8 and older, and the llvm.amdgcn.fmed3 intrinsic for
the remaining 16-bit and 32-bit cases. 32-bit results are canonicalized
on chips older than GFX9.

[Local cleanups in ac_build_fsat(): the inner return-type variable no
longer shadows the 'type' parameter, and the intrinsic name is held in a
'const char *'. No functional change.]

Signed-off-by: Samuel Pitoiset
Reviewed-by: Bas Nieuwenhuizen
Part-of:
---
 src/amd/llvm/ac_llvm_build.c  | 44 +++++++++++++++++++++++++++++++++++++++++++
 src/amd/llvm/ac_llvm_build.h  |  3 +++
 src/amd/llvm/ac_nir_to_llvm.c |  5 +++++
 3 files changed, 52 insertions(+)

diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index e37c3da..44ebb01 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -2447,6 +2447,50 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
    ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0);
 }
 
+LLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
+                           LLVMTypeRef type)
+{
+   unsigned bitsize = ac_get_elem_bits(ctx, type);
+   LLVMValueRef zero = LLVMConstReal(type, 0.0);
+   LLVMValueRef one = LLVMConstReal(type, 1.0);
+   LLVMValueRef result;
+
+   if (bitsize == 64 || (bitsize == 16 && ctx->chip_class <= GFX8)) {
+      /* Use fmin/fmax for 64-bit fsat or 16-bit on GFX6-GFX8 because LLVM
+       * doesn't expose an intrinsic.
+       */
+      result = ac_build_fmin(ctx, ac_build_fmax(ctx, src, zero), one);
+   } else {
+      LLVMTypeRef ret_type;
+      const char *intr;
+
+      if (bitsize == 16) {
+         intr = "llvm.amdgcn.fmed3.f16";
+         ret_type = ctx->f16;
+      } else {
+         assert(bitsize == 32);
+         intr = "llvm.amdgcn.fmed3.f32";
+         ret_type = ctx->f32;
+      }
+      /* The median of (0.0, 1.0, src) is src clamped to [0.0, 1.0]. */
+      LLVMValueRef params[] = {
+         zero,
+         one,
+         src,
+      };
+
+      result = ac_build_intrinsic(ctx, intr, ret_type, params, 3,
+                                  AC_FUNC_ATTR_READNONE);
+   }
+
+   if (ctx->chip_class < GFX9 && bitsize == 32) {
+      /* Only pre-GFX9 chips do not flush denorms. */
+      result = ac_build_canonicalize(ctx, result, bitsize);
+   }
+
+   return result;
+}
+
 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
 {
    LLVMTypeRef type;
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index 2e08a99..8423c87 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -455,6 +455,9 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0);
 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src);
 LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
 
+LLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
+                           LLVMTypeRef type);
+
 LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0);
 
 void ac_optimize_vs_outputs(struct ac_llvm_context *ac, LLVMValueRef main_fn,
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index f07960e..89e4938 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -743,6 +743,11 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
          result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
       }
       break;
+   case nir_op_fsat:
+      src[0] = ac_to_float(&ctx->ac, src[0]);
+      result = ac_build_fsat(&ctx->ac, src[0],
+                             ac_to_float_type(&ctx->ac, def_type));
+      break;
    case nir_op_iabs:
       result = emit_iabs(&ctx->ac, src[0]);
       break;
-- 
2.7.4