radeonsi: keep using v_rcp_f32 for division in future LLVM (v2)

author Marek Olšák <marek.olsak@amd.com>

Wed, 22 Jun 2016 20:24:52 +0000 (22:24 +0200)

committer Marek Olšák <marek.olsak@amd.com>

Mon, 4 Jul 2016 22:47:12 +0000 (00:47 +0200)
author Marek Olšák <marek.olsak@amd.com>
Wed, 22 Jun 2016 20:24:52 +0000 (22:24 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 4 Jul 2016 22:47:12 +0000 (00:47 +0200)
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h

index ec16def..61afa7a 100644 (file)
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -101,6 +101,9 @@ struct radeon_llvm_context {
         LLVMValueRef main_fn;
         LLVMTypeRef return_type;
  
+       unsigned fpmath_md_kind;
+       LLVMValueRef fpmath_md_2p5_ulp;
+
         struct gallivm_state gallivm;
  };
  
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c

index d183ff0..cf56c3b 100644 (file)
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1523,19 +1523,36 @@ static void emit_up2h(const struct lp_build_tgsi_action *action,
         }
  }
  
+static void emit_fdiv(const struct lp_build_tgsi_action *action,
+                     struct lp_build_tgsi_context *bld_base,
+                     struct lp_build_emit_data *emit_data)
+{
+       struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
+
+       emit_data->output[emit_data->chan] =
+               LLVMBuildFDiv(bld_base->base.gallivm->builder,
+                             emit_data->args[0], emit_data->args[1], "");
+
+       /* Use v_rcp_f32 instead of precise division. */
+       if (HAVE_LLVM >= 0x0309 &&
+           !LLVMIsConstant(emit_data->output[emit_data->chan]))
+               LLVMSetMetadata(emit_data->output[emit_data->chan],
+                               ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
+}
+
  /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
   * the target machine. f64 needs global unsafe math flags to get rsq. */
  static void emit_rsq(const struct lp_build_tgsi_action *action,
                      struct lp_build_tgsi_context *bld_base,
                      struct lp_build_emit_data *emit_data)
  {
-       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
         LLVMValueRef sqrt =
                 lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
                                          emit_data->args[0]);
  
         emit_data->output[emit_data->chan] =
-               LLVMBuildFDiv(builder, bld_base->base.one, sqrt, "");
+               lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
+                                         bld_base->base.one, sqrt);
  }
  
  void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *triple)
@@ -1586,6 +1603,13 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *trip
         bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
         bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
  
+       /* metadata allowing 2.5 ULP */
+       ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
+                                                      "fpmath", 6);
+       LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
+       ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
+                                                    &arg, 1);
+
         /* Allocate outputs */
         ctx->soa.outputs = ctx->outputs;
  
@@ -1615,6 +1639,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *trip
         bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
         bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
         bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
+       bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
         bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
         bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
         bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
author	Marek Olšák <marek.olsak@amd.com>
	Wed, 22 Jun 2016 20:24:52 +0000 (22:24 +0200)
committer	Marek Olšák <marek.olsak@amd.com>
	Mon, 4 Jul 2016 22:47:12 +0000 (00:47 +0200)
src/gallium/drivers/radeon/radeon_llvm.h		patch \| blob \| history
src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c		patch \| blob \| history