radeonsi/ac: move vertex export removal to common code.
author Dave Airlie <airlied@redhat.com>
Tue, 25 Apr 2017 22:33:29 +0000 (23:33 +0100)
committer Dave Airlie <airlied@redhat.com>
Thu, 27 Apr 2017 04:17:47 +0000 (05:17 +0100)
This code can be shared by radv, we bump the max to
VARYING_SLOT_MAX here, but that shouldn't have too
much fallout.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_exp_param.h [new file with mode: 0644]
src/amd/common/ac_llvm_build.c
src/amd/common/ac_llvm_build.h
src/amd/common/ac_llvm_helper.cpp
src/amd/common/ac_llvm_util.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_shaders.c

diff --git a/src/amd/common/ac_exp_param.h b/src/amd/common/ac_exp_param.h
new file mode 100644 (file)
index 0000000..b97ce81
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+#ifndef AC_EXP_PARAM_H
+#define AC_EXP_PARAM_H
+
+enum {
+       /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4]
+        * Values in this range are offsets of PARAM exports that the
+        * pixel shader input is loaded from. */
+       AC_EXP_PARAM_OFFSET_0 = 0,
+       AC_EXP_PARAM_OFFSET_31 = 31,
+       /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1]
+        * Stored instead of an offset when a constant PARAM export has
+        * been eliminated. The suffix is the constant value of the x, y, z
+        * and w channels. */
+       AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
+       AC_EXP_PARAM_DEFAULT_VAL_0001,
+       AC_EXP_PARAM_DEFAULT_VAL_1110,
+       AC_EXP_PARAM_DEFAULT_VAL_1111,
+       AC_EXP_PARAM_UNDEFINED = 255, /* initial value, no offset assigned */
+};
+
+#endif
index d45094c..9729756 100644 (file)
 #include <stdio.h>
 
 #include "ac_llvm_util.h"
-
+#include "ac_exp_param.h"
 #include "util/bitscan.h"
 #include "util/macros.h"
 #include "sid.h"
 
+#include "shader_enums.h"
+
 /* Initialize module-independent parts of the context.
  *
  * The caller is responsible for initializing ctx::module and ctx::builder.
@@ -1244,3 +1246,155 @@ void ac_get_image_intr_name(const char *base_name,
                          data_type_name, coords_type_name, rsrc_type_name);
         }
 }
+
+#define AC_EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) /* operand index of the export target */
+#define AC_EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) /* operand index of the first of the 4 exported values */
+
+/* Try to replace a PARAM export by a SPI_PS_INPUT_CNTL_i.DEFAULT_VAL setting.
+ *
+ * "inst" is the export call and "offset" is the PARAM offset it writes.
+ * The export qualifies if each of its four values is undef or the constant
+ * 0 or 1, and x, y, z are either all 0 or all 1.
+ *
+ * On success the instruction is erased, the first entry of
+ * vs_output_param_offset (among the first num_outputs) that equals "offset"
+ * is changed to the matching AC_EXP_PARAM_DEFAULT_VAL_* value, and true is
+ * returned. Otherwise nothing is modified and false is returned.
+ */
+static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset,
+                                     uint32_t num_outputs,
+                                     LLVMValueRef inst, unsigned offset)
+{
+       bool is_zero[4] = {}, is_one[4] = {};
+       unsigned chan, out;
+       unsigned default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
+
+       for (chan = 0; chan < 4; chan++) {
+               LLVMValueRef value = LLVMGetOperand(inst, AC_EXP_OUT0 + chan);
+
+               /* Undef channels match both 0 and 1, so they never
+                * prevent the elimination. */
+               if (LLVMIsUndef(value)) {
+                       is_zero[chan] = true;
+                       is_one[chan] = true;
+                       continue;
+               }
+
+               /* Anything that isn't a floating-point constant stays. */
+               if (!LLVMIsAConstantFP(value))
+                       return false;
+
+               LLVMBool loses_info;
+               double a = LLVMConstRealGetDouble(value, &loses_info);
+
+               if (a == 0)
+                       is_zero[chan] = true;
+               else if (a == 1)
+                       is_one[chan] = true;
+               else
+                       return false; /* other constant */
+       }
+
+       /* Only certain combinations of 0 and 1 can be eliminated:
+        * xyz has to be all zeros or all ones. */
+       bool xyz_zero = is_zero[0] && is_zero[1] && is_zero[2];
+       bool xyz_one = is_one[0] && is_one[1] && is_one[2];
+
+       if (!xyz_zero && !xyz_one)
+               return false;
+
+       /* 0 = 0000, 1 = 0001, 2 = 1110, 3 = 1111 */
+       default_val = (xyz_zero ? 0 : 2) + (is_zero[3] ? 0 : 1);
+
+       /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+       LLVMInstructionEraseFromParent(inst);
+
+       /* Change OFFSET to DEFAULT_VAL. */
+       for (out = 0; out < num_outputs; out++) {
+               if (vs_output_param_offset[out] != offset)
+                       continue;
+
+               vs_output_param_offset[out] =
+                       AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
+               break;
+       }
+       return true;
+}
+
+struct ac_vs_exports {
+       unsigned num; /* number of valid entries in offset[] and inst[] */
+       unsigned offset[VARYING_SLOT_MAX]; /* PARAM offset of each export that was kept */
+       LLVMValueRef inst[VARYING_SLOT_MAX]; /* the export call instruction itself */
+};
+
+/* Remove PARAM exports that can be expressed as DEFAULT_VAL and renumber
+ * the remaining PARAM exports so that no holes are left.
+ *
+ * ctx:                    provides the i32 type used for new export targets
+ * main_fn:                function whose instructions are scanned
+ * vs_output_param_offset: per-output table with at least num_outputs
+ *                         entries; updated in place
+ * num_outputs:            number of valid table entries, at most
+ *                         VARYING_SLOT_MAX
+ * num_param_exports:      receives the number of remaining PARAM exports;
+ *                         only written if at least one export was removed
+ */
+void ac_eliminate_const_vs_outputs(struct ac_llvm_context *ctx,
+                                  LLVMValueRef main_fn,
+                                  uint8_t *vs_output_param_offset,
+                                  uint32_t num_outputs,
+                                  uint8_t *num_param_exports)
+{
+       LLVMBasicBlockRef bb;
+       bool removed_any = false;
+       struct ac_vs_exports exports;
+
+       assert(num_outputs <= VARYING_SLOT_MAX);
+       exports.num = 0;
+
+       /* Process all LLVM instructions. */
+       bb = LLVMGetFirstBasicBlock(main_fn);
+       while (bb) {
+               LLVMValueRef inst = LLVMGetFirstInstruction(bb);
+
+               while (inst) {
+                       /* Fetch the successor first, because "cur" may be
+                        * erased below. */
+                       LLVMValueRef cur = inst;
+                       inst = LLVMGetNextInstruction(inst);
+
+                       if (LLVMGetInstructionOpcode(cur) != LLVMCall)
+                               continue;
+
+                       LLVMValueRef callee = ac_llvm_get_called_value(cur);
+
+                       if (!ac_llvm_is_function(callee))
+                               continue;
+
+                       const char *name = LLVMGetValueName(callee);
+                       unsigned num_args = LLVMCountParams(callee);
+
+                       /* Check if this is an export instruction. */
+                       if ((num_args != 9 && num_args != 8) ||
+                           (strcmp(name, "llvm.SI.export") &&
+                            strcmp(name, "llvm.amdgcn.exp.f32")))
+                               continue;
+
+                       LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
+                       unsigned target = LLVMConstIntGetZExtValue(arg);
+
+                       /* Skip exports to targets below the PARAM range. */
+                       if (target < V_008DFC_SQ_EXP_PARAM)
+                               continue;
+
+                       target -= V_008DFC_SQ_EXP_PARAM;
+
+                       /* Eliminate constant value PARAM exports. */
+                       if (ac_eliminate_const_output(vs_output_param_offset,
+                                                     num_outputs, cur, target)) {
+                               removed_any = true;
+                       } else {
+                               /* The arrays are fixed-size. */
+                               assert(exports.num < VARYING_SLOT_MAX);
+                               exports.offset[exports.num] = target;
+                               exports.inst[exports.num] = cur;
+                               exports.num++;
+                       }
+               }
+               bb = LLVMGetNextBasicBlock(bb);
+       }
+
+       /* Remove holes in export memory due to removed PARAM exports.
+        * This is done by renumbering all PARAM exports.
+        */
+       if (removed_any) {
+               uint8_t current_offset[VARYING_SLOT_MAX];
+               unsigned new_count = 0;
+               unsigned out, i;
+
+               /* Make a copy of the offsets. We need the old version while
+                * we are modifying some of them.
+                *
+                * Copy only num_outputs entries: the caller's table may be
+                * smaller than VARYING_SLOT_MAX, and only the first
+                * num_outputs entries are read below. */
+               memcpy(current_offset, vs_output_param_offset,
+                      num_outputs * sizeof(current_offset[0]));
+
+               for (i = 0; i < exports.num; i++) {
+                       unsigned offset = exports.offset[i];
+
+                       for (out = 0; out < num_outputs; out++) {
+                               if (current_offset[out] != offset)
+                                       continue;
+
+                               LLVMSetOperand(exports.inst[i], AC_EXP_TARGET,
+                                              LLVMConstInt(ctx->i32,
+                                                           V_008DFC_SQ_EXP_PARAM + new_count, 0));
+                               vs_output_param_offset[out] = new_count;
+                               new_count++;
+                               break;
+                       }
+               }
+               *num_param_exports = new_count;
+       }
+}
index d6edcde..1c3610a 100644 (file)
@@ -239,6 +239,12 @@ void ac_get_image_intr_name(const char *base_name,
                            LLVMTypeRef coords_type,
                            LLVMTypeRef rsrc_type,
                            char *out_name, unsigned out_len);
+
+void ac_eliminate_const_vs_outputs(struct ac_llvm_context *ac,
+                                  LLVMValueRef main_fn,
+                                  uint8_t *vs_output_param_offset,
+                                  uint32_t num_outputs,
+                                  uint8_t *num_param_exports);
 #ifdef __cplusplus
 }
 #endif
index 11fa809..03877d5 100644 (file)
@@ -61,3 +61,21 @@ bool ac_is_sgpr_param(LLVMValueRef arg)
        return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) ||
               AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
 }
+
+/* Return the callee operand of the call instruction "call".
+ *
+ * LLVM 3.9 and newer use LLVMGetCalledValue from the C API; older versions
+ * go through the C++ CallSite class.
+ */
+LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
+{
+#if HAVE_LLVM < 0x0309
+       llvm::CallSite site(llvm::unwrap<llvm::Instruction>(call));
+
+       return llvm::wrap(site.getCalledValue());
+#else
+       return LLVMGetCalledValue(call);
+#endif
+}
+
+/* Return true if the value "v" is a function.
+ *
+ * LLVM 3.9 and newer query the value kind through the C API; older versions
+ * use the C++ isa<> check.
+ */
+bool ac_llvm_is_function(LLVMValueRef v)
+{
+#if HAVE_LLVM < 0x0309
+       return llvm::isa<llvm::Function>(llvm::unwrap(v));
+#else
+       return LLVMGetValueKind(v) == LLVMFunctionValueKind;
+#endif
+}
index faecf1e..38e7dde 100644 (file)
@@ -64,6 +64,8 @@ void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
                            unsigned attrib_mask);
 void ac_dump_module(LLVMModuleRef module);
 
+LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
+bool ac_llvm_is_function(LLVMValueRef v);
 #ifdef __cplusplus
 }
 #endif
index 69634b1..125affb 100644 (file)
@@ -41,6 +41,7 @@
 
 #include "ac_binary.h"
 #include "ac_llvm_util.h"
+#include "ac_exp_param.h"
 #include "si_shader_internal.h"
 #include "si_pipe.h"
 #include "sid.h"
@@ -6809,76 +6810,10 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
        bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 }
 
-#define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
-#define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
-
-/* Return true if the PARAM export has been eliminated. */
-static bool si_eliminate_const_output(struct si_shader_context *ctx,
-                                     LLVMValueRef inst, unsigned offset)
-{
-       struct si_shader *shader = ctx->shader;
-       unsigned num_outputs = shader->selector->info.num_outputs;
-       unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
-       bool is_zero[4] = {}, is_one[4] = {};
-
-       for (i = 0; i < 4; i++) {
-               LLVMBool loses_info;
-               LLVMValueRef p = LLVMGetOperand(inst, EXP_OUT0 + i);
-
-               /* It's a constant expression. Undef outputs are eliminated too. */
-               if (LLVMIsUndef(p)) {
-                       is_zero[i] = true;
-                       is_one[i] = true;
-               } else if (LLVMIsAConstantFP(p)) {
-                       double a = LLVMConstRealGetDouble(p, &loses_info);
-
-                       if (a == 0)
-                               is_zero[i] = true;
-                       else if (a == 1)
-                               is_one[i] = true;
-                       else
-                               return false; /* other constant */
-               } else
-                       return false;
-       }
-
-       /* Only certain combinations of 0 and 1 can be eliminated. */
-       if (is_zero[0] && is_zero[1] && is_zero[2])
-               default_val = is_zero[3] ? 0 : 1;
-       else if (is_one[0] && is_one[1] && is_one[2])
-               default_val = is_zero[3] ? 2 : 3;
-       else
-               return false;
-
-       /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
-       LLVMInstructionEraseFromParent(inst);
-
-       /* Change OFFSET to DEFAULT_VAL. */
-       for (i = 0; i < num_outputs; i++) {
-               if (shader->info.vs_output_param_offset[i] == offset) {
-                       shader->info.vs_output_param_offset[i] =
-                               EXP_PARAM_DEFAULT_VAL_0000 + default_val;
-                       break;
-               }
-       }
-       return true;
-}
-
-struct si_vs_exports {
-       unsigned num;
-       unsigned offset[SI_MAX_VS_OUTPUTS];
-       LLVMValueRef inst[SI_MAX_VS_OUTPUTS];
-};
-
 static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
 {
        struct si_shader *shader = ctx->shader;
        struct tgsi_shader_info *info = &shader->selector->info;
-       LLVMBasicBlockRef bb;
-       struct si_vs_exports exports;
-       bool removed_any = false;
-
-       exports.num = 0;
 
        if (ctx->type == PIPE_SHADER_FRAGMENT ||
            ctx->type == PIPE_SHADER_COMPUTE ||
@@ -6886,84 +6821,11 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
            shader->key.as_ls)
                return;
 
-       /* Process all LLVM instructions. */
-       bb = LLVMGetFirstBasicBlock(ctx->main_fn);
-       while (bb) {
-               LLVMValueRef inst = LLVMGetFirstInstruction(bb);
-
-               while (inst) {
-                       LLVMValueRef cur = inst;
-                       inst = LLVMGetNextInstruction(inst);
-
-                       if (LLVMGetInstructionOpcode(cur) != LLVMCall)
-                               continue;
-
-                       LLVMValueRef callee = lp_get_called_value(cur);
-
-                       if (!lp_is_function(callee))
-                               continue;
-
-                       const char *name = LLVMGetValueName(callee);
-                       unsigned num_args = LLVMCountParams(callee);
-
-                       /* Check if this is an export instruction. */
-                       if ((num_args != 9 && num_args != 8) ||
-                           (strcmp(name, "llvm.SI.export") &&
-                            strcmp(name, "llvm.amdgcn.exp.f32")))
-                               continue;
-
-                       LLVMValueRef arg = LLVMGetOperand(cur, EXP_TARGET);
-                       unsigned target = LLVMConstIntGetZExtValue(arg);
-
-                       if (target < V_008DFC_SQ_EXP_PARAM)
-                               continue;
-
-                       target -= V_008DFC_SQ_EXP_PARAM;
-
-                       /* Eliminate constant value PARAM exports. */
-                       if (si_eliminate_const_output(ctx, cur, target)) {
-                               removed_any = true;
-                       } else {
-                               exports.offset[exports.num] = target;
-                               exports.inst[exports.num] = cur;
-                               exports.num++;
-                       }
-               }
-               bb = LLVMGetNextBasicBlock(bb);
-       }
-
-       /* Remove holes in export memory due to removed PARAM exports.
-        * This is done by renumbering all PARAM exports.
-        */
-       if (removed_any) {
-               ubyte current_offset[SI_MAX_VS_OUTPUTS];
-               unsigned new_count = 0;
-               unsigned out, i;
-
-               /* Make a copy of the offsets. We need the old version while
-                * we are modifying some of them. */
-               assert(sizeof(current_offset) ==
-                      sizeof(shader->info.vs_output_param_offset));
-               memcpy(current_offset, shader->info.vs_output_param_offset,
-                      sizeof(current_offset));
-
-               for (i = 0; i < exports.num; i++) {
-                       unsigned offset = exports.offset[i];
-
-                       for (out = 0; out < info->num_outputs; out++) {
-                               if (current_offset[out] != offset)
-                                       continue;
-
-                               LLVMSetOperand(exports.inst[i], EXP_TARGET,
-                                              LLVMConstInt(ctx->i32,
-                                                           V_008DFC_SQ_EXP_PARAM + new_count, 0));
-                               shader->info.vs_output_param_offset[out] = new_count;
-                               new_count++;
-                               break;
-                       }
-               }
-               shader->info.nr_param_exports = new_count;
-       }
+       ac_eliminate_const_vs_outputs(&ctx->ac,
+                                     ctx->main_fn,
+                                     shader->info.vs_output_param_offset,
+                                     info->num_outputs,
+                                     &shader->info.nr_param_exports);
 }
 
 static void si_count_scratch_private_memory(struct si_shader_context *ctx)
@@ -7537,7 +7399,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
        si_init_shader_ctx(&ctx, sscreen, shader, tm);
        ctx.separate_prolog = !is_monolithic;
 
-       memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED,
+       memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
               sizeof(shader->info.vs_output_param_offset));
 
        shader->info.uses_instanceid = sel->info.uses_instanceid;
index 05c0e62..3c01a3d 100644 (file)
@@ -482,18 +482,6 @@ struct si_shader_config {
        unsigned                        rsrc2;
 };
 
-enum {
-       /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
-       EXP_PARAM_OFFSET_0 = 0,
-       EXP_PARAM_OFFSET_31 = 31,
-       /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
-       EXP_PARAM_DEFAULT_VAL_0000 = 64,
-       EXP_PARAM_DEFAULT_VAL_0001,
-       EXP_PARAM_DEFAULT_VAL_1110,
-       EXP_PARAM_DEFAULT_VAL_1111,
-       EXP_PARAM_UNDEFINED = 255,
-};
-
 /* GCN-specific shader info. */
 struct si_shader_info {
        ubyte                   vs_output_param_offset[SI_MAX_VS_OUTPUTS];
index 8afc731..06ea99c 100644 (file)
@@ -39,6 +39,7 @@
 
 #include "util/disk_cache.h"
 #include "util/mesa-sha1.h"
+#include "ac_exp_param.h"
 
 /* SHADER_CACHE */
 
@@ -1506,7 +1507,7 @@ void si_init_shader_selector_async(void *job, int thread_index)
                        for (i = 0; i < sel->info.num_outputs; i++) {
                                unsigned offset = shader->info.vs_output_param_offset[i];
 
-                               if (offset <= EXP_PARAM_OFFSET_31)
+                               if (offset <= AC_EXP_PARAM_OFFSET_31)
                                        continue;
 
                                unsigned name = sel->info.output_semantic_name[i];
@@ -2001,18 +2002,18 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
                    index == vsinfo->output_semantic_index[j]) {
                        offset = vs->info.vs_output_param_offset[j];
 
-                       if (offset <= EXP_PARAM_OFFSET_31) {
+                       if (offset <= AC_EXP_PARAM_OFFSET_31) {
                                /* The input is loaded from parameter memory. */
                                ps_input_cntl |= S_028644_OFFSET(offset);
                        } else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
-                               if (offset == EXP_PARAM_UNDEFINED) {
+                               if (offset == AC_EXP_PARAM_UNDEFINED) {
                                        /* This can happen with depth-only rendering. */
                                        offset = 0;
                                } else {
                                        /* The input is a DEFAULT_VAL constant. */
-                                       assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
-                                              offset <= EXP_PARAM_DEFAULT_VAL_1111);
-                                       offset -= EXP_PARAM_DEFAULT_VAL_0000;
+                                       assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
+                                              offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
+                                       offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
                                }
 
                                ps_input_cntl = S_028644_OFFSET(0x20) |