radeonsi: do force_persample_interp in shaders for non-trivial cases
author Marek Olšák <marek.olsak@amd.com>
Mon, 28 Sep 2015 21:46:04 +0000 (23:46 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Sat, 3 Oct 2015 20:06:09 +0000 (22:06 +0200)
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index a3df648..32a702f 100644 (file)
@@ -855,6 +855,56 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
        }
 }
 
+/* Return the (i,j) value for "param". Not for use by explicit INTERP opcodes. */
+static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
+                                    unsigned param)
+{
+       struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+       unsigned sample_param = 0;
+       LLVMValueRef default_ij, sample_ij, force_sample;
+
+       default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param);
+
+       /* If "param" isn't center/centroid, just return the parameter.
+        *
+        * If the shader only uses one set of (i,j), "si_emit_spi_ps_input" can
+        * switch between center/centroid and sample without shader changes.
+        */
+       switch (param) {
+       case SI_PARAM_PERSP_CENTROID:
+       case SI_PARAM_PERSP_CENTER:
+               if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp)
+                       return default_ij;
+
+               sample_param = SI_PARAM_PERSP_SAMPLE;
+               break;
+
+       case SI_PARAM_LINEAR_CENTROID:
+       case SI_PARAM_LINEAR_CENTER:
+               if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear)
+                       return default_ij;
+
+               sample_param = SI_PARAM_LINEAR_SAMPLE;
+               break;
+
+       default:
+               return default_ij;
+       }
+
+       /* Otherwise, select (i,j) using bit 0 of the PS_STATE_BITS user SGPR. */
+       sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param);
+
+       /* TODO: this can be done more efficiently by switching between
+        * 2 prologs.
+        */
+       force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                   SI_PARAM_PS_STATE_BITS);
+       force_sample = LLVMBuildTrunc(gallivm->builder, force_sample,
+                                     LLVMInt1TypeInContext(gallivm->context), "");
+       return LLVMBuildSelect(gallivm->builder, force_sample,
+                              sample_ij, default_ij, "");
+}
+
 static void declare_input_fs(
        struct radeon_llvm_context *radeon_bld,
        unsigned input_index,
@@ -925,7 +975,7 @@ static void declare_input_fs(
        if (interp_param_idx == -1)
                return;
        else if (interp_param_idx)
-               interp_param = LLVMGetParam(main_fn, interp_param_idx);
+               interp_param = get_interp_param(si_shader_ctx, interp_param_idx);
 
        /* fs.constant returns the param from the middle vertex, so it's not
         * really useful for flat shading. It's meant to be used for custom
@@ -3458,6 +3508,7 @@ static void create_function(struct si_shader_context *si_shader_ctx)
 
        case TGSI_PROCESSOR_FRAGMENT:
                params[SI_PARAM_ALPHA_REF] = f32;
+               params[SI_PARAM_PS_STATE_BITS] = i32;
                params[SI_PARAM_PRIM_MASK] = i32;
                last_sgpr = SI_PARAM_PRIM_MASK;
                params[SI_PARAM_PERSP_SAMPLE] = v2i32;
index 2305b99..b92fa02 100644 (file)
@@ -88,6 +88,7 @@ struct radeon_shader_reloc;
 #define SI_SGPR_TCS_OUT_LAYOUT 9  /* TCS & TES only */
 #define SI_SGPR_TCS_IN_LAYOUT  10 /* TCS only */
 #define SI_SGPR_ALPHA_REF      8  /* PS only */
+#define SI_SGPR_PS_STATE_BITS  9  /* PS only */
 
 #define SI_VS_NUM_USER_SGPR    12
 #define SI_LS_NUM_USER_SGPR    13
@@ -95,7 +96,7 @@ struct radeon_shader_reloc;
 #define SI_TES_NUM_USER_SGPR   10
 #define SI_GS_NUM_USER_SGPR    8
 #define SI_GSCOPY_NUM_USER_SGPR        4
-#define SI_PS_NUM_USER_SGPR    9
+#define SI_PS_NUM_USER_SGPR    10
 
 /* LLVM function parameter indices */
 #define SI_PARAM_RW_BUFFERS    0
@@ -148,23 +149,27 @@ struct radeon_shader_reloc;
 
 /* PS only parameters */
 #define SI_PARAM_ALPHA_REF             4
-#define SI_PARAM_PRIM_MASK             5
-#define SI_PARAM_PERSP_SAMPLE          6
-#define SI_PARAM_PERSP_CENTER          7
-#define SI_PARAM_PERSP_CENTROID                8
-#define SI_PARAM_PERSP_PULL_MODEL      9
-#define SI_PARAM_LINEAR_SAMPLE         10
-#define SI_PARAM_LINEAR_CENTER         11
-#define SI_PARAM_LINEAR_CENTROID       12
-#define SI_PARAM_LINE_STIPPLE_TEX      13
-#define SI_PARAM_POS_X_FLOAT           14
-#define SI_PARAM_POS_Y_FLOAT           15
-#define SI_PARAM_POS_Z_FLOAT           16
-#define SI_PARAM_POS_W_FLOAT           17
-#define SI_PARAM_FRONT_FACE            18
-#define SI_PARAM_ANCILLARY             19
-#define SI_PARAM_SAMPLE_COVERAGE       20
-#define SI_PARAM_POS_FIXED_PT          21
+/* Bits:
+ * 0: force_persample_interp
+ */
+#define SI_PARAM_PS_STATE_BITS         5
+#define SI_PARAM_PRIM_MASK             6
+#define SI_PARAM_PERSP_SAMPLE          7
+#define SI_PARAM_PERSP_CENTER          8
+#define SI_PARAM_PERSP_CENTROID                9
+#define SI_PARAM_PERSP_PULL_MODEL      10
+#define SI_PARAM_LINEAR_SAMPLE         11
+#define SI_PARAM_LINEAR_CENTER         12
+#define SI_PARAM_LINEAR_CENTROID       13
+#define SI_PARAM_LINE_STIPPLE_TEX      14
+#define SI_PARAM_POS_X_FLOAT           15
+#define SI_PARAM_POS_Y_FLOAT           16
+#define SI_PARAM_POS_Z_FLOAT           17
+#define SI_PARAM_POS_W_FLOAT           18
+#define SI_PARAM_FRONT_FACE            19
+#define SI_PARAM_ANCILLARY             20
+#define SI_PARAM_SAMPLE_COVERAGE       21
+#define SI_PARAM_POS_FIXED_PT          22
 
 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
 
@@ -182,6 +187,14 @@ struct si_shader_selector {
        /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
        unsigned        type;
 
+       /* Whether the shader has to use a conditional assignment to
+        * choose between weights when emulating
+        * pipe_rasterizer_state::force_persample_interp.
+        * If false, "si_emit_spi_ps_input" will take care of it instead.
+        */
+       bool            forces_persample_interp_for_persp;
+       bool            forces_persample_interp_for_linear;
+
        unsigned        gs_output_prim;
        unsigned        gs_max_out_vertices;
        unsigned        gs_num_invocations;
index aeb2879..77c585f 100644 (file)
@@ -667,6 +667,34 @@ static void *si_create_shader_state(struct pipe_context *ctx,
        tgsi_scan_shader(state->tokens, &sel->info);
        p_atomic_inc(&sscreen->b.num_shaders_created);
 
+       /* First set which opcode uses which (i,j) pair. */
+       if (sel->info.uses_persp_opcode_interp_centroid)
+               sel->info.uses_persp_centroid = true;
+
+       if (sel->info.uses_linear_opcode_interp_centroid)
+               sel->info.uses_linear_centroid = true;
+
+       if (sel->info.uses_persp_opcode_interp_offset ||
+           sel->info.uses_persp_opcode_interp_sample)
+               sel->info.uses_persp_center = true;
+
+       if (sel->info.uses_linear_opcode_interp_offset ||
+           sel->info.uses_linear_opcode_interp_sample)
+               sel->info.uses_linear_center = true;
+
+       /* Determine if the shader has to use a conditional assignment when
+        * emulating force_persample_interp.
+        */
+       sel->forces_persample_interp_for_persp =
+               sel->info.uses_persp_center +
+               sel->info.uses_persp_centroid +
+               sel->info.uses_persp_sample >= 2;
+
+       sel->forces_persample_interp_for_linear =
+               sel->info.uses_linear_center +
+               sel->info.uses_linear_centroid +
+               sel->info.uses_linear_sample >= 2;
+
        switch (pipe_shader_type) {
        case PIPE_SHADER_GEOMETRY:
                sel->gs_output_prim =
@@ -1100,6 +1128,12 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom
        radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2);
        radeon_emit(cs, input_ena);
        radeon_emit(cs, input_ena);
+
+       if (ps->selector->forces_persample_interp_for_persp ||
+           ps->selector->forces_persample_interp_for_linear)
+               radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
+                                     SI_SGPR_PS_STATE_BITS * 4,
+                                 sctx->force_persample_interp);
 }
 
 /* Initialize state related to ESGS / GSVS ring buffers */