r600g: Implement GL_ARB_sample_shading
author Glenn Kennard <glenn.kennard@gmail.com>
Wed, 10 Sep 2014 09:54:40 +0000 (11:54 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Sun, 12 Oct 2014 21:53:57 +0000 (23:53 +0200)
Also fixes two-sided lighting, which was broken at least
on pre-evergreen by commit b1eb00.

Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
12 files changed:
docs/GL3.txt
docs/relnotes/10.4.html
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/evergreend.h
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_shader.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c
src/gallium/drivers/r600/r600d.h
src/gallium/drivers/r600/sb/sb_bc_parser.cpp

index 5adc759..07d1d2c 100644 (file)
@@ -110,7 +110,7 @@ GL 4.0, GLSL 4.00:
   - Interpolation functions                            DONE ()
   - New overload resolution rules                      DONE
   GL_ARB_gpu_shader_fp64                               started (Dave)
-  GL_ARB_sample_shading                                DONE (i965, nv50, nvc0, radeonsi)
+  GL_ARB_sample_shading                                DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_shader_subroutine                             not started
   GL_ARB_tessellation_shader                           started (Chris, Ilia)
   GL_ARB_texture_buffer_object_rgb32                   DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
index e6813d3..64cbfae 100644 (file)
@@ -44,6 +44,7 @@ Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
+<li>GL_ARB_sample_shading on r600</li>
 <li>GL_ARB_texture_view on nv50, nvc0</li>
 </ul>
 
index 27a9ad9..78c1b68 100644 (file)
@@ -1400,7 +1400,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 
        /* MSAA. */
        if (rctx->b.chip_class == EVERGREEN)
-               rctx->framebuffer.atom.num_dw += 14; /* Evergreen */
+               rctx->framebuffer.atom.num_dw += 17; /* Evergreen */
        else
                rctx->framebuffer.atom.num_dw += 28; /* Cayman */
 
@@ -1420,8 +1420,22 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
        }
 
        rctx->framebuffer.atom.dirty = true;
+
+       r600_set_sample_locations_constant_buffer(rctx);
 }
 
+static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
+{
+       struct r600_context *rctx = (struct r600_context *)ctx;
+
+       if (rctx->ps_iter_samples == min_samples)
+               return;
+
+       rctx->ps_iter_samples = min_samples;
+       if (rctx->framebuffer.nr_samples > 1) {
+               rctx->framebuffer.atom.dirty = true;
+       }
+}
 
 /* 8xMSAA */
 static uint32_t sample_locs_8x[] = {
@@ -1475,7 +1489,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx,
        }
 }
 
-static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples)
+static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples)
 {
 
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
@@ -1508,10 +1522,12 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples)
                                     S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
                radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
                                     S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
+               r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
        } else {
                r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
                radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
                radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
+               r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
        }
 }
 
@@ -1672,10 +1688,10 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
        radeon_emit(cs, br); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */
 
        if (rctx->b.chip_class == EVERGREEN) {
-               evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples);
+               evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, rctx->ps_iter_samples);
        } else {
                cayman_emit_msaa_sample_locs(cs, rctx->framebuffer.nr_samples);
-               cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, 1);
+               cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, rctx->ps_iter_samples);
        }
 }
 
@@ -2432,8 +2448,6 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
                r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
        }
 
-       r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
-
        /* The cs checker requires this register to be set. */
        r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
 
@@ -2786,11 +2800,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
        struct r600_command_buffer *cb = &shader->command_buffer;
        struct r600_shader *rshader = &shader->shader;
        unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0;
-       int pos_index = -1, face_index = -1;
+       int pos_index = -1, face_index = -1, fixed_pt_position_index = -1;
        int ninterp = 0;
-       boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
-       unsigned spi_baryc_cntl, sid, tmp, num = 0;
-       unsigned z_export = 0, stencil_export = 0;
+       boolean have_perspective = FALSE, have_linear = FALSE;
+       static const unsigned spi_baryc_enable_bit[6] = {
+               S_0286E0_PERSP_SAMPLE_ENA(1),
+               S_0286E0_PERSP_CENTER_ENA(1),
+               S_0286E0_PERSP_CENTROID_ENA(1),
+               S_0286E0_LINEAR_SAMPLE_ENA(1),
+               S_0286E0_LINEAR_CENTER_ENA(1),
+               S_0286E0_LINEAR_CENTROID_ENA(1)
+       };
+       unsigned spi_baryc_cntl = 0, sid, tmp, num = 0;
+       unsigned z_export = 0, stencil_export = 0, mask_export = 0;
        unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
        uint32_t spi_ps_input_cntl[32];
 
@@ -2813,14 +2835,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                        if (face_index == -1)
                                face_index = i; /* lives in same register, same enable bit */
                }
+               else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID) {
+                       fixed_pt_position_index = i;
+               }
                else {
                        ninterp++;
-                       if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
-                               have_linear = TRUE;
-                       if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
-                               have_perspective = TRUE;
-                       if (rshader->input[i].centroid)
-                               have_centroid = TRUE;
+                       int k = eg_get_interpolator_index(
+                               rshader->input[i].interpolate,
+                               rshader->input[i].interpolate_location);
+                       if (k >= 0) {
+                               spi_baryc_cntl |= spi_baryc_enable_bit[k];
+                               have_perspective |= k < 3;
+                               have_linear |= !(k < 3);
+                       }
                }
 
                sid = rshader->input[i].spi_sid;
@@ -2852,17 +2879,22 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                        z_export = 1;
                if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
                        stencil_export = 1;
+               if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK &&
+                       rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0)
+                       mask_export = 1;
        }
        if (rshader->uses_kill)
                db_shader_control |= S_02880C_KILL_ENABLE(1);
 
        db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export);
        db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export);
+       db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);
 
        exports_ps = 0;
        for (i = 0; i < rshader->noutput; i++) {
                if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
-                   rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+                   rshader->output[i].name == TGSI_SEMANTIC_STENCIL ||
+                   rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK)
                        exports_ps |= 1;
        }
 
@@ -2878,6 +2910,8 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                ninterp = 1;
                have_perspective = TRUE;
        }
+       if (!spi_baryc_cntl)
+               spi_baryc_cntl |= spi_baryc_enable_bit[0];
 
        if (!have_perspective && !have_linear)
                have_perspective = TRUE;
@@ -2888,7 +2922,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
        spi_input_z = 0;
        if (pos_index != -1) {
                spi_ps_in_control_0 |=  S_0286CC_POSITION_ENA(1) |
-                       S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+                       S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) |
                        S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr);
                spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
        }
@@ -2898,14 +2932,10 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
                        S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
        }
-
-       spi_baryc_cntl = 0;
-       if (have_perspective)
-               spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) |
-                                 S_0286E0_PERSP_CENTROID_ENA(have_centroid);
-       if (have_linear)
-               spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) |
-                                 S_0286E0_LINEAR_CENTROID_ENA(have_centroid);
+       if (fixed_pt_position_index != -1) {
+               spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) |
+                       S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr);
+       }
 
        r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2);
        r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */
@@ -2924,7 +2954,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
        /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
 
        shader->db_shader_control = db_shader_control;
-       shader->ps_depth_export = z_export | stencil_export;
+       shader->ps_depth_export = z_export | stencil_export | mask_export;
 
        shader->sprite_coord_enable = sprite_coord_enable;
        if (rctx->rasterizer)
@@ -3446,6 +3476,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
        rctx->b.b.create_sampler_view = evergreen_create_sampler_view;
        rctx->b.b.set_framebuffer_state = evergreen_set_framebuffer_state;
        rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple;
+       rctx->b.b.set_min_samples = evergreen_set_min_samples;
        rctx->b.b.set_scissor_states = evergreen_set_scissor_states;
 
        if (rctx->b.chip_class == EVERGREEN)
index 784d495..4989996 100644 (file)
 #define   S_02880C_KILL_ENABLE(x)                      (((x) & 0x1) << 6)
 #define   G_02880C_KILL_ENABLE(x)                      (((x) >> 6) & 0x1)
 #define   C_02880C_KILL_ENABLE                         0xFFFFFFBF
+#define   S_02880C_MASK_EXPORT_ENABLE(x)               (((x) & 0x1) << 8)
+#define   G_02880C_MASK_EXPORT_ENABLE(x)               (((x) >> 8) & 0x1)
+#define   C_02880C_MASK_EXPORT_ENABLE                  0XFFFFFEFF
 #define   S_02880C_DUAL_EXPORT_ENABLE(x)               (((x) & 0x1) << 9)
 #define   G_02880C_DUAL_EXPORT_ENABLE(x)               (((x) >> 9) & 0x1)
 #define   C_02880C_DUAL_EXPORT_ENABLE                  0xFFFFFDFF
index c6459d8..3962fee 100644 (file)
@@ -265,6 +265,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
        case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
        case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+       case PIPE_CAP_SAMPLE_SHADING:
                return 1;
 
        case PIPE_CAP_COMPUTE:
@@ -319,7 +320,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
        case PIPE_CAP_VERTEX_COLOR_CLAMPED:
        case PIPE_CAP_USER_VERTEX_BUFFERS:
-       case PIPE_CAP_SAMPLE_SHADING:
        case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
        case PIPE_CAP_DRAW_INDIRECT:
        case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
index 2df168f..fa9d34b 100644 (file)
 #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
 #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
 #define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3)
+/* Currently R600_MAX_CONST_BUFFERS is too large, the hardware only has 16 buffers, but the driver is
+ * trying to use 17. Avoid accidentally aliasing with user UBOs for SAMPLE_POSITIONS by using an id<16.
+ * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
+ *
+ * Fixing this properly would require the driver to combine its buffers into a single hardware buffer,
+ * which would also allow supporting the d3d 11 mandated minimum of 15 user const buffers.
+ */
+#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
 
 #define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
 
@@ -452,6 +460,7 @@ struct r600_context {
        bool                            force_blend_disable;
        boolean                         dual_src_blend;
        unsigned                        zwritemask;
+       int                                     ps_iter_samples;
 
        /* Index buffer. */
        struct pipe_index_buffer        index_buffer;
@@ -639,6 +648,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
 void r600_sampler_states_dirty(struct r600_context *rctx,
                               struct r600_sampler_states *state);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
+void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
 uint32_t r600_translate_stencil_op(int s_op);
 uint32_t r600_translate_fill(uint32_t func);
 unsigned r600_tex_wrap(unsigned wrap);
index 9f10c20..9e9a557 100644 (file)
@@ -64,6 +64,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                                 struct r600_pipe_shader *pipeshader,
                                 struct r600_shader_key key);
 
+
 static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
                            int size, unsigned comp_mask) {
 
@@ -267,6 +268,11 @@ struct r600_shader_src {
        uint32_t                                value[4];
 };
 
+struct eg_interp {
+       boolean                                 enabled;
+       unsigned                                ij_index;
+};
+
 struct r600_shader_ctx {
        struct tgsi_shader_info                 info;
        struct tgsi_parse_context               parse;
@@ -283,13 +289,11 @@ struct r600_shader_ctx {
        uint32_t                                max_driver_temp_used;
        boolean use_llvm;
        /* needed for evergreen interpolation */
-       boolean                                 input_centroid;
-       boolean                                 input_linear;
-       boolean                                 input_perspective;
-       int                                     num_interp_gpr;
+       struct eg_interp                eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
        /* evergreen/cayman also store sample mask in face register */
        int                                     face_gpr;
-       boolean                                 has_samplemask;
+       /* sample id is .w component stored in fixed point position register */
+       int                                     fixed_pt_position_gpr;
        int                                     colors_used;
        boolean                 clip_vertex_write;
        unsigned                cv_output;
@@ -320,6 +324,12 @@ static int tgsi_endif(struct r600_shader_ctx *ctx);
 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
 static int tgsi_endloop(struct r600_shader_ctx *ctx);
 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
+                                unsigned int cb_idx, unsigned int offset, unsigned ar_chan,
+                                unsigned int dst_reg);
+static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
+                       const struct r600_shader_src *shader_src,
+                       unsigned chan);
 
 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
 {
@@ -364,27 +374,41 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
        return 0;
 }
 
-static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
-               int input)
+int eg_get_interpolator_index(unsigned interpolate, unsigned location)
 {
-       int ij_index = 0;
+       if (interpolate == TGSI_INTERPOLATE_COLOR ||
+               interpolate == TGSI_INTERPOLATE_LINEAR ||
+               interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+       {
+               int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
+               int loc;
 
-       if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
-               if (ctx->shader->input[input].centroid)
-                       ij_index++;
-       } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
-               /* if we have perspective add one */
-               if (ctx->input_perspective)  {
-                       ij_index++;
-                       /* if we have perspective centroid */
-                       if (ctx->input_centroid)
-                               ij_index++;
+               switch(location) {
+               case TGSI_INTERPOLATE_LOC_CENTER:
+                       loc = 1;
+                       break;
+               case TGSI_INTERPOLATE_LOC_CENTROID:
+                       loc = 2;
+                       break;
+               case TGSI_INTERPOLATE_LOC_SAMPLE:
+               default:
+                       loc = 0; break;
                }
-               if (ctx->shader->input[input].centroid)
-                       ij_index++;
+
+               return is_linear * 3 + loc;
        }
 
-       ctx->shader->input[input].ij_index = ij_index;
+       return -1;
+}
+
+static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
+               int input)
+{
+       int i = eg_get_interpolator_index(
+               ctx->shader->input[input].interpolate,
+               ctx->shader->input[input].interpolate_location);
+       assert(i >= 0);
+       ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
 }
 
 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
@@ -582,13 +606,15 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                ctx->shader->input[i].name = d->Semantic.Name;
                ctx->shader->input[i].sid = d->Semantic.Index;
                ctx->shader->input[i].interpolate = d->Interp.Interpolate;
-               ctx->shader->input[i].centroid = d->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID;
+               ctx->shader->input[i].interpolate_location = d->Interp.Location;
                ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
                if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
                        ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
                        switch (ctx->shader->input[i].name) {
                        case TGSI_SEMANTIC_FACE:
-                               if (ctx->face_gpr == -1)
+                               if (ctx->face_gpr != -1)
+                                       ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
+                               else
                                        ctx->face_gpr = ctx->shader->input[i].gpr;
                                break;
                        case TGSI_SEMANTIC_COLOR:
@@ -679,14 +705,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                break;
 
        case TGSI_FILE_SYSTEM_VALUE:
-               if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK) {
-                       ctx->has_samplemask = true;
-                       /* lives in Front Face GPR */
-                       if (ctx->face_gpr == -1)
-                               ctx->face_gpr = ctx->file_offset[TGSI_FILE_SYSTEM_VALUE] + d->Range.First;
-                       break;
-               }
-               else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+               if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
+                       d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
+                       d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
+                       break; /* Already handled from allocate_system_value_inputs */
+               } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
                        if (!ctx->native_integers) {
                                struct r600_bytecode_alu alu;
                                memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -720,12 +743,69 @@ static int r600_get_temp(struct r600_shader_ctx *ctx)
        return ctx->temp_reg + ctx->max_driver_temp_used++;
 }
 
+static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset)
+{
+       struct tgsi_parse_context parse;
+       struct {
+               boolean enabled;
+               int *reg;
+               unsigned name, alternate_name;
+       } inputs[2] = {
+               { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */
+
+               { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
+       };
+       int i, k, num_regs = 0;
+
+       if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
+               return 0;
+       }
+
+       while (!tgsi_parse_end_of_tokens(&parse)) {
+               tgsi_parse_token(&parse);
+
+               if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
+                       struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
+                       if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
+                               for (k = 0; k < Elements(inputs); k++) {
+                                       if (d->Semantic.Name == inputs[k].name ||
+                                               d->Semantic.Name == inputs[k].alternate_name) {
+                                               inputs[k].enabled = true;
+                                       }
+                               }
+                       }
+               }
+       }
+
+       tgsi_parse_free(&parse);
+
+       for (i = 0; i < Elements(inputs); i++) {
+               boolean enabled = inputs[i].enabled;
+               int *reg = inputs[i].reg;
+               unsigned name = inputs[i].name;
+
+               if (enabled) {
+                       int gpr = gpr_offset + num_regs++;
+
+                       // add to inputs, allocate a gpr
+                       k = ctx->shader->ninput ++;
+                       ctx->shader->input[k].name = name;
+                       ctx->shader->input[k].sid = 0;
+                       ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT;
+                       ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER;
+                       *reg = ctx->shader->input[k].gpr = gpr;
+               }
+       }
+
+       return gpr_offset + num_regs;
+}
+
 /*
  * for evergreen we need to scan the shader to find the number of GPRs we need to
- * reserve for interpolation.
+ * reserve for interpolation and system values
  *
  * we need to know if we are going to emit
- * any centroid inputs
+ * any sample or centroid inputs
  * if perspective and linear are required
 */
 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
@@ -733,39 +813,92 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
        int i;
        int num_baryc;
 
-       ctx->input_linear = FALSE;
-       ctx->input_perspective = FALSE;
-       ctx->input_centroid = FALSE;
-       ctx->num_interp_gpr = 1;
+       memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators));
 
-       /* any centroid inputs */
        for (i = 0; i < ctx->info.num_inputs; i++) {
-               /* skip position/face */
+               int k;
+               /* skip position/face/mask/sampleid */
                if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
                    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE ||
-                   ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK)
+                   ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK ||
+                   ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID)
                        continue;
-               if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
-                       ctx->input_linear = TRUE;
-               if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
-                       ctx->input_perspective = TRUE;
-               if (ctx->info.input_interpolate_loc[i] == TGSI_INTERPOLATE_LOC_CENTROID)
-                       ctx->input_centroid = TRUE;
+
+               k = eg_get_interpolator_index(
+                       ctx->info.input_interpolate[i],
+                       ctx->info.input_interpolate_loc[i]);
+               if (k >= 0)
+                       ctx->eg_interpolators[k].enabled = TRUE;
        }
 
+       /* assign gpr to each interpolator according to priority */
        num_baryc = 0;
-       /* ignoring sample for now */
-       if (ctx->input_perspective)
-               num_baryc++;
-       if (ctx->input_linear)
-               num_baryc++;
-       if (ctx->input_centroid)
-               num_baryc *= 2;
-
-       ctx->num_interp_gpr += (num_baryc + 1) >> 1;
-
-       /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
-       return ctx->num_interp_gpr;
+       for (i = 0; i < Elements(ctx->eg_interpolators); i++) {
+               if (ctx->eg_interpolators[i].enabled) {
+                       ctx->eg_interpolators[i].ij_index = num_baryc;
+                       num_baryc ++;
+               }
+       }
+
+       /* XXX PULL MODEL and LINE STIPPLE */
+
+       num_baryc = (num_baryc + 1) >> 1;
+       return allocate_system_value_inputs(ctx, num_baryc);
+}
+
+/* sample_id == NULL means fetch for current sample */
+static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel)
+{
+       struct r600_bytecode_vtx vtx;
+       int r, t1;
+
+       assert(ctx->fixed_pt_position_gpr != -1);
+
+       t1 = r600_get_temp(ctx);
+
+       memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+       vtx.op = FETCH_OP_VFETCH;
+       vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
+       vtx.fetch_type = 2;     /* VTX_FETCH_NO_INDEX_OFFSET */
+       if (sample_id == NULL) {
+               vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
+               vtx.src_sel_x = 3;
+       }
+       else {
+               struct r600_bytecode_alu alu;
+
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               r600_bytecode_src(&alu.src[0], sample_id, chan_sel);
+               alu.dst.sel = t1;
+               alu.dst.write = 1;
+               alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+
+               vtx.src_gpr = t1;
+               vtx.src_sel_x = 0;
+       }
+       vtx.mega_fetch_count = 16;
+       vtx.dst_gpr = t1;
+       vtx.dst_sel_x = 0;
+       vtx.dst_sel_y = 1;
+       vtx.dst_sel_z = 7;
+       vtx.dst_sel_w = 7;
+       vtx.data_format = FMT_32_32_32_32_FLOAT;
+       vtx.num_format_all = 2;
+       vtx.format_comp_all = 1;
+       vtx.use_const_fields = 0;
+       vtx.offset = 1; // first element is size of buffer
+       vtx.endian = r600_endian_swap(32);
+       vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+
+       r = r600_bytecode_add_vtx(ctx->bc, &vtx);
+       if (r)
+               return r;
+
+       return t1;
 }
 
 static void tgsi_src(struct r600_shader_ctx *ctx,
@@ -797,10 +930,22 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
        } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
                if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) {
                        r600_src->swizzle[0] = 2; // Z value
-                       r600_src->swizzle[0] = 2;
-                       r600_src->swizzle[0] = 2;
-                       r600_src->swizzle[0] = 2;
+                       r600_src->swizzle[1] = 2;
+                       r600_src->swizzle[2] = 2;
+                       r600_src->swizzle[3] = 2;
                        r600_src->sel = ctx->face_gpr;
+               } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
+                       r600_src->swizzle[0] = 3; // W value
+                       r600_src->swizzle[1] = 3;
+                       r600_src->swizzle[2] = 3;
+                       r600_src->swizzle[3] = 3;
+                       r600_src->sel = ctx->fixed_pt_position_gpr;
+               } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
+                       r600_src->swizzle[0] = 0;
+                       r600_src->swizzle[1] = 1;
+                       r600_src->swizzle[2] = 4;
+                       r600_src->swizzle[3] = 4;
+                       r600_src->sel = load_sample_position(ctx, NULL, -1);
                } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
                        r600_src->swizzle[0] = 3;
                        r600_src->swizzle[1] = 3;
@@ -1612,7 +1757,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        ctx.gs_next_vertex = 0;
 
        ctx.face_gpr = -1;
-       ctx.has_samplemask = false;
+       ctx.fixed_pt_position_gpr = -1;
        ctx.fragcoord_input = -1;
        ctx.colors_used = 0;
        ctx.clip_vertex_write = 0;
@@ -1661,8 +1806,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                        r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
                }
        }
-       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
-               ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
+       if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
+               if (ctx.bc->chip_class >= EVERGREEN)
+                       ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
+               else
+                       ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
        }
        if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
                /* FIXME 1 would be enough in some cases (3 or less input vertices) */
@@ -1775,14 +1923,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        
        shader->ring_item_size = ctx.next_ring_offset;
 
-       /* Need to tell setup to program FACE register */
-       if (ctx.has_samplemask && ctx.face_gpr != -1) {
-               i = ctx.shader->ninput++;
-               ctx.shader->input[i].name = TGSI_SEMANTIC_SAMPLEMASK;
-               ctx.shader->input[i].spi_sid = 0;
-               ctx.shader->input[i].gpr = ctx.face_gpr;
-       }
-
        /* Process two side if needed */
        if (shader->two_side && ctx.colors_used) {
                int i, count = ctx.shader->ninput;
@@ -1795,6 +1935,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
                                ctx.info.file_max[TGSI_FILE_INPUT] + 1;
 
+               /* if two sided and neither face nor sample mask is used by shader, ensure face_gpr is emitted */
                if (ctx.face_gpr == -1) {
                        i = ctx.shader->ninput++;
                        ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
@@ -2162,6 +2303,13 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                                        output[j].swizzle_y = 1;
                                        output[j].swizzle_z = output[j].swizzle_w = 7;
                                        output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                               } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
+                                       output[j].array_base = 61;
+                                       output[j].swizzle_x = 7;
+                                       output[j].swizzle_y = 7;
+                                       output[j].swizzle_z = 0;
+                                       output[j].swizzle_w = 7;
+                                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
                                } else {
                                        R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
                                        r = -EINVAL;
index 4b27ede..20829fd 100644 (file)
@@ -33,7 +33,7 @@ struct r600_shader_io {
        int                     spi_sid;
        unsigned                interpolate;
        unsigned                ij_index;
-       boolean                 centroid;
+       unsigned        interpolate_location; //  TGSI_INTERPOLATE_LOC_CENTER, CENTROID, SAMPLE
        unsigned                lds_pos; /* for evergreen */
        unsigned                back_color_input;
        unsigned                write_mask;
@@ -115,4 +115,8 @@ struct r600_pipe_shader {
        unsigned                ps_depth_export;
 };
 
+/* Return the interpolator table index (0-5) for a TGSI_INTERPOLATE_LINEAR/PERSPECTIVE mode combined with a
+ TGSI_INTERPOLATE_LOC_* location. Other input values return -1. NOTE(review): the location list was originally written as CENTER/SAMPLE/COUNT; presumably CENTER/CENTROID/SAMPLE is meant - confirm against the definition. */
+int eg_get_interpolator_index(unsigned interpolate, unsigned location);
+
 #endif
index 9ca6171..1f933ef 100644 (file)
@@ -486,7 +486,12 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
 
        sc_mode_cntl = S_028A4C_MSAA_ENABLE(state->multisample) |
                       S_028A4C_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
-                      S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1);
+                      S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
+                      S_028A4C_PS_ITER_SAMPLE(state->multisample && rctx->ps_iter_samples > 1);
+       if (rctx->b.family == CHIP_RV770) {
+               /* workaround possible rendering corruption on RV770 with hyperz together with sample shading */
+               sc_mode_cntl |= S_028A4C_TILE_COVER_DISABLE(state->multisample && rctx->ps_iter_samples > 1);
+       }
        if (rctx->b.chip_class >= R700) {
                sc_mode_cntl |= S_028A4C_FORCE_EOV_REZ_ENABLE(1) |
                                S_028A4C_R700_ZMM_LINE_OFFSET(1) |
@@ -1245,6 +1250,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
        }
 
        rctx->framebuffer.atom.dirty = true;
+
+       r600_set_sample_locations_constant_buffer(rctx);
 }
 
 static uint32_t sample_locs_2x[] = {
@@ -1524,6 +1531,21 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
        r600_emit_msaa_state(rctx, rctx->framebuffer.nr_samples);
 }
 
+static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
+{ /* set_min_samples hook: stores min_samples in rctx->ps_iter_samples and marks dependent state atoms dirty */
+       struct r600_context *rctx = (struct r600_context *)ctx;
+       /* nothing to do when the requested value is already stored */
+       if (rctx->ps_iter_samples == min_samples)
+               return;
+       /* remember the new value; atoms are only dirtied while a multisampled framebuffer is bound */
+       rctx->ps_iter_samples = min_samples;
+       if (rctx->framebuffer.nr_samples > 1) {
+               rctx->rasterizer_state.atom.dirty = true;
+               if (rctx->b.chip_class == R600) /* db_misc_state is additionally dirtied on chip_class R600 only */
+                       rctx->db_misc_state.atom.dirty = true;
+       }
+}
+
 static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
@@ -1603,6 +1625,10 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
        } else {
                db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
        }
+       if (rctx->b.chip_class == R600 && rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) {
+               /* sample shading and hyperz causes lockups on R6xx chips */
+               db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
+       }
        if (a->flush_depthstencil_through_cb) {
                assert(a->copy_depth || a->copy_stencil);
 
@@ -2418,10 +2444,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
        struct r600_command_buffer *cb = &shader->command_buffer;
        struct r600_shader *rshader = &shader->shader;
        unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
-       int pos_index = -1, face_index = -1;
+       int pos_index = -1, face_index = -1, fixed_pt_position_index = -1;
        unsigned tmp, sid, ufi = 0;
        int need_linear = 0;
-       unsigned z_export = 0, stencil_export = 0;
+       unsigned z_export = 0, stencil_export = 0, mask_export = 0;
        unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
 
        if (!cb->buf) {
@@ -2434,8 +2460,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
        for (i = 0; i < rshader->ninput; i++) {
                if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
                        pos_index = i;
-               if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
+               if (rshader->input[i].name == TGSI_SEMANTIC_FACE && face_index == -1)
                        face_index = i;
+               if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID)
+                       fixed_pt_position_index = i;
 
                sid = rshader->input[i].spi_sid;
 
@@ -2452,9 +2480,12 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
                        tmp |= S_028644_PT_SPRITE_TEX(1);
                }
 
-               if (rshader->input[i].centroid)
+               if (rshader->input[i].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID)
                        tmp |= S_028644_SEL_CENTROID(1);
 
+               if (rshader->input[i].interpolate_location == TGSI_INTERPOLATE_LOC_SAMPLE)
+                       tmp |= S_028644_SEL_SAMPLE(1);
+
                if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) {
                        need_linear = 1;
                        tmp |= S_028644_SEL_LINEAR(1);
@@ -2469,16 +2500,21 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
                        z_export = 1;
                if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
                        stencil_export = 1;
+               if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK &&
+                       rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0)
+                       mask_export = 1;
        }
        db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export);
        db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(stencil_export);
+       db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);
        if (rshader->uses_kill)
                db_shader_control |= S_02880C_KILL_ENABLE(1);
 
        exports_ps = 0;
        for (i = 0; i < rshader->noutput; i++) {
                if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
-                   rshader->output[i].name == TGSI_SEMANTIC_STENCIL) {
+                   rshader->output[i].name == TGSI_SEMANTIC_STENCIL ||
+                   rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
                        exports_ps |= 1;
                }
        }
@@ -2497,9 +2533,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
        spi_input_z = 0;
        if (pos_index != -1) {
                spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
-                                       S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+                                       S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) |
                                        S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
-                                       S_0286CC_BARYC_SAMPLE_CNTL(1));
+                                       S_0286CC_BARYC_SAMPLE_CNTL(1)) |
+                                       S_0286CC_POSITION_SAMPLE(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_SAMPLE);
                spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
        }
 
@@ -2508,6 +2545,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
                spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
                        S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
        }
+       if (fixed_pt_position_index != -1) {
+               spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) |
+                       S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr);
+       }
 
        /* HW bug in original R600 */
        if (rctx->b.family == CHIP_R600)
@@ -2531,7 +2572,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 
        /* only set some bits here, the other bits are set in the dsa state */
        shader->db_shader_control = db_shader_control;
-       shader->ps_depth_export = z_export | stencil_export;
+       shader->ps_depth_export = z_export | stencil_export | mask_export;
 
        shader->sprite_coord_enable = sprite_coord_enable;
        if (rctx->rasterizer)
@@ -3046,6 +3087,7 @@ void r600_init_state_functions(struct r600_context *rctx)
        rctx->b.b.create_sampler_view = r600_create_sampler_view;
        rctx->b.b.set_framebuffer_state = r600_set_framebuffer_state;
        rctx->b.b.set_polygon_stipple = r600_set_polygon_stipple;
+       rctx->b.b.set_min_samples = r600_set_min_samples;
        rctx->b.b.set_scissor_states = r600_set_scissor_states;
        rctx->b.b.get_sample_position = r600_get_sample_position;
        rctx->b.dma_copy = r600_dma_copy;
index d9174a5..68365f9 100644 (file)
@@ -1085,6 +1085,26 @@ static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int s
        pipe_resource_reference(&cb.buffer, NULL);
 }
 
+/* Query the position of every sample of the bound framebuffer and bind the results as fragment shader constant buffer R600_SAMPLE_POSITIONS_CONST_BUFFER (one 4-float slot per sample). */
+void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
+{
+       struct pipe_constant_buffer constbuf = {0};
+       float values[4*16] = {0.0f}; /* 4 floats per sample, room for 16 samples, zero-initialized */
+       int i;
+       struct pipe_context *ctx = &rctx->b.b;
+       /* values must have a 4-float slot for each framebuffer sample; sample i is written starting at values[4*i] */
+       assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
+       for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
+               ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]);
+       }
+       /* bind the local array as a user buffer; size is nr_samples slots of 4 floats (assumes sizeof(float) == 4) */
+       constbuf.user_buffer = values;
+       constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
+       ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
+               R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf);
+       pipe_resource_reference(&constbuf.buffer, NULL); /* release any reference left in constbuf.buffer */
+}
+
 static void update_shader_atom(struct pipe_context *ctx,
                               struct r600_shader_state *state,
                               struct r600_pipe_shader *shader)
index 3cf7b88..6a5b964 100644 (file)
 #define   S_02880C_KILL_ENABLE(x)                      (((x) & 0x1) << 6)
 #define   G_02880C_KILL_ENABLE(x)                      (((x) >> 6) & 0x1)
 #define   C_02880C_KILL_ENABLE                         0xFFFFFFBF
+#define   S_02880C_MASK_EXPORT_ENABLE(x)               (((x) & 0x1) << 8)
+#define   G_02880C_MASK_EXPORT_ENABLE(x)               (((x) >> 8) & 0x1)
+#define   C_02880C_MASK_EXPORT_ENABLE                  0xFFFFFEFF
 #define   S_02880C_DUAL_EXPORT_ENABLE(x)               (((x) & 0x1) << 9)
 #define   G_02880C_DUAL_EXPORT_ENABLE(x)               (((x) >> 9) & 0x1)
 #define   C_02880C_DUAL_EXPORT_ENABLE                  0xFFFFFDFF
index 346ccc9..d787e5b 100644 (file)
@@ -147,25 +147,28 @@ int bc_parser::parse_decls() {
        bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
                        && sh->target == TARGET_PS;
 
-       unsigned linear = 0, persp = 0, centroid = 1;
+       bool ij_interpolators[6];
+       memset(ij_interpolators, 0, sizeof(ij_interpolators));
 
        for (unsigned i = 0; i < pshader->ninput; ++i) {
                r600_shader_io & in = pshader->input[i];
                bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
                sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
                if (ps_interp && in.spi_sid) {
-                       if (in.interpolate == TGSI_INTERPOLATE_LINEAR ||
-                                       in.interpolate == TGSI_INTERPOLATE_COLOR)
-                               linear = 1;
-                       else if (in.interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
-                               persp = 1;
-                       if (in.centroid)
-                               centroid = 2;
+                       int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
+                       if (k >= 0)
+                               ij_interpolators[k] |= true;
                }
        }
 
        if (ps_interp) {
-               unsigned mask = (1 << (2 * (linear + persp) * centroid)) - 1;
+               /* add the egcm ij interpolators to live inputs */
+               unsigned num_ij = 0;
+               for (unsigned i = 0; i < Elements(ij_interpolators); i++) {
+                       num_ij += ij_interpolators[i];
+               }
+
+               unsigned mask = (1 << (2 * num_ij)) - 1;
                unsigned gpr = 0;
 
                while (mask) {