radeonsi: Use FP16 shader export format when necessary / possible.
author Michel Dänzer <michel.daenzer@amd.com>
Wed, 22 Aug 2012 16:15:36 +0000 (18:15 +0200)
committer Michel Dänzer <michel@daenzer.net>
Mon, 27 Aug 2012 09:51:56 +0000 (11:51 +0200)
Fixes piglit fbo-blending-formats.

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/radeon/SIInstructions.td
src/gallium/drivers/radeon/SIIntrinsics.td
src/gallium/drivers/radeonsi/radeonsi_pipe.h
src/gallium/drivers/radeonsi/radeonsi_shader.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_draw.c

index f09d604..3047321 100644 (file)
@@ -726,7 +726,9 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
 ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
 ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
 ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
-////def V_CVT_PKRTZ_F16_F32 : VOP2_F16 <0x0000002f, "V_CVT_PKRTZ_F16_F32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
+ [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))]
+>;
 ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
 ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
 def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
index 6eadc94..b9544f1 100644 (file)
@@ -14,6 +14,7 @@
 
 let TargetPrefix = "SI", isTarget = 1 in {
 
+  def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
   def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
   /* XXX: We may need a seperate intrinsic here for loading integer values */
   def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
index 989bb49..099b509 100644 (file)
@@ -134,7 +134,8 @@ struct r600_context {
        unsigned                        saved_render_cond_mode;
        /* shader information */
        unsigned                        sprite_coord_enable;
-       boolean                         export_16bpc;
+       unsigned                        export_16bpc;
+       unsigned                        spi_shader_col_format;
        unsigned                        alpha_ref;
        boolean                         alpha_ref_dirty;
        struct r600_textures_info       vs_samplers;
index fd614dd..98866c4 100644 (file)
@@ -390,13 +390,47 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
        unsigned compressed = 0;
        unsigned chan;
 
-       for (chan = 0; chan < 4; chan++ ) {
-               LLVMValueRef out_ptr =
-                       si_shader_ctx->radeon_bld.soa.outputs[index][chan];
-               /* +5 because the first output value will be
-                * the 6th argument to the intrinsic. */
-               args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
-                                              out_ptr, "");
+       if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+               int cbuf = target - V_008DFC_SQ_EXP_MRT;
+
+               if (cbuf >= 0 && cbuf < 8) {
+                       struct r600_context *rctx = si_shader_ctx->rctx;
+                       compressed = (rctx->export_16bpc >> cbuf) & 0x1;
+               }
+       }
+
+       if (compressed) {
+               /* Pixel shader needs to pack output values before export */
+               for (chan = 0; chan < 2; chan++ ) {
+                       LLVMValueRef *out_ptr =
+                               si_shader_ctx->radeon_bld.soa.outputs[index];
+                       args[0] = LLVMBuildLoad(base->gallivm->builder,
+                                               out_ptr[2 * chan], "");
+                       args[1] = LLVMBuildLoad(base->gallivm->builder,
+                                               out_ptr[2 * chan + 1], "");
+                       args[chan + 5] =
+                               build_intrinsic(base->gallivm->builder,
+                                               "llvm.SI.packf16",
+                                               LLVMInt32TypeInContext(base->gallivm->context),
+                                               args, 2,
+                                               LLVMReadNoneAttribute);
+                       args[chan + 7] = args[chan + 5];
+               }
+
+               /* Set COMPR flag */
+               args[4] = uint->one;
+       } else {
+               for (chan = 0; chan < 4; chan++ ) {
+                       LLVMValueRef out_ptr =
+                               si_shader_ctx->radeon_bld.soa.outputs[index][chan];
+                       /* +5 because the first output value will be
+                        * the 6th argument to the intrinsic. */
+                       args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
+                                                      out_ptr, "");
+               }
+
+               /* Clear COMPR flag */
+               args[4] = uint->zero;
        }
 
        /* XXX: This controls which components of the output
@@ -415,9 +449,6 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
        /* Specify the target we are exporting */
        args[3] = lp_build_const_int32(base->gallivm, target);
 
-       /* Set COMPR flag */
-       args[4] = uint->zero;
-
        /* XXX: We probably need to keep track of the output
         * values, so we know what we are passing to the next
         * stage. */
index 5c2e743..fced24c 100644 (file)
@@ -996,6 +996,53 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
        }
 }
 
+/* Returns the size in bits of the widest component of a CB format */
+static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
+{
+       switch(colorformat) {
+       case V_028C70_COLOR_4_4_4_4:
+               return 4;
+
+       case V_028C70_COLOR_1_5_5_5:
+       case V_028C70_COLOR_5_5_5_1:
+               return 5;
+
+       case V_028C70_COLOR_5_6_5:
+               return 6;
+
+       case V_028C70_COLOR_8:
+       case V_028C70_COLOR_8_8:
+       case V_028C70_COLOR_8_8_8_8:
+               return 8;
+
+       case V_028C70_COLOR_10_10_10_2:
+       case V_028C70_COLOR_2_10_10_10:
+               return 10;
+
+       case V_028C70_COLOR_10_11_11:
+       case V_028C70_COLOR_11_11_10:
+               return 11;
+
+       case V_028C70_COLOR_16:
+       case V_028C70_COLOR_16_16:
+       case V_028C70_COLOR_16_16_16_16:
+               return 16;
+
+       case V_028C70_COLOR_8_24:
+       case V_028C70_COLOR_24_8:
+               return 24;
+
+       case V_028C70_COLOR_32:
+       case V_028C70_COLOR_32_32:
+       case V_028C70_COLOR_32_32_32_32:
+       case V_028C70_COLOR_X24_8_32_FLOAT:
+               return 32;
+       }
+
+       assert(!"Unknown maximum component size");
+       return 0;
+}
+
 static uint32_t si_translate_dbformat(enum pipe_format format)
 {
        switch (format) {
@@ -1409,6 +1456,7 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
        const struct util_format_description *desc;
        int i;
        unsigned blend_clamp = 0, blend_bypass = 0;
+       unsigned max_comp_size;
 
        surf = (struct r600_surface *)state->cbufs[cb];
        rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
@@ -1549,6 +1597,17 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
        }
        si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info);
        si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib);
+
+       /* Determine pixel shader export format */
+       max_comp_size = si_colorformat_max_comp_size(format);
+       if (ntype == V_028C70_NUMBER_SRGB ||
+           ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
+            max_comp_size <= 10) ||
+           (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
+               rctx->export_16bpc |= 1 << cb;
+               rctx->spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * cb);
+       } else
+               rctx->spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * cb);
 }
 
 static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
@@ -1667,9 +1726,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
        /* build states */
        rctx->have_depth_fb = 0;
+       rctx->export_16bpc = 0;
+       rctx->spi_shader_col_format = 0;
        for (int i = 0; i < state->nr_cbufs; i++) {
                si_cb(rctx, pm4, state, i);
        }
+       assert(!(rctx->export_16bpc & ~0xff));
        si_db(rctx, pm4, state);
 
        shader_mask = 0;
@@ -1706,6 +1768,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000);
        si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
        si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader_mask);
+       si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
+                      rctx->spi_shader_col_format);
        si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0x00000000);
 
        si_pm4_set_state(rctx, framebuffer, pm4);
@@ -1727,9 +1791,10 @@ static INLINE unsigned si_shader_selector_key(struct pipe_context *ctx,
        if (sel->type == PIPE_SHADER_FRAGMENT) {
                if (sel->fs_write_all)
                        key |= rctx->framebuffer.nr_cbufs;
+               key |= rctx->export_16bpc << 4;
                /*if (rctx->queued.named.rasterizer)
-                         key |= rctx->queued.named.rasterizer->flatshade << 4;*/
-               /*key |== rctx->two_side << 5;*/
+                         key |= rctx->queued.named.rasterizer->flatshade << 12;*/
+               /*key |== rctx->two_side << 13;*/
        }
 
        return key;
index 95821dc..5f8e211 100644 (file)
@@ -186,10 +186,6 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s
        /* XXX: Depends on Z buffer format? */
        si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, 0);
 
-       /* XXX: Depends on color buffer format? */
-       si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
-                      S_028714_COL0_EXPORT_FORMAT(V_028714_SPI_SHADER_32_ABGR));
-
        va = r600_resource_va(ctx->screen, (void *)shader->bo);
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
        si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);