intel/compiler: handle coarse pixel in render target write descriptors
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Thu, 22 Oct 2020 10:23:06 +0000 (13:23 +0300)
committer Marge Bot <eric+marge@anholt.net>
Sun, 2 May 2021 20:20:06 +0000 (20:20 +0000)
v2: Use the new inst->ex_desc field (Jason)

v3: Drop CPS LoD compensation from sampler messages (Lionel)

v4: Drop useless uses_rate_shading (Ken)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7455>

src/intel/compiler/brw_compiler.h
src/intel/compiler/brw_disasm.c
src/intel/compiler/brw_eu.h
src/intel/compiler/brw_eu_emit.c
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs_generator.cpp

index 57ab81e..03eb7e1 100644 (file)
@@ -479,6 +479,7 @@ struct brw_wm_prog_key {
    bool force_dual_color_blend:1;
    bool coherent_fb_fetch:1;
    bool ignore_sample_mask_out:1;
+   bool coarse_pixel:1;
 
    uint8_t color_outputs_valid;
    uint64_t input_slots_valid;
@@ -853,6 +854,11 @@ struct brw_wm_prog_data {
    bool contains_noperspective_varying;
 
    /**
+    * Shader is run at the coarse pixel shading dispatch rate (3DSTATE_CPS).
+    */
+   bool per_coarse_pixel_dispatch;
+
+   /**
     * Mask of which interpolation modes are required by the fragment shader.
     * Used in hardware setup on gfx6+.
     */
index 6e3312e..4dbaeab 100644 (file)
@@ -1972,6 +1972,9 @@ brw_disassemble_inst(FILE *file, const struct intel_device_info *devinfo,
                   string(file, " Hi");
                if (brw_fb_write_desc_last_render_target(devinfo, imm_desc))
                   string(file, " LastRT");
+               if (devinfo->ver >= 10 &&
+                   brw_fb_write_desc_coarse_write(devinfo, imm_desc))
+                  string(file, " CoarseWrite");
                if (devinfo->ver < 7 &&
                    brw_fb_write_desc_write_commit(devinfo, imm_desc))
                   string(file, " WriteCommit");
index 4d2197f..d84af2b 100644 (file)
@@ -1078,16 +1078,20 @@ static inline uint32_t
 brw_fb_write_desc(const struct intel_device_info *devinfo,
                   unsigned binding_table_index,
                   unsigned msg_control,
-                  bool last_render_target)
+                  bool last_render_target,
+                  bool coarse_write)
 {
    const unsigned msg_type =
       devinfo->ver >= 6 ?
       GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
       BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
 
+   assert(devinfo->ver >= 10 || !coarse_write);
+
    if (devinfo->ver >= 6) {
       return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
-             SET_BITS(last_render_target, 12, 12);
+             SET_BITS(last_render_target, 12, 12) |
+             SET_BITS(coarse_write, 18, 18);
    } else {
       return (SET_BITS(binding_table_index, 7, 0) |
               SET_BITS(msg_control, 11, 8) |
@@ -1137,6 +1141,14 @@ brw_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
       return GET_BITS(desc, 15, 15);
 }
 
+static inline bool
+brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
+                               uint32_t desc)
+{
+   assert(devinfo->ver >= 10);
+   return GET_BITS(desc, 18, 18);
+}
+
 static inline uint32_t
 brw_mdc_sm2(unsigned exec_size)
 {
@@ -1202,12 +1214,15 @@ static inline uint32_t
 brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
                       unsigned msg_type,
                       bool noperspective,
+                      bool coarse_pixel_rate,
                       unsigned simd_mode,
                       unsigned slot_group)
 {
+   assert(devinfo->ver >= 10 || !coarse_pixel_rate);
    return (SET_BITS(slot_group, 11, 11) |
            SET_BITS(msg_type, 13, 12) |
            SET_BITS(!!noperspective, 14, 14) |
+           SET_BITS(coarse_pixel_rate, 15, 15) |
            SET_BITS(simd_mode, 16, 16));
 }
 
@@ -1453,6 +1468,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg dest,
                              struct brw_reg mrf,
                              bool noperspective,
+                             bool coarse_pixel_rate,
                              unsigned mode,
                              struct brw_reg data,
                              unsigned msg_length,
index fbe6c31..138c48e 100644 (file)
@@ -2463,7 +2463,8 @@ brw_fb_WRITE(struct brw_codegen *p,
                 brw_message_desc(devinfo, msg_length, response_length,
                                  header_present) |
                 brw_fb_write_desc(devinfo, binding_table_index, msg_control,
-                                  last_render_target));
+                                  last_render_target,
+                                  false /* coarse_write */));
    brw_inst_set_eot(devinfo, insn, eot);
 
    return insn;
@@ -3247,6 +3248,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg dest,
                              struct brw_reg mrf,
                              bool noperspective,
+                             bool coarse_pixel_rate,
                              unsigned mode,
                              struct brw_reg data,
                              unsigned msg_length,
@@ -3258,8 +3260,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
    const unsigned simd_mode = (exec_size == BRW_EXECUTE_16);
    const unsigned desc =
       brw_message_desc(devinfo, msg_length, response_length, false) |
-      brw_pixel_interp_desc(devinfo, mode, noperspective, simd_mode,
-                            slot_group);
+      brw_pixel_interp_desc(devinfo, mode, noperspective, coarse_pixel_rate,
+                            simd_mode, slot_group);
 
    /* brw_send_indirect_message will automatically use a direct send message
     * if data is actually immediate.
index 0a4dfa9..5e9a4a5 100644 (file)
@@ -4663,7 +4663,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
 
       inst->desc =
          (inst->group / 16) << 11 | /* rt slot group */
-         brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt);
+         brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt,
+                           prog_data->per_coarse_pixel_dispatch);
 
       uint32_t ex_desc = 0;
       if (devinfo->ver >= 11) {
@@ -5340,7 +5341,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
                                     simd_mode,
                                     0 /* return_format unused on gfx7+ */);
       inst->src[0] = brw_imm_ud(0);
-      inst->src[1] = brw_imm_ud(0); /* ex_desc */
+      inst->src[1] = brw_imm_ud(0);
    } else if (surface_handle.file != BAD_FILE) {
       /* Bindless surface */
       assert(devinfo->ver >= 9);
@@ -5398,6 +5399,8 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
       inst->src[1] = brw_imm_ud(0); /* ex_desc */
    }
 
+   inst->ex_desc = 0;
+
    inst->src[2] = src_payload;
    inst->resize_sources(3);
 
@@ -9103,6 +9106,13 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
    prog_data->barycentric_interp_modes =
       brw_compute_barycentric_interp_modes(devinfo, shader);
 
+   prog_data->per_coarse_pixel_dispatch =
+      key->coarse_pixel &&
+      !prog_data->persample_dispatch &&
+      !prog_data->uses_sample_mask &&
+      (prog_data->computed_depth_mode == BRW_PSCDEPTH_OFF) &&
+      !prog_data->computed_stencil;
+
    calculate_urb_setup(devinfo, key, prog_data, shader);
    brw_compute_flat_inputs(prog_data, shader);
 }
index 89c5147..cdd361e 100644 (file)
@@ -1760,11 +1760,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
    assert(msg_data.type == BRW_REGISTER_TYPE_UD);
    assert(inst->size_written % REG_SIZE == 0);
 
+   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
+
    brw_pixel_interpolator_query(p,
          retype(dst, BRW_REGISTER_TYPE_UW),
          /* If we don't have a payload, what we send doesn't matter */
          has_payload ? src : brw_vec8_grf(0, 0),
          inst->pi_noperspective,
+         prog_data->per_coarse_pixel_dispatch,
          msg_type,
          msg_data,
          has_payload ? 2 * inst->exec_size / 8 : 1,