ac,radeonsi: start adding support for gfx10.3
author: Marek Olšák <marek.olsak@amd.com>
Fri, 27 Mar 2020 02:02:13 +0000 (22:02 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Tue, 9 Jun 2020 16:17:36 +0000 (16:17 +0000)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5383>

src/amd/common/ac_gpu_info.c
src/amd/common/ac_surface.c
src/amd/common/amd_family.h
src/amd/registers/gfx10.json
src/gallium/drivers/radeonsi/si_perfcounter.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index a8a43fd..517de22 100644 (file)
@@ -582,7 +582,8 @@ bool ac_query_gpu_info(int fd, void *dev_p,
                                info->family == CHIP_VEGA12 ||
                                info->family == CHIP_RAVEN ||
                                info->family == CHIP_RAVEN2 ||
-                               info->family == CHIP_RENOIR);
+                               info->family == CHIP_RENOIR ||
+                               info->chip_class >= GFX10_3);
 
        info->has_out_of_order_rast = info->chip_class >= GFX8 &&
                                      info->chip_class <= GFX9 &&
@@ -736,7 +737,9 @@ bool ac_query_gpu_info(int fd, void *dev_p,
        if (info->chip_class >= GFX10)
                info->num_sdp_interfaces = device_info.num_tcc_blocks;
 
-       if (info->chip_class >= GFX10)
+       if (info->chip_class >= GFX10_3)
+               info->max_wave64_per_simd = 16;
+       else if (info->chip_class == GFX10)
                info->max_wave64_per_simd = 20;
        else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
                info->max_wave64_per_simd = 8;
index cbbd860..d7dd956 100644 (file)
@@ -2127,6 +2127,7 @@ bool ac_surface_set_umd_metadata(const struct radeon_info *info,
          break;
 
       case GFX10:
+      case GFX10_3:
          surf->dcc_offset =
             ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
          surf->u.gfx9.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
@@ -2169,6 +2170,7 @@ void ac_surface_get_umd_metadata(const struct radeon_info *info,
       desc[5] |= S_008F24_META_DATA_ADDRESS(surf->dcc_offset >> 40);
       break;
    case GFX10:
+   case GFX10_3:
       desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
       desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->dcc_offset >> 8);
       desc[7] = surf->dcc_offset >> 16;
index ffcc1bd..8262a3a 100644 (file)
@@ -119,6 +119,7 @@ enum chip_class {
     GFX8,
     GFX9,
     GFX10,
+    GFX10_3,
 };
 
 enum ring_type {
index 08f111c..5c2b251 100644 (file)
   },
   {
    "chips": ["gfx10"],
+   "map": {"at": 165712, "to": "mm"},
+   "name": "SX_PS_DOWNCONVERT_CONTROL_GFX103",
+   "type_ref": "SX_PS_DOWNCONVERT_CONTROL"
+  },
+  {
+   "chips": ["gfx10"],
    "map": {"at": 165716, "to": "mm"},
    "name": "SX_PS_DOWNCONVERT",
    "type_ref": "SX_PS_DOWNCONVERT"
     {"bits": [14, 17], "name": "LOSSY_ALPHA_PRECISION"},
     {"bits": [18, 18], "name": "DISABLE_CONSTANT_ENCODE_REG"},
     {"bits": [19, 19], "name": "ENABLE_CONSTANT_ENCODE_REG_WRITE"},
-    {"bits": [20, 20], "name": "INDEPENDENT_128B_BLOCKS"}
+    {"bits": [20, 20], "name": "INDEPENDENT_128B_BLOCKS"},
+    {"bits": [21, 21], "name": "SKIP_LOW_COMP_RATIO_GFX103"},
+    {"bits": [22, 22], "name": "DCC_COMPRESS_DISABLE_GFX103"}
    ]
   },
   "CB_COLOR0_INFO": {
     {"bits": [21, 21], "name": "PRESERVE_ZRANGE"},
     {"bits": [22, 22], "name": "PRESERVE_SRESULTS"},
     {"bits": [23, 23], "name": "DISABLE_FAST_PASS"},
-    {"bits": [25, 25], "name": "ALLOW_PARTIAL_RES_HIER_KILL"}
+    {"bits": [25, 25], "name": "ALLOW_PARTIAL_RES_HIER_KILL"},
+    {"bits": [27, 28], "name": "CENTROID_COMPUTATION_MODE_GFX103"}
    ]
   },
   "DB_RMI_L2_CACHE_CONTROL": {
   "PA_CL_NGG_CNTL": {
    "fields": [
     {"bits": [0, 0], "name": "VERTEX_REUSE_OFF"},
-    {"bits": [1, 1], "name": "INDEX_BUF_EDGE_FLAG_ENA"}
+    {"bits": [1, 1], "name": "INDEX_BUF_EDGE_FLAG_ENA"},
+    {"bits": [2, 9], "name": "VERTEX_REUSE_DEPTH_GFX103"}
    ]
   },
   "PA_CL_OBJPRIM_ID_CNTL": {
     {"bits": [23, 23], "name": "VS_OUT_CCDIST1_VEC_ENA"},
     {"bits": [24, 24], "name": "VS_OUT_MISC_SIDE_BUS_ENA"},
     {"bits": [25, 25], "name": "USE_VTX_GS_CUT_FLAG"},
-    {"bits": [26, 26], "name": "USE_VTX_SHD_OBJPRIM_ID"},
-    {"bits": [27, 27], "name": "USE_VTX_LINE_WIDTH"}
+    {"bits": [27, 27], "name": "USE_VTX_LINE_WIDTH"},
+    {"bits": [29, 29], "name": "BYPASS_VTX_RATE_COMBINER_GFX103"},
+    {"bits": [30, 30], "name": "BYPASS_PRIM_RATE_COMBINER_GFX103"}
    ]
   },
   "PA_CL_VTE_CNTL": {
     {"bits": [13, 16], "name": "MAX_SAMPLE_DIST"},
     {"bits": [20, 22], "name": "MSAA_EXPOSED_SAMPLES"},
     {"bits": [24, 25], "name": "DETAIL_TO_EXPOSED_MODE"},
-    {"bits": [26, 27], "enum_ref": "CovToShaderSel", "name": "COVERAGE_TO_SHADER_SELECT"}
+    {"bits": [26, 27], "enum_ref": "CovToShaderSel", "name": "COVERAGE_TO_SHADER_SELECT"},
+    {"bits": [28, 28], "name": "SAMPLE_COVERAGE_ENCODING_GFX103"},
+    {"bits": [29, 29], "name": "COVERED_CENTROID_IS_CENTER_GFX103"}
    ]
   },
   "PA_SC_AA_MASK_X0Y0_X1Y0": {
     {"bits": [10, 19], "name": "PERFCOUNTER_SELECT3"}
    ]
   },
+  "SX_PS_DOWNCONVERT_CONTROL": {
+   "fields": [
+    {"bits": [0, 0], "name": "MRT0_FMT_MAPPING_DISABLE"},
+    {"bits": [1, 1], "name": "MRT1_FMT_MAPPING_DISABLE"},
+    {"bits": [2, 2], "name": "MRT2_FMT_MAPPING_DISABLE"},
+    {"bits": [3, 3], "name": "MRT3_FMT_MAPPING_DISABLE"},
+    {"bits": [4, 4], "name": "MRT4_FMT_MAPPING_DISABLE"},
+    {"bits": [5, 5], "name": "MRT5_FMT_MAPPING_DISABLE"},
+    {"bits": [6, 6], "name": "MRT6_FMT_MAPPING_DISABLE"},
+    {"bits": [7, 7], "name": "MRT7_FMT_MAPPING_DISABLE"}
+   ]
+  },
   "SX_PS_DOWNCONVERT": {
    "fields": [
     {"bits": [0, 3], "enum_ref": "SX_DOWNCONVERT_FORMAT", "name": "MRT0"},
   "VGT_HS_OFFCHIP_PARAM_UMD": {
    "fields": [
     {"bits": [0, 8], "name": "OFFCHIP_BUFFERING"},
-    {"bits": [9, 10], "name": "OFFCHIP_GRANULARITY"}
+    {"bits": [9, 10], "name": "OFFCHIP_GRANULARITY"},
+    {"bits": [0, 9], "name": "OFFCHIP_BUFFERING_GFX103"},
+    {"bits": [10, 11], "name": "OFFCHIP_GRANULARITY_GFX103"}
    ]
   },
   "VGT_INSTANCE_BASE_ID": {
index d6b3fc8..8825926 100644 (file)
@@ -1438,6 +1438,7 @@ void si_init_perfcounters(struct si_screen *screen)
       num_blocks = ARRAY_SIZE(groups_gfx9);
       break;
    case GFX10:
+   case GFX10_3:
       blocks = groups_gfx10;
       num_blocks = ARRAY_SIZE(groups_gfx10);
       break;
index 1d14442..7fdbfa2 100644 (file)
@@ -1088,7 +1088,11 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
    sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
    sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4;
 
-   if (sscreen->info.chip_class >= GFX7) {
+   if (sscreen->info.chip_class >= GFX10_3) {
+      sscreen->vgt_hs_offchip_param =
+            S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
+            S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
+   } else if (sscreen->info.chip_class >= GFX7) {
       if (sscreen->info.chip_class >= GFX8)
          --max_offchip_buffers;
       sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
@@ -1125,7 +1129,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
    /* Only enable primitive binning on APUs by default. */
    if (sscreen->info.chip_class >= GFX10) {
       sscreen->dpbb_allowed = true;
-      sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram;
+      /* DFSM is not supported on GFX 10.3 and not beneficial on Navi1x. */
    } else if (sscreen->info.chip_class == GFX9) {
       sscreen->dpbb_allowed = !sscreen->info.has_dedicated_vram;
       sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram;
index ecce673..b59f28e 100644 (file)
@@ -757,8 +757,9 @@ static void si_emit_clip_regs(struct si_context *sctx)
 
    unsigned initial_cdw = sctx->gfx_cs->current.cdw;
    unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
-                         S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | clipdist_mask |
-                         (culldist_mask << 8);
+                         S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+                         S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
+                         clipdist_mask | (culldist_mask << 8);
 
    if (sctx->chip_class >= GFX10) {
       radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
@@ -1384,8 +1385,9 @@ static void si_emit_db_render_state(struct si_context *sctx)
    radeon_opt_set_context_reg(
       sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
       S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
-         S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
-         S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
+      S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
+      S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
+      S_028010_CENTROID_COMPUTATION_MODE_GFX103(sctx->chip_class >= GFX10_3 ? 2 : 0));
 
    db_shader_control = sctx->ps_db_shader_control;
 
@@ -3535,7 +3537,8 @@ static void si_emit_msaa_config(struct si_context *sctx)
       sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
       sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
                      S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
-                     S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples);
+                     S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
+                     S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(sctx->chip_class >= GFX10_3);
 
       if (sctx->framebuffer.nr_samples > 1) {
          db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
@@ -5329,6 +5332,7 @@ static void si_init_config(struct si_context *sctx)
        * a single primitive shader subgroup.
        */
       si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
+      /* Reuse for legacy (non-NGG) only. */
       si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
 
       if (!has_clear_state) {
@@ -5370,6 +5374,9 @@ static void si_init_config(struct si_context *sctx)
                      S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
       si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
    }
+   if (sctx->chip_class >= GFX10_3) {
+      si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff);
+   }
 
    if (sctx->chip_class >= GFX9) {
       si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
index 0fd1714..520eead 100644 (file)
@@ -1217,7 +1217,9 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
     * this.
     */
    shader->ctx_reg.ngg.pa_cl_ngg_cntl =
-      S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX);
+      S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX) |
+      /* Reuse for NGG. */
+      S_028838_VERTEX_REUSE_DEPTH_GFX103(sscreen->info.chip_class >= GFX10_3 ? 30 : 0);
    shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
 
    /* Oversubscribe PC. This improves performance when there are too many varyings. */