radv: add support for Sienna Cichlid
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 8 Jun 2020 16:16:13 +0000 (18:16 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 19 Jun 2020 06:18:41 +0000 (08:18 +0200)
Bits copied from RadeonSI. Totally untested.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5389>

src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_image.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/si_cmd_buffer.c
src/amd/vulkan/winsys/null/radv_null_winsys.c

index f9afd72..07d6689 100644 (file)
@@ -3679,7 +3679,10 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
        }
 
        *max_offchip_buffers_p = max_offchip_buffers;
-       if (device->physical_device->rad_info.chip_class >= GFX7) {
+       if (device->physical_device->rad_info.chip_class >= GFX10_3) {
+               hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
+                                  S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
+       } else if (device->physical_device->rad_info.chip_class >= GFX7) {
                if (device->physical_device->rad_info.chip_class >= GFX8)
                        --max_offchip_buffers;
                hs_offchip_param =
index 8c65eb6..1cbe60a 100644 (file)
@@ -833,11 +833,27 @@ gfx10_make_texture_descriptor(struct radv_device *device,
                                        last_level) |
                   S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
                   S_00A00C_TYPE(type);
-       /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
-        * to know the total number of layers.
-        */
-       state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
-                  S_00A010_BASE_ARRAY(first_layer);
+
+       if (type == V_008F1C_SQ_RSRC_IMG_1D ||
+           type == V_008F1C_SQ_RSRC_IMG_2D ||
+           type == V_008F1C_SQ_RSRC_IMG_2D_MSAA) {
+               /* 1D, 2D, and 2D_MSAA can set a custom pitch for shader
+                * resources starting with gfx10.3 (ignored if pitch <=
+                * width). Other texture targets can't. CB and DB can't set a
+                * custom pitch for any target.
+                * */
+               if (device->physical_device->rad_info.chip_class >= GFX10_3)
+                       state[4] = S_00A010_DEPTH(image->planes[0].surface.u.gfx9.surf_pitch - 1);
+               else
+                       state[4] = 0;
+       } else {
+               /* Depth is the last accessible layer on gfx9+. The hw doesn't need
+                * to know the total number of layers.
+                */
+               state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
+                          S_00A010_BASE_ARRAY(first_layer);
+       }
+
        state[5] = S_00A014_ARRAY_PITCH(0) |
                   S_00A014_MAX_MIP(image->info.samples > 1 ?
                                    util_logbase2(image->info.samples) :
index fcc23aa..312611a 100644 (file)
@@ -1170,7 +1170,8 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
                        S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
                ms->pa_sc_aa_config |= S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
                        S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) |
-                       S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples); /* CM_R_028BE0_PA_SC_AA_CONFIG */
+                       S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | /* CM_R_028BE0_PA_SC_AA_CONFIG */
+                       S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3);
                ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
                if (ps_iter_samples > 1)
                        pipeline->graphics.spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
@@ -3589,6 +3590,9 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
 
                /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
                db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2);
+
+               if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
+                       db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE_GFX103(2);
        }
 
        if (has_stencil_attachment && vkds && vkds->stencilTestEnable) {
@@ -3884,6 +3888,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
                               S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
                               S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
                               S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+                              S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
                               cull_dist_mask << 8 |
                               clip_dist_mask);
 
@@ -4003,6 +4008,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
                               S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
                               S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
                               S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+                              S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
                               cull_dist_mask << 8 |
                               clip_dist_mask);
 
@@ -4041,7 +4047,9 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
         */
        radeon_set_context_reg(ctx_cs, R_028838_PA_CL_NGG_CNTL,
                               S_028838_INDEX_BUF_EDGE_FLAG_ENA(!radv_pipeline_has_tess(pipeline) &&
-                                                               !radv_pipeline_has_gs(pipeline)));
+                                                               !radv_pipeline_has_gs(pipeline)) |
+                              /* Reuse for NGG. */
+                              S_028838_VERTEX_REUSE_DEPTH_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 30 : 0));
 
        ge_cntl = S_03096C_PRIM_GRP_SIZE(ngg_state->max_gsprims) |
                  S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
index bef579f..d53aaf3 100644 (file)
@@ -418,6 +418,10 @@ si_emit_graphics(struct radv_device *device,
                                  S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
                radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
 
+               if (physical_device->rad_info.chip_class >= GFX10_3) {
+                       radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff);
+               }
+
                if (physical_device->rad_info.chip_class == GFX10) {
                        /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
index cb9a10d..462ed8b 100644 (file)
@@ -80,7 +80,9 @@ static void radv_null_winsys_query_info(struct radeon_winsys *rws,
                        info->family = i;
                        info->name = "OVERRIDDEN";
 
-                       if (i >= CHIP_NAVI10)
+                       if (i >= CHIP_SIENNA)
+                               info->chip_class = GFX10_3;
+                       else if (i >= CHIP_NAVI10)
                                info->chip_class = GFX10;
                        else if (i >= CHIP_VEGA10)
                                info->chip_class = GFX9;
@@ -101,7 +103,9 @@ static void radv_null_winsys_query_info(struct radeon_winsys *rws,
        info->pci_id = gpu_info[info->family].pci_id;
        info->has_syncobj_wait_for_submit = true;
        info->max_se = 4;
-       if (info->chip_class >= GFX10)
+       if (info->chip_class >= GFX10_3)
+               info->max_wave64_per_simd = 16;
+       else if (info->chip_class >= GFX10)
                info->max_wave64_per_simd = 20;
        else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
                info->max_wave64_per_simd = 8;