drm/amd/display: Update MBLK calculation for SubVP
authorAlvin Lee <Alvin.Lee2@amd.com>
Tue, 23 Aug 2022 21:14:03 +0000 (17:14 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 13 Sep 2022 18:26:59 +0000 (14:26 -0400)
[Description]
Update MBLK calculation according to hardware doc.  For DCC case we were
not allocation enough MALL due to an inaccurate MBLK calculation.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Jun Lei <Jun.Lei@amd.com>
Acked-by: Pavle Kotarac <Pavle.Kotarac@amd.com>
Signed-off-by: Alvin Lee <Alvin.Lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c

index 1e7e620..cf15d0e 100644 (file)
@@ -30,6 +30,9 @@
 
 #define DCN3_2_DET_SEG_SIZE 64
 #define DCN3_2_MALL_MBLK_SIZE_BYTES 65536 // 64 * 1024
+#define DCN3_2_MBLK_WIDTH 128
+#define DCN3_2_MBLK_HEIGHT_4BPE 128
+#define DCN3_2_MBLK_HEIGHT_8BPE 64
 
 #define TO_DCN32_RES_POOL(pool)\
        container_of(pool, struct dcn32_resource_pool, base)
index ab918fe..1f195c5 100644 (file)
@@ -46,7 +46,6 @@
 uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_state *context)
 {
        uint32_t num_ways = 0;
-       uint32_t mall_region_pixels = 0;
        uint32_t bytes_per_pixel = 0;
        uint32_t cache_lines_used = 0;
        uint32_t lines_per_way = 0;
@@ -54,20 +53,64 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
        uint32_t bytes_in_mall = 0;
        uint32_t num_mblks = 0;
        uint32_t cache_lines_per_plane = 0;
-       uint32_t i = 0;
+       uint32_t i = 0, j = 0;
+       uint32_t mblk_width = 0;
+       uint32_t mblk_height = 0;
+       uint32_t full_vp_width_blk_aligned = 0;
+       uint32_t full_vp_height_blk_aligned = 0;
+       uint32_t mall_alloc_width_blk_aligned = 0;
+       uint32_t mall_alloc_height_blk_aligned = 0;
+       uint32_t full_vp_height = 0;
 
        for (i = 0; i < dc->res_pool->pipe_count; i++) {
                struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
                // Find the phantom pipes
-               if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+               if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe &&
                                pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-                       bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
-                       mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable;
+                       struct pipe_ctx *main_pipe = NULL;
+
+                       /* Get full viewport height from main pipe (required for MBLK calculation) */
+                       for (j = 0; j < dc->res_pool->pipe_count; j++) {
+                               main_pipe = &context->res_ctx.pipe_ctx[j];
+                               if (main_pipe->stream == pipe->stream->mall_stream_config.paired_stream) {
+                                       full_vp_height = main_pipe->plane_res.scl_data.viewport.height;
+                                       break;
+                               }
+                       }
 
-                       // For bytes required in MALL, calculate based on number of MBlks required
-                       num_mblks = (mall_region_pixels * bytes_per_pixel +
-                                       DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES;
+                       bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
+                       mblk_width = DCN3_2_MBLK_WIDTH;
+                       mblk_height = bytes_per_pixel == 4 ? DCN3_2_MBLK_HEIGHT_4BPE : DCN3_2_MBLK_HEIGHT_8BPE;
+
+                       /* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) -
+                        * FLOOR(vp_x_start, blk_width)
+                        */
+                       full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
+                                       pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) +
+                                       (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
+
+                       /* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) -
+                        * FLOOR(vp_y_start, blk_height)
+                        */
+                       full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y +
+                                       full_vp_height + mblk_height - 1) / mblk_height * mblk_height) +
+                                       (pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);
+
+                       /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */
+                       mall_alloc_width_blk_aligned = full_vp_width_blk_aligned;
+
+                       /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */
+                       mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) /
+                                       mblk_height * mblk_height + mblk_height;
+
+                       /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c;
+                        * full_mblk_height_ub_l/c = mall_alloc_height_blk_aligned_l/c;
+                        * num_mblk_l/c = (full_mblk_width_ub_l/c / mblk_width_l/c) * (full_mblk_height_ub_l/c / mblk_height_l/c);
+                        * (Should be divisible, but round up if not)
+                        */
+                       num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
+                                       ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
                        bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
                        // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
                        // (MALL is 64-byte aligned)