drm/amd/display: add pixel rate based CRB allocation support
author Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com>
Fri, 9 Dec 2022 20:13:18 +0000 (15:13 -0500)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 3 Aug 2023 08:23:52 +0000 (10:23 +0200)
[ Upstream commit 9ba90d760e9354c124fa9bbea08017d96699a82c ]

This feature is meant to unblock PSTATE for certain high end display
configs on dcn315. This is achieved by allocating CRB to detile buffer
based on display requirements to meet pstate latency hiding needs.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Charlene Liu <Charlene.Liu@amd.com>
Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Stable-dep-of: 49f26218c344 ("drm/amd/display: fix dcn315 single stream crb allocation")
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c

index 6360dc9502e7039db13eea60e4e1c96f996bfd44..0f231e42e42061b56b02a12ef551d52ae4dde5f4 100644 (file)
@@ -103,6 +103,7 @@ static void dcn31_program_det_size(struct hubbub *hubbub, int hubp_inst, unsigne
        default:
                break;
        }
+       DC_LOG_DEBUG("Set DET%d to %d segments\n", hubp_inst, det_size_segments);
        /* Should never be hit, if it is we have an erroneous hw config*/
        ASSERT(hubbub2->det0_size + hubbub2->det1_size + hubbub2->det2_size
                        + hubbub2->det3_size + hubbub2->compbuf_size_segments <= hubbub2->crb_size_segs);
index 19f2025cb79074520d7a197ee72241820465702b..88c4a378daa12ae32fa9af5f296279eb845c8faa 100644 (file)
 
 #define DCN3_15_MAX_DET_SIZE 384
 #define DCN3_15_CRB_SEGMENT_SIZE_KB 64
+#define DCN3_15_MAX_DET_SEGS (DCN3_15_MAX_DET_SIZE / DCN3_15_CRB_SEGMENT_SIZE_KB) /* DCN3_15_MAX_DET_SIZE expressed as a count of CRB segments */
+/* Minimum 2 extra segments need to be in compbuf and claimable to guarantee seamless mpo transitions */
+#define MIN_RESERVED_DET_SEGS 2
 
 enum dcn31_clk_src_array_id {
        DCN31_CLK_SRC_PLL0,
@@ -1636,21 +1639,57 @@ static bool is_dual_plane(enum surface_pixel_format format)
        return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
 }
 
+/*
+ * Map a DML source pixel format to the per-pixel size that is passed as the
+ * "bpp" argument of dcn_get_approx_det_segs_required_for_pstate().
+ *
+ * Despite the name, the returned values are consistent with bytes per pixel
+ * rather than bits (8 for dm_444_64, 1 for dm_444_8).
+ *
+ * Returns 4 for every format that is not listed explicitly.
+ *
+ * A switch is used so that listing the same format twice is a compile
+ * error instead of a silently redundant comparison.
+ *
+ * NOTE(review): dm_mono_16 and dm_mono_8 currently take the default of 4;
+ * confirm whether they should share the dm_444_16 / dm_444_8 values.
+ */
+static int source_format_to_bpp(enum source_format_class SourcePixelFormat)
+{
+       switch (SourcePixelFormat) {
+       case dm_444_64:
+               return 8;
+       case dm_444_16:
+               return 2;
+       case dm_444_8:
+               return 1;
+       case dm_rgbe_alpha:
+               return 5;
+       case dm_420_8:
+               return 3;
+       case dm_420_12:
+               return 6;
+       default:
+               return 4;
+       }
+}
+
+/*
+ * Decide whether pixel rate based CRB allocation may be used for @context.
+ *
+ * Walks every pipe in the resource pool and returns false as soon as an
+ * in-use pipe has a top pipe showing a different plane (an MPO
+ * configuration). Returns true when no such pipe exists.
+ */
+static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context)
+{
+       struct resource_context *res_ctx = &context->res_ctx;
+       int i;
+
+       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+               struct pipe_ctx *cur = &res_ctx->pipe_ctx[i];
+
+               /* Pipes without a stream are not in use */
+               if (!cur->stream)
+                       continue;
+
+               /* Don't apply if MPO, to avoid transition issues */
+               if (cur->top_pipe && cur->top_pipe->plane_state != cur->plane_state)
+                       return false;
+       }
+
+       return true;
+}
+
 static int dcn315_populate_dml_pipes_from_context(
        struct dc *dc, struct dc_state *context,
        display_e2e_pipe_params_st *pipes,
        bool fast_validate)
 {
-       int i, pipe_cnt;
+       int i, pipe_cnt, crb_idx, crb_pipes;
        struct resource_context *res_ctx = &context->res_ctx;
        struct pipe_ctx *pipe;
        const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB;
+       int remaining_det_segs = max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB;
+       bool pixel_rate_crb = allow_pixel_rate_crb(dc, context);
 
        DC_FP_START();
        dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
        DC_FP_END();
 
-       for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+       for (i = 0, pipe_cnt = 0, crb_pipes = 0; i < dc->res_pool->pipe_count; i++) {
                struct dc_crtc_timing *timing;
 
                if (!res_ctx->pipe_ctx[i].stream)
@@ -1672,6 +1711,23 @@ static int dcn315_populate_dml_pipes_from_context(
                pipes[pipe_cnt].dout.dsc_input_bpc = 0;
                DC_FP_START();
                dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+               if (pixel_rate_crb && !pipe->top_pipe && !pipe->prev_odm_pipe) {
+                       int bpp = source_format_to_bpp(pipes[pipe_cnt].pipe.src.source_format);
+                       /* Ceil to crb segment size */
+                       int approx_det_segs_required_for_pstate = dcn_get_approx_det_segs_required_for_pstate(
+                                       &context->bw_ctx.dml.soc, timing->pix_clk_100hz, bpp, DCN3_15_CRB_SEGMENT_SIZE_KB);
+                       if (approx_det_segs_required_for_pstate <= 2 * DCN3_15_MAX_DET_SEGS) {
+                               bool split_required = approx_det_segs_required_for_pstate > DCN3_15_MAX_DET_SEGS;
+                               split_required = split_required || timing->pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc);
+                               split_required = split_required || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120);
+                               if (split_required)
+                                       approx_det_segs_required_for_pstate += approx_det_segs_required_for_pstate % 2;
+                               pipes[pipe_cnt].pipe.src.det_size_override = approx_det_segs_required_for_pstate;
+                               remaining_det_segs -= approx_det_segs_required_for_pstate;
+                       } else
+                               remaining_det_segs = -1;
+                       crb_pipes++;
+               }
                DC_FP_END();
 
                if (pipes[pipe_cnt].dout.dsc_enable) {
@@ -1690,16 +1746,49 @@ static int dcn315_populate_dml_pipes_from_context(
                                break;
                        }
                }
-
                pipe_cnt++;
        }
 
+       /* Spread remaining unreserved crb evenly among all pipes, use default policy if not enough det or single pipe */
+       if (pixel_rate_crb) {
+               for (i = 0, pipe_cnt = 0, crb_idx = 0; i < dc->res_pool->pipe_count; i++) {
+                       pipe = &res_ctx->pipe_ctx[i];
+                       if (!pipe->stream)
+                               continue;
+
+                       if (!pipe->top_pipe && !pipe->prev_odm_pipe) {
+                               bool split_required = pipe->stream->timing.pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc)
+                                               || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120);
+
+                               if (remaining_det_segs < 0 || crb_pipes == 1)
+                                       pipes[pipe_cnt].pipe.src.det_size_override = 0;
+                               if (remaining_det_segs > MIN_RESERVED_DET_SEGS)
+                                       pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes +
+                                                       (crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0);
+                               if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) {
+                                       /* Clamp to 2 pipe split max det segments */
+                                       remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override - 2 * (DCN3_15_MAX_DET_SEGS);
+                                       pipes[pipe_cnt].pipe.src.det_size_override = 2 * DCN3_15_MAX_DET_SEGS;
+                               }
+                               if (pipes[pipe_cnt].pipe.src.det_size_override > DCN3_15_MAX_DET_SEGS || split_required) {
+                                       /* If we are splitting we must have an even number of segments */
+                                       remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override % 2;
+                                       pipes[pipe_cnt].pipe.src.det_size_override -= pipes[pipe_cnt].pipe.src.det_size_override % 2;
+                               }
+                               /* Convert segments into size for DML use */
+                               pipes[pipe_cnt].pipe.src.det_size_override *= DCN3_15_CRB_SEGMENT_SIZE_KB;
+                               crb_idx++;
+                       }
+                       pipe_cnt++;
+               }
+       }
+
        if (pipe_cnt)
                context->bw_ctx.dml.ip.det_buffer_size_kbytes =
                                (max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB / pipe_cnt) * DCN3_15_CRB_SEGMENT_SIZE_KB;
        if (context->bw_ctx.dml.ip.det_buffer_size_kbytes > DCN3_15_MAX_DET_SIZE)
                context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_15_MAX_DET_SIZE;
-       ASSERT(context->bw_ctx.dml.ip.det_buffer_size_kbytes >= DCN3_15_DEFAULT_DET_SIZE);
+
        dc->config.enable_4to1MPC = false;
        if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
                if (is_dual_plane(pipe->plane_state->format)
index e48923f314b36fb8e6e0aff1a6afe7f0598db24f..0c0feec88e4f133986cb6f82d66e84461cf7056c 100644 (file)
@@ -483,7 +483,7 @@ void dcn31_calculate_wm_and_dlg_fp(
                int pipe_cnt,
                int vlevel)
 {
-       int i, pipe_idx, active_hubp_count = 0;
+       int i, pipe_idx, total_det = 0, active_hubp_count = 0;
        double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
 
        dc_assert_fp_enabled();
@@ -560,6 +560,18 @@ void dcn31_calculate_wm_and_dlg_fp(
                context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
                context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
        }
+       for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+               if (!context->res_ctx.pipe_ctx[i].stream)
+                       continue;
+
+               context->res_ctx.pipe_ctx[i].det_buffer_size_kb =
+                               get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+               if (context->res_ctx.pipe_ctx[i].det_buffer_size_kb > 384)
+                       context->res_ctx.pipe_ctx[i].det_buffer_size_kb /= 2;
+               total_det += context->res_ctx.pipe_ctx[i].det_buffer_size_kb;
+               pipe_idx++;
+       }
+       context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - total_det;
 }
 
 void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
@@ -812,3 +824,14 @@ int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc)
 {
        return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0);
 }
+
+int dcn_get_approx_det_segs_required_for_pstate(
+               struct _vcs_dpi_soc_bounding_box_st *soc,
+               int pix_clk_100hz, int bpp, int seg_size_kb)
+{
+       /*
+        * Rough estimate of the CRB needed to hide the dram clock change
+        * latency; in practice slightly more buffer is available for latency
+        * hiding. seg_size_kb - 1 is added before the cast so that the
+        * integer division below rounds up to whole segments.
+        */
+       int padded_kb = (int)(soc->dram_clock_change_latency_us * pix_clk_100hz * bpp
+                                       / 10240000 + seg_size_kb - 1);
+
+       return padded_kb / seg_size_kb;
+}
index ab8c48b8b7e053d1c33eeaca6199d248f37b8aef..99518f64d83dd169986e0be8955491b581da0e7f 100644 (file)
@@ -47,5 +47,8 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
 void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
 void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
 int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc);
+int dcn_get_approx_det_segs_required_for_pstate(
+               struct _vcs_dpi_soc_bounding_box_st *soc,
+               int pix_clk_100hz, int bpp, int seg_size_kb);
 
 #endif /* __DCN31_FPU_H__*/
index cf8f3d690fa66520f7172d7b1034accc33e362ba..ebc04b72b284b24871d1324c54fa9e8b67b12f7b 100644 (file)
@@ -533,7 +533,8 @@ static void CalculateStutterEfficiency(
 static void CalculateSwathAndDETConfiguration(
                bool ForceSingleDPP,
                int NumberOfActivePlanes,
-               unsigned int DETBufferSizeInKByte,
+               bool DETSharedByAllDPP,
+               unsigned int DETBufferSizeInKByte[],
                double MaximumSwathWidthLuma[],
                double MaximumSwathWidthChroma[],
                enum scan_direction_class SourceScan[],
@@ -3116,7 +3117,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                                v->SurfaceWidthC[k],
                                v->SurfaceHeightY[k],
                                v->SurfaceHeightC[k],
-                               v->DETBufferSizeInKByte[0] * 1024,
+                               v->DETBufferSizeInKByte[k] * 1024,
                                v->BlockHeight256BytesY[k],
                                v->BlockHeight256BytesC[k],
                                v->SurfaceTiling[k],
@@ -3311,7 +3312,8 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
        CalculateSwathAndDETConfiguration(
                        false,
                        v->NumberOfActivePlanes,
-                       v->DETBufferSizeInKByte[0],
+                       mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
+                       v->DETBufferSizeInKByte,
                        dummy1,
                        dummy2,
                        v->SourceScan,
@@ -3777,14 +3779,16 @@ static noinline void CalculatePrefetchSchedulePerPlane(
                &v->VReadyOffsetPix[k]);
 }
 
-static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int *DETBufferSizeInKByte)
+static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[]) /* Recompute one common DET size from the total DPP count and store it for every active plane */
 {
        int i, total_pipes = 0;
        for (i = 0; i < NumberOfActivePlanes; i++)
                total_pipes += NoOfDPPThisState[i];
-       *DETBufferSizeInKByte = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
-       if (*DETBufferSizeInKByte > DCN3_15_MAX_DET_SIZE)
-               *DETBufferSizeInKByte = DCN3_15_MAX_DET_SIZE;
+       DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64; /* buffer left after the minimum compbuf, split evenly over total_pipes and rounded down to a multiple of 64 */
+       if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE)
+               DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE;
+       for (i = 1; i < NumberOfActivePlanes; i++)
+               DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0]; /* replicate the plane 0 result to every other active plane */
 }
 
 
@@ -4024,7 +4028,8 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
        CalculateSwathAndDETConfiguration(
                        true,
                        v->NumberOfActivePlanes,
-                       v->DETBufferSizeInKByte[0],
+                       mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
+                       v->DETBufferSizeInKByte,
                        v->MaximumSwathWidthLuma,
                        v->MaximumSwathWidthChroma,
                        v->SourceScan,
@@ -4164,6 +4169,10 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                                || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
                                        v->DISPCLK_DPPCLK_Support[i][j] = false;
                                }
+                               if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) {
+                                       v->MPCCombine[i][j][k] = true;
+                                       v->NoOfDPP[i][j][k] = 2;
+                               }
                        }
                        v->TotalNumberOfActiveDPP[i][j] = 0;
                        v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
@@ -4640,12 +4649,13 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
                        }
 
-                       if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315)
-                               PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, &v->DETBufferSizeInKByte[0]);
+                       if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0])
+                               PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte);
                        CalculateSwathAndDETConfiguration(
                                        false,
                                        v->NumberOfActivePlanes,
-                                       v->DETBufferSizeInKByte[0],
+                                       mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
+                                       v->DETBufferSizeInKByte,
                                        v->MaximumSwathWidthLuma,
                                        v->MaximumSwathWidthChroma,
                                        v->SourceScan,
@@ -6557,7 +6567,8 @@ static void CalculateStutterEfficiency(
 static void CalculateSwathAndDETConfiguration(
                bool ForceSingleDPP,
                int NumberOfActivePlanes,
-               unsigned int DETBufferSizeInKByte,
+               bool DETSharedByAllDPP,
+               unsigned int DETBufferSizeInKByteA[],
                double MaximumSwathWidthLuma[],
                double MaximumSwathWidthChroma[],
                enum scan_direction_class SourceScan[],
@@ -6641,6 +6652,10 @@ static void CalculateSwathAndDETConfiguration(
 
        *ViewportSizeSupport = true;
        for (k = 0; k < NumberOfActivePlanes; ++k) {
+               unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k];
+
+               if (DETSharedByAllDPP && DPPPerPlane[k])
+                       DETBufferSizeInKByte /= DPPPerPlane[k];
                if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
                                || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
                        if (SurfaceTiling[k] == dm_sw_linear
index 8e6585dab20ef31158cd28f47378f9a805529946..1070cf8701960bda820e986afc81d49c5ea0c466 100644 (file)
@@ -569,6 +569,10 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
                mode_lib->vba.OutputLinkDPRate[mode_lib->vba.NumberOfActivePlanes] = dout->dp_rate;
                mode_lib->vba.ODMUse[mode_lib->vba.NumberOfActivePlanes] = dst->odm_combine_policy;
                mode_lib->vba.DETSizeOverride[mode_lib->vba.NumberOfActivePlanes] = src->det_size_override;
+               if (src->det_size_override)
+                       mode_lib->vba.DETBufferSizeInKByte[mode_lib->vba.NumberOfActivePlanes] = src->det_size_override;
+               else
+                       mode_lib->vba.DETBufferSizeInKByte[mode_lib->vba.NumberOfActivePlanes] = ip->det_buffer_size_kbytes;
                //TODO: Need to assign correct values to dp_multistream vars
                mode_lib->vba.OutputMultistreamEn[mode_lib->vba.NumberOfActiveSurfaces] = dout->dp_multistream_en;
                mode_lib->vba.OutputMultistreamId[mode_lib->vba.NumberOfActiveSurfaces] = dout->dp_multistream_id;
@@ -783,6 +787,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
                                        mode_lib->vba.pipe_plane[k] =
                                                        mode_lib->vba.NumberOfActivePlanes;
                                        mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes]++;
+                                       if (src_k->det_size_override)
+                                               mode_lib->vba.DETBufferSizeInKByte[mode_lib->vba.NumberOfActivePlanes] = src_k->det_size_override;
                                        if (mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes]
                                                        == dm_horz) {
                                                mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] +=