2 * Copyright 2020 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
26 #ifdef CONFIG_DRM_AMD_DC_DCN
29 #include "../display_mode_lib.h"
30 #include "display_mode_vba_30.h"
31 #include "../dml_inline_defs.h"
36 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
38 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
39 * ways. Unless there is something clearly wrong with it the code should
40 * remain as-is as it provides us with a guarantee from HW that it is correct.
/*
 * Per-pipe parameter fields. Several are read through `myPipe->` in
 * CalculatePrefetchSchedule (DCFCLKDeepSleep, DPPPerPlane, InterlaceEnable,
 * NumberOfCursors, DCCEnable, ODMCombineEnabled).
 * NOTE(review): the enclosing declaration's opening/closing lines are not
 * visible here and the embedded line numbers skip 50 and 58-59, so this field
 * list is probably incomplete — confirm against the full file.
 */
48 double DCFCLKDeepSleep;
49 unsigned int DPPPerPlane;
51 enum scan_direction_class SourceScan;
/* 256-byte block dimensions, luma (Y) and chroma (C) variants. */
52 unsigned int BlockWidth256BytesY;
53 unsigned int BlockHeight256BytesY;
54 unsigned int BlockWidth256BytesC;
55 unsigned int BlockHeight256BytesC;
/* Declared as unsigned int but tested as booleans by CalculatePrefetchSchedule. */
56 unsigned int InterlaceEnable;
57 unsigned int NumberOfCursors;
60 unsigned int DCCEnable;
61 bool ODMCombineEnabled;
/* NOTE(review): presumably a sentinel bpp value; its users are not in this part of the file. */
65 #define BPP_BLENDED_PIPE 0xffffffff
/*
 * NOTE(review): 5184 and 4096 match the slice-width limits quoted in the
 * dscceComputeDelay comments ("5184/numSlices (or 4096/numSlices in 420 mode)");
 * the code that applies these macros is not in this part of the file.
 */
66 #define DCN30_MAX_DSC_IMAGE_WIDTH 5184
67 #define DCN30_MAX_FMT_420_BUFFER_WIDTH 4096
/* Forward declaration; called from dml30_recalculate(). The definition is not in this part of the file. */
69 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
/* Forward declaration; called from dml30_recalculate(). The definition is not in this part of the file. */
70 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
71 struct display_mode_lib *mode_lib);
/*
 * Forward declaration of dscceComputeDelay().
 * NOTE(review): the embedded line numbers skip 73-74, so leading parameters
 * appear to be missing from this listing (the definition uses `BPP`, which is
 * not among the visible parameters) — confirm against the full file.
 */
72 static unsigned int dscceComputeDelay(
75 unsigned int sliceWidth,
76 unsigned int numSlices,
77 enum output_format_class pixelFormat,
78 enum output_encoder_class Output);
/* Forward declaration of dscComputeDelay(); takes the output pixel format and encoder class, returns an unsigned delay. */
79 static unsigned int dscComputeDelay(
80 enum output_format_class pixelFormat,
81 enum output_encoder_class Output);
82 // Super monster function with some 45 arguments
// Forward declaration of CalculatePrefetchSchedule(); returns bool.
// NOTE(review): the embedded line numbers skip several entries (e.g. 87, 100-101,
// 110, 116, 121, 128, 141, 143-144), so parameters such as the `myPipe` pointer,
// GPUVMEnable and HostVMEnable used by the definition are not visible here.
83 static bool CalculatePrefetchSchedule(
84 struct display_mode_lib *mode_lib,
85 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
86 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
88 unsigned int DSCDelay,
89 double DPPCLKDelaySubtotalPlusCNVCFormater,
90 double DPPCLKDelaySCL,
91 double DPPCLKDelaySCLLBOnly,
92 double DPPCLKDelayCNVCCursor,
93 double DISPCLKDelaySubtotal,
94 unsigned int DPP_RECOUT_WIDTH,
95 enum output_format_class OutputFormat,
96 unsigned int MaxInterDCNTileRepeaters,
97 unsigned int VStartup,
98 unsigned int MaxVStartup,
99 unsigned int GPUVMPageTableLevels,
102 unsigned int HostVMMaxNonCachedPageTableLevels,
103 double HostVMMinPageSize,
104 bool DynamicMetadataEnable,
105 bool DynamicMetadataVMEnabled,
106 int DynamicMetadataLinesBeforeActiveRequired,
107 unsigned int DynamicMetadataTransmittedBytes,
108 double UrgentLatency,
109 double UrgentExtraLatency,
111 unsigned int PDEAndMetaPTEBytesFrame,
112 unsigned int MetaRowByte,
113 unsigned int PixelPTEBytesPerRow,
114 double PrefetchSourceLinesY,
115 unsigned int SwathWidthY,
117 double VInitPreFillY,
118 unsigned int MaxNumSwathY,
119 double PrefetchSourceLinesC,
120 unsigned int SwathWidthC,
122 double VInitPreFillC,
123 unsigned int MaxNumSwathC,
124 long swath_width_luma_ub,
125 long swath_width_chroma_ub,
126 unsigned int SwathHeightY,
127 unsigned int SwathHeightC,
129 bool ProgressiveToInterlaceUnitInOPP,
// Pointer parameters from here on; the definition writes through several of them
// (e.g. *DSTXAfterScaler, *DSTYAfterScaler, *PrefetchBandwidth, *NotEnoughTimeForDynamicMetadata).
130 double *DSTXAfterScaler,
131 double *DSTYAfterScaler,
132 double *DestinationLinesForPrefetch,
133 double *PrefetchBandwidth,
134 double *DestinationLinesToRequestVMInVBlank,
135 double *DestinationLinesToRequestRowInVBlank,
136 double *VRatioPrefetchY,
137 double *VRatioPrefetchC,
138 double *RequiredPrefetchPixDataBWLuma,
139 double *RequiredPrefetchPixDataBWChroma,
140 bool *NotEnoughTimeForDynamicMetadata,
142 double *prefetch_vmrow_bw,
145 unsigned int *VUpdateOffsetPix,
146 double *VUpdateWidthPix,
147 double *VReadyOffsetPix);
/*
 * Forward declarations of the paired up/down rounding helpers; both take a
 * clock and a VCO speed and return a double. Definitions are not in this part
 * of the file.
 */
148 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
149 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
/*
 * Forward declaration of CalculateDCCConfiguration(); results are returned
 * through the six trailing `unsigned int *` parameters (luma/chroma pairs).
 * NOTE(review): the embedded line numbers skip 151, so the first parameter
 * appears to be missing from this listing.
 */
150 static void CalculateDCCConfiguration(
152 bool DCCProgrammingAssumesScanDirectionUnknown,
153 enum source_format_class SourcePixelFormat,
154 unsigned int ViewportWidthLuma,
155 unsigned int ViewportWidthChroma,
156 unsigned int ViewportHeightLuma,
157 unsigned int ViewportHeightChroma,
158 double DETBufferSize,
159 unsigned int RequestHeight256ByteLuma,
160 unsigned int RequestHeight256ByteChroma,
161 enum dm_swizzle_mode TilingFormat,
162 unsigned int BytePerPixelY,
163 unsigned int BytePerPixelC,
164 double BytePerPixelDETY,
165 double BytePerPixelDETC,
166 enum scan_direction_class ScanOrientation,
167 unsigned int *MaxUncompressedBlockLuma,
168 unsigned int *MaxUncompressedBlockChroma,
169 unsigned int *MaxCompressedBlockLuma,
170 unsigned int *MaxCompressedBlockChroma,
171 unsigned int *IndependentBlockLuma,
172 unsigned int *IndependentBlockChroma);
/*
 * Forward declaration of CalculatePrefetchSourceLines(); returns a double and
 * has two pointer parameters (VInitPreFill, MaxNumSwath).
 * NOTE(review): the embedded line numbers skip 175-177, so three parameters
 * appear to be missing from this listing.
 */
173 static double CalculatePrefetchSourceLines(
174 struct display_mode_lib *mode_lib,
178 bool ProgressiveToInterlaceUnitInOPP,
179 unsigned int SwathHeight,
180 unsigned int ViewportYStart,
181 double *VInitPreFill,
182 unsigned int *MaxNumSwath);
/*
 * Forward declaration of CalculateVMAndRowBytes(); returns an unsigned int and
 * has a long run of trailing pointer parameters (MacroTileWidth onward).
 * NOTE(review): the embedded line numbers skip 185, 194-195 and 200, so some
 * parameters appear to be missing from this listing.
 */
183 static unsigned int CalculateVMAndRowBytes(
184 struct display_mode_lib *mode_lib,
186 unsigned int BlockHeight256Bytes,
187 unsigned int BlockWidth256Bytes,
188 enum source_format_class SourcePixelFormat,
189 unsigned int SurfaceTiling,
190 unsigned int BytePerPixel,
191 enum scan_direction_class ScanDirection,
192 unsigned int SwathWidth,
193 unsigned int ViewportHeight,
196 unsigned int HostVMMaxNonCachedPageTableLevels,
197 unsigned int GPUVMMinPageSize,
198 unsigned int HostVMMinPageSize,
199 unsigned int PTEBufferSizeInRequests,
201 unsigned int DCCMetaPitch,
202 unsigned int *MacroTileWidth,
203 unsigned int *MetaRowByte,
204 unsigned int *PixelPTEBytesPerRow,
205 bool *PTEBufferSizeNotExceeded,
206 unsigned int *dpte_row_width_ub,
207 unsigned int *dpte_row_height,
208 unsigned int *MetaRequestWidth,
209 unsigned int *MetaRequestHeight,
210 unsigned int *meta_row_width,
211 unsigned int *meta_row_height,
212 unsigned int *vm_group_bytes,
213 unsigned int *dpte_group_bytes,
214 unsigned int *PixelPTEReqWidth,
215 unsigned int *PixelPTEReqHeight,
216 unsigned int *PTERequestSize,
217 unsigned int *DPDE0BytesFrame,
218 unsigned int *MetaPTEBytesFrame);
/*
 * Forward declaration of CalculateTWait(); takes the prefetch mode plus three
 * latency/time values and returns a double. The definition is not in this part
 * of the file.
 */
219 static double CalculateTWait(
220 unsigned int PrefetchMode,
221 double DRAMClockChangeLatency,
222 double UrgentLatency,
223 double SREnterPlusExitTime);
/*
 * Forward declaration of CalculateRowBandwidth(); the only pointer parameter
 * visible here is dpte_row_bw.
 * NOTE(review): the embedded line numbers skip 225, 227-230 and 239, so
 * several parameters appear to be missing from this listing.
 */
224 static void CalculateRowBandwidth(
226 enum source_format_class SourcePixelFormat,
231 unsigned int MetaRowByteLuma,
232 unsigned int MetaRowByteChroma,
233 unsigned int meta_row_height_luma,
234 unsigned int meta_row_height_chroma,
235 unsigned int PixelPTEBytesPerRowLuma,
236 unsigned int PixelPTEBytesPerRowChroma,
237 unsigned int dpte_row_height_luma,
238 unsigned int dpte_row_height_chroma,
240 double *dpte_row_bw);
/*
 * Forward declaration of CalculateFlipSchedule(); the four trailing parameters
 * are pointers (three double *, one bool *).
 * NOTE(review): the embedded line numbers skip 248, 250, 253 and 258-262, so
 * several parameters appear to be missing from this listing.
 */
241 static void CalculateFlipSchedule(
242 struct display_mode_lib *mode_lib,
243 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
244 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
245 double UrgentExtraLatency,
246 double UrgentLatency,
247 unsigned int GPUVMMaxPageTableLevels,
249 unsigned int HostVMMaxNonCachedPageTableLevels,
251 double HostVMMinPageSize,
252 double PDEAndMetaPTEBytesPerFrame,
254 double DPTEBytesPerRow,
255 double BandwidthAvailableForImmediateFlip,
256 unsigned int TotImmediateFlipBytes,
257 enum source_format_class SourcePixelFormat,
263 unsigned int dpte_row_height,
264 unsigned int meta_row_height,
265 unsigned int dpte_row_height_chroma,
266 unsigned int meta_row_height_chroma,
267 double *DestinationLinesToRequestVMInImmediateFlip,
268 double *DestinationLinesToRequestRowInImmediateFlip,
269 double *final_flip_bw,
270 bool *ImmediateFlipSupportedForPipe);
/*
 * Forward declaration of CalculateWriteBackDelay(); all parameters are passed
 * by value and the result is returned as a double. The definition is not in
 * this part of the file.
 */
271 static double CalculateWriteBackDelay(
272 enum source_format_class WritebackPixelFormat,
273 double WritebackHRatio,
274 double WritebackVRatio,
275 unsigned int WritebackVTaps,
276 long WritebackDestinationWidth,
277 long WritebackDestinationHeight,
278 long WritebackSourceHeight,
279 unsigned int HTotal);
/*
 * Forward declaration of CalculateDynamicMetadataParameters(); called from
 * CalculatePrefetchSchedule().
 * NOTE(review): the embedded line numbers skip 282-283, 285-287, 290 and
 * 292-295; the remaining parameters and the closing ");" of this prototype are
 * not visible in this listing.
 */
280 static void CalculateDynamicMetadataParameters(
281 int MaxInterDCNTileRepeaters,
284 double DCFClkDeepSleep,
288 long DynamicMetadataTransmittedBytes,
289 long DynamicMetadataLinesBeforeActiveRequired,
291 bool ProgressiveToInterlaceUnitInOPP,
/*
 * Forward declaration of CalculateWatermarksAndDRAMSpeedChangeSupport().
 * Array parameters are declared with [] and are unsized; the trailing pointer
 * parameters start at DRAMClockChangeSupport.
 * NOTE(review): the embedded line numbers skip 305-307, 311, 314, 316,
 * 320-321, 329, 333 and 336, so several parameters appear to be missing from
 * this listing.
 */
296 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
297 struct display_mode_lib *mode_lib,
298 unsigned int PrefetchMode,
299 unsigned int NumberOfActivePlanes,
300 unsigned int MaxLineBufferLines,
301 unsigned int LineBufferSize,
302 unsigned int DPPOutputBufferPixels,
303 unsigned int DETBufferSizeInKByte,
304 unsigned int WritebackInterfaceBufferSize,
308 unsigned int dpte_group_bytes[],
309 unsigned int MetaChunkSize,
310 double UrgentLatency,
312 double WritebackLatency,
313 double WritebackChunkSize,
315 double DRAMClockChangeLatency,
317 double SREnterPlusExitTime,
318 double DCFCLKDeepSleep,
319 unsigned int DPPPerPlane[],
322 unsigned int DETBufferSizeY[],
323 unsigned int DETBufferSizeC[],
324 unsigned int SwathHeightY[],
325 unsigned int SwathHeightC[],
326 unsigned int LBBitPerPixel[],
327 double SwathWidthY[],
328 double SwathWidthC[],
330 double HRatioChroma[],
331 unsigned int vtaps[],
332 unsigned int VTAPsChroma[],
334 double VRatioChroma[],
335 unsigned int HTotal[],
337 unsigned int BlendingAndTiming[],
338 double BytePerPixelDETY[],
339 double BytePerPixelDETC[],
340 double DSTXAfterScaler[],
341 double DSTYAfterScaler[],
342 bool WritebackEnable[],
343 enum source_format_class WritebackPixelFormat[],
344 double WritebackDestinationWidth[],
345 double WritebackDestinationHeight[],
346 double WritebackSourceHeight[],
347 enum clock_change_support *DRAMClockChangeSupport,
348 double *UrgentWatermark,
349 double *WritebackUrgentWatermark,
350 double *DRAMClockChangeWatermark,
351 double *WritebackDRAMClockChangeWatermark,
352 double *StutterExitWatermark,
353 double *StutterEnterPlusExitWatermark,
354 double *MinActiveDRAMClockChangeLatencySupported);
/*
 * Forward declaration of CalculateDCFCLKDeepSleep(); the final parameter is a
 * `double *DCFCLKDeepSleep`.
 * NOTE(review): the embedded line numbers skip 358-360, 365, 367, 370 and 373,
 * so several parameters appear to be missing from this listing.
 */
355 static void CalculateDCFCLKDeepSleep(
356 struct display_mode_lib *mode_lib,
357 unsigned int NumberOfActivePlanes,
361 double VRatioChroma[],
362 double SwathWidthY[],
363 double SwathWidthC[],
364 unsigned int DPPPerPlane[],
366 double HRatioChroma[],
368 double PSCL_THROUGHPUT[],
369 double PSCL_THROUGHPUT_CHROMA[],
371 double ReadBandwidthLuma[],
372 double ReadBandwidthChroma[],
374 double *DCFCLKDeepSleep);
/*
 * Forward declaration of CalculateUrgentBurstFactor(); the four trailing
 * parameters are pointers (three burst factors and a bool flag).
 * NOTE(review): the embedded line numbers skip 381 and 386-387, so some
 * parameters appear to be missing from this listing.
 */
375 static void CalculateUrgentBurstFactor(
376 long swath_width_luma_ub,
377 long swath_width_chroma_ub,
378 unsigned int DETBufferSizeInKByte,
379 unsigned int SwathHeightY,
380 unsigned int SwathHeightC,
382 double UrgentLatency,
383 double CursorBufferSize,
384 unsigned int CursorWidth,
385 unsigned int CursorBPP,
388 double BytePerPixelInDETY,
389 double BytePerPixelInDETC,
390 double DETBufferSizeY,
391 double DETBufferSizeC,
392 double *UrgentBurstFactorCursor,
393 double *UrgentBurstFactorLuma,
394 double *UrgentBurstFactorChroma,
395 bool *NotEnoughUrgentLatencyHiding);
/*
 * Forward declaration of UseMinimumDCFCLK(). Many array parameters carry fixed
 * inner dimensions of [2] or [2][DC__NUM_DPP__MAX]; the outermost dimension is
 * unsized.
 * NOTE(review): the embedded line numbers skip 400, 403, 405, 407-408, 410,
 * 421-422, 425, 428 and 443-446, so several parameters appear to be missing
 * from this listing.
 */
397 static void UseMinimumDCFCLK(
398 struct display_mode_lib *mode_lib,
399 int MaxInterDCNTileRepeaters,
401 double FinalDRAMClockChangeLatency,
402 double SREnterPlusExitTime,
404 int RoundTripPingLatencyCycles,
406 int PixelChunkSizeInKByte,
409 int GPUVMMaxPageTableLevels,
411 int NumberOfActivePlanes,
412 double HostVMMinPageSize,
413 int HostVMMaxNonCachedPageTableLevels,
414 bool DynamicMetadataVMEnabled,
415 enum immediate_flip_requirement ImmediateFlipRequirement,
416 bool ProgressiveToInterlaceUnitInOPP,
417 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
418 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
419 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
420 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
423 int DynamicMetadataTransmittedBytes[],
424 int DynamicMetadataLinesBeforeActiveRequired[],
426 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
427 double RequiredDISPCLK[][2],
429 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
430 double ProjectedDCFCLKDeepSleep[][2],
431 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
432 double TotalVActivePixelBandwidth[][2],
433 double TotalVActiveCursorBandwidth[][2],
434 double TotalMetaRowBandwidth[][2],
435 double TotalDPTERowBandwidth[][2],
436 unsigned int TotalNumberOfActiveDPP[][2],
437 unsigned int TotalNumberOfDCCActiveDPP[][2],
438 int dpte_group_bytes[],
439 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
440 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
441 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
442 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
447 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
448 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
449 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
450 bool DynamicMetadataEnable[],
451 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
452 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
453 double ReadBandwidthLuma[],
454 double ReadBandwidthChroma[],
455 double DCFCLKPerState[],
456 double DCFCLKState[][2]);
/*
 * Forward declaration of CalculatePixelDeliveryTimes(); every parameter after
 * NumberOfActivePlanes that is visible here is an array.
 * NOTE(review): the embedded line numbers skip 459, 466, 468 and 471-472, so
 * some parameters appear to be missing from this listing. Which arrays are
 * inputs and which are outputs cannot be told from the prototype alone.
 */
457 static void CalculatePixelDeliveryTimes(
458 unsigned int NumberOfActivePlanes,
460 double VRatioChroma[],
461 double VRatioPrefetchY[],
462 double VRatioPrefetchC[],
463 unsigned int swath_width_luma_ub[],
464 unsigned int swath_width_chroma_ub[],
465 unsigned int DPPPerPlane[],
467 double HRatioChroma[],
469 double PSCL_THROUGHPUT[],
470 double PSCL_THROUGHPUT_CHROMA[],
473 enum scan_direction_class SourceScan[],
474 unsigned int NumberOfCursors[],
475 unsigned int CursorWidth[][2],
476 unsigned int CursorBPP[][2],
477 unsigned int BlockWidth256BytesY[],
478 unsigned int BlockHeight256BytesY[],
479 unsigned int BlockWidth256BytesC[],
480 unsigned int BlockHeight256BytesC[],
481 double DisplayPipeLineDeliveryTimeLuma[],
482 double DisplayPipeLineDeliveryTimeChroma[],
483 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
484 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
485 double DisplayPipeRequestDeliveryTimeLuma[],
486 double DisplayPipeRequestDeliveryTimeChroma[],
487 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
488 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
489 double CursorRequestDeliveryTime[],
490 double CursorRequestDeliveryTimePrefetch[]);
/*
 * Forward declaration of CalculateMetaAndPTETimes(). Note that the size/count
 * arrays here are plain `int[]`, unlike the `unsigned int` scalars used for
 * similarly named values in other prototypes in this file.
 * NOTE(review): the embedded line numbers skip 494-495, 497-498 and 502-505,
 * so some parameters appear to be missing from this listing.
 */
492 static void CalculateMetaAndPTETimes(
493 int NumberOfActivePlanes,
496 int MinMetaChunkSizeBytes,
499 double VRatioChroma[],
500 double DestinationLinesToRequestRowInVBlank[],
501 double DestinationLinesToRequestRowInImmediateFlip[],
506 enum scan_direction_class SourceScan[],
507 int dpte_row_height[],
508 int dpte_row_height_chroma[],
509 int meta_row_width[],
510 int meta_row_width_chroma[],
511 int meta_row_height[],
512 int meta_row_height_chroma[],
513 int meta_req_width[],
514 int meta_req_width_chroma[],
515 int meta_req_height[],
516 int meta_req_height_chroma[],
517 int dpte_group_bytes[],
518 int PTERequestSizeY[],
519 int PTERequestSizeC[],
520 int PixelPTEReqWidthY[],
521 int PixelPTEReqHeightY[],
522 int PixelPTEReqWidthC[],
523 int PixelPTEReqHeightC[],
524 int dpte_row_width_luma_ub[],
525 int dpte_row_width_chroma_ub[],
526 double DST_Y_PER_PTE_ROW_NOM_L[],
527 double DST_Y_PER_PTE_ROW_NOM_C[],
528 double DST_Y_PER_META_ROW_NOM_L[],
529 double DST_Y_PER_META_ROW_NOM_C[],
530 double TimePerMetaChunkNominal[],
531 double TimePerChromaMetaChunkNominal[],
532 double TimePerMetaChunkVBlank[],
533 double TimePerChromaMetaChunkVBlank[],
534 double TimePerMetaChunkFlip[],
535 double TimePerChromaMetaChunkFlip[],
536 double time_per_pte_group_nom_luma[],
537 double time_per_pte_group_vblank_luma[],
538 double time_per_pte_group_flip_luma[],
539 double time_per_pte_group_nom_chroma[],
540 double time_per_pte_group_vblank_chroma[],
541 double time_per_pte_group_flip_chroma[]);
/*
 * Forward declaration of CalculateVMGroupAndRequestTimes(). The per-frame byte
 * arrays mix signedness: dpde0_* are `unsigned int[]`, meta_pte_* are `int[]`.
 * NOTE(review): the embedded line numbers skip 545, 548 and 551-552, so some
 * parameters appear to be missing from this listing.
 */
543 static void CalculateVMGroupAndRequestTimes(
544 unsigned int NumberOfActivePlanes,
546 unsigned int GPUVMMaxPageTableLevels,
547 unsigned int HTotal[],
549 double DestinationLinesToRequestVMInVBlank[],
550 double DestinationLinesToRequestVMInImmediateFlip[],
553 int dpte_row_width_luma_ub[],
554 int dpte_row_width_chroma_ub[],
555 int vm_group_bytes[],
556 unsigned int dpde0_bytes_per_frame_ub_l[],
557 unsigned int dpde0_bytes_per_frame_ub_c[],
558 int meta_pte_bytes_per_frame_ub_l[],
559 int meta_pte_bytes_per_frame_ub_c[],
560 double TimePerVMGroupVBlank[],
561 double TimePerVMGroupFlip[],
562 double TimePerVMRequestVBlank[],
563 double TimePerVMRequestFlip[]);
/*
 * Forward declaration of CalculateStutterEfficiency(); the three trailing
 * `double *` parameters are the only pointer (non-array) parameters visible.
 * NOTE(review): the embedded line numbers skip 569-571, 573, 575, 578-579,
 * 582-585 and 593-594, so several parameters appear to be missing from this
 * listing.
 */
565 static void CalculateStutterEfficiency(
566 int NumberOfActivePlanes,
567 long ROBBufferSizeInKByte,
568 double TotalDataReadBandwidth,
572 bool SynchronizedVBlank,
574 unsigned int DETBufferSizeY[],
576 double BytePerPixelDETY[],
577 double SwathWidthY[],
580 double DCCRateLuma[],
581 double DCCRateChroma[],
586 enum scan_direction_class SourceScan[],
587 int BlockHeight256BytesY[],
588 int BlockWidth256BytesY[],
589 int BlockHeight256BytesC[],
590 int BlockWidth256BytesC[],
591 int DCCYMaxUncompressedBlock[],
592 int DCCCMaxUncompressedBlock[],
595 bool WritebackEnable[],
596 double ReadBandwidthPlaneLuma[],
597 double ReadBandwidthPlaneChroma[],
598 double meta_row_bw[],
599 double dpte_row_bw[],
600 double *StutterEfficiencyNotIncludingVBlank,
601 double *StutterEfficiency,
602 double *StutterPeriodOut);
/*
 * Forward declaration of CalculateSwathAndDETConfiguration(); the last
 * parameter is a single `bool *ViewportSizeSupport`, preceded by a per-plane
 * bool array.
 * NOTE(review): the embedded line numbers skip 605, 613, 615-616, 625-626,
 * 629-630, 632, 635 and 637-638, so several parameters appear to be missing
 * from this listing.
 */
604 static void CalculateSwathAndDETConfiguration(
606 int NumberOfActivePlanes,
607 unsigned int DETBufferSizeInKByte,
608 double MaximumSwathWidthLuma[],
609 double MaximumSwathWidthChroma[],
610 enum scan_direction_class SourceScan[],
611 enum source_format_class SourcePixelFormat[],
612 enum dm_swizzle_mode SurfaceTiling[],
614 int ViewportHeight[],
617 int SurfaceHeightY[],
618 int SurfaceHeightC[],
619 int Read256BytesBlockHeightY[],
620 int Read256BytesBlockHeightC[],
621 int Read256BytesBlockWidthY[],
622 int Read256BytesBlockWidthC[],
623 enum odm_combine_mode ODMCombineEnabled[],
624 int BlendingAndTiming[],
627 double BytePerPixDETY[],
628 double BytePerPixDETC[],
631 double HRatioChroma[],
633 int swath_width_luma_ub[],
634 int swath_width_chroma_ub[],
636 double SwathWidthChroma[],
639 unsigned int DETBufferSizeY[],
640 unsigned int DETBufferSizeC[],
641 bool ViewportSizeSupportPerPlane[],
642 bool *ViewportSizeSupport);
/*
 * Forward declaration of CalculateSwathWidth(); all visible parameters after
 * NumberOfActivePlanes are arrays.
 * NOTE(review): the embedded line numbers skip 644, 655-656 and 663-664, so
 * some parameters appear to be missing from this listing. swath_width_*_ub are
 * `unsigned int[]` here but `int[]` in CalculateSwathAndDETConfiguration —
 * confirm the mismatch is intentional.
 */
643 static void CalculateSwathWidth(
645 int NumberOfActivePlanes,
646 enum source_format_class SourcePixelFormat[],
647 enum scan_direction_class SourceScan[],
648 unsigned int ViewportWidth[],
649 unsigned int ViewportHeight[],
650 unsigned int SurfaceWidthY[],
651 unsigned int SurfaceWidthC[],
652 unsigned int SurfaceHeightY[],
653 unsigned int SurfaceHeightC[],
654 enum odm_combine_mode ODMCombineEnabled[],
657 int Read256BytesBlockHeightY[],
658 int Read256BytesBlockHeightC[],
659 int Read256BytesBlockWidthY[],
660 int Read256BytesBlockWidthC[],
661 int BlendingAndTiming[],
662 unsigned int HActive[],
665 double SwathWidthSingleDPPY[],
666 double SwathWidthSingleDPPC[],
667 double SwathWidthY[],
668 double SwathWidthC[],
669 int MaximumSwathHeightY[],
670 int MaximumSwathHeightC[],
671 unsigned int swath_width_luma_ub[],
672 unsigned int swath_width_chroma_ub[]);
/*
 * Forward declaration of CalculateExtraLatency(); returns a double. Its
 * visible parameters are those of CalculateExtraLatencyBytes() plus a leading
 * RoundTripPingLatencyCycles.
 * NOTE(review): the embedded line numbers skip 676, 680-683 and 685, so
 * several parameters appear to be missing from this listing.
 */
673 static double CalculateExtraLatency(
674 long RoundTripPingLatencyCycles,
675 long ReorderingBytes,
677 int TotalNumberOfActiveDPP,
678 int PixelChunkSizeInKByte,
679 int TotalNumberOfDCCActiveDPP,
684 int NumberOfActivePlanes,
686 int dpte_group_bytes[],
687 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
688 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
689 double HostVMMinPageSize,
690 int HostVMMaxNonCachedPageTableLevels);
/*
 * Forward declaration of CalculateExtraLatencyBytes(); returns a double.
 * NOTE(review): the embedded line numbers skip 696-698 and 700, so some
 * parameters appear to be missing from this listing.
 */
691 static double CalculateExtraLatencyBytes(
692 long ReorderingBytes,
693 int TotalNumberOfActiveDPP,
694 int PixelChunkSizeInKByte,
695 int TotalNumberOfDCCActiveDPP,
699 int NumberOfActivePlanes,
701 int dpte_group_bytes[],
702 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
703 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
704 double HostVMMinPageSize,
705 int HostVMMaxNonCachedPageTableLevels);
/*
 * Forward declaration of CalculateUrgentLatency(); takes three urgent-latency
 * values, an adjustment enable flag with two fabric-clock adjustment terms,
 * and a fabric clock, and returns a double. The definition is not in this part
 * of the file.
 */
706 static double CalculateUrgentLatency(
707 double UrgentLatencyPixelDataOnly,
708 double UrgentLatencyPixelMixedWithVMData,
709 double UrgentLatencyVMDataOnly,
710 bool DoUrgentLatencyAdjustment,
711 double UrgentLatencyAdjustmentFabricClockComponent,
712 double UrgentLatencyAdjustmentFabricClockReference,
713 double FabricClockSingle);
/*
 * dml30_recalculate - non-static entry point; passes mode_lib to four routines
 * in the order shown below.
 * NOTE(review): the function's braces (embedded lines 716 and 721) are not
 * visible in this listing. The first two callees are not declared in this part
 * of the file — presumably they come from the included headers; confirm.
 */
715 void dml30_recalculate(struct display_mode_lib *mode_lib)
717 ModeSupportAndSystemConfiguration(mode_lib);
718 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
/* The next two are the file-local functions forward-declared near the top of this file. */
719 DisplayPipeConfiguration(mode_lib);
720 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
/*
 * dscceComputeDelay - delay calculation for the "dscce", derived from slice
 * geometry, bpp and pixel format; the last visible statement converts the
 * cycle count `Delay` into `pixels`.
 * NOTE(review): this listing omits many lines of the function (the embedded
 * numbering jumps, e.g. 723 -> 726, 757 -> 767, 777 -> 782): the braces, the
 * leading parameters (`BPP` is used below but not declared in the visible
 * lines), the bodies of the if/else chains, the assignments to lstall, D, wx,
 * P, l0 and a, and the return statement. The comments added here describe only
 * what the visible lines show.
 */
723 static unsigned int dscceComputeDelay(
726 unsigned int sliceWidth,
727 unsigned int numSlices,
728 enum output_format_class pixelFormat,
729 enum output_encoder_class Output)
731 // valid bpc = source bits per component in the set of {8, 10, 12}
732 // valid bpp = increments of 1/16 of a bit
733 // min = 6/7/8 in N420/N422/444, respectively
734 // max = such that compression is 1:1
735 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
736 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
737 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
740 unsigned int rcModelSize = 8192;
742 // N422/N420 operate at 2 pixels per clock
743 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
746 if (pixelFormat == dm_420)
748 // #all other modes operate at 1 pixel per clock
749 else if (pixelFormat == dm_444)
751 else if (pixelFormat == dm_n422)
756 //initial transmit delay as per PPS
// half of rcModelSize, divided by BPP and by pixelsPerClock, then passed through dml_round()
757 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
767 //divide by pixel per cycle to compute slice width as seen by DSC
768 w = sliceWidth / pixelsPerClock;
770 //422 mode has an additional cycle of delay
// NOTE(review): the branch bodies that set `s` are not visible; the condition below lists dm_420, dm_444 and dm_n422.
771 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
776 //main calculation for the dscce
777 ix = initalXmitDelay + 45;
// (a + 2) / 3 is a / 3 rounded up in unsigned integer arithmetic
782 ax = (a + 2) / 3 + D + 6 + 1;
// likewise, L is ax / wx rounded up (unsigned integer ceiling division)
783 L = (ax + wx - 1) / wx;
784 if ((ix % w) == 0 && P != 0)
788 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
790 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
791 pixels = Delay * 3 * pixelsPerClock;
/*
 * dscComputeDelay - starts from Delay = 0 and branches on pixelFormat: one
 * branch for dm_420, one for dm_n422, and a third group of stage comments for
 * the remaining formats.
 * NOTE(review): only the stage comments survive in this listing; the
 * statements under each comment, the braces, the final else and the return are
 * not visible. Presumably each stage comment precedes an update of `Delay` —
 * confirm against the full file.
 */
795 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
797 unsigned int Delay = 0;
799 if (pixelFormat == dm_420) {
804 // dscc - input deserializer
806 // dscc gets pixels every other cycle
808 // dscc - input cdc fifo
810 // dscc gets pixels every other cycle
812 // dscc - cdc uncertainty
814 // dscc - output cdc fifo
816 // dscc gets pixels every other cycle
818 // dscc - cdc uncertainty
820 // dscc - output serializer
824 } else if (pixelFormat == dm_n422) {
829 // dscc - input deserializer
831 // dscc - input cdc fifo
833 // dscc - cdc uncertainty
835 // dscc - output cdc fifo
837 // dscc - cdc uncertainty
839 // dscc - output serializer
// NOTE(review): the stage comments below appear to belong to the remaining-formats branch, whose opening line is not visible.
849 // dscc - input deserializer
851 // dscc - input cdc fifo
853 // dscc - cdc uncertainty
855 // dscc - output cdc fifo
857 // dscc - output serializer
859 // dscc - cdc uncertainty
868 static bool CalculatePrefetchSchedule(
869 struct display_mode_lib *mode_lib,
870 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
871 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
873 unsigned int DSCDelay,
874 double DPPCLKDelaySubtotalPlusCNVCFormater,
875 double DPPCLKDelaySCL,
876 double DPPCLKDelaySCLLBOnly,
877 double DPPCLKDelayCNVCCursor,
878 double DISPCLKDelaySubtotal,
879 unsigned int DPP_RECOUT_WIDTH,
880 enum output_format_class OutputFormat,
881 unsigned int MaxInterDCNTileRepeaters,
882 unsigned int VStartup,
883 unsigned int MaxVStartup,
884 unsigned int GPUVMPageTableLevels,
887 unsigned int HostVMMaxNonCachedPageTableLevels,
888 double HostVMMinPageSize,
889 bool DynamicMetadataEnable,
890 bool DynamicMetadataVMEnabled,
891 int DynamicMetadataLinesBeforeActiveRequired,
892 unsigned int DynamicMetadataTransmittedBytes,
893 double UrgentLatency,
894 double UrgentExtraLatency,
896 unsigned int PDEAndMetaPTEBytesFrame,
897 unsigned int MetaRowByte,
898 unsigned int PixelPTEBytesPerRow,
899 double PrefetchSourceLinesY,
900 unsigned int SwathWidthY,
902 double VInitPreFillY,
903 unsigned int MaxNumSwathY,
904 double PrefetchSourceLinesC,
905 unsigned int SwathWidthC,
907 double VInitPreFillC,
908 unsigned int MaxNumSwathC,
909 long swath_width_luma_ub,
910 long swath_width_chroma_ub,
911 unsigned int SwathHeightY,
912 unsigned int SwathHeightC,
914 bool ProgressiveToInterlaceUnitInOPP,
915 double *DSTXAfterScaler,
916 double *DSTYAfterScaler,
917 double *DestinationLinesForPrefetch,
918 double *PrefetchBandwidth,
919 double *DestinationLinesToRequestVMInVBlank,
920 double *DestinationLinesToRequestRowInVBlank,
921 double *VRatioPrefetchY,
922 double *VRatioPrefetchC,
923 double *RequiredPrefetchPixDataBWLuma,
924 double *RequiredPrefetchPixDataBWChroma,
925 bool *NotEnoughTimeForDynamicMetadata,
927 double *prefetch_vmrow_bw,
930 unsigned int *VUpdateOffsetPix,
931 double *VUpdateWidthPix,
932 double *VReadyOffsetPix)
934 bool MyError = false;
935 unsigned int DPPCycles = 0, DISPCLKCycles = 0;
936 double DSTTotalPixelsAfterScaler = 0;
937 double LineTime = 0, Tsetup = 0;
938 double dst_y_prefetch_equ = 0;
940 double prefetch_bw_oto = 0;
943 double Tvm_oto_lines = 0;
944 double Tr0_oto_lines = 0;
945 double dst_y_prefetch_oto = 0;
946 double TimeForFetchingMetaPTE = 0;
947 double TimeForFetchingRowInVBlank = 0;
948 double LinesToRequestPrefetchPixelData = 0;
949 double HostVMInefficiencyFactor = 0;
950 unsigned int HostVMDynamicLevelsTrips = 0;
951 double trip_to_mem = 0;
952 double Tvm_trips = 0;
953 double Tr0_trips = 0;
954 double Tvm_trips_rounded = 0;
955 double Tr0_trips_rounded = 0;
957 double Tpre_rounded = 0;
958 double prefetch_bw_equ = 0;
965 if (GPUVMEnable == true && HostVMEnable == true) {
966 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
967 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
969 HostVMInefficiencyFactor = 1;
970 HostVMDynamicLevelsTrips = 0;
973 CalculateDynamicMetadataParameters(
974 MaxInterDCNTileRepeaters,
977 myPipe->DCFCLKDeepSleep,
981 DynamicMetadataTransmittedBytes,
982 DynamicMetadataLinesBeforeActiveRequired,
983 myPipe->InterlaceEnable,
984 ProgressiveToInterlaceUnitInOPP,
990 LineTime = myPipe->HTotal / myPipe->PixelClock;
991 trip_to_mem = UrgentLatency;
992 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
994 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
995 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
997 *Tdmdl = TWait + UrgentExtraLatency;
1000 if (DynamicMetadataEnable == true) {
1001 if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
1002 *NotEnoughTimeForDynamicMetadata = true;
1004 *NotEnoughTimeForDynamicMetadata = false;
1005 dml_print("DML: Not Enough Time for Dynamic Meta!\n");
1006 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1007 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1008 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1009 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1012 *NotEnoughTimeForDynamicMetadata = false;
1015 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1017 if (myPipe->ScalerEnabled)
1018 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1020 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1022 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1024 DISPCLKCycles = DISPCLKDelaySubtotal;
1026 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1029 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
1032 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1034 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
1035 *DSTYAfterScaler = 1;
1037 *DSTYAfterScaler = 0;
1039 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1040 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1041 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1046 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1047 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1048 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1051 if (GPUVMPageTableLevels >= 3) {
1052 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1055 } else if (!myPipe->DCCEnable)
1058 *Tno_bw = LineTime / 4;
1060 dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
1061 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1063 Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
1064 Tsw_oto = Lsw_oto * LineTime;
1066 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto;
1068 if (GPUVMEnable == true) {
1069 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
1073 Tvm_oto = LineTime / 4.0;
1075 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1077 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
1078 LineTime - Tvm_oto, LineTime / 4);
1080 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1082 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1083 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1084 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1086 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1087 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1089 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1090 dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);
1092 dml_print("DML: LineTime: %f\n", LineTime);
1093 dml_print("DML: VStartup: %d\n", VStartup);
1094 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1095 dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
1096 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1097 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1098 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1099 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1100 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1101 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1102 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1103 dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1104 dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler);
1106 *PrefetchBandwidth = 0;
1107 *DestinationLinesToRequestVMInVBlank = 0;
1108 *DestinationLinesToRequestRowInVBlank = 0;
1109 *VRatioPrefetchY = 0;
1110 *VRatioPrefetchC = 0;
1111 *RequiredPrefetchPixDataBWLuma = 0;
1112 if (dst_y_prefetch_equ > 1) {
1113 double PrefetchBandwidth1 = 0;
1114 double PrefetchBandwidth2 = 0;
1115 double PrefetchBandwidth3 = 0;
1116 double PrefetchBandwidth4 = 0;
1118 if (Tpre_rounded - *Tno_bw > 0)
1119 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
1120 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1121 + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
1122 + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1123 / (Tpre_rounded - *Tno_bw);
1125 PrefetchBandwidth1 = 0;
1127 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) {
1128 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw);
1131 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1132 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
1133 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1134 swath_width_luma_ub * BytePerPixelY +
1135 PrefetchSourceLinesC * swath_width_chroma_ub *
1137 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1139 PrefetchBandwidth2 = 0;
1141 if (Tpre_rounded - Tvm_trips_rounded > 0)
1142 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
1143 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1144 swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
1145 swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded -
1148 PrefetchBandwidth3 = 0;
1150 if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1151 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
1154 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1155 PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1156 / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1158 PrefetchBandwidth4 = 0;
1165 if (PrefetchBandwidth1 > 0) {
1166 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
1167 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1176 if (PrefetchBandwidth2 > 0) {
1177 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
1178 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1187 if (PrefetchBandwidth3 > 0) {
1188 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
1189 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1199 prefetch_bw_equ = PrefetchBandwidth1;
1200 } else if (Case2OK) {
1201 prefetch_bw_equ = PrefetchBandwidth2;
1202 } else if (Case3OK) {
1203 prefetch_bw_equ = PrefetchBandwidth3;
1205 prefetch_bw_equ = PrefetchBandwidth4;
1208 dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);
1210 if (prefetch_bw_equ > 0) {
1212 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1214 Tvm_equ = LineTime / 4;
1217 if ((GPUVMEnable || myPipe->DCCEnable)) {
1219 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1221 (LineTime - Tvm_equ) / 2,
1224 Tr0_equ = (LineTime - Tvm_equ) / 2;
1229 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1233 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1234 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1235 TimeForFetchingMetaPTE = Tvm_oto;
1236 TimeForFetchingRowInVBlank = Tr0_oto;
1237 *PrefetchBandwidth = prefetch_bw_oto;
1239 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1240 TimeForFetchingMetaPTE = Tvm_equ;
1241 TimeForFetchingRowInVBlank = Tr0_equ;
1242 *PrefetchBandwidth = prefetch_bw_equ;
1245 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1247 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1250 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
1251 - 2 * *DestinationLinesToRequestRowInVBlank;
1253 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1255 *VRatioPrefetchY = (double) PrefetchSourceLinesY
1256 / LinesToRequestPrefetchPixelData;
1257 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1258 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1259 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1260 *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1261 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1262 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1265 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1266 *VRatioPrefetchY = 0;
1270 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1271 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1273 if ((SwathHeightC > 4)) {
1274 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1275 *VRatioPrefetchC = dml_max(*VRatioPrefetchC,
1276 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1277 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1280 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1281 *VRatioPrefetchC = 0;
1285 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
1286 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime;
1289 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1290 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1291 *VRatioPrefetchY = 0;
1292 *VRatioPrefetchC = 0;
1293 *RequiredPrefetchPixDataBWLuma = 0;
1294 *RequiredPrefetchPixDataBWChroma = 0;
1297 dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1298 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1299 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1300 dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1301 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
1302 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1303 dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
1304 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
1305 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1309 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1313 double prefetch_vm_bw = 0;
1314 double prefetch_row_bw = 0;
1316 if (PDEAndMetaPTEBytesFrame == 0) {
1318 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1319 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1323 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1325 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1326 prefetch_row_bw = 0;
1327 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1328 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1330 prefetch_row_bw = 0;
1332 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1335 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1339 *PrefetchBandwidth = 0;
1340 TimeForFetchingMetaPTE = 0;
1341 TimeForFetchingRowInVBlank = 0;
1342 *DestinationLinesToRequestVMInVBlank = 0;
1343 *DestinationLinesToRequestRowInVBlank = 0;
1344 *DestinationLinesForPrefetch = 0;
1345 LinesToRequestPrefetchPixelData = 0;
1346 *VRatioPrefetchY = 0;
1347 *VRatioPrefetchC = 0;
1348 *RequiredPrefetchPixDataBWLuma = 0;
1349 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * Return the clock obtained by dividing 4 * VCOSpeed by a whole-number
 * divider, where the divider is (4 * VCOSpeed / Clock) rounded with
 * dml_floor(..., 1).
 *
 * Assuming dml_floor(x, 1) rounds x down to a whole number (helper comes from
 * dml_inline_defs.h - confirm), the divider never exceeds the exact ratio, so
 * the result is never below the requested Clock.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double dfs_divider = dml_floor(VCOSpeed * 4 / Clock, 1);

	return VCOSpeed * 4 / dfs_divider;
}
/*
 * Return the clock obtained by dividing 4 * VCOSpeed by a whole-number
 * divider, where the divider is (4 * VCOSpeed / Clock) rounded with
 * dml_ceil(..., 1).
 *
 * Assuming dml_ceil(x, 1) rounds x up to a whole number (helper comes from
 * dml_inline_defs.h - confirm), the divider is never below the exact ratio,
 * so the result never exceeds the requested Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double dfs_divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return VCOSpeed * 4 / dfs_divider;
}
/*
 * CalculateDCCConfiguration - pick the DCC request type for the luma and
 * chroma planes of one surface and report the matching block sizes.
 *
 * Full-swath byte counts for horizontal and vertical scan are compared with
 * DETBufferSize to decide, per plane and per direction, whether 256-byte
 * requests can be kept (req128_* == 0) or 128-byte requests are needed
 * (req128_* == 1).  BytePerPixelY/C and TilingFormat then decide whether a
 * 128-byte request is treated as contiguous (segment_order_*_contiguous_*).
 *
 * Outputs, written through the pointer parameters:
 *   MaxUncompressedBlockLuma/Chroma - 256 for every request type
 *   MaxCompressedBlockLuma/Chroma   - 256, 128 or 64
 *   IndependentBlockLuma/Chroma     - 0, 128 or 64
 * The chroma outputs are forced to 0 when DCCEnabled is not true or
 * BytePerPixelC == 0; the luma outputs are forced to 0 when DCCEnabled is
 * not true.
 */
1365 static void CalculateDCCConfiguration(
1367 bool DCCProgrammingAssumesScanDirectionUnknown,
1368 enum source_format_class SourcePixelFormat,
1369 unsigned int SurfaceWidthLuma,
1370 unsigned int SurfaceWidthChroma,
1371 unsigned int SurfaceHeightLuma,
1372 unsigned int SurfaceHeightChroma,
1373 double DETBufferSize,
1374 unsigned int RequestHeight256ByteLuma,
1375 unsigned int RequestHeight256ByteChroma,
1376 enum dm_swizzle_mode TilingFormat,
1377 unsigned int BytePerPixelY,
1378 unsigned int BytePerPixelC,
1379 double BytePerPixelDETY,
1380 double BytePerPixelDETC,
1381 enum scan_direction_class ScanOrientation,
1382 unsigned int *MaxUncompressedBlockLuma,
1383 unsigned int *MaxUncompressedBlockChroma,
1384 unsigned int *MaxCompressedBlockLuma,
1385 unsigned int *MaxCompressedBlockChroma,
1386 unsigned int *IndependentBlockLuma,
1387 unsigned int *IndependentBlockChroma)
/* Per plane (l = luma, c = chroma) and direction: 1 when 128-byte requests are needed */
1395 int req128_horz_wc_l = 0;
1396 int req128_horz_wc_c = 0;
1397 int req128_vert_wc_l = 0;
1398 int req128_vert_wc_c = 0;
/* Per plane and direction: 1 when a 128-byte request is treated as contiguous */
1399 int segment_order_horz_contiguous_luma = 0;
1400 int segment_order_horz_contiguous_chroma = 0;
1401 int segment_order_vert_contiguous_luma = 0;
1402 int segment_order_vert_contiguous_chroma = 0;
/* Bytes in one full swath, per direction and plane */
1404 long full_swath_bytes_horz_wc_l = 0;
1405 long full_swath_bytes_horz_wc_c = 0;
1406 long full_swath_bytes_vert_wc_l = 0;
1407 long full_swath_bytes_vert_wc_c = 0;
1409 long swath_buf_size = 0;
1410 double detile_buf_vp_horz_limit = 0;
1411 double detile_buf_vp_vert_limit = 0;
1413 long MAS_vp_horz_limit = 0;
1414 long MAS_vp_vert_limit = 0;
1415 long max_vp_horz_width = 0;
1416 long max_vp_vert_height = 0;
1417 long eff_surf_width_l = 0;
1418 long eff_surf_width_c = 0;
1419 long eff_surf_height_l = 0;
1420 long eff_surf_height_c = 0;
1424 REQ_128BytesNonContiguous,
1425 REQ_128BytesContiguous,
1429 RequestType RequestLuma;
1430 RequestType RequestChroma;
/* yuv420 is 1 for dm_420_8, dm_420_10 and dm_420_12, otherwise 0 */
1432 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
/* The horz/vert divider flags depend on bytes per pixel and, for 8 bytes per pixel, on the 64kb_s swizzle family */
1438 if (BytePerPixelY == 1)
1440 if (BytePerPixelC == 1)
1442 if (BytePerPixelY == 8
1443 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1444 || TilingFormat == dm_sw_64kb_s_x))
1446 if (BytePerPixelC == 8
1447 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1448 || TilingFormat == dm_sw_64kb_s_x))
/*
 * Viewport limits imposed by the detile buffer.  With no chroma plane
 * (BytePerPixelC == 0) the swath buffer is half of DETBufferSize minus
 * 2 * 256 bytes; with a chroma plane 2 * 2 * 256 bytes are subtracted and
 * both planes contribute to the per-pixel cost.
 */
1451 if (BytePerPixelC == 0) {
1452 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1453 detile_buf_vp_horz_limit = (double) swath_buf_size
1454 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1455 / (1 + horz_div_l));
1456 detile_buf_vp_vert_limit = (double) swath_buf_size
1457 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1459 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1460 detile_buf_vp_horz_limit = (double) swath_buf_size
1461 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1463 + (double) RequestHeight256ByteChroma
1464 * BytePerPixelC / (1 + horz_div_c)
1466 detile_buf_vp_vert_limit = (double) swath_buf_size
1467 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
1468 + 256.0 / RequestHeight256ByteChroma
1469 / (1 + vert_div_c) / (1 + yuv420));
/* dm_420_10 surfaces get limits 1.5 times larger */
1472 if (SourcePixelFormat == dm_420_10) {
1473 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1474 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/* dml_floor(x, 16) presumably rounds x down to a multiple of 16 - confirm against dml_inline_defs.h */
1477 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1478 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/* Fixed caps: 5760 horizontally; vertically 2880 with a chroma plane, 5760 without */
1480 MAS_vp_horz_limit = 5760;
1481 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
/* Effective viewport limit is the smaller of the fixed cap and the detile-buffer limit */
1482 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1483 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1485 (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
/* Chroma dimensions are the luma ones halved for the 4:2:0 formats flagged by yuv420 */
1486 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1487 eff_surf_height_l = (
1488 SurfaceHeightLuma > max_vp_vert_height ?
1489 max_vp_vert_height : SurfaceHeightLuma);
1490 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/* Full-swath byte counts (integer arithmetic on long values) */
1492 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1493 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1494 if (BytePerPixelC > 0) {
1495 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma
1497 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1499 full_swath_bytes_horz_wc_c = 0;
1500 full_swath_bytes_vert_wc_c = 0;
/*
 * dm_420_10: scale the byte counts by 2/3 (integer division happens before
 * the call) and pass them through dml_ceil(..., 256), which presumably
 * rounds up to a multiple of 256 - confirm against dml_inline_defs.h.
 */
1503 if (SourcePixelFormat == dm_420_10) {
1504 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1505 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1506 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1507 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
/*
 * Horizontal scan: if two full swaths of both planes fit in DETBufferSize
 * neither plane needs 128-byte requests; otherwise the luma/chroma size
 * ratio (threshold 1.5) selects which plane(s) switch to 128-byte requests.
 */
1510 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1511 req128_horz_wc_l = 0;
1512 req128_horz_wc_c = 0;
1513 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
1514 && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c
1516 req128_horz_wc_l = 0;
1517 req128_horz_wc_c = 1;
1518 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
1519 && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c
1521 req128_horz_wc_l = 1;
1522 req128_horz_wc_c = 0;
1524 req128_horz_wc_l = 1;
1525 req128_horz_wc_c = 1;
/* Vertical scan: same decision using the vertical full-swath byte counts */
1528 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1529 req128_vert_wc_l = 0;
1530 req128_vert_wc_c = 0;
1531 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
1532 && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c
1534 req128_vert_wc_l = 0;
1535 req128_vert_wc_c = 1;
1536 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
1537 && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c
1539 req128_vert_wc_l = 1;
1540 req128_vert_wc_c = 0;
1542 req128_vert_wc_l = 1;
1543 req128_vert_wc_c = 1;
/* Luma, horizontal: not contiguous for 2 bytes per pixel, or 4 bytes per pixel unless the swizzle is dm_sw_64kb_r_x */
1546 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1547 segment_order_horz_contiguous_luma = 0;
1549 segment_order_horz_contiguous_luma = 1;
/* Luma, vertical: not contiguous for 8 bytes per pixel with the listed 64kb_d / 64kb_r_x swizzles, or 4 bytes per pixel with dm_sw_64kb_r_x */
1551 if ((BytePerPixelY == 8
1552 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1553 || TilingFormat == dm_sw_64kb_d_t
1554 || TilingFormat == dm_sw_64kb_r_x))
1555 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1556 segment_order_vert_contiguous_luma = 0;
1558 segment_order_vert_contiguous_luma = 1;
/* Chroma: same rules as luma, keyed on BytePerPixelC */
1560 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1561 segment_order_horz_contiguous_chroma = 0;
1563 segment_order_horz_contiguous_chroma = 1;
1565 if ((BytePerPixelC == 8
1566 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1567 || TilingFormat == dm_sw_64kb_d_t
1568 || TilingFormat == dm_sw_64kb_r_x))
1569 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1570 segment_order_vert_contiguous_chroma = 0;
1572 segment_order_vert_contiguous_chroma = 1;
/*
 * Scan direction unknown: 256-byte requests only when neither direction
 * needs 128-byte requests; non-contiguous when any direction that needs
 * 128-byte requests is not contiguous.
 */
1575 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1576 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1577 RequestLuma = REQ_256Bytes;
1578 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0)
1579 || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1580 RequestLuma = REQ_128BytesNonContiguous;
1582 RequestLuma = REQ_128BytesContiguous;
1584 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1585 RequestChroma = REQ_256Bytes;
1586 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0)
1587 || (req128_vert_wc_c == 1
1588 && segment_order_vert_contiguous_chroma == 0)) {
1589 RequestChroma = REQ_128BytesNonContiguous;
1591 RequestChroma = REQ_128BytesContiguous;
/* Scan direction known and not dm_vert: use the horizontal results */
1593 } else if (ScanOrientation != dm_vert) {
1594 if (req128_horz_wc_l == 0) {
1595 RequestLuma = REQ_256Bytes;
1596 } else if (segment_order_horz_contiguous_luma == 0) {
1597 RequestLuma = REQ_128BytesNonContiguous;
1599 RequestLuma = REQ_128BytesContiguous;
1601 if (req128_horz_wc_c == 0) {
1602 RequestChroma = REQ_256Bytes;
1603 } else if (segment_order_horz_contiguous_chroma == 0) {
1604 RequestChroma = REQ_128BytesNonContiguous;
1606 RequestChroma = REQ_128BytesContiguous;
/* Selection based on the vertical results */
1609 if (req128_vert_wc_l == 0) {
1610 RequestLuma = REQ_256Bytes;
1611 } else if (segment_order_vert_contiguous_luma == 0) {
1612 RequestLuma = REQ_128BytesNonContiguous;
1614 RequestLuma = REQ_128BytesContiguous;
1616 if (req128_vert_wc_c == 0) {
1617 RequestChroma = REQ_256Bytes;
1618 } else if (segment_order_vert_contiguous_chroma == 0) {
1619 RequestChroma = REQ_128BytesNonContiguous;
1621 RequestChroma = REQ_128BytesContiguous;
/* Map the luma request type to block sizes: 256/256/0, 256/128/128, or 256/64/64 */
1625 if (RequestLuma == REQ_256Bytes) {
1626 *MaxUncompressedBlockLuma = 256;
1627 *MaxCompressedBlockLuma = 256;
1628 *IndependentBlockLuma = 0;
1629 } else if (RequestLuma == REQ_128BytesContiguous) {
1630 *MaxUncompressedBlockLuma = 256;
1631 *MaxCompressedBlockLuma = 128;
1632 *IndependentBlockLuma = 128;
1634 *MaxUncompressedBlockLuma = 256;
1635 *MaxCompressedBlockLuma = 64;
1636 *IndependentBlockLuma = 64;
/* Same mapping for chroma */
1639 if (RequestChroma == REQ_256Bytes) {
1640 *MaxUncompressedBlockChroma = 256;
1641 *MaxCompressedBlockChroma = 256;
1642 *IndependentBlockChroma = 0;
1643 } else if (RequestChroma == REQ_128BytesContiguous) {
1644 *MaxUncompressedBlockChroma = 256;
1645 *MaxCompressedBlockChroma = 128;
1646 *IndependentBlockChroma = 128;
1648 *MaxUncompressedBlockChroma = 256;
1649 *MaxCompressedBlockChroma = 64;
1650 *IndependentBlockChroma = 64;
/* No DCC or no chroma plane: clear the chroma outputs */
1653 if (DCCEnabled != true || BytePerPixelC == 0) {
1654 *MaxUncompressedBlockChroma = 0;
1655 *MaxCompressedBlockChroma = 0;
1656 *IndependentBlockChroma = 0;
/* No DCC: clear the luma outputs as well */
1659 if (DCCEnabled != true) {
1660 *MaxUncompressedBlockLuma = 0;
1661 *MaxCompressedBlockLuma = 0;
1662 *IndependentBlockLuma = 0;
/*
 * CalculatePrefetchSourceLines - derive the initial vertical pre-fill and
 * swath counts for one plane.
 *
 * Writes *VInitPreFill and *MaxNumSwath and returns
 * *MaxNumSwath * SwathHeight + MaxPartialSwath (presumably the number of
 * source lines to prefetch, going by the function name - confirm with the
 * callers).
 *
 * mode_lib->vba.IgnoreViewportPositioning selects between two formulas for
 * *MaxNumSwath and MaxPartialSwath.  SwathHeight is used as a divisor and as
 * a modulus, so it must be non-zero.
 */
1667 static double CalculatePrefetchSourceLines(
1668 struct display_mode_lib *mode_lib,
1672 bool ProgressiveToInterlaceUnitInOPP,
1673 unsigned int SwathHeight,
1674 unsigned int ViewportYStart,
1675 double *VInitPreFill,
1676 unsigned int *MaxNumSwath)
1678 unsigned int MaxPartialSwath = 0;
/*
 * Initial pre-fill is half of (VRatio + vtaps + 1) passed through
 * dml_floor(..., 1) (presumably rounding down to a whole number - confirm);
 * the second form additionally adds Interlace * 0.5 * VRatio before halving.
 */
1680 if (ProgressiveToInterlaceUnitInOPP)
1681 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1683 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
/* Viewport positioning is honoured: swath count and partial swath are based on *VInitPreFill - 1 and - 2 */
1685 if (!mode_lib->vba.IgnoreViewportPositioning) {
1687 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1689 if (*VInitPreFill > 1.0)
1690 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1692 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
/* In this path the partial swath is never allowed to be less than 1 */
1694 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
/* A non-zero ViewportYStart only produces the warning text below; the calculation proceeds as if it were 0 */
1698 if (ViewportYStart != 0)
1700 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
/* Swath count and partial swath based on *VInitPreFill and *VInitPreFill - 1 */
1702 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1704 if (*VInitPreFill > 1.0)
1705 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1707 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
/* Whole swaths plus the trailing partial swath */
1711 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
/*
 * CalculateVMAndRowBytes - compute per-frame page-table / DCC-meta byte
 * counts and per-row PTE and meta sizes for one plane.
 *
 * Returns PDEAndMetaPTEBytesFrame: the sum of *MetaPTEBytesFrame,
 * MPDEBytesFrame, *DPDE0BytesFrame and ExtraDPDEBytesFrame, multiplied by
 * (1 + 8 * HostVMDynamicLevels) when HostVMEnable is true.
 *
 * Outputs written through the pointer parameters:
 *   MacroTileWidth                       - macro tile width
 *   MetaRequestWidth / MetaRequestHeight - 8 x the 256-byte block dimensions
 *   meta_row_width / meta_row_height, MetaRowByte
 *   PixelPTEReqWidth / PixelPTEReqHeight, PTERequestSize
 *   dpte_row_width_ub / dpte_row_height, PixelPTEBytesPerRow
 *   PTEBufferSizeNotExceeded             - true when the PTE bytes per row
 *                                          fit in 64 * PTEBufferSizeInRequests
 *   vm_group_bytes / dpte_group_bytes
 *   DPDE0BytesFrame, MetaPTEBytesFrame
 *
 * NOTE(review): BytePerPixel and MacroTileHeight (derived from
 * BlockHeight256Bytes) are used as unsigned integer divisors when computing
 * *MacroTileWidth, so callers must not pass 0 for BytePerPixel or
 * BlockHeight256Bytes - confirm that every caller guards this.
 */
1714 static unsigned int CalculateVMAndRowBytes(
1715 struct display_mode_lib *mode_lib,
1717 unsigned int BlockHeight256Bytes,
1718 unsigned int BlockWidth256Bytes,
1719 enum source_format_class SourcePixelFormat,
1720 unsigned int SurfaceTiling,
1721 unsigned int BytePerPixel,
1722 enum scan_direction_class ScanDirection,
1723 unsigned int SwathWidth,
1724 unsigned int ViewportHeight,
1727 unsigned int HostVMMaxNonCachedPageTableLevels,
1728 unsigned int GPUVMMinPageSize,
1729 unsigned int HostVMMinPageSize,
1730 unsigned int PTEBufferSizeInRequests,
1732 unsigned int DCCMetaPitch,
1733 unsigned int *MacroTileWidth,
1734 unsigned int *MetaRowByte,
1735 unsigned int *PixelPTEBytesPerRow,
1736 bool *PTEBufferSizeNotExceeded,
1737 unsigned int *dpte_row_width_ub,
1738 unsigned int *dpte_row_height,
1739 unsigned int *MetaRequestWidth,
1740 unsigned int *MetaRequestHeight,
1741 unsigned int *meta_row_width,
1742 unsigned int *meta_row_height,
1743 unsigned int *vm_group_bytes,
1744 unsigned int *dpte_group_bytes,
1745 unsigned int *PixelPTEReqWidth,
1746 unsigned int *PixelPTEReqHeight,
1747 unsigned int *PTERequestSize,
1748 unsigned int *DPDE0BytesFrame,
1749 unsigned int *MetaPTEBytesFrame)
1751 unsigned int MPDEBytesFrame = 0;
1752 unsigned int DCCMetaSurfaceBytes = 0;
1753 unsigned int MacroTileSizeBytes = 0;
1754 unsigned int MacroTileHeight = 0;
1755 unsigned int ExtraDPDEBytesFrame = 0;
1756 unsigned int PDEAndMetaPTEBytesFrame = 0;
1757 unsigned int PixelPTEReqHeightPTEs = 0;
1758 unsigned int HostVMDynamicLevels = 0;
1760 double FractionOfPTEReturnDrop;
/*
 * With both GPUVM and HostVM enabled, the number of dynamic HostVM levels is
 * the non-cached level count, reduced by 1 for HostVMMinPageSize in
 * [2048, 1048576) and by 2 for larger values, clamped at 0.
 */
1762 if (GPUVMEnable == true && HostVMEnable == true) {
1763 if (HostVMMinPageSize < 2048) {
1764 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1765 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1766 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1768 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
/* A meta request covers 8 x 8 blocks of 256 bytes */
1772 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1773 *MetaRequestWidth = 8 * BlockWidth256Bytes;
/* Meta row geometry; for dm_vert the request width and height swap roles */
1774 if (ScanDirection != dm_vert) {
1775 *meta_row_height = *MetaRequestHeight;
1776 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1777 + *MetaRequestWidth;
1778 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1780 *meta_row_height = *MetaRequestWidth;
1781 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1782 + *MetaRequestHeight;
1783 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
/* Size of the DCC meta surface: pitch times the viewport height padded in units of 64 * BlockHeight256Bytes, at BytePerPixel / 256 bytes per pixel */
1785 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1786 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1787 if (GPUVMEnable == true) {
1788 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1789 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1791 *MetaPTEBytesFrame = 0;
/* Without DCC there are no meta PTE bytes */
1795 if (DCCEnable != true) {
1796 *MetaPTEBytesFrame = 0;
/* Macro tile: 256 bytes for dm_sw_linear, 65536 bytes (16 block heights tall) otherwise */
1801 if (SurfaceTiling == dm_sw_linear) {
1802 MacroTileSizeBytes = 256;
1803 MacroTileHeight = BlockHeight256Bytes;
1805 MacroTileSizeBytes = 65536;
1806 MacroTileHeight = 16 * BlockHeight256Bytes;
1808 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
/* PDE bytes are only counted with GPUVM enabled and more than one page-table level */
1810 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1811 if (ScanDirection != dm_vert) {
1812 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1814 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1816 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1818 *DPDE0BytesFrame = 0;
1819 ExtraDPDEBytesFrame = 0;
/* Per-frame total; this is the function's return value */
1822 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1823 + ExtraDPDEBytesFrame;
/* HostVM scales the total by (1 + 8 * HostVMDynamicLevels) */
1825 if (HostVMEnable == true) {
1826 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
/* PTE request geometry, chosen by tiling, macro tile size and GPUVMMinPageSize */
1829 if (SurfaceTiling == dm_sw_linear) {
1830 PixelPTEReqHeightPTEs = 1;
1831 *PixelPTEReqHeight = 1;
1832 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1833 *PTERequestSize = 64;
1834 FractionOfPTEReturnDrop = 0;
1835 } else if (MacroTileSizeBytes == 4096) {
1836 PixelPTEReqHeightPTEs = 1;
1837 *PixelPTEReqHeight = MacroTileHeight;
1838 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1839 *PTERequestSize = 64;
1840 if (ScanDirection != dm_vert)
1841 FractionOfPTEReturnDrop = 0;
/*
 * NOTE(review): 7 / 8 is integer division and evaluates to 0, so this stores
 * 0.0 rather than 0.875; 7.0 / 8 looks intended - confirm with the HW team
 * before changing.  The visible assignments above only ever set
 * MacroTileSizeBytes to 256 or 65536, so this 4096 branch appears to be
 * unreachable and the value currently has no effect.
 */
1843 FractionOfPTEReturnDrop = 7 / 8;
1844 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1845 PixelPTEReqHeightPTEs = 16;
1846 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1847 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1848 *PTERequestSize = 128;
1849 FractionOfPTEReturnDrop = 0;
1851 PixelPTEReqHeightPTEs = 1;
1852 *PixelPTEReqHeight = MacroTileHeight;
1853 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1854 *PTERequestSize = 64;
1855 FractionOfPTEReturnDrop = 0;
/* DPTE row geometry and PTE bytes per row: linear, non-vertical tiled, then the remaining case */
1858 if (SurfaceTiling == dm_sw_linear) {
1859 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1860 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1861 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1862 } else if (ScanDirection != dm_vert) {
1863 *dpte_row_height = *PixelPTEReqHeight;
1864 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1865 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1867 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1868 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1869 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
/* The PTE buffer holds 64 bytes per request; compare the non-dropped share of a row against it */
1871 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1872 <= 64 * PTEBufferSizeInRequests) {
1873 *PTEBufferSizeNotExceeded = true;
1875 *PTEBufferSizeNotExceeded = false;
/* Without GPUVM there are no PTE bytes per row and the buffer check passes trivially */
1878 if (GPUVMEnable != true) {
1879 *PixelPTEBytesPerRow = 0;
1880 *PTEBufferSizeNotExceeded = true;
1882 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
/* HostVM scales the per-row PTE bytes by the same (1 + 8 * HostVMDynamicLevels) factor */
1884 if (HostVMEnable == true) {
1885 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
/* Group sizes: 512 with HostVM; with GPUVM only, 2048 except 512 for dpte on tiled vertical scan with single-PTE-height requests; 0 with neither */
1888 if (HostVMEnable == true) {
1889 *vm_group_bytes = 512;
1890 *dpte_group_bytes = 512;
1891 } else if (GPUVMEnable == true) {
1892 *vm_group_bytes = 2048;
1893 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1894 *dpte_group_bytes = 512;
1896 *dpte_group_bytes = 2048;
1899 *vm_group_bytes = 0;
1900 *dpte_group_bytes = 0;
1903 return PDEAndMetaPTEBytesFrame;
1906 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1907 struct display_mode_lib *mode_lib)
1909 struct vba_vars_st *v = &mode_lib->vba;
1911 long ReorderBytes = 0;
1912 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1913 double MaxTotalRDBandwidth = 0;
1914 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1915 bool DestinationLineTimesForPrefetchLessThan2 = false;
1916 bool VRatioPrefetchMoreThan4 = false;
1919 v->WritebackDISPCLK = 0.0;
1920 v->DISPCLKWithRamping = 0;
1921 v->DISPCLKWithoutRamping = 0;
1922 v->GlobalDPPCLK = 0.0;
1923 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
1924 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1925 v->ReturnBusWidth * v->DCFCLK,
1926 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1927 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1928 if (v->HostVMEnable != true) {
1929 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1931 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1933 /* End DAL custom code */
1935 // DISPCLK and DPPCLK Calculation
1937 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1938 if (v->WritebackEnable[k]) {
1939 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1940 dml30_CalculateWriteBackDISPCLK(
1941 v->WritebackPixelFormat[k],
1943 v->WritebackHRatio[k],
1944 v->WritebackVRatio[k],
1945 v->WritebackHTaps[k],
1946 v->WritebackVTaps[k],
1947 v->WritebackSourceWidth[k],
1948 v->WritebackDestinationWidth[k],
1950 v->WritebackLineBufferSize));
1954 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1955 if (v->HRatio[k] > 1) {
1956 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1957 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1959 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1960 v->MaxDCHUBToPSCLThroughput,
1961 v->MaxPSCLToLBThroughput);
1964 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1965 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1966 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1968 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1969 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1970 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1973 if ((v->SourcePixelFormat[k] != dm_420_8
1974 && v->SourcePixelFormat[k] != dm_420_10
1975 && v->SourcePixelFormat[k] != dm_420_12
1976 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1977 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1978 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1980 if (v->HRatioChroma[k] > 1) {
1981 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1982 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1984 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1985 v->MaxDCHUBToPSCLThroughput,
1986 v->MaxPSCLToLBThroughput);
1988 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
1989 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
1990 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
1992 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
1993 && v->DPPCLKUsingSingleDPPChroma
1994 < 2 * v->PixelClock[k]) {
1995 v->DPPCLKUsingSingleDPPChroma = 2
1999 v->DPPCLKUsingSingleDPP[k] = dml_max(
2000 v->DPPCLKUsingSingleDPPLuma,
2001 v->DPPCLKUsingSingleDPPChroma);
2005 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2006 if (v->BlendingAndTiming[k] != k)
2008 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2009 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2010 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2011 * (1 + v->DISPCLKRampingMargin / 100));
2012 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2013 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2014 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2015 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2016 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2017 * (1 + v->DISPCLKRampingMargin / 100));
2018 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2019 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2021 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2022 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2023 * (1 + v->DISPCLKRampingMargin / 100));
2024 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2025 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2029 v->DISPCLKWithRamping = dml_max(
2030 v->DISPCLKWithRamping,
2031 v->WritebackDISPCLK);
2032 v->DISPCLKWithoutRamping = dml_max(
2033 v->DISPCLKWithoutRamping,
2034 v->WritebackDISPCLK);
2036 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2037 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2038 v->DISPCLKWithRamping,
2039 v->DISPCLKDPPCLKVCOSpeed);
2040 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2041 v->DISPCLKWithoutRamping,
2042 v->DISPCLKDPPCLKVCOSpeed);
2043 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2044 v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz,
2045 v->DISPCLKDPPCLKVCOSpeed);
2046 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
2047 > v->MaxDispclkRoundedToDFSGranularity) {
2048 v->DISPCLK_calculated =
2049 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2050 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
2051 > v->MaxDispclkRoundedToDFSGranularity) {
2052 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2054 v->DISPCLK_calculated =
2055 v->DISPCLKWithRampingRoundedToDFSGranularity;
2057 v->DISPCLK = v->DISPCLK_calculated;
2058 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2060 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2061 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
2063 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2064 v->GlobalDPPCLK = dml_max(
2066 v->DPPCLK_calculated[k]);
2068 v->GlobalDPPCLK = RoundToDFSGranularityUp(
2070 v->DISPCLKDPPCLKVCOSpeed);
2071 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2072 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
2074 v->DPPCLK_calculated[k] * 255.0
2077 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2078 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2081 // Urgent and NB P-State/DRAM Clock Change Watermark
2082 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2083 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2085 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2086 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2087 v->SourcePixelFormat[k],
2088 v->SurfaceTiling[k],
2089 &v->BytePerPixelY[k],
2090 &v->BytePerPixelC[k],
2091 &v->BytePerPixelDETY[k],
2092 &v->BytePerPixelDETC[k],
2093 &v->BlockHeight256BytesY[k],
2094 &v->BlockHeight256BytesC[k],
2095 &v->BlockWidth256BytesY[k],
2096 &v->BlockWidth256BytesC[k]);
2099 CalculateSwathWidth(
2101 v->NumberOfActivePlanes,
2102 v->SourcePixelFormat,
2110 v->ODMCombineEnabled,
2113 v->BlockHeight256BytesY,
2114 v->BlockHeight256BytesC,
2115 v->BlockWidth256BytesY,
2116 v->BlockWidth256BytesC,
2117 v->BlendingAndTiming,
2121 v->SwathWidthSingleDPPY,
2122 v->SwathWidthSingleDPPC,
2127 v->swath_width_luma_ub,
2128 v->swath_width_chroma_ub);
2131 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2132 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2133 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
2134 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2138 // DCFCLK Deep Sleep
2139 CalculateDCFCLKDeepSleep(
2141 v->NumberOfActivePlanes,
2152 v->PSCL_THROUGHPUT_LUMA,
2153 v->PSCL_THROUGHPUT_CHROMA,
2155 v->ReadBandwidthPlaneLuma,
2156 v->ReadBandwidthPlaneChroma,
2158 &v->DCFCLKDeepSleep);
2161 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2162 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2163 v->DSCCLK_calculated[k] = 0.0;
2165 if (v->OutputFormat[k] == dm_420)
2166 v->DSCFormatFactor = 2;
2167 else if (v->OutputFormat[k] == dm_444)
2168 v->DSCFormatFactor = 1;
2169 else if (v->OutputFormat[k] == dm_n422)
2170 v->DSCFormatFactor = 2;
2172 v->DSCFormatFactor = 1;
2173 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2174 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2175 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2176 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2177 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2178 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2180 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2181 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2186 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2187 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2189 if (v->DSCEnabled[k] && BPP != 0) {
2190 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2191 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2193 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2194 v->NumberOfDSCSlices[k],
2197 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2198 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2199 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2201 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2202 v->NumberOfDSCSlices[k] / 2.0,
2205 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2207 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2209 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2210 v->NumberOfDSCSlices[k] / 4.0,
2213 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2215 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2221 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2222 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2223 if (j != k && v->BlendingAndTiming[k] == j
2224 && v->DSCEnabled[j])
2225 v->DSCDelay[k] = v->DSCDelay[j];
2228 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2229 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2230 unsigned int PixelPTEBytesPerRowY = 0;
2231 unsigned int MetaRowByteY = 0;
2232 unsigned int MetaRowByteC = 0;
2233 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2234 unsigned int PixelPTEBytesPerRowC = 0;
2235 bool PTEBufferSizeNotExceededY = 0;
2236 bool PTEBufferSizeNotExceededC = 0;
2239 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2240 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2241 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2242 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2244 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2245 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2248 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2251 v->BlockHeight256BytesC[k],
2252 v->BlockWidth256BytesC[k],
2253 v->SourcePixelFormat[k],
2254 v->SurfaceTiling[k],
2255 v->BytePerPixelC[k],
2258 v->ViewportHeightChroma[k],
2261 v->HostVMMaxNonCachedPageTableLevels,
2262 v->GPUVMMinPageSize,
2263 v->HostVMMinPageSize,
2264 v->PTEBufferSizeInRequestsForChroma,
2266 v->DCCMetaPitchC[k],
2267 &v->MacroTileWidthC[k],
2269 &PixelPTEBytesPerRowC,
2270 &PTEBufferSizeNotExceededC,
2271 &v->dpte_row_width_chroma_ub[k],
2272 &v->dpte_row_height_chroma[k],
2273 &v->meta_req_width_chroma[k],
2274 &v->meta_req_height_chroma[k],
2275 &v->meta_row_width_chroma[k],
2276 &v->meta_row_height_chroma[k],
2279 &v->PixelPTEReqWidthC[k],
2280 &v->PixelPTEReqHeightC[k],
2281 &v->PTERequestSizeC[k],
2282 &v->dpde0_bytes_per_frame_ub_c[k],
2283 &v->meta_pte_bytes_per_frame_ub_c[k]);
2285 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2290 v->ProgressiveToInterlaceUnitInOPP,
2292 v->ViewportYStartC[k],
2293 &v->VInitPreFillC[k],
2294 &v->MaxNumSwathC[k]);
2296 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2297 v->PTEBufferSizeInRequestsForChroma = 0;
2298 PixelPTEBytesPerRowC = 0;
2299 PDEAndMetaPTEBytesFrameC = 0;
2301 v->MaxNumSwathC[k] = 0;
2302 v->PrefetchSourceLinesC[k] = 0;
2305 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2308 v->BlockHeight256BytesY[k],
2309 v->BlockWidth256BytesY[k],
2310 v->SourcePixelFormat[k],
2311 v->SurfaceTiling[k],
2312 v->BytePerPixelY[k],
2315 v->ViewportHeight[k],
2318 v->HostVMMaxNonCachedPageTableLevels,
2319 v->GPUVMMinPageSize,
2320 v->HostVMMinPageSize,
2321 v->PTEBufferSizeInRequestsForLuma,
2323 v->DCCMetaPitchY[k],
2324 &v->MacroTileWidthY[k],
2326 &PixelPTEBytesPerRowY,
2327 &PTEBufferSizeNotExceededY,
2328 &v->dpte_row_width_luma_ub[k],
2329 &v->dpte_row_height[k],
2330 &v->meta_req_width[k],
2331 &v->meta_req_height[k],
2332 &v->meta_row_width[k],
2333 &v->meta_row_height[k],
2334 &v->vm_group_bytes[k],
2335 &v->dpte_group_bytes[k],
2336 &v->PixelPTEReqWidthY[k],
2337 &v->PixelPTEReqHeightY[k],
2338 &v->PTERequestSizeY[k],
2339 &v->dpde0_bytes_per_frame_ub_l[k],
2340 &v->meta_pte_bytes_per_frame_ub_l[k]);
2342 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2347 v->ProgressiveToInterlaceUnitInOPP,
2349 v->ViewportYStartY[k],
2350 &v->VInitPreFillY[k],
2351 &v->MaxNumSwathY[k]);
2352 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2353 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2354 + PDEAndMetaPTEBytesFrameC;
2355 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2357 CalculateRowBandwidth(
2359 v->SourcePixelFormat[k],
2363 v->HTotal[k] / v->PixelClock[k],
2366 v->meta_row_height[k],
2367 v->meta_row_height_chroma[k],
2368 PixelPTEBytesPerRowY,
2369 PixelPTEBytesPerRowC,
2370 v->dpte_row_height[k],
2371 v->dpte_row_height_chroma[k],
2373 &v->dpte_row_bw[k]);
2376 v->TotalDCCActiveDPP = 0;
2377 v->TotalActiveDPP = 0;
2378 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2379 v->TotalActiveDPP = v->TotalActiveDPP
2380 + v->DPPPerPlane[k];
2381 if (v->DCCEnable[k])
2382 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2383 + v->DPPPerPlane[k];
2387 ReorderBytes = v->NumberOfChannels * dml_max3(
2388 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2389 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2390 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2392 v->UrgentExtraLatency = CalculateExtraLatency(
2393 v->RoundTripPingLatencyCycles,
2397 v->PixelChunkSizeInKByte,
2398 v->TotalDCCActiveDPP,
2403 v->NumberOfActivePlanes,
2405 v->dpte_group_bytes,
2406 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2407 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2408 v->HostVMMinPageSize,
2409 v->HostVMMaxNonCachedPageTableLevels);
2411 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2413 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2414 if (v->BlendingAndTiming[k] == k) {
2415 if (v->WritebackEnable[k] == true) {
2416 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2417 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2418 v->WritebackHRatio[k],
2419 v->WritebackVRatio[k],
2420 v->WritebackVTaps[k],
2421 v->WritebackDestinationWidth[k],
2422 v->WritebackDestinationHeight[k],
2423 v->WritebackSourceHeight[k],
2424 v->HTotal[k]) / v->DISPCLK;
2426 v->WritebackDelay[v->VoltageLevel][k] = 0;
2427 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2428 if (v->BlendingAndTiming[j] == k
2429 && v->WritebackEnable[j] == true) {
2430 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2431 v->WritebackLatency + CalculateWriteBackDelay(
2432 v->WritebackPixelFormat[j],
2433 v->WritebackHRatio[j],
2434 v->WritebackVRatio[j],
2435 v->WritebackVTaps[j],
2436 v->WritebackDestinationWidth[j],
2437 v->WritebackDestinationHeight[j],
2438 v->WritebackSourceHeight[j],
2439 v->HTotal[k]) / v->DISPCLK);
2445 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2446 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2447 if (v->BlendingAndTiming[k] == j)
2448 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2450 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2451 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2454 v->MaximumMaxVStartupLines = 0;
2455 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2456 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2458 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2459 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2461 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2463 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2466 v->FractionOfUrgentBandwidth = 0.0;
2467 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2469 v->VStartupLines = 13;
2472 MaxTotalRDBandwidth = 0;
2473 MaxTotalRDBandwidthNoUrgentBurst = 0;
2474 DestinationLineTimesForPrefetchLessThan2 = false;
2475 VRatioPrefetchMoreThan4 = false;
2476 TWait = CalculateTWait(
2478 v->FinalDRAMClockChangeLatency,
2480 v->SREnterPlusExitTime);
2482 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2483 Pipe myPipe = { 0 };
2485 myPipe.DPPCLK = v->DPPCLK[k];
2486 myPipe.DISPCLK = v->DISPCLK;
2487 myPipe.PixelClock = v->PixelClock[k];
2488 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2489 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2490 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2491 myPipe.SourceScan = v->SourceScan[k];
2492 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2493 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2494 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2495 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2496 myPipe.InterlaceEnable = v->Interlace[k];
2497 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2498 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2499 myPipe.HTotal = v->HTotal[k];
2500 myPipe.DCCEnable = v->DCCEnable[k];
2501 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2503 v->ErrorResult[k] = CalculatePrefetchSchedule(
2505 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2506 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2509 v->DPPCLKDelaySubtotal
2510 + v->DPPCLKDelayCNVCFormater,
2512 v->DPPCLKDelaySCLLBOnly,
2513 v->DPPCLKDelayCNVCCursor,
2514 v->DISPCLKDelaySubtotal,
2515 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2517 v->MaxInterDCNTileRepeaters,
2518 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2519 v->MaxVStartupLines[k],
2520 v->GPUVMMaxPageTableLevels,
2523 v->HostVMMaxNonCachedPageTableLevels,
2524 v->HostVMMinPageSize,
2525 v->DynamicMetadataEnable[k],
2526 v->DynamicMetadataVMEnabled,
2527 v->DynamicMetadataLinesBeforeActiveRequired[k],
2528 v->DynamicMetadataTransmittedBytes[k],
2530 v->UrgentExtraLatency,
2532 v->PDEAndMetaPTEBytesFrame[k],
2534 v->PixelPTEBytesPerRow[k],
2535 v->PrefetchSourceLinesY[k],
2537 v->BytePerPixelY[k],
2538 v->VInitPreFillY[k],
2540 v->PrefetchSourceLinesC[k],
2542 v->BytePerPixelC[k],
2543 v->VInitPreFillC[k],
2545 v->swath_width_luma_ub[k],
2546 v->swath_width_chroma_ub[k],
2550 v->ProgressiveToInterlaceUnitInOPP,
2551 &v->DSTXAfterScaler[k],
2552 &v->DSTYAfterScaler[k],
2553 &v->DestinationLinesForPrefetch[k],
2554 &v->PrefetchBandwidth[k],
2555 &v->DestinationLinesToRequestVMInVBlank[k],
2556 &v->DestinationLinesToRequestRowInVBlank[k],
2557 &v->VRatioPrefetchY[k],
2558 &v->VRatioPrefetchC[k],
2559 &v->RequiredPrefetchPixDataBWLuma[k],
2560 &v->RequiredPrefetchPixDataBWChroma[k],
2561 &v->NotEnoughTimeForDynamicMetadata[k],
2563 &v->prefetch_vmrow_bw[k],
2566 &v->VUpdateOffsetPix[k],
2567 &v->VUpdateWidthPix[k],
2568 &v->VReadyOffsetPix[k]);
2569 if (v->BlendingAndTiming[k] == k) {
2570 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2571 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2572 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2573 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2574 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2576 int x = v->BlendingAndTiming[k];
2577 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2578 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2579 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2580 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2581 if (!v->MaxVStartupLines[x])
2582 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2583 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2587 v->NotEnoughUrgentLatencyHiding[0][0] = false;
2588 v->NotEnoughUrgentLatencyHidingPre = false;
2590 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2591 v->cursor_bw[k] = v->NumberOfCursors[k]
2592 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2594 / (v->HTotal[k] / v->PixelClock[k])
2596 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2597 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2599 / (v->HTotal[k] / v->PixelClock[k])
2600 * v->VRatioPrefetchY[k];
2602 CalculateUrgentBurstFactor(
2603 v->swath_width_luma_ub[k],
2604 v->swath_width_chroma_ub[k],
2605 v->DETBufferSizeInKByte[0],
2608 v->HTotal[k] / v->PixelClock[k],
2610 v->CursorBufferSize,
2611 v->CursorWidth[k][0],
2615 v->BytePerPixelDETY[k],
2616 v->BytePerPixelDETC[k],
2617 v->DETBufferSizeY[k],
2618 v->DETBufferSizeC[k],
2619 &v->UrgentBurstFactorCursor[k],
2620 &v->UrgentBurstFactorLuma[k],
2621 &v->UrgentBurstFactorChroma[k],
2622 &v->NoUrgentLatencyHiding[k]);
2624 CalculateUrgentBurstFactor(
2625 v->swath_width_luma_ub[k],
2626 v->swath_width_chroma_ub[k],
2627 v->DETBufferSizeInKByte[0],
2630 v->HTotal[k] / v->PixelClock[k],
2632 v->CursorBufferSize,
2633 v->CursorWidth[k][0],
2635 v->VRatioPrefetchY[k],
2636 v->VRatioPrefetchC[k],
2637 v->BytePerPixelDETY[k],
2638 v->BytePerPixelDETC[k],
2639 v->DETBufferSizeY[k],
2640 v->DETBufferSizeC[k],
2641 &v->UrgentBurstFactorCursorPre[k],
2642 &v->UrgentBurstFactorLumaPre[k],
2643 &v->UrgentBurstFactorChromaPre[k],
2644 &v->NoUrgentLatencyHidingPre[k]);
2646 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2647 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2648 v->ReadBandwidthPlaneLuma[k] *
2649 v->UrgentBurstFactorLuma[k] +
2650 v->ReadBandwidthPlaneChroma[k] *
2651 v->UrgentBurstFactorChroma[k] +
2653 v->UrgentBurstFactorCursor[k] +
2654 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2655 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2656 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2657 v->UrgentBurstFactorCursorPre[k]);
2659 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2660 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2661 v->ReadBandwidthPlaneLuma[k] +
2662 v->ReadBandwidthPlaneChroma[k] +
2664 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2665 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2667 if (v->DestinationLinesForPrefetch[k] < 2)
2668 DestinationLineTimesForPrefetchLessThan2 = true;
2669 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2670 VRatioPrefetchMoreThan4 = true;
2671 if (v->NoUrgentLatencyHiding[k] == true)
2672 v->NotEnoughUrgentLatencyHiding[0][0] = true;
2674 if (v->NoUrgentLatencyHidingPre[k] == true)
2675 v->NotEnoughUrgentLatencyHidingPre = true;
2677 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2680 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0
2681 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2682 && !DestinationLineTimesForPrefetchLessThan2)
2683 v->PrefetchModeSupported = true;
2685 v->PrefetchModeSupported = false;
2686 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2687 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2688 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2689 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2692 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2693 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2694 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2695 v->BandwidthAvailableForImmediateFlip =
2696 v->BandwidthAvailableForImmediateFlip
2698 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2699 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2700 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2701 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2702 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2703 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2706 v->TotImmediateFlipBytes = 0;
2707 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2708 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2710 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2711 CalculateFlipSchedule(
2713 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2714 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2715 v->UrgentExtraLatency,
2717 v->GPUVMMaxPageTableLevels,
2719 v->HostVMMaxNonCachedPageTableLevels,
2721 v->HostVMMinPageSize,
2722 v->PDEAndMetaPTEBytesFrame[k],
2724 v->PixelPTEBytesPerRow[k],
2725 v->BandwidthAvailableForImmediateFlip,
2726 v->TotImmediateFlipBytes,
2727 v->SourcePixelFormat[k],
2728 v->HTotal[k] / v->PixelClock[k],
2733 v->dpte_row_height[k],
2734 v->meta_row_height[k],
2735 v->dpte_row_height_chroma[k],
2736 v->meta_row_height_chroma[k],
2737 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2738 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2739 &v->final_flip_bw[k],
2740 &v->ImmediateFlipSupportedForPipe[k]);
2742 v->total_dcn_read_bw_with_flip = 0.0;
2743 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2744 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2745 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2746 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2747 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2748 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2749 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2750 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2751 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2752 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2753 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2754 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2755 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2756 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2757 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2758 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2759 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2762 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2764 v->ImmediateFlipSupported = true;
2765 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2766 v->ImmediateFlipSupported = false;
2767 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2769 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2770 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2771 v->ImmediateFlipSupported = false;
2775 v->ImmediateFlipSupported = false;
2778 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2779 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2780 v->PrefetchModeSupported = false;
2781 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2785 v->VStartupLines = v->VStartupLines + 1;
2786 v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport &&
2787 !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2788 v->ImmediateFlipSupported)) ? true : false;
2789 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2790 ASSERT(v->PrefetchModeSupported);
2792 //Watermarks and NB P-State/DRAM Clock Change Support
2794 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2795 CalculateWatermarksAndDRAMSpeedChangeSupport(
2798 v->NumberOfActivePlanes,
2799 v->MaxLineBufferLines,
2801 v->DPPOutputBufferPixels,
2802 v->DETBufferSizeInKByte[0],
2803 v->WritebackInterfaceBufferSize,
2807 v->dpte_group_bytes,
2810 v->UrgentExtraLatency,
2811 v->WritebackLatency,
2812 v->WritebackChunkSize,
2814 v->FinalDRAMClockChangeLatency,
2816 v->SREnterPlusExitTime,
2836 v->BlendingAndTiming,
2837 v->BytePerPixelDETY,
2838 v->BytePerPixelDETC,
2842 v->WritebackPixelFormat,
2843 v->WritebackDestinationWidth,
2844 v->WritebackDestinationHeight,
2845 v->WritebackSourceHeight,
2846 &DRAMClockChangeSupport,
2847 &v->UrgentWatermark,
2848 &v->WritebackUrgentWatermark,
2849 &v->DRAMClockChangeWatermark,
2850 &v->WritebackDRAMClockChangeWatermark,
2851 &v->StutterExitWatermark,
2852 &v->StutterEnterPlusExitWatermark,
2853 &v->MinActiveDRAMClockChangeLatencySupported);
2855 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2856 if (v->WritebackEnable[k] == true) {
2857 if (v->BlendingAndTiming[k] == k) {
2858 v->ThisVStartup = v->VStartup[k];
2860 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2861 if (v->BlendingAndTiming[k] == j) {
2862 v->ThisVStartup = v->VStartup[j];
2866 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2867 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2869 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2876 //Display Pipeline Delivery Time in Prefetch, Groups
2877 CalculatePixelDeliveryTimes(
2878 v->NumberOfActivePlanes,
2883 v->swath_width_luma_ub,
2884 v->swath_width_chroma_ub,
2889 v->PSCL_THROUGHPUT_LUMA,
2890 v->PSCL_THROUGHPUT_CHROMA,
2897 v->BlockWidth256BytesY,
2898 v->BlockHeight256BytesY,
2899 v->BlockWidth256BytesC,
2900 v->BlockHeight256BytesC,
2901 v->DisplayPipeLineDeliveryTimeLuma,
2902 v->DisplayPipeLineDeliveryTimeChroma,
2903 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2904 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2905 v->DisplayPipeRequestDeliveryTimeLuma,
2906 v->DisplayPipeRequestDeliveryTimeChroma,
2907 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2908 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2909 v->CursorRequestDeliveryTime,
2910 v->CursorRequestDeliveryTimePrefetch);
2912 CalculateMetaAndPTETimes(
2913 v->NumberOfActivePlanes,
2916 v->MinMetaChunkSizeBytes,
2920 v->DestinationLinesToRequestRowInVBlank,
2921 v->DestinationLinesToRequestRowInImmediateFlip,
2928 v->dpte_row_height_chroma,
2930 v->meta_row_width_chroma,
2932 v->meta_row_height_chroma,
2934 v->meta_req_width_chroma,
2936 v->meta_req_height_chroma,
2937 v->dpte_group_bytes,
2940 v->PixelPTEReqWidthY,
2941 v->PixelPTEReqHeightY,
2942 v->PixelPTEReqWidthC,
2943 v->PixelPTEReqHeightC,
2944 v->dpte_row_width_luma_ub,
2945 v->dpte_row_width_chroma_ub,
2946 v->DST_Y_PER_PTE_ROW_NOM_L,
2947 v->DST_Y_PER_PTE_ROW_NOM_C,
2948 v->DST_Y_PER_META_ROW_NOM_L,
2949 v->DST_Y_PER_META_ROW_NOM_C,
2950 v->TimePerMetaChunkNominal,
2951 v->TimePerChromaMetaChunkNominal,
2952 v->TimePerMetaChunkVBlank,
2953 v->TimePerChromaMetaChunkVBlank,
2954 v->TimePerMetaChunkFlip,
2955 v->TimePerChromaMetaChunkFlip,
2956 v->time_per_pte_group_nom_luma,
2957 v->time_per_pte_group_vblank_luma,
2958 v->time_per_pte_group_flip_luma,
2959 v->time_per_pte_group_nom_chroma,
2960 v->time_per_pte_group_vblank_chroma,
2961 v->time_per_pte_group_flip_chroma);
2963 CalculateVMGroupAndRequestTimes(
2964 v->NumberOfActivePlanes,
2966 v->GPUVMMaxPageTableLevels,
2969 v->DestinationLinesToRequestVMInVBlank,
2970 v->DestinationLinesToRequestVMInImmediateFlip,
2973 v->dpte_row_width_luma_ub,
2974 v->dpte_row_width_chroma_ub,
2976 v->dpde0_bytes_per_frame_ub_l,
2977 v->dpde0_bytes_per_frame_ub_c,
2978 v->meta_pte_bytes_per_frame_ub_l,
2979 v->meta_pte_bytes_per_frame_ub_c,
2980 v->TimePerVMGroupVBlank,
2981 v->TimePerVMGroupFlip,
2982 v->TimePerVMRequestVBlank,
2983 v->TimePerVMRequestFlip);
2987 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2988 if (PrefetchMode == 0) {
2989 v->AllowDRAMClockChangeDuringVBlank[k] = true;
2990 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2991 v->MinTTUVBlank[k] = dml_max(
2992 v->DRAMClockChangeWatermark,
2994 v->StutterEnterPlusExitWatermark,
2995 v->UrgentWatermark));
2996 } else if (PrefetchMode == 1) {
2997 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2998 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2999 v->MinTTUVBlank[k] = dml_max(
3000 v->StutterEnterPlusExitWatermark,
3001 v->UrgentWatermark);
3003 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3004 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3005 v->MinTTUVBlank[k] = v->UrgentWatermark;
3007 if (!v->DynamicMetadataEnable[k])
3008 v->MinTTUVBlank[k] = v->TCalc
3009 + v->MinTTUVBlank[k];
3012 // DCC Configuration
3014 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3015 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3016 v->SourcePixelFormat[k],
3017 v->SurfaceWidthY[k],
3018 v->SurfaceWidthC[k],
3019 v->SurfaceHeightY[k],
3020 v->SurfaceHeightC[k],
3021 v->DETBufferSizeInKByte[0] * 1024,
3022 v->BlockHeight256BytesY[k],
3023 v->BlockHeight256BytesC[k],
3024 v->SurfaceTiling[k],
3025 v->BytePerPixelY[k],
3026 v->BytePerPixelC[k],
3027 v->BytePerPixelDETY[k],
3028 v->BytePerPixelDETC[k],
3030 &v->DCCYMaxUncompressedBlock[k],
3031 &v->DCCCMaxUncompressedBlock[k],
3032 &v->DCCYMaxCompressedBlock[k],
3033 &v->DCCCMaxCompressedBlock[k],
3034 &v->DCCYIndependentBlock[k],
3035 &v->DCCCIndependentBlock[k]);
3039 //Maximum Bandwidth Used
3040 double TotalWRBandwidth = 0;
3041 double MaxPerPlaneVActiveWRBandwidth = 0;
3042 double WRBandwidth = 0;
3043 double MaxUsedBW = 0;
3044 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3045 if (v->WritebackEnable[k] == true
3046 && v->WritebackPixelFormat[k] == dm_444_32) {
3047 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3048 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3049 } else if (v->WritebackEnable[k] == true) {
3050 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3051 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3053 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3054 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3057 v->TotalDataReadBandwidth = 0;
3058 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3059 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
3060 + v->ReadBandwidthPlaneLuma[k]
3061 + v->ReadBandwidthPlaneChroma[k];
3065 double MaxPerPlaneVActiveRDBandwidth = 0;
3066 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3067 MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth,
3068 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
3073 MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth;
3077 v->VStartupMargin = 0;
3078 v->FirstMainPlane = true;
3079 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3080 if (v->BlendingAndTiming[k] == k) {
3081 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
3083 if (v->FirstMainPlane == true) {
3084 v->VStartupMargin = margin;
3085 v->FirstMainPlane = false;
3087 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
3092 // Stutter Efficiency
3093 CalculateStutterEfficiency(
3094 v->NumberOfActivePlanes,
3095 v->ROBBufferSizeInKByte,
3096 v->TotalDataReadBandwidth,
3100 v->SynchronizedVBlank,
3104 v->BytePerPixelDETY,
3115 v->BlockHeight256BytesY,
3116 v->BlockWidth256BytesY,
3117 v->BlockHeight256BytesC,
3118 v->BlockWidth256BytesC,
3119 v->DCCYMaxUncompressedBlock,
3120 v->DCCCMaxUncompressedBlock,
3124 v->ReadBandwidthPlaneLuma,
3125 v->ReadBandwidthPlaneChroma,
3128 &v->StutterEfficiencyNotIncludingVBlank,
3129 &v->StutterEfficiency,
/*
 * DisplayPipeConfiguration - per-plane 256-byte block sizes, then swath/DET setup.
 *
 * For every active plane (k < mode_lib->vba.NumberOfActivePlanes) this calls
 * dml30_CalculateBytePerPixelAnd256BBlockSizes() with the plane's
 * SourcePixelFormat and SurfaceTiling and stores the block height/width
 * results in the local Read256BytesBlock* arrays. It then makes one call to
 * CalculateSwathAndDETConfiguration(), handing over those local arrays along
 * with viewport, surface, ODM/blending, HActive, HRatio and DPPPerPlane fields
 * of mode_lib->vba, plus vba.SwathHeightY/SwathHeightC and
 * vba.DETBufferSizeY/DETBufferSizeC.
 *
 * NOTE(review): the dummy* locals and dummysinglestring look like placeholders
 * for callee results that this caller does not consume - confirm against the
 * prototype of CalculateSwathAndDETConfiguration().
 * NOTE(review): the Read256BytesBlock* locals are int arrays, while
 * dml30_CalculateBytePerPixelAnd256BBlockSizes() declares unsigned int
 * pointers for the same arguments.
 */
3133 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3135 // Display Pipe Configuration
3136 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
3137 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
3138 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
3139 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
3140 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
3141 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
3142 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
3143 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
3144 double dummy1[DC__NUM_DPP__MAX] = { 0 };
3145 double dummy2[DC__NUM_DPP__MAX] = { 0 };
3146 double dummy3[DC__NUM_DPP__MAX] = { 0 };
3147 double dummy4[DC__NUM_DPP__MAX] = { 0 };
3148 int dummy5[DC__NUM_DPP__MAX] = { 0 };
3149 int dummy6[DC__NUM_DPP__MAX] = { 0 };
3150 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
3151 bool dummysinglestring = 0;
/* Step 1: per-plane block geometry, one call per active plane. */
3154 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
3156 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3157 mode_lib->vba.SourcePixelFormat[k],
3158 mode_lib->vba.SurfaceTiling[k],
3163 &Read256BytesBlockHeightY[k],
3164 &Read256BytesBlockHeightC[k],
3165 &Read256BytesBlockWidthY[k],
3166 &Read256BytesBlockWidthC[k]);
/* Step 2: a single call covering all planes; whole arrays are passed, not elements. */
3168 CalculateSwathAndDETConfiguration(
3170 mode_lib->vba.NumberOfActivePlanes,
3171 mode_lib->vba.DETBufferSizeInKByte[0],
3174 mode_lib->vba.SourceScan,
3175 mode_lib->vba.SourcePixelFormat,
3176 mode_lib->vba.SurfaceTiling,
3177 mode_lib->vba.ViewportWidth,
3178 mode_lib->vba.ViewportHeight,
3179 mode_lib->vba.SurfaceWidthY,
3180 mode_lib->vba.SurfaceWidthC,
3181 mode_lib->vba.SurfaceHeightY,
3182 mode_lib->vba.SurfaceHeightC,
3183 Read256BytesBlockHeightY,
3184 Read256BytesBlockHeightC,
3185 Read256BytesBlockWidthY,
3186 Read256BytesBlockWidthC,
3187 mode_lib->vba.ODMCombineEnabled,
3188 mode_lib->vba.BlendingAndTiming,
3193 mode_lib->vba.HActive,
3194 mode_lib->vba.HRatio,
3195 mode_lib->vba.HRatioChroma,
3196 mode_lib->vba.DPPPerPlane,
3201 mode_lib->vba.SwathHeightY,
3202 mode_lib->vba.SwathHeightC,
3203 mode_lib->vba.DETBufferSizeY,
3204 mode_lib->vba.DETBufferSizeC,
3206 &dummysinglestring);
/*
 * dml30_CalculateBytePerPixelAnd256BBlockSizes - per-format byte sizes and
 * 256-byte block geometry for one surface.
 *
 * @SourcePixelFormat: pixel format that selects the branches below
 * @SurfaceTiling: dm_sw_linear forces a block height of 1
 * @BytePerPixelY, @BytePerPixelC: dereferenced as divisors when the block
 *	widths are computed
 * @BytePerPixelDETY, @BytePerPixelDETC: written with the per-format values
 * @BlockHeight256BytesY, @BlockHeight256BytesC: written block heights
 * @BlockWidth256BytesY, @BlockWidth256BytesC: written block widths
 *
 * All results are returned through the pointer arguments.
 */
3209 void dml30_CalculateBytePerPixelAnd256BBlockSizes(
3210 enum source_format_class SourcePixelFormat,
3211 enum dm_swizzle_mode SurfaceTiling,
3212 unsigned int *BytePerPixelY,
3213 unsigned int *BytePerPixelC,
3214 double *BytePerPixelDETY,
3215 double *BytePerPixelDETC,
3216 unsigned int *BlockHeight256BytesY,
3217 unsigned int *BlockHeight256BytesC,
3218 unsigned int *BlockWidth256BytesY,
3219 unsigned int *BlockWidth256BytesC)
/* Part 1: BytePerPixelDETY and BytePerPixelDETC, chosen by source format. */
3221 if (SourcePixelFormat == dm_444_64) {
3222 *BytePerPixelDETY = 8;
3223 *BytePerPixelDETC = 0;
3226 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3227 *BytePerPixelDETY = 4;
3228 *BytePerPixelDETC = 0;
3231 } else if (SourcePixelFormat == dm_444_16) {
3232 *BytePerPixelDETY = 2;
3233 *BytePerPixelDETC = 0;
3236 } else if (SourcePixelFormat == dm_444_8) {
3237 *BytePerPixelDETY = 1;
3238 *BytePerPixelDETC = 0;
3241 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3242 *BytePerPixelDETY = 4;
3243 *BytePerPixelDETC = 1;
3246 } else if (SourcePixelFormat == dm_420_8) {
3247 *BytePerPixelDETY = 1;
3248 *BytePerPixelDETC = 2;
3251 } else if (SourcePixelFormat == dm_420_12) {
3252 *BytePerPixelDETY = 2;
3253 *BytePerPixelDETC = 4;
/* NOTE(review): the fractional 4/3 and 8/3 values look like the fallback for the remaining formats - confirm. */
3257 *BytePerPixelDETY = 4.0 / 3;
3258 *BytePerPixelDETC = 8.0 / 3;
/* Part 2a: formats in this list get luma block dimensions only; the chroma height and width are set to 0. */
3263 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3264 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3265 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3266 || SourcePixelFormat == dm_rgbe)) {
3267 if (SurfaceTiling == dm_sw_linear) {
3268 *BlockHeight256BytesY = 1;
3269 } else if (SourcePixelFormat == dm_444_64) {
3270 *BlockHeight256BytesY = 4;
3271 } else if (SourcePixelFormat == dm_444_8) {
3272 *BlockHeight256BytesY = 16;
3274 *BlockHeight256BytesY = 8;
/* Width is what remains of 256 bytes after dividing by bytes per pixel and block height (integer division). */
3276 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3277 *BlockHeight256BytesC = 0;
3278 *BlockWidth256BytesC = 0;
/* Part 2b: all other formats; luma and chroma block heights are assigned together. */
3280 if (SurfaceTiling == dm_sw_linear) {
3281 *BlockHeight256BytesY = 1;
3282 *BlockHeight256BytesC = 1;
3283 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3284 *BlockHeight256BytesY = 8;
3285 *BlockHeight256BytesC = 16;
3286 } else if (SourcePixelFormat == dm_420_8) {
3287 *BlockHeight256BytesY = 16;
3288 *BlockHeight256BytesC = 8;
3290 *BlockHeight256BytesY = 8;
3291 *BlockHeight256BytesC = 8;
/* Both divisions are unsigned integer divisions; they rely on non-zero bytes per pixel for luma and chroma here. */
3293 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3294 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - pick a wait time from the latency inputs by PrefetchMode.
 *
 * PrefetchMode 0: the largest of DRAMClockChangeLatency + UrgentLatency,
 *                 SREnterPlusExitTime and UrgentLatency.
 * PrefetchMode 1: the larger of SREnterPlusExitTime and UrgentLatency.
 * The final return statement yields UrgentLatency alone.
 *
 * The result is in the same unit as the latency arguments.
 */
3298 static double CalculateTWait(
3299 unsigned int PrefetchMode,
3300 double DRAMClockChangeLatency,
3301 double UrgentLatency,
3302 double SREnterPlusExitTime)
3304 if (PrefetchMode == 0) {
3305 return dml_max(DRAMClockChangeLatency + UrgentLatency,
3306 dml_max(SREnterPlusExitTime, UrgentLatency));
3307 } else if (PrefetchMode == 1) {
3308 return dml_max(SREnterPlusExitTime, UrgentLatency);
3310 return UrgentLatency;
/*
 * dml30_CalculateWriteBackDISPCLK - DISPCLK value derived from writeback
 * scaler parameters.
 *
 * Three candidates are computed, each proportional to PixelClock, and the
 * largest one is returned via dml_max3():
 *   DISPCLK_H  - horizontal taps term, divided by WritebackHRatio
 *   DISPCLK_V  - vertical taps times destination width term, divided by HTotal
 *   DISPCLK_HB - term built from destination width, vertical taps and
 *                WritebackLineBufferSize, divided by WritebackSourceWidth
 *
 * WritebackPixelFormat and WritebackVRatio are not referenced by the
 * formulas below.
 */
3314 double dml30_CalculateWriteBackDISPCLK(
3315 enum source_format_class WritebackPixelFormat,
3317 double WritebackHRatio,
3318 double WritebackVRatio,
3319 unsigned int WritebackHTaps,
3320 unsigned int WritebackVTaps,
3321 long WritebackSourceWidth,
3322 long WritebackDestinationWidth,
3323 unsigned int HTotal,
3324 unsigned int WritebackLineBufferSize)
3326 double DISPCLK_H = 0, DISPCLK_V = 0, DISPCLK_HB = 0;
/* The 8.0, 6.0 and 57.0 literals force floating-point division on the integer inputs. */
3328 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3329 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3330 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3331 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
/*
 * CalculateWriteBackDelay - delay figure derived from writeback scaling.
 *
 * Intermediate values:
 *   WritebackVInit - (WritebackVRatio + WritebackVTaps + 1) / 2
 *   Line_length    - the larger of WritebackDestinationWidth and
 *                    dml_ceil(WritebackDestinationWidth / 6.0, 1) times
 *                    WritebackVTaps
 *   Output_lines_last_notclamped - WritebackDestinationHeight - 1 minus the
 *                    rounded-up (WritebackSourceHeight - WritebackVInit)
 *                    divided by WritebackVRatio
 *
 * The result is 0 when Output_lines_last_notclamped is negative; the other
 * assignment gives Output_lines_last_notclamped * Line_length plus the
 * horizontal remainder (HTotal - WritebackDestinationWidth) plus 80.
 *
 * WritebackPixelFormat and WritebackHRatio are not referenced in the body.
 * NOTE(review): the unit of the return value is not stated here - presumably
 * pixel-clock counts; confirm with the callers.
 */
3334 static double CalculateWriteBackDelay(
3335 enum source_format_class WritebackPixelFormat,
3336 double WritebackHRatio,
3337 double WritebackVRatio,
3338 unsigned int WritebackVTaps,
3339 long WritebackDestinationWidth,
3340 long WritebackDestinationHeight,
3341 long WritebackSourceHeight,
3342 unsigned int HTotal)
3344 double CalculateWriteBackDelay = 0;
3345 double Line_length = 0;
3346 double Output_lines_last_notclamped = 0;
3347 double WritebackVInit = 0;
3349 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3350 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3351 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
/* Clamp: a negative line count contributes no delay. */
3352 if (Output_lines_last_notclamped < 0) {
3353 CalculateWriteBackDelay = 0;
3355 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3357 return CalculateWriteBackDelay;
/*
 * CalculateDynamicMetadataParameters - timing terms used for dynamic metadata.
 *
 * Results are written through the four pointer arguments:
 *   Tsetup - (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock
 *   Tdmbf  - DynamicMetadataTransmittedBytes / 4.0 / DISPCLK
 *   Tdmec  - HTotal / PixelClock
 *   Tdmsks - VBlank * HTotal / PixelClock / 2.0 when
 *            DynamicMetadataLinesBeforeActiveRequired is 0; the other
 *            assignment uses DynamicMetadataLinesBeforeActiveRequired *
 *            HTotal / PixelClock. The value is halved when InterlaceEnable
 *            is 1 and ProgressiveToInterlaceUnitInOPP is false.
 *
 * The three pixel-count locals are built from clock-period terms
 * (constant / DCFClkDeepSleep, / DPPCLK, / DISPCLK) multiplied by PixelClock,
 * except VUpdateOffsetPix which is dml_ceil(HTotal / 4.0, 1).
 */
3361 static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
3362 double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
3363 long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
3364 double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
3366 double TotalRepeaterDelayTime = 0;
3367 double VUpdateWidthPix = 0;
3368 double VReadyOffsetPix = 0;
3369 double VUpdateOffsetPix = 0;
/* Repeater delay scales linearly with the number of inter-tile repeaters. */
3370 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
3371 VUpdateWidthPix = (14 / DCFClkDeepSleep + 12 / DPPCLK + TotalRepeaterDelayTime) * PixelClock;
3372 VReadyOffsetPix = dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
3373 VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
3374 *Tsetup = (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock;
3375 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
3376 *Tdmec = HTotal / PixelClock;
3377 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
3378 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3380 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
3382 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
3383 *Tdmsks = *Tdmsks / 2;
/*
 * CalculateRowBandwidth - meta-row and DPTE-row bandwidth for one surface.
 *
 * Results are written to meta_row_bw and dpte_row_bw.
 *
 * meta_row_bw: the DCCEnable != true case is handled first. For dm_420_8,
 * dm_420_10, dm_420_12 and dm_rgbe_alpha the value is the sum of a luma term
 * VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) and the
 * matching chroma term using VRatioChroma, MetaRowByteChroma and
 * meta_row_height_chroma. The remaining assignment is the luma term alone.
 *
 * dpte_row_bw: same structure, gated by GPUVMEnable and using
 * PixelPTEBytesPerRowLuma/Chroma with dpte_row_height_luma/chroma.
 *
 * NOTE(review): the disabled cases presumably yield 0 - confirm.
 */
3387 static void CalculateRowBandwidth(
3389 enum source_format_class SourcePixelFormat,
3391 double VRatioChroma,
3394 unsigned int MetaRowByteLuma,
3395 unsigned int MetaRowByteChroma,
3396 unsigned int meta_row_height_luma,
3397 unsigned int meta_row_height_chroma,
3398 unsigned int PixelPTEBytesPerRowLuma,
3399 unsigned int PixelPTEBytesPerRowChroma,
3400 unsigned int dpte_row_height_luma,
3401 unsigned int dpte_row_height_chroma,
3402 double *meta_row_bw,
3403 double *dpte_row_bw)
/* Meta-row bandwidth. */
3405 if (DCCEnable != true) {
3407 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3408 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3409 + VRatioChroma * MetaRowByteChroma
3410 / (meta_row_height_chroma * LineTime);
3412 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
/* DPTE-row bandwidth, same shape as above. */
3415 if (GPUVMEnable != true) {
3417 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3418 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3419 + VRatioChroma * PixelPTEBytesPerRowChroma
3420 / (dpte_row_height_chroma * LineTime);
3422 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - immediate-flip timing and support check for one pipe.
 *
 * Outputs (written through the pointer arguments):
 *   DestinationLinesToRequestVMInImmediateFlip  - meta-PTE fetch time in line
 *       times, rounded up to a quarter line
 *   DestinationLinesToRequestRowInImmediateFlip - row fetch time in line
 *       times, rounded up to a quarter line
 *   final_flip_bw - bandwidth implied by the two rounded line counts
 *   ImmediateFlipSupportedForPipe - false when either line count reaches its
 *       limit (32 resp. 16) or when the meta-PTE time plus twice the row time
 *       exceeds min_row_time
 *
 * Steps:
 *   1. HostVMInefficiencyFactor / HostVMDynamicLevelsTrips are taken from the
 *      arguments when both GPUVMEnable and HostVMEnable are set; the other
 *      assignments use 1 and 0.
 *   2. ImmediateFlipBW is this pipe's share of
 *      BandwidthAvailableForImmediateFlip, weighted by its bytes over
 *      TotImmediateFlipBytes.
 *   3. Fetch times are maxima over a bandwidth term, an urgent-latency term
 *      and a quarter of LineTime.
 *   4. min_row_time is the smallest row duration (row height * LineTime /
 *      vertical ratio) among the DPTE and meta rows that are in use.
 *
 * NOTE(review): the chroma-aware min_row_time branch tests dm_420_8,
 * dm_420_10 and dm_rgbe_alpha but not dm_420_12, whereas
 * CalculateRowBandwidth() also lists dm_420_12 - confirm this is intended.
 * NOTE(review): step 2 divides by TotImmediateFlipBytes without a visible
 * zero check - confirm callers guarantee a non-zero total.
 */
3426 static void CalculateFlipSchedule(
3427 struct display_mode_lib *mode_lib,
3428 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3429 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3430 double UrgentExtraLatency,
3431 double UrgentLatency,
3432 unsigned int GPUVMMaxPageTableLevels,
3434 unsigned int HostVMMaxNonCachedPageTableLevels,
3436 double HostVMMinPageSize,
3437 double PDEAndMetaPTEBytesPerFrame,
3438 double MetaRowBytes,
3439 double DPTEBytesPerRow,
3440 double BandwidthAvailableForImmediateFlip,
3441 unsigned int TotImmediateFlipBytes,
3442 enum source_format_class SourcePixelFormat,
3445 double VRatioChroma,
3448 unsigned int dpte_row_height,
3449 unsigned int meta_row_height,
3450 unsigned int dpte_row_height_chroma,
3451 unsigned int meta_row_height_chroma,
3452 double *DestinationLinesToRequestVMInImmediateFlip,
3453 double *DestinationLinesToRequestRowInImmediateFlip,
3454 double *final_flip_bw,
3455 bool *ImmediateFlipSupportedForPipe)
3457 double min_row_time = 0.0;
3458 unsigned int HostVMDynamicLevelsTrips = 0;
3459 double TimeForFetchingMetaPTEImmediateFlip = 0;
3460 double TimeForFetchingRowInVBlankImmediateFlip = 0;
3461 double ImmediateFlipBW = 0;
3462 double HostVMInefficiencyFactor = 0;
/* Step 1: host-VM scaling factor and extra page-walk trips. */
3464 if (GPUVMEnable == true && HostVMEnable == true) {
3465 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3466 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3468 HostVMInefficiencyFactor = 1;
3469 HostVMDynamicLevelsTrips = 0;
/* Step 2: this pipe's proportional share of the immediate-flip bandwidth. */
3472 if (GPUVMEnable == true || DCCEnable == true) {
3473 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
/* Step 3a: time to fetch PDEs and meta PTEs; only computed with GPUVM enabled. */
3476 if (GPUVMEnable == true) {
3477 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3478 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
3480 TimeForFetchingMetaPTEImmediateFlip = 0;
/* Multiplying by 4, rounding up and dividing by 4 rounds up to quarter-line granularity. */
3483 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
/* Step 3b: time to fetch one meta row plus one DPTE row. */
3484 if ((GPUVMEnable == true || DCCEnable == true)) {
3485 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3486 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
3488 TimeForFetchingRowInVBlankImmediateFlip = 0;
3491 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
/* Resulting flip bandwidth, derived from the rounded line counts above. */
3493 if (GPUVMEnable == true) {
3494 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3495 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3496 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3497 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
/* Step 4: shortest row duration; formats with a chroma plane also consider the chroma rows. */
3503 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3504 if (GPUVMEnable == true && DCCEnable != true) {
3505 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3506 } else if (GPUVMEnable != true && DCCEnable == true) {
3507 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3509 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
3510 dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
3513 if (GPUVMEnable == true && DCCEnable != true) {
3514 min_row_time = dpte_row_height * LineTime / VRatio;
3515 } else if (GPUVMEnable != true && DCCEnable == true) {
3516 min_row_time = meta_row_height * LineTime / VRatio;
3518 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
/* Support decision: line-count limits and the fetch-time budget against min_row_time. */
3522 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3523 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3524 *ImmediateFlipSupportedForPipe = false;
3526 *ImmediateFlipSupportedForPipe = true;
/*
 * TruncToValidBPP - choose or validate an output bits-per-pixel value.
 *
 * What the statements below establish:
 *   - MaxDSCBPP depends on Format and DSCInputBitPerComponent: 1.5x for
 *     dm_420, 3x for dm_444, 2x for dm_n422, and 3x in the last assignment,
 *     each minus 1/16.
 *   - MaxLinkBPP is LinkBitRate / 10 * 8 * Lanes / PixelClock; with DSCEnable
 *     on a dm_dp output it is additionally scaled by (1 - 2.4 / 100).
 *   - MaxLinkBPP is compared against 16 for 4:1 ODM combine and against 32
 *     for 2:1 ODM combine.
 *   - With DesiredBPP == 0 the function selects a value from MaxLinkBPP: it
 *     is compared with MinDSCBPP and MaxDSCBPP, and one path returns
 *     MaxLinkBPP rounded down to a multiple of 1/16; it is also compared with
 *     NonDSCBPP2, NonDSCBPP1 and NonDSCBPP0 in descending order.
 *   - A non-zero DesiredBPP is checked: without DSC it must equal one of the
 *     NonDSCBPP values or 18; with DSC it must lie in [MinDSCBPP, MaxDSCBPP].
 *
 * NOTE(review): the 16 and 32 comparisons presumably clamp MaxLinkBPP, and
 * BPP_BLENDED_PIPE is presumably the "no valid BPP" result - confirm both.
 */
3530 static double TruncToValidBPP(
3538 enum output_encoder_class Output,
3539 enum output_format_class Format,
3540 unsigned int DSCInputBitPerComponent,
3544 enum odm_combine_mode ODMCombine)
3546 double MaxLinkBPP = 0;
3548 double MaxDSCBPP = 0;
/* Per-format DSC upper bound; the 1/16 step matches the 1/16 rounding used further down. */
3553 if (Format == dm_420) {
3558 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3559 } else if (Format == dm_444) {
3564 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3570 if (Format == dm_n422) {
3572 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3576 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
/* Link-limited BPP; the DSC-over-DP case loses a further 2.4 percent. */
3580 if (DSCEnable && Output == dm_dp) {
3581 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3583 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3586 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3588 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
/* DesiredBPP == 0: derive a value from MaxLinkBPP rather than validating a requested one. */
3593 if (DesiredBPP == 0) {
3595 if (MaxLinkBPP < MinDSCBPP) {
3597 } else if (MaxLinkBPP >= MaxDSCBPP) {
3600 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3603 if (MaxLinkBPP >= NonDSCBPP2) {
3605 } else if (MaxLinkBPP >= NonDSCBPP1) {
3607 } else if (MaxLinkBPP >= NonDSCBPP0) {
/* Validation of an explicitly requested DesiredBPP. */
3614 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0 || DesiredBPP == 18)) ||
3615 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3624 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3626 struct vba_vars_st *v = &mode_lib->vba;
3627 int MinPrefetchMode, MaxPrefetchMode;
3629 unsigned int j, k, m;
3630 bool EnoughWritebackUnits = true;
3631 bool WritebackModeSupport = true;
3632 bool ViewportExceedsSurface = false;
3633 double MaxTotalVActiveRDBandwidth = 0;
3634 long ReorderingBytes = 0;
3635 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3637 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3639 CalculateMinAndMaxPrefetchMode(
3640 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3641 &MinPrefetchMode, &MaxPrefetchMode);
3643 /*Scale Ratio, taps Support Check*/
3645 v->ScaleRatioAndTapsSupport = true;
3646 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3647 if (v->ScalerEnabled[k] == false
3648 && ((v->SourcePixelFormat[k] != dm_444_64
3649 && v->SourcePixelFormat[k] != dm_444_32
3650 && v->SourcePixelFormat[k] != dm_444_16
3651 && v->SourcePixelFormat[k] != dm_mono_16
3652 && v->SourcePixelFormat[k] != dm_mono_8
3653 && v->SourcePixelFormat[k] != dm_rgbe
3654 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3655 || v->HRatio[k] != 1.0
3656 || v->htaps[k] != 1.0
3657 || v->VRatio[k] != 1.0
3658 || v->vtaps[k] != 1.0)) {
3659 v->ScaleRatioAndTapsSupport = false;
3660 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3661 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3662 || (v->htaps[k] > 1.0
3663 && (v->htaps[k] % 2) == 1)
3664 || v->HRatio[k] > v->MaxHSCLRatio
3665 || v->VRatio[k] > v->MaxVSCLRatio
3666 || v->HRatio[k] > v->htaps[k]
3667 || v->VRatio[k] > v->vtaps[k]
3668 || (v->SourcePixelFormat[k] != dm_444_64
3669 && v->SourcePixelFormat[k] != dm_444_32
3670 && v->SourcePixelFormat[k] != dm_444_16
3671 && v->SourcePixelFormat[k] != dm_mono_16
3672 && v->SourcePixelFormat[k] != dm_mono_8
3673 && v->SourcePixelFormat[k] != dm_rgbe
3674 && (v->VTAPsChroma[k] < 1
3675 || v->VTAPsChroma[k] > 8
3676 || v->HTAPsChroma[k] < 1
3677 || v->HTAPsChroma[k] > 8
3678 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3679 || v->HRatioChroma[k] > v->MaxHSCLRatio
3680 || v->VRatioChroma[k] > v->MaxVSCLRatio
3681 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3682 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3683 v->ScaleRatioAndTapsSupport = false;
3686 /*Source Format, Pixel Format and Scan Support Check*/
3688 v->SourceFormatPixelAndScanSupport = true;
3689 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3690 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3691 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3692 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3693 v->SourceFormatPixelAndScanSupport = false;
3696 /*Bandwidth Support Check*/
3698 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3699 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3700 v->SourcePixelFormat[k],
3701 v->SurfaceTiling[k],
3702 &v->BytePerPixelY[k],
3703 &v->BytePerPixelC[k],
3704 &v->BytePerPixelInDETY[k],
3705 &v->BytePerPixelInDETC[k],
3706 &v->Read256BlockHeightY[k],
3707 &v->Read256BlockHeightC[k],
3708 &v->Read256BlockWidthY[k],
3709 &v->Read256BlockWidthC[k]);
3711 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3712 if (v->SourceScan[k] != dm_vert) {
3713 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3714 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3716 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3717 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3720 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3721 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3722 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3724 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3725 if (v->WritebackEnable[k] == true
3726 && v->WritebackPixelFormat[k] == dm_444_64) {
3727 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3728 * v->WritebackDestinationHeight[k]
3729 / (v->WritebackSourceHeight[k]
3731 / v->PixelClock[k]) * 8.0;
3732 } else if (v->WritebackEnable[k] == true) {
3733 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3734 * v->WritebackDestinationHeight[k]
3735 / (v->WritebackSourceHeight[k]
3737 / v->PixelClock[k]) * 4.0;
3739 v->WriteBandwidth[k] = 0.0;
3743 /*Writeback Latency support check*/
3745 v->WritebackLatencySupport = true;
3746 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3747 if (v->WritebackEnable[k] == true) {
3748 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3749 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3750 if (v->WriteBandwidth[k]
3751 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3752 / v->WritebackLatency) {
3753 v->WritebackLatencySupport = false;
3756 if (v->WriteBandwidth[k]
3757 > v->WritebackInterfaceBufferSize * 1024
3758 / v->WritebackLatency) {
3759 v->WritebackLatencySupport = false;
3765 /*Writeback Mode Support Check*/
3767 v->TotalNumberOfActiveWriteback = 0;
3768 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3769 if (v->WritebackEnable[k] == true) {
3770 v->TotalNumberOfActiveWriteback =
3771 v->TotalNumberOfActiveWriteback + 1;
3775 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3776 EnoughWritebackUnits = false;
3778 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3779 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3780 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3782 WritebackModeSupport = false;
3784 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3785 WritebackModeSupport = false;
3788 /*Writeback Scale Ratio and Taps Support Check*/
3790 v->WritebackScaleRatioAndTapsSupport = true;
3791 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3792 if (v->WritebackEnable[k] == true) {
3793 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3794 || v->WritebackVRatio[k]
3795 > v->WritebackMaxVSCLRatio
3796 || v->WritebackHRatio[k]
3797 < v->WritebackMinHSCLRatio
3798 || v->WritebackVRatio[k]
3799 < v->WritebackMinVSCLRatio
3800 || v->WritebackHTaps[k]
3801 > v->WritebackMaxHSCLTaps
3802 || v->WritebackVTaps[k]
3803 > v->WritebackMaxVSCLTaps
3804 || v->WritebackHRatio[k]
3805 > v->WritebackHTaps[k]
3806 || v->WritebackVRatio[k]
3807 > v->WritebackVTaps[k]
3808 || (v->WritebackHTaps[k] > 2.0
3809 && ((v->WritebackHTaps[k] % 2)
3811 v->WritebackScaleRatioAndTapsSupport = false;
3813 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3814 v->WritebackScaleRatioAndTapsSupport = false;
3818 /*Maximum DISPCLK/DPPCLK Support check*/
3820 v->WritebackRequiredDISPCLK = 0.0;
3821 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3822 if (v->WritebackEnable[k] == true) {
3823 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3824 dml30_CalculateWriteBackDISPCLK(
3825 v->WritebackPixelFormat[k],
3827 v->WritebackHRatio[k],
3828 v->WritebackVRatio[k],
3829 v->WritebackHTaps[k],
3830 v->WritebackVTaps[k],
3831 v->WritebackSourceWidth[k],
3832 v->WritebackDestinationWidth[k],
3834 v->WritebackLineBufferSize));
3837 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3838 if (v->HRatio[k] > 1.0) {
3839 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3841 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3843 if (v->BytePerPixelC[k] == 0.0) {
3844 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3845 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3846 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3847 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3848 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3851 if (v->HRatioChroma[k] > 1.0) {
3852 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3853 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3855 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3857 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3858 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3859 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3860 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3862 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3863 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3864 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3868 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3869 int MaximumSwathWidthSupportLuma = 0;
3870 int MaximumSwathWidthSupportChroma = 0;
3872 if (v->SurfaceTiling[k] == dm_sw_linear) {
3873 MaximumSwathWidthSupportLuma = 8192.0;
3874 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3875 MaximumSwathWidthSupportLuma = 2880.0;
3877 MaximumSwathWidthSupportLuma = 5760.0;
3880 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3881 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3883 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3885 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3886 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3887 if (v->BytePerPixelC[k] == 0.0) {
3888 v->MaximumSwathWidthInLineBufferChroma = 0;
3890 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3891 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3893 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3894 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3897 CalculateSwathAndDETConfiguration(
3899 v->NumberOfActivePlanes,
3900 v->DETBufferSizeInKByte[0],
3901 v->MaximumSwathWidthLuma,
3902 v->MaximumSwathWidthChroma,
3904 v->SourcePixelFormat,
3912 v->Read256BlockHeightY,
3913 v->Read256BlockHeightC,
3914 v->Read256BlockWidthY,
3915 v->Read256BlockWidthC,
3916 v->odm_combine_dummy,
3917 v->BlendingAndTiming,
3920 v->BytePerPixelInDETY,
3921 v->BytePerPixelInDETC,
3926 v->swath_width_luma_ub,
3927 v->swath_width_chroma_ub,
3934 v->SingleDPPViewportSizeSupportPerPlane,
3935 &v->ViewportSizeSupport[0][0]);
3937 for (i = 0; i < v->soc.num_states; i++) {
3938 for (j = 0; j < 2; j++) {
3939 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3940 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3941 v->RequiredDISPCLK[i][j] = 0.0;
3942 v->DISPCLK_DPPCLK_Support[i][j] = true;
3943 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3944 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3945 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3946 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3947 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3948 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3950 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3951 * (1 + v->DISPCLKRampingMargin / 100.0);
3952 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3953 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3954 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3956 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3957 * (1 + v->DISPCLKRampingMargin / 100.0);
3958 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3959 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3960 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3963 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3964 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3965 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3966 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3967 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3968 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3969 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3970 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3971 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3972 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3973 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3974 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3975 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3977 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3978 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3980 if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH
3981 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3982 if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) {
3983 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3984 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3986 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3987 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3990 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH
3991 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3992 if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) {
3993 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3994 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3996 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3997 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4000 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4001 v->MPCCombine[i][j][k] = false;
4002 v->NoOfDPP[i][j][k] = 4;
4003 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4004 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4005 v->MPCCombine[i][j][k] = false;
4006 v->NoOfDPP[i][j][k] = 2;
4007 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4008 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4009 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
4010 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4011 v->MPCCombine[i][j][k] = false;
4012 v->NoOfDPP[i][j][k] = 1;
4013 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4015 v->MPCCombine[i][j][k] = true;
4016 v->NoOfDPP[i][j][k] = 2;
4017 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4019 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4020 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4021 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4022 v->DISPCLK_DPPCLK_Support[i][j] = false;
4025 v->TotalNumberOfActiveDPP[i][j] = 0;
4026 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4027 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4028 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4029 if (v->NoOfDPP[i][j][k] == 1)
4030 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4032 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
4033 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4034 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4035 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4036 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4037 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4038 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4039 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4040 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4041 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4042 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4045 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4046 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4047 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4048 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4049 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4050 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4053 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4054 v->RequiredDISPCLK[i][j] = 0.0;
4055 v->DISPCLK_DPPCLK_Support[i][j] = true;
4056 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4057 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4058 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4059 v->MPCCombine[i][j][k] = true;
4060 v->NoOfDPP[i][j][k] = 2;
4061 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4063 v->MPCCombine[i][j][k] = false;
4064 v->NoOfDPP[i][j][k] = 1;
4065 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4067 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4068 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4069 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4071 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4073 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4074 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4075 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4076 v->DISPCLK_DPPCLK_Support[i][j] = false;
4079 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4080 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4081 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4084 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4085 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4086 v->DISPCLK_DPPCLK_Support[i][j] = false;
4091 /*Total Available Pipes Support Check*/
4093 for (i = 0; i < v->soc.num_states; i++) {
4094 for (j = 0; j < 2; j++) {
4095 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4096 v->TotalAvailablePipesSupport[i][j] = true;
4098 v->TotalAvailablePipesSupport[i][j] = false;
4102 /*Display IO and DSC Support Check*/
4104 v->NonsupportedDSCInputBPC = false;
4105 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4106 if (!(v->DSCInputBitPerComponent[k] == 12.0
4107 || v->DSCInputBitPerComponent[k] == 10.0
4108 || v->DSCInputBitPerComponent[k] == 8.0)) {
4109 v->NonsupportedDSCInputBPC = true;
4113 /*Number Of DSC Slices*/
4114 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4115 if (v->BlendingAndTiming[k] == k) {
4116 if (v->PixelClockBackEnd[k] > 3200) {
4117 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4118 } else if (v->PixelClockBackEnd[k] > 1360) {
4119 v->NumberOfDSCSlices[k] = 8;
4120 } else if (v->PixelClockBackEnd[k] > 680) {
4121 v->NumberOfDSCSlices[k] = 4;
4122 } else if (v->PixelClockBackEnd[k] > 340) {
4123 v->NumberOfDSCSlices[k] = 2;
4125 v->NumberOfDSCSlices[k] = 1;
4128 v->NumberOfDSCSlices[k] = 0;
4132 for (i = 0; i < v->soc.num_states; i++) {
4133 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4134 v->RequiresDSC[i][k] = false;
4135 v->RequiresFEC[i][k] = false;
4136 if (v->BlendingAndTiming[k] == k) {
4137 if (v->Output[k] == dm_hdmi) {
4138 v->RequiresDSC[i][k] = false;
4139 v->RequiresFEC[i][k] = false;
4140 v->OutputBppPerState[i][k] = TruncToValidBPP(
4141 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4145 v->PixelClockBackEnd[k],
4146 v->ForcedOutputLinkBPP[k],
4150 v->DSCInputBitPerComponent[k],
4151 v->NumberOfDSCSlices[k],
4152 v->AudioSampleRate[k],
4153 v->AudioSampleLayout[k],
4154 v->ODMCombineEnablePerState[i][k]);
4155 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4156 if (v->DSCEnable[k] == true) {
4157 v->RequiresDSC[i][k] = true;
4158 v->LinkDSCEnable = true;
4159 if (v->Output[k] == dm_dp) {
4160 v->RequiresFEC[i][k] = true;
4162 v->RequiresFEC[i][k] = false;
4165 v->RequiresDSC[i][k] = false;
4166 v->LinkDSCEnable = false;
4167 v->RequiresFEC[i][k] = false;
4170 v->Outbpp = BPP_INVALID;
4171 if (v->PHYCLKPerState[i] >= 270.0) {
4172 v->Outbpp = TruncToValidBPP(
4173 (1.0 - v->Downspreading / 100.0) * 2700,
4174 v->OutputLinkDPLanes[k],
4177 v->PixelClockBackEnd[k],
4178 v->ForcedOutputLinkBPP[k],
4182 v->DSCInputBitPerComponent[k],
4183 v->NumberOfDSCSlices[k],
4184 v->AudioSampleRate[k],
4185 v->AudioSampleLayout[k],
4186 v->ODMCombineEnablePerState[i][k]);
4187 v->OutputBppPerState[i][k] = v->Outbpp;
4188 // TODO: Need some other way to handle this nonsense
4189 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4191 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4192 v->Outbpp = TruncToValidBPP(
4193 (1.0 - v->Downspreading / 100.0) * 5400,
4194 v->OutputLinkDPLanes[k],
4197 v->PixelClockBackEnd[k],
4198 v->ForcedOutputLinkBPP[k],
4202 v->DSCInputBitPerComponent[k],
4203 v->NumberOfDSCSlices[k],
4204 v->AudioSampleRate[k],
4205 v->AudioSampleLayout[k],
4206 v->ODMCombineEnablePerState[i][k]);
4207 v->OutputBppPerState[i][k] = v->Outbpp;
4208 // TODO: Need some other way to handle this nonsense
4209 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4211 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4212 v->Outbpp = TruncToValidBPP(
4213 (1.0 - v->Downspreading / 100.0) * 8100,
4214 v->OutputLinkDPLanes[k],
4217 v->PixelClockBackEnd[k],
4218 v->ForcedOutputLinkBPP[k],
4222 v->DSCInputBitPerComponent[k],
4223 v->NumberOfDSCSlices[k],
4224 v->AudioSampleRate[k],
4225 v->AudioSampleLayout[k],
4226 v->ODMCombineEnablePerState[i][k]);
4227 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
4228 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
4229 v->RequiresDSC[i][k] = true;
4230 v->LinkDSCEnable = true;
4231 if (v->Output[k] == dm_dp) {
4232 v->RequiresFEC[i][k] = true;
4234 v->Outbpp = TruncToValidBPP(
4235 (1.0 - v->Downspreading / 100.0) * 8100,
4236 v->OutputLinkDPLanes[k],
4239 v->PixelClockBackEnd[k],
4240 v->ForcedOutputLinkBPP[k],
4244 v->DSCInputBitPerComponent[k],
4245 v->NumberOfDSCSlices[k],
4246 v->AudioSampleRate[k],
4247 v->AudioSampleLayout[k],
4248 v->ODMCombineEnablePerState[i][k]);
4250 v->OutputBppPerState[i][k] = v->Outbpp;
4251 // TODO: Need some other way to handle this nonsense
4252 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4256 v->OutputBppPerState[i][k] = 0;
4260 for (i = 0; i < v->soc.num_states; i++) {
4261 v->DIOSupport[i] = true;
4262 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4263 if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4264 && (v->OutputBppPerState[i][k] == 0
4265 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4266 v->DIOSupport[i] = false;
4271 for (i = 0; i < v->soc.num_states; ++i) {
4272 v->ODMCombine4To1SupportCheckOK[i] = true;
4273 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4274 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4275 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4276 v->ODMCombine4To1SupportCheckOK[i] = false;
4281 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4283 for (i = 0; i < v->soc.num_states; i++) {
4284 v->NotEnoughDSCUnits[i] = false;
4285 v->TotalDSCUnitsRequired = 0.0;
4286 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4287 if (v->RequiresDSC[i][k] == true) {
4288 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4289 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4290 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4291 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4293 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4297 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4298 v->NotEnoughDSCUnits[i] = true;
4301 /*DSC Delay per state*/
4303 for (i = 0; i < v->soc.num_states; i++) {
4304 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4305 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4308 v->BPP = v->OutputBppPerState[i][k];
4310 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4311 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4312 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4313 v->DSCInputBitPerComponent[k],
4315 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4316 v->NumberOfDSCSlices[k],
4318 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4319 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4320 v->DSCDelayPerState[i][k] = 2.0
4321 * dscceComputeDelay(
4322 v->DSCInputBitPerComponent[k],
4324 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4325 v->NumberOfDSCSlices[k] / 2,
4327 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4329 v->DSCDelayPerState[i][k] = 4.0
4330 * (dscceComputeDelay(
4331 v->DSCInputBitPerComponent[k],
4333 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4334 v->NumberOfDSCSlices[k] / 4,
4336 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4338 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4340 v->DSCDelayPerState[i][k] = 0.0;
4343 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4344 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4345 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4346 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4352 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4354 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4355 for (j = 0; j <= 1; ++j) {
4356 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4357 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4358 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4359 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4362 CalculateSwathAndDETConfiguration(
4364 v->NumberOfActivePlanes,
4365 v->DETBufferSizeInKByte[0],
4366 v->MaximumSwathWidthLuma,
4367 v->MaximumSwathWidthChroma,
4369 v->SourcePixelFormat,
4377 v->Read256BlockHeightY,
4378 v->Read256BlockHeightC,
4379 v->Read256BlockWidthY,
4380 v->Read256BlockWidthC,
4381 v->ODMCombineEnableThisState,
4382 v->BlendingAndTiming,
4385 v->BytePerPixelInDETY,
4386 v->BytePerPixelInDETC,
4390 v->NoOfDPPThisState,
4391 v->swath_width_luma_ub_this_state,
4392 v->swath_width_chroma_ub_this_state,
4393 v->SwathWidthYThisState,
4394 v->SwathWidthCThisState,
4395 v->SwathHeightYThisState,
4396 v->SwathHeightCThisState,
4397 v->DETBufferSizeYThisState,
4398 v->DETBufferSizeCThisState,
4400 &v->ViewportSizeSupport[i][j]);
4402 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4403 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4404 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4405 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4406 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4407 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4408 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4409 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4410 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4415 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4416 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4419 for (i = 0; i < v->soc.num_states; i++) {
4420 for (j = 0; j < 2; j++) {
4421 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4422 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4423 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4424 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4425 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4426 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4427 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4428 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4429 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4432 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4433 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4434 if (v->DCCEnable[k] == true) {
4435 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4439 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4440 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4441 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4443 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4444 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4445 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4447 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4448 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4451 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4454 v->Read256BlockHeightC[k],
4455 v->Read256BlockWidthY[k],
4456 v->SourcePixelFormat[k],
4457 v->SurfaceTiling[k],
4458 v->BytePerPixelC[k],
4460 v->SwathWidthCThisState[k],
4461 v->ViewportHeightChroma[k],
4464 v->HostVMMaxNonCachedPageTableLevels,
4465 v->GPUVMMinPageSize,
4466 v->HostVMMinPageSize,
4467 v->PTEBufferSizeInRequestsForChroma,
4470 &v->MacroTileWidthC[k],
4472 &v->DPTEBytesPerRowC,
4473 &v->PTEBufferSizeNotExceededC[i][j][k],
4475 &v->dpte_row_height_chroma[k],
4479 &v->meta_row_height_chroma[k],
4486 &v->dummyinteger11);
4488 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4493 v->ProgressiveToInterlaceUnitInOPP,
4494 v->SwathHeightCThisState[k],
4495 v->ViewportYStartC[k],
4499 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4500 v->PTEBufferSizeInRequestsForChroma = 0;
4501 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4502 v->MetaRowBytesC = 0.0;
4503 v->DPTEBytesPerRowC = 0.0;
4504 v->PrefetchLinesC[i][j][k] = 0.0;
4505 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4507 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4510 v->Read256BlockHeightY[k],
4511 v->Read256BlockWidthY[k],
4512 v->SourcePixelFormat[k],
4513 v->SurfaceTiling[k],
4514 v->BytePerPixelY[k],
4516 v->SwathWidthYThisState[k],
4517 v->ViewportHeight[k],
4520 v->HostVMMaxNonCachedPageTableLevels,
4521 v->GPUVMMinPageSize,
4522 v->HostVMMinPageSize,
4523 v->PTEBufferSizeInRequestsForLuma,
4525 v->DCCMetaPitchY[k],
4526 &v->MacroTileWidthY[k],
4528 &v->DPTEBytesPerRowY,
4529 &v->PTEBufferSizeNotExceededY[i][j][k],
4531 &v->dpte_row_height[k],
4535 &v->meta_row_height[k],
4537 &v->dpte_group_bytes[k],
4543 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4548 v->ProgressiveToInterlaceUnitInOPP,
4549 v->SwathHeightYThisState[k],
4550 v->ViewportYStartY[k],
4553 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4554 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4555 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4557 CalculateRowBandwidth(
4559 v->SourcePixelFormat[k],
4563 v->HTotal[k] / v->PixelClock[k],
4566 v->meta_row_height[k],
4567 v->meta_row_height_chroma[k],
4568 v->DPTEBytesPerRowY,
4569 v->DPTEBytesPerRowC,
4570 v->dpte_row_height[k],
4571 v->dpte_row_height_chroma[k],
4572 &v->meta_row_bandwidth[i][j][k],
4573 &v->dpte_row_bandwidth[i][j][k]);
4575 v->UrgLatency[i] = CalculateUrgentLatency(
4576 v->UrgentLatencyPixelDataOnly,
4577 v->UrgentLatencyPixelMixedWithVMData,
4578 v->UrgentLatencyVMDataOnly,
4579 v->DoUrgentLatencyAdjustment,
4580 v->UrgentLatencyAdjustmentFabricClockComponent,
4581 v->UrgentLatencyAdjustmentFabricClockReference,
4582 v->FabricClockPerState[i]);
4584 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4585 CalculateUrgentBurstFactor(
4586 v->swath_width_luma_ub_this_state[k],
4587 v->swath_width_chroma_ub_this_state[k],
4588 v->DETBufferSizeInKByte[0],
4589 v->SwathHeightYThisState[k],
4590 v->SwathHeightCThisState[k],
4591 v->HTotal[k] / v->PixelClock[k],
4593 v->CursorBufferSize,
4594 v->CursorWidth[k][0],
4598 v->BytePerPixelInDETY[k],
4599 v->BytePerPixelInDETC[k],
4600 v->DETBufferSizeYThisState[k],
4601 v->DETBufferSizeCThisState[k],
4602 &v->UrgentBurstFactorCursor[k],
4603 &v->UrgentBurstFactorLuma[k],
4604 &v->UrgentBurstFactorChroma[k],
4605 &NotUrgentLatencyHiding[k]);
4608 v->NotUrgentLatencyHiding[i][j] = false;
4609 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4610 if (NotUrgentLatencyHiding[k]) {
4611 v->NotUrgentLatencyHiding[i][j] = true;
4615 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4616 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4617 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4618 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4621 v->TotalVActivePixelBandwidth[i][j] = 0;
4622 v->TotalVActiveCursorBandwidth[i][j] = 0;
4623 v->TotalMetaRowBandwidth[i][j] = 0;
4624 v->TotalDPTERowBandwidth[i][j] = 0;
4625 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4626 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4627 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4628 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4629 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4632 CalculateDCFCLKDeepSleep(
4634 v->NumberOfActivePlanes,
4639 v->SwathWidthYThisState,
4640 v->SwathWidthCThisState,
4641 v->NoOfDPPThisState,
4646 v->PSCL_FACTOR_CHROMA,
4647 v->RequiredDPPCLKThisState,
4648 v->ReadBandwidthLuma,
4649 v->ReadBandwidthChroma,
4651 &v->ProjectedDCFCLKDeepSleep[i][j]);
4655 //Calculate Return BW
4657 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4658 for (j = 0; j <= 1; ++j) {
4659 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4660 if (v->BlendingAndTiming[k] == k) {
4661 if (v->WritebackEnable[k] == true) {
4662 v->WritebackDelayTime[k] = v->WritebackLatency
4663 + CalculateWriteBackDelay(
4664 v->WritebackPixelFormat[k],
4665 v->WritebackHRatio[k],
4666 v->WritebackVRatio[k],
4667 v->WritebackVTaps[k],
4668 v->WritebackDestinationWidth[k],
4669 v->WritebackDestinationHeight[k],
4670 v->WritebackSourceHeight[k],
4671 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4673 v->WritebackDelayTime[k] = 0.0;
4675 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4676 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4677 v->WritebackDelayTime[k] = dml_max(
4678 v->WritebackDelayTime[k],
4680 + CalculateWriteBackDelay(
4681 v->WritebackPixelFormat[m],
4682 v->WritebackHRatio[m],
4683 v->WritebackVRatio[m],
4684 v->WritebackVTaps[m],
4685 v->WritebackDestinationWidth[m],
4686 v->WritebackDestinationHeight[m],
4687 v->WritebackSourceHeight[m],
4688 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4693 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4694 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4695 if (v->BlendingAndTiming[k] == m) {
4696 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4700 v->MaxMaxVStartup[i][j] = 0;
4701 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4702 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4703 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4704 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4709 ReorderingBytes = v->NumberOfChannels
4711 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4712 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4713 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4714 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4716 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4717 for (j = 0; j <= 1; ++j) {
4718 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4722 if (v->UseMinimumRequiredDCFCLK == true) {
4725 v->MaxInterDCNTileRepeaters,
4727 v->FinalDRAMClockChangeLatency,
4728 v->SREnterPlusExitTime,
4730 v->RoundTripPingLatencyCycles,
4732 v->PixelChunkSizeInKByte,
4735 v->GPUVMMaxPageTableLevels,
4737 v->NumberOfActivePlanes,
4738 v->HostVMMinPageSize,
4739 v->HostVMMaxNonCachedPageTableLevels,
4740 v->DynamicMetadataVMEnabled,
4741 v->ImmediateFlipRequirement[0],
4742 v->ProgressiveToInterlaceUnitInOPP,
4743 v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
4744 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4745 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4746 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
4749 v->DynamicMetadataTransmittedBytes,
4750 v->DynamicMetadataLinesBeforeActiveRequired,
4756 v->ProjectedDCFCLKDeepSleep,
4758 v->TotalVActivePixelBandwidth,
4759 v->TotalVActiveCursorBandwidth,
4760 v->TotalMetaRowBandwidth,
4761 v->TotalDPTERowBandwidth,
4762 v->TotalNumberOfActiveDPP,
4763 v->TotalNumberOfDCCActiveDPP,
4764 v->dpte_group_bytes,
4767 v->swath_width_luma_ub_all_states,
4768 v->swath_width_chroma_ub_all_states,
4773 v->PDEAndMetaPTEBytesPerFrame,
4776 v->DynamicMetadataEnable,
4777 v->VActivePixelBandwidth,
4778 v->VActiveCursorBandwidth,
4779 v->ReadBandwidthLuma,
4780 v->ReadBandwidthChroma,
4784 if (v->ClampMinDCFCLK) {
4785 /* Clamp calculated values to actual minimum */
4786 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4787 for (j = 0; j <= 1; ++j) {
4788 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4789 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4796 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4797 for (j = 0; j <= 1; ++j) {
4798 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4799 v->ReturnBusWidth * v->DCFCLKState[i][j],
4800 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4801 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4802 if (v->HostVMEnable != true) {
4803 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4806 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4807 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4812 //Re-ordering Buffer Support Check
4814 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4815 for (j = 0; j <= 1; ++j) {
4816 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4817 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4818 v->ROBSupport[i][j] = true;
4820 v->ROBSupport[i][j] = false;
4825 //Vertical Active BW support check
4827 MaxTotalVActiveRDBandwidth = 0;
4828 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4829 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4832 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4833 for (j = 0; j <= 1; ++j) {
4834 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4835 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4836 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4838 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4839 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4841 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4848 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4849 for (j = 0; j <= 1; ++j) {
4850 int NextPrefetchModeState = MinPrefetchMode;
4852 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4854 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4855 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4856 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4857 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4860 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4861 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4862 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4863 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4864 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4865 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4866 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4867 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4868 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4869 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4870 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4873 v->ExtraLatency = CalculateExtraLatency(
4874 v->RoundTripPingLatencyCycles,
4876 v->DCFCLKState[i][j],
4877 v->TotalNumberOfActiveDPP[i][j],
4878 v->PixelChunkSizeInKByte,
4879 v->TotalNumberOfDCCActiveDPP[i][j],
4881 v->ReturnBWPerState[i][j],
4884 v->NumberOfActivePlanes,
4885 v->NoOfDPPThisState,
4886 v->dpte_group_bytes,
4887 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4888 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4889 v->HostVMMinPageSize,
4890 v->HostVMMaxNonCachedPageTableLevels);
4892 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4894 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4895 v->MaxVStartup = v->NextMaxVStartup;
4897 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4899 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4900 Pipe myPipe = { 0 };
4902 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4903 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4904 myPipe.PixelClock = v->PixelClock[k];
4905 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4906 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4907 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4908 myPipe.SourceScan = v->SourceScan[k];
4909 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4910 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4911 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4912 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4913 myPipe.InterlaceEnable = v->Interlace[k];
4914 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4915 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4916 myPipe.HTotal = v->HTotal[k];
4917 myPipe.DCCEnable = v->DCCEnable[k];
4918 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4920 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4922 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4923 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4925 v->DSCDelayPerState[i][k],
4926 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
4928 v->DPPCLKDelaySCLLBOnly,
4929 v->DPPCLKDelayCNVCCursor,
4930 v->DISPCLKDelaySubtotal,
4931 v->SwathWidthYThisState[k] / v->HRatio[k],
4933 v->MaxInterDCNTileRepeaters,
4934 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4935 v->MaximumVStartup[i][j][k],
4936 v->GPUVMMaxPageTableLevels,
4939 v->HostVMMaxNonCachedPageTableLevels,
4940 v->HostVMMinPageSize,
4941 v->DynamicMetadataEnable[k],
4942 v->DynamicMetadataVMEnabled,
4943 v->DynamicMetadataLinesBeforeActiveRequired[k],
4944 v->DynamicMetadataTransmittedBytes[k],
4948 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4949 v->MetaRowBytes[i][j][k],
4950 v->DPTEBytesPerRow[i][j][k],
4951 v->PrefetchLinesY[i][j][k],
4952 v->SwathWidthYThisState[k],
4953 v->BytePerPixelY[k],
4956 v->PrefetchLinesC[i][j][k],
4957 v->SwathWidthCThisState[k],
4958 v->BytePerPixelC[k],
4961 v->swath_width_luma_ub_this_state[k],
4962 v->swath_width_chroma_ub_this_state[k],
4963 v->SwathHeightYThisState[k],
4964 v->SwathHeightCThisState[k],
4966 v->ProgressiveToInterlaceUnitInOPP,
4967 &v->DSTXAfterScaler[k],
4968 &v->DSTYAfterScaler[k],
4969 &v->LineTimesForPrefetch[k],
4971 &v->LinesForMetaPTE[k],
4972 &v->LinesForMetaAndDPTERow[k],
4973 &v->VRatioPreY[i][j][k],
4974 &v->VRatioPreC[i][j][k],
4975 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
4976 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
4977 &v->NoTimeForDynamicMetadata[i][j][k],
4979 &v->prefetch_vmrow_bw[k],
4982 &v->VUpdateOffsetPix[k],
4983 &v->VUpdateWidthPix[k],
4984 &v->VReadyOffsetPix[k]);
4987 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4988 CalculateUrgentBurstFactor(
4989 v->swath_width_luma_ub_this_state[k],
4990 v->swath_width_chroma_ub_this_state[k],
4991 v->DETBufferSizeInKByte[0],
4992 v->SwathHeightYThisState[k],
4993 v->SwathHeightCThisState[k],
4994 v->HTotal[k] / v->PixelClock[k],
4996 v->CursorBufferSize,
4997 v->CursorWidth[k][0],
4999 v->VRatioPreY[i][j][k],
5000 v->VRatioPreC[i][j][k],
5001 v->BytePerPixelInDETY[k],
5002 v->BytePerPixelInDETC[k],
5003 v->DETBufferSizeYThisState[k],
5004 v->DETBufferSizeCThisState[k],
5005 &v->UrgentBurstFactorCursorPre[k],
5006 &v->UrgentBurstFactorLumaPre[k],
5007 &v->UrgentBurstFactorChroma[k],
5008 &v->NoUrgentLatencyHidingPre[k]);
5011 v->MaximumReadBandwidthWithPrefetch = 0.0;
5012 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5013 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
5014 * v->VRatioPreY[i][j][k];
5016 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
5018 v->VActivePixelBandwidth[i][j][k],
5019 v->VActiveCursorBandwidth[i][j][k]
5020 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
5021 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5023 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5024 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5025 * v->UrgentBurstFactorChromaPre[k])
5026 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5029 v->NotEnoughUrgentLatencyHidingPre = false;
5030 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5031 if (v->NoUrgentLatencyHidingPre[k] == true) {
5032 v->NotEnoughUrgentLatencyHidingPre = true;
5036 v->PrefetchSupported[i][j] = true;
5037 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5038 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5039 v->PrefetchSupported[i][j] = false;
5041 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5042 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5043 || v->NoTimeForPrefetch[i][j][k] == true) {
5044 v->PrefetchSupported[i][j] = false;
5048 v->DynamicMetadataSupported[i][j] = true;
5049 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5050 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5051 v->DynamicMetadataSupported[i][j] = false;
5055 v->VRatioInPrefetchSupported[i][j] = true;
5056 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5057 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5058 v->VRatioInPrefetchSupported[i][j] = false;
5061 v->AnyLinesForVMOrRowTooLarge = false;
5062 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5063 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5064 v->AnyLinesForVMOrRowTooLarge = true;
5068 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5069 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5070 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5071 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5073 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5075 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5076 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5077 * v->UrgentBurstFactorChromaPre[k])
5078 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5080 v->TotImmediateFlipBytes = 0.0;
5081 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5082 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k]
5083 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k];
5086 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5087 CalculateFlipSchedule(
5089 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
5090 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
5093 v->GPUVMMaxPageTableLevels,
5095 v->HostVMMaxNonCachedPageTableLevels,
5097 v->HostVMMinPageSize,
5098 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5099 v->MetaRowBytes[i][j][k],
5100 v->DPTEBytesPerRow[i][j][k],
5101 v->BandwidthAvailableForImmediateFlip,
5102 v->TotImmediateFlipBytes,
5103 v->SourcePixelFormat[k],
5104 v->HTotal[k] / v->PixelClock[k],
5109 v->dpte_row_height[k],
5110 v->meta_row_height[k],
5111 v->dpte_row_height_chroma[k],
5112 v->meta_row_height_chroma[k],
5113 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5114 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5115 &v->final_flip_bw[k],
5116 &v->ImmediateFlipSupportedForPipe[k]);
5118 v->total_dcn_read_bw_with_flip = 0.0;
5119 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5120 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5122 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5123 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5124 + v->VActiveCursorBandwidth[i][j][k],
5126 * (v->final_flip_bw[k]
5127 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5128 * v->UrgentBurstFactorLumaPre[k]
5129 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5130 * v->UrgentBurstFactorChromaPre[k])
5131 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5133 v->ImmediateFlipSupportedForState[i][j] = true;
5134 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5135 v->ImmediateFlipSupportedForState[i][j] = false;
5137 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5138 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5139 v->ImmediateFlipSupportedForState[i][j] = false;
5143 v->ImmediateFlipSupportedForState[i][j] = false;
5145 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
5146 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5147 NextPrefetchModeState = NextPrefetchModeState + 1;
5149 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5151 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5152 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5153 || v->ImmediateFlipSupportedForState[i][j] == true))
5154 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5156 CalculateWatermarksAndDRAMSpeedChangeSupport(
5158 v->PrefetchModePerState[i][j],
5159 v->NumberOfActivePlanes,
5160 v->MaxLineBufferLines,
5162 v->DPPOutputBufferPixels,
5163 v->DETBufferSizeInKByte[0],
5164 v->WritebackInterfaceBufferSize,
5165 v->DCFCLKState[i][j],
5166 v->ReturnBWPerState[i][j],
5168 v->dpte_group_bytes,
5172 v->WritebackLatency,
5173 v->WritebackChunkSize,
5174 v->SOCCLKPerState[i],
5175 v->FinalDRAMClockChangeLatency,
5177 v->SREnterPlusExitTime,
5178 v->ProjectedDCFCLKDeepSleep[i][j],
5179 v->NoOfDPPThisState,
5181 v->RequiredDPPCLKThisState,
5182 v->DETBufferSizeYThisState,
5183 v->DETBufferSizeCThisState,
5184 v->SwathHeightYThisState,
5185 v->SwathHeightCThisState,
5187 v->SwathWidthYThisState,
5188 v->SwathWidthCThisState,
5197 v->BlendingAndTiming,
5198 v->BytePerPixelInDETY,
5199 v->BytePerPixelInDETC,
5203 v->WritebackPixelFormat,
5204 v->WritebackDestinationWidth,
5205 v->WritebackDestinationHeight,
5206 v->WritebackSourceHeight,
5207 &v->DRAMClockChangeSupport[i][j],
5208 &v->UrgentWatermark,
5209 &v->WritebackUrgentWatermark,
5210 &v->DRAMClockChangeWatermark,
5211 &v->WritebackDRAMClockChangeWatermark,
5212 &v->StutterExitWatermark,
5213 &v->StutterEnterPlusExitWatermark,
5214 &v->MinActiveDRAMClockChangeLatencySupported);
5218 /*PTE Buffer Size Check*/
5220 for (i = 0; i < v->soc.num_states; i++) {
5221 for (j = 0; j < 2; j++) {
5222 v->PTEBufferSizeNotExceeded[i][j] = true;
5223 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5224 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5225 v->PTEBufferSizeNotExceeded[i][j] = false;
5230 /*Cursor Support Check*/
5232 v->CursorSupport = true;
5233 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5234 if (v->CursorWidth[k][0] > 0.0) {
5235 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5236 v->CursorSupport = false;
5240 /*Valid Pitch Check*/
5242 v->PitchSupport = true;
5243 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5244 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5245 if (v->DCCEnable[k] == true) {
5246 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5248 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5250 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
5251 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
5252 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5253 if (v->DCCEnable[k] == true) {
5254 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
5256 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5259 v->AlignedCPitch[k] = v->PitchC[k];
5260 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5262 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
5263 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5264 v->PitchSupport = false;
5268 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5269 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
5270 ViewportExceedsSurface = true;
5272 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5273 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
5274 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5275 ViewportExceedsSurface = true;
5279 /*Mode Support, Voltage State and SOC Configuration*/
5281 for (i = v->soc.num_states - 1; i >= 0; i--) {
5282 for (j = 0; j < 2; j++) {
5283 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
5284 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
5285 && v->NotEnoughDSCUnits[i] == 0
5286 && v->DTBCLKRequiredMoreThanSupported[i] == 0
5287 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
5288 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
5289 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
5290 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
5291 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
5292 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
5293 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5294 || v->ImmediateFlipSupportedForState[i][j] == true)) {
5295 v->ModeSupport[i][j] = true;
5297 v->ModeSupport[i][j] = false;
5302 unsigned int MaximumMPCCombine = 0;
5303 for (i = v->soc.num_states; i >= 0; i--) {
5304 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5305 v->VoltageLevel = i;
5306 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5307 if (v->ModeSupport[i][1] == true) {
5308 MaximumMPCCombine = 1;
5310 MaximumMPCCombine = 0;
5314 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5315 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5316 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5317 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5319 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5320 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5321 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5322 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5323 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5324 v->maxMpcComb = MaximumMPCCombine;
/*
 * CalculateWatermarksAndDRAMSpeedChangeSupport() - compute the urgent, DRAM
 * clock change, writeback and stutter watermarks, and classify whether a DRAM
 * clock change is supported.
 *
 * Array arguments are indexed by plane, for k in [0, NumberOfActivePlanes).
 *
 * Values written through the output pointers:
 *   UrgentWatermark                   = UrgentLatency + ExtraLatency
 *   DRAMClockChangeWatermark          = DRAMClockChangeLatency + *UrgentWatermark
 *   WritebackUrgentWatermark          = WritebackLatency, optionally plus
 *                                       WritebackChunkSize * 1024 / 32 / SOCCLK
 *   WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + the same
 *                                       writeback terms
 *   MinActiveDRAMClockChangeLatencySupported
 *                                     = smallest per-plane margin
 *                                       + DRAMClockChangeLatency
 *   DRAMClockChangeSupport            = dm_dram_clock_change_vactive, _vblank
 *                                       or _unsupported
 *   StutterExitWatermark, StutterEnterPlusExitWatermark
 *
 * Side effects: overwrites these mode_lib->vba fields: TotalActiveDPP,
 * TotalDCCActiveDPP, TotalActiveWriteback, LBLatencyHidingSourceLinesY/C,
 * ActiveDRAMClockChangeLatencyMargin[], MinActiveDRAMClockChangeMargin and
 * TotalNumberOfActiveOTG.
 */
5328 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5329 struct display_mode_lib *mode_lib,
5330 unsigned int PrefetchMode,
5331 unsigned int NumberOfActivePlanes,
5332 unsigned int MaxLineBufferLines,
5333 unsigned int LineBufferSize,
5334 unsigned int DPPOutputBufferPixels,
5335 unsigned int DETBufferSizeInKByte,
5336 unsigned int WritebackInterfaceBufferSize,
5340 unsigned int dpte_group_bytes[],
5341 unsigned int MetaChunkSize,
5342 double UrgentLatency,
5343 double ExtraLatency,
5344 double WritebackLatency,
5345 double WritebackChunkSize,
5347 double DRAMClockChangeLatency,
5349 double SREnterPlusExitTime,
5350 double DCFCLKDeepSleep,
5351 unsigned int DPPPerPlane[],
5354 unsigned int DETBufferSizeY[],
5355 unsigned int DETBufferSizeC[],
5356 unsigned int SwathHeightY[],
5357 unsigned int SwathHeightC[],
5358 unsigned int LBBitPerPixel[],
5359 double SwathWidthY[],
5360 double SwathWidthC[],
5362 double HRatioChroma[],
5363 unsigned int vtaps[],
5364 unsigned int VTAPsChroma[],
5366 double VRatioChroma[],
5367 unsigned int HTotal[],
5368 double PixelClock[],
5369 unsigned int BlendingAndTiming[],
5370 double BytePerPixelDETY[],
5371 double BytePerPixelDETC[],
5372 double DSTXAfterScaler[],
5373 double DSTYAfterScaler[],
5374 bool WritebackEnable[],
5375 enum source_format_class WritebackPixelFormat[],
5376 double WritebackDestinationWidth[],
5377 double WritebackDestinationHeight[],
5378 double WritebackSourceHeight[],
5379 enum clock_change_support *DRAMClockChangeSupport,
5380 double *UrgentWatermark,
5381 double *WritebackUrgentWatermark,
5382 double *DRAMClockChangeWatermark,
5383 double *WritebackDRAMClockChangeWatermark,
5384 double *StutterExitWatermark,
5385 double *StutterEnterPlusExitWatermark,
5386 double *MinActiveDRAMClockChangeLatencySupported)
5388 double EffectiveLBLatencyHidingY = 0;
5389 double EffectiveLBLatencyHidingC = 0;
5390 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5391 double LinesInDETC = 0;
5392 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5393 unsigned int LinesInDETCRoundedDownToSwath = 0;
5394 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5395 double FullDETBufferingTimeC = 0;
5396 double ActiveDRAMClockChangeLatencyMarginY = 0;
5397 double ActiveDRAMClockChangeLatencyMarginC = 0;
5398 double WritebackDRAMClockChangeLatencyMargin = 0;
/* NOTE(review): declared double, but it only ever holds a plane index (k or j below). */
5399 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
5400 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
5401 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5402 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5403 double WritebackDRAMClockChangeLatencyHiding = 0;
/* Sum the DPPs of all planes, and separately those of planes with DCCEnable set. */
5406 mode_lib->vba.TotalActiveDPP = 0;
5407 mode_lib->vba.TotalDCCActiveDPP = 0;
5408 for (k = 0; k < NumberOfActivePlanes; ++k) {
5409 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5410 if (DCCEnable[k] == true) {
5411 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
/* The DRAM clock change watermark is built on top of the urgent watermark. */
5415 *UrgentWatermark = UrgentLatency + ExtraLatency;
5417 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
/* Count the planes that have writeback enabled. */
5419 mode_lib->vba.TotalActiveWriteback = 0;
5420 for (k = 0; k < NumberOfActivePlanes; ++k) {
5421 if (WritebackEnable[k] == true) {
5422 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
/*
 * Writeback watermarks come in two forms: the plain latency when at most one
 * writeback is active, and a form that adds WritebackChunkSize * 1024 / 32 / SOCCLK.
 */
5426 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5427 *WritebackUrgentWatermark = WritebackLatency;
5429 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5432 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5433 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5435 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
/* Per-plane DRAM clock change latency margin. */
5438 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Line buffer source lines for luma and chroma: capped at MaxLineBufferLines
 * and reduced by (number of vertical taps - 1).
 */
5440 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5442 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
/* Convert those line counts to time: lines / vertical ratio * (HTotal / PixelClock). */
5444 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5446 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
/*
 * Luma lines held by the DET allocation, passed through dml_floor() with the
 * swath height as granularity, then converted to time.
 */
5448 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5449 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5450 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5451 if (BytePerPixelDETC[k] > 0) {
/*
 * NOTE(review): the chroma path reads mode_lib->vba.DETBufferSizeC[k] rather
 * than the DETBufferSizeC[] argument, unlike the luma path above which uses
 * the DETBufferSizeY[] argument - confirm this is intended.
 */
5452 LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5453 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5454 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
/* 999999 is used as a large placeholder value here and for the minimum searches below. */
5457 FullDETBufferingTimeC = 999999;
/*
 * Margin = line buffer hiding + DET buffering time - urgent watermark
 *          - (HTotal / PixelClock) * (DSTXAfterScaler / HTotal + DSTYAfterScaler)
 *          - DRAM clock change watermark.
 */
5460 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
/* With more than one plane, also subtract (1 - 1/NumberOfActivePlanes) of one swath's time. */
5462 if (NumberOfActivePlanes > 1) {
5463 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
/* Same margin computation for chroma; the plane margin is then the smaller of the two. */
5466 if (BytePerPixelDETC[k] > 0) {
5467 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5469 if (NumberOfActivePlanes > 1) {
5470 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5472 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5474 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
/*
 * Planes with writeback: compute the writeback hiding time (halved for
 * dm_444_64, doubled for dm_whole_buffer_for_single_stream_interleave) and
 * clamp the plane margin to the resulting writeback margin.
 */
5477 if (WritebackEnable[k] == true) {
5479 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5480 if (WritebackPixelFormat[k] == dm_444_64) {
5481 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5483 if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5484 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
/*
 * NOTE(review): subtracts mode_lib->vba.WritebackDRAMClockChangeWatermark, not
 * the value just written through the WritebackDRAMClockChangeWatermark pointer;
 * the two agree only if the caller passes the address of that vba field.
 */
5486 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark;
5487 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
/*
 * Find the smallest margin over all planes and remember an index for it:
 * k itself when BlendingAndTiming[k] == k, otherwise the index j that equals
 * BlendingAndTiming[k].
 */
5491 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5492 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5493 for (k = 0; k < NumberOfActivePlanes; ++k) {
5494 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5495 mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5496 if (BlendingAndTiming[k] == k) {
5497 PlaneWithMinActiveDRAMClockChangeMargin = k;
5499 for (j = 0; j < NumberOfActivePlanes; ++j) {
5500 if (BlendingAndTiming[k] == j) {
5501 PlaneWithMinActiveDRAMClockChangeMargin = j;
5508 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
/*
 * Second-smallest margin: minimum over the planes that are neither the plane
 * recorded above nor have BlendingAndTiming[k] equal to it.
 */
5510 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5511 for (k = 0; k < NumberOfActivePlanes; ++k) {
5512 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5513 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
/* Planes with BlendingAndTiming[k] == k are counted into TotalNumberOfActiveOTG. */
5517 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5518 for (k = 0; k < NumberOfActivePlanes; ++k) {
5519 if (BlendingAndTiming[k] == k) {
5520 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
/*
 * Classification:
 *  - positive minimum margin                              -> vactive
 *  - PrefetchMode == 0 and (SynchronizedVBlank, or exactly one
 *    active OTG, or a positive second-smallest margin)    -> vblank
 *  - the remaining assignment marks it unsupported.
 */
5524 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5525 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5526 } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5527 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5529 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
/*
 * Stutter critical plane: the plane with the smallest luma DET buffering time
 * (the <= comparison lets a later plane win ties). For it, record the time for
 * SwathHeightY minus the fractional lines left over after rounding.
 */
5532 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5533 for (k = 0; k < NumberOfActivePlanes; ++k) {
5534 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5535 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5536 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k];
/* The enter-plus-exit watermark is at least the critical plane's swath-finish time. */
5540 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5541 *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
/*
 * CalculateDCFCLKDeepSleep() - compute the deep-sleep DCFCLK value.
 *
 * Array arguments are indexed by plane, for k in [0, NumberOfActivePlanes).
 *
 * For each plane a requirement is stored in
 * mode_lib->vba.DCFCLKDeepSleepPerPlane[k]. *DCFCLKDeepSleep is then set to the
 * maximum of 8.0, (summed luma + chroma read bandwidth) / ReturnBusWidth, and
 * every per-plane requirement.
 */
5545 static void CalculateDCFCLKDeepSleep(
5546 struct display_mode_lib *mode_lib,
5547 unsigned int NumberOfActivePlanes,
5548 int BytePerPixelY[],
5549 int BytePerPixelC[],
5551 double VRatioChroma[],
5552 double SwathWidthY[],
5553 double SwathWidthC[],
5554 unsigned int DPPPerPlane[],
5556 double HRatioChroma[],
5557 double PixelClock[],
5558 double PSCL_THROUGHPUT[],
5559 double PSCL_THROUGHPUT_CHROMA[],
5561 double ReadBandwidthLuma[],
5562 double ReadBandwidthChroma[],
5564 double *DCFCLKDeepSleep)
5566 double DisplayPipeLineDeliveryTimeLuma = 0;
5567 double DisplayPipeLineDeliveryTimeChroma = 0;
5569 double ReadBandwidth = 0.0;
5571 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5572 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Line delivery time has two forms. The form guarded by a vertical ratio <= 1
 * is paced by the pixel clock: SwathWidth * DPPPerPlane / HRatio / PixelClock.
 * The other form is paced by the DPP clock: SwathWidth / PSCL_THROUGHPUT / DPPCLK.
 */
5574 if (VRatio[k] <= 1) {
5575 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5577 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Planes with BytePerPixelC == 0 get a chroma delivery time of 0. */
5579 if (BytePerPixelC[k] == 0) {
5580 DisplayPipeLineDeliveryTimeChroma = 0;
5582 if (VRatioChroma[k] <= 1) {
5583 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5585 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * Per-plane requirement: 1.1 * SwathWidth * BytePerPixel / divisor / delivery
 * time. With chroma present the divisor is 32.0 and the larger of the luma and
 * chroma results is taken; the luma-only form uses 64.0.
 */
5589 if (BytePerPixelC[k] > 0) {
5590 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5592 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* Never go below PixelClock / 16 for the plane. */
5594 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
/* Total read bandwidth over all planes (luma + chroma). */
5598 for (k = 0; k < NumberOfActivePlanes; ++k) {
5599 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
/* Start from the bandwidth-derived value with a floor of 8.0 ... */
5602 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
/* ... then raise it to the largest per-plane requirement. */
5604 for (k = 0; k < NumberOfActivePlanes; ++k) {
5605 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor() - compute the urgent burst factors for the
 * cursor, luma and chroma of one plane.
 *
 * For each of the three, a buffer time T is derived from the number of lines
 * the buffer holds (lines * LineTime / VRatio). Then:
 *   - if T - UrgentLatency <= 0, *NotEnoughUrgentLatencyHiding is set to 1 and
 *     the factor is set to 0;
 *   - the main alternative sets the factor to T / (T - UrgentLatency);
 *   - each factor also has a fallback assignment of 1.
 *
 * *NotEnoughUrgentLatencyHiding is cleared on entry. The cursor factor is only
 * touched when CursorWidth > 0 and the chroma factor only when
 * BytePerPixelInDETC > 0.
 *
 * NOTE(review): DETBufferSizeInKByte is not referenced by any statement
 * visible in this body.
 */
5609 static void CalculateUrgentBurstFactor(
5610 long swath_width_luma_ub,
5611 long swath_width_chroma_ub,
5612 unsigned int DETBufferSizeInKByte,
5613 unsigned int SwathHeightY,
5614 unsigned int SwathHeightC,
5616 double UrgentLatency,
5617 double CursorBufferSize,
5618 unsigned int CursorWidth,
5619 unsigned int CursorBPP,
5622 double BytePerPixelInDETY,
5623 double BytePerPixelInDETC,
5624 double DETBufferSizeY,
5625 double DETBufferSizeC,
5626 double *UrgentBurstFactorCursor,
5627 double *UrgentBurstFactorLuma,
5628 double *UrgentBurstFactorChroma,
5629 bool *NotEnoughUrgentLatencyHiding)
5631 double LinesInDETLuma = 0;
5632 double LinesInDETChroma = 0;
5633 unsigned int LinesInCursorBuffer = 0;
5634 double CursorBufferSizeInTime = 0;
5635 double DETBufferSizeInTimeLuma = 0;
5636 double DETBufferSizeInTimeChroma = 0;
5638 *NotEnoughUrgentLatencyHiding = 0;
/* Cursor: only evaluated when a cursor width is given. */
5640 if (CursorWidth > 0) {
/*
 * Cursor lines as a power of two: 1 << floor(log2(buffer bytes / bytes per
 * cursor line)), with CursorBufferSize scaled by 1024 and a cursor line being
 * CursorWidth * CursorBPP / 8 bytes.
 */
5641 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
5643 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
5644 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
5645 *NotEnoughUrgentLatencyHiding = 1;
5646 *UrgentBurstFactorCursor = 0;
5648 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
5651 *UrgentBurstFactorCursor = 1;
/*
 * Luma: lines held by the DET allocation, passed through dml_floor() with the
 * swath height as granularity, then converted to time.
 */
5655 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
5657 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
5658 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
5659 *NotEnoughUrgentLatencyHiding = 1;
5660 *UrgentBurstFactorLuma = 0;
5662 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
5665 *UrgentBurstFactorLuma = 1;
/* Chroma: same computation, only for surfaces that have a chroma plane. */
5668 if (BytePerPixelInDETC > 0) {
5669 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
/*
 * NOTE(review): the chroma buffer time is divided by VRatio, the same ratio
 * used for luma above - confirm a chroma-specific ratio is not intended.
 */
5671 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
5672 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
5673 *NotEnoughUrgentLatencyHiding = 1;
5674 *UrgentBurstFactorChroma = 0;
5676 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
5679 *UrgentBurstFactorChroma = 1;
/*
 * CalculatePixelDeliveryTimes() - fill the per-plane line, request and cursor
 * delivery time arrays.
 *
 * Array arguments are indexed by plane, for k in [0, NumberOfActivePlanes).
 * The function makes three passes over the planes:
 *   1. DisplayPipeLineDeliveryTime{Luma,Chroma}[] and their *Prefetch[]
 *      counterparts;
 *   2. DisplayPipeRequestDeliveryTime{Luma,Chroma}[] and their *Prefetch[]
 *      counterparts, as line delivery time / requests per swath;
 *   3. CursorRequestDeliveryTime[] and CursorRequestDeliveryTimePrefetch[].
 * Only cursor index 0 of CursorWidth[k][] / CursorBPP[k][] is read.
 */
5684 static void CalculatePixelDeliveryTimes(
5685 unsigned int NumberOfActivePlanes,
5687 double VRatioChroma[],
5688 double VRatioPrefetchY[],
5689 double VRatioPrefetchC[],
5690 unsigned int swath_width_luma_ub[],
5691 unsigned int swath_width_chroma_ub[],
5692 unsigned int DPPPerPlane[],
5694 double HRatioChroma[],
5695 double PixelClock[],
5696 double PSCL_THROUGHPUT[],
5697 double PSCL_THROUGHPUT_CHROMA[],
5699 int BytePerPixelC[],
5700 enum scan_direction_class SourceScan[],
5701 unsigned int NumberOfCursors[],
5702 unsigned int CursorWidth[][2],
5703 unsigned int CursorBPP[][2],
5704 unsigned int BlockWidth256BytesY[],
5705 unsigned int BlockHeight256BytesY[],
5706 unsigned int BlockWidth256BytesC[],
5707 unsigned int BlockHeight256BytesC[],
5708 double DisplayPipeLineDeliveryTimeLuma[],
5709 double DisplayPipeLineDeliveryTimeChroma[],
5710 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5711 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5712 double DisplayPipeRequestDeliveryTimeLuma[],
5713 double DisplayPipeRequestDeliveryTimeChroma[],
5714 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5715 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5716 double CursorRequestDeliveryTime[],
5717 double CursorRequestDeliveryTimePrefetch[])
5719 double req_per_swath_ub = 0;
/*
 * Pass 1: line delivery times. The form guarded by a vertical ratio <= 1 is
 * paced by the pixel clock (swath width * DPPPerPlane / HRatio / PixelClock);
 * the other form is paced by the DPP clock (swath width / PSCL throughput /
 * DPPCLK). The prefetch variants use the same two forms but select on the
 * prefetch vertical ratios.
 */
5722 for (k = 0; k < NumberOfActivePlanes; ++k) {
5723 if (VRatio[k] <= 1) {
5724 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5726 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Planes with BytePerPixelC == 0 get a chroma delivery time of 0. */
5729 if (BytePerPixelC[k] == 0) {
5730 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5732 if (VRatioChroma[k] <= 1) {
5733 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5735 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5739 if (VRatioPrefetchY[k] <= 1) {
5740 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5742 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5745 if (BytePerPixelC[k] == 0) {
5746 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5748 if (VRatioPrefetchC[k] <= 1) {
5749 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5751 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * Pass 2: request delivery times. Requests per swath is the swath width
 * divided by the 256-byte block width when SourceScan != dm_vert, with a
 * second form that divides by the block height instead.
 * NOTE(review): both operands are unsigned int, so the quotient is truncated
 * before being stored in the double req_per_swath_ub.
 */
5756 for (k = 0; k < NumberOfActivePlanes; ++k) {
5757 if (SourceScan[k] != dm_vert) {
5758 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5760 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5762 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5763 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5764 if (BytePerPixelC[k] == 0) {
5765 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5766 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
/* req_per_swath_ub is reused here for the chroma request count. */
5768 if (SourceScan[k] != dm_vert) {
5769 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5771 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5773 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5774 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
/* Pass 3: cursor request delivery times; both outputs are 0 for planes without a cursor. */
5778 for (k = 0; k < NumberOfActivePlanes; ++k) {
5779 int cursor_req_per_width = 0;
/*
 * NOTE(review): every operand inside dml_ceil() is an integer, so the
 * divisions by 256 and 8 truncate before the ceiling is applied. When
 * CursorWidth * CursorBPP < 2048 the result is 0, and it is then used as a
 * divisor below - confirm this is intended.
 */
5780 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5781 if (NumberOfCursors[k] > 0) {
5782 if (VRatio[k] <= 1) {
5783 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5785 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5787 if (VRatioPrefetchY[k] <= 1) {
5788 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5790 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5793 CursorRequestDeliveryTime[k] = 0;
5794 CursorRequestDeliveryTimePrefetch[k] = 0;
/*
 * CalculateMetaAndPTETimes() - per-plane meta-chunk and PTE-group timings.
 *
 * For every plane k below NumberOfActivePlanes this fills:
 *   DST_Y_PER_PTE_ROW_NOM_L/C[k], DST_Y_PER_META_ROW_NOM_L/C[k]
 *       a row height divided by the matching vertical ratio;
 *   TimePerMetaChunk{Nominal,VBlank,Flip}[k] and TimePerChromaMetaChunk*[k]
 *       a line count * HTotal[k] / PixelClock[k], divided by the upper
 *       bound on meta chunks per row;
 *   time_per_pte_group_{nom,vblank,flip}_{luma,chroma}[k]
 *       the same line-time form, divided by the upper bound on PTE groups
 *       per row.
 *
 * The meta-chunk values are computed under DCCEnable[k] == true and the
 * PTE-group values under GPUVMEnable == true; each output also has a plain
 * "= 0" assignment (presumably the disabled case), and each chroma output
 * has a "= 0" assignment under BytePerPixelC[k] == 0.
 *
 * The function returns nothing; all results go through the output arrays.
 */
5799 static void CalculateMetaAndPTETimes(
5800 int NumberOfActivePlanes,
5803 int MinMetaChunkSizeBytes,
5806 double VRatioChroma[],
5807 double DestinationLinesToRequestRowInVBlank[],
5808 double DestinationLinesToRequestRowInImmediateFlip[],
5810 double PixelClock[],
5811 int BytePerPixelY[],
5812 int BytePerPixelC[],
5813 enum scan_direction_class SourceScan[],
5814 int dpte_row_height[],
5815 int dpte_row_height_chroma[],
5816 int meta_row_width[],
5817 int meta_row_width_chroma[],
5818 int meta_row_height[],
5819 int meta_row_height_chroma[],
5820 int meta_req_width[],
5821 int meta_req_width_chroma[],
5822 int meta_req_height[],
5823 int meta_req_height_chroma[],
5824 int dpte_group_bytes[],
5825 int PTERequestSizeY[],
5826 int PTERequestSizeC[],
5827 int PixelPTEReqWidthY[],
5828 int PixelPTEReqHeightY[],
5829 int PixelPTEReqWidthC[],
5830 int PixelPTEReqHeightC[],
5831 int dpte_row_width_luma_ub[],
5832 int dpte_row_width_chroma_ub[],
/* Everything from here to the end of the parameter list is an output array. */
5833 double DST_Y_PER_PTE_ROW_NOM_L[],
5834 double DST_Y_PER_PTE_ROW_NOM_C[],
5835 double DST_Y_PER_META_ROW_NOM_L[],
5836 double DST_Y_PER_META_ROW_NOM_C[],
5837 double TimePerMetaChunkNominal[],
5838 double TimePerChromaMetaChunkNominal[],
5839 double TimePerMetaChunkVBlank[],
5840 double TimePerChromaMetaChunkVBlank[],
5841 double TimePerMetaChunkFlip[],
5842 double TimePerChromaMetaChunkFlip[],
5843 double time_per_pte_group_nom_luma[],
5844 double time_per_pte_group_vblank_luma[],
5845 double time_per_pte_group_flip_luma[],
5846 double time_per_pte_group_nom_chroma[],
5847 double time_per_pte_group_vblank_chroma[],
5848 double time_per_pte_group_flip_chroma[])
/* Scratch values; all are unsigned and are recomputed for each plane. */
5850 unsigned int meta_chunk_width = 0;
5851 unsigned int min_meta_chunk_width = 0;
5852 unsigned int meta_chunk_per_row_int = 0;
5853 unsigned int meta_row_remainder = 0;
5854 unsigned int meta_chunk_threshold = 0;
5855 unsigned int meta_chunks_per_row_ub = 0;
5856 unsigned int meta_chunk_width_chroma = 0;
5857 unsigned int min_meta_chunk_width_chroma = 0;
5858 unsigned int meta_chunk_per_row_int_chroma = 0;
5859 unsigned int meta_row_remainder_chroma = 0;
5860 unsigned int meta_chunk_threshold_chroma = 0;
5861 unsigned int meta_chunks_per_row_ub_chroma = 0;
5862 unsigned int dpte_group_width_luma = 0;
5863 unsigned int dpte_groups_per_row_luma_ub = 0;
5864 unsigned int dpte_group_width_chroma = 0;
5865 unsigned int dpte_groups_per_row_chroma_ub = 0;
/* Pass 1: destination lines per PTE row and per meta row (height / VRatio). */
5868 for (k = 0; k < NumberOfActivePlanes; ++k) {
5869 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5870 if (BytePerPixelC[k] == 0) {
5871 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5873 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5875 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5876 if (BytePerPixelC[k] == 0) {
5877 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5879 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
/* Pass 2: time per meta chunk. */
5883 for (k = 0; k < NumberOfActivePlanes; ++k) {
5884 if (DCCEnable[k] == true) {
/*
 * Chunk widths come from a byte size, scaled by 256 and divided by bytes
 * per pixel and meta row height. MetaChunkSize is additionally scaled by
 * 1024 (presumably it is given in KBytes - confirm).
 */
5885 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5886 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
/*
 * Whole chunks per row and the leftover width.
 * NOTE(review): no guard against meta_chunk_width == 0 is visible here;
 * both operations are integer division - confirm inputs rule this out.
 */
5887 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5888 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
/*
 * The threshold subtracts the request width or the request height,
 * selected by the SourceScan[k] != dm_vert test.
 * NOTE(review): meta_chunk_threshold is unsigned, so the subtraction wraps
 * if the request dimension exceeds 2 * min_meta_chunk_width.
 */
5889 if (SourceScan[k] != dm_vert) {
5890 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5892 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
/* Upper bound on chunks per row: whole chunks + 1, or + 2 for a larger remainder. */
5894 if (meta_row_remainder <= meta_chunk_threshold) {
5895 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5897 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
/* (number of lines) * HTotal / PixelClock, spread over the chunks of one row. */
5899 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5900 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5901 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5902 if (BytePerPixelC[k] == 0) {
5903 TimePerChromaMetaChunkNominal[k] = 0;
5904 TimePerChromaMetaChunkVBlank[k] = 0;
5905 TimePerChromaMetaChunkFlip[k] = 0;
/* Chroma: same steps as luma, using the *_chroma / BytePerPixelC inputs. */
5907 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5908 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
/*
 * NOTE(review): unlike the luma path this divides in double, but the
 * result is stored in an unsigned int and so is truncated on assignment.
 */
5909 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5910 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5911 if (SourceScan[k] != dm_vert) {
5912 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5914 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5916 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5917 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5919 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5921 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5922 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5923 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
/* Zero assignments for all six meta-chunk outputs. */
5926 TimePerMetaChunkNominal[k] = 0;
5927 TimePerMetaChunkVBlank[k] = 0;
5928 TimePerMetaChunkFlip[k] = 0;
5929 TimePerChromaMetaChunkNominal[k] = 0;
5930 TimePerChromaMetaChunkVBlank[k] = 0;
5931 TimePerChromaMetaChunkFlip[k] = 0;
/* Pass 3: time per PTE group. */
5935 for (k = 0; k < NumberOfActivePlanes; ++k) {
5936 if (GPUVMEnable == true) {
/*
 * Group width = (group bytes / request size) * request width or height,
 * evaluated left to right in integer arithmetic.
 */
5937 if (SourceScan[k] != dm_vert) {
5938 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5940 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
/* Groups per row: row width / group width, passed through dml_ceil(..., 1). */
5942 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5943 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5944 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5945 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5946 if (BytePerPixelC[k] == 0) {
5947 time_per_pte_group_nom_chroma[k] = 0;
5948 time_per_pte_group_vblank_chroma[k] = 0;
5949 time_per_pte_group_flip_chroma[k] = 0;
/* Chroma: same steps as luma, using the *C / *_chroma inputs. */
5951 if (SourceScan[k] != dm_vert) {
5952 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5954 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5956 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5957 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5958 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5959 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
/* Zero assignments for all six PTE-group outputs. */
5962 time_per_pte_group_nom_luma[k] = 0;
5963 time_per_pte_group_vblank_luma[k] = 0;
5964 time_per_pte_group_flip_luma[k] = 0;
5965 time_per_pte_group_nom_chroma[k] = 0;
5966 time_per_pte_group_vblank_chroma[k] = 0;
5967 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes() - per-plane time per VM group and per
 * VM request, for the vblank and immediate-flip cases.
 *
 * For every plane k below NumberOfActivePlanes the four outputs
 * TimePerVMGroupVBlank[k], TimePerVMGroupFlip[k], TimePerVMRequestVBlank[k]
 * and TimePerVMRequestFlip[k] are computed under
 *     GPUVMEnable && (DCCEnable[k] || GPUVMMaxPageTableLevels > 1)
 * as (destination lines) * HTotal[k] / PixelClock[k] divided by either the
 * group count or the request count, and are halved when
 * GPUVMMaxPageTableLevels > 2. Each output also has a plain "= 0"
 * assignment (presumably for planes failing the condition above).
 *
 * The function returns nothing; all results go through the output arrays.
 */
5972 static void CalculateVMGroupAndRequestTimes(
5973 unsigned int NumberOfActivePlanes,
5975 unsigned int GPUVMMaxPageTableLevels,
5976 unsigned int HTotal[],
5977 int BytePerPixelC[],
5978 double DestinationLinesToRequestVMInVBlank[],
5979 double DestinationLinesToRequestVMInImmediateFlip[],
5981 double PixelClock[],
5982 int dpte_row_width_luma_ub[],
5983 int dpte_row_width_chroma_ub[],
5984 int vm_group_bytes[],
5985 unsigned int dpde0_bytes_per_frame_ub_l[],
5986 unsigned int dpde0_bytes_per_frame_ub_c[],
5987 int meta_pte_bytes_per_frame_ub_l[],
5988 int meta_pte_bytes_per_frame_ub_c[],
/* The remaining four parameters are output arrays. */
5989 double TimePerVMGroupVBlank[],
5990 double TimePerVMGroupFlip[],
5991 double TimePerVMRequestVBlank[],
5992 double TimePerVMRequestFlip[])
5994 int num_group_per_lower_vm_stage = 0;
5995 int num_req_per_lower_vm_stage = 0;
5998 for (k = 0; k < NumberOfActivePlanes; ++k) {
5999 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/*
 * Group count: a sum of dml_ceil(bytes / vm_group_bytes[k], 1) terms.
 * Three variants are visible: dpde0 bytes only (under DCCEnable[k] ==
 * false), meta PTE bytes only (under GPUVMMaxPageTableLevels == 1), and
 * both together plus a constant. In every variant the chroma (_c) terms
 * are included only under BytePerPixelC[k] > 0.
 */
6000 if (DCCEnable[k] == false) {
6001 if (BytePerPixelC[k] > 0) {
6002 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k])
6003 / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k])
6004 / (double) (vm_group_bytes[k]), 1);
6006 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k])
6007 / (double) (vm_group_bytes[k]), 1);
6010 if (GPUVMMaxPageTableLevels == 1) {
6011 if (BytePerPixelC[k] > 0) {
6012 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k])
6013 / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k])
6014 / (double) (vm_group_bytes[k]), 1);
6016 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k])
6017 / (double) (vm_group_bytes[k]), 1);
/* Combined variant: constant 2 with chroma terms, constant 1 without. */
6020 if (BytePerPixelC[k] > 0) {
6021 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6022 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6023 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6024 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6026 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6027 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/*
 * Request count: the same three variants, but each byte count is divided
 * by 64 using integer division, and no constant is added.
 */
6032 if (DCCEnable[k] == false) {
6033 if (BytePerPixelC[k] > 0) {
6034 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6036 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6039 if (GPUVMMaxPageTableLevels == 1) {
6040 if (BytePerPixelC[k] > 0) {
6041 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64
6042 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6044 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6047 if (BytePerPixelC[k] > 0) {
6048 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6049 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k]
6050 / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6052 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6053 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/*
 * (destination lines) * HTotal / PixelClock, spread over the groups or
 * the requests.
 * NOTE(review): no guard is visible against either count being 0 (e.g.
 * byte counts below 64 give a request count of 0) - confirm inputs.
 */
6058 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
6059 / num_group_per_lower_vm_stage;
6060 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
6061 / num_group_per_lower_vm_stage;
6062 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
6063 / num_req_per_lower_vm_stage;
6064 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
6065 / num_req_per_lower_vm_stage;
/* With more than two page table levels all four times are halved. */
6067 if (GPUVMMaxPageTableLevels > 2) {
6068 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6069 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6070 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6071 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/* Zero assignments for all four outputs. */
6075 TimePerVMGroupVBlank[k] = 0;
6076 TimePerVMGroupFlip[k] = 0;
6077 TimePerVMRequestVBlank[k] = 0;
6078 TimePerVMRequestFlip[k] = 0;
/*
 * CalculateStutterEfficiency() - stutter period and stutter efficiency
 * percentages for the current set of planes.
 *
 * Outputs:
 *   *StutterEfficiencyNotIncludingVBlank
 *       (1 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100 when
 *       no plane has WritebackEnable set; a "= 0" assignment also exists
 *       (presumably for the writeback case);
 *   *StutterEfficiency
 *       the value above re-weighted over the frame time with the smallest
 *       vblank time counted as fully efficient;
 *   *StutterPeriodOut
 *       StutterPeriod; written only when the pointer is non-NULL.
 *
 * NOTE(review): the local per-plane arrays hold DC__NUM_DPP__MAX entries
 * and are indexed up to NumberOfActivePlanes - 1; this assumes
 * NumberOfActivePlanes <= DC__NUM_DPP__MAX - confirm against callers.
 */
6083 static void CalculateStutterEfficiency(
6084 int NumberOfActivePlanes,
6085 long ROBBufferSizeInKByte,
6086 double TotalDataReadBandwidth,
6090 bool SynchronizedVBlank,
6092 unsigned int DETBufferSizeY[],
6093 int BytePerPixelY[],
6094 double BytePerPixelDETY[],
6095 double SwathWidthY[],
6098 double DCCRateLuma[],
6099 double DCCRateChroma[],
6102 double PixelClock[],
6104 enum scan_direction_class SourceScan[],
6105 int BlockHeight256BytesY[],
6106 int BlockWidth256BytesY[],
6107 int BlockHeight256BytesC[],
6108 int BlockWidth256BytesC[],
6109 int DCCYMaxUncompressedBlock[],
6110 int DCCCMaxUncompressedBlock[],
6113 bool WritebackEnable[],
6114 double ReadBandwidthPlaneLuma[],
6115 double ReadBandwidthPlaneChroma[],
6116 double meta_row_bw[],
6117 double dpte_row_bw[],
6118 double *StutterEfficiencyNotIncludingVBlank,
6119 double *StutterEfficiency,
6120 double *StutterPeriodOut)
6122 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
6123 double FrameTimeForMinFullDETBufferingTime = 0;
6124 double StutterPeriod = 0;
6125 double AverageReadBandwidth = 0;
6126 double TotalRowReadBandwidth = 0;
6127 double AverageDCCCompressionRate = 0;
6128 double PartOfBurstThatFitsInROB = 0;
6129 double StutterBurstTime = 0;
6130 int TotalActiveWriteback = 0;
6131 double VBlankTime = 0;
6132 double SmallestVBlank = 0;
6133 int BytePerPixelYCriticalPlane = 0;
6134 double SwathWidthYCriticalPlane = 0;
6135 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
6136 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
6137 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6138 double MaximumEffectiveCompressionLuma = 0;
6139 double MaximumEffectiveCompressionChroma = 0;
/*
 * Per plane: luma lines held by the DET buffer, the same value passed
 * through dml_floor() with SwathHeightY[k] as granularity, and the time
 * those lines cover (lines * HTotal / PixelClock / VRatio).
 */
6142 for (k = 0; k < NumberOfActivePlanes; ++k) {
6143 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
6144 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
6145 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
/*
 * The stutter period is the smallest FullDETBufferingTimeY. Start from
 * plane 0, then scan all planes; the plane with the smallest time also
 * supplies the "critical plane" values used further down.
 */
6148 StutterPeriod = FullDETBufferingTimeY[0];
6149 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
6150 BytePerPixelYCriticalPlane = BytePerPixelY[0];
6151 SwathWidthYCriticalPlane = SwathWidthY[0];
6152 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
6153 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
6155 for (k = 0; k < NumberOfActivePlanes; ++k) {
6156 if (FullDETBufferingTimeY[k] < StutterPeriod) {
6157 StutterPeriod = FullDETBufferingTimeY[k];
6158 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
6159 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6160 SwathWidthYCriticalPlane = SwathWidthY[k];
6161 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
6162 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
/*
 * Accumulate read bandwidth. With DCC enabled each plane's bandwidth is
 * divided by dml_min(DCC rate, maximum effective compression), where the
 * maximum is 2 if the 256-byte block dimension along the swath exceeds
 * the swath height or the max uncompressed block is below 256, and has a
 * second assignment of 4.
 */
6166 AverageReadBandwidth = 0;
6167 TotalRowReadBandwidth = 0;
6168 for (k = 0; k < NumberOfActivePlanes; ++k) {
6169 if (DCCEnable[k] == true) {
6170 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
6171 || (SourceScan[k] != dm_vert
6172 && BlockHeight256BytesY[k] > SwathHeightY[k])
6173 || DCCYMaxUncompressedBlock[k] < 256) {
6174 MaximumEffectiveCompressionLuma = 2;
6176 MaximumEffectiveCompressionLuma = 4;
6178 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
/* Chroma contributes only when the plane has chroma read bandwidth. */
6180 if (ReadBandwidthPlaneChroma[k] > 0) {
6181 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6182 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
6183 || DCCCMaxUncompressedBlock[k] < 256) {
6184 MaximumEffectiveCompressionChroma = 2;
6186 MaximumEffectiveCompressionChroma = 4;
6188 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
/* Uncompressed form: luma and chroma bandwidth added as-is. */
6191 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6193 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
/*
 * Burst time: the part of the burst that fits in the ROB is returned at
 * ReturnBW after dividing by the average compression rate, the rest at
 * DCFCLK * 64, plus the row (meta + dpte) traffic at ReturnBW. The result
 * is then raised via dml_max() to at least the time needed to finish the
 * critical plane's swath.
 */
6196 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
6197 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6198 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
6199 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6200 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
/* Count planes with writeback enabled. */
6202 TotalActiveWriteback = 0;
6203 for (k = 0; k < NumberOfActivePlanes; ++k) {
6204 if (WritebackEnable[k] == true) {
6205 TotalActiveWriteback = TotalActiveWriteback + 1;
6209 if (TotalActiveWriteback == 0) {
6210 *StutterEfficiencyNotIncludingVBlank = (1
6211 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
6213 *StutterEfficiencyNotIncludingVBlank = 0;
/*
 * Smallest vblank time. The (VTotal - VActive) * HTotal / PixelClock form
 * is used under SynchronizedVBlank or a single active plane; the running
 * minimum over all planes is taken with dml_min().
 */
6216 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6217 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
6221 for (k = 0; k < NumberOfActivePlanes; ++k) {
6222 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6223 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
6227 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
/* Weight the non-vblank efficiency over the frame; vblank counts as 100%. */
6230 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
/* StutterPeriodOut is optional. */
6232 if (StutterPeriodOut)
6233 *StutterPeriodOut = StutterPeriod;
/*
 * CalculateSwathAndDETConfiguration() - choose swath heights and the
 * luma/chroma DET buffer split for each plane, and report whether the
 * viewport is supportable.
 *
 * The function first calls CalculateSwathWidth(), then for every plane k
 * below NumberOfActivePlanes:
 *   - derives MinimumSwathHeightY/C from MaximumSwathHeightY/C[k] (either
 *     the same value or half of it) based on pixel format, tiling and
 *     scan direction;
 *   - picks SwathHeightY[k] / SwathHeightC[k] from the maximum/minimum
 *     candidates by comparing swath byte sizes against
 *     DETBufferSizeInKByte * 1024 / 2;
 *   - writes DETBufferSizeY[k] / DETBufferSizeC[k];
 *   - writes ViewportSizeSupportPerPlane[k], and clears
 *     *ViewportSizeSupport (initialised to true) if any plane fails.
 *
 * The function returns nothing; all results go through the output arrays
 * and *ViewportSizeSupport.
 */
6236 static void CalculateSwathAndDETConfiguration(
6237 bool ForceSingleDPP,
6238 int NumberOfActivePlanes,
6239 unsigned int DETBufferSizeInKByte,
6240 double MaximumSwathWidthLuma[],
6241 double MaximumSwathWidthChroma[],
6242 enum scan_direction_class SourceScan[],
6243 enum source_format_class SourcePixelFormat[],
6244 enum dm_swizzle_mode SurfaceTiling[],
6245 int ViewportWidth[],
6246 int ViewportHeight[],
6247 int SurfaceWidthY[],
6248 int SurfaceWidthC[],
6249 int SurfaceHeightY[],
6250 int SurfaceHeightC[],
6251 int Read256BytesBlockHeightY[],
6252 int Read256BytesBlockHeightC[],
6253 int Read256BytesBlockWidthY[],
6254 int Read256BytesBlockWidthC[],
6255 enum odm_combine_mode ODMCombineEnabled[],
6256 int BlendingAndTiming[],
6259 double BytePerPixDETY[],
6260 double BytePerPixDETC[],
6263 double HRatioChroma[],
6265 int swath_width_luma_ub[],
6266 int swath_width_chroma_ub[],
6267 double SwathWidth[],
6268 double SwathWidthChroma[],
6271 unsigned int DETBufferSizeY[],
6272 unsigned int DETBufferSizeC[],
6273 bool ViewportSizeSupportPerPlane[],
6274 bool *ViewportSizeSupport)
6276 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
6277 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
6278 int MinimumSwathHeightY = 0;
6279 int MinimumSwathHeightC = 0;
6280 long RoundedUpMaxSwathSizeBytesY = 0;
6281 long RoundedUpMaxSwathSizeBytesC = 0;
6282 long RoundedUpMinSwathSizeBytesY = 0;
6283 long RoundedUpMinSwathSizeBytesC = 0;
6284 long RoundedUpSwathSizeBytesY = 0;
6285 long RoundedUpSwathSizeBytesC = 0;
6286 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
6287 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
/*
 * Fill the swath widths, the local MaximumSwathHeightY/C arrays and the
 * swath_width_*_ub arrays for all planes.
 */
6290 CalculateSwathWidth(
6292 NumberOfActivePlanes,
6304 Read256BytesBlockHeightY,
6305 Read256BytesBlockHeightC,
6306 Read256BytesBlockWidthY,
6307 Read256BytesBlockWidthC,
6312 SwathWidthSingleDPP,
6313 SwathWidthSingleDPPChroma,
6316 MaximumSwathHeightY,
6317 MaximumSwathHeightC,
6318 swath_width_luma_ub,
6319 swath_width_chroma_ub);
/* Assume support until a plane fails the check at the end of the loop. */
6321 *ViewportSizeSupport = true;
6322 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Formats tested here: 444 64/32/16 bpp, mono 16/8 and rgbe. */
6323 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
6324 || SourcePixelFormat[k] == dm_444_16
6325 || SourcePixelFormat[k] == dm_mono_16
6326 || SourcePixelFormat[k] == dm_mono_8
6327 || SourcePixelFormat[k] == dm_rgbe)) {
6328 if (SurfaceTiling[k] == dm_sw_linear
6329 || (SourcePixelFormat[k] == dm_444_64
6330 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6331 && SourceScan[k] != dm_vert)) {
6332 MinimumSwathHeightY = MaximumSwathHeightY[k];
/*
 * NOTE(review): dm_444_8 is not among the formats accepted by the
 * enclosing condition, so this branch looks unreachable - confirm.
 */
6333 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6334 MinimumSwathHeightY = MaximumSwathHeightY[k];
6336 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6338 MinimumSwathHeightC = MaximumSwathHeightC[k];
/* Second chain: linear tiling, rgbe_alpha and 420_8 special cases. */
6340 if (SurfaceTiling[k] == dm_sw_linear) {
6341 MinimumSwathHeightY = MaximumSwathHeightY[k];
6342 MinimumSwathHeightC = MaximumSwathHeightC[k];
6343 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
6344 && SourceScan[k] == dm_vert) {
6345 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6346 MinimumSwathHeightC = MaximumSwathHeightC[k];
6347 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6348 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6349 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6350 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6351 MinimumSwathHeightY = MaximumSwathHeightY[k];
6352 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6354 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6355 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
/*
 * Swath size in bytes = swath width (ub) * bytes per pixel in DET * swath
 * height, for the maximum and minimum heights. The double product is
 * truncated when stored into the long variables. For dm_420_10 the sizes
 * are additionally passed through dml_ceil(..., 256).
 */
6359 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6360 * MaximumSwathHeightY[k];
6361 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6362 * MinimumSwathHeightY;
6363 if (SourcePixelFormat[k] == dm_420_10) {
6364 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6365 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6367 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6368 * MaximumSwathHeightC[k];
6369 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6370 * MinimumSwathHeightC;
6371 if (SourcePixelFormat[k] == dm_420_10) {
6372 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6373 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
/*
 * Selection, in order of preference, against half of the DET buffer:
 *   1. max Y + max C fits            -> both maximum;
 *   2. Y >= 1.5 * C and min Y + max C fits -> minimum Y, maximum C;
 *   3. Y <  1.5 * C and max Y + min C fits -> maximum Y, minimum C;
 *   then a final assignment of both minimum.
 */
6376 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6377 <= DETBufferSizeInKByte * 1024 / 2) {
6378 SwathHeightY[k] = MaximumSwathHeightY[k];
6379 SwathHeightC[k] = MaximumSwathHeightC[k];
6380 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6381 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6382 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6383 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6384 <= DETBufferSizeInKByte * 1024 / 2) {
6385 SwathHeightY[k] = MinimumSwathHeightY;
6386 SwathHeightC[k] = MaximumSwathHeightC[k];
6387 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6388 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6389 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6390 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6391 <= DETBufferSizeInKByte * 1024 / 2) {
6392 SwathHeightY[k] = MaximumSwathHeightY[k];
6393 SwathHeightC[k] = MinimumSwathHeightC;
6394 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6395 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6397 SwathHeightY[k] = MinimumSwathHeightY;
6398 SwathHeightC[k] = MinimumSwathHeightC;
6399 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6400 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
/*
 * DET split: everything to luma when there is no chroma swath; an even
 * split when the luma swath is at most 1.5x the chroma swath; and a
 * 2/3 - 1/3 assignment as the remaining form.
 */
6403 if (SwathHeightC[k] == 0) {
6404 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
6405 DETBufferSizeC[k] = 0;
6406 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6407 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
6408 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
6410 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
6411 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
/*
 * Unsupported when even the minimum swaths exceed half the DET buffer,
 * or when a swath width exceeds its maximum (chroma is checked only when
 * SwathHeightC[k] > 0).
 */
6414 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6415 > DETBufferSizeInKByte * 1024 / 2
6416 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6417 || (SwathHeightC[k] > 0
6418 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6419 *ViewportSizeSupport = false;
6420 ViewportSizeSupportPerPlane[k] = false;
6422 ViewportSizeSupportPerPlane[k] = true;
/*
 * CalculateSwathWidth() - per-plane swath widths and maximum swath heights.
 *
 * For every plane k below NumberOfActivePlanes this writes:
 *   SwathWidthSingleDPPY/C[k]  swath width taken from the viewport;
 *   SwathWidthY/C[k]           the same, adjusted for ODM combine and
 *                              DPPPerPlane[k] == 2 (the single-DPP values
 *                              are used again when ForceSingleDPP is set);
 *   MaximumSwathHeightY/C[k]   a 256-byte block dimension picked by scan
 *                              direction;
 *   swath_width_luma_ub[k], swath_width_chroma_ub[k]
 *                              swath width rounded to block granularity
 *                              and limited via dml_min() to the rounded
 *                              surface dimension.
 *
 * The function returns nothing; all results go through the output arrays.
 */
6427 static void CalculateSwathWidth(
6428 bool ForceSingleDPP,
6429 int NumberOfActivePlanes,
6430 enum source_format_class SourcePixelFormat[],
6431 enum scan_direction_class SourceScan[],
6432 unsigned int ViewportWidth[],
6433 unsigned int ViewportHeight[],
6434 unsigned int SurfaceWidthY[],
6435 unsigned int SurfaceWidthC[],
6436 unsigned int SurfaceHeightY[],
6437 unsigned int SurfaceHeightC[],
6438 enum odm_combine_mode ODMCombineEnabled[],
6441 int Read256BytesBlockHeightY[],
6442 int Read256BytesBlockHeightC[],
6443 int Read256BytesBlockWidthY[],
6444 int Read256BytesBlockWidthC[],
6445 int BlendingAndTiming[],
6446 unsigned int HActive[],
/* The remaining parameters are output arrays. */
6449 double SwathWidthSingleDPPY[],
6450 double SwathWidthSingleDPPC[],
6451 double SwathWidthY[],
6452 double SwathWidthC[],
6453 int MaximumSwathHeightY[],
6454 int MaximumSwathHeightC[],
6455 unsigned int swath_width_luma_ub[],
6456 unsigned int swath_width_chroma_ub[])
6459 long surface_width_ub_l;
6460 long surface_height_ub_l;
6461 long surface_width_ub_c;
6462 long surface_height_ub_c;
6464 for (k = 0; k < NumberOfActivePlanes; ++k) {
6465 enum odm_combine_mode MainPlaneODMCombine = 0;
/*
 * Surface dimensions passed through dml_ceil() with the 256-byte block
 * dimension as granularity.
 * NOTE(review): the same four assignments are repeated further down in
 * this loop body with identical expressions.
 */
6466 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6467 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6468 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6469 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
/* The swath runs along the viewport width unless the scan is vertical. */
6471 if (SourceScan[k] != dm_vert) {
6472 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6474 SwathWidthSingleDPPY[k] = ViewportHeight[k];
/*
 * Start from this plane's own ODM mode, then take the mode of plane j
 * where j equals BlendingAndTiming[k].
 */
6477 MainPlaneODMCombine = ODMCombineEnabled[k];
6478 for (j = 0; j < NumberOfActivePlanes; ++j) {
6479 if (BlendingAndTiming[k] == j) {
6480 MainPlaneODMCombine = ODMCombineEnabled[j];
/*
 * ODM 4:1 / 2:1 limit the width to HActive / 4 (or / 2) scaled by
 * HRatio; two DPPs per plane halve it; the last form keeps it unchanged.
 */
6484 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6485 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6486 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6487 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6488 } else if (DPPPerPlane[k] == 2) {
6489 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6491 SwathWidthY[k] = SwathWidthSingleDPPY[k];
/* For the 420 formats the chroma widths are half of the luma widths. */
6494 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6495 SwathWidthC[k] = SwathWidthY[k] / 2;
6496 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6498 SwathWidthC[k] = SwathWidthY[k];
6499 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
/* ForceSingleDPP discards the ODM / multi-DPP adjustment above. */
6502 if (ForceSingleDPP == true) {
6503 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6504 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6507 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6508 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6509 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6510 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
/*
 * Non-vertical scan: maximum swath height is the block height, and the
 * width upper bound is dml_ceil(width - 1, block width) + block width,
 * limited to the rounded surface width. Chroma is computed only under
 * BytePerPixC[k] > 0 and also has a "= 0" assignment.
 */
6512 if (SourceScan[k] != dm_vert) {
6513 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6514 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6515 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6516 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6517 if (BytePerPixC[k] > 0) {
6518 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6519 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6521 swath_width_chroma_ub[k] = 0;
/* Same computation with block width and block height exchanged. */
6524 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6525 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6526 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6527 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6528 if (BytePerPixC[k] > 0) {
6529 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6530 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6532 swath_width_chroma_ub[k] = 0;
/*
 * CalculateExtraLatency() - extra latency as a time value.
 *
 * Obtains an extra byte count from CalculateExtraLatencyBytes() and
 * returns
 *     (RoundTripPingLatencyCycles + 32) / DCFCLK
 *         + ExtraLatencyBytes / ReturnBW
 * i.e. a cycle count converted with DCFCLK plus the byte count converted
 * with ReturnBW.
 */
6538 static double CalculateExtraLatency(
6539 long RoundTripPingLatencyCycles,
6540 long ReorderingBytes,
6542 int TotalNumberOfActiveDPP,
6543 int PixelChunkSizeInKByte,
6544 int TotalNumberOfDCCActiveDPP,
6549 int NumberOfActivePlanes,
6551 int dpte_group_bytes[],
6552 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6553 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6554 double HostVMMinPageSize,
6555 int HostVMMaxNonCachedPageTableLevels)
6557 double ExtraLatencyBytes = 0;
/* The byte-count helper receives this function's own arguments. */
6558 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6560 TotalNumberOfActiveDPP,
6561 PixelChunkSizeInKByte,
6562 TotalNumberOfDCCActiveDPP,
6566 NumberOfActivePlanes,
6569 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6570 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6572 HostVMMaxNonCachedPageTableLevels);
/* Fixed 32-cycle addition on top of the round-trip ping latency. */
6574 return (RoundTripPingLatencyCycles + 32) / DCFCLK + ExtraLatencyBytes / ReturnBW;
/*
 * CalculateExtraLatencyBytes() - extra latency expressed in bytes.
 *
 * The value (held in ret) is built as:
 *     ReorderingBytes
 *       + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *          + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 * and, when GPUVMEnable is set, for every plane k:
 *       + NumberOfDPP[k] * dpte_group_bytes[k]
 *           * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor
 *
 * HostVMInefficiencyFactor and HostVMDynamicLevels are derived from the
 * host VM settings when both GPUVMEnable and HostVMEnable are set, and
 * also have plain assignments of 1 and 0 respectively.
 */
6577 static double CalculateExtraLatencyBytes(
6578 long ReorderingBytes,
6579 int TotalNumberOfActiveDPP,
6580 int PixelChunkSizeInKByte,
6581 int TotalNumberOfDCCActiveDPP,
6585 int NumberOfActivePlanes,
6587 int dpte_group_bytes[],
6588 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6589 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6590 double HostVMMinPageSize,
6591 int HostVMMaxNonCachedPageTableLevels)
6594 double HostVMInefficiencyFactor = 0;
6595 int HostVMDynamicLevels = 0;
6598 if (GPUVMEnable == true && HostVMEnable == true) {
/* Ratio of the "pixel mixed with VM data" percentage to the "VM data only" one. */
6599 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
/*
 * Dynamic levels by HostVMMinPageSize: below 2048 all non-cached levels
 * count; from 2048 up to (but excluding) 1048576 one level fewer; the
 * remaining form takes two fewer. The reduced forms are clamped at 0
 * via dml_max().
 */
6600 if (HostVMMinPageSize < 2048) {
6601 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
6602 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
6603 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
6605 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
6608 HostVMInefficiencyFactor = 1;
6609 HostVMDynamicLevels = 0;
/* Chunk sizes are scaled by 1024.0, which also makes the sum a double. */
6612 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
6614 if (GPUVMEnable == true) {
6615 for (k = 0; k < NumberOfActivePlanes; ++k) {
6616 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * CalculateUrgentLatency - combine the three urgent-latency figures into one.
 *
 * `ret` is set to the largest of UrgentLatencyPixelDataOnly,
 * UrgentLatencyPixelMixedWithVMData and UrgentLatencyVMDataOnly. When
 * DoUrgentLatencyAdjustment is true, a fabric-clock dependent term is added
 * on top.
 *
 * NOTE(review): FabricClock and the local `ret` are used below but their
 * declarations, and the return statement (presumably `ret`), are not
 * visible in this listing - confirm against the full file.
 */
6623 static double CalculateUrgentLatency(
6624 double UrgentLatencyPixelDataOnly,
6625 double UrgentLatencyPixelMixedWithVMData,
6626 double UrgentLatencyVMDataOnly,
6627 bool DoUrgentLatencyAdjustment,
6628 double UrgentLatencyAdjustmentFabricClockComponent,
6629 double UrgentLatencyAdjustmentFabricClockReference,
/* Start from the worst (largest) of the three latencies. */
6634 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
6635 if (DoUrgentLatencyAdjustment == true) {
/*
 * Adjustment = Component * (Reference / FabricClock - 1); the term is zero
 * when FabricClock equals the reference clock.
 */
6636 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * UseMinimumDCFCLK - fill DCFCLKState[i][j] for every state i in
 * [0, mode_lib->soc.num_states) and each j in {0, 1}.
 *
 * For each (i, j) the code estimates the DCFCLK needed for average
 * bandwidth and for peak (prefetch) bandwidth, scales the larger of the two
 * by 1.05 * (1 + PercentMarginOverMinimumRequiredDCFCLK / 100), and stores
 * the result capped at DCFCLKPerState[i].
 *
 * Among the parameters, the visible code writes only DCFCLKState.
 *
 * NOTE(review): HostVMEnable, GPUVMEnable, ReturnBusWidth, MetaChunkSize,
 * HTotal, VTotal and VActive are used below without a visible declaration;
 * presumably they are parameters on lines elided from this listing -
 * confirm against the full file.
 *
 * NOTE(review): `k` is unsigned int while NumberOfActivePlanes is int. In
 * the two loops written as `k <= NumberOfActivePlanes - 1`, a value of 0
 * would make the bound -1, which converts to UINT_MAX in the comparison, so
 * the loop would not terminate. Presumably callers always pass at least one
 * active plane - confirm.
 */
6642 static void UseMinimumDCFCLK(
6643 struct display_mode_lib *mode_lib,
6644 int MaxInterDCNTileRepeaters,
6645 int MaxPrefetchMode,
6646 double FinalDRAMClockChangeLatency,
6647 double SREnterPlusExitTime,
6649 int RoundTripPingLatencyCycles,
6650 int ReorderingBytes,
6651 int PixelChunkSizeInKByte,
6654 int GPUVMMaxPageTableLevels,
6656 int NumberOfActivePlanes,
6657 double HostVMMinPageSize,
6658 int HostVMMaxNonCachedPageTableLevels,
6659 bool DynamicMetadataVMEnabled,
6660 enum immediate_flip_requirement ImmediateFlipRequirement,
6661 bool ProgressiveToInterlaceUnitInOPP,
6662 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
6663 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6664 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6665 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
6668 int DynamicMetadataTransmittedBytes[],
6669 int DynamicMetadataLinesBeforeActiveRequired[],
6671 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
6672 double RequiredDISPCLK[][2],
6673 double UrgLatency[],
6674 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
6675 double ProjectedDCFCLKDeepSleep[][2],
6676 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
6677 double TotalVActivePixelBandwidth[][2],
6678 double TotalVActiveCursorBandwidth[][2],
6679 double TotalMetaRowBandwidth[][2],
6680 double TotalDPTERowBandwidth[][2],
6681 unsigned int TotalNumberOfActiveDPP[][2],
6682 unsigned int TotalNumberOfDCCActiveDPP[][2],
6683 int dpte_group_bytes[],
6684 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
6685 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
6686 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
6687 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
6688 int BytePerPixelY[],
6689 int BytePerPixelC[],
6691 double PixelClock[],
6692 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
6693 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
6694 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
6695 bool DynamicMetadataEnable[],
6696 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
6697 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
6698 double ReadBandwidthLuma[],
6699 double ReadBandwidthChroma[],
6700 double DCFCLKPerState[],
6701 double DCFCLKState[][2])
6703 double NormalEfficiency = 0;
6704 double PTEEfficiency = 0;
6705 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6706 unsigned int i, j, k;
/*
 * NormalEfficiency: the selected percentage divided by 100; the
 * "pixel mixed with VM data" figure is used when HostVM is enabled,
 * otherwise the "pixel data only" figure.
 */
6708 NormalEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6709 : PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
/*
 * PTEEfficiency: with HostVM enabled, the ratio of the "VM data only"
 * percentage to the "pixel mixed with VM data" percentage; otherwise 1.0.
 */
6710 PTEEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6711 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
/* Evaluate every state i and both values of the second index j. */
6712 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6713 for (j = 0; j <= 1; ++j) {
/* Per-(i, j) scratch values; the arrays are indexed by plane k. */
6714 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6715 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6716 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6717 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6718 double MinimumTWait = 0;
6719 double NonDPTEBandwidth = 0;
6720 double DPTEBandwidth = 0;
6721 double DCFCLKRequiredForAverageBandwidth = 0;
6722 double ExtraLatencyBytes = 0;
6723 double ExtraLatencyCycles = 0;
6724 double DCFCLKRequiredForPeakBandwidth = 0;
6725 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6726 double MinimumTvmPlus2Tr0 = 0;
/*
 * Sum over planes of NoOfDPP * DPTEBytesPerRow divided by
 * (15.75 * HTotal[k] / PixelClock[k]).
 */
6728 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6729 for (k = 0; k < NumberOfActivePlanes; ++k) {
6730 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6731 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
/* Copy this (i, j) slice of NoOfDPP into a flat int array for the helper call below. */
6734 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6735 NoOfDPPState[k] = NoOfDPP[i][j][k];
6738 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
6739 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
/* Use the prefetch/flip DPTE figure when HostVM is on or immediate flip is required, else the regular row bandwidth. */
6740 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
6741 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
/*
 * Average-bandwidth requirement: the largest of the projected deep-sleep
 * DCFCLK, the bandwidth limited by the SDP-port percentage, and the
 * bandwidth limited by the normal/PTE efficiencies.
 */
6742 DCFCLKRequiredForAverageBandwidth = dml_max3(ProjectedDCFCLKDeepSleep[i][j],
6743 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth / (MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6744 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / ReturnBusWidth);
6746 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP[i][j],
6747 MetaChunkSize, GPUVMEnable, HostVMEnable, NumberOfActivePlanes, NoOfDPPState, dpte_group_bytes,
6748 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6749 HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
/* Convert the extra bytes to cycles and add the round-trip ping cycles plus a fixed 32. */
6750 ExtraLatencyCycles = RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
/* Per-plane peak-bandwidth requirement. */
6751 for (k = 0; k < NumberOfActivePlanes; ++k) {
6752 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6753 double ExpectedPrefetchBWAcceleration = { 0 };
6754 double PrefetchTime = { 0 };
/* Cycles needed to return the luma and chroma prefetch pixel data. */
6756 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
6757 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
/*
 * Total prefetch cycles: twice the extra latency shared across the plane's
 * DPPs, the PDE/meta-PTE bytes (counted only when GPUVMMaxPageTableLevels
 * is above 2), twice the DPTE row bytes, twice the meta row bytes, and the
 * pixel cycles computed above.
 */
6758 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6759 / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * DPTEBytesPerRow[i][j][k] / PTEEfficiency
6760 / NormalEfficiency / ReturnBusWidth + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6761 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
6762 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
/* Extra latency is charged only when GPUVM, this plane's dynamic metadata and DynamicMetadataVMEnabled are all set; otherwise 0. */
6763 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
6764 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
/* Time left for prefetch after subtracting the wait time, page-table walk latency and dynamic-metadata latency. */
6765 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait - UrgLatency[i] * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels
6766 : GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6768 if (PrefetchTime > 0) {
6769 double ExpectedVRatioPrefetch = { 0 };
6770 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
/* Scale the pixel prefetch rate by the expected prefetch ratio (and a quarter of it), each floored at 1.0, and by the bandwidth acceleration. */
6771 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6772 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6773 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
6774 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6775 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / ReturnBusWidth;
/*
 * Fall back to the state's full DCFCLK.
 * NOTE(review): presumably the else-branch of `PrefetchTime > 0`; the
 * `else` line is not visible in this listing.
 */
6778 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
/* Planes with dynamic metadata must also fit the extra latency into the time left before active. */
6780 if (DynamicMetadataEnable[k] == true) {
6781 double TsetupPipe = { 0 };
6782 double TdmbfPipe = { 0 };
6783 double TdmsksPipe = { 0 };
6784 double TdmecPipe = { 0 };
6785 double AllowedTimeForUrgentExtraLatency = { 0 };
/*
 * NOTE(review): some arguments of this call are not visible in this
 * listing; presumably it fills the four T*Pipe locals declared above -
 * confirm against the full file.
 */
6787 CalculateDynamicMetadataParameters(
6788 MaxInterDCNTileRepeaters,
6789 RequiredDPPCLK[i][j][k],
6790 RequiredDISPCLK[i][j],
6791 ProjectedDCFCLKDeepSleep[i][j],
6794 VTotal[k] - VActive[k],
6795 DynamicMetadataTransmittedBytes[k],
6796 DynamicMetadataLinesBeforeActiveRequired[k],
6798 ProgressiveToInterlaceUnitInOPP,
6803 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TsetupPipe
6804 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6805 if (AllowedTimeForUrgentExtraLatency > 0) {
6806 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6807 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
/*
 * No time budget left: require the state's full DCFCLK.
 * NOTE(review): presumably the else-branch of the check above; the `else`
 * line is not visible in this listing.
 */
6809 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
/* Total peak requirement is the sum of the per-plane requirements. */
6813 DCFCLKRequiredForPeakBandwidth = 0;
6814 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6815 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
/* Urgent latency times a page-table-level multiplier; the multiplier is 0 when GPUVM is disabled. */
6817 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? (HostVMEnable == true ?
6818 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
/* Raise the peak requirement so that every plane's Tvm + 2*Tr0 + Tsw window can be met. */
6819 for (k = 0; k < NumberOfActivePlanes; ++k) {
6820 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6821 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
/* Window too small: require the state's full DCFCLK. */
6822 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6823 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
/*
 * NOTE(review): presumably the else-branch of the check above; the `else`
 * line is not visible in this listing.
 */
6825 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6826 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6827 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
/* Result: the larger requirement with the 1.05 factor and configured percent margin applied, capped at the state's DCFCLK. */
6830 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6831 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6836 #endif /* CONFIG_DRM_AMD_DC_DCN */