2 * Copyright 2020 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
26 #ifdef CONFIG_DRM_AMD_DC_DCN
29 #include "../display_mode_lib.h"
30 #include "display_mode_vba_30.h"
31 #include "../dml_inline_defs.h"
36 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
38 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
39 * ways. Unless there is something clearly wrong with it the code should
40 * remain as-is as it provides us with a guarantee from HW that it is correct.
/*
 * Members of a per-pipe parameter record. The opening and closing lines of the
 * aggregate are not part of this copy of the file. CalculatePrefetchSchedule()
 * reads several of these members through `myPipe->`.
 */
48 double DCFCLKDeepSleep; /* forwarded to CalculateDynamicMetadataParameters() */
49 unsigned int DPPPerPlane; /* (DPPPerPlane - 1) * DPP_RECOUT_WIDTH is added to DSTXAfterScaler */
51 enum scan_direction_class SourceScan;
52 unsigned int BlockWidth256BytesY;
53 unsigned int BlockHeight256BytesY;
54 unsigned int BlockWidth256BytesC;
55 unsigned int BlockHeight256BytesC;
56 unsigned int InterlaceEnable; /* tested as a truth value together with ProgressiveToInterlaceUnitInOPP */
57 unsigned int NumberOfCursors; /* multiplied by DPPCLKDelayCNVCCursor when accumulating DPPCycles */
60 unsigned int DCCEnable; /* tested as a truth value in the prefetch schedule */
61 bool ODMCombineEnabled; /* when set, 18 is added to DSTXAfterScaler */
/* All-ones 32-bit sentinel; its use is not visible in this part of the file. */
65 #define BPP_BLENDED_PIPE 0xffffffff
/*
 * Width limits. The same figures (5184 and 4096) appear in the sliceWidth note
 * inside dscceComputeDelay(): sliceWidth <= 5184/numSlices, or 4096/numSlices
 * in 420 mode.
 */
66 #define DCN30_MAX_DSC_IMAGE_WIDTH 5184
67 #define DCN30_MAX_FMT_420_BUFFER_WIDTH 4096
/*
 * Forward declarations of two file-local calculation stages. Both take only
 * the display_mode_lib pointer and return nothing; dml30_recalculate() calls
 * them, in this order, as its last two steps.
 */
69 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
70 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
71 struct display_mode_lib *mode_lib);
/*
 * Forward declarations of the two DSC delay helpers; both return an unsigned
 * delay value and are defined further down in this file.
 * NOTE(review): no parameter is listed ahead of sliceWidth in this copy, yet
 * the body of dscceComputeDelay() uses BPP — the list shown here has gaps.
 */
72 static unsigned int dscceComputeDelay(
75 unsigned int sliceWidth,
76 unsigned int numSlices,
77 enum output_format_class pixelFormat,
78 enum output_encoder_class Output);
/* Takes only the pixel format and the output encoder class. */
79 static unsigned int dscComputeDelay(
80 enum output_format_class pixelFormat,
81 enum output_encoder_class Output);
82 // Super monster function with some 45 arguments.
/*
 * Forward declaration of CalculatePrefetchSchedule(); returns bool.
 * Scalar inputs come first. The trailing pointer parameters are written by the
 * definition (it assigns through *DSTXAfterScaler, *DSTYAfterScaler,
 * *PrefetchBandwidth, *NotEnoughTimeForDynamicMetadata and others).
 * NOTE(review): the definition also uses names such as myPipe, GPUVMEnable,
 * HostVMEnable, TWait and TCalc that are not listed below — the parameter list
 * in this copy has gaps.
 */
83 static bool CalculatePrefetchSchedule(
84 struct display_mode_lib *mode_lib,
85 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
86 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
88 unsigned int DSCDelay,
89 double DPPCLKDelaySubtotalPlusCNVCFormater,
90 double DPPCLKDelaySCL,
91 double DPPCLKDelaySCLLBOnly,
92 double DPPCLKDelayCNVCCursor,
93 double DISPCLKDelaySubtotal,
94 unsigned int DPP_RECOUT_WIDTH,
95 enum output_format_class OutputFormat,
96 unsigned int MaxInterDCNTileRepeaters,
97 unsigned int VStartup,
98 unsigned int MaxVStartup,
99 unsigned int GPUVMPageTableLevels,
102 unsigned int HostVMMaxNonCachedPageTableLevels,
103 double HostVMMinPageSize,
104 bool DynamicMetadataEnable,
105 bool DynamicMetadataVMEnabled,
106 int DynamicMetadataLinesBeforeActiveRequired,
107 unsigned int DynamicMetadataTransmittedBytes,
108 double UrgentLatency,
109 double UrgentExtraLatency,
111 unsigned int PDEAndMetaPTEBytesFrame,
112 unsigned int MetaRowByte,
113 unsigned int PixelPTEBytesPerRow,
114 double PrefetchSourceLinesY,
115 unsigned int SwathWidthY,
117 double VInitPreFillY,
118 unsigned int MaxNumSwathY,
119 double PrefetchSourceLinesC,
120 unsigned int SwathWidthC,
122 double VInitPreFillC,
123 unsigned int MaxNumSwathC,
124 long swath_width_luma_ub,
125 long swath_width_chroma_ub,
126 unsigned int SwathHeightY,
127 unsigned int SwathHeightC,
129 bool ProgressiveToInterlaceUnitInOPP,
/* Results written through pointers start here. */
130 double *DSTXAfterScaler,
131 double *DSTYAfterScaler,
132 double *DestinationLinesForPrefetch,
133 double *PrefetchBandwidth,
134 double *DestinationLinesToRequestVMInVBlank,
135 double *DestinationLinesToRequestRowInVBlank,
136 double *VRatioPrefetchY,
137 double *VRatioPrefetchC,
138 double *RequiredPrefetchPixDataBWLuma,
139 double *RequiredPrefetchPixDataBWChroma,
140 bool *NotEnoughTimeForDynamicMetadata,
142 double *prefetch_vmrow_bw,
145 unsigned int *VUpdateOffsetPix,
146 double *VUpdateWidthPix,
147 double *VReadyOffsetPix);
/*
 * Forward declarations of a pair of clock-rounding helpers with identical
 * signatures: (Clock, VCOSpeed) -> double. Definitions are not in this view.
 * NOTE(review): the Up/Down suffixes presumably select the rounding
 * direction — confirm against the definitions.
 */
148 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
149 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
/*
 * Forward declaration of CalculateDCCConfiguration(); returns nothing.
 * The six trailing `unsigned int *` parameters come in luma/chroma pairs
 * (max uncompressed block, max compressed block, independent block).
 * NOTE(review): they are presumably outputs — the definition is not in this
 * view, and at least one leading parameter is absent from this copy.
 */
150 static void CalculateDCCConfiguration(
152 bool DCCProgrammingAssumesScanDirectionUnknown,
153 enum source_format_class SourcePixelFormat,
154 unsigned int ViewportWidthLuma,
155 unsigned int ViewportWidthChroma,
156 unsigned int ViewportHeightLuma,
157 unsigned int ViewportHeightChroma,
158 double DETBufferSize,
159 unsigned int RequestHeight256ByteLuma,
160 unsigned int RequestHeight256ByteChroma,
161 enum dm_swizzle_mode TilingFormat,
162 unsigned int BytePerPixelY,
163 unsigned int BytePerPixelC,
164 double BytePerPixelDETY,
165 double BytePerPixelDETC,
166 enum scan_direction_class ScanOrientation,
167 unsigned int *MaxUncompressedBlockLuma,
168 unsigned int *MaxUncompressedBlockChroma,
169 unsigned int *MaxCompressedBlockLuma,
170 unsigned int *MaxCompressedBlockChroma,
171 unsigned int *IndependentBlockLuma,
172 unsigned int *IndependentBlockChroma);
/*
 * Forward declaration of CalculatePrefetchSourceLines(); returns double and
 * additionally takes two pointer parameters (VInitPreFill, MaxNumSwath).
 * NOTE(review): the pointers are presumably outputs, and several parameters
 * between mode_lib and ProgressiveToInterlaceUnitInOPP are absent from this
 * copy — confirm against the definition.
 */
173 static double CalculatePrefetchSourceLines(
174 struct display_mode_lib *mode_lib,
178 bool ProgressiveToInterlaceUnitInOPP,
179 unsigned int SwathHeight,
180 unsigned int ViewportYStart,
181 double *VInitPreFill,
182 unsigned int *MaxNumSwath);
/*
 * Forward declaration of CalculateVMAndRowBytes(); returns unsigned int.
 * Value parameters describe the surface and page-table configuration; the
 * pointer parameters from MacroTileWidth onward are presumably results
 * (definition not in this view — TODO confirm). Some parameters are absent
 * from this copy.
 */
183 static unsigned int CalculateVMAndRowBytes(
184 struct display_mode_lib *mode_lib,
186 unsigned int BlockHeight256Bytes,
187 unsigned int BlockWidth256Bytes,
188 enum source_format_class SourcePixelFormat,
189 unsigned int SurfaceTiling,
190 unsigned int BytePerPixel,
191 enum scan_direction_class ScanDirection,
192 unsigned int SwathWidth,
193 unsigned int ViewportHeight,
196 unsigned int HostVMMaxNonCachedPageTableLevels,
197 unsigned int GPUVMMinPageSize,
198 unsigned int HostVMMinPageSize,
199 unsigned int PTEBufferSizeInRequests,
201 unsigned int DCCMetaPitch,
202 unsigned int *MacroTileWidth,
203 unsigned int *MetaRowByte,
204 unsigned int *PixelPTEBytesPerRow,
205 bool *PTEBufferSizeNotExceeded,
206 unsigned int *dpte_row_width_ub,
207 unsigned int *dpte_row_height,
208 unsigned int *MetaRequestWidth,
209 unsigned int *MetaRequestHeight,
210 unsigned int *meta_row_width,
211 unsigned int *meta_row_height,
212 unsigned int *vm_group_bytes,
213 unsigned int *dpte_group_bytes,
214 unsigned int *PixelPTEReqWidth,
215 unsigned int *PixelPTEReqHeight,
216 unsigned int *PTERequestSize,
217 unsigned int *DPDE0BytesFrame,
218 unsigned int *MetaPTEBytesFrame);
/*
 * Forward declaration of CalculateTWait(); returns double from a prefetch mode
 * and three latency/time values. Takes no pointers, so it has no
 * out-parameters. Definition not in this view.
 */
219 static double CalculateTWait(
220 unsigned int PrefetchMode,
221 double DRAMClockChangeLatency,
222 double UrgentLatency,
223 double SREnterPlusExitTime);
/*
 * Forward declaration of CalculateRowBandwidth(); returns nothing.
 * Takes luma/chroma pairs of meta-row and PTE-row byte counts and row heights.
 * NOTE(review): dpte_row_bw is presumably an output; several parameters are
 * absent from this copy — confirm against the definition.
 */
224 static void CalculateRowBandwidth(
226 enum source_format_class SourcePixelFormat,
231 unsigned int MetaRowByteLuma,
232 unsigned int MetaRowByteChroma,
233 unsigned int meta_row_height_luma,
234 unsigned int meta_row_height_chroma,
235 unsigned int PixelPTEBytesPerRowLuma,
236 unsigned int PixelPTEBytesPerRowChroma,
237 unsigned int dpte_row_height_luma,
238 unsigned int dpte_row_height_chroma,
240 double *dpte_row_bw);
/*
 * Forward declaration of CalculateFlipSchedule(); returns nothing.
 * The four trailing pointer parameters are presumably the results
 * (definition not in this view — TODO confirm). Some parameters are absent
 * from this copy.
 */
241 static void CalculateFlipSchedule(
242 struct display_mode_lib *mode_lib,
243 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
244 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
245 double UrgentExtraLatency,
246 double UrgentLatency,
247 unsigned int GPUVMMaxPageTableLevels,
249 unsigned int HostVMMaxNonCachedPageTableLevels,
251 double HostVMMinPageSize,
252 double PDEAndMetaPTEBytesPerFrame,
254 double DPTEBytesPerRow,
255 double BandwidthAvailableForImmediateFlip,
256 unsigned int TotImmediateFlipBytes,
257 enum source_format_class SourcePixelFormat,
263 unsigned int dpte_row_height,
264 unsigned int meta_row_height,
265 unsigned int dpte_row_height_chroma,
266 unsigned int meta_row_height_chroma,
267 double *DestinationLinesToRequestVMInImmediateFlip,
268 double *DestinationLinesToRequestRowInImmediateFlip,
269 double *final_flip_bw,
270 bool *ImmediateFlipSupportedForPipe);
/*
 * Forward declaration of CalculateWriteBackDelay(); returns double from the
 * writeback format, scaling ratios, tap count, geometry and HTotal. All
 * parameters are passed by value. Definition not in this view.
 */
271 static double CalculateWriteBackDelay(
272 enum source_format_class WritebackPixelFormat,
273 double WritebackHRatio,
274 double WritebackVRatio,
275 unsigned int WritebackVTaps,
276 long WritebackDestinationWidth,
277 long WritebackDestinationHeight,
278 long WritebackSourceHeight,
279 unsigned int HTotal);
/*
 * Forward declaration of CalculateDynamicMetadataParameters(); returns
 * nothing. CalculatePrefetchSchedule() calls it with the tile-repeater count,
 * the deep-sleep DCFCLK, the dynamic-metadata byte/line counts and the
 * interlace flags.
 * NOTE(review): the end of this declaration (remaining parameters and the
 * closing parenthesis) is absent from this copy.
 */
280 static void CalculateDynamicMetadataParameters(
281 int MaxInterDCNTileRepeaters,
284 double DCFClkDeepSleep,
288 long DynamicMetadataTransmittedBytes,
289 long DynamicMetadataLinesBeforeActiveRequired,
291 bool ProgressiveToInterlaceUnitInOPP,
/*
 * Forward declaration of CalculateWatermarksAndDRAMSpeedChangeSupport();
 * returns nothing. Array parameters (declared with `[]`) carry one entry per
 * plane — NOTE(review): presumably NumberOfActivePlanes entries, confirm.
 * The eight trailing pointer parameters (DRAMClockChangeSupport through
 * MinActiveDRAMClockChangeLatencySupported) are presumably the results; the
 * definition is not in this view and some parameters are absent from this
 * copy.
 */
296 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
297 struct display_mode_lib *mode_lib,
298 unsigned int PrefetchMode,
299 unsigned int NumberOfActivePlanes,
300 unsigned int MaxLineBufferLines,
301 unsigned int LineBufferSize,
302 unsigned int DPPOutputBufferPixels,
303 unsigned int DETBufferSizeInKByte,
304 unsigned int WritebackInterfaceBufferSize,
308 unsigned int dpte_group_bytes[],
309 unsigned int MetaChunkSize,
310 double UrgentLatency,
312 double WritebackLatency,
313 double WritebackChunkSize,
315 double DRAMClockChangeLatency,
317 double SREnterPlusExitTime,
318 double DCFCLKDeepSleep,
319 unsigned int DPPPerPlane[],
322 unsigned int DETBufferSizeY[],
323 unsigned int DETBufferSizeC[],
324 unsigned int SwathHeightY[],
325 unsigned int SwathHeightC[],
326 unsigned int LBBitPerPixel[],
327 double SwathWidthY[],
328 double SwathWidthC[],
330 double HRatioChroma[],
331 unsigned int vtaps[],
332 unsigned int VTAPsChroma[],
334 double VRatioChroma[],
335 unsigned int HTotal[],
337 unsigned int BlendingAndTiming[],
338 double BytePerPixelDETY[],
339 double BytePerPixelDETC[],
340 double DSTXAfterScaler[],
341 double DSTYAfterScaler[],
342 bool WritebackEnable[],
343 enum source_format_class WritebackPixelFormat[],
344 double WritebackDestinationWidth[],
345 double WritebackDestinationHeight[],
346 double WritebackSourceHeight[],
347 enum clock_change_support *DRAMClockChangeSupport,
348 double *UrgentWatermark,
349 double *WritebackUrgentWatermark,
350 double *DRAMClockChangeWatermark,
351 double *WritebackDRAMClockChangeWatermark,
352 double *StutterExitWatermark,
353 double *StutterEnterPlusExitWatermark,
354 double *MinActiveDRAMClockChangeLatencySupported);
/*
 * Forward declaration of CalculateDCFCLKDeepSleep(); returns nothing.
 * Takes per-plane arrays of ratios, swath widths, throughputs and read
 * bandwidths. NOTE(review): the final pointer, DCFCLKDeepSleep, is presumably
 * the result; some parameters are absent from this copy and the definition is
 * not in this view.
 */
355 static void CalculateDCFCLKDeepSleep(
356 struct display_mode_lib *mode_lib,
357 unsigned int NumberOfActivePlanes,
361 double VRatioChroma[],
362 double SwathWidthY[],
363 double SwathWidthC[],
364 unsigned int DPPPerPlane[],
366 double HRatioChroma[],
368 double PSCL_THROUGHPUT[],
369 double PSCL_THROUGHPUT_CHROMA[],
371 double ReadBandwidthLuma[],
372 double ReadBandwidthChroma[],
374 double *DCFCLKDeepSleep);
/*
 * Forward declaration of CalculateUrgentBurstFactor(); returns nothing.
 * NOTE(review): the four trailing pointers (three burst factors and the
 * NotEnoughUrgentLatencyHiding flag) are presumably outputs; some parameters
 * are absent from this copy and the definition is not in this view.
 */
375 static void CalculateUrgentBurstFactor(
376 long swath_width_luma_ub,
377 long swath_width_chroma_ub,
378 unsigned int DETBufferSizeInKByte,
379 unsigned int SwathHeightY,
380 unsigned int SwathHeightC,
382 double UrgentLatency,
383 double CursorBufferSize,
384 unsigned int CursorWidth,
385 unsigned int CursorBPP,
388 double BytePerPixelInDETY,
389 double BytePerPixelInDETC,
390 double DETBufferSizeY,
391 double DETBufferSizeC,
392 double *UrgentBurstFactorCursor,
393 double *UrgentBurstFactorLuma,
394 double *UrgentBurstFactorChroma,
395 bool *NotEnoughUrgentLatencyHiding);
/*
 * Forward declaration of UseMinimumDCFCLK(); returns nothing.
 * Many parameters are tables declared as `[][2]` or
 * `[][2][DC__NUM_DPP__MAX]`: an open leading dimension, a middle dimension of
 * two, and (for the 3-D form) one slot per DPP.
 * NOTE(review): which arrays are written (DCFCLKState is a likely candidate)
 * cannot be told from the declaration — confirm against the definition, which
 * is not in this view. Some parameters are absent from this copy.
 */
397 static void UseMinimumDCFCLK(
398 struct display_mode_lib *mode_lib,
399 int MaxInterDCNTileRepeaters,
401 double FinalDRAMClockChangeLatency,
402 double SREnterPlusExitTime,
404 int RoundTripPingLatencyCycles,
406 int PixelChunkSizeInKByte,
409 int GPUVMMaxPageTableLevels,
411 int NumberOfActivePlanes,
412 double HostVMMinPageSize,
413 int HostVMMaxNonCachedPageTableLevels,
414 bool DynamicMetadataVMEnabled,
415 enum immediate_flip_requirement ImmediateFlipRequirement,
416 bool ProgressiveToInterlaceUnitInOPP,
417 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
418 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
419 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
420 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
423 int DynamicMetadataTransmittedBytes[],
424 int DynamicMetadataLinesBeforeActiveRequired[],
426 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
427 double RequiredDISPCLK[][2],
429 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
430 double ProjectedDCFCLKDeepSleep[][2],
431 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
432 double TotalVActivePixelBandwidth[][2],
433 double TotalVActiveCursorBandwidth[][2],
434 double TotalMetaRowBandwidth[][2],
435 double TotalDPTERowBandwidth[][2],
436 unsigned int TotalNumberOfActiveDPP[][2],
437 unsigned int TotalNumberOfDCCActiveDPP[][2],
438 int dpte_group_bytes[],
439 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
440 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
441 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
442 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
447 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
448 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
449 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
450 bool DynamicMetadataEnable[],
451 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
452 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
453 double ReadBandwidthLuma[],
454 double ReadBandwidthChroma[],
455 double DCFCLKPerState[],
456 double DCFCLKState[][2]);
/*
 * Forward declaration of CalculatePixelDeliveryTimes(); returns nothing.
 * All per-plane data is passed as arrays; CursorWidth and CursorBPP have a
 * second dimension of two.
 * NOTE(review): the ten trailing double arrays (DisplayPipe*DeliveryTime* and
 * CursorRequestDeliveryTime*) are presumably filled in by the function —
 * confirm against the definition, which is not in this view. Some parameters
 * are absent from this copy.
 */
457 static void CalculatePixelDeliveryTimes(
458 unsigned int NumberOfActivePlanes,
460 double VRatioChroma[],
461 double VRatioPrefetchY[],
462 double VRatioPrefetchC[],
463 unsigned int swath_width_luma_ub[],
464 unsigned int swath_width_chroma_ub[],
465 unsigned int DPPPerPlane[],
467 double HRatioChroma[],
469 double PSCL_THROUGHPUT[],
470 double PSCL_THROUGHPUT_CHROMA[],
473 enum scan_direction_class SourceScan[],
474 unsigned int NumberOfCursors[],
475 unsigned int CursorWidth[][2],
476 unsigned int CursorBPP[][2],
477 unsigned int BlockWidth256BytesY[],
478 unsigned int BlockHeight256BytesY[],
479 unsigned int BlockWidth256BytesC[],
480 unsigned int BlockHeight256BytesC[],
481 double DisplayPipeLineDeliveryTimeLuma[],
482 double DisplayPipeLineDeliveryTimeChroma[],
483 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
484 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
485 double DisplayPipeRequestDeliveryTimeLuma[],
486 double DisplayPipeRequestDeliveryTimeChroma[],
487 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
488 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
489 double CursorRequestDeliveryTime[],
490 double CursorRequestDeliveryTimePrefetch[]);
/*
 * Forward declaration of CalculateMetaAndPTETimes(); returns nothing.
 * Integer arrays describe meta-row and PTE request geometry, mostly in
 * luma/chroma pairs.
 * NOTE(review): the double arrays from DST_Y_PER_PTE_ROW_NOM_L onward are
 * presumably the results — confirm against the definition, which is not in
 * this view. Some parameters are absent from this copy.
 */
492 static void CalculateMetaAndPTETimes(
493 int NumberOfActivePlanes,
496 int MinMetaChunkSizeBytes,
499 double VRatioChroma[],
500 double DestinationLinesToRequestRowInVBlank[],
501 double DestinationLinesToRequestRowInImmediateFlip[],
506 enum scan_direction_class SourceScan[],
507 int dpte_row_height[],
508 int dpte_row_height_chroma[],
509 int meta_row_width[],
510 int meta_row_width_chroma[],
511 int meta_row_height[],
512 int meta_row_height_chroma[],
513 int meta_req_width[],
514 int meta_req_width_chroma[],
515 int meta_req_height[],
516 int meta_req_height_chroma[],
517 int dpte_group_bytes[],
518 int PTERequestSizeY[],
519 int PTERequestSizeC[],
520 int PixelPTEReqWidthY[],
521 int PixelPTEReqHeightY[],
522 int PixelPTEReqWidthC[],
523 int PixelPTEReqHeightC[],
524 int dpte_row_width_luma_ub[],
525 int dpte_row_width_chroma_ub[],
526 double DST_Y_PER_PTE_ROW_NOM_L[],
527 double DST_Y_PER_PTE_ROW_NOM_C[],
528 double DST_Y_PER_META_ROW_NOM_L[],
529 double DST_Y_PER_META_ROW_NOM_C[],
530 double TimePerMetaChunkNominal[],
531 double TimePerChromaMetaChunkNominal[],
532 double TimePerMetaChunkVBlank[],
533 double TimePerChromaMetaChunkVBlank[],
534 double TimePerMetaChunkFlip[],
535 double TimePerChromaMetaChunkFlip[],
536 double time_per_pte_group_nom_luma[],
537 double time_per_pte_group_vblank_luma[],
538 double time_per_pte_group_flip_luma[],
539 double time_per_pte_group_nom_chroma[],
540 double time_per_pte_group_vblank_chroma[],
541 double time_per_pte_group_flip_chroma[]);
/*
 * Forward declaration of CalculateVMGroupAndRequestTimes(); returns nothing.
 * NOTE(review): the four trailing TimePerVM* arrays are presumably the
 * results — confirm against the definition, which is not in this view. Note
 * the mixed signedness of the byte-count arrays (unsigned for dpde0_*, int for
 * meta_pte_*). Some parameters are absent from this copy.
 */
543 static void CalculateVMGroupAndRequestTimes(
544 unsigned int NumberOfActivePlanes,
546 unsigned int GPUVMMaxPageTableLevels,
547 unsigned int HTotal[],
549 double DestinationLinesToRequestVMInVBlank[],
550 double DestinationLinesToRequestVMInImmediateFlip[],
553 int dpte_row_width_luma_ub[],
554 int dpte_row_width_chroma_ub[],
555 int vm_group_bytes[],
556 unsigned int dpde0_bytes_per_frame_ub_l[],
557 unsigned int dpde0_bytes_per_frame_ub_c[],
558 int meta_pte_bytes_per_frame_ub_l[],
559 int meta_pte_bytes_per_frame_ub_c[],
560 double TimePerVMGroupVBlank[],
561 double TimePerVMGroupFlip[],
562 double TimePerVMRequestVBlank[],
563 double TimePerVMRequestFlip[]);
/*
 * Forward declaration of CalculateStutterEfficiency(); returns nothing.
 * NOTE(review): the three trailing `double *` parameters are presumably the
 * results — confirm against the definition, which is not in this view. Some
 * parameters are absent from this copy.
 */
565 static void CalculateStutterEfficiency(
566 int NumberOfActivePlanes,
567 long ROBBufferSizeInKByte,
568 double TotalDataReadBandwidth,
572 bool SynchronizedVBlank,
574 unsigned int DETBufferSizeY[],
576 double BytePerPixelDETY[],
577 double SwathWidthY[],
580 double DCCRateLuma[],
581 double DCCRateChroma[],
586 enum scan_direction_class SourceScan[],
587 int BlockHeight256BytesY[],
588 int BlockWidth256BytesY[],
589 int BlockHeight256BytesC[],
590 int BlockWidth256BytesC[],
591 int DCCYMaxUncompressedBlock[],
592 int DCCCMaxUncompressedBlock[],
595 bool WritebackEnable[],
596 double ReadBandwidthPlaneLuma[],
597 double ReadBandwidthPlaneChroma[],
598 double meta_row_bw[],
599 double dpte_row_bw[],
600 double *StutterEfficiencyNotIncludingVBlank,
601 double *StutterEfficiency,
602 double *StutterPeriodOut);
/*
 * Forward declaration of CalculateSwathAndDETConfiguration(); returns nothing.
 * NOTE(review): the arrays from swath_width_luma_ub onward and the final
 * ViewportSizeSupport pointer are presumably outputs — confirm against the
 * definition, which is not in this view. Some parameters (including the one
 * ahead of NumberOfActivePlanes) are absent from this copy.
 */
604 static void CalculateSwathAndDETConfiguration(
606 int NumberOfActivePlanes,
607 unsigned int DETBufferSizeInKByte,
608 double MaximumSwathWidthLuma[],
609 double MaximumSwathWidthChroma[],
610 enum scan_direction_class SourceScan[],
611 enum source_format_class SourcePixelFormat[],
612 enum dm_swizzle_mode SurfaceTiling[],
614 int ViewportHeight[],
617 int SurfaceHeightY[],
618 int SurfaceHeightC[],
619 int Read256BytesBlockHeightY[],
620 int Read256BytesBlockHeightC[],
621 int Read256BytesBlockWidthY[],
622 int Read256BytesBlockWidthC[],
623 enum odm_combine_mode ODMCombineEnabled[],
624 int BlendingAndTiming[],
627 double BytePerPixDETY[],
628 double BytePerPixDETC[],
631 double HRatioChroma[],
633 int swath_width_luma_ub[],
634 int swath_width_chroma_ub[],
636 double SwathWidthChroma[],
639 unsigned int DETBufferSizeY[],
640 unsigned int DETBufferSizeC[],
641 bool ViewportSizeSupportPerPlane[],
642 bool *ViewportSizeSupport);
/*
 * Forward declaration of CalculateSwathWidth(); returns nothing.
 * NOTE(review): the arrays from SwathWidthSingleDPPY onward are presumably
 * outputs — confirm against the definition, which is not in this view.
 * swath_width_*_ub is `unsigned int[]` here but `int[]` in the
 * CalculateSwathAndDETConfiguration() declaration. Some parameters are absent
 * from this copy.
 */
643 static void CalculateSwathWidth(
645 int NumberOfActivePlanes,
646 enum source_format_class SourcePixelFormat[],
647 enum scan_direction_class SourceScan[],
648 unsigned int ViewportWidth[],
649 unsigned int ViewportHeight[],
650 unsigned int SurfaceWidthY[],
651 unsigned int SurfaceWidthC[],
652 unsigned int SurfaceHeightY[],
653 unsigned int SurfaceHeightC[],
654 enum odm_combine_mode ODMCombineEnabled[],
657 int Read256BytesBlockHeightY[],
658 int Read256BytesBlockHeightC[],
659 int Read256BytesBlockWidthY[],
660 int Read256BytesBlockWidthC[],
661 int BlendingAndTiming[],
662 unsigned int HActive[],
665 double SwathWidthSingleDPPY[],
666 double SwathWidthSingleDPPC[],
667 double SwathWidthY[],
668 double SwathWidthC[],
669 int MaximumSwathHeightY[],
670 int MaximumSwathHeightC[],
671 unsigned int swath_width_luma_ub[],
672 unsigned int swath_width_chroma_ub[]);
/*
 * Forward declaration of CalculateExtraLatency(); returns double.
 * Its visible parameters are those of CalculateExtraLatencyBytes() with
 * RoundTripPingLatencyCycles added in front. Definition not in this view; some
 * parameters are absent from this copy.
 */
673 static double CalculateExtraLatency(
674 long RoundTripPingLatencyCycles,
675 long ReorderingBytes,
677 int TotalNumberOfActiveDPP,
678 int PixelChunkSizeInKByte,
679 int TotalNumberOfDCCActiveDPP,
684 int NumberOfActivePlanes,
686 int dpte_group_bytes[],
687 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
688 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
689 double HostVMMinPageSize,
690 int HostVMMaxNonCachedPageTableLevels);
/*
 * Forward declaration of CalculateExtraLatencyBytes(); returns double.
 * Takes the reordering byte count, DPP counts, chunk size, per-plane
 * dpte_group_bytes and the HostVM parameters. Definition not in this view;
 * some parameters are absent from this copy.
 */
691 static double CalculateExtraLatencyBytes(
692 long ReorderingBytes,
693 int TotalNumberOfActiveDPP,
694 int PixelChunkSizeInKByte,
695 int TotalNumberOfDCCActiveDPP,
699 int NumberOfActivePlanes,
701 int dpte_group_bytes[],
702 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
703 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
704 double HostVMMinPageSize,
705 int HostVMMaxNonCachedPageTableLevels);
/*
 * Forward declaration of CalculateUrgentLatency(); returns double.
 * Takes three urgent-latency figures (pixel only, pixel mixed with VM, VM
 * only), a flag enabling an adjustment, the adjustment's fabric-clock
 * component and reference, and a fabric clock value. All parameters are passed
 * by value. Definition not in this view.
 */
706 static double CalculateUrgentLatency(
707 double UrgentLatencyPixelDataOnly,
708 double UrgentLatencyPixelMixedWithVMData,
709 double UrgentLatencyVMDataOnly,
710 bool DoUrgentLatencyAdjustment,
711 double UrgentLatencyAdjustmentFabricClockComponent,
712 double UrgentLatencyAdjustmentFabricClockReference,
713 double FabricClockSingle);
/*
 * dml30_recalculate() - run the full calculation sequence on mode_lib.
 *
 * Non-static entry point. Calls four stages in a fixed order, passing the same
 * mode_lib to each. The last two stages are file-local (forward-declared near
 * the top of this file); the first two are not declared in the visible part of
 * this file — presumably they come from the included headers, TODO confirm.
 * NOTE(review): the function's braces are absent from this copy.
 */
715 void dml30_recalculate(struct display_mode_lib *mode_lib)
717 ModeSupportAndSystemConfiguration(mode_lib);
718 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
719 DisplayPipeConfiguration(mode_lib);
720 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
/*
 * dscceComputeDelay() - delay of the DSC encoder ("dscce"), expressed in
 * pixels.
 *
 * The calculation runs in encoder cycles (Delay) and is converted at the end
 * as Delay * 3 * pixelsPerClock. pixelsPerClock is selected from pixelFormat.
 * The initial transmit delay is half of rcModelSize divided by BPP and by
 * pixelsPerClock, rounded with dml_round().
 *
 * NOTE(review): parts of this function are absent from this copy — the
 * leading parameters (BPP is used but not listed), the bodies of the
 * pixelFormat branches, several intermediate assignments and the return
 * statement. The notes below cover only the visible statements.
 */
723 static unsigned int dscceComputeDelay(
726 unsigned int sliceWidth,
727 unsigned int numSlices,
728 enum output_format_class pixelFormat,
729 enum output_encoder_class Output)
731 // valid bpc = source bits per component in the set of {8, 10, 12}
732 // valid bpp = increments of 1/16 of a bit
733 // min = 6/7/8 in N420/N422/444, respectively
734 // max = such that compression is 1:1
735 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
736 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
737 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
740 unsigned int rcModelSize = 8192;
742 // N422/N420 operate at 2 pixels per clock
743 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
746 if (pixelFormat == dm_420)
748 // #all other modes operate at 1 pixel per clock
749 else if (pixelFormat == dm_444)
751 else if (pixelFormat == dm_n422)
756 //initial transmit delay as per PPS
757 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
767 //divide by pixel per cycle to compute slice width as seen by DSC
/* Both operands are unsigned int, so this division truncates. */
768 w = sliceWidth / pixelsPerClock;
770 //422 mode has an additional cycle of delay
771 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
776 //main calculation for the dscce
777 ix = initalXmitDelay + 45;
/* (a + 2) / 3 in unsigned arithmetic is a divided by 3, rounded up. */
782 ax = (a + 2) / 3 + D + 6 + 1;
/* Likewise ax divided by wx, rounded up. */
783 L = (ax + wx - 1) / wx;
784 if ((ix % w) == 0 && P != 0)
/* One L * wx term per slice after the first, plus fixed overheads. */
788 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
790 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
791 pixels = Delay * 3 * pixelsPerClock;
/*
 * dscComputeDelay() - accumulate the DSC block ("dscc") delay.
 *
 * Delay starts at 0 and is built up in one of three branches chosen by
 * pixelFormat: dm_420, dm_n422, or everything else. Each branch lists the same
 * kinds of contributions (input deserializer, input/output CDC FIFOs, CDC
 * uncertainty, output serializer); the dm_420 branch also notes that the dscc
 * receives pixels every other cycle.
 *
 * NOTE(review): the statements that add to Delay, the closing braces and the
 * return are absent from this copy; only the branch structure and the
 * original comments remain. Output is not referenced in the visible lines.
 */
795 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
797 unsigned int Delay = 0;
799 if (pixelFormat == dm_420) {
804 // dscc - input deserializer
806 // dscc gets pixels every other cycle
808 // dscc - input cdc fifo
810 // dscc gets pixels every other cycle
812 // dscc - cdc uncertainty
814 // dscc - output cdc fifo
816 // dscc gets pixels every other cycle
818 // dscc - cdc uncertainty
820 // dscc - output serializer
824 } else if (pixelFormat == dm_n422) {
829 // dscc - input deserializer
831 // dscc - input cdc fifo
833 // dscc - cdc uncertainty
835 // dscc - output cdc fifo
837 // dscc - cdc uncertainty
839 // dscc - output serializer
/* Remaining formats (the else branch; its opening line is absent from this copy). */
849 // dscc - input deserializer
851 // dscc - input cdc fifo
853 // dscc - cdc uncertainty
855 // dscc - output cdc fifo
857 // dscc - output serializer
859 // dscc - cdc uncertainty
868 static bool CalculatePrefetchSchedule(
869 struct display_mode_lib *mode_lib,
870 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
871 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
873 unsigned int DSCDelay,
874 double DPPCLKDelaySubtotalPlusCNVCFormater,
875 double DPPCLKDelaySCL,
876 double DPPCLKDelaySCLLBOnly,
877 double DPPCLKDelayCNVCCursor,
878 double DISPCLKDelaySubtotal,
879 unsigned int DPP_RECOUT_WIDTH,
880 enum output_format_class OutputFormat,
881 unsigned int MaxInterDCNTileRepeaters,
882 unsigned int VStartup,
883 unsigned int MaxVStartup,
884 unsigned int GPUVMPageTableLevels,
887 unsigned int HostVMMaxNonCachedPageTableLevels,
888 double HostVMMinPageSize,
889 bool DynamicMetadataEnable,
890 bool DynamicMetadataVMEnabled,
891 int DynamicMetadataLinesBeforeActiveRequired,
892 unsigned int DynamicMetadataTransmittedBytes,
893 double UrgentLatency,
894 double UrgentExtraLatency,
896 unsigned int PDEAndMetaPTEBytesFrame,
897 unsigned int MetaRowByte,
898 unsigned int PixelPTEBytesPerRow,
899 double PrefetchSourceLinesY,
900 unsigned int SwathWidthY,
902 double VInitPreFillY,
903 unsigned int MaxNumSwathY,
904 double PrefetchSourceLinesC,
905 unsigned int SwathWidthC,
907 double VInitPreFillC,
908 unsigned int MaxNumSwathC,
909 long swath_width_luma_ub,
910 long swath_width_chroma_ub,
911 unsigned int SwathHeightY,
912 unsigned int SwathHeightC,
914 bool ProgressiveToInterlaceUnitInOPP,
915 double *DSTXAfterScaler,
916 double *DSTYAfterScaler,
917 double *DestinationLinesForPrefetch,
918 double *PrefetchBandwidth,
919 double *DestinationLinesToRequestVMInVBlank,
920 double *DestinationLinesToRequestRowInVBlank,
921 double *VRatioPrefetchY,
922 double *VRatioPrefetchC,
923 double *RequiredPrefetchPixDataBWLuma,
924 double *RequiredPrefetchPixDataBWChroma,
925 bool *NotEnoughTimeForDynamicMetadata,
927 double *prefetch_vmrow_bw,
930 unsigned int *VUpdateOffsetPix,
931 double *VUpdateWidthPix,
932 double *VReadyOffsetPix)
934 bool MyError = false;
935 unsigned int DPPCycles = 0, DISPCLKCycles = 0;
936 double DSTTotalPixelsAfterScaler = 0;
937 double LineTime = 0, Tsetup = 0;
938 double dst_y_prefetch_equ = 0;
940 double prefetch_bw_oto = 0;
943 double Tvm_oto_lines = 0;
944 double Tr0_oto_lines = 0;
945 double dst_y_prefetch_oto = 0;
946 double TimeForFetchingMetaPTE = 0;
947 double TimeForFetchingRowInVBlank = 0;
948 double LinesToRequestPrefetchPixelData = 0;
949 double HostVMInefficiencyFactor = 0;
950 unsigned int HostVMDynamicLevelsTrips = 0;
951 double trip_to_mem = 0;
952 double Tvm_trips = 0;
953 double Tr0_trips = 0;
954 double Tvm_trips_rounded = 0;
955 double Tr0_trips_rounded = 0;
957 double Tpre_rounded = 0;
958 double prefetch_bw_equ = 0;
965 if (GPUVMEnable == true && HostVMEnable == true) {
966 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
967 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
969 HostVMInefficiencyFactor = 1;
970 HostVMDynamicLevelsTrips = 0;
973 CalculateDynamicMetadataParameters(
974 MaxInterDCNTileRepeaters,
977 myPipe->DCFCLKDeepSleep,
981 DynamicMetadataTransmittedBytes,
982 DynamicMetadataLinesBeforeActiveRequired,
983 myPipe->InterlaceEnable,
984 ProgressiveToInterlaceUnitInOPP,
990 LineTime = myPipe->HTotal / myPipe->PixelClock;
991 trip_to_mem = UrgentLatency;
992 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
994 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
995 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
997 *Tdmdl = TWait + UrgentExtraLatency;
1000 if (DynamicMetadataEnable == true) {
1001 if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
1002 *NotEnoughTimeForDynamicMetadata = true;
1004 *NotEnoughTimeForDynamicMetadata = false;
1005 dml_print("DML: Not Enough Time for Dynamic Meta!\n");
1006 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1007 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1008 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1009 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1012 *NotEnoughTimeForDynamicMetadata = false;
1015 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1017 if (myPipe->ScalerEnabled)
1018 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1020 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1022 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1024 DISPCLKCycles = DISPCLKDelaySubtotal;
1026 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1029 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
1032 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1034 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
1035 *DSTYAfterScaler = 1;
1037 *DSTYAfterScaler = 0;
1039 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1040 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1041 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1046 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1047 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1048 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1051 if (GPUVMPageTableLevels >= 3) {
1052 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1055 } else if (!myPipe->DCCEnable)
1058 *Tno_bw = LineTime / 4;
1060 dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
1061 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1063 Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
1064 Tsw_oto = Lsw_oto * LineTime;
1066 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto;
1068 if (GPUVMEnable == true) {
1069 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
1073 Tvm_oto = LineTime / 4.0;
1075 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1077 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
1078 LineTime - Tvm_oto, LineTime / 4);
1080 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1082 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1083 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1084 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1086 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1087 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1089 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1090 dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);
1092 dml_print("DML: LineTime: %f\n", LineTime);
1093 dml_print("DML: VStartup: %d\n", VStartup);
1094 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1095 dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
1096 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1097 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1098 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1099 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1100 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1101 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1102 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1103 dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1104 dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler);
1106 *PrefetchBandwidth = 0;
1107 *DestinationLinesToRequestVMInVBlank = 0;
1108 *DestinationLinesToRequestRowInVBlank = 0;
1109 *VRatioPrefetchY = 0;
1110 *VRatioPrefetchC = 0;
1111 *RequiredPrefetchPixDataBWLuma = 0;
1112 if (dst_y_prefetch_equ > 1) {
1113 double PrefetchBandwidth1 = 0;
1114 double PrefetchBandwidth2 = 0;
1115 double PrefetchBandwidth3 = 0;
1116 double PrefetchBandwidth4 = 0;
1118 if (Tpre_rounded - *Tno_bw > 0)
1119 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
1120 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1121 + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
1122 + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1123 / (Tpre_rounded - *Tno_bw);
1125 PrefetchBandwidth1 = 0;
1127 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) {
1128 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw);
1131 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1132 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
1133 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1134 swath_width_luma_ub * BytePerPixelY +
1135 PrefetchSourceLinesC * swath_width_chroma_ub *
1137 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1139 PrefetchBandwidth2 = 0;
1141 if (Tpre_rounded - Tvm_trips_rounded > 0)
1142 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
1143 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1144 swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
1145 swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded -
1148 PrefetchBandwidth3 = 0;
1150 if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1151 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
1154 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1155 PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1156 / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1158 PrefetchBandwidth4 = 0;
1165 if (PrefetchBandwidth1 > 0) {
1166 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
1167 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1176 if (PrefetchBandwidth2 > 0) {
1177 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
1178 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1187 if (PrefetchBandwidth3 > 0) {
1188 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
1189 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1199 prefetch_bw_equ = PrefetchBandwidth1;
1200 } else if (Case2OK) {
1201 prefetch_bw_equ = PrefetchBandwidth2;
1202 } else if (Case3OK) {
1203 prefetch_bw_equ = PrefetchBandwidth3;
1205 prefetch_bw_equ = PrefetchBandwidth4;
1208 dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);
1210 if (prefetch_bw_equ > 0) {
1212 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1214 Tvm_equ = LineTime / 4;
1217 if ((GPUVMEnable || myPipe->DCCEnable)) {
1219 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1221 (LineTime - Tvm_equ) / 2,
1224 Tr0_equ = (LineTime - Tvm_equ) / 2;
1229 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1233 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1234 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1235 TimeForFetchingMetaPTE = Tvm_oto;
1236 TimeForFetchingRowInVBlank = Tr0_oto;
1237 *PrefetchBandwidth = prefetch_bw_oto;
1239 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1240 TimeForFetchingMetaPTE = Tvm_equ;
1241 TimeForFetchingRowInVBlank = Tr0_equ;
1242 *PrefetchBandwidth = prefetch_bw_equ;
1245 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1247 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1250 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
1251 - 2 * *DestinationLinesToRequestRowInVBlank;
1253 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1255 *VRatioPrefetchY = (double) PrefetchSourceLinesY
1256 / LinesToRequestPrefetchPixelData;
1257 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1258 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1259 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1260 *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1261 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1262 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1265 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1266 *VRatioPrefetchY = 0;
1270 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1271 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1273 if ((SwathHeightC > 4)) {
1274 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1275 *VRatioPrefetchC = dml_max(*VRatioPrefetchC,
1276 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1277 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1280 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1281 *VRatioPrefetchC = 0;
1285 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
1286 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime;
1289 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1290 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1291 *VRatioPrefetchY = 0;
1292 *VRatioPrefetchC = 0;
1293 *RequiredPrefetchPixDataBWLuma = 0;
1294 *RequiredPrefetchPixDataBWChroma = 0;
1297 dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1298 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1299 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1300 dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1301 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
1302 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1303 dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
1304 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
1305 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1309 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1313 double prefetch_vm_bw = 0;
1314 double prefetch_row_bw = 0;
1316 if (PDEAndMetaPTEBytesFrame == 0) {
1318 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1319 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1323 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1325 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1326 prefetch_row_bw = 0;
1327 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1328 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1330 prefetch_row_bw = 0;
1332 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1335 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1339 *PrefetchBandwidth = 0;
1340 TimeForFetchingMetaPTE = 0;
1341 TimeForFetchingRowInVBlank = 0;
1342 *DestinationLinesToRequestVMInVBlank = 0;
1343 *DestinationLinesToRequestRowInVBlank = 0;
1344 *DestinationLinesForPrefetch = 0;
1345 LinesToRequestPrefetchPixelData = 0;
1346 *VRatioPrefetchY = 0;
1347 *VRatioPrefetchC = 0;
1348 *RequiredPrefetchPixDataBWLuma = 0;
1349 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * Snap a requested clock onto the DFS grid of the given VCO, rounding up.
 *
 * The result has the form (VCOSpeed * 4) / N, where N is
 * dml_floor(VCOSpeed * 4 / Clock, 1). Using the floor of the divider means
 * the returned clock is not lower than the requested one (assuming
 * dml_floor(x, 1) rounds down to a whole number; the helper is defined
 * outside this file).
 *
 * Clock and VCOSpeed must use the same unit. There is no guard against
 * Clock == 0 or against a divider that floors to zero.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double vco_times_four = VCOSpeed * 4;
	double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
/*
 * Snap a requested clock onto the DFS grid of the given VCO, rounding down.
 *
 * The result has the form (VCOSpeed * 4) / N, where N is
 * dml_ceil(VCOSpeed * 4 / Clock, 1). Using the ceiling of the divider means
 * the returned clock is not higher than the requested one (assuming
 * dml_ceil(x, 1) rounds up to a whole number; the helper is defined outside
 * this file).
 *
 * Clock and VCOSpeed must use the same unit. There is no guard against
 * Clock == 0.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double vco_times_four = VCOSpeed * 4.0;
	double divider = dml_ceil(vco_times_four / Clock, 1);

	return VCOSpeed * 4 / divider;
}
/*
 * CalculateDCCConfiguration - choose the DCC request sizing for one surface.
 *
 * From the pixel format, tiling format, bytes per pixel, 256-byte request
 * heights and the DET buffer size, decide per plane (luma / chroma) whether
 * full 256-byte requests can be used or whether 128-byte requests
 * (contiguous or non-contiguous) are needed. The decision is reported
 * through the six output pointers:
 *
 *   MaxUncompressedBlock{Luma,Chroma} - 256 while the plane is active
 *   MaxCompressedBlock{Luma,Chroma}   - 256, 128 or 64
 *   IndependentBlock{Luma,Chroma}     - 0, 128 or 64
 *
 * All chroma outputs are 0 when DCCEnabled is not true or BytePerPixelC is 0;
 * all luma outputs are 0 when DCCEnabled is not true.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are not referenced in the lines visible in this listing.
 *
 * NOTE(review): several statements of this function (some declarations and
 * the bodies of a few single-statement ifs) are not visible in this listing;
 * the comments below describe only what the visible lines establish.
 */
1365 static void CalculateDCCConfiguration(
1367 bool DCCProgrammingAssumesScanDirectionUnknown,
1368 enum source_format_class SourcePixelFormat,
1369 unsigned int SurfaceWidthLuma,
1370 unsigned int SurfaceWidthChroma,
1371 unsigned int SurfaceHeightLuma,
1372 unsigned int SurfaceHeightChroma,
1373 double DETBufferSize,
1374 unsigned int RequestHeight256ByteLuma,
1375 unsigned int RequestHeight256ByteChroma,
1376 enum dm_swizzle_mode TilingFormat,
1377 unsigned int BytePerPixelY,
1378 unsigned int BytePerPixelC,
1379 double BytePerPixelDETY,
1380 double BytePerPixelDETC,
1381 enum scan_direction_class ScanOrientation,
1382 unsigned int *MaxUncompressedBlockLuma,
1383 unsigned int *MaxUncompressedBlockChroma,
1384 unsigned int *MaxCompressedBlockLuma,
1385 unsigned int *MaxCompressedBlockChroma,
1386 unsigned int *IndependentBlockLuma,
1387 unsigned int *IndependentBlockChroma)
/*
 * req128_*: 1 when that plane must fall back to 128-byte requests for the
 * given scan direction (horz / vert), 0 when 256-byte requests fit.
 * segment_order_*_contiguous_*: 1 when a 128-byte fallback can use the
 * contiguous request type for that plane and direction.
 */
1395 int req128_horz_wc_l = 0;
1396 int req128_horz_wc_c = 0;
1397 int req128_vert_wc_l = 0;
1398 int req128_vert_wc_c = 0;
1399 int segment_order_horz_contiguous_luma = 0;
1400 int segment_order_horz_contiguous_chroma = 0;
1401 int segment_order_vert_contiguous_luma = 0;
1402 int segment_order_vert_contiguous_chroma = 0;
1404 long full_swath_bytes_horz_wc_l = 0;
1405 long full_swath_bytes_horz_wc_c = 0;
1406 long full_swath_bytes_vert_wc_l = 0;
1407 long full_swath_bytes_vert_wc_c = 0;
1409 long swath_buf_size = 0;
1410 double detile_buf_vp_horz_limit = 0;
1411 double detile_buf_vp_vert_limit = 0;
1413 long MAS_vp_horz_limit = 0;
1414 long MAS_vp_vert_limit = 0;
1415 long max_vp_horz_width = 0;
1416 long max_vp_vert_height = 0;
1417 long eff_surf_width_l = 0;
1418 long eff_surf_width_c = 0;
1419 long eff_surf_height_l = 0;
1420 long eff_surf_height_c = 0;
1424 REQ_128BytesNonContiguous,
1425 REQ_128BytesContiguous,
1429 RequestType RequestLuma;
1430 RequestType RequestChroma;
/* yuv420 is 1 for the three 4:2:0 formats and 0 otherwise; it is used below as (1 + yuv420) to halve chroma dimensions. */
1432 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
/*
 * Conditions on 1-byte and 8-byte pixels (the latter only for the 64KB "S"
 * tiling variants).
 * NOTE(review): the statements these ifs control are not visible here;
 * presumably they adjust horz_div_* / vert_div_*, which feed the
 * (1 + *_div_*) divisors below - confirm against the full file.
 */
1438 if (BytePerPixelY == 1)
1440 if (BytePerPixelC == 1)
1442 if (BytePerPixelY == 8
1443 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1444 || TilingFormat == dm_sw_64kb_s_x))
1446 if (BytePerPixelC == 8
1447 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1448 || TilingFormat == dm_sw_64kb_s_x))
/*
 * Swath buffer budget is half of the DET minus 2 * 256 bytes (single plane)
 * or 2 * 2 * 256 bytes (luma + chroma). Dividing it by the per-pixel /
 * per-line cost gives the widest (horz) and tallest (vert) viewport the
 * detile buffer can hold.
 */
1451 if (BytePerPixelC == 0) {
1452 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1453 detile_buf_vp_horz_limit = (double) swath_buf_size
1454 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1455 / (1 + horz_div_l));
1456 detile_buf_vp_vert_limit = (double) swath_buf_size
1457 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1459 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1460 detile_buf_vp_horz_limit = (double) swath_buf_size
1461 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1463 + (double) RequestHeight256ByteChroma
1464 * BytePerPixelC / (1 + horz_div_c)
1466 detile_buf_vp_vert_limit = (double) swath_buf_size
1467 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
1468 + 256.0 / RequestHeight256ByteChroma
1469 / (1 + vert_div_c) / (1 + yuv420));
/* dm_420_10 gets both detile limits scaled up by 1.5. */
1472 if (SourcePixelFormat == dm_420_10) {
1473 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1474 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/* Back off by one and apply dml_floor with granularity 16 (presumably rounding down to a multiple of 16 - helper defined elsewhere). */
1477 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1478 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/*
 * Fixed "MAS" viewport caps: 5760 horizontally; vertically 2880 when a
 * chroma plane exists, otherwise 5760. The usable viewport is the smaller
 * of the MAS cap and the detile-buffer limit.
 */
1480 MAS_vp_horz_limit = 5760;
1481 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1482 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1483 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
/* Effective surface size: the luma surface clamped to the usable viewport; chroma is luma / (1 + yuv420). */
1485 (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1486 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1487 eff_surf_height_l = (
1488 SurfaceHeightLuma > max_vp_vert_height ?
1489 max_vp_vert_height : SurfaceHeightLuma);
1490 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/* Bytes in one full swath for each plane and scan direction; chroma values are 0 when there is no chroma plane. */
1492 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1493 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1494 if (BytePerPixelC > 0) {
1495 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma
1497 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1499 full_swath_bytes_horz_wc_c = 0;
1500 full_swath_bytes_vert_wc_c = 0;
/*
 * dm_420_10: swath sizes are scaled by 2/3 and passed through dml_ceil with
 * granularity 256.
 * NOTE(review): the operands are long, so "* 2 / 3" is evaluated with
 * integer (truncating) division before dml_ceil sees the value.
 */
1503 if (SourcePixelFormat == dm_420_10) {
1504 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1505 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1506 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1507 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
/*
 * Horizontal scan: if two full swaths of both planes fit in the DET, neither
 * plane needs 128-byte requests. Otherwise the luma-to-chroma swath size
 * ratio (1.5x threshold) selects which plane is downgraded, and if neither
 * intermediate case applies both planes are downgraded.
 */
1510 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1511 req128_horz_wc_l = 0;
1512 req128_horz_wc_c = 0;
1513 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
1514 && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c
1516 req128_horz_wc_l = 0;
1517 req128_horz_wc_c = 1;
1518 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
1519 && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c
1521 req128_horz_wc_l = 1;
1522 req128_horz_wc_c = 0;
1524 req128_horz_wc_l = 1;
1525 req128_horz_wc_c = 1;
/* Vertical scan: same decision as above, using the vertical swath sizes. */
1528 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1529 req128_vert_wc_l = 0;
1530 req128_vert_wc_c = 0;
1531 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
1532 && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c
1534 req128_vert_wc_l = 0;
1535 req128_vert_wc_c = 1;
1536 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
1537 && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c
1539 req128_vert_wc_l = 1;
1540 req128_vert_wc_c = 0;
1542 req128_vert_wc_l = 1;
1543 req128_vert_wc_c = 1;
/*
 * Segment-order flags, per plane and direction, derived purely from bytes
 * per pixel and tiling format. The same two tests are applied first to luma
 * (BytePerPixelY) and then to chroma (BytePerPixelC).
 */
1546 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1547 segment_order_horz_contiguous_luma = 0;
1549 segment_order_horz_contiguous_luma = 1;
1551 if ((BytePerPixelY == 8
1552 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1553 || TilingFormat == dm_sw_64kb_d_t
1554 || TilingFormat == dm_sw_64kb_r_x))
1555 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1556 segment_order_vert_contiguous_luma = 0;
1558 segment_order_vert_contiguous_luma = 1;
1560 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1561 segment_order_horz_contiguous_chroma = 0;
1563 segment_order_horz_contiguous_chroma = 1;
1565 if ((BytePerPixelC == 8
1566 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1567 || TilingFormat == dm_sw_64kb_d_t
1568 || TilingFormat == dm_sw_64kb_r_x))
1569 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1570 segment_order_vert_contiguous_chroma = 0;
1572 segment_order_vert_contiguous_chroma = 1;
/*
 * Pick the request type per plane.
 *  - Scan direction unknown: 256-byte requests only if neither direction
 *    needs the 128-byte fallback; non-contiguous 128 if any direction that
 *    needs the fallback lacks contiguous segment order; else contiguous 128.
 *  - Otherwise only the flags of the actual direction are consulted:
 *    horizontal when ScanOrientation != dm_vert, vertical in the last group.
 */
1575 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1576 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1577 RequestLuma = REQ_256Bytes;
1578 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0)
1579 || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1580 RequestLuma = REQ_128BytesNonContiguous;
1582 RequestLuma = REQ_128BytesContiguous;
1584 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1585 RequestChroma = REQ_256Bytes;
1586 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0)
1587 || (req128_vert_wc_c == 1
1588 && segment_order_vert_contiguous_chroma == 0)) {
1589 RequestChroma = REQ_128BytesNonContiguous;
1591 RequestChroma = REQ_128BytesContiguous;
1593 } else if (ScanOrientation != dm_vert) {
1594 if (req128_horz_wc_l == 0) {
1595 RequestLuma = REQ_256Bytes;
1596 } else if (segment_order_horz_contiguous_luma == 0) {
1597 RequestLuma = REQ_128BytesNonContiguous;
1599 RequestLuma = REQ_128BytesContiguous;
1601 if (req128_horz_wc_c == 0) {
1602 RequestChroma = REQ_256Bytes;
1603 } else if (segment_order_horz_contiguous_chroma == 0) {
1604 RequestChroma = REQ_128BytesNonContiguous;
1606 RequestChroma = REQ_128BytesContiguous;
1609 if (req128_vert_wc_l == 0) {
1610 RequestLuma = REQ_256Bytes;
1611 } else if (segment_order_vert_contiguous_luma == 0) {
1612 RequestLuma = REQ_128BytesNonContiguous;
1614 RequestLuma = REQ_128BytesContiguous;
1616 if (req128_vert_wc_c == 0) {
1617 RequestChroma = REQ_256Bytes;
1618 } else if (segment_order_vert_contiguous_chroma == 0) {
1619 RequestChroma = REQ_128BytesNonContiguous;
1621 RequestChroma = REQ_128BytesContiguous;
/*
 * Translate the request type into the output triple
 * (uncompressed, compressed, independent):
 *   REQ_256Bytes           -> 256, 256, 0
 *   REQ_128BytesContiguous -> 256, 128, 128
 *   anything else          -> 256, 64, 64
 */
1625 if (RequestLuma == REQ_256Bytes) {
1626 *MaxUncompressedBlockLuma = 256;
1627 *MaxCompressedBlockLuma = 256;
1628 *IndependentBlockLuma = 0;
1629 } else if (RequestLuma == REQ_128BytesContiguous) {
1630 *MaxUncompressedBlockLuma = 256;
1631 *MaxCompressedBlockLuma = 128;
1632 *IndependentBlockLuma = 128;
1634 *MaxUncompressedBlockLuma = 256;
1635 *MaxCompressedBlockLuma = 64;
1636 *IndependentBlockLuma = 64;
1639 if (RequestChroma == REQ_256Bytes) {
1640 *MaxUncompressedBlockChroma = 256;
1641 *MaxCompressedBlockChroma = 256;
1642 *IndependentBlockChroma = 0;
1643 } else if (RequestChroma == REQ_128BytesContiguous) {
1644 *MaxUncompressedBlockChroma = 256;
1645 *MaxCompressedBlockChroma = 128;
1646 *IndependentBlockChroma = 128;
1648 *MaxUncompressedBlockChroma = 256;
1649 *MaxCompressedBlockChroma = 64;
1650 *IndependentBlockChroma = 64;
/* Final override: outputs of a plane that is absent, or of any plane when DCC is off, are reported as 0. */
1653 if (DCCEnabled != true || BytePerPixelC == 0) {
1654 *MaxUncompressedBlockChroma = 0;
1655 *MaxCompressedBlockChroma = 0;
1656 *IndependentBlockChroma = 0;
1659 if (DCCEnabled != true) {
1660 *MaxUncompressedBlockLuma = 0;
1661 *MaxCompressedBlockLuma = 0;
1662 *IndependentBlockLuma = 0;
1667 static double CalculatePrefetchSourceLines(
1668 struct display_mode_lib *mode_lib,
1672 bool ProgressiveToInterlaceUnitInOPP,
1673 unsigned int SwathHeight,
1674 unsigned int ViewportYStart,
1675 double *VInitPreFill,
1676 unsigned int *MaxNumSwath)
1678 unsigned int MaxPartialSwath = 0;
1680 if (ProgressiveToInterlaceUnitInOPP)
1681 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1683 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1685 if (!mode_lib->vba.IgnoreViewportPositioning) {
1687 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1689 if (*VInitPreFill > 1.0)
1690 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1692 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1694 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1698 if (ViewportYStart != 0)
1700 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1702 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1704 if (*VInitPreFill > 1.0)
1705 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1707 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
1711 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1714 static unsigned int CalculateVMAndRowBytes(
1715 struct display_mode_lib *mode_lib,
1717 unsigned int BlockHeight256Bytes,
1718 unsigned int BlockWidth256Bytes,
1719 enum source_format_class SourcePixelFormat,
1720 unsigned int SurfaceTiling,
1721 unsigned int BytePerPixel,
1722 enum scan_direction_class ScanDirection,
1723 unsigned int SwathWidth,
1724 unsigned int ViewportHeight,
1727 unsigned int HostVMMaxNonCachedPageTableLevels,
1728 unsigned int GPUVMMinPageSize,
1729 unsigned int HostVMMinPageSize,
1730 unsigned int PTEBufferSizeInRequests,
1732 unsigned int DCCMetaPitch,
1733 unsigned int *MacroTileWidth,
1734 unsigned int *MetaRowByte,
1735 unsigned int *PixelPTEBytesPerRow,
1736 bool *PTEBufferSizeNotExceeded,
1737 unsigned int *dpte_row_width_ub,
1738 unsigned int *dpte_row_height,
1739 unsigned int *MetaRequestWidth,
1740 unsigned int *MetaRequestHeight,
1741 unsigned int *meta_row_width,
1742 unsigned int *meta_row_height,
1743 unsigned int *vm_group_bytes,
1744 unsigned int *dpte_group_bytes,
1745 unsigned int *PixelPTEReqWidth,
1746 unsigned int *PixelPTEReqHeight,
1747 unsigned int *PTERequestSize,
1748 unsigned int *DPDE0BytesFrame,
1749 unsigned int *MetaPTEBytesFrame)
1751 unsigned int MPDEBytesFrame = 0;
1752 unsigned int DCCMetaSurfaceBytes = 0;
1753 unsigned int MacroTileSizeBytes = 0;
1754 unsigned int MacroTileHeight = 0;
1755 unsigned int ExtraDPDEBytesFrame = 0;
1756 unsigned int PDEAndMetaPTEBytesFrame = 0;
1757 unsigned int PixelPTEReqHeightPTEs = 0;
1758 unsigned int HostVMDynamicLevels = 0;
1760 double FractionOfPTEReturnDrop;
1762 if (GPUVMEnable == true && HostVMEnable == true) {
1763 if (HostVMMinPageSize < 2048) {
1764 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1765 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1766 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1768 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1772 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1773 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1774 if (ScanDirection != dm_vert) {
1775 *meta_row_height = *MetaRequestHeight;
1776 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1777 + *MetaRequestWidth;
1778 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1780 *meta_row_height = *MetaRequestWidth;
1781 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1782 + *MetaRequestHeight;
1783 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1785 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1786 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1787 if (GPUVMEnable == true) {
1788 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1789 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1791 *MetaPTEBytesFrame = 0;
1795 if (DCCEnable != true) {
1796 *MetaPTEBytesFrame = 0;
1801 if (SurfaceTiling == dm_sw_linear) {
1802 MacroTileSizeBytes = 256;
1803 MacroTileHeight = BlockHeight256Bytes;
1805 MacroTileSizeBytes = 65536;
1806 MacroTileHeight = 16 * BlockHeight256Bytes;
1808 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1810 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1811 if (ScanDirection != dm_vert) {
1812 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1814 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1816 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1818 *DPDE0BytesFrame = 0;
1819 ExtraDPDEBytesFrame = 0;
1822 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1823 + ExtraDPDEBytesFrame;
1825 if (HostVMEnable == true) {
1826 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1829 if (SurfaceTiling == dm_sw_linear) {
1830 PixelPTEReqHeightPTEs = 1;
1831 *PixelPTEReqHeight = 1;
1832 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1833 *PTERequestSize = 64;
1834 FractionOfPTEReturnDrop = 0;
1835 } else if (MacroTileSizeBytes == 4096) {
1836 PixelPTEReqHeightPTEs = 1;
1837 *PixelPTEReqHeight = MacroTileHeight;
1838 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1839 *PTERequestSize = 64;
1840 if (ScanDirection != dm_vert)
1841 FractionOfPTEReturnDrop = 0;
1843 FractionOfPTEReturnDrop = 7 / 8;
1844 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1845 PixelPTEReqHeightPTEs = 16;
1846 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1847 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1848 *PTERequestSize = 128;
1849 FractionOfPTEReturnDrop = 0;
1851 PixelPTEReqHeightPTEs = 1;
1852 *PixelPTEReqHeight = MacroTileHeight;
1853 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1854 *PTERequestSize = 64;
1855 FractionOfPTEReturnDrop = 0;
1858 if (SurfaceTiling == dm_sw_linear) {
1859 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1860 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1861 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1862 } else if (ScanDirection != dm_vert) {
1863 *dpte_row_height = *PixelPTEReqHeight;
1864 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1865 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1867 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1868 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1869 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1871 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1872 <= 64 * PTEBufferSizeInRequests) {
1873 *PTEBufferSizeNotExceeded = true;
1875 *PTEBufferSizeNotExceeded = false;
1878 if (GPUVMEnable != true) {
1879 *PixelPTEBytesPerRow = 0;
1880 *PTEBufferSizeNotExceeded = true;
1882 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1884 if (HostVMEnable == true) {
1885 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1888 if (HostVMEnable == true) {
1889 *vm_group_bytes = 512;
1890 *dpte_group_bytes = 512;
1891 } else if (GPUVMEnable == true) {
1892 *vm_group_bytes = 2048;
1893 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1894 *dpte_group_bytes = 512;
1896 *dpte_group_bytes = 2048;
1899 *vm_group_bytes = 0;
1900 *dpte_group_bytes = 0;
1903 return PDEAndMetaPTEBytesFrame;
1906 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1907 struct display_mode_lib *mode_lib)
1909 struct vba_vars_st *v = &mode_lib->vba;
1911 long ReorderBytes = 0;
1912 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1913 double MaxTotalRDBandwidth = 0;
1914 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1915 bool DestinationLineTimesForPrefetchLessThan2 = false;
1916 bool VRatioPrefetchMoreThan4 = false;
1919 v->WritebackDISPCLK = 0.0;
1920 v->DISPCLKWithRamping = 0;
1921 v->DISPCLKWithoutRamping = 0;
1922 v->GlobalDPPCLK = 0.0;
1923 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
1924 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1925 v->ReturnBusWidth * v->DCFCLK,
1926 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1927 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1928 if (v->HostVMEnable != true) {
1929 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1931 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1933 /* End DAL custom code */
1935 // DISPCLK and DPPCLK Calculation
1937 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1938 if (v->WritebackEnable[k]) {
1939 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1940 dml30_CalculateWriteBackDISPCLK(
1941 v->WritebackPixelFormat[k],
1943 v->WritebackHRatio[k],
1944 v->WritebackVRatio[k],
1945 v->WritebackHTaps[k],
1946 v->WritebackVTaps[k],
1947 v->WritebackSourceWidth[k],
1948 v->WritebackDestinationWidth[k],
1950 v->WritebackLineBufferSize));
1954 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1955 if (v->HRatio[k] > 1) {
1956 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1957 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1959 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1960 v->MaxDCHUBToPSCLThroughput,
1961 v->MaxPSCLToLBThroughput);
1964 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1965 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1966 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1968 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1969 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1970 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1973 if ((v->SourcePixelFormat[k] != dm_420_8
1974 && v->SourcePixelFormat[k] != dm_420_10
1975 && v->SourcePixelFormat[k] != dm_420_12
1976 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1977 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1978 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1980 if (v->HRatioChroma[k] > 1) {
1981 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1982 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1984 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1985 v->MaxDCHUBToPSCLThroughput,
1986 v->MaxPSCLToLBThroughput);
1988 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
1989 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
1990 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
1992 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
1993 && v->DPPCLKUsingSingleDPPChroma
1994 < 2 * v->PixelClock[k]) {
1995 v->DPPCLKUsingSingleDPPChroma = 2
1999 v->DPPCLKUsingSingleDPP[k] = dml_max(
2000 v->DPPCLKUsingSingleDPPLuma,
2001 v->DPPCLKUsingSingleDPPChroma);
2005 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2006 if (v->BlendingAndTiming[k] != k)
2008 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2009 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2010 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2011 * (1 + v->DISPCLKRampingMargin / 100));
2012 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2013 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2014 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2015 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2016 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2017 * (1 + v->DISPCLKRampingMargin / 100));
2018 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2019 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2021 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2022 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2023 * (1 + v->DISPCLKRampingMargin / 100));
2024 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2025 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2029 v->DISPCLKWithRamping = dml_max(
2030 v->DISPCLKWithRamping,
2031 v->WritebackDISPCLK);
2032 v->DISPCLKWithoutRamping = dml_max(
2033 v->DISPCLKWithoutRamping,
2034 v->WritebackDISPCLK);
2036 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2037 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2038 v->DISPCLKWithRamping,
2039 v->DISPCLKDPPCLKVCOSpeed);
2040 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2041 v->DISPCLKWithoutRamping,
2042 v->DISPCLKDPPCLKVCOSpeed);
2043 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2044 v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz,
2045 v->DISPCLKDPPCLKVCOSpeed);
2046 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
2047 > v->MaxDispclkRoundedToDFSGranularity) {
2048 v->DISPCLK_calculated =
2049 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2050 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
2051 > v->MaxDispclkRoundedToDFSGranularity) {
2052 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2054 v->DISPCLK_calculated =
2055 v->DISPCLKWithRampingRoundedToDFSGranularity;
2057 v->DISPCLK = v->DISPCLK_calculated;
2058 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2060 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2061 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
2063 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2064 v->GlobalDPPCLK = dml_max(
2066 v->DPPCLK_calculated[k]);
2068 v->GlobalDPPCLK = RoundToDFSGranularityUp(
2070 v->DISPCLKDPPCLKVCOSpeed);
2071 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2072 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
2074 v->DPPCLK_calculated[k] * 255.0
2077 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2078 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2081 // Urgent and B P-State/DRAM Clock Change Watermark
2082 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2083 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2085 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2086 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2087 v->SourcePixelFormat[k],
2088 v->SurfaceTiling[k],
2089 &v->BytePerPixelY[k],
2090 &v->BytePerPixelC[k],
2091 &v->BytePerPixelDETY[k],
2092 &v->BytePerPixelDETC[k],
2093 &v->BlockHeight256BytesY[k],
2094 &v->BlockHeight256BytesC[k],
2095 &v->BlockWidth256BytesY[k],
2096 &v->BlockWidth256BytesC[k]);
2099 CalculateSwathWidth(
2101 v->NumberOfActivePlanes,
2102 v->SourcePixelFormat,
2110 v->ODMCombineEnabled,
2113 v->BlockHeight256BytesY,
2114 v->BlockHeight256BytesC,
2115 v->BlockWidth256BytesY,
2116 v->BlockWidth256BytesC,
2117 v->BlendingAndTiming,
2121 v->SwathWidthSingleDPPY,
2122 v->SwathWidthSingleDPPC,
2127 v->swath_width_luma_ub,
2128 v->swath_width_chroma_ub);
2131 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2132 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2133 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
2134 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2138 // DCFCLK Deep Sleep
2139 CalculateDCFCLKDeepSleep(
2141 v->NumberOfActivePlanes,
2152 v->PSCL_THROUGHPUT_LUMA,
2153 v->PSCL_THROUGHPUT_CHROMA,
2155 v->ReadBandwidthPlaneLuma,
2156 v->ReadBandwidthPlaneChroma,
2158 &v->DCFCLKDeepSleep);
2161 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2162 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2163 v->DSCCLK_calculated[k] = 0.0;
2165 if (v->OutputFormat[k] == dm_420)
2166 v->DSCFormatFactor = 2;
2167 else if (v->OutputFormat[k] == dm_444)
2168 v->DSCFormatFactor = 1;
2169 else if (v->OutputFormat[k] == dm_n422)
2170 v->DSCFormatFactor = 2;
2172 v->DSCFormatFactor = 1;
2173 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2174 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2175 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2176 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2177 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2178 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2180 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2181 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2186 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2187 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2189 if (v->DSCEnabled[k] && BPP != 0) {
2190 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2191 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2193 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2194 v->NumberOfDSCSlices[k],
2197 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2198 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2199 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2201 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2202 v->NumberOfDSCSlices[k] / 2.0,
2205 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2207 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2209 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2210 v->NumberOfDSCSlices[k] / 4.0,
2213 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2215 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2221 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2222 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2223 if (j != k && v->BlendingAndTiming[k] == j
2224 && v->DSCEnabled[j])
2225 v->DSCDelay[k] = v->DSCDelay[j];
2228 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2229 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2230 unsigned int PixelPTEBytesPerRowY = 0;
2231 unsigned int MetaRowByteY = 0;
2232 unsigned int MetaRowByteC = 0;
2233 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2234 unsigned int PixelPTEBytesPerRowC = 0;
2235 bool PTEBufferSizeNotExceededY = 0;
2236 bool PTEBufferSizeNotExceededC = 0;
2239 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2240 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2241 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2242 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2244 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2245 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2248 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2251 v->BlockHeight256BytesC[k],
2252 v->BlockWidth256BytesC[k],
2253 v->SourcePixelFormat[k],
2254 v->SurfaceTiling[k],
2255 v->BytePerPixelC[k],
2258 v->ViewportHeightChroma[k],
2261 v->HostVMMaxNonCachedPageTableLevels,
2262 v->GPUVMMinPageSize,
2263 v->HostVMMinPageSize,
2264 v->PTEBufferSizeInRequestsForChroma,
2266 v->DCCMetaPitchC[k],
2267 &v->MacroTileWidthC[k],
2269 &PixelPTEBytesPerRowC,
2270 &PTEBufferSizeNotExceededC,
2271 &v->dpte_row_width_chroma_ub[k],
2272 &v->dpte_row_height_chroma[k],
2273 &v->meta_req_width_chroma[k],
2274 &v->meta_req_height_chroma[k],
2275 &v->meta_row_width_chroma[k],
2276 &v->meta_row_height_chroma[k],
2279 &v->PixelPTEReqWidthC[k],
2280 &v->PixelPTEReqHeightC[k],
2281 &v->PTERequestSizeC[k],
2282 &v->dpde0_bytes_per_frame_ub_c[k],
2283 &v->meta_pte_bytes_per_frame_ub_c[k]);
2285 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2290 v->ProgressiveToInterlaceUnitInOPP,
2292 v->ViewportYStartC[k],
2293 &v->VInitPreFillC[k],
2294 &v->MaxNumSwathC[k]);
2296 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2297 v->PTEBufferSizeInRequestsForChroma = 0;
2298 PixelPTEBytesPerRowC = 0;
2299 PDEAndMetaPTEBytesFrameC = 0;
2301 v->MaxNumSwathC[k] = 0;
2302 v->PrefetchSourceLinesC[k] = 0;
2305 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2308 v->BlockHeight256BytesY[k],
2309 v->BlockWidth256BytesY[k],
2310 v->SourcePixelFormat[k],
2311 v->SurfaceTiling[k],
2312 v->BytePerPixelY[k],
2315 v->ViewportHeight[k],
2318 v->HostVMMaxNonCachedPageTableLevels,
2319 v->GPUVMMinPageSize,
2320 v->HostVMMinPageSize,
2321 v->PTEBufferSizeInRequestsForLuma,
2323 v->DCCMetaPitchY[k],
2324 &v->MacroTileWidthY[k],
2326 &PixelPTEBytesPerRowY,
2327 &PTEBufferSizeNotExceededY,
2328 &v->dpte_row_width_luma_ub[k],
2329 &v->dpte_row_height[k],
2330 &v->meta_req_width[k],
2331 &v->meta_req_height[k],
2332 &v->meta_row_width[k],
2333 &v->meta_row_height[k],
2334 &v->vm_group_bytes[k],
2335 &v->dpte_group_bytes[k],
2336 &v->PixelPTEReqWidthY[k],
2337 &v->PixelPTEReqHeightY[k],
2338 &v->PTERequestSizeY[k],
2339 &v->dpde0_bytes_per_frame_ub_l[k],
2340 &v->meta_pte_bytes_per_frame_ub_l[k]);
2342 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2347 v->ProgressiveToInterlaceUnitInOPP,
2349 v->ViewportYStartY[k],
2350 &v->VInitPreFillY[k],
2351 &v->MaxNumSwathY[k]);
2352 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2353 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2354 + PDEAndMetaPTEBytesFrameC;
2355 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2357 CalculateRowBandwidth(
2359 v->SourcePixelFormat[k],
2363 v->HTotal[k] / v->PixelClock[k],
2366 v->meta_row_height[k],
2367 v->meta_row_height_chroma[k],
2368 PixelPTEBytesPerRowY,
2369 PixelPTEBytesPerRowC,
2370 v->dpte_row_height[k],
2371 v->dpte_row_height_chroma[k],
2373 &v->dpte_row_bw[k]);
2376 v->TotalDCCActiveDPP = 0;
2377 v->TotalActiveDPP = 0;
2378 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2379 v->TotalActiveDPP = v->TotalActiveDPP
2380 + v->DPPPerPlane[k];
2381 if (v->DCCEnable[k])
2382 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2383 + v->DPPPerPlane[k];
2387 ReorderBytes = v->NumberOfChannels * dml_max3(
2388 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2389 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2390 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2392 v->UrgentExtraLatency = CalculateExtraLatency(
2393 v->RoundTripPingLatencyCycles,
2397 v->PixelChunkSizeInKByte,
2398 v->TotalDCCActiveDPP,
2403 v->NumberOfActivePlanes,
2405 v->dpte_group_bytes,
2406 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2407 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2408 v->HostVMMinPageSize,
2409 v->HostVMMaxNonCachedPageTableLevels);
2411 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2413 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2414 if (v->BlendingAndTiming[k] == k) {
2415 if (v->WritebackEnable[k] == true) {
2416 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2417 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2418 v->WritebackHRatio[k],
2419 v->WritebackVRatio[k],
2420 v->WritebackVTaps[k],
2421 v->WritebackDestinationWidth[k],
2422 v->WritebackDestinationHeight[k],
2423 v->WritebackSourceHeight[k],
2424 v->HTotal[k]) / v->DISPCLK;
2426 v->WritebackDelay[v->VoltageLevel][k] = 0;
2427 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2428 if (v->BlendingAndTiming[j] == k
2429 && v->WritebackEnable[j] == true) {
2430 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2431 v->WritebackLatency + CalculateWriteBackDelay(
2432 v->WritebackPixelFormat[j],
2433 v->WritebackHRatio[j],
2434 v->WritebackVRatio[j],
2435 v->WritebackVTaps[j],
2436 v->WritebackDestinationWidth[j],
2437 v->WritebackDestinationHeight[j],
2438 v->WritebackSourceHeight[j],
2439 v->HTotal[k]) / v->DISPCLK);
2445 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2446 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2447 if (v->BlendingAndTiming[k] == j)
2448 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2450 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2451 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2454 v->MaximumMaxVStartupLines = 0;
2455 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2456 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2458 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2459 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2461 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2463 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2466 v->FractionOfUrgentBandwidth = 0.0;
2467 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2469 v->VStartupLines = 13;
2472 MaxTotalRDBandwidth = 0;
2473 MaxTotalRDBandwidthNoUrgentBurst = 0;
2474 DestinationLineTimesForPrefetchLessThan2 = false;
2475 VRatioPrefetchMoreThan4 = false;
2476 TWait = CalculateTWait(
2478 v->FinalDRAMClockChangeLatency,
2480 v->SREnterPlusExitTime);
2482 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2483 Pipe myPipe = { 0 };
2485 myPipe.DPPCLK = v->DPPCLK[k];
2486 myPipe.DISPCLK = v->DISPCLK;
2487 myPipe.PixelClock = v->PixelClock[k];
2488 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2489 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2490 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2491 myPipe.SourceScan = v->SourceScan[k];
2492 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2493 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2494 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2495 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2496 myPipe.InterlaceEnable = v->Interlace[k];
2497 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2498 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2499 myPipe.HTotal = v->HTotal[k];
2500 myPipe.DCCEnable = v->DCCEnable[k];
2501 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2503 v->ErrorResult[k] = CalculatePrefetchSchedule(
2505 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2506 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2509 v->DPPCLKDelaySubtotal
2510 + v->DPPCLKDelayCNVCFormater,
2512 v->DPPCLKDelaySCLLBOnly,
2513 v->DPPCLKDelayCNVCCursor,
2514 v->DISPCLKDelaySubtotal,
2515 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2517 v->MaxInterDCNTileRepeaters,
2518 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2519 v->MaxVStartupLines[k],
2520 v->GPUVMMaxPageTableLevels,
2523 v->HostVMMaxNonCachedPageTableLevels,
2524 v->HostVMMinPageSize,
2525 v->DynamicMetadataEnable[k],
2526 v->DynamicMetadataVMEnabled,
2527 v->DynamicMetadataLinesBeforeActiveRequired[k],
2528 v->DynamicMetadataTransmittedBytes[k],
2530 v->UrgentExtraLatency,
2532 v->PDEAndMetaPTEBytesFrame[k],
2534 v->PixelPTEBytesPerRow[k],
2535 v->PrefetchSourceLinesY[k],
2537 v->BytePerPixelY[k],
2538 v->VInitPreFillY[k],
2540 v->PrefetchSourceLinesC[k],
2542 v->BytePerPixelC[k],
2543 v->VInitPreFillC[k],
2545 v->swath_width_luma_ub[k],
2546 v->swath_width_chroma_ub[k],
2550 v->ProgressiveToInterlaceUnitInOPP,
2551 &v->DSTXAfterScaler[k],
2552 &v->DSTYAfterScaler[k],
2553 &v->DestinationLinesForPrefetch[k],
2554 &v->PrefetchBandwidth[k],
2555 &v->DestinationLinesToRequestVMInVBlank[k],
2556 &v->DestinationLinesToRequestRowInVBlank[k],
2557 &v->VRatioPrefetchY[k],
2558 &v->VRatioPrefetchC[k],
2559 &v->RequiredPrefetchPixDataBWLuma[k],
2560 &v->RequiredPrefetchPixDataBWChroma[k],
2561 &v->NotEnoughTimeForDynamicMetadata[k],
2563 &v->prefetch_vmrow_bw[k],
2566 &v->VUpdateOffsetPix[k],
2567 &v->VUpdateWidthPix[k],
2568 &v->VReadyOffsetPix[k]);
2569 if (v->BlendingAndTiming[k] == k) {
2570 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2571 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2572 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2573 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2574 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2576 int x = v->BlendingAndTiming[k];
2577 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2578 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2579 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2580 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2581 if (!v->MaxVStartupLines[x])
2582 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2583 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2587 v->NotEnoughUrgentLatencyHiding[0][0] = false;
2588 v->NotEnoughUrgentLatencyHidingPre = false;
2590 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2591 v->cursor_bw[k] = v->NumberOfCursors[k]
2592 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2594 / (v->HTotal[k] / v->PixelClock[k])
2596 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2597 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2599 / (v->HTotal[k] / v->PixelClock[k])
2600 * v->VRatioPrefetchY[k];
2602 CalculateUrgentBurstFactor(
2603 v->swath_width_luma_ub[k],
2604 v->swath_width_chroma_ub[k],
2605 v->DETBufferSizeInKByte[0],
2608 v->HTotal[k] / v->PixelClock[k],
2610 v->CursorBufferSize,
2611 v->CursorWidth[k][0],
2615 v->BytePerPixelDETY[k],
2616 v->BytePerPixelDETC[k],
2617 v->DETBufferSizeY[k],
2618 v->DETBufferSizeC[k],
2619 &v->UrgentBurstFactorCursor[k],
2620 &v->UrgentBurstFactorLuma[k],
2621 &v->UrgentBurstFactorChroma[k],
2622 &v->NoUrgentLatencyHiding[k]);
2624 CalculateUrgentBurstFactor(
2625 v->swath_width_luma_ub[k],
2626 v->swath_width_chroma_ub[k],
2627 v->DETBufferSizeInKByte[0],
2630 v->HTotal[k] / v->PixelClock[k],
2632 v->CursorBufferSize,
2633 v->CursorWidth[k][0],
2635 v->VRatioPrefetchY[k],
2636 v->VRatioPrefetchC[k],
2637 v->BytePerPixelDETY[k],
2638 v->BytePerPixelDETC[k],
2639 v->DETBufferSizeY[k],
2640 v->DETBufferSizeC[k],
2641 &v->UrgentBurstFactorCursorPre[k],
2642 &v->UrgentBurstFactorLumaPre[k],
2643 &v->UrgentBurstFactorChromaPre[k],
2644 &v->NoUrgentLatencyHidingPre[k]);
2646 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2647 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2648 v->ReadBandwidthPlaneLuma[k] *
2649 v->UrgentBurstFactorLuma[k] +
2650 v->ReadBandwidthPlaneChroma[k] *
2651 v->UrgentBurstFactorChroma[k] +
2653 v->UrgentBurstFactorCursor[k] +
2654 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2655 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2656 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2657 v->UrgentBurstFactorCursorPre[k]);
2659 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2660 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2661 v->ReadBandwidthPlaneLuma[k] +
2662 v->ReadBandwidthPlaneChroma[k] +
2664 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2665 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2667 if (v->DestinationLinesForPrefetch[k] < 2)
2668 DestinationLineTimesForPrefetchLessThan2 = true;
2669 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2670 VRatioPrefetchMoreThan4 = true;
2671 if (v->NoUrgentLatencyHiding[k] == true)
2672 v->NotEnoughUrgentLatencyHiding[0][0] = true;
2674 if (v->NoUrgentLatencyHidingPre[k] == true)
2675 v->NotEnoughUrgentLatencyHidingPre = true;
2677 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2680 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0
2681 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2682 && !DestinationLineTimesForPrefetchLessThan2)
2683 v->PrefetchModeSupported = true;
2685 v->PrefetchModeSupported = false;
2686 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2687 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2688 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2689 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2692 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2693 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2694 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2695 v->BandwidthAvailableForImmediateFlip =
2696 v->BandwidthAvailableForImmediateFlip
2698 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2699 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2700 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2701 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2702 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2703 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2706 v->TotImmediateFlipBytes = 0;
2707 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2708 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2710 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2711 CalculateFlipSchedule(
2713 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2714 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2715 v->UrgentExtraLatency,
2717 v->GPUVMMaxPageTableLevels,
2719 v->HostVMMaxNonCachedPageTableLevels,
2721 v->HostVMMinPageSize,
2722 v->PDEAndMetaPTEBytesFrame[k],
2724 v->PixelPTEBytesPerRow[k],
2725 v->BandwidthAvailableForImmediateFlip,
2726 v->TotImmediateFlipBytes,
2727 v->SourcePixelFormat[k],
2728 v->HTotal[k] / v->PixelClock[k],
2733 v->dpte_row_height[k],
2734 v->meta_row_height[k],
2735 v->dpte_row_height_chroma[k],
2736 v->meta_row_height_chroma[k],
2737 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2738 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2739 &v->final_flip_bw[k],
2740 &v->ImmediateFlipSupportedForPipe[k]);
2742 v->total_dcn_read_bw_with_flip = 0.0;
2743 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2744 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2745 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2746 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2747 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2748 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2749 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2750 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2751 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2752 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2753 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2754 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2755 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2756 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2757 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2758 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2759 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2762 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2764 v->ImmediateFlipSupported = true;
2765 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2766 v->ImmediateFlipSupported = false;
2767 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2769 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2770 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2771 v->ImmediateFlipSupported = false;
2775 v->ImmediateFlipSupported = false;
2778 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2779 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2780 v->PrefetchModeSupported = false;
2781 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2785 v->VStartupLines = v->VStartupLines + 1;
2786 v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport &&
2787 !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2788 v->ImmediateFlipSupported)) ? true : false;
2789 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2790 ASSERT(v->PrefetchModeSupported);
2792 //Watermarks and NB P-State/DRAM Clock Change Support
2794 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2795 CalculateWatermarksAndDRAMSpeedChangeSupport(
2798 v->NumberOfActivePlanes,
2799 v->MaxLineBufferLines,
2801 v->DPPOutputBufferPixels,
2802 v->DETBufferSizeInKByte[0],
2803 v->WritebackInterfaceBufferSize,
2807 v->dpte_group_bytes,
2810 v->UrgentExtraLatency,
2811 v->WritebackLatency,
2812 v->WritebackChunkSize,
2814 v->FinalDRAMClockChangeLatency,
2816 v->SREnterPlusExitTime,
2836 v->BlendingAndTiming,
2837 v->BytePerPixelDETY,
2838 v->BytePerPixelDETC,
2842 v->WritebackPixelFormat,
2843 v->WritebackDestinationWidth,
2844 v->WritebackDestinationHeight,
2845 v->WritebackSourceHeight,
2846 &DRAMClockChangeSupport,
2847 &v->UrgentWatermark,
2848 &v->WritebackUrgentWatermark,
2849 &v->DRAMClockChangeWatermark,
2850 &v->WritebackDRAMClockChangeWatermark,
2851 &v->StutterExitWatermark,
2852 &v->StutterEnterPlusExitWatermark,
2853 &v->MinActiveDRAMClockChangeLatencySupported);
2855 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2856 if (v->WritebackEnable[k] == true) {
2857 if (v->BlendingAndTiming[k] == k) {
2858 v->ThisVStartup = v->VStartup[k];
2860 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2861 if (v->BlendingAndTiming[k] == j) {
2862 v->ThisVStartup = v->VStartup[j];
2866 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2867 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2869 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2876 //Display Pipeline Delivery Time in Prefetch, Groups
2877 CalculatePixelDeliveryTimes(
2878 v->NumberOfActivePlanes,
2883 v->swath_width_luma_ub,
2884 v->swath_width_chroma_ub,
2889 v->PSCL_THROUGHPUT_LUMA,
2890 v->PSCL_THROUGHPUT_CHROMA,
2897 v->BlockWidth256BytesY,
2898 v->BlockHeight256BytesY,
2899 v->BlockWidth256BytesC,
2900 v->BlockHeight256BytesC,
2901 v->DisplayPipeLineDeliveryTimeLuma,
2902 v->DisplayPipeLineDeliveryTimeChroma,
2903 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2904 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2905 v->DisplayPipeRequestDeliveryTimeLuma,
2906 v->DisplayPipeRequestDeliveryTimeChroma,
2907 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2908 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2909 v->CursorRequestDeliveryTime,
2910 v->CursorRequestDeliveryTimePrefetch);
2912 CalculateMetaAndPTETimes(
2913 v->NumberOfActivePlanes,
2916 v->MinMetaChunkSizeBytes,
2920 v->DestinationLinesToRequestRowInVBlank,
2921 v->DestinationLinesToRequestRowInImmediateFlip,
2928 v->dpte_row_height_chroma,
2930 v->meta_row_width_chroma,
2932 v->meta_row_height_chroma,
2934 v->meta_req_width_chroma,
2936 v->meta_req_height_chroma,
2937 v->dpte_group_bytes,
2940 v->PixelPTEReqWidthY,
2941 v->PixelPTEReqHeightY,
2942 v->PixelPTEReqWidthC,
2943 v->PixelPTEReqHeightC,
2944 v->dpte_row_width_luma_ub,
2945 v->dpte_row_width_chroma_ub,
2946 v->DST_Y_PER_PTE_ROW_NOM_L,
2947 v->DST_Y_PER_PTE_ROW_NOM_C,
2948 v->DST_Y_PER_META_ROW_NOM_L,
2949 v->DST_Y_PER_META_ROW_NOM_C,
2950 v->TimePerMetaChunkNominal,
2951 v->TimePerChromaMetaChunkNominal,
2952 v->TimePerMetaChunkVBlank,
2953 v->TimePerChromaMetaChunkVBlank,
2954 v->TimePerMetaChunkFlip,
2955 v->TimePerChromaMetaChunkFlip,
2956 v->time_per_pte_group_nom_luma,
2957 v->time_per_pte_group_vblank_luma,
2958 v->time_per_pte_group_flip_luma,
2959 v->time_per_pte_group_nom_chroma,
2960 v->time_per_pte_group_vblank_chroma,
2961 v->time_per_pte_group_flip_chroma);
2963 CalculateVMGroupAndRequestTimes(
2964 v->NumberOfActivePlanes,
2966 v->GPUVMMaxPageTableLevels,
2969 v->DestinationLinesToRequestVMInVBlank,
2970 v->DestinationLinesToRequestVMInImmediateFlip,
2973 v->dpte_row_width_luma_ub,
2974 v->dpte_row_width_chroma_ub,
2976 v->dpde0_bytes_per_frame_ub_l,
2977 v->dpde0_bytes_per_frame_ub_c,
2978 v->meta_pte_bytes_per_frame_ub_l,
2979 v->meta_pte_bytes_per_frame_ub_c,
2980 v->TimePerVMGroupVBlank,
2981 v->TimePerVMGroupFlip,
2982 v->TimePerVMRequestVBlank,
2983 v->TimePerVMRequestFlip);
2987 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2988 if (PrefetchMode == 0) {
2989 v->AllowDRAMClockChangeDuringVBlank[k] = true;
2990 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2991 v->MinTTUVBlank[k] = dml_max(
2992 v->DRAMClockChangeWatermark,
2994 v->StutterEnterPlusExitWatermark,
2995 v->UrgentWatermark));
2996 } else if (PrefetchMode == 1) {
2997 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2998 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2999 v->MinTTUVBlank[k] = dml_max(
3000 v->StutterEnterPlusExitWatermark,
3001 v->UrgentWatermark);
3003 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3004 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3005 v->MinTTUVBlank[k] = v->UrgentWatermark;
3007 if (!v->DynamicMetadataEnable[k])
3008 v->MinTTUVBlank[k] = v->TCalc
3009 + v->MinTTUVBlank[k];
3012 // DCC Configuration
3014 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3015 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3016 v->SourcePixelFormat[k],
3017 v->SurfaceWidthY[k],
3018 v->SurfaceWidthC[k],
3019 v->SurfaceHeightY[k],
3020 v->SurfaceHeightC[k],
3021 v->DETBufferSizeInKByte[0] * 1024,
3022 v->BlockHeight256BytesY[k],
3023 v->BlockHeight256BytesC[k],
3024 v->SurfaceTiling[k],
3025 v->BytePerPixelY[k],
3026 v->BytePerPixelC[k],
3027 v->BytePerPixelDETY[k],
3028 v->BytePerPixelDETC[k],
3030 &v->DCCYMaxUncompressedBlock[k],
3031 &v->DCCCMaxUncompressedBlock[k],
3032 &v->DCCYMaxCompressedBlock[k],
3033 &v->DCCCMaxCompressedBlock[k],
3034 &v->DCCYIndependentBlock[k],
3035 &v->DCCCIndependentBlock[k]);
3039 //Maximum Bandwidth Used
3040 v->TotalDataReadBandwidth = 0;
3041 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3042 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
3043 + v->ReadBandwidthPlaneLuma[k]
3044 + v->ReadBandwidthPlaneChroma[k];
3049 v->VStartupMargin = 0;
3050 v->FirstMainPlane = true;
3051 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3052 if (v->BlendingAndTiming[k] == k) {
3053 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
3055 if (v->FirstMainPlane == true) {
3056 v->VStartupMargin = margin;
3057 v->FirstMainPlane = false;
3059 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
3064 // Stutter Efficiency
3065 CalculateStutterEfficiency(
3066 v->NumberOfActivePlanes,
3067 v->ROBBufferSizeInKByte,
3068 v->TotalDataReadBandwidth,
3072 v->SynchronizedVBlank,
3076 v->BytePerPixelDETY,
3087 v->BlockHeight256BytesY,
3088 v->BlockWidth256BytesY,
3089 v->BlockHeight256BytesC,
3090 v->BlockWidth256BytesC,
3091 v->DCCYMaxUncompressedBlock,
3092 v->DCCCMaxUncompressedBlock,
3096 v->ReadBandwidthPlaneLuma,
3097 v->ReadBandwidthPlaneChroma,
3100 &v->StutterEfficiencyNotIncludingVBlank,
3101 &v->StutterEfficiency,
/*
 * DisplayPipeConfiguration - derive per-plane 256-byte block dimensions and
 * then run the swath/DET configuration step for all active planes.
 *
 * @mode_lib: display mode library instance; inputs are read from, and results
 *            are handed back through, mode_lib->vba.
 *
 * All scratch arrays below are sized DC__NUM_DPP__MAX and zero-initialised.
 * NOTE(review): parts of both argument lists are not visible in this excerpt,
 * so the roles of the dummy* scratch variables (other than dummysinglestring)
 * cannot be confirmed from here.
 */
3105 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3107 // Display Pipe Configuration
3108 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
3109 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
3110 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
3111 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
/*
 * NOTE(review): these block-dimension arrays are plain int, while
 * dml30_CalculateBytePerPixelAnd256BBlockSizes() declares the matching
 * parameters as unsigned int pointers - confirm the signedness mismatch is
 * acceptable for the build flags in use.
 */
3112 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
3113 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
3114 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
3115 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
3116 double dummy1[DC__NUM_DPP__MAX] = { 0 };
3117 double dummy2[DC__NUM_DPP__MAX] = { 0 };
3118 double dummy3[DC__NUM_DPP__MAX] = { 0 };
3119 double dummy4[DC__NUM_DPP__MAX] = { 0 };
3120 int dummy5[DC__NUM_DPP__MAX] = { 0 };
3121 int dummy6[DC__NUM_DPP__MAX] = { 0 };
3122 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
3123 bool dummysinglestring = 0;
/* Step 1: per active plane, look up byte-per-pixel and 256B block sizes. */
3126 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
3128 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3129 mode_lib->vba.SourcePixelFormat[k],
3130 mode_lib->vba.SurfaceTiling[k],
/* Results for plane k land in the k-th slot of the local scratch arrays. */
3135 &Read256BytesBlockHeightY[k],
3136 &Read256BytesBlockHeightC[k],
3137 &Read256BytesBlockWidthY[k],
3138 &Read256BytesBlockWidthC[k]);
/*
 * Step 2: one call covering all planes; whole arrays are passed rather than
 * per-plane elements. Only DETBufferSizeInKByte[0] is passed for the DET size.
 */
3140 CalculateSwathAndDETConfiguration(
3142 mode_lib->vba.NumberOfActivePlanes,
3143 mode_lib->vba.DETBufferSizeInKByte[0],
3146 mode_lib->vba.SourceScan,
3147 mode_lib->vba.SourcePixelFormat,
3148 mode_lib->vba.SurfaceTiling,
3149 mode_lib->vba.ViewportWidth,
3150 mode_lib->vba.ViewportHeight,
3151 mode_lib->vba.SurfaceWidthY,
3152 mode_lib->vba.SurfaceWidthC,
3153 mode_lib->vba.SurfaceHeightY,
3154 mode_lib->vba.SurfaceHeightC,
/* Block dimensions computed by the loop above. */
3155 Read256BytesBlockHeightY,
3156 Read256BytesBlockHeightC,
3157 Read256BytesBlockWidthY,
3158 Read256BytesBlockWidthC,
3159 mode_lib->vba.ODMCombineEnabled,
3160 mode_lib->vba.BlendingAndTiming,
3165 mode_lib->vba.HActive,
3166 mode_lib->vba.HRatio,
3167 mode_lib->vba.HRatioChroma,
3168 mode_lib->vba.DPPPerPlane,
/* presumably outputs written back into vba by the callee - TODO confirm */
3173 mode_lib->vba.SwathHeightY,
3174 mode_lib->vba.SwathHeightC,
3175 mode_lib->vba.DETBufferSizeY,
3176 mode_lib->vba.DETBufferSizeC,
/* The final pointer argument targets a local that is not read afterwards in the visible code. */
3178 &dummysinglestring);
/*
 * dml30_CalculateBytePerPixelAnd256BBlockSizes - map a source pixel format and
 * tiling mode to byte-per-pixel values and 256-byte block dimensions.
 *
 * @SourcePixelFormat:    pixel format selecting the per-pixel sizes.
 * @SurfaceTiling:        tiling mode; dm_sw_linear forces block height 1.
 * @BytePerPixelY/C:      integer byte-per-pixel outputs; they are also read
 *                        here as divisors for the block widths.
 *                        NOTE(review): their assignments are not visible in
 *                        this excerpt.
 * @BytePerPixelDETY/C:   byte-per-pixel outputs as doubles (may be fractional).
 * @BlockHeight256BytesY/C, @BlockWidth256BytesY/C: block dimension outputs.
 *
 * All results are returned through the pointer parameters.
 */
3181 void dml30_CalculateBytePerPixelAnd256BBlockSizes(
3182 enum source_format_class SourcePixelFormat,
3183 enum dm_swizzle_mode SurfaceTiling,
3184 unsigned int *BytePerPixelY,
3185 unsigned int *BytePerPixelC,
3186 double *BytePerPixelDETY,
3187 double *BytePerPixelDETC,
3188 unsigned int *BlockHeight256BytesY,
3189 unsigned int *BlockHeight256BytesC,
3190 unsigned int *BlockWidth256BytesY,
3191 unsigned int *BlockWidth256BytesC)
/* Part 1: byte-per-pixel values, chosen by pixel format. */
3193 if (SourcePixelFormat == dm_444_64) {
3194 *BytePerPixelDETY = 8;
3195 *BytePerPixelDETC = 0;
3198 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3199 *BytePerPixelDETY = 4;
3200 *BytePerPixelDETC = 0;
3203 } else if (SourcePixelFormat == dm_444_16) {
3204 *BytePerPixelDETY = 2;
3205 *BytePerPixelDETC = 0;
3208 } else if (SourcePixelFormat == dm_444_8) {
3209 *BytePerPixelDETY = 1;
3210 *BytePerPixelDETC = 0;
3213 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3214 *BytePerPixelDETY = 4;
3215 *BytePerPixelDETC = 1;
3218 } else if (SourcePixelFormat == dm_420_8) {
3219 *BytePerPixelDETY = 1;
3220 *BytePerPixelDETC = 2;
3223 } else if (SourcePixelFormat == dm_420_12) {
3224 *BytePerPixelDETY = 2;
3225 *BytePerPixelDETC = 4;
/*
 * Remaining case with fractional sizes (4/3 and 8/3 bytes); its condition line
 * is not visible here - presumably the catch-all else, TODO confirm.
 */
3229 *BytePerPixelDETY = 4.0 / 3;
3230 *BytePerPixelDETC = 8.0 / 3;
/*
 * Part 2a: formats listed in this condition get a luma block only; the chroma
 * block height and width are set to 0.
 */
3235 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3236 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3237 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3238 || SourcePixelFormat == dm_rgbe)) {
3239 if (SurfaceTiling == dm_sw_linear) {
3240 *BlockHeight256BytesY = 1;
3241 } else if (SourcePixelFormat == dm_444_64) {
3242 *BlockHeight256BytesY = 4;
3243 } else if (SourcePixelFormat == dm_444_8) {
3244 *BlockHeight256BytesY = 16;
/* Height used when none of the tests above matched. */
3246 *BlockHeight256BytesY = 8;
/* Width follows from the fixed 256-byte block: 256 / bytes-per-pixel / height. */
3248 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3249 *BlockHeight256BytesC = 0;
3250 *BlockWidth256BytesC = 0;
/* Part 2b: all other formats get both a luma and a chroma block. */
3252 if (SurfaceTiling == dm_sw_linear) {
3253 *BlockHeight256BytesY = 1;
3254 *BlockHeight256BytesC = 1;
3255 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3256 *BlockHeight256BytesY = 8;
3257 *BlockHeight256BytesC = 16;
3258 } else if (SourcePixelFormat == dm_420_8) {
3259 *BlockHeight256BytesY = 16;
3260 *BlockHeight256BytesC = 8;
/* Heights used when none of the tests above matched. */
3262 *BlockHeight256BytesY = 8;
3263 *BlockHeight256BytesC = 8;
/* Integer division; relies on the byte-per-pixel outputs being non-zero here. */
3265 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3266 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - select a wait time from the latency inputs by PrefetchMode.
 *
 * Returns:
 *   PrefetchMode == 0: the largest of (DRAMClockChangeLatency + UrgentLatency),
 *                      SREnterPlusExitTime and UrgentLatency.
 *   PrefetchMode == 1: the larger of SREnterPlusExitTime and UrgentLatency.
 *   final return:      UrgentLatency (the line introducing this branch is not
 *                      visible here; presumably a plain else).
 */
3270 static double CalculateTWait(
3271 unsigned int PrefetchMode,
3272 double DRAMClockChangeLatency,
3273 double UrgentLatency,
3274 double SREnterPlusExitTime)
3276 if (PrefetchMode == 0) {
3277 return dml_max(DRAMClockChangeLatency + UrgentLatency,
3278 dml_max(SREnterPlusExitTime, UrgentLatency));
3279 } else if (PrefetchMode == 1) {
3280 return dml_max(SREnterPlusExitTime, UrgentLatency);
3282 return UrgentLatency;
/*
 * dml30_CalculateWriteBackDISPCLK - compute a DISPCLK value for writeback as
 * the maximum of three candidate terms.
 *
 * Returns dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), each scaled by
 * PixelClock.
 *
 * NOTE(review): PixelClock is used in the body but its parameter line is not
 * visible in this excerpt. WritebackPixelFormat and WritebackVRatio are not
 * referenced by the visible statements.
 */
3286 double dml30_CalculateWriteBackDISPCLK(
3287 enum source_format_class WritebackPixelFormat,
3289 double WritebackHRatio,
3290 double WritebackVRatio,
3291 unsigned int WritebackHTaps,
3292 unsigned int WritebackVTaps,
3293 long WritebackSourceWidth,
3294 long WritebackDestinationWidth,
3295 unsigned int HTotal,
3296 unsigned int WritebackLineBufferSize)
3298 double DISPCLK_H = 0, DISPCLK_V = 0, DISPCLK_HB = 0;
/* Term driven by horizontal taps (grouped by 8) and the horizontal ratio. */
3300 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
/* Term driven by vertical taps and destination width (grouped by 6), per HTotal. */
3301 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
/* Term that subtracts a line-buffer contribution (size / 57) before dividing by source width. */
3302 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3303 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
/*
 * CalculateWriteBackDelay - compute a writeback delay figure from the scaler
 * geometry.
 *
 * Returns 0 when the computed "last output lines" value is negative; otherwise
 * that value times Line_length, plus (HTotal - WritebackDestinationWidth),
 * plus a constant 80.
 *
 * WritebackPixelFormat and WritebackHRatio are not referenced by the visible
 * statements.
 */
3306 static double CalculateWriteBackDelay(
3307 enum source_format_class WritebackPixelFormat,
3308 double WritebackHRatio,
3309 double WritebackVRatio,
3310 unsigned int WritebackVTaps,
3311 long WritebackDestinationWidth,
3312 long WritebackDestinationHeight,
3313 long WritebackSourceHeight,
3314 unsigned int HTotal)
3316 double CalculateWriteBackDelay = 0;
3317 double Line_length = 0;
3318 double Output_lines_last_notclamped = 0;
3319 double WritebackVInit = 0;
3321 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
/* Larger of the destination width and (width grouped by 6) times the vertical taps. */
3322 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3323 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
/* A negative line count is clamped to a zero delay. */
3324 if (Output_lines_last_notclamped < 0) {
3325 CalculateWriteBackDelay = 0;
3327 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3329 return CalculateWriteBackDelay;
/*
 * CalculateDynamicMetadataParameters - compute four timing values from clock,
 * timing and dynamic-metadata inputs.
 *
 * Outputs (written through the pointer parameters):
 *   *Tsetup: (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock
 *   *Tdmbf:  DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec:  HTotal / PixelClock
 *   *Tdmsks: half of VBlank * HTotal / PixelClock when
 *            DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *            DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *            halved again when InterlaceEnable == 1 and
 *            ProgressiveToInterlaceUnitInOPP is false.
 */
3333 static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
3334 double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
3335 long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
3336 double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
3338 double TotalRepeaterDelayTime = 0;
3339 double VUpdateWidthPix = 0;
3340 double VReadyOffsetPix = 0;
3341 double VUpdateOffsetPix = 0;
/* Per-repeater cost is 2 DPPCLK periods plus 3 DISPCLK periods. */
3342 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
/* The three *Pix quantities are times multiplied by PixelClock (or a pixel count for the offset). */
3343 VUpdateWidthPix = (14 / DCFClkDeepSleep + 12 / DPPCLK + TotalRepeaterDelayTime) * PixelClock;
3344 VReadyOffsetPix = dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
3345 VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
3346 *Tsetup = (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock;
3347 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
3348 *Tdmec = HTotal / PixelClock;
/* Zero required lines selects the half-VBlank form below. */
3349 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
3350 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3352 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
3354 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
3355 *Tdmsks = *Tdmsks / 2;
/*
 * CalculateRowBandwidth - compute meta-row and dpte-row bandwidth values.
 *
 * Outputs:
 *   *meta_row_bw: controlled by DCCEnable and the pixel format.
 *   *dpte_row_bw: controlled by GPUVMEnable and the pixel format.
 *
 * For the formats dm_420_8, dm_420_10, dm_420_12 and dm_rgbe_alpha a luma term
 * and a chroma term are summed; in the remaining enabled case only the luma
 * term is used. Each term has the form
 *   ratio * bytes_per_row / (row_height * LineTime).
 *
 * NOTE(review): the GPUVMEnable, DCCEnable, VRatio and LineTime parameter
 * lines, and the bodies of the two "!= true" branches, are not visible in this
 * excerpt.
 */
3359 static void CalculateRowBandwidth(
3361 enum source_format_class SourcePixelFormat,
3363 double VRatioChroma,
3366 unsigned int MetaRowByteLuma,
3367 unsigned int MetaRowByteChroma,
3368 unsigned int meta_row_height_luma,
3369 unsigned int meta_row_height_chroma,
3370 unsigned int PixelPTEBytesPerRowLuma,
3371 unsigned int PixelPTEBytesPerRowChroma,
3372 unsigned int dpte_row_height_luma,
3373 unsigned int dpte_row_height_chroma,
3374 double *meta_row_bw,
3375 double *dpte_row_bw)
/* Meta row bandwidth. */
3377 if (DCCEnable != true) {
3379 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3380 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3381 + VRatioChroma * MetaRowByteChroma
3382 / (meta_row_height_chroma * LineTime);
/* Luma-only form for the remaining formats. */
3384 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
/* DPTE row bandwidth, same structure keyed on GPUVMEnable. */
3387 if (GPUVMEnable != true) {
3389 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3390 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3391 + VRatioChroma * PixelPTEBytesPerRowChroma
3392 / (dpte_row_height_chroma * LineTime);
/* Luma-only form for the remaining formats. */
3394 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - work out immediate-flip fetch times, the resulting
 * line counts and bandwidth, and whether the pipe can support immediate flip.
 *
 * Outputs (through pointers):
 *   *DestinationLinesToRequestVMInImmediateFlip
 *   *DestinationLinesToRequestRowInImmediateFlip
 *   *final_flip_bw
 *   *ImmediateFlipSupportedForPipe
 *
 * NOTE(review): several parameter lines (e.g. GPUVMEnable, HostVMEnable,
 * DCCEnable, LineTime, VRatio, Tno_bw) and the else/brace lines are not
 * visible in this excerpt; comments describe the visible statements only.
 */
3398 static void CalculateFlipSchedule(
3399 struct display_mode_lib *mode_lib,
3400 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3401 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3402 double UrgentExtraLatency,
3403 double UrgentLatency,
3404 unsigned int GPUVMMaxPageTableLevels,
3406 unsigned int HostVMMaxNonCachedPageTableLevels,
3408 double HostVMMinPageSize,
3409 double PDEAndMetaPTEBytesPerFrame,
3410 double MetaRowBytes,
3411 double DPTEBytesPerRow,
3412 double BandwidthAvailableForImmediateFlip,
3413 unsigned int TotImmediateFlipBytes,
3414 enum source_format_class SourcePixelFormat,
3417 double VRatioChroma,
3420 unsigned int dpte_row_height,
3421 unsigned int meta_row_height,
3422 unsigned int dpte_row_height_chroma,
3423 unsigned int meta_row_height_chroma,
3424 double *DestinationLinesToRequestVMInImmediateFlip,
3425 double *DestinationLinesToRequestRowInImmediateFlip,
3426 double *final_flip_bw,
3427 bool *ImmediateFlipSupportedForPipe)
3429 double min_row_time = 0.0;
3430 unsigned int HostVMDynamicLevelsTrips = 0;
3431 double TimeForFetchingMetaPTEImmediateFlip = 0;
3432 double TimeForFetchingRowInVBlankImmediateFlip = 0;
3433 double ImmediateFlipBW = 0;
3434 double HostVMInefficiencyFactor = 0;
/*
 * With both GPUVM and HostVM enabled, VM data is scaled by the ratio of the
 * two bandwidth percentages and extra page-table trips are counted; the
 * other assignments below set a neutral factor of 1 and 0 trips.
 */
3436 if (GPUVMEnable == true && HostVMEnable == true) {
3437 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3438 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3440 HostVMInefficiencyFactor = 1;
3441 HostVMDynamicLevelsTrips = 0;
/* This pipe's share of the flip bandwidth, proportional to its bytes out of TotImmediateFlipBytes. */
3444 if (GPUVMEnable == true || DCCEnable == true) {
3445 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
/* Time to fetch PDE/meta-PTE data: largest of a bandwidth term, a latency term and LineTime / 4. */
3448 if (GPUVMEnable == true) {
3449 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3450 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
3452 TimeForFetchingMetaPTEImmediateFlip = 0;
/* Convert to lines; the 4.0 * ... / 4.0 pattern around dml_ceil presumably rounds up to quarter lines - TODO confirm dml_ceil semantics. */
3455 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
/* Time to fetch one row of meta and DPTE data, with the same three-way maximum. */
3456 if ((GPUVMEnable == true || DCCEnable == true)) {
3457 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3458 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
3460 TimeForFetchingRowInVBlankImmediateFlip = 0;
3463 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
/* Final flip bandwidth: bytes divided by the line-quantised times computed above. */
3465 if (GPUVMEnable == true) {
3466 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3467 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
/* Reached only when GPUVMEnable is not true, so in effect this tests DCCEnable. */
3468 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3469 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
/*
 * min_row_time: shortest row duration among the enabled row types.
 * NOTE(review): this format list omits dm_420_12, whereas the corresponding
 * lists in CalculateRowBandwidth include it - confirm this is intentional.
 */
3475 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3476 if (GPUVMEnable == true && DCCEnable != true) {
3477 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3478 } else if (GPUVMEnable != true && DCCEnable == true) {
3479 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3481 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
3482 dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
/* Same selection without the chroma terms. */
3485 if (GPUVMEnable == true && DCCEnable != true) {
3486 min_row_time = dpte_row_height * LineTime / VRatio;
3487 } else if (GPUVMEnable != true && DCCEnable == true) {
3488 min_row_time = meta_row_height * LineTime / VRatio;
3490 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
/*
 * Immediate flip is rejected when the VM request needs 32 or more lines, the
 * row request needs 16 or more lines, or the combined fetch time (meta-PTE
 * plus twice the row time) exceeds min_row_time.
 */
3494 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3495 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3496 *ImmediateFlipSupportedForPipe = false;
3498 *ImmediateFlipSupportedForPipe = true;
/*
 * TruncToValidBPP - derive link and DSC bits-per-pixel limits and use them to
 * select or validate an output BPP.
 *
 * NOTE(review): most of the parameter list, every branch body that returns or
 * assigns a NonDSCBPP/MinDSCBPP value, and the final return are not visible in
 * this excerpt. The comments below cover the visible statements only; the
 * overall return contract cannot be confirmed from here.
 */
3502 static double TruncToValidBPP(
3510 enum output_encoder_class Output,
3511 enum output_format_class Format,
3512 unsigned int DSCInputBitPerComponent,
3516 enum odm_combine_mode ODMCombine)
3518 double MaxLinkBPP = 0;
3520 double MaxDSCBPP = 0;
/* Per-format upper DSC limit: a multiple of DSCInputBitPerComponent minus 1/16. */
3525 if (Format == dm_420) {
3530 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3531 } else if (Format == dm_444) {
3536 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3542 if (Format == dm_n422) {
3544 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3548 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
/* Link limit: LinkBitRate / 10 * 8 per lane, divided by PixelClock; reduced by 2.4% for DSC on dm_dp. */
3552 if (DSCEnable && Output == dm_dp) {
3553 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3555 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
/* ODM combine thresholds (16 for 4to1, 32 for 2to1); the branch bodies are not visible here. */
3558 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3560 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
/* DesiredBPP == 0 path: a BPP is chosen from the limits rather than validated. */
3565 if (DesiredBPP == 0) {
3567 if (MaxLinkBPP < MinDSCBPP) {
3569 } else if (MaxLinkBPP >= MaxDSCBPP) {
/* Link limit truncated down to a multiple of 1/16 (assuming dml_floor rounds down to the given granularity - TODO confirm). */
3572 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
/* Comparisons against the non-DSC candidates, largest first. */
3575 if (MaxLinkBPP >= NonDSCBPP2) {
3577 } else if (MaxLinkBPP >= NonDSCBPP1) {
3579 } else if (MaxLinkBPP >= NonDSCBPP0) {
/*
 * Validation of a caller-supplied DesiredBPP: without DSC it must equal one of
 * the NonDSCBPP values or 18; with DSC it must lie within
 * [MinDSCBPP, MaxDSCBPP]. The body taken on failure is not visible here.
 */
3586 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0 || DesiredBPP == 18)) ||
3587 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3596 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3598 struct vba_vars_st *v = &mode_lib->vba;
3599 int MinPrefetchMode, MaxPrefetchMode;
3601 unsigned int j, k, m;
3602 bool EnoughWritebackUnits = true;
3603 bool WritebackModeSupport = true;
3604 bool ViewportExceedsSurface = false;
3605 double MaxTotalVActiveRDBandwidth = 0;
3606 long ReorderingBytes = 0;
3607 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3609 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3611 CalculateMinAndMaxPrefetchMode(
3612 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3613 &MinPrefetchMode, &MaxPrefetchMode);
3615 /*Scale Ratio, taps Support Check*/
3617 v->ScaleRatioAndTapsSupport = true;
3618 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3619 if (v->ScalerEnabled[k] == false
3620 && ((v->SourcePixelFormat[k] != dm_444_64
3621 && v->SourcePixelFormat[k] != dm_444_32
3622 && v->SourcePixelFormat[k] != dm_444_16
3623 && v->SourcePixelFormat[k] != dm_mono_16
3624 && v->SourcePixelFormat[k] != dm_mono_8
3625 && v->SourcePixelFormat[k] != dm_rgbe
3626 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3627 || v->HRatio[k] != 1.0
3628 || v->htaps[k] != 1.0
3629 || v->VRatio[k] != 1.0
3630 || v->vtaps[k] != 1.0)) {
3631 v->ScaleRatioAndTapsSupport = false;
3632 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3633 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3634 || (v->htaps[k] > 1.0
3635 && (v->htaps[k] % 2) == 1)
3636 || v->HRatio[k] > v->MaxHSCLRatio
3637 || v->VRatio[k] > v->MaxVSCLRatio
3638 || v->HRatio[k] > v->htaps[k]
3639 || v->VRatio[k] > v->vtaps[k]
3640 || (v->SourcePixelFormat[k] != dm_444_64
3641 && v->SourcePixelFormat[k] != dm_444_32
3642 && v->SourcePixelFormat[k] != dm_444_16
3643 && v->SourcePixelFormat[k] != dm_mono_16
3644 && v->SourcePixelFormat[k] != dm_mono_8
3645 && v->SourcePixelFormat[k] != dm_rgbe
3646 && (v->VTAPsChroma[k] < 1
3647 || v->VTAPsChroma[k] > 8
3648 || v->HTAPsChroma[k] < 1
3649 || v->HTAPsChroma[k] > 8
3650 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3651 || v->HRatioChroma[k] > v->MaxHSCLRatio
3652 || v->VRatioChroma[k] > v->MaxVSCLRatio
3653 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3654 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3655 v->ScaleRatioAndTapsSupport = false;
3658 /*Source Format, Pixel Format and Scan Support Check*/
3660 v->SourceFormatPixelAndScanSupport = true;
3661 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3662 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3663 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3664 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3665 v->SourceFormatPixelAndScanSupport = false;
3668 /*Bandwidth Support Check*/
3670 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3671 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3672 v->SourcePixelFormat[k],
3673 v->SurfaceTiling[k],
3674 &v->BytePerPixelY[k],
3675 &v->BytePerPixelC[k],
3676 &v->BytePerPixelInDETY[k],
3677 &v->BytePerPixelInDETC[k],
3678 &v->Read256BlockHeightY[k],
3679 &v->Read256BlockHeightC[k],
3680 &v->Read256BlockWidthY[k],
3681 &v->Read256BlockWidthC[k]);
3683 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3684 if (v->SourceScan[k] != dm_vert) {
3685 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3686 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3688 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3689 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3692 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3693 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3694 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3696 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3697 if (v->WritebackEnable[k] == true
3698 && v->WritebackPixelFormat[k] == dm_444_64) {
3699 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3700 * v->WritebackDestinationHeight[k]
3701 / (v->WritebackSourceHeight[k]
3703 / v->PixelClock[k]) * 8.0;
3704 } else if (v->WritebackEnable[k] == true) {
3705 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3706 * v->WritebackDestinationHeight[k]
3707 / (v->WritebackSourceHeight[k]
3709 / v->PixelClock[k]) * 4.0;
3711 v->WriteBandwidth[k] = 0.0;
3715 /*Writeback Latency support check*/
3717 v->WritebackLatencySupport = true;
3718 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3719 if (v->WritebackEnable[k] == true) {
3720 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3721 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3722 if (v->WriteBandwidth[k]
3723 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3724 / v->WritebackLatency) {
3725 v->WritebackLatencySupport = false;
3728 if (v->WriteBandwidth[k]
3729 > v->WritebackInterfaceBufferSize * 1024
3730 / v->WritebackLatency) {
3731 v->WritebackLatencySupport = false;
3737 /*Writeback Mode Support Check*/
3739 v->TotalNumberOfActiveWriteback = 0;
3740 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3741 if (v->WritebackEnable[k] == true) {
3742 v->TotalNumberOfActiveWriteback =
3743 v->TotalNumberOfActiveWriteback + 1;
3747 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3748 EnoughWritebackUnits = false;
3750 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3751 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3752 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3754 WritebackModeSupport = false;
3756 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3757 WritebackModeSupport = false;
3760 /*Writeback Scale Ratio and Taps Support Check*/
3762 v->WritebackScaleRatioAndTapsSupport = true;
3763 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3764 if (v->WritebackEnable[k] == true) {
3765 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3766 || v->WritebackVRatio[k]
3767 > v->WritebackMaxVSCLRatio
3768 || v->WritebackHRatio[k]
3769 < v->WritebackMinHSCLRatio
3770 || v->WritebackVRatio[k]
3771 < v->WritebackMinVSCLRatio
3772 || v->WritebackHTaps[k]
3773 > v->WritebackMaxHSCLTaps
3774 || v->WritebackVTaps[k]
3775 > v->WritebackMaxVSCLTaps
3776 || v->WritebackHRatio[k]
3777 > v->WritebackHTaps[k]
3778 || v->WritebackVRatio[k]
3779 > v->WritebackVTaps[k]
3780 || (v->WritebackHTaps[k] > 2.0
3781 && ((v->WritebackHTaps[k] % 2)
3783 v->WritebackScaleRatioAndTapsSupport = false;
3785 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3786 v->WritebackScaleRatioAndTapsSupport = false;
3790 /*Maximum DISPCLK/DPPCLK Support check*/
3792 v->WritebackRequiredDISPCLK = 0.0;
3793 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3794 if (v->WritebackEnable[k] == true) {
3795 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3796 dml30_CalculateWriteBackDISPCLK(
3797 v->WritebackPixelFormat[k],
3799 v->WritebackHRatio[k],
3800 v->WritebackVRatio[k],
3801 v->WritebackHTaps[k],
3802 v->WritebackVTaps[k],
3803 v->WritebackSourceWidth[k],
3804 v->WritebackDestinationWidth[k],
3806 v->WritebackLineBufferSize));
3809 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3810 if (v->HRatio[k] > 1.0) {
3811 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3813 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3815 if (v->BytePerPixelC[k] == 0.0) {
3816 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3817 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3818 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3819 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3820 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3823 if (v->HRatioChroma[k] > 1.0) {
3824 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3825 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3827 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3829 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3830 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3831 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3832 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3834 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3835 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3836 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3840 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3841 int MaximumSwathWidthSupportLuma = 0;
3842 int MaximumSwathWidthSupportChroma = 0;
3844 if (v->SurfaceTiling[k] == dm_sw_linear) {
3845 MaximumSwathWidthSupportLuma = 8192.0;
3846 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3847 MaximumSwathWidthSupportLuma = 2880.0;
3849 MaximumSwathWidthSupportLuma = 5760.0;
3852 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3853 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3855 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3857 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3858 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3859 if (v->BytePerPixelC[k] == 0.0) {
3860 v->MaximumSwathWidthInLineBufferChroma = 0;
3862 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3863 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3865 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3866 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3869 CalculateSwathAndDETConfiguration(
3871 v->NumberOfActivePlanes,
3872 v->DETBufferSizeInKByte[0],
3873 v->MaximumSwathWidthLuma,
3874 v->MaximumSwathWidthChroma,
3876 v->SourcePixelFormat,
3884 v->Read256BlockHeightY,
3885 v->Read256BlockHeightC,
3886 v->Read256BlockWidthY,
3887 v->Read256BlockWidthC,
3888 v->odm_combine_dummy,
3889 v->BlendingAndTiming,
3892 v->BytePerPixelInDETY,
3893 v->BytePerPixelInDETC,
3898 v->swath_width_luma_ub,
3899 v->swath_width_chroma_ub,
3906 v->SingleDPPViewportSizeSupportPerPlane,
3907 &v->ViewportSizeSupport[0][0]);
3909 for (i = 0; i < v->soc.num_states; i++) {
3910 for (j = 0; j < 2; j++) {
3911 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3912 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3913 v->RequiredDISPCLK[i][j] = 0.0;
3914 v->DISPCLK_DPPCLK_Support[i][j] = true;
3915 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3916 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3917 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3918 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3919 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3920 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3922 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3923 * (1 + v->DISPCLKRampingMargin / 100.0);
3924 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3925 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3926 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3928 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3929 * (1 + v->DISPCLKRampingMargin / 100.0);
3930 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3931 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3932 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3935 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3936 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3937 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3938 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3939 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3940 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3941 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3942 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3943 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3944 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3945 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3946 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3947 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3949 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3950 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3952 if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH
3953 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3954 if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) {
3955 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3956 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3958 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3959 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3962 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH
3963 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3964 if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) {
3965 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3966 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3968 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3969 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3972 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
3973 v->MPCCombine[i][j][k] = false;
3974 v->NoOfDPP[i][j][k] = 4;
3975 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
3976 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
3977 v->MPCCombine[i][j][k] = false;
3978 v->NoOfDPP[i][j][k] = 2;
3979 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
3980 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
3981 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
3982 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
3983 v->MPCCombine[i][j][k] = false;
3984 v->NoOfDPP[i][j][k] = 1;
3985 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3987 v->MPCCombine[i][j][k] = true;
3988 v->NoOfDPP[i][j][k] = 2;
3989 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3991 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3992 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3993 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3994 v->DISPCLK_DPPCLK_Support[i][j] = false;
3997 v->TotalNumberOfActiveDPP[i][j] = 0;
3998 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
3999 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4000 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4001 if (v->NoOfDPP[i][j][k] == 1)
4002 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4004 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
4005 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4006 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4007 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4008 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4009 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4010 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4011 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4012 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4013 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4014 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4017 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4018 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4019 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4020 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4021 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4022 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4025 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4026 v->RequiredDISPCLK[i][j] = 0.0;
4027 v->DISPCLK_DPPCLK_Support[i][j] = true;
4028 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4029 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4030 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4031 v->MPCCombine[i][j][k] = true;
4032 v->NoOfDPP[i][j][k] = 2;
4033 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4035 v->MPCCombine[i][j][k] = false;
4036 v->NoOfDPP[i][j][k] = 1;
4037 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4039 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4040 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4041 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4043 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4045 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4046 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4047 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4048 v->DISPCLK_DPPCLK_Support[i][j] = false;
4051 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4052 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4053 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4056 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4057 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4058 v->DISPCLK_DPPCLK_Support[i][j] = false;
4063 /*Total Available Pipes Support Check*/
4065 for (i = 0; i < v->soc.num_states; i++) {
4066 for (j = 0; j < 2; j++) {
4067 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4068 v->TotalAvailablePipesSupport[i][j] = true;
4070 v->TotalAvailablePipesSupport[i][j] = false;
4074 /*Display IO and DSC Support Check*/
4076 v->NonsupportedDSCInputBPC = false;
4077 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4078 if (!(v->DSCInputBitPerComponent[k] == 12.0
4079 || v->DSCInputBitPerComponent[k] == 10.0
4080 || v->DSCInputBitPerComponent[k] == 8.0)) {
4081 v->NonsupportedDSCInputBPC = true;
4085 /*Number Of DSC Slices*/
4086 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4087 if (v->BlendingAndTiming[k] == k) {
4088 if (v->PixelClockBackEnd[k] > 3200) {
4089 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4090 } else if (v->PixelClockBackEnd[k] > 1360) {
4091 v->NumberOfDSCSlices[k] = 8;
4092 } else if (v->PixelClockBackEnd[k] > 680) {
4093 v->NumberOfDSCSlices[k] = 4;
4094 } else if (v->PixelClockBackEnd[k] > 340) {
4095 v->NumberOfDSCSlices[k] = 2;
4097 v->NumberOfDSCSlices[k] = 1;
4100 v->NumberOfDSCSlices[k] = 0;
4104 for (i = 0; i < v->soc.num_states; i++) {
4105 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4106 v->RequiresDSC[i][k] = false;
4107 v->RequiresFEC[i][k] = false;
4108 if (v->BlendingAndTiming[k] == k) {
4109 if (v->Output[k] == dm_hdmi) {
4110 v->RequiresDSC[i][k] = false;
4111 v->RequiresFEC[i][k] = false;
4112 v->OutputBppPerState[i][k] = TruncToValidBPP(
4113 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4117 v->PixelClockBackEnd[k],
4118 v->ForcedOutputLinkBPP[k],
4122 v->DSCInputBitPerComponent[k],
4123 v->NumberOfDSCSlices[k],
4124 v->AudioSampleRate[k],
4125 v->AudioSampleLayout[k],
4126 v->ODMCombineEnablePerState[i][k]);
4127 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4128 if (v->DSCEnable[k] == true) {
4129 v->RequiresDSC[i][k] = true;
4130 v->LinkDSCEnable = true;
4131 if (v->Output[k] == dm_dp) {
4132 v->RequiresFEC[i][k] = true;
4134 v->RequiresFEC[i][k] = false;
4137 v->RequiresDSC[i][k] = false;
4138 v->LinkDSCEnable = false;
4139 v->RequiresFEC[i][k] = false;
4142 v->Outbpp = BPP_INVALID;
4143 if (v->PHYCLKPerState[i] >= 270.0) {
4144 v->Outbpp = TruncToValidBPP(
4145 (1.0 - v->Downspreading / 100.0) * 2700,
4146 v->OutputLinkDPLanes[k],
4149 v->PixelClockBackEnd[k],
4150 v->ForcedOutputLinkBPP[k],
4154 v->DSCInputBitPerComponent[k],
4155 v->NumberOfDSCSlices[k],
4156 v->AudioSampleRate[k],
4157 v->AudioSampleLayout[k],
4158 v->ODMCombineEnablePerState[i][k]);
4159 v->OutputBppPerState[i][k] = v->Outbpp;
4160 // TODO: Need some other way to handle this nonsense
4161 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4163 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4164 v->Outbpp = TruncToValidBPP(
4165 (1.0 - v->Downspreading / 100.0) * 5400,
4166 v->OutputLinkDPLanes[k],
4169 v->PixelClockBackEnd[k],
4170 v->ForcedOutputLinkBPP[k],
4174 v->DSCInputBitPerComponent[k],
4175 v->NumberOfDSCSlices[k],
4176 v->AudioSampleRate[k],
4177 v->AudioSampleLayout[k],
4178 v->ODMCombineEnablePerState[i][k]);
4179 v->OutputBppPerState[i][k] = v->Outbpp;
4180 // TODO: Need some other way to handle this nonsense
4181 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4183 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4184 v->Outbpp = TruncToValidBPP(
4185 (1.0 - v->Downspreading / 100.0) * 8100,
4186 v->OutputLinkDPLanes[k],
4189 v->PixelClockBackEnd[k],
4190 v->ForcedOutputLinkBPP[k],
4194 v->DSCInputBitPerComponent[k],
4195 v->NumberOfDSCSlices[k],
4196 v->AudioSampleRate[k],
4197 v->AudioSampleLayout[k],
4198 v->ODMCombineEnablePerState[i][k]);
4199 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
4200 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
4201 v->RequiresDSC[i][k] = true;
4202 v->LinkDSCEnable = true;
4203 if (v->Output[k] == dm_dp) {
4204 v->RequiresFEC[i][k] = true;
4206 v->Outbpp = TruncToValidBPP(
4207 (1.0 - v->Downspreading / 100.0) * 8100,
4208 v->OutputLinkDPLanes[k],
4211 v->PixelClockBackEnd[k],
4212 v->ForcedOutputLinkBPP[k],
4216 v->DSCInputBitPerComponent[k],
4217 v->NumberOfDSCSlices[k],
4218 v->AudioSampleRate[k],
4219 v->AudioSampleLayout[k],
4220 v->ODMCombineEnablePerState[i][k]);
4222 v->OutputBppPerState[i][k] = v->Outbpp;
4223 // TODO: Need some other way to handle this nonsense
4224 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4228 v->OutputBppPerState[i][k] = 0;
4232 for (i = 0; i < v->soc.num_states; i++) {
4233 v->DIOSupport[i] = true;
4234 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4235 if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4236 && (v->OutputBppPerState[i][k] == 0
4237 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4238 v->DIOSupport[i] = false;
4243 for (i = 0; i < v->soc.num_states; ++i) {
4244 v->ODMCombine4To1SupportCheckOK[i] = true;
4245 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4246 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4247 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4248 v->ODMCombine4To1SupportCheckOK[i] = false;
4253 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4255 for (i = 0; i < v->soc.num_states; i++) {
4256 v->NotEnoughDSCUnits[i] = false;
4257 v->TotalDSCUnitsRequired = 0.0;
4258 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4259 if (v->RequiresDSC[i][k] == true) {
4260 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4261 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4262 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4263 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4265 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4269 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4270 v->NotEnoughDSCUnits[i] = true;
4273 /*DSC Delay per state*/
4275 for (i = 0; i < v->soc.num_states; i++) {
4276 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4277 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4280 v->BPP = v->OutputBppPerState[i][k];
4282 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4283 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4284 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4285 v->DSCInputBitPerComponent[k],
4287 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4288 v->NumberOfDSCSlices[k],
4290 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4291 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4292 v->DSCDelayPerState[i][k] = 2.0
4293 * dscceComputeDelay(
4294 v->DSCInputBitPerComponent[k],
4296 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4297 v->NumberOfDSCSlices[k] / 2,
4299 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4301 v->DSCDelayPerState[i][k] = 4.0
4302 * (dscceComputeDelay(
4303 v->DSCInputBitPerComponent[k],
4305 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4306 v->NumberOfDSCSlices[k] / 4,
4308 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4310 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4312 v->DSCDelayPerState[i][k] = 0.0;
4315 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4316 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4317 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4318 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4324 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4326 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4327 for (j = 0; j <= 1; ++j) {
4328 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4329 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4330 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4331 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4334 CalculateSwathAndDETConfiguration(
4336 v->NumberOfActivePlanes,
4337 v->DETBufferSizeInKByte[0],
4338 v->MaximumSwathWidthLuma,
4339 v->MaximumSwathWidthChroma,
4341 v->SourcePixelFormat,
4349 v->Read256BlockHeightY,
4350 v->Read256BlockHeightC,
4351 v->Read256BlockWidthY,
4352 v->Read256BlockWidthC,
4353 v->ODMCombineEnableThisState,
4354 v->BlendingAndTiming,
4357 v->BytePerPixelInDETY,
4358 v->BytePerPixelInDETC,
4362 v->NoOfDPPThisState,
4363 v->swath_width_luma_ub_this_state,
4364 v->swath_width_chroma_ub_this_state,
4365 v->SwathWidthYThisState,
4366 v->SwathWidthCThisState,
4367 v->SwathHeightYThisState,
4368 v->SwathHeightCThisState,
4369 v->DETBufferSizeYThisState,
4370 v->DETBufferSizeCThisState,
4372 &v->ViewportSizeSupport[i][j]);
4374 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4375 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4376 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4377 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4378 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4379 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4380 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4381 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4382 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4387 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4388 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4391 for (i = 0; i < v->soc.num_states; i++) {
4392 for (j = 0; j < 2; j++) {
4393 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4394 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4395 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4396 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4397 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4398 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4399 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4400 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4401 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4404 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4405 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4406 if (v->DCCEnable[k] == true) {
4407 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4411 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4412 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4413 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4415 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4416 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4417 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4419 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4420 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4423 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4426 v->Read256BlockHeightC[k],
4427 v->Read256BlockWidthY[k],
4428 v->SourcePixelFormat[k],
4429 v->SurfaceTiling[k],
4430 v->BytePerPixelC[k],
4432 v->SwathWidthCThisState[k],
4433 v->ViewportHeightChroma[k],
4436 v->HostVMMaxNonCachedPageTableLevels,
4437 v->GPUVMMinPageSize,
4438 v->HostVMMinPageSize,
4439 v->PTEBufferSizeInRequestsForChroma,
4442 &v->MacroTileWidthC[k],
4444 &v->DPTEBytesPerRowC,
4445 &v->PTEBufferSizeNotExceededC[i][j][k],
4447 &v->dpte_row_height_chroma[k],
4451 &v->meta_row_height_chroma[k],
4458 &v->dummyinteger11);
4460 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4465 v->ProgressiveToInterlaceUnitInOPP,
4466 v->SwathHeightCThisState[k],
4467 v->ViewportYStartC[k],
4471 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4472 v->PTEBufferSizeInRequestsForChroma = 0;
4473 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4474 v->MetaRowBytesC = 0.0;
4475 v->DPTEBytesPerRowC = 0.0;
4476 v->PrefetchLinesC[i][j][k] = 0.0;
4477 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4479 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4482 v->Read256BlockHeightY[k],
4483 v->Read256BlockWidthY[k],
4484 v->SourcePixelFormat[k],
4485 v->SurfaceTiling[k],
4486 v->BytePerPixelY[k],
4488 v->SwathWidthYThisState[k],
4489 v->ViewportHeight[k],
4492 v->HostVMMaxNonCachedPageTableLevels,
4493 v->GPUVMMinPageSize,
4494 v->HostVMMinPageSize,
4495 v->PTEBufferSizeInRequestsForLuma,
4497 v->DCCMetaPitchY[k],
4498 &v->MacroTileWidthY[k],
4500 &v->DPTEBytesPerRowY,
4501 &v->PTEBufferSizeNotExceededY[i][j][k],
4503 &v->dpte_row_height[k],
4507 &v->meta_row_height[k],
4509 &v->dpte_group_bytes[k],
4515 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4520 v->ProgressiveToInterlaceUnitInOPP,
4521 v->SwathHeightYThisState[k],
4522 v->ViewportYStartY[k],
4525 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4526 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4527 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4529 CalculateRowBandwidth(
4531 v->SourcePixelFormat[k],
4535 v->HTotal[k] / v->PixelClock[k],
4538 v->meta_row_height[k],
4539 v->meta_row_height_chroma[k],
4540 v->DPTEBytesPerRowY,
4541 v->DPTEBytesPerRowC,
4542 v->dpte_row_height[k],
4543 v->dpte_row_height_chroma[k],
4544 &v->meta_row_bandwidth[i][j][k],
4545 &v->dpte_row_bandwidth[i][j][k]);
4547 v->UrgLatency[i] = CalculateUrgentLatency(
4548 v->UrgentLatencyPixelDataOnly,
4549 v->UrgentLatencyPixelMixedWithVMData,
4550 v->UrgentLatencyVMDataOnly,
4551 v->DoUrgentLatencyAdjustment,
4552 v->UrgentLatencyAdjustmentFabricClockComponent,
4553 v->UrgentLatencyAdjustmentFabricClockReference,
4554 v->FabricClockPerState[i]);
4556 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4557 CalculateUrgentBurstFactor(
4558 v->swath_width_luma_ub_this_state[k],
4559 v->swath_width_chroma_ub_this_state[k],
4560 v->DETBufferSizeInKByte[0],
4561 v->SwathHeightYThisState[k],
4562 v->SwathHeightCThisState[k],
4563 v->HTotal[k] / v->PixelClock[k],
4565 v->CursorBufferSize,
4566 v->CursorWidth[k][0],
4570 v->BytePerPixelInDETY[k],
4571 v->BytePerPixelInDETC[k],
4572 v->DETBufferSizeYThisState[k],
4573 v->DETBufferSizeCThisState[k],
4574 &v->UrgentBurstFactorCursor[k],
4575 &v->UrgentBurstFactorLuma[k],
4576 &v->UrgentBurstFactorChroma[k],
4577 &NotUrgentLatencyHiding[k]);
4580 v->NotUrgentLatencyHiding[i][j] = false;
4581 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4582 if (NotUrgentLatencyHiding[k]) {
4583 v->NotUrgentLatencyHiding[i][j] = true;
4587 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4588 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4589 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4590 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4593 v->TotalVActivePixelBandwidth[i][j] = 0;
4594 v->TotalVActiveCursorBandwidth[i][j] = 0;
4595 v->TotalMetaRowBandwidth[i][j] = 0;
4596 v->TotalDPTERowBandwidth[i][j] = 0;
4597 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4598 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4599 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4600 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4601 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4604 CalculateDCFCLKDeepSleep(
4606 v->NumberOfActivePlanes,
4611 v->SwathWidthYThisState,
4612 v->SwathWidthCThisState,
4613 v->NoOfDPPThisState,
4618 v->PSCL_FACTOR_CHROMA,
4619 v->RequiredDPPCLKThisState,
4620 v->ReadBandwidthLuma,
4621 v->ReadBandwidthChroma,
4623 &v->ProjectedDCFCLKDeepSleep[i][j]);
4627 //Calculate Return BW
4629 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4630 for (j = 0; j <= 1; ++j) {
4631 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4632 if (v->BlendingAndTiming[k] == k) {
4633 if (v->WritebackEnable[k] == true) {
4634 v->WritebackDelayTime[k] = v->WritebackLatency
4635 + CalculateWriteBackDelay(
4636 v->WritebackPixelFormat[k],
4637 v->WritebackHRatio[k],
4638 v->WritebackVRatio[k],
4639 v->WritebackVTaps[k],
4640 v->WritebackDestinationWidth[k],
4641 v->WritebackDestinationHeight[k],
4642 v->WritebackSourceHeight[k],
4643 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4645 v->WritebackDelayTime[k] = 0.0;
4647 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4648 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4649 v->WritebackDelayTime[k] = dml_max(
4650 v->WritebackDelayTime[k],
4652 + CalculateWriteBackDelay(
4653 v->WritebackPixelFormat[m],
4654 v->WritebackHRatio[m],
4655 v->WritebackVRatio[m],
4656 v->WritebackVTaps[m],
4657 v->WritebackDestinationWidth[m],
4658 v->WritebackDestinationHeight[m],
4659 v->WritebackSourceHeight[m],
4660 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4665 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4666 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4667 if (v->BlendingAndTiming[k] == m) {
4668 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4672 v->MaxMaxVStartup[i][j] = 0;
4673 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4674 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4675 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4676 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4681 ReorderingBytes = v->NumberOfChannels
4683 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4684 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4685 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4686 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4688 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4689 for (j = 0; j <= 1; ++j) {
4690 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4694 if (v->UseMinimumRequiredDCFCLK == true) {
4697 v->MaxInterDCNTileRepeaters,
4699 v->FinalDRAMClockChangeLatency,
4700 v->SREnterPlusExitTime,
4702 v->RoundTripPingLatencyCycles,
4704 v->PixelChunkSizeInKByte,
4707 v->GPUVMMaxPageTableLevels,
4709 v->NumberOfActivePlanes,
4710 v->HostVMMinPageSize,
4711 v->HostVMMaxNonCachedPageTableLevels,
4712 v->DynamicMetadataVMEnabled,
4713 v->ImmediateFlipRequirement[0],
4714 v->ProgressiveToInterlaceUnitInOPP,
4715 v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
4716 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4717 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4718 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
4721 v->DynamicMetadataTransmittedBytes,
4722 v->DynamicMetadataLinesBeforeActiveRequired,
4728 v->ProjectedDCFCLKDeepSleep,
4730 v->TotalVActivePixelBandwidth,
4731 v->TotalVActiveCursorBandwidth,
4732 v->TotalMetaRowBandwidth,
4733 v->TotalDPTERowBandwidth,
4734 v->TotalNumberOfActiveDPP,
4735 v->TotalNumberOfDCCActiveDPP,
4736 v->dpte_group_bytes,
4739 v->swath_width_luma_ub_all_states,
4740 v->swath_width_chroma_ub_all_states,
4745 v->PDEAndMetaPTEBytesPerFrame,
4748 v->DynamicMetadataEnable,
4749 v->VActivePixelBandwidth,
4750 v->VActiveCursorBandwidth,
4751 v->ReadBandwidthLuma,
4752 v->ReadBandwidthChroma,
4756 if (v->ClampMinDCFCLK) {
4757 /* Clamp calculated values to actual minimum */
4758 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4759 for (j = 0; j <= 1; ++j) {
4760 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4761 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4768 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4769 for (j = 0; j <= 1; ++j) {
4770 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4771 v->ReturnBusWidth * v->DCFCLKState[i][j],
4772 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4773 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4774 if (v->HostVMEnable != true) {
4775 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4778 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4779 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4784 //Re-ordering Buffer Support Check
4786 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4787 for (j = 0; j <= 1; ++j) {
4788 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4789 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4790 v->ROBSupport[i][j] = true;
4792 v->ROBSupport[i][j] = false;
4797 //Vertical Active BW support check
4799 MaxTotalVActiveRDBandwidth = 0;
4800 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4801 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4804 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4805 for (j = 0; j <= 1; ++j) {
4806 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4807 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4808 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4810 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4811 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4813 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4820 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4821 for (j = 0; j <= 1; ++j) {
4822 int NextPrefetchModeState = MinPrefetchMode;
4824 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4826 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4827 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4828 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4829 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4832 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4833 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4834 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4835 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4836 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4837 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4838 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4839 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4840 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4841 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4842 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4845 v->ExtraLatency = CalculateExtraLatency(
4846 v->RoundTripPingLatencyCycles,
4848 v->DCFCLKState[i][j],
4849 v->TotalNumberOfActiveDPP[i][j],
4850 v->PixelChunkSizeInKByte,
4851 v->TotalNumberOfDCCActiveDPP[i][j],
4853 v->ReturnBWPerState[i][j],
4856 v->NumberOfActivePlanes,
4857 v->NoOfDPPThisState,
4858 v->dpte_group_bytes,
4859 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4860 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4861 v->HostVMMinPageSize,
4862 v->HostVMMaxNonCachedPageTableLevels);
4864 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4866 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4867 v->MaxVStartup = v->NextMaxVStartup;
4869 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4871 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4872 Pipe myPipe = { 0 };
4874 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4875 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4876 myPipe.PixelClock = v->PixelClock[k];
4877 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4878 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4879 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4880 myPipe.SourceScan = v->SourceScan[k];
4881 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4882 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4883 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4884 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4885 myPipe.InterlaceEnable = v->Interlace[k];
4886 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4887 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4888 myPipe.HTotal = v->HTotal[k];
4889 myPipe.DCCEnable = v->DCCEnable[k];
4890 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4892 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4894 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4895 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4897 v->DSCDelayPerState[i][k],
4898 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
4900 v->DPPCLKDelaySCLLBOnly,
4901 v->DPPCLKDelayCNVCCursor,
4902 v->DISPCLKDelaySubtotal,
4903 v->SwathWidthYThisState[k] / v->HRatio[k],
4905 v->MaxInterDCNTileRepeaters,
4906 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4907 v->MaximumVStartup[i][j][k],
4908 v->GPUVMMaxPageTableLevels,
4911 v->HostVMMaxNonCachedPageTableLevels,
4912 v->HostVMMinPageSize,
4913 v->DynamicMetadataEnable[k],
4914 v->DynamicMetadataVMEnabled,
4915 v->DynamicMetadataLinesBeforeActiveRequired[k],
4916 v->DynamicMetadataTransmittedBytes[k],
4920 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4921 v->MetaRowBytes[i][j][k],
4922 v->DPTEBytesPerRow[i][j][k],
4923 v->PrefetchLinesY[i][j][k],
4924 v->SwathWidthYThisState[k],
4925 v->BytePerPixelY[k],
4928 v->PrefetchLinesC[i][j][k],
4929 v->SwathWidthCThisState[k],
4930 v->BytePerPixelC[k],
4933 v->swath_width_luma_ub_this_state[k],
4934 v->swath_width_chroma_ub_this_state[k],
4935 v->SwathHeightYThisState[k],
4936 v->SwathHeightCThisState[k],
4938 v->ProgressiveToInterlaceUnitInOPP,
4939 &v->DSTXAfterScaler[k],
4940 &v->DSTYAfterScaler[k],
4941 &v->LineTimesForPrefetch[k],
4943 &v->LinesForMetaPTE[k],
4944 &v->LinesForMetaAndDPTERow[k],
4945 &v->VRatioPreY[i][j][k],
4946 &v->VRatioPreC[i][j][k],
4947 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
4948 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
4949 &v->NoTimeForDynamicMetadata[i][j][k],
4951 &v->prefetch_vmrow_bw[k],
4954 &v->VUpdateOffsetPix[k],
4955 &v->VUpdateWidthPix[k],
4956 &v->VReadyOffsetPix[k]);
4959 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4960 CalculateUrgentBurstFactor(
4961 v->swath_width_luma_ub_this_state[k],
4962 v->swath_width_chroma_ub_this_state[k],
4963 v->DETBufferSizeInKByte[0],
4964 v->SwathHeightYThisState[k],
4965 v->SwathHeightCThisState[k],
4966 v->HTotal[k] / v->PixelClock[k],
4968 v->CursorBufferSize,
4969 v->CursorWidth[k][0],
4971 v->VRatioPreY[i][j][k],
4972 v->VRatioPreC[i][j][k],
4973 v->BytePerPixelInDETY[k],
4974 v->BytePerPixelInDETC[k],
4975 v->DETBufferSizeYThisState[k],
4976 v->DETBufferSizeCThisState[k],
4977 &v->UrgentBurstFactorCursorPre[k],
4978 &v->UrgentBurstFactorLumaPre[k],
4979 &v->UrgentBurstFactorChroma[k],
4980 &v->NoUrgentLatencyHidingPre[k]);
4983 v->MaximumReadBandwidthWithPrefetch = 0.0;
4984 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4985 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
4986 * v->VRatioPreY[i][j][k];
4988 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
4990 v->VActivePixelBandwidth[i][j][k],
4991 v->VActiveCursorBandwidth[i][j][k]
4992 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
4993 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4995 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4996 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4997 * v->UrgentBurstFactorChromaPre[k])
4998 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5001 v->NotEnoughUrgentLatencyHidingPre = false;
5002 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5003 if (v->NoUrgentLatencyHidingPre[k] == true) {
5004 v->NotEnoughUrgentLatencyHidingPre = true;
5008 v->PrefetchSupported[i][j] = true;
5009 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5010 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5011 v->PrefetchSupported[i][j] = false;
5013 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5014 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5015 || v->NoTimeForPrefetch[i][j][k] == true) {
5016 v->PrefetchSupported[i][j] = false;
5020 v->DynamicMetadataSupported[i][j] = true;
5021 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5022 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5023 v->DynamicMetadataSupported[i][j] = false;
5027 v->VRatioInPrefetchSupported[i][j] = true;
5028 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5029 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5030 v->VRatioInPrefetchSupported[i][j] = false;
5033 v->AnyLinesForVMOrRowTooLarge = false;
5034 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5035 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5036 v->AnyLinesForVMOrRowTooLarge = true;
5040 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5041 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5042 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5043 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5045 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5047 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5048 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5049 * v->UrgentBurstFactorChromaPre[k])
5050 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5052 v->TotImmediateFlipBytes = 0.0;
5053 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5054 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k]
5055 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k];
5058 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5059 CalculateFlipSchedule(
5061 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
5062 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
5065 v->GPUVMMaxPageTableLevels,
5067 v->HostVMMaxNonCachedPageTableLevels,
5069 v->HostVMMinPageSize,
5070 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5071 v->MetaRowBytes[i][j][k],
5072 v->DPTEBytesPerRow[i][j][k],
5073 v->BandwidthAvailableForImmediateFlip,
5074 v->TotImmediateFlipBytes,
5075 v->SourcePixelFormat[k],
5076 v->HTotal[k] / v->PixelClock[k],
5081 v->dpte_row_height[k],
5082 v->meta_row_height[k],
5083 v->dpte_row_height_chroma[k],
5084 v->meta_row_height_chroma[k],
5085 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5086 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5087 &v->final_flip_bw[k],
5088 &v->ImmediateFlipSupportedForPipe[k]);
5090 v->total_dcn_read_bw_with_flip = 0.0;
5091 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5092 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5094 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5095 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5096 + v->VActiveCursorBandwidth[i][j][k],
5098 * (v->final_flip_bw[k]
5099 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5100 * v->UrgentBurstFactorLumaPre[k]
5101 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5102 * v->UrgentBurstFactorChromaPre[k])
5103 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5105 v->ImmediateFlipSupportedForState[i][j] = true;
5106 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5107 v->ImmediateFlipSupportedForState[i][j] = false;
5109 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5110 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5111 v->ImmediateFlipSupportedForState[i][j] = false;
5115 v->ImmediateFlipSupportedForState[i][j] = false;
5117 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
5118 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5119 NextPrefetchModeState = NextPrefetchModeState + 1;
5121 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5123 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5124 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5125 || v->ImmediateFlipSupportedForState[i][j] == true))
5126 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5128 CalculateWatermarksAndDRAMSpeedChangeSupport(
5130 v->PrefetchModePerState[i][j],
5131 v->NumberOfActivePlanes,
5132 v->MaxLineBufferLines,
5134 v->DPPOutputBufferPixels,
5135 v->DETBufferSizeInKByte[0],
5136 v->WritebackInterfaceBufferSize,
5137 v->DCFCLKState[i][j],
5138 v->ReturnBWPerState[i][j],
5140 v->dpte_group_bytes,
5144 v->WritebackLatency,
5145 v->WritebackChunkSize,
5146 v->SOCCLKPerState[i],
5147 v->FinalDRAMClockChangeLatency,
5149 v->SREnterPlusExitTime,
5150 v->ProjectedDCFCLKDeepSleep[i][j],
5151 v->NoOfDPPThisState,
5153 v->RequiredDPPCLKThisState,
5154 v->DETBufferSizeYThisState,
5155 v->DETBufferSizeCThisState,
5156 v->SwathHeightYThisState,
5157 v->SwathHeightCThisState,
5159 v->SwathWidthYThisState,
5160 v->SwathWidthCThisState,
5169 v->BlendingAndTiming,
5170 v->BytePerPixelInDETY,
5171 v->BytePerPixelInDETC,
5175 v->WritebackPixelFormat,
5176 v->WritebackDestinationWidth,
5177 v->WritebackDestinationHeight,
5178 v->WritebackSourceHeight,
5179 &v->DRAMClockChangeSupport[i][j],
5180 &v->UrgentWatermark,
5181 &v->WritebackUrgentWatermark,
5182 &v->DRAMClockChangeWatermark,
5183 &v->WritebackDRAMClockChangeWatermark,
5184 &v->StutterExitWatermark,
5185 &v->StutterEnterPlusExitWatermark,
5186 &v->MinActiveDRAMClockChangeLatencySupported);
5190 /*PTE Buffer Size Check*/
5192 for (i = 0; i < v->soc.num_states; i++) {
5193 for (j = 0; j < 2; j++) {
5194 v->PTEBufferSizeNotExceeded[i][j] = true;
5195 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5196 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5197 v->PTEBufferSizeNotExceeded[i][j] = false;
5202 /*Cursor Support Check*/
5204 v->CursorSupport = true;
5205 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5206 if (v->CursorWidth[k][0] > 0.0) {
5207 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5208 v->CursorSupport = false;
5212 /*Valid Pitch Check*/
5214 v->PitchSupport = true;
5215 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5216 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5217 if (v->DCCEnable[k] == true) {
5218 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5220 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5222 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
5223 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
5224 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5225 if (v->DCCEnable[k] == true) {
5226 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
5228 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5231 v->AlignedCPitch[k] = v->PitchC[k];
5232 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5234 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
5235 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5236 v->PitchSupport = false;
5240 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5241 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
5242 ViewportExceedsSurface = true;
5244 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5245 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
5246 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5247 ViewportExceedsSurface = true;
5251 /*Mode Support, Voltage State and SOC Configuration*/
5253 for (i = v->soc.num_states - 1; i >= 0; i--) {
5254 for (j = 0; j < 2; j++) {
5255 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
5256 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
5257 && v->NotEnoughDSCUnits[i] == 0
5258 && v->DTBCLKRequiredMoreThanSupported[i] == 0
5259 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
5260 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
5261 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
5262 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
5263 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
5264 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
5265 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5266 || v->ImmediateFlipSupportedForState[i][j] == true)) {
5267 v->ModeSupport[i][j] = true;
5269 v->ModeSupport[i][j] = false;
5274 unsigned int MaximumMPCCombine = 0;
5275 for (i = v->soc.num_states; i >= 0; i--) {
5276 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5277 v->VoltageLevel = i;
5278 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5279 if (v->ModeSupport[i][1] == true) {
5280 MaximumMPCCombine = 1;
5282 MaximumMPCCombine = 0;
5286 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5287 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5288 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5289 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5291 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5292 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5293 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5294 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5295 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5296 v->maxMpcComb = MaximumMPCCombine;
/*
 * Compute the urgent, DRAM-clock-change and stutter watermarks and decide how
 * a DRAM clock change can be supported.
 *
 * Outputs (all written through the pointer arguments):
 *   UrgentWatermark                    UrgentLatency + ExtraLatency
 *   DRAMClockChangeWatermark           DRAMClockChangeLatency + *UrgentWatermark
 *   WritebackUrgentWatermark           WritebackLatency, or WritebackLatency plus
 *                                      WritebackChunkSize * 1024 / 32 / SOCCLK
 *   WritebackDRAMClockChangeWatermark  the same two forms plus DRAMClockChangeLatency
 *   MinActiveDRAMClockChangeLatencySupported
 *                                      smallest per-plane margin + DRAMClockChangeLatency
 *   DRAMClockChangeSupport             vactive / vblank / unsupported (end of body)
 *   StutterExitWatermark, StutterEnterPlusExitWatermark
 *
 * Side effects: updates mode_lib->vba.TotalActiveDPP, TotalDCCActiveDPP,
 * TotalActiveWriteback, LBLatencyHidingSourceLinesY/C,
 * ActiveDRAMClockChangeLatencyMargin[], MinActiveDRAMClockChangeMargin and
 * TotalNumberOfActiveOTG. Reads mode_lib->vba.WritebackConfiguration,
 * WritebackDRAMClockChangeWatermark and SynchronizedVBlank.
 *
 * NOTE(review): several names used below (SOCCLK, SRExitTime, DCCEnable,
 * HRatio, VRatio, k, j) have no declaration in the lines shown here, and the
 * brace/else lines are likewise absent - confirm against the full file.
 */
5300 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5301 struct display_mode_lib *mode_lib,
5302 unsigned int PrefetchMode,
5303 unsigned int NumberOfActivePlanes,
5304 unsigned int MaxLineBufferLines,
5305 unsigned int LineBufferSize,
5306 unsigned int DPPOutputBufferPixels,
5307 unsigned int DETBufferSizeInKByte,
5308 unsigned int WritebackInterfaceBufferSize,
5312 unsigned int dpte_group_bytes[],
5313 unsigned int MetaChunkSize,
5314 double UrgentLatency,
5315 double ExtraLatency,
5316 double WritebackLatency,
5317 double WritebackChunkSize,
5319 double DRAMClockChangeLatency,
5321 double SREnterPlusExitTime,
5322 double DCFCLKDeepSleep,
5323 unsigned int DPPPerPlane[],
5326 unsigned int DETBufferSizeY[],
5327 unsigned int DETBufferSizeC[],
5328 unsigned int SwathHeightY[],
5329 unsigned int SwathHeightC[],
5330 unsigned int LBBitPerPixel[],
5331 double SwathWidthY[],
5332 double SwathWidthC[],
5334 double HRatioChroma[],
5335 unsigned int vtaps[],
5336 unsigned int VTAPsChroma[],
5338 double VRatioChroma[],
5339 unsigned int HTotal[],
5340 double PixelClock[],
5341 unsigned int BlendingAndTiming[],
5342 double BytePerPixelDETY[],
5343 double BytePerPixelDETC[],
5344 double DSTXAfterScaler[],
5345 double DSTYAfterScaler[],
5346 bool WritebackEnable[],
5347 enum source_format_class WritebackPixelFormat[],
5348 double WritebackDestinationWidth[],
5349 double WritebackDestinationHeight[],
5350 double WritebackSourceHeight[],
5351 enum clock_change_support *DRAMClockChangeSupport,
5352 double *UrgentWatermark,
5353 double *WritebackUrgentWatermark,
5354 double *DRAMClockChangeWatermark,
5355 double *WritebackDRAMClockChangeWatermark,
5356 double *StutterExitWatermark,
5357 double *StutterEnterPlusExitWatermark,
5358 double *MinActiveDRAMClockChangeLatencySupported)
5360 double EffectiveLBLatencyHidingY = 0;
5361 double EffectiveLBLatencyHidingC = 0;
5362 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5363 double LinesInDETC = 0;
5364 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5365 unsigned int LinesInDETCRoundedDownToSwath = 0;
5366 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5367 double FullDETBufferingTimeC = 0;
5368 double ActiveDRAMClockChangeLatencyMarginY = 0;
5369 double ActiveDRAMClockChangeLatencyMarginC = 0;
5370 double WritebackDRAMClockChangeLatencyMargin = 0;
5371 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
5372 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
5373 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5374 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5375 double WritebackDRAMClockChangeLatencyHiding = 0;
/* Count the DPPs in use, and separately those belonging to DCC-enabled planes. */
5378 mode_lib->vba.TotalActiveDPP = 0;
5379 mode_lib->vba.TotalDCCActiveDPP = 0;
5380 for (k = 0; k < NumberOfActivePlanes; ++k) {
5381 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5382 if (DCCEnable[k] == true) {
5383 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5387 *UrgentWatermark = UrgentLatency + ExtraLatency;
5389 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
/* Count the planes that have writeback enabled. */
5391 mode_lib->vba.TotalActiveWriteback = 0;
5392 for (k = 0; k < NumberOfActivePlanes; ++k) {
5393 if (WritebackEnable[k] == true) {
5394 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
/* With at most one active writeback the watermark is the bare latency; the
 * second form adds the time to move one writeback chunk at SOCCLK. */
5398 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5399 *WritebackUrgentWatermark = WritebackLatency;
5401 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5404 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5405 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5407 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
/* Per plane: how much latency the line buffer and the DET can hide. */
5410 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Source lines held by the line buffer, capped at MaxLineBufferLines, minus
 * the lines already consumed by the vertical scaler taps. */
5412 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5414 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
/* Convert those source lines into time using the vertical ratio and line time. */
5416 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5418 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
/* Lines that fit in the DET, rounded down to whole swaths, and how long they last. */
5420 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5421 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5422 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5423 if (BytePerPixelDETC[k] > 0) {
/* Use the DETBufferSizeC[] argument, mirroring the luma path above. Reading
 * mode_lib->vba.DETBufferSizeC[] here ignored the array supplied by the
 * caller, which is not necessarily that member. */
5424 LinesInDETC = (double) DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5425 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5426 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
5429 FullDETBufferingTimeC = 999999;
/* Margin = hidden latency minus the urgent watermark, the time taken by the
 * DSTX/DSTY-after-scaler offset, and the DRAM clock change watermark. */
5432 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
/* With several planes, give up (1 - 1/N) of one swath time. */
5434 if (NumberOfActivePlanes > 1) {
5435 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5438 if (BytePerPixelDETC[k] > 0) {
5439 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5441 if (NumberOfActivePlanes > 1) {
5442 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
/* Planes with chroma take the tighter of the luma and chroma margins. */
5444 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5446 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5449 if (WritebackEnable[k] == true) {
/* Time the writeback interface buffer can cover: buffer size over the
 * writeback output rate (the "* 4" is presumably bytes per pixel - TODO confirm).
 * Halved for dm_444_64, doubled for the whole-buffer single-stream
 * interleave configuration. */
5451 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5452 if (WritebackPixelFormat[k] == dm_444_64) {
5453 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5455 if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5456 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
5458 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark;
5459 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
/* Find the smallest margin, and remember the plane whose timing
 * (BlendingAndTiming) drives the plane holding that minimum. */
5463 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5464 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5465 for (k = 0; k < NumberOfActivePlanes; ++k) {
5466 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5467 mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5468 if (BlendingAndTiming[k] == k) {
5469 PlaneWithMinActiveDRAMClockChangeMargin = k;
5471 for (j = 0; j < NumberOfActivePlanes; ++j) {
5472 if (BlendingAndTiming[k] == j) {
5473 PlaneWithMinActiveDRAMClockChangeMargin = j;
5480 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
/* Smallest margin among planes that do not share timing with the plane found above. */
5482 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5483 for (k = 0; k < NumberOfActivePlanes; ++k) {
5484 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5485 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
/* A plane that is its own BlendingAndTiming entry is counted as one active OTG. */
5489 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5490 for (k = 0; k < NumberOfActivePlanes; ++k) {
5491 if (BlendingAndTiming[k] == k) {
5492 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
/* Positive minimum margin: vactive. Otherwise vblank is reported only for
 * PrefetchMode 0 with synchronized vblank, a single OTG, or a positive
 * second-minimum margin; dm_dram_clock_change_unsupported is the remaining outcome. */
5496 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5497 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5498 } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5499 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5501 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
/* The stutter-critical plane is the one with the shortest luma DET buffering
 * time; record how long it needs to finish its current swath. */
5504 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5505 for (k = 0; k < NumberOfActivePlanes; ++k) {
5506 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5507 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5508 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k];
5512 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5513 *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
/*
 * Compute the DCFCLK value to use in deep sleep and store it in
 * *DCFCLKDeepSleep.
 *
 * For every plane a requirement is derived from the swath bytes delivered per
 * display-pipe line delivery time (scaled by 1.1) and kept in
 * mode_lib->vba.DCFCLKDeepSleepPerPlane[k], with PixelClock[k] / 16 as a lower
 * bound. The final result is the maximum of 8.0, the summed luma + chroma
 * read bandwidth divided by ReturnBusWidth, and every per-plane requirement.
 *
 * NOTE(review): VRatio, HRatio, DPPCLK, ReturnBusWidth and k are used below
 * but have no declaration in the lines shown here, and the brace/else lines
 * are likewise absent - confirm against the full file.
 */
5517 static void CalculateDCFCLKDeepSleep(
5518 struct display_mode_lib *mode_lib,
5519 unsigned int NumberOfActivePlanes,
5520 int BytePerPixelY[],
5521 int BytePerPixelC[],
5523 double VRatioChroma[],
5524 double SwathWidthY[],
5525 double SwathWidthC[],
5526 unsigned int DPPPerPlane[],
5528 double HRatioChroma[],
5529 double PixelClock[],
5530 double PSCL_THROUGHPUT[],
5531 double PSCL_THROUGHPUT_CHROMA[],
5533 double ReadBandwidthLuma[],
5534 double ReadBandwidthChroma[],
5536 double *DCFCLKDeepSleep)
5538 double DisplayPipeLineDeliveryTimeLuma = 0;
5539 double DisplayPipeLineDeliveryTimeChroma = 0;
5541 double ReadBandwidth = 0.0;
5543 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5544 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Luma line delivery time: the pixel-clock based form is guarded by
 * VRatio <= 1; the other form is based on scaler throughput and DPPCLK. */
5546 if (VRatio[k] <= 1) {
5547 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5549 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Chroma follows the same pattern, and is 0 when the plane has no chroma bytes. */
5551 if (BytePerPixelC[k] == 0) {
5552 DisplayPipeLineDeliveryTimeChroma = 0;
5554 if (VRatioChroma[k] <= 1) {
5555 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5557 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/* Planes with chroma take the larger of the luma and chroma requirements
 * (divisor 32.0); the luma-only form uses divisor 64.0. */
5561 if (BytePerPixelC[k] > 0) {
5562 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5564 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* Never go below one sixteenth of the pixel clock. */
5566 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
/* Total read bandwidth over all planes. */
5570 for (k = 0; k < NumberOfActivePlanes; ++k) {
5571 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
/* Start from the bandwidth-derived clock with a floor of 8.0 ... */
5574 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
/* ... then raise it to the largest per-plane requirement. */
5576 for (k = 0; k < NumberOfActivePlanes; ++k) {
5577 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
/*
 * Compute the urgent burst factors for the cursor, luma and chroma paths and
 * report whether any of them cannot hide UrgentLatency.
 *
 * For each path the buffered time T is (buffered lines) * LineTime / VRatio.
 * When T - UrgentLatency <= 0 the code sets *NotEnoughUrgentLatencyHiding to 1
 * and the factor to 0; the factor T / (T - UrgentLatency) is the other outcome
 * of that test. Each factor also has an assignment of 1 whose guarding
 * condition is not visible in the lines shown here.
 *
 * *NotEnoughUrgentLatencyHiding is cleared to 0 on entry.
 *
 * NOTE(review): LineTime and VRatio are used below but have no declaration in
 * the lines shown here, and the brace/else lines are likewise absent - confirm
 * against the full file. DETBufferSizeInKByte is not referenced in the
 * visible body.
 */
5581 static void CalculateUrgentBurstFactor(
5582 long swath_width_luma_ub,
5583 long swath_width_chroma_ub,
5584 unsigned int DETBufferSizeInKByte,
5585 unsigned int SwathHeightY,
5586 unsigned int SwathHeightC,
5588 double UrgentLatency,
5589 double CursorBufferSize,
5590 unsigned int CursorWidth,
5591 unsigned int CursorBPP,
5594 double BytePerPixelInDETY,
5595 double BytePerPixelInDETC,
5596 double DETBufferSizeY,
5597 double DETBufferSizeC,
5598 double *UrgentBurstFactorCursor,
5599 double *UrgentBurstFactorLuma,
5600 double *UrgentBurstFactorChroma,
5601 bool *NotEnoughUrgentLatencyHiding)
5603 double LinesInDETLuma = 0;
5604 double LinesInDETChroma = 0;
5605 unsigned int LinesInCursorBuffer = 0;
5606 double CursorBufferSizeInTime = 0;
5607 double DETBufferSizeInTimeLuma = 0;
5608 double DETBufferSizeInTimeChroma = 0;
5610 *NotEnoughUrgentLatencyHiding = 0;
5612 if (CursorWidth > 0) {
/* Cursor lines that fit in the buffer, rounded down to a power of two:
 * (CursorBufferSize * 1024) divided by the bytes in one cursor line. */
5613 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
5615 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
5616 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
5617 *NotEnoughUrgentLatencyHiding = 1;
5618 *UrgentBurstFactorCursor = 0;
5620 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
5623 *UrgentBurstFactorCursor = 1;
/* Luma: lines held by the DET, rounded down to whole swaths, expressed as time. */
5627 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
5629 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
5630 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
5631 *NotEnoughUrgentLatencyHiding = 1;
5632 *UrgentBurstFactorLuma = 0;
5634 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
5637 *UrgentBurstFactorLuma = 1;
/* Chroma is evaluated only for planes that carry chroma bytes in the DET. */
5640 if (BytePerPixelInDETC > 0) {
5641 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
/* NOTE(review): the chroma time is scaled by VRatio, the same ratio used for
 * luma above - confirm a chroma-specific ratio was not intended. */
5643 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
5644 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
5645 *NotEnoughUrgentLatencyHiding = 1;
5646 *UrgentBurstFactorChroma = 0;
5648 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
5651 *UrgentBurstFactorChroma = 1;
/*
 * Fill the per-plane line, request and cursor-request delivery times, for
 * both the active display period and the prefetch period.
 *
 * Line delivery time (luma shown; chroma uses the chroma counterparts):
 *   swath_width_luma_ub * DPPPerPlane / HRatio / PixelClock, guarded by the
 *   vertical ratio being <= 1, or
 *   swath_width_luma_ub / PSCL_THROUGHPUT / DPPCLK.
 * The "Prefetch" outputs use the same formulas but test VRatioPrefetchY/C.
 * Chroma outputs are 0 for planes with BytePerPixelC == 0.
 *
 * Request delivery time = line delivery time / requests per swath, where the
 * request count is the swath width divided by the 256-byte block width when
 * SourceScan is not dm_vert, and by the block height in the other form.
 *
 * Cursor request delivery time = CursorWidth / HRatio / PixelClock (or
 * CursorWidth / PSCL_THROUGHPUT / DPPCLK) divided by the number of 256-byte
 * requests per cursor line; both cursor outputs are also assigned 0 on a path
 * paired with the NumberOfCursors > 0 test.
 *
 * NOTE(review): VRatio, HRatio, DPPCLK and k are used below but have no
 * declaration in the lines shown here, and the brace/else lines are likewise
 * absent - confirm against the full file.
 */
5656 static void CalculatePixelDeliveryTimes(
5657 unsigned int NumberOfActivePlanes,
5659 double VRatioChroma[],
5660 double VRatioPrefetchY[],
5661 double VRatioPrefetchC[],
5662 unsigned int swath_width_luma_ub[],
5663 unsigned int swath_width_chroma_ub[],
5664 unsigned int DPPPerPlane[],
5666 double HRatioChroma[],
5667 double PixelClock[],
5668 double PSCL_THROUGHPUT[],
5669 double PSCL_THROUGHPUT_CHROMA[],
5671 int BytePerPixelC[],
5672 enum scan_direction_class SourceScan[],
5673 unsigned int NumberOfCursors[],
5674 unsigned int CursorWidth[][2],
5675 unsigned int CursorBPP[][2],
5676 unsigned int BlockWidth256BytesY[],
5677 unsigned int BlockHeight256BytesY[],
5678 unsigned int BlockWidth256BytesC[],
5679 unsigned int BlockHeight256BytesC[],
5680 double DisplayPipeLineDeliveryTimeLuma[],
5681 double DisplayPipeLineDeliveryTimeChroma[],
5682 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5683 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5684 double DisplayPipeRequestDeliveryTimeLuma[],
5685 double DisplayPipeRequestDeliveryTimeChroma[],
5686 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5687 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5688 double CursorRequestDeliveryTime[],
5689 double CursorRequestDeliveryTimePrefetch[])
5691 double req_per_swath_ub = 0;
/* Pass 1: line delivery times for the active and prefetch periods. */
5694 for (k = 0; k < NumberOfActivePlanes; ++k) {
5695 if (VRatio[k] <= 1) {
5696 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5698 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5701 if (BytePerPixelC[k] == 0) {
5702 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5704 if (VRatioChroma[k] <= 1) {
5705 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5707 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/* Same formulas, selected by the prefetch vertical ratios. */
5711 if (VRatioPrefetchY[k] <= 1) {
5712 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5714 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5717 if (BytePerPixelC[k] == 0) {
5718 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5720 if (VRatioPrefetchC[k] <= 1) {
5721 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5723 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/* Pass 2: split each line delivery time across the requests of one swath. */
5728 for (k = 0; k < NumberOfActivePlanes; ++k) {
5729 if (SourceScan[k] != dm_vert) {
5730 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5732 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5734 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5735 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5736 if (BytePerPixelC[k] == 0) {
5737 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5738 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5740 if (SourceScan[k] != dm_vert) {
5741 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5743 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5745 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5746 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
/* Pass 3: cursor request delivery times (cursor 0 of each plane only). */
5750 for (k = 0; k < NumberOfActivePlanes; ++k) {
5751 int cursor_req_per_width = 0;
/* Number of 256-byte requests per cursor line, rounded up. The division must
 * be done in floating point: with unsigned operands it truncated before
 * dml_ceil ran, so a line shorter than 256 bytes gave 0 and the divisions
 * below then divided by zero. */
5752 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / 256.0 / 8.0, 1);
5753 if (NumberOfCursors[k] > 0) {
5754 if (VRatio[k] <= 1) {
5755 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5757 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5759 if (VRatioPrefetchY[k] <= 1) {
5760 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5762 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5765 CursorRequestDeliveryTime[k] = 0;
5766 CursorRequestDeliveryTimePrefetch[k] = 0;
/*
 * CalculateMetaAndPTETimes() - per-plane timing of meta chunk and PTE group
 * requests.
 *
 * For every plane k in [0, NumberOfActivePlanes) this writes:
 *  - DST_Y_PER_PTE_ROW_NOM_L/C and DST_Y_PER_META_ROW_NOM_L/C: the row height
 *    divided by the matching vertical ratio (chroma entries are 0 when
 *    BytePerPixelC[k] is 0);
 *  - TimePerMetaChunkNominal/VBlank/Flip and the TimePerChromaMetaChunk*
 *    equivalents;
 *  - time_per_pte_group_nom/vblank/flip for luma and chroma.
 *
 * Every result is returned through the output arrays; the function itself
 * returns nothing.
 */
5771 static void CalculateMetaAndPTETimes(
5772 int NumberOfActivePlanes,
5775 int MinMetaChunkSizeBytes,
5778 double VRatioChroma[],
5779 double DestinationLinesToRequestRowInVBlank[],
5780 double DestinationLinesToRequestRowInImmediateFlip[],
5782 double PixelClock[],
5783 int BytePerPixelY[],
5784 int BytePerPixelC[],
5785 enum scan_direction_class SourceScan[],
5786 int dpte_row_height[],
5787 int dpte_row_height_chroma[],
5788 int meta_row_width[],
5789 int meta_row_width_chroma[],
5790 int meta_row_height[],
5791 int meta_row_height_chroma[],
5792 int meta_req_width[],
5793 int meta_req_width_chroma[],
5794 int meta_req_height[],
5795 int meta_req_height_chroma[],
5796 int dpte_group_bytes[],
5797 int PTERequestSizeY[],
5798 int PTERequestSizeC[],
5799 int PixelPTEReqWidthY[],
5800 int PixelPTEReqHeightY[],
5801 int PixelPTEReqWidthC[],
5802 int PixelPTEReqHeightC[],
5803 int dpte_row_width_luma_ub[],
5804 int dpte_row_width_chroma_ub[],
5805 double DST_Y_PER_PTE_ROW_NOM_L[],
5806 double DST_Y_PER_PTE_ROW_NOM_C[],
5807 double DST_Y_PER_META_ROW_NOM_L[],
5808 double DST_Y_PER_META_ROW_NOM_C[],
5809 double TimePerMetaChunkNominal[],
5810 double TimePerChromaMetaChunkNominal[],
5811 double TimePerMetaChunkVBlank[],
5812 double TimePerChromaMetaChunkVBlank[],
5813 double TimePerMetaChunkFlip[],
5814 double TimePerChromaMetaChunkFlip[],
5815 double time_per_pte_group_nom_luma[],
5816 double time_per_pte_group_vblank_luma[],
5817 double time_per_pte_group_flip_luma[],
5818 double time_per_pte_group_nom_chroma[],
5819 double time_per_pte_group_vblank_chroma[],
5820 double time_per_pte_group_flip_chroma[])
5822 unsigned int meta_chunk_width = 0;
5823 unsigned int min_meta_chunk_width = 0;
5824 unsigned int meta_chunk_per_row_int = 0;
5825 unsigned int meta_row_remainder = 0;
5826 unsigned int meta_chunk_threshold = 0;
5827 unsigned int meta_chunks_per_row_ub = 0;
5828 unsigned int meta_chunk_width_chroma = 0;
5829 unsigned int min_meta_chunk_width_chroma = 0;
5830 unsigned int meta_chunk_per_row_int_chroma = 0;
5831 unsigned int meta_row_remainder_chroma = 0;
5832 unsigned int meta_chunk_threshold_chroma = 0;
5833 unsigned int meta_chunks_per_row_ub_chroma = 0;
5834 unsigned int dpte_group_width_luma = 0;
5835 unsigned int dpte_groups_per_row_luma_ub = 0;
5836 unsigned int dpte_group_width_chroma = 0;
5837 unsigned int dpte_groups_per_row_chroma_ub = 0;
/* Pass 1: nominal destination lines per PTE row and per meta row. */
5840 for (k = 0; k < NumberOfActivePlanes; ++k) {
5841 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5842 if (BytePerPixelC[k] == 0) {
5843 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5845 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5847 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5848 if (BytePerPixelC[k] == 0) {
5849 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5851 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
/* Pass 2: meta chunk request times, computed under the DCCEnable[k] test. */
5855 for (k = 0; k < NumberOfActivePlanes; ++k) {
5856 if (DCCEnable[k] == true) {
/*
 * Chunk widths come from the chunk size, the luma bytes per pixel and the
 * meta row height. All of these locals are unsigned int, so the divisions
 * truncate.
 */
5857 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5858 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
5859 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5860 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
/*
 * The threshold uses the request width or the request height depending on
 * whether SourceScan[k] is dm_vert.
 * NOTE(review): meta_chunk_threshold is unsigned; if the request dimension
 * is larger than 2 * min_meta_chunk_width the subtraction wraps to a large
 * value - confirm that cannot happen.
 */
5861 if (SourceScan[k] != dm_vert) {
5862 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5864 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
/*
 * Upper bound on chunks per row: the whole-chunk count plus 1 or plus 2,
 * chosen by comparing the row remainder with the threshold.
 */
5866 if (meta_row_remainder <= meta_chunk_threshold) {
5867 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5869 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
/* Row time (lines * HTotal / PixelClock) split evenly across the chunks. */
5871 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5872 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5873 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5874 if (BytePerPixelC[k] == 0) {
5875 TimePerChromaMetaChunkNominal[k] = 0;
5876 TimePerChromaMetaChunkVBlank[k] = 0;
5877 TimePerChromaMetaChunkFlip[k] = 0;
/* Chroma repeats the luma computation with the chroma inputs. */
5879 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5880 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
/*
 * The division is done in double here (unlike the luma path), but the
 * result is stored in an unsigned int, so the fractional part is dropped.
 */
5881 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5882 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5883 if (SourceScan[k] != dm_vert) {
5884 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5886 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5888 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5889 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5891 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5893 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5894 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5895 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
/* NOTE(review): presumably the DCC-disabled path - every meta chunk time is 0. */
5898 TimePerMetaChunkNominal[k] = 0;
5899 TimePerMetaChunkVBlank[k] = 0;
5900 TimePerMetaChunkFlip[k] = 0;
5901 TimePerChromaMetaChunkNominal[k] = 0;
5902 TimePerChromaMetaChunkVBlank[k] = 0;
5903 TimePerChromaMetaChunkFlip[k] = 0;
/* Pass 3: PTE group request times, computed under the GPUVMEnable test. */
5907 for (k = 0; k < NumberOfActivePlanes; ++k) {
5908 if (GPUVMEnable == true) {
/*
 * Group width: PTE requests per group (group bytes / request size) times
 * the request width, or the request height when SourceScan[k] is dm_vert.
 */
5909 if (SourceScan[k] != dm_vert) {
5910 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5912 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
/* The 1.0 factor forces a floating point division before dml_ceil(). */
5914 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5915 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5916 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5917 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5918 if (BytePerPixelC[k] == 0) {
5919 time_per_pte_group_nom_chroma[k] = 0;
5920 time_per_pte_group_vblank_chroma[k] = 0;
5921 time_per_pte_group_flip_chroma[k] = 0;
5923 if (SourceScan[k] != dm_vert) {
5924 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5926 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5928 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5929 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5930 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5931 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
/* NOTE(review): presumably the GPUVM-disabled path - every PTE group time is 0. */
5934 time_per_pte_group_nom_luma[k] = 0;
5935 time_per_pte_group_vblank_luma[k] = 0;
5936 time_per_pte_group_flip_luma[k] = 0;
5937 time_per_pte_group_nom_chroma[k] = 0;
5938 time_per_pte_group_vblank_chroma[k] = 0;
5939 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes() - per-plane time budget for one VM group
 * and for one VM request, in vblank and in immediate flip.
 *
 * For every plane k the function derives two counts:
 *  - num_group_per_lower_vm_stage: sum of the byte counts divided by
 *    vm_group_bytes[k], each term passed through dml_ceil(..., 1);
 *  - num_req_per_lower_vm_stage: sum of the byte counts divided by 64.
 * Which byte counts take part (dpde0 and/or meta PTE, luma and/or chroma)
 * depends on DCCEnable[k], GPUVMMaxPageTableLevels and BytePerPixelC[k].
 *
 * The outputs TimePerVMGroupVBlank/Flip and TimePerVMRequestVBlank/Flip are
 * the respective destination-line budget (lines * HTotal / PixelClock)
 * divided by those counts.
 *
 * NOTE(review): dpte_row_width_luma_ub and dpte_row_width_chroma_ub do not
 * appear to be read by this function - confirm.
 */
5944 static void CalculateVMGroupAndRequestTimes(
5945 unsigned int NumberOfActivePlanes,
5947 unsigned int GPUVMMaxPageTableLevels,
5948 unsigned int HTotal[],
5949 int BytePerPixelC[],
5950 double DestinationLinesToRequestVMInVBlank[],
5951 double DestinationLinesToRequestVMInImmediateFlip[],
5953 double PixelClock[],
5954 int dpte_row_width_luma_ub[],
5955 int dpte_row_width_chroma_ub[],
5956 int vm_group_bytes[],
5957 unsigned int dpde0_bytes_per_frame_ub_l[],
5958 unsigned int dpde0_bytes_per_frame_ub_c[],
5959 int meta_pte_bytes_per_frame_ub_l[],
5960 int meta_pte_bytes_per_frame_ub_c[],
5961 double TimePerVMGroupVBlank[],
5962 double TimePerVMGroupFlip[],
5963 double TimePerVMRequestVBlank[],
5964 double TimePerVMRequestFlip[])
5966 int num_group_per_lower_vm_stage = 0;
5967 int num_req_per_lower_vm_stage = 0;
5970 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Times are computed only with GPUVM on and either DCC on or more than one page table level. */
5971 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/* Group count, DCC off: only the dpde0 bytes contribute. */
5972 if (DCCEnable[k] == false) {
5973 if (BytePerPixelC[k] > 0) {
5974 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k])
5975 / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k])
5976 / (double) (vm_group_bytes[k]), 1);
5978 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k])
5979 / (double) (vm_group_bytes[k]), 1);
/* Group count with a single page table level: only the meta PTE bytes contribute. */
5982 if (GPUVMMaxPageTableLevels == 1) {
5983 if (BytePerPixelC[k] > 0) {
5984 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k])
5985 / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k])
5986 / (double) (vm_group_bytes[k]), 1);
5988 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k])
5989 / (double) (vm_group_bytes[k]), 1);
/* Remaining case: dpde0 and meta PTE bytes both contribute, plus a constant 2 (with chroma) or 1 (luma only). */
5992 if (BytePerPixelC[k] > 0) {
5993 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
5994 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
5995 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
5996 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
5998 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
5999 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/* Request count: same case split as above, using integer division by 64 and no added constant. */
6004 if (DCCEnable[k] == false) {
6005 if (BytePerPixelC[k] > 0) {
6006 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6008 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6011 if (GPUVMMaxPageTableLevels == 1) {
6012 if (BytePerPixelC[k] > 0) {
6013 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64
6014 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6016 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6019 if (BytePerPixelC[k] > 0) {
6020 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6021 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k]
6022 / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6024 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6025 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/* Spread the available destination lines over the groups and over the requests. */
6030 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
6031 / num_group_per_lower_vm_stage;
6032 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
6033 / num_group_per_lower_vm_stage;
6034 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
6035 / num_req_per_lower_vm_stage;
6036 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
6037 / num_req_per_lower_vm_stage;
/* With more than two page table levels all four times are halved. */
6039 if (GPUVMMaxPageTableLevels > 2) {
6040 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6041 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6042 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6043 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/* NOTE(review): presumably the path taken when the outer condition is false - all times are 0. */
6047 TimePerVMGroupVBlank[k] = 0;
6048 TimePerVMGroupFlip[k] = 0;
6049 TimePerVMRequestVBlank[k] = 0;
6050 TimePerVMRequestFlip[k] = 0;
/*
 * CalculateStutterEfficiency() - stutter period and stutter efficiency for
 * the current set of planes.
 *
 * Steps performed below:
 *  1. per plane, the number of lines held by the luma DET buffer, floored to
 *     a multiple of SwathHeightY[k], converted into a buffering time;
 *  2. StutterPeriod is the smallest of those times; the plane that provides
 *     it also provides the "critical plane" values;
 *  3. AverageReadBandwidth sums the read bandwidth of every plane, dividing
 *     by the DCC rate (capped at 2 or 4) for planes with DCC enabled;
 *  4. StutterBurstTime is derived from the ROB size, ReturnBW, DCFCLK and the
 *     row bandwidth;
 *  5. *StutterEfficiencyNotIncludingVBlank and *StutterEfficiency are
 *     written as percentages (factor 100).
 *
 * StutterPeriodOut is optional: it is only written when it is non-NULL.
 */
6055 static void CalculateStutterEfficiency(
6056 int NumberOfActivePlanes,
6057 long ROBBufferSizeInKByte,
6058 double TotalDataReadBandwidth,
6062 bool SynchronizedVBlank,
6064 unsigned int DETBufferSizeY[],
6065 int BytePerPixelY[],
6066 double BytePerPixelDETY[],
6067 double SwathWidthY[],
6070 double DCCRateLuma[],
6071 double DCCRateChroma[],
6074 double PixelClock[],
6076 enum scan_direction_class SourceScan[],
6077 int BlockHeight256BytesY[],
6078 int BlockWidth256BytesY[],
6079 int BlockHeight256BytesC[],
6080 int BlockWidth256BytesC[],
6081 int DCCYMaxUncompressedBlock[],
6082 int DCCCMaxUncompressedBlock[],
6085 bool WritebackEnable[],
6086 double ReadBandwidthPlaneLuma[],
6087 double ReadBandwidthPlaneChroma[],
6088 double meta_row_bw[],
6089 double dpte_row_bw[],
6090 double *StutterEfficiencyNotIncludingVBlank,
6091 double *StutterEfficiency,
6092 double *StutterPeriodOut)
6094 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
6095 double FrameTimeForMinFullDETBufferingTime = 0;
6096 double StutterPeriod = 0;
6097 double AverageReadBandwidth = 0;
6098 double TotalRowReadBandwidth = 0;
6099 double AverageDCCCompressionRate = 0;
6100 double PartOfBurstThatFitsInROB = 0;
6101 double StutterBurstTime = 0;
6102 int TotalActiveWriteback = 0;
6103 double VBlankTime = 0;
6104 double SmallestVBlank = 0;
6105 int BytePerPixelYCriticalPlane = 0;
6106 double SwathWidthYCriticalPlane = 0;
6107 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
6108 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
6109 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6110 double MaximumEffectiveCompressionLuma = 0;
6111 double MaximumEffectiveCompressionChroma = 0;
/* Lines that fit in the luma DET buffer and the time it takes to consume them. */
6114 for (k = 0; k < NumberOfActivePlanes; ++k) {
6115 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
6116 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
6117 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
/* Start from plane 0, then let any plane with a shorter buffering time take over. */
6120 StutterPeriod = FullDETBufferingTimeY[0];
6121 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
6122 BytePerPixelYCriticalPlane = BytePerPixelY[0];
6123 SwathWidthYCriticalPlane = SwathWidthY[0];
6124 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
6125 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
6127 for (k = 0; k < NumberOfActivePlanes; ++k) {
6128 if (FullDETBufferingTimeY[k] < StutterPeriod) {
6129 StutterPeriod = FullDETBufferingTimeY[k];
6130 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
6131 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6132 SwathWidthYCriticalPlane = SwathWidthY[k];
6133 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
6134 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
6138 AverageReadBandwidth = 0;
6139 TotalRowReadBandwidth = 0;
6140 for (k = 0; k < NumberOfActivePlanes; ++k) {
6141 if (DCCEnable[k] == true) {
/*
 * The effective compression is limited to 2 when the 256-byte block
 * dimension along the swath is larger than the swath height, or when the
 * max uncompressed block is below 256; the other value used is 4.
 */
6142 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
6143 || (SourceScan[k] != dm_vert
6144 && BlockHeight256BytesY[k] > SwathHeightY[k])
6145 || DCCYMaxUncompressedBlock[k] < 256) {
6146 MaximumEffectiveCompressionLuma = 2;
6148 MaximumEffectiveCompressionLuma = 4;
6150 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
/* Chroma is only added when the plane has chroma read bandwidth. */
6152 if (ReadBandwidthPlaneChroma[k] > 0) {
6153 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6154 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
6155 || DCCCMaxUncompressedBlock[k] < 256) {
6156 MaximumEffectiveCompressionChroma = 2;
6158 MaximumEffectiveCompressionChroma = 4;
6160 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
/* NOTE(review): presumably the DCC-disabled path - bandwidth is added uncompressed. */
6163 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6165 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
/*
 * Burst time: the part of the burst that fits in the ROB is returned at
 * ReturnBW, the rest at DCFCLK * 64, plus the row traffic at ReturnBW. The
 * result is never smaller than the time to finish the critical plane's swath.
 */
6168 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
6169 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6170 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
6171 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6172 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6174 TotalActiveWriteback = 0;
6175 for (k = 0; k < NumberOfActivePlanes; ++k) {
6176 if (WritebackEnable[k] == true) {
6177 TotalActiveWriteback = TotalActiveWriteback + 1;
/* The efficiency formula is applied when no writeback is active; the other value assigned is 0. */
6181 if (TotalActiveWriteback == 0) {
6182 *StutterEfficiencyNotIncludingVBlank = (1
6183 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
6185 *StutterEfficiencyNotIncludingVBlank = 0;
/* Vblank time is taken into account with synchronized vblank or a single plane. */
6188 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6189 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
6193 for (k = 0; k < NumberOfActivePlanes; ++k) {
6194 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6195 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
6199 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
/* Weight the non-vblank efficiency by the active part of the frame and add the smallest vblank. */
6202 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
/* Optional output: callers may pass NULL. */
6204 if (StutterPeriodOut)
6205 *StutterPeriodOut = StutterPeriod;
/*
 * CalculateSwathAndDETConfiguration() - choose swath heights and split the
 * DET buffer between luma and chroma for every plane.
 *
 * The function first calls CalculateSwathWidth() to obtain the swath widths
 * and the maximum swath heights. Then, per plane, it:
 *  - derives the minimum swath heights from the pixel format, the tiling
 *    mode and the scan direction;
 *  - computes the swath sizes in bytes for the maximum and minimum heights
 *    (rounded with dml_ceil(..., 256) for dm_420_10);
 *  - picks the largest luma/chroma height combination whose combined size is
 *    at most half of the DET buffer (DETBufferSizeInKByte * 1024 / 2);
 *  - splits the DET buffer into DETBufferSizeY[k] and DETBufferSizeC[k];
 *  - sets ViewportSizeSupportPerPlane[k], and clears *ViewportSizeSupport
 *    as soon as one plane is not supported.
 */
6208 static void CalculateSwathAndDETConfiguration(
6209 bool ForceSingleDPP,
6210 int NumberOfActivePlanes,
6211 unsigned int DETBufferSizeInKByte,
6212 double MaximumSwathWidthLuma[],
6213 double MaximumSwathWidthChroma[],
6214 enum scan_direction_class SourceScan[],
6215 enum source_format_class SourcePixelFormat[],
6216 enum dm_swizzle_mode SurfaceTiling[],
6217 int ViewportWidth[],
6218 int ViewportHeight[],
6219 int SurfaceWidthY[],
6220 int SurfaceWidthC[],
6221 int SurfaceHeightY[],
6222 int SurfaceHeightC[],
6223 int Read256BytesBlockHeightY[],
6224 int Read256BytesBlockHeightC[],
6225 int Read256BytesBlockWidthY[],
6226 int Read256BytesBlockWidthC[],
6227 enum odm_combine_mode ODMCombineEnabled[],
6228 int BlendingAndTiming[],
6231 double BytePerPixDETY[],
6232 double BytePerPixDETC[],
6235 double HRatioChroma[],
6237 int swath_width_luma_ub[],
6238 int swath_width_chroma_ub[],
6239 double SwathWidth[],
6240 double SwathWidthChroma[],
6243 unsigned int DETBufferSizeY[],
6244 unsigned int DETBufferSizeC[],
6245 bool ViewportSizeSupportPerPlane[],
6246 bool *ViewportSizeSupport)
6248 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
6249 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
6250 int MinimumSwathHeightY = 0;
6251 int MinimumSwathHeightC = 0;
6252 long RoundedUpMaxSwathSizeBytesY = 0;
6253 long RoundedUpMaxSwathSizeBytesC = 0;
6254 long RoundedUpMinSwathSizeBytesY = 0;
6255 long RoundedUpMinSwathSizeBytesC = 0;
6256 long RoundedUpSwathSizeBytesY = 0;
6257 long RoundedUpSwathSizeBytesC = 0;
6258 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
6259 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
/*
 * Fills the swath widths and maximum swath heights used below.
 * NOTE(review): swath_width_luma_ub/swath_width_chroma_ub are int[] here but
 * unsigned int[] in CalculateSwathWidth()'s parameter list - confirm the
 * signedness mismatch is intended.
 */
6262 CalculateSwathWidth(
6264 NumberOfActivePlanes,
6276 Read256BytesBlockHeightY,
6277 Read256BytesBlockHeightC,
6278 Read256BytesBlockWidthY,
6279 Read256BytesBlockWidthC,
6284 SwathWidthSingleDPP,
6285 SwathWidthSingleDPPChroma,
6288 MaximumSwathHeightY,
6289 MaximumSwathHeightC,
6290 swath_width_luma_ub,
6291 swath_width_chroma_ub);
6293 *ViewportSizeSupport = true;
6294 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Minimum swath heights for the formats listed in this condition. */
6295 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
6296 || SourcePixelFormat[k] == dm_444_16
6297 || SourcePixelFormat[k] == dm_mono_16
6298 || SourcePixelFormat[k] == dm_mono_8
6299 || SourcePixelFormat[k] == dm_rgbe)) {
6300 if (SurfaceTiling[k] == dm_sw_linear
6301 || (SourcePixelFormat[k] == dm_444_64
6302 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6303 && SourceScan[k] != dm_vert)) {
6304 MinimumSwathHeightY = MaximumSwathHeightY[k];
/*
 * NOTE(review): dm_444_8 is not one of the formats accepted by the
 * enclosing condition, so this test looks like it can never be true here -
 * confirm against the HW formula before touching it.
 */
6305 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6306 MinimumSwathHeightY = MaximumSwathHeightY[k];
6308 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6310 MinimumSwathHeightC = MaximumSwathHeightC[k];
/* Minimum swath heights chosen by tiling mode, dm_rgbe_alpha / dm_420_8 and scan direction. */
6312 if (SurfaceTiling[k] == dm_sw_linear) {
6313 MinimumSwathHeightY = MaximumSwathHeightY[k];
6314 MinimumSwathHeightC = MaximumSwathHeightC[k];
6315 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
6316 && SourceScan[k] == dm_vert) {
6317 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6318 MinimumSwathHeightC = MaximumSwathHeightC[k];
6319 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6320 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6321 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6322 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6323 MinimumSwathHeightY = MaximumSwathHeightY[k];
6324 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6326 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6327 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
/* Swath sizes in bytes for the maximum and minimum heights; the double products are stored into long. */
6331 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6332 * MaximumSwathHeightY[k];
6333 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6334 * MinimumSwathHeightY;
6335 if (SourcePixelFormat[k] == dm_420_10) {
6336 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6337 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6339 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6340 * MaximumSwathHeightC[k];
6341 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6342 * MinimumSwathHeightC;
6343 if (SourcePixelFormat[k] == dm_420_10) {
6344 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6345 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
/*
 * Height selection, tried in this order against half of the DET buffer:
 * max Y + max C, then min Y + max C (luma at least 1.5x chroma), then
 * max Y + min C (luma below 1.5x chroma), and finally min Y + min C.
 */
6348 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6349 <= DETBufferSizeInKByte * 1024 / 2) {
6350 SwathHeightY[k] = MaximumSwathHeightY[k];
6351 SwathHeightC[k] = MaximumSwathHeightC[k];
6352 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6353 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6354 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6355 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6356 <= DETBufferSizeInKByte * 1024 / 2) {
6357 SwathHeightY[k] = MinimumSwathHeightY;
6358 SwathHeightC[k] = MaximumSwathHeightC[k];
6359 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6360 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6361 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6362 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6363 <= DETBufferSizeInKByte * 1024 / 2) {
6364 SwathHeightY[k] = MaximumSwathHeightY[k];
6365 SwathHeightC[k] = MinimumSwathHeightC;
6366 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6367 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6369 SwathHeightY[k] = MinimumSwathHeightY;
6370 SwathHeightC[k] = MinimumSwathHeightC;
6371 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6372 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
/*
 * DET split: everything to luma when there is no chroma swath, half/half
 * when the luma swath is at most 1.5x the chroma swath, and the remaining
 * assignment gives 2/3 to luma and 1/3 to chroma.
 */
6375 if (SwathHeightC[k] == 0) {
6376 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
6377 DETBufferSizeC[k] = 0;
6378 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6379 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
6380 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
6382 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
6383 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
/*
 * Unsupported when even the minimum swaths exceed half of the DET buffer,
 * or when a swath is wider than the maximum allowed width.
 */
6386 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6387 > DETBufferSizeInKByte * 1024 / 2
6388 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6389 || (SwathHeightC[k] > 0
6390 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6391 *ViewportSizeSupport = false;
6392 ViewportSizeSupportPerPlane[k] = false;
6394 ViewportSizeSupportPerPlane[k] = true;
/*
 * CalculateSwathWidth() - per-plane swath widths and maximum swath heights.
 *
 * Outputs, for every plane k:
 *  - SwathWidthSingleDPPY/C[k]: viewport width, or viewport height when
 *    SourceScan[k] is dm_vert (chroma is halved for the 4:2:0 formats);
 *  - SwathWidthY/C[k]: the single-DPP width adjusted for ODM combine, for
 *    DPPPerPlane[k] == 2, and for ForceSingleDPP;
 *  - MaximumSwathHeightY/C[k]: the 256-byte block height, or the block width
 *    in the second (mirrored) group of assignments;
 *  - swath_width_luma_ub/chroma_ub[k]: the swath width rounded to the
 *    256-byte block size, limited by the rounded surface size; chroma is 0
 *    when BytePerPixC[k] is 0.
 */
6399 static void CalculateSwathWidth(
6400 bool ForceSingleDPP,
6401 int NumberOfActivePlanes,
6402 enum source_format_class SourcePixelFormat[],
6403 enum scan_direction_class SourceScan[],
6404 unsigned int ViewportWidth[],
6405 unsigned int ViewportHeight[],
6406 unsigned int SurfaceWidthY[],
6407 unsigned int SurfaceWidthC[],
6408 unsigned int SurfaceHeightY[],
6409 unsigned int SurfaceHeightC[],
6410 enum odm_combine_mode ODMCombineEnabled[],
6413 int Read256BytesBlockHeightY[],
6414 int Read256BytesBlockHeightC[],
6415 int Read256BytesBlockWidthY[],
6416 int Read256BytesBlockWidthC[],
6417 int BlendingAndTiming[],
6418 unsigned int HActive[],
6421 double SwathWidthSingleDPPY[],
6422 double SwathWidthSingleDPPC[],
6423 double SwathWidthY[],
6424 double SwathWidthC[],
6425 int MaximumSwathHeightY[],
6426 int MaximumSwathHeightC[],
6427 unsigned int swath_width_luma_ub[],
6428 unsigned int swath_width_chroma_ub[])
6431 long surface_width_ub_l;
6432 long surface_height_ub_l;
6433 long surface_width_ub_c;
6434 long surface_height_ub_c;
6436 for (k = 0; k < NumberOfActivePlanes; ++k) {
6437 enum odm_combine_mode MainPlaneODMCombine = 0;
/* Surface dimensions passed through dml_ceil() with the 256-byte block size. */
6438 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6439 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6440 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6441 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6443 if (SourceScan[k] != dm_vert) {
6444 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6446 SwathWidthSingleDPPY[k] = ViewportHeight[k];
/* Use the ODM combine mode of the plane index that BlendingAndTiming[k] names. */
6449 MainPlaneODMCombine = ODMCombineEnabled[k];
6450 for (j = 0; j < NumberOfActivePlanes; ++j) {
6451 if (BlendingAndTiming[k] == j) {
6452 MainPlaneODMCombine = ODMCombineEnabled[j];
/*
 * 4:1 and 2:1 ODM combine cap the width at HActive / 4 (or / 2) scaled by
 * HRatio; otherwise a plane with two DPPs uses half the single-DPP width.
 */
6456 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6457 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6458 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6459 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6460 } else if (DPPPerPlane[k] == 2) {
6461 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6463 SwathWidthY[k] = SwathWidthSingleDPPY[k];
/* Chroma width is half of luma for the 4:2:0 formats, otherwise equal to luma. */
6466 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6467 SwathWidthC[k] = SwathWidthY[k] / 2;
6468 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6470 SwathWidthC[k] = SwathWidthY[k];
6471 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
/* ForceSingleDPP replaces the widths computed above with the single-DPP values. */
6474 if (ForceSingleDPP == true) {
6475 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6476 SwathWidthC[k] = SwathWidthSingleDPPC[k];
/*
 * NOTE(review): these four values were already computed from the same
 * expressions at the top of the loop body; the recomputation looks redundant.
 */
6479 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6480 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6481 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6482 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
/*
 * The two groups below mirror each other: the first pairs the swath height
 * with the block height and rounds the width with the block width, the
 * second swaps block width and block height.
 */
6484 if (SourceScan[k] != dm_vert) {
6485 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6486 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6487 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6488 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6489 if (BytePerPixC[k] > 0) {
6490 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6491 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6493 swath_width_chroma_ub[k] = 0;
6496 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6497 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6498 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6499 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6500 if (BytePerPixC[k] > 0) {
6501 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6502 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6504 swath_width_chroma_ub[k] = 0;
/*
 * CalculateExtraLatency() - extra latency as a time value.
 *
 * Obtains the extra latency byte count from CalculateExtraLatencyBytes() and
 * returns
 *   (RoundTripPingLatencyCycles + 32) / DCFCLK + ExtraLatencyBytes / ReturnBW.
 */
6510 static double CalculateExtraLatency(
6511 long RoundTripPingLatencyCycles,
6512 long ReorderingBytes,
6514 int TotalNumberOfActiveDPP,
6515 int PixelChunkSizeInKByte,
6516 int TotalNumberOfDCCActiveDPP,
6521 int NumberOfActivePlanes,
6523 int dpte_group_bytes[],
6524 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6525 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6526 double HostVMMinPageSize,
6527 int HostVMMaxNonCachedPageTableLevels)
6529 double ExtraLatencyBytes = 0;
6530 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6532 TotalNumberOfActiveDPP,
6533 PixelChunkSizeInKByte,
6534 TotalNumberOfDCCActiveDPP,
6538 NumberOfActivePlanes,
6541 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6542 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6544 HostVMMaxNonCachedPageTableLevels);
/* Fixed cycle cost at DCFCLK plus the time to return the extra bytes at ReturnBW. */
6546 return (RoundTripPingLatencyCycles + 32) / DCFCLK + ExtraLatencyBytes / ReturnBW;
/*
 * CalculateExtraLatencyBytes() - number of bytes that contribute to the
 * extra latency.
 *
 * The total starts as ReorderingBytes plus the pixel and meta chunk sizes of
 * the active DPPs (multiplied by 1024.0). With GPUVMEnable set, every plane
 * adds
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *   * HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels and HostVMInefficiencyFactor are derived from the host
 * VM settings when both GPUVMEnable and HostVMEnable are true; the other
 * values assigned below are 0 and 1.
 */
6549 static double CalculateExtraLatencyBytes(
6550 long ReorderingBytes,
6551 int TotalNumberOfActiveDPP,
6552 int PixelChunkSizeInKByte,
6553 int TotalNumberOfDCCActiveDPP,
6557 int NumberOfActivePlanes,
6559 int dpte_group_bytes[],
6560 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6561 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6562 double HostVMMinPageSize,
6563 int HostVMMaxNonCachedPageTableLevels)
6566 double HostVMInefficiencyFactor = 0;
6567 int HostVMDynamicLevels = 0;
6570 if (GPUVMEnable == true && HostVMEnable == true) {
6571 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
/*
 * Larger host VM pages remove page table levels: none below 2048, one from
 * 2048 up to (not including) 1048576, and two in the remaining assignment;
 * the result is clamped at 0.
 */
6572 if (HostVMMinPageSize < 2048) {
6573 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
6574 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
6575 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
6577 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
6580 HostVMInefficiencyFactor = 1;
6581 HostVMDynamicLevels = 0;
/* Base byte count: reordering bytes plus chunk sizes, multiplied by 1024.0. */
6584 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
/* Page table traffic is added per plane when GPUVM is enabled. */
6586 if (GPUVMEnable == true) {
6587 for (k = 0; k < NumberOfActivePlanes; ++k) {
6588 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * CalculateUrgentLatency() - urgent latency value.
 *
 * Takes the largest of the three urgent latency inputs. When
 * DoUrgentLatencyAdjustment is true it adds
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
6595 static double CalculateUrgentLatency(
6596 double UrgentLatencyPixelDataOnly,
6597 double UrgentLatencyPixelMixedWithVMData,
6598 double UrgentLatencyVMDataOnly,
6599 bool DoUrgentLatencyAdjustment,
6600 double UrgentLatencyAdjustmentFabricClockComponent,
6601 double UrgentLatencyAdjustmentFabricClockReference,
/* Worst case of the three latency inputs. */
6606 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
6607 if (DoUrgentLatencyAdjustment == true) {
/* The adjustment term is positive when FabricClock is below the reference clock. */
6608 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * UseMinimumDCFCLK - derive the DCFCLK to use for every voltage state.
 *
 * For each state i in [0, mode_lib->soc.num_states) and each j in {0, 1}
 * this writes DCFCLKState[i][j]: the larger of the average-bandwidth and the
 * peak-bandwidth DCFCLK requirement, scaled by 1.05 and by
 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100), and then
 * capped at DCFCLKPerState[i].
 *
 * Index conventions used below: [i] selects the state, [j] is a second
 * per-state index that only takes the values 0 and 1 (its meaning is not
 * stated in this excerpt) and [k] selects the plane, bounded by
 * NumberOfActivePlanes.
 *
 * In the lines visible here DCFCLKState is the only caller-provided array
 * that is stored to; everything else is read.
 *
 * NOTE(review): several lines of this function are elided from this excerpt
 * (see the gaps in the embedded numbering), including closing braces and the
 * lines that introduce the else branches.  ReturnBusWidth, MetaChunkSize,
 * GPUVMEnable, HostVMEnable, VTotal, VActive and HTotal are used below but
 * their declarations are not visible; presumably they are parameters on the
 * elided lines - confirm against the full file.
 */
6614 static void UseMinimumDCFCLK(
6615 struct display_mode_lib *mode_lib,
6616 int MaxInterDCNTileRepeaters,
6617 int MaxPrefetchMode,
6618 double FinalDRAMClockChangeLatency,
6619 double SREnterPlusExitTime,
6621 int RoundTripPingLatencyCycles,
6622 int ReorderingBytes,
6623 int PixelChunkSizeInKByte,
6626 int GPUVMMaxPageTableLevels,
6628 int NumberOfActivePlanes,
6629 double HostVMMinPageSize,
6630 int HostVMMaxNonCachedPageTableLevels,
6631 bool DynamicMetadataVMEnabled,
6632 enum immediate_flip_requirement ImmediateFlipRequirement,
6633 bool ProgressiveToInterlaceUnitInOPP,
6634 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
6635 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6636 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6637 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
/* Per-plane inputs, indexed [k]. */
6640 int DynamicMetadataTransmittedBytes[],
6641 int DynamicMetadataLinesBeforeActiveRequired[],
/* Per-state inputs, indexed [i], [i][j] or [i][j][k]. */
6643 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
6644 double RequiredDISPCLK[][2],
6645 double UrgLatency[],
6646 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
6647 double ProjectedDCFCLKDeepSleep[][2],
6648 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
6649 double TotalVActivePixelBandwidth[][2],
6650 double TotalVActiveCursorBandwidth[][2],
6651 double TotalMetaRowBandwidth[][2],
6652 double TotalDPTERowBandwidth[][2],
6653 unsigned int TotalNumberOfActiveDPP[][2],
6654 unsigned int TotalNumberOfDCCActiveDPP[][2],
6655 int dpte_group_bytes[],
6656 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
6657 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
6658 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
6659 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
6660 int BytePerPixelY[],
6661 int BytePerPixelC[],
6663 double PixelClock[],
6664 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
6665 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
6666 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
6667 bool DynamicMetadataEnable[],
6668 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
6669 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
6670 double ReadBandwidthLuma[],
6671 double ReadBandwidthChroma[],
/* DCFCLKPerState[i] is the cap for state i; DCFCLKState[i][j] is the result. */
6672 double DCFCLKPerState[],
6673 double DCFCLKState[][2])
/* Efficiency factors shared by every (i, j) iteration. */
6675 double NormalEfficiency = 0;
6676 double PTEEfficiency = 0;
6677 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6678 unsigned int i, j, k;
/*
 * NormalEfficiency: the applicable percentage (pixel mixed with VM data when
 * HostVM is enabled, pixel data only otherwise) divided by 100, i.e. turned
 * into a fraction.
 */
6680 NormalEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6681 : PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
/*
 * PTEEfficiency: ratio of the VM-data-only percentage to the
 * pixel-mixed-with-VM-data percentage when HostVM is enabled, 1.0 otherwise.
 */
6682 PTEEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6683 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
6684 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6685 for (j = 0; j <= 1; ++j) {
/* Scratch values for this (i, j) pair; re-zeroed on every iteration. */
6686 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6687 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6688 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6689 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6690 double MinimumTWait = 0;
6691 double NonDPTEBandwidth = 0;
6692 double DPTEBandwidth = 0;
6693 double DCFCLKRequiredForAverageBandwidth = 0;
6694 double ExtraLatencyBytes = 0;
6695 double ExtraLatencyCycles = 0;
6696 double DCFCLKRequiredForPeakBandwidth = 0;
6697 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6698 double MinimumTvmPlus2Tr0 = 0;
/*
 * Sum over all planes of NoOfDPP * DPTEBytesPerRow divided by
 * 15.75 * (HTotal / PixelClock).  The 15.75 factor is taken as-is from the
 * HW formula; its derivation is not visible in this excerpt.
 */
6700 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6701 for (k = 0; k < NumberOfActivePlanes; ++k) {
6702 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6703 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
/*
 * Copy this state's per-plane DPP counts into a flat int array, which is
 * what CalculateExtraLatencyBytes() is handed further down.
 *
 * NOTE(review): k is unsigned int while NumberOfActivePlanes is int, so in
 * "k <= NumberOfActivePlanes - 1" the right-hand side is converted to
 * unsigned.  With NumberOfActivePlanes == 0 it becomes UINT_MAX and the loop
 * would run past the arrays, whereas the "k < NumberOfActivePlanes" form used
 * by the neighbouring loops simply does not execute.  Presumably callers
 * never pass 0 planes - confirm.
 */
6706 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6707 NoOfDPPState[k] = NoOfDPP[i][j][k];
/*
 * CalculateTWait() is defined outside this excerpt; its result is subtracted
 * from every time budget computed below.
 */
6710 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
6711 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
/*
 * With HostVM enabled or immediate flip required, use the prefetch/flip DPTE
 * row bandwidth summed above instead of the supplied TotalDPTERowBandwidth.
 */
6712 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
6713 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
/*
 * Average-bandwidth requirement: the largest of
 *  - the projected deep-sleep DCFCLK,
 *  - total bandwidth over the return bus width, derated by the maximum
 *    average SDP port percentage,
 *  - total bandwidth (DPTE part derated by PTEEfficiency) derated by
 *    NormalEfficiency, over the return bus width.
 */
6714 DCFCLKRequiredForAverageBandwidth = dml_max3(ProjectedDCFCLKDeepSleep[i][j],
6715 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth / (MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6716 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / ReturnBusWidth);
/*
 * Extra latency expressed in DCFCLK cycles: the round-trip ping latency plus
 * a fixed 32 cycles plus the extra-latency bytes pushed through the derated
 * return bus.
 */
6718 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP[i][j],
6719 MetaChunkSize, GPUVMEnable, HostVMEnable, NumberOfActivePlanes, NoOfDPPState, dpte_group_bytes,
6720 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6721 HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
6722 ExtraLatencyCycles = RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
/* Per-plane peak-bandwidth requirement. */
6723 for (k = 0; k < NumberOfActivePlanes; ++k) {
6724 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6725 double ExpectedPrefetchBWAcceleration = { 0 };
6726 double PrefetchTime = { 0 };
/*
 * Luma plus chroma prefetch data (lines * swath width * bytes per pixel)
 * pushed through the derated return bus.
 */
6728 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
6729 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
/*
 * Everything fetched during prefetch: twice the extra latency shared between
 * the plane's DPPs, the PDE/meta-PTE bytes (counted only when
 * GPUVMMaxPageTableLevels > 2), two DPTE rows, two meta rows and the pixel
 * data computed just above.
 */
6730 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6731 / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * DPTEBytesPerRow[i][j][k] / PTEEfficiency
6732 / NormalEfficiency / ReturnBusWidth + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
/* Larger of the luma/chroma prefetch line counts times HTotal / PixelClock. */
6733 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
6734 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
/*
 * Non-zero only when GPUVM, this plane's dynamic metadata and
 * DynamicMetadataVMEnabled are all on.
 */
6735 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
6736 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
/*
 * Time left for prefetch: (MaximumVStartup - 1) lines, minus the minimum wait
 * time, minus an urgent-latency multiple that depends on the page table
 * level counts, minus the dynamic metadata VM latency.
 */
6737 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait - UrgLatency[i] * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels
6738 : GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6740 if (PrefetchTime > 0) {
6741 double ExpectedVRatioPrefetch = { 0 };
6742 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
/*
 * Base rate (pixel cycles per prefetch-line time, times the DPP count),
 * scaled up when the expected prefetch ratio exceeds 1 and again when it
 * exceeds 4, then multiplied by the expected bandwidth acceleration.
 */
6743 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6744 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6745 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
6746 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6747 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / ReturnBusWidth;
/*
 * NOTE(review): presumably the else branch of the PrefetchTime > 0 test (the
 * line introducing it is elided here): with no prefetch time available the
 * plane is charged the full DCFCLK of the state.
 */
6750 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
6752 if (DynamicMetadataEnable[k] == true) {
6753 double TsetupPipe = { 0 };
6754 double TdmbfPipe = { 0 };
6755 double TdmsksPipe = { 0 };
6756 double TdmecPipe = { 0 };
6757 double AllowedTimeForUrgentExtraLatency = { 0 };
/*
 * NOTE(review): some arguments of this call are on elided lines; presumably
 * the four T*Pipe locals above are filled in through it - confirm against
 * the full file.
 */
6759 CalculateDynamicMetadataParameters(
6760 MaxInterDCNTileRepeaters,
6761 RequiredDPPCLK[i][j][k],
6762 RequiredDISPCLK[i][j],
6763 ProjectedDCFCLKDeepSleep[i][j],
6766 VTotal[k] - VActive[k],
6767 DynamicMetadataTransmittedBytes[k],
6768 DynamicMetadataLinesBeforeActiveRequired[k],
6770 ProgressiveToInterlaceUnitInOPP,
/*
 * Time left to absorb the extra latency once the wait time and all the
 * dynamic metadata timing components have been taken out of MaximumVStartup
 * lines.
 */
6775 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TsetupPipe
6776 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6777 if (AllowedTimeForUrgentExtraLatency > 0) {
6778 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6779 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
/*
 * NOTE(review): presumably the else branch of the test just above (its
 * introducing line is elided): no time left, so charge the full DCFCLK of
 * the state.
 */
6781 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
/*
 * The peak requirement starts as the sum of the per-plane requirements.
 * NOTE(review): this loop uses the same "k <= NumberOfActivePlanes - 1"
 * unsigned-versus-int comparison as the DPP copy loop near the top of the
 * (i, j) body, with the same zero-plane caveat.
 */
6785 DCFCLKRequiredForPeakBandwidth = 0;
6786 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6787 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
/*
 * Urgent latency times a page-table-level dependent multiplier: 0 without
 * GPUVM, GPUVMMaxPageTableLevels + 1 with GPUVM only, and
 * (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1
 * with HostVM as well.
 */
6789 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? (HostVMEnable == true ?
6790 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
6791 for (k = 0; k < NumberOfActivePlanes; ++k) {
6792 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6793 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
/*
 * If the available time does not exceed the minimum plus a quarter of the
 * prefetch line time, the peak requirement becomes the full DCFCLK of the
 * state.
 */
6794 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6795 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
/*
 * NOTE(review): presumably the else branch (its introducing line is elided):
 * raise the requirement so that the extra latency, and the extra latency plus
 * the pixel prefetch cycles, fit in the remaining time.
 */
6797 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6798 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6799 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
/*
 * Final value: the larger of the two requirements, scaled by 1.05 and by the
 * configured percentage margin, but never above DCFCLKPerState[i].
 */
6802 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6803 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6808 #endif /* CONFIG_DRM_AMD_DC_DCN */