2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
28 #include "../display_mode_lib.h"
29 #include "../dcn30/display_mode_vba_30.h"
30 #include "display_mode_vba_31.h"
31 #include "../dml_inline_defs.h"
35 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
37 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
38 * ways. Unless there is something clearly wrong with it the code should
39 * remain as-is as it provides us with a guarantee from HW that it is correct.
43 #define BPP_BLENDED_PIPE 0xffffffff
44 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
45 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
47 // For DML-C changes that haven't been propagated to VBA yet
48 //#define __DML_VBA_ALLOW_DELTA__
50 // Move these to IP parameters/constants
52 // The vstartup value at which DML starts trying to determine whether the mode can be supported
53 #define __DML_VBA_MIN_VSTARTUP__ 9
55 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
56 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
58 // fudge factor for min dcfclk calculation
59 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
65 double DCFCLKDeepSleep;
66 unsigned int DPPPerPlane;
70 enum scan_direction_class SourceScan;
71 unsigned int BlockWidth256BytesY;
72 unsigned int BlockHeight256BytesY;
73 unsigned int BlockWidth256BytesC;
74 unsigned int BlockHeight256BytesC;
75 unsigned int InterlaceEnable;
76 unsigned int NumberOfCursors;
79 unsigned int DCCEnable;
80 bool ODMCombineIsEnabled;
81 enum source_format_class SourcePixelFormat;
84 bool ProgressiveToInterlaceUnitInOPP;
88 #define BPP_BLENDED_PIPE 0xffffffff
90 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
91 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
92 static unsigned int dscceComputeDelay(
95 unsigned int sliceWidth,
96 unsigned int numSlices,
97 enum output_format_class pixelFormat,
98 enum output_encoder_class Output);
99 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
100 static bool CalculatePrefetchSchedule(
101 struct display_mode_lib *mode_lib,
102 double HostVMInefficiencyFactor,
104 unsigned int DSCDelay,
105 double DPPCLKDelaySubtotalPlusCNVCFormater,
106 double DPPCLKDelaySCL,
107 double DPPCLKDelaySCLLBOnly,
108 double DPPCLKDelayCNVCCursor,
109 double DISPCLKDelaySubtotal,
110 unsigned int DPP_RECOUT_WIDTH,
111 enum output_format_class OutputFormat,
112 unsigned int MaxInterDCNTileRepeaters,
113 unsigned int VStartup,
114 unsigned int MaxVStartup,
115 unsigned int GPUVMPageTableLevels,
118 unsigned int HostVMMaxNonCachedPageTableLevels,
119 double HostVMMinPageSize,
120 bool DynamicMetadataEnable,
121 bool DynamicMetadataVMEnabled,
122 int DynamicMetadataLinesBeforeActiveRequired,
123 unsigned int DynamicMetadataTransmittedBytes,
124 double UrgentLatency,
125 double UrgentExtraLatency,
127 unsigned int PDEAndMetaPTEBytesFrame,
128 unsigned int MetaRowByte,
129 unsigned int PixelPTEBytesPerRow,
130 double PrefetchSourceLinesY,
131 unsigned int SwathWidthY,
132 double VInitPreFillY,
133 unsigned int MaxNumSwathY,
134 double PrefetchSourceLinesC,
135 unsigned int SwathWidthC,
136 double VInitPreFillC,
137 unsigned int MaxNumSwathC,
138 int swath_width_luma_ub,
139 int swath_width_chroma_ub,
140 unsigned int SwathHeightY,
141 unsigned int SwathHeightC,
143 double *DSTXAfterScaler,
144 double *DSTYAfterScaler,
145 double *DestinationLinesForPrefetch,
146 double *PrefetchBandwidth,
147 double *DestinationLinesToRequestVMInVBlank,
148 double *DestinationLinesToRequestRowInVBlank,
149 double *VRatioPrefetchY,
150 double *VRatioPrefetchC,
151 double *RequiredPrefetchPixDataBWLuma,
152 double *RequiredPrefetchPixDataBWChroma,
153 bool *NotEnoughTimeForDynamicMetadata,
155 double *prefetch_vmrow_bw,
159 int *VUpdateOffsetPix,
160 double *VUpdateWidthPix,
161 double *VReadyOffsetPix);
162 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
163 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
164 static void CalculateDCCConfiguration(
166 bool DCCProgrammingAssumesScanDirectionUnknown,
167 enum source_format_class SourcePixelFormat,
168 unsigned int SurfaceWidthLuma,
169 unsigned int SurfaceWidthChroma,
170 unsigned int SurfaceHeightLuma,
171 unsigned int SurfaceHeightChroma,
172 double DETBufferSize,
173 unsigned int RequestHeight256ByteLuma,
174 unsigned int RequestHeight256ByteChroma,
175 enum dm_swizzle_mode TilingFormat,
176 unsigned int BytePerPixelY,
177 unsigned int BytePerPixelC,
178 double BytePerPixelDETY,
179 double BytePerPixelDETC,
180 enum scan_direction_class ScanOrientation,
181 unsigned int *MaxUncompressedBlockLuma,
182 unsigned int *MaxUncompressedBlockChroma,
183 unsigned int *MaxCompressedBlockLuma,
184 unsigned int *MaxCompressedBlockChroma,
185 unsigned int *IndependentBlockLuma,
186 unsigned int *IndependentBlockChroma);
187 static double CalculatePrefetchSourceLines(
188 struct display_mode_lib *mode_lib,
192 bool ProgressiveToInterlaceUnitInOPP,
193 unsigned int SwathHeight,
194 unsigned int ViewportYStart,
195 double *VInitPreFill,
196 unsigned int *MaxNumSwath);
197 static unsigned int CalculateVMAndRowBytes(
198 struct display_mode_lib *mode_lib,
200 unsigned int BlockHeight256Bytes,
201 unsigned int BlockWidth256Bytes,
202 enum source_format_class SourcePixelFormat,
203 unsigned int SurfaceTiling,
204 unsigned int BytePerPixel,
205 enum scan_direction_class ScanDirection,
206 unsigned int SwathWidth,
207 unsigned int ViewportHeight,
210 unsigned int HostVMMaxNonCachedPageTableLevels,
211 unsigned int GPUVMMinPageSize,
212 unsigned int HostVMMinPageSize,
213 unsigned int PTEBufferSizeInRequests,
215 unsigned int DCCMetaPitch,
216 unsigned int *MacroTileWidth,
217 unsigned int *MetaRowByte,
218 unsigned int *PixelPTEBytesPerRow,
219 bool *PTEBufferSizeNotExceeded,
220 int *dpte_row_width_ub,
221 unsigned int *dpte_row_height,
222 unsigned int *MetaRequestWidth,
223 unsigned int *MetaRequestHeight,
224 unsigned int *meta_row_width,
225 unsigned int *meta_row_height,
227 unsigned int *dpte_group_bytes,
228 unsigned int *PixelPTEReqWidth,
229 unsigned int *PixelPTEReqHeight,
230 unsigned int *PTERequestSize,
231 int *DPDE0BytesFrame,
232 int *MetaPTEBytesFrame);
233 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
234 static void CalculateRowBandwidth(
236 enum source_format_class SourcePixelFormat,
241 unsigned int MetaRowByteLuma,
242 unsigned int MetaRowByteChroma,
243 unsigned int meta_row_height_luma,
244 unsigned int meta_row_height_chroma,
245 unsigned int PixelPTEBytesPerRowLuma,
246 unsigned int PixelPTEBytesPerRowChroma,
247 unsigned int dpte_row_height_luma,
248 unsigned int dpte_row_height_chroma,
250 double *dpte_row_bw);
252 static void CalculateFlipSchedule(
253 struct display_mode_lib *mode_lib,
254 double HostVMInefficiencyFactor,
255 double UrgentExtraLatency,
256 double UrgentLatency,
257 unsigned int GPUVMMaxPageTableLevels,
259 unsigned int HostVMMaxNonCachedPageTableLevels,
261 double HostVMMinPageSize,
262 double PDEAndMetaPTEBytesPerFrame,
264 double DPTEBytesPerRow,
265 double BandwidthAvailableForImmediateFlip,
266 unsigned int TotImmediateFlipBytes,
267 enum source_format_class SourcePixelFormat,
273 unsigned int dpte_row_height,
274 unsigned int meta_row_height,
275 unsigned int dpte_row_height_chroma,
276 unsigned int meta_row_height_chroma,
277 double *DestinationLinesToRequestVMInImmediateFlip,
278 double *DestinationLinesToRequestRowInImmediateFlip,
279 double *final_flip_bw,
280 bool *ImmediateFlipSupportedForPipe);
281 static double CalculateWriteBackDelay(
282 enum source_format_class WritebackPixelFormat,
283 double WritebackHRatio,
284 double WritebackVRatio,
285 unsigned int WritebackVTaps,
286 int WritebackDestinationWidth,
287 int WritebackDestinationHeight,
288 int WritebackSourceHeight,
289 unsigned int HTotal);
291 static void CalculateVupdateAndDynamicMetadataParameters(
292 int MaxInterDCNTileRepeaters,
295 double DCFClkDeepSleep,
299 int DynamicMetadataTransmittedBytes,
300 int DynamicMetadataLinesBeforeActiveRequired,
302 bool ProgressiveToInterlaceUnitInOPP,
307 int *VUpdateOffsetPix,
308 double *VUpdateWidthPix,
309 double *VReadyOffsetPix);
311 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
312 struct display_mode_lib *mode_lib,
313 unsigned int PrefetchMode,
314 unsigned int NumberOfActivePlanes,
315 unsigned int MaxLineBufferLines,
316 unsigned int LineBufferSize,
317 unsigned int WritebackInterfaceBufferSize,
320 bool SynchronizedVBlank,
321 unsigned int dpte_group_bytes[],
322 unsigned int MetaChunkSize,
323 double UrgentLatency,
325 double WritebackLatency,
326 double WritebackChunkSize,
328 double DRAMClockChangeLatency,
330 double SREnterPlusExitTime,
332 double SREnterPlusExitZ8Time,
333 double DCFCLKDeepSleep,
334 unsigned int DETBufferSizeY[],
335 unsigned int DETBufferSizeC[],
336 unsigned int SwathHeightY[],
337 unsigned int SwathHeightC[],
338 unsigned int LBBitPerPixel[],
339 double SwathWidthY[],
340 double SwathWidthC[],
342 double HRatioChroma[],
343 unsigned int vtaps[],
344 unsigned int VTAPsChroma[],
346 double VRatioChroma[],
347 unsigned int HTotal[],
349 unsigned int BlendingAndTiming[],
350 unsigned int DPPPerPlane[],
351 double BytePerPixelDETY[],
352 double BytePerPixelDETC[],
353 double DSTXAfterScaler[],
354 double DSTYAfterScaler[],
355 bool WritebackEnable[],
356 enum source_format_class WritebackPixelFormat[],
357 double WritebackDestinationWidth[],
358 double WritebackDestinationHeight[],
359 double WritebackSourceHeight[],
360 bool UnboundedRequestEnabled,
361 int unsigned CompressedBufferSizeInkByte,
362 enum clock_change_support *DRAMClockChangeSupport,
363 double *UrgentWatermark,
364 double *WritebackUrgentWatermark,
365 double *DRAMClockChangeWatermark,
366 double *WritebackDRAMClockChangeWatermark,
367 double *StutterExitWatermark,
368 double *StutterEnterPlusExitWatermark,
369 double *Z8StutterExitWatermark,
370 double *Z8StutterEnterPlusExitWatermark,
371 double *MinActiveDRAMClockChangeLatencySupported);
373 static void CalculateDCFCLKDeepSleep(
374 struct display_mode_lib *mode_lib,
375 unsigned int NumberOfActivePlanes,
379 double VRatioChroma[],
380 double SwathWidthY[],
381 double SwathWidthC[],
382 unsigned int DPPPerPlane[],
384 double HRatioChroma[],
386 double PSCL_THROUGHPUT[],
387 double PSCL_THROUGHPUT_CHROMA[],
389 double ReadBandwidthLuma[],
390 double ReadBandwidthChroma[],
392 double *DCFCLKDeepSleep);
394 static void CalculateUrgentBurstFactor(
395 int swath_width_luma_ub,
396 int swath_width_chroma_ub,
397 unsigned int SwathHeightY,
398 unsigned int SwathHeightC,
400 double UrgentLatency,
401 double CursorBufferSize,
402 unsigned int CursorWidth,
403 unsigned int CursorBPP,
406 double BytePerPixelInDETY,
407 double BytePerPixelInDETC,
408 double DETBufferSizeY,
409 double DETBufferSizeC,
410 double *UrgentBurstFactorCursor,
411 double *UrgentBurstFactorLuma,
412 double *UrgentBurstFactorChroma,
413 bool *NotEnoughUrgentLatencyHiding);
415 static void UseMinimumDCFCLK(
416 struct display_mode_lib *mode_lib,
418 int ReorderingBytes);
420 static void CalculatePixelDeliveryTimes(
421 unsigned int NumberOfActivePlanes,
423 double VRatioChroma[],
424 double VRatioPrefetchY[],
425 double VRatioPrefetchC[],
426 unsigned int swath_width_luma_ub[],
427 unsigned int swath_width_chroma_ub[],
428 unsigned int DPPPerPlane[],
430 double HRatioChroma[],
432 double PSCL_THROUGHPUT[],
433 double PSCL_THROUGHPUT_CHROMA[],
436 enum scan_direction_class SourceScan[],
437 unsigned int NumberOfCursors[],
438 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
439 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
440 unsigned int BlockWidth256BytesY[],
441 unsigned int BlockHeight256BytesY[],
442 unsigned int BlockWidth256BytesC[],
443 unsigned int BlockHeight256BytesC[],
444 double DisplayPipeLineDeliveryTimeLuma[],
445 double DisplayPipeLineDeliveryTimeChroma[],
446 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
447 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
448 double DisplayPipeRequestDeliveryTimeLuma[],
449 double DisplayPipeRequestDeliveryTimeChroma[],
450 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
451 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
452 double CursorRequestDeliveryTime[],
453 double CursorRequestDeliveryTimePrefetch[]);
455 static void CalculateMetaAndPTETimes(
456 int NumberOfActivePlanes,
459 int MinMetaChunkSizeBytes,
462 double VRatioChroma[],
463 double DestinationLinesToRequestRowInVBlank[],
464 double DestinationLinesToRequestRowInImmediateFlip[],
469 enum scan_direction_class SourceScan[],
470 int dpte_row_height[],
471 int dpte_row_height_chroma[],
472 int meta_row_width[],
473 int meta_row_width_chroma[],
474 int meta_row_height[],
475 int meta_row_height_chroma[],
476 int meta_req_width[],
477 int meta_req_width_chroma[],
478 int meta_req_height[],
479 int meta_req_height_chroma[],
480 int dpte_group_bytes[],
481 int PTERequestSizeY[],
482 int PTERequestSizeC[],
483 int PixelPTEReqWidthY[],
484 int PixelPTEReqHeightY[],
485 int PixelPTEReqWidthC[],
486 int PixelPTEReqHeightC[],
487 int dpte_row_width_luma_ub[],
488 int dpte_row_width_chroma_ub[],
489 double DST_Y_PER_PTE_ROW_NOM_L[],
490 double DST_Y_PER_PTE_ROW_NOM_C[],
491 double DST_Y_PER_META_ROW_NOM_L[],
492 double DST_Y_PER_META_ROW_NOM_C[],
493 double TimePerMetaChunkNominal[],
494 double TimePerChromaMetaChunkNominal[],
495 double TimePerMetaChunkVBlank[],
496 double TimePerChromaMetaChunkVBlank[],
497 double TimePerMetaChunkFlip[],
498 double TimePerChromaMetaChunkFlip[],
499 double time_per_pte_group_nom_luma[],
500 double time_per_pte_group_vblank_luma[],
501 double time_per_pte_group_flip_luma[],
502 double time_per_pte_group_nom_chroma[],
503 double time_per_pte_group_vblank_chroma[],
504 double time_per_pte_group_flip_chroma[]);
506 static void CalculateVMGroupAndRequestTimes(
507 unsigned int NumberOfActivePlanes,
509 unsigned int GPUVMMaxPageTableLevels,
510 unsigned int HTotal[],
512 double DestinationLinesToRequestVMInVBlank[],
513 double DestinationLinesToRequestVMInImmediateFlip[],
516 int dpte_row_width_luma_ub[],
517 int dpte_row_width_chroma_ub[],
518 int vm_group_bytes[],
519 unsigned int dpde0_bytes_per_frame_ub_l[],
520 unsigned int dpde0_bytes_per_frame_ub_c[],
521 int meta_pte_bytes_per_frame_ub_l[],
522 int meta_pte_bytes_per_frame_ub_c[],
523 double TimePerVMGroupVBlank[],
524 double TimePerVMGroupFlip[],
525 double TimePerVMRequestVBlank[],
526 double TimePerVMRequestFlip[]);
528 static void CalculateStutterEfficiency(
529 struct display_mode_lib *mode_lib,
530 int CompressedBufferSizeInkByte,
531 bool UnboundedRequestEnabled,
532 int ConfigReturnBufferSizeInKByte,
533 int MetaFIFOSizeInKEntries,
534 int ZeroSizeBufferEntries,
535 int NumberOfActivePlanes,
536 int ROBBufferSizeInKByte,
537 double TotalDataReadBandwidth,
540 double COMPBUF_RESERVED_SPACE_64B,
541 double COMPBUF_RESERVED_SPACE_ZS,
544 bool SynchronizedVBlank,
545 double Z8StutterEnterPlusExitWatermark,
546 double StutterEnterPlusExitWatermark,
547 bool ProgressiveToInterlaceUnitInOPP,
549 double MinTTUVBlank[],
551 unsigned int DETBufferSizeY[],
553 double BytePerPixelDETY[],
554 double SwathWidthY[],
557 double NetDCCRateLuma[],
558 double NetDCCRateChroma[],
559 double DCCFractionOfZeroSizeRequestsLuma[],
560 double DCCFractionOfZeroSizeRequestsChroma[],
565 enum scan_direction_class SourceScan[],
566 int BlockHeight256BytesY[],
567 int BlockWidth256BytesY[],
568 int BlockHeight256BytesC[],
569 int BlockWidth256BytesC[],
570 int DCCYMaxUncompressedBlock[],
571 int DCCCMaxUncompressedBlock[],
574 bool WritebackEnable[],
575 double ReadBandwidthPlaneLuma[],
576 double ReadBandwidthPlaneChroma[],
577 double meta_row_bw[],
578 double dpte_row_bw[],
579 double *StutterEfficiencyNotIncludingVBlank,
580 double *StutterEfficiency,
581 int *NumberOfStutterBurstsPerFrame,
582 double *Z8StutterEfficiencyNotIncludingVBlank,
583 double *Z8StutterEfficiency,
584 int *Z8NumberOfStutterBurstsPerFrame,
585 double *StutterPeriod);
587 static void CalculateSwathAndDETConfiguration(
589 int NumberOfActivePlanes,
590 unsigned int DETBufferSizeInKByte,
591 double MaximumSwathWidthLuma[],
592 double MaximumSwathWidthChroma[],
593 enum scan_direction_class SourceScan[],
594 enum source_format_class SourcePixelFormat[],
595 enum dm_swizzle_mode SurfaceTiling[],
597 int ViewportHeight[],
600 int SurfaceHeightY[],
601 int SurfaceHeightC[],
602 int Read256BytesBlockHeightY[],
603 int Read256BytesBlockHeightC[],
604 int Read256BytesBlockWidthY[],
605 int Read256BytesBlockWidthC[],
606 enum odm_combine_mode ODMCombineEnabled[],
607 int BlendingAndTiming[],
610 double BytePerPixDETY[],
611 double BytePerPixDETC[],
614 double HRatioChroma[],
616 int swath_width_luma_ub[],
617 int swath_width_chroma_ub[],
619 double SwathWidthChroma[],
622 unsigned int DETBufferSizeY[],
623 unsigned int DETBufferSizeC[],
624 bool ViewportSizeSupportPerPlane[],
625 bool *ViewportSizeSupport);
626 static void CalculateSwathWidth(
628 int NumberOfActivePlanes,
629 enum source_format_class SourcePixelFormat[],
630 enum scan_direction_class SourceScan[],
632 int ViewportHeight[],
635 int SurfaceHeightY[],
636 int SurfaceHeightC[],
637 enum odm_combine_mode ODMCombineEnabled[],
640 int Read256BytesBlockHeightY[],
641 int Read256BytesBlockHeightC[],
642 int Read256BytesBlockWidthY[],
643 int Read256BytesBlockWidthC[],
644 int BlendingAndTiming[],
648 double SwathWidthSingleDPPY[],
649 double SwathWidthSingleDPPC[],
650 double SwathWidthY[],
651 double SwathWidthC[],
652 int MaximumSwathHeightY[],
653 int MaximumSwathHeightC[],
654 int swath_width_luma_ub[],
655 int swath_width_chroma_ub[]);
657 static double CalculateExtraLatency(
658 int RoundTripPingLatencyCycles,
661 int TotalNumberOfActiveDPP,
662 int PixelChunkSizeInKByte,
663 int TotalNumberOfDCCActiveDPP,
668 int NumberOfActivePlanes,
670 int dpte_group_bytes[],
671 double HostVMInefficiencyFactor,
672 double HostVMMinPageSize,
673 int HostVMMaxNonCachedPageTableLevels);
675 static double CalculateExtraLatencyBytes(
677 int TotalNumberOfActiveDPP,
678 int PixelChunkSizeInKByte,
679 int TotalNumberOfDCCActiveDPP,
683 int NumberOfActivePlanes,
685 int dpte_group_bytes[],
686 double HostVMInefficiencyFactor,
687 double HostVMMinPageSize,
688 int HostVMMaxNonCachedPageTableLevels);
690 static double CalculateUrgentLatency(
691 double UrgentLatencyPixelDataOnly,
692 double UrgentLatencyPixelMixedWithVMData,
693 double UrgentLatencyVMDataOnly,
694 bool DoUrgentLatencyAdjustment,
695 double UrgentLatencyAdjustmentFabricClockComponent,
696 double UrgentLatencyAdjustmentFabricClockReference,
697 double FabricClockSingle);
699 static void CalculateUnboundedRequestAndCompressedBufferSize(
700 unsigned int DETBufferSizeInKByte,
701 int ConfigReturnBufferSizeInKByte,
702 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
706 int CompressedBufferSegmentSizeInkByteFinal,
707 enum output_encoder_class *Output,
708 bool *UnboundedRequestEnabled,
709 int *CompressedBufferSizeInkByte);
711 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
/*
 * dml31_recalculate - run the DCN 3.1 DML calculation sequence on mode_lib.
 *
 * Calls, in order and each with the same mode_lib pointer:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Returns nothing. NOTE(review): results are presumably left in *mode_lib by
 * the callees - confirm against their definitions.
 */
713 void dml31_recalculate(struct display_mode_lib *mode_lib)
715 ModeSupportAndSystemConfiguration(mode_lib);
716 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
717 DisplayPipeConfiguration(mode_lib);
// Trace message announcing the final stage; only built when __DML_VBA_DEBUG__ is defined.
718 #ifdef __DML_VBA_DEBUG__
719 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
721 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
/*
 * dscceComputeDelay - compute a DSC encoder (dscce) delay, expressed in pixels.
 *
 * sliceWidth:  number of pixels per slice line (see the valid ranges below).
 * numSlices:   number of horizontal slices per DSC engine.
 * pixelFormat: output pixel format; selects pixelsPerClock and is tested again
 *              for the 4:2:2 extra-cycle adjustment.
 * Output:      output encoder class.
 *
 * The delay is first accumulated in DSC clock cycles (Delay) and then converted
 * to pixels as Delay * 3 * pixelsPerClock.
 * NOTE(review): the function is declared to return unsigned int; presumably the
 * value returned is `pixels` - confirm.
 */
724 static unsigned int dscceComputeDelay(
727 unsigned int sliceWidth,
728 unsigned int numSlices,
729 enum output_format_class pixelFormat,
730 enum output_encoder_class Output)
732 // valid bpc = source bits per component in the set of {8, 10, 12}
733 // valid bpp = increments of 1/16 of a bit
734 // min = 6/7/8 in N420/N422/444, respectively
735 // max = such that compression is 1:1
736 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
737 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
738 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
// Fixed rate-control model size used for the initial transmit delay below.
741 unsigned int rcModelSize = 8192;
743 // N422/N420 operate at 2 pixels per clock
744 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
// Select pixelsPerClock from the pixel format.
746 if (pixelFormat == dm_420)
748 else if (pixelFormat == dm_444)
750 else if (pixelFormat == dm_n422)
752 // #all other modes operate at 1 pixel per clock
756 //initial transmit delay as per PPS
// The 2.0 literal makes this a floating-point division; dml_round() result is stored as unsigned int.
757 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
767 //divide by pixel per cycle to compute slice width as seen by DSC
// Unsigned integer division: any remainder is discarded.
768 w = sliceWidth / pixelsPerClock;
770 //422 mode has an additional cycle of delay
771 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
776 //main calculation for the dscce
777 ix = initalXmitDelay + 45;
// (a + 2) / 3 is an integer round-up of a / 3.
782 ax = (a + 2) / 3 + D + 6 + 1;
// Integer ceiling of ax / wx.
783 L = (ax + wx - 1) / wx;
784 if ((ix % w) == 0 && P != 0)
// Total delay in cycles: L * wx for each slice after the first, plus the fixed terms.
788 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
790 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
791 pixels = Delay * 3 * pixelsPerClock;
/*
 * dscComputeDelay - compute a DSC (dscc) delay for the given pixel format.
 *
 * pixelFormat: output pixel format; dm_420 and dm_n422 each have their own
 *              branch, and a third sequence of stages follows for the
 *              remaining formats.
 * Output:      output encoder class.
 *
 * Delay starts at 0. Each branch walks the dscc stages named in its comments
 * (input deserializer, input cdc fifo, cdc uncertainty, output cdc fifo,
 * output serializer).
 * NOTE(review): presumably each stage adds a fixed cycle count to Delay and
 * the accumulated Delay is returned - confirm.
 */
795 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
797 unsigned int Delay = 0;
// 4:2:0 path: several stages are annotated as receiving pixels only every other cycle.
799 if (pixelFormat == dm_420) {
804 // dscc - input deserializer
806 // dscc gets pixels every other cycle
808 // dscc - input cdc fifo
810 // dscc gets pixels every other cycle
812 // dscc - cdc uncertainty
814 // dscc - output cdc fifo
816 // dscc gets pixels every other cycle
818 // dscc - cdc uncertainty
820 // dscc - output serializer
// Native 4:2:2 path.
824 } else if (pixelFormat == dm_n422) {
829 // dscc - input deserializer
831 // dscc - input cdc fifo
833 // dscc - cdc uncertainty
835 // dscc - output cdc fifo
837 // dscc - cdc uncertainty
839 // dscc - output serializer
// Stage list for the remaining pixel formats.
848 // dscc - input deserializer
850 // dscc - input cdc fifo
852 // dscc - cdc uncertainty
854 // dscc - output cdc fifo
856 // dscc - output serializer
858 // dscc - cdc uncertainty
867 static bool CalculatePrefetchSchedule(
868 struct display_mode_lib *mode_lib,
869 double HostVMInefficiencyFactor,
871 unsigned int DSCDelay,
872 double DPPCLKDelaySubtotalPlusCNVCFormater,
873 double DPPCLKDelaySCL,
874 double DPPCLKDelaySCLLBOnly,
875 double DPPCLKDelayCNVCCursor,
876 double DISPCLKDelaySubtotal,
877 unsigned int DPP_RECOUT_WIDTH,
878 enum output_format_class OutputFormat,
879 unsigned int MaxInterDCNTileRepeaters,
880 unsigned int VStartup,
881 unsigned int MaxVStartup,
882 unsigned int GPUVMPageTableLevels,
885 unsigned int HostVMMaxNonCachedPageTableLevels,
886 double HostVMMinPageSize,
887 bool DynamicMetadataEnable,
888 bool DynamicMetadataVMEnabled,
889 int DynamicMetadataLinesBeforeActiveRequired,
890 unsigned int DynamicMetadataTransmittedBytes,
891 double UrgentLatency,
892 double UrgentExtraLatency,
894 unsigned int PDEAndMetaPTEBytesFrame,
895 unsigned int MetaRowByte,
896 unsigned int PixelPTEBytesPerRow,
897 double PrefetchSourceLinesY,
898 unsigned int SwathWidthY,
899 double VInitPreFillY,
900 unsigned int MaxNumSwathY,
901 double PrefetchSourceLinesC,
902 unsigned int SwathWidthC,
903 double VInitPreFillC,
904 unsigned int MaxNumSwathC,
905 int swath_width_luma_ub,
906 int swath_width_chroma_ub,
907 unsigned int SwathHeightY,
908 unsigned int SwathHeightC,
910 double *DSTXAfterScaler,
911 double *DSTYAfterScaler,
912 double *DestinationLinesForPrefetch,
913 double *PrefetchBandwidth,
914 double *DestinationLinesToRequestVMInVBlank,
915 double *DestinationLinesToRequestRowInVBlank,
916 double *VRatioPrefetchY,
917 double *VRatioPrefetchC,
918 double *RequiredPrefetchPixDataBWLuma,
919 double *RequiredPrefetchPixDataBWChroma,
920 bool *NotEnoughTimeForDynamicMetadata,
922 double *prefetch_vmrow_bw,
926 int *VUpdateOffsetPix,
927 double *VUpdateWidthPix,
928 double *VReadyOffsetPix)
930 bool MyError = false;
931 unsigned int DPPCycles, DISPCLKCycles;
932 double DSTTotalPixelsAfterScaler;
934 double dst_y_prefetch_equ;
936 double prefetch_bw_oto;
937 double prefetch_bw_pr;
940 double Tvm_oto_lines;
941 double Tr0_oto_lines;
942 double dst_y_prefetch_oto;
943 double TimeForFetchingMetaPTE = 0;
944 double TimeForFetchingRowInVBlank = 0;
945 double LinesToRequestPrefetchPixelData = 0;
946 unsigned int HostVMDynamicLevelsTrips;
950 double Tvm_trips_rounded;
951 double Tr0_trips_rounded;
954 double prefetch_bw_equ;
960 double prefetch_sw_bytes;
963 int max_vratio_pre = 4;
969 if (GPUVMEnable == true && HostVMEnable == true) {
970 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
972 HostVMDynamicLevelsTrips = 0;
974 #ifdef __DML_VBA_DEBUG__
975 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
977 CalculateVupdateAndDynamicMetadataParameters(
978 MaxInterDCNTileRepeaters,
981 myPipe->DCFCLKDeepSleep,
985 DynamicMetadataTransmittedBytes,
986 DynamicMetadataLinesBeforeActiveRequired,
987 myPipe->InterlaceEnable,
988 myPipe->ProgressiveToInterlaceUnitInOPP,
997 LineTime = myPipe->HTotal / myPipe->PixelClock;
998 trip_to_mem = UrgentLatency;
999 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
1001 #ifdef __DML_VBA_ALLOW_DELTA__
1002 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
1004 if (DynamicMetadataVMEnabled == true) {
1006 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
1008 *Tdmdl = TWait + UrgentExtraLatency;
1011 #ifdef __DML_VBA_ALLOW_DELTA__
1012 if (DynamicMetadataEnable == false) {
1017 if (DynamicMetadataEnable == true) {
1018 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
1019 *NotEnoughTimeForDynamicMetadata = true;
1020 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
1021 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
1022 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
1023 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
1024 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
1026 *NotEnoughTimeForDynamicMetadata = false;
1029 *NotEnoughTimeForDynamicMetadata = false;
1032 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1034 if (myPipe->ScalerEnabled)
1035 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1037 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1039 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1041 DISPCLKCycles = DISPCLKDelaySubtotal;
1043 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1046 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1048 #ifdef __DML_VBA_DEBUG__
1049 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1050 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1051 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1052 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1053 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1054 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1055 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1056 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1059 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1061 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1062 *DSTYAfterScaler = 1;
1064 *DSTYAfterScaler = 0;
1066 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1067 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1068 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1070 #ifdef __DML_VBA_DEBUG__
1071 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1076 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1077 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1078 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1080 #ifdef __DML_VBA_ALLOW_DELTA__
1081 if (!myPipe->DCCEnable) {
1083 Tr0_trips_rounded = 0.0;
1089 Tvm_trips_rounded = 0.0;
1093 if (GPUVMPageTableLevels >= 3) {
1094 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1098 } else if (!myPipe->DCCEnable) {
1101 *Tno_bw = LineTime / 4;
1104 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1105 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1107 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1109 prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
1110 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1111 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1112 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1113 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1115 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1116 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1117 Tsw_oto = Lsw_oto * LineTime;
1119 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1121 #ifdef __DML_VBA_DEBUG__
1122 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1123 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1124 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1125 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1126 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1127 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1130 if (GPUVMEnable == true)
1131 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1133 Tvm_oto = LineTime / 4.0;
1135 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1136 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1140 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1143 #ifdef __DML_VBA_DEBUG__
1144 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1145 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1146 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1147 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1148 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1149 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1150 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1151 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1152 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1155 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1156 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1157 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1158 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1159 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1160 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1162 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1164 if (prefetch_sw_bytes < dep_bytes)
1165 prefetch_sw_bytes = 2 * dep_bytes;
1167 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1168 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1169 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1170 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1171 dml_print("DML: LineTime: %f\n", LineTime);
1172 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1174 dml_print("DML: LineTime: %f\n", LineTime);
1175 dml_print("DML: VStartup: %d\n", VStartup);
1176 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1177 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1178 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1179 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1180 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1181 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1182 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1183 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1184 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1185 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1186 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1188 *PrefetchBandwidth = 0;
1189 *DestinationLinesToRequestVMInVBlank = 0;
1190 *DestinationLinesToRequestRowInVBlank = 0;
1191 *VRatioPrefetchY = 0;
1192 *VRatioPrefetchC = 0;
1193 *RequiredPrefetchPixDataBWLuma = 0;
1194 if (dst_y_prefetch_equ > 1) {
1195 double PrefetchBandwidth1;
1196 double PrefetchBandwidth2;
1197 double PrefetchBandwidth3;
1198 double PrefetchBandwidth4;
1200 if (Tpre_rounded - *Tno_bw > 0) {
1201 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1202 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1203 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1205 PrefetchBandwidth1 = 0;
1208 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1209 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1210 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1213 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1214 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1216 PrefetchBandwidth2 = 0;
1218 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1219 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1220 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1221 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1223 PrefetchBandwidth3 = 0;
1226 #ifdef __DML_VBA_DEBUG__
1227 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1228 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1229 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1231 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1232 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1233 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1236 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1237 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1239 PrefetchBandwidth4 = 0;
1246 if (PrefetchBandwidth1 > 0) {
1247 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1248 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1257 if (PrefetchBandwidth2 > 0) {
1258 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1259 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1268 if (PrefetchBandwidth3 > 0) {
1269 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1270 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1280 prefetch_bw_equ = PrefetchBandwidth1;
1281 } else if (Case2OK) {
1282 prefetch_bw_equ = PrefetchBandwidth2;
1283 } else if (Case3OK) {
1284 prefetch_bw_equ = PrefetchBandwidth3;
1286 prefetch_bw_equ = PrefetchBandwidth4;
1289 #ifdef __DML_VBA_DEBUG__
1290 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1291 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1292 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1293 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1296 if (prefetch_bw_equ > 0) {
1297 if (GPUVMEnable == true) {
1298 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1300 Tvm_equ = LineTime / 4;
1303 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1305 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1307 (LineTime - Tvm_equ) / 2,
1310 Tr0_equ = (LineTime - Tvm_equ) / 2;
1315 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1319 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1320 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1321 TimeForFetchingMetaPTE = Tvm_oto;
1322 TimeForFetchingRowInVBlank = Tr0_oto;
1323 *PrefetchBandwidth = prefetch_bw_oto;
1325 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1326 TimeForFetchingMetaPTE = Tvm_equ;
1327 TimeForFetchingRowInVBlank = Tr0_equ;
1328 *PrefetchBandwidth = prefetch_bw_equ;
1331 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1333 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1335 #ifdef __DML_VBA_ALLOW_DELTA__
1336 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1337 // See note above dated 5/30/2018
1338 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1339 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1341 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1344 #ifdef __DML_VBA_DEBUG__
1345 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1346 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1347 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1348 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1349 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1350 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1351 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1354 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1356 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1357 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1358 #ifdef __DML_VBA_DEBUG__
1359 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1360 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1361 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1363 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1364 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1365 *VRatioPrefetchY = dml_max(
1366 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1367 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1368 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1371 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1372 *VRatioPrefetchY = 0;
1374 #ifdef __DML_VBA_DEBUG__
1375 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1376 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1377 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1381 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1382 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1384 #ifdef __DML_VBA_DEBUG__
1385 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1386 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1387 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1389 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1390 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1391 *VRatioPrefetchC = dml_max(
1393 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1394 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1397 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1398 *VRatioPrefetchC = 0;
1400 #ifdef __DML_VBA_DEBUG__
1401 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1402 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1403 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1407 #ifdef __DML_VBA_DEBUG__
1408 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1409 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1410 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1413 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1415 #ifdef __DML_VBA_DEBUG__
1416 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1419 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1423 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1424 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1425 *VRatioPrefetchY = 0;
1426 *VRatioPrefetchC = 0;
1427 *RequiredPrefetchPixDataBWLuma = 0;
1428 *RequiredPrefetchPixDataBWChroma = 0;
1432 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1433 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1434 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1435 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1437 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1438 (double) LinesToRequestPrefetchPixelData * LineTime);
1439 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1440 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1441 (double) myPipe->HTotal)) * LineTime);
1442 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1443 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1444 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1445 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1446 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1450 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1454 double prefetch_vm_bw;
1455 double prefetch_row_bw;
1457 if (PDEAndMetaPTEBytesFrame == 0) {
1459 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1460 #ifdef __DML_VBA_DEBUG__
1461 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1462 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1463 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1464 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1466 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1467 #ifdef __DML_VBA_DEBUG__
1468 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1473 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1476 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1477 prefetch_row_bw = 0;
1478 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1479 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1481 #ifdef __DML_VBA_DEBUG__
1482 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1483 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1484 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1485 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1488 prefetch_row_bw = 0;
1490 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1493 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1497 *PrefetchBandwidth = 0;
1498 TimeForFetchingMetaPTE = 0;
1499 TimeForFetchingRowInVBlank = 0;
1500 *DestinationLinesToRequestVMInVBlank = 0;
1501 *DestinationLinesToRequestRowInVBlank = 0;
1502 *DestinationLinesForPrefetch = 0;
1503 LinesToRequestPrefetchPixelData = 0;
1504 *VRatioPrefetchY = 0;
1505 *VRatioPrefetchC = 0;
1506 *RequiredPrefetchPixDataBWLuma = 0;
1507 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * RoundToDFSGranularityUp - snap a clock to a DFS step at or above it.
 *
 * @Clock:    requested clock, same unit as @VCOSpeed.
 * @VCOSpeed: VCO frequency the DFS divides down from.
 *
 * Returns (VCOSpeed * 4) / divider, where divider is VCOSpeed * 4 / Clock
 * passed through dml_floor(..., 1). Because the divider is rounded down, the
 * result is not below @Clock (assuming dml_floor(x, 1) rounds x down to a
 * whole number; it is defined outside this file -- TODO confirm).
 *
 * NOTE(review): if @Clock exceeds VCOSpeed * 4 the floored divider would be
 * 0 and the division is by zero; no guard is added here so the arithmetic
 * stays exactly as provided.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1);
}
/*
 * RoundToDFSGranularityDown - snap a clock to a DFS step at or below it.
 *
 * @Clock:    requested clock, same unit as @VCOSpeed.
 * @VCOSpeed: VCO frequency the DFS divides down from.
 *
 * Returns (VCOSpeed * 4) / divider, where divider is VCOSpeed * 4.0 / Clock
 * passed through dml_ceil(..., 1). Because the divider is rounded up, the
 * result is not above @Clock (assuming dml_ceil(x, 1) rounds x up to a whole
 * number; it is defined outside this file -- TODO confirm).
 *
 * NOTE(review): a @Clock of 0 makes the inner division non-finite; no guard
 * is added here so the arithmetic stays exactly as provided.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1);
}
/*
 * CalculateDCCConfiguration - pick the DCC request size for the luma and
 * chroma planes and report the matching block-size limits.
 *
 * For each plane the function decides between three request types
 * (REQ_256Bytes, REQ_128BytesContiguous, REQ_128BytesNonContiguous) and then
 * writes MaxUncompressedBlock*, MaxCompressedBlock* and IndependentBlock*
 * through the output pointers. The decision is driven by whether full swaths
 * fit into DETBufferSize and by the tiling format / bytes per pixel.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are not referenced in the lines visible here.
 *
 * NOTE(review): this listing is missing lines. DCCEnabled, yuv420,
 * horz_div_l/c, vert_div_l/c and swath_buf_size are used below without a
 * visible declaration, and several `else` arms, `if` bodies and braces are
 * not shown -- confirm every structural remark below against the full file.
 */
1523 static void CalculateDCCConfiguration(
1525 bool DCCProgrammingAssumesScanDirectionUnknown,
1526 enum source_format_class SourcePixelFormat,
1527 unsigned int SurfaceWidthLuma,
1528 unsigned int SurfaceWidthChroma,
1529 unsigned int SurfaceHeightLuma,
1530 unsigned int SurfaceHeightChroma,
1531 double DETBufferSize,
1532 unsigned int RequestHeight256ByteLuma,
1533 unsigned int RequestHeight256ByteChroma,
1534 enum dm_swizzle_mode TilingFormat,
1535 unsigned int BytePerPixelY,
1536 unsigned int BytePerPixelC,
1537 double BytePerPixelDETY,
1538 double BytePerPixelDETC,
1539 enum scan_direction_class ScanOrientation,
1540 unsigned int *MaxUncompressedBlockLuma,
1541 unsigned int *MaxUncompressedBlockChroma,
1542 unsigned int *MaxCompressedBlockLuma,
1543 unsigned int *MaxCompressedBlockChroma,
1544 unsigned int *IndependentBlockLuma,
1545 unsigned int *IndependentBlockChroma)
/* Viewport limits imposed by the detile buffer (horizontal / vertical scan). */
1554 double detile_buf_vp_horz_limit;
1555 double detile_buf_vp_vert_limit;
/* Fixed viewport caps and the resulting effective surface dimensions. */
1557 int MAS_vp_horz_limit;
1558 int MAS_vp_vert_limit;
1559 int max_vp_horz_width;
1560 int max_vp_vert_height;
1561 int eff_surf_width_l;
1562 int eff_surf_width_c;
1563 int eff_surf_height_l;
1564 int eff_surf_height_c;
/* Bytes of one full swath per plane, for horizontal and vertical access. */
1566 int full_swath_bytes_horz_wc_l;
1567 int full_swath_bytes_horz_wc_c;
1568 int full_swath_bytes_vert_wc_l;
1569 int full_swath_bytes_vert_wc_c;
/* req128_* == 0 selects REQ_256Bytes further down; 1 selects a 128-byte type. */
1570 int req128_horz_wc_l;
1571 int req128_horz_wc_c;
1572 int req128_vert_wc_l;
1573 int req128_vert_wc_c;
/* segment_order_* == 0 turns a 128-byte request into the non-contiguous type. */
1574 int segment_order_horz_contiguous_luma;
1575 int segment_order_horz_contiguous_chroma;
1576 int segment_order_vert_contiguous_luma;
1577 int segment_order_vert_contiguous_chroma;
/* Enumerators of the RequestType used below (enclosing typedef not visible here). */
1580 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1582 RequestType RequestLuma;
1583 RequestType RequestChroma;
/* yuv420 is 1 for the three 4:2:0 formats, 0 otherwise. */
1585 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
/*
 * NOTE(review): the bodies of the next four `if` statements are not visible
 * in this listing; presumably they adjust vert_div_* and horz_div_*, which
 * the limit formulas below consume -- confirm.
 */
1591 if (BytePerPixelY == 1)
1593 if (BytePerPixelC == 1)
1595 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1597 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
/*
 * Swath buffer budget: half of DETBufferSize minus 2 * 256 bytes when there
 * is no chroma plane. The second group of assignments subtracts 2 * 2 * 256
 * and adds the chroma term to the divisor (its `else` is not visible here).
 */
1600 if (BytePerPixelC == 0) {
1601 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1602 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1603 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1605 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1606 detile_buf_vp_horz_limit = (double) swath_buf_size
1607 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1608 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1609 detile_buf_vp_vert_limit = (double) swath_buf_size
1610 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
/* dm_420_10 gets both detile limits scaled by 1.5. */
1613 if (SourcePixelFormat == dm_420_10) {
1614 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1615 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/* Subtract 1, then dml_floor with 16 (presumably rounds down to a multiple of 16 -- confirm). */
1618 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1619 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/* The usable viewport is the smaller of the fixed cap and the detile limit. */
1621 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1622 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1623 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1624 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
/* Clamp the luma surface to that viewport; chroma is luma / 2 for 4:2:0, else equal. */
1625 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1626 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1627 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1628 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/* Full-swath byte counts; the chroma values are 0 in the lines after the `if` block. */
1630 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1631 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1632 if (BytePerPixelC > 0) {
1633 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1634 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1636 full_swath_bytes_horz_wc_c = 0;
1637 full_swath_bytes_vert_wc_c = 0;
/*
 * dm_420_10: scale by 2/3, then dml_ceil with 256. The operands are int, so
 * `* 2 / 3` truncates before dml_ceil sees the value.
 */
1640 if (SourcePixelFormat == dm_420_10) {
1641 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1642 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1643 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1644 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
/*
 * Horizontal access: if two full swaths of both planes fit in DETBufferSize,
 * neither plane needs 128-byte requests. Otherwise the 1.5x luma/chroma
 * ratio test picks which single plane drops to 128 bytes, provided the
 * reduced total fits. The last pair of assignments (both 1) has no visible
 * condition and is presumably the final `else`.
 */
1647 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1648 req128_horz_wc_l = 0;
1649 req128_horz_wc_c = 0;
1650 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1651 req128_horz_wc_l = 0;
1652 req128_horz_wc_c = 1;
1653 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1654 req128_horz_wc_l = 1;
1655 req128_horz_wc_c = 0;
1657 req128_horz_wc_l = 1;
1658 req128_horz_wc_c = 1;
/* Vertical access: same decision ladder using the vertical swath sizes. */
1661 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1662 req128_vert_wc_l = 0;
1663 req128_vert_wc_c = 0;
1664 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1665 req128_vert_wc_l = 0;
1666 req128_vert_wc_c = 1;
1667 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1668 req128_vert_wc_l = 1;
1669 req128_vert_wc_c = 0;
1671 req128_vert_wc_l = 1;
1672 req128_vert_wc_c = 1;
/*
 * Segment-order flags per plane and direction, chosen from bytes per pixel
 * and tiling format. In each pair below the `= 0` line follows the visible
 * condition; the `= 1` line has no visible condition (presumably `else`).
 */
1675 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1676 segment_order_horz_contiguous_luma = 0;
1678 segment_order_horz_contiguous_luma = 1;
1680 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1681 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1682 segment_order_vert_contiguous_luma = 0;
1684 segment_order_vert_contiguous_luma = 1;
1686 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1687 segment_order_horz_contiguous_chroma = 0;
1689 segment_order_horz_contiguous_chroma = 1;
1691 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1692 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1693 segment_order_vert_contiguous_chroma = 0;
1695 segment_order_vert_contiguous_chroma = 1;
/*
 * Request type selection.
 * Scan direction unknown: 256 bytes only if neither direction needs 128;
 * non-contiguous if any direction that needs 128 has segment order 0.
 */
1698 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1699 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1700 RequestLuma = REQ_256Bytes;
1701 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1702 RequestLuma = REQ_128BytesNonContiguous;
1704 RequestLuma = REQ_128BytesContiguous;
1706 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1707 RequestChroma = REQ_256Bytes;
1708 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1709 RequestChroma = REQ_128BytesNonContiguous;
1711 RequestChroma = REQ_128BytesContiguous;
/* Known non-vertical scan: only the horizontal flags are consulted. */
1713 } else if (ScanOrientation != dm_vert) {
1714 if (req128_horz_wc_l == 0) {
1715 RequestLuma = REQ_256Bytes;
1716 } else if (segment_order_horz_contiguous_luma == 0) {
1717 RequestLuma = REQ_128BytesNonContiguous;
1719 RequestLuma = REQ_128BytesContiguous;
1721 if (req128_horz_wc_c == 0) {
1722 RequestChroma = REQ_256Bytes;
1723 } else if (segment_order_horz_contiguous_chroma == 0) {
1724 RequestChroma = REQ_128BytesNonContiguous;
1726 RequestChroma = REQ_128BytesContiguous;
/* Remaining case uses only the vertical flags (its `else` is not visible here). */
1729 if (req128_vert_wc_l == 0) {
1730 RequestLuma = REQ_256Bytes;
1731 } else if (segment_order_vert_contiguous_luma == 0) {
1732 RequestLuma = REQ_128BytesNonContiguous;
1734 RequestLuma = REQ_128BytesContiguous;
1736 if (req128_vert_wc_c == 0) {
1737 RequestChroma = REQ_256Bytes;
1738 } else if (segment_order_vert_contiguous_chroma == 0) {
1739 RequestChroma = REQ_128BytesNonContiguous;
1741 RequestChroma = REQ_128BytesContiguous;
/*
 * Luma outputs as (uncompressed, compressed, independent):
 * REQ_256Bytes -> 256/256/0, REQ_128BytesContiguous -> 256/128/128,
 * trailing assignments -> 256/64/64 (no visible condition).
 */
1745 if (RequestLuma == REQ_256Bytes) {
1746 *MaxUncompressedBlockLuma = 256;
1747 *MaxCompressedBlockLuma = 256;
1748 *IndependentBlockLuma = 0;
1749 } else if (RequestLuma == REQ_128BytesContiguous) {
1750 *MaxUncompressedBlockLuma = 256;
1751 *MaxCompressedBlockLuma = 128;
1752 *IndependentBlockLuma = 128;
1754 *MaxUncompressedBlockLuma = 256;
1755 *MaxCompressedBlockLuma = 64;
1756 *IndependentBlockLuma = 64;
/* Chroma outputs follow the same mapping. */
1759 if (RequestChroma == REQ_256Bytes) {
1760 *MaxUncompressedBlockChroma = 256;
1761 *MaxCompressedBlockChroma = 256;
1762 *IndependentBlockChroma = 0;
1763 } else if (RequestChroma == REQ_128BytesContiguous) {
1764 *MaxUncompressedBlockChroma = 256;
1765 *MaxCompressedBlockChroma = 128;
1766 *IndependentBlockChroma = 128;
1768 *MaxUncompressedBlockChroma = 256;
1769 *MaxCompressedBlockChroma = 64;
1770 *IndependentBlockChroma = 64;
/* Final overrides: chroma outputs are zeroed without DCC or without a chroma plane. */
1773 if (DCCEnabled != true || BytePerPixelC == 0) {
1774 *MaxUncompressedBlockChroma = 0;
1775 *MaxCompressedBlockChroma = 0;
1776 *IndependentBlockChroma = 0;
/* Luma outputs are zeroed whenever DCC is not enabled. */
1779 if (DCCEnabled != true) {
1780 *MaxUncompressedBlockLuma = 0;
1781 *MaxCompressedBlockLuma = 0;
1782 *IndependentBlockLuma = 0;
/*
 * CalculatePrefetchSourceLines - number of source lines to prefetch.
 *
 * Writes the initial prefill value to *VInitPreFill and the whole-swath
 * count to *MaxNumSwath, then returns
 * *MaxNumSwath * SwathHeight + MaxPartialSwath.
 *
 * SwathHeight is used as a divisor and as a modulo operand, so it must be
 * non-zero.
 *
 * NOTE(review): VRatio, vtaps and Interlace are used below but their
 * parameter declarations are not visible in this listing; the `else` lines,
 * braces and `#endif` are also not shown -- confirm against the full file.
 */
1786 static double CalculatePrefetchSourceLines(
1787 struct display_mode_lib *mode_lib,
1791 bool ProgressiveToInterlaceUnitInOPP,
1792 unsigned int SwathHeight,
1793 unsigned int ViewportYStart,
1794 double *VInitPreFill,
1795 unsigned int *MaxNumSwath)
1797 struct vba_vars_st *v = &mode_lib->vba;
1798 unsigned int MaxPartialSwath;
/*
 * Prefill is dml_floor of half of (VRatio + vtaps + 1); the second form adds
 * an Interlace * 0.5 * VRatio term (presumably the `else` arm -- confirm).
 */
1800 if (ProgressiveToInterlaceUnitInOPP)
1801 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1803 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
/* Viewport position honoured: one extra swath, and the partial swath is at least 1. */
1805 if (!v->IgnoreViewportPositioning) {
1807 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
/* Adding SwathHeight in the second form keeps the operand non-negative before the cast. */
1809 if (*VInitPreFill > 1.0)
1810 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1812 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1813 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
/*
 * Remaining lines (presumably the `else` arm): a non-zero ViewportYStart is
 * only warned about; the swath count has no extra swath and the partial
 * swath is not clamped.
 */
1817 if (ViewportYStart != 0)
1818 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1820 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1822 if (*VInitPreFill > 1.0)
1823 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1825 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
/* Debug-only dump of the inputs and results. */
1828 #ifdef __DML_VBA_DEBUG__
1829 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1830 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1831 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1832 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1833 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1834 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1835 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1836 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1837 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
/* Whole swaths plus the partial swath; the unsigned sum converts to double on return. */
1839 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1842 static unsigned int CalculateVMAndRowBytes(
1843 struct display_mode_lib *mode_lib,
1845 unsigned int BlockHeight256Bytes,
1846 unsigned int BlockWidth256Bytes,
1847 enum source_format_class SourcePixelFormat,
1848 unsigned int SurfaceTiling,
1849 unsigned int BytePerPixel,
1850 enum scan_direction_class ScanDirection,
1851 unsigned int SwathWidth,
1852 unsigned int ViewportHeight,
1855 unsigned int HostVMMaxNonCachedPageTableLevels,
1856 unsigned int GPUVMMinPageSize,
1857 unsigned int HostVMMinPageSize,
1858 unsigned int PTEBufferSizeInRequests,
1860 unsigned int DCCMetaPitch,
1861 unsigned int *MacroTileWidth,
1862 unsigned int *MetaRowByte,
1863 unsigned int *PixelPTEBytesPerRow,
1864 bool *PTEBufferSizeNotExceeded,
1865 int *dpte_row_width_ub,
1866 unsigned int *dpte_row_height,
1867 unsigned int *MetaRequestWidth,
1868 unsigned int *MetaRequestHeight,
1869 unsigned int *meta_row_width,
1870 unsigned int *meta_row_height,
1871 int *vm_group_bytes,
1872 unsigned int *dpte_group_bytes,
1873 unsigned int *PixelPTEReqWidth,
1874 unsigned int *PixelPTEReqHeight,
1875 unsigned int *PTERequestSize,
1876 int *DPDE0BytesFrame,
1877 int *MetaPTEBytesFrame)
1879 struct vba_vars_st *v = &mode_lib->vba;
1880 unsigned int MPDEBytesFrame;
1881 unsigned int DCCMetaSurfaceBytes;
1882 unsigned int MacroTileSizeBytes;
1883 unsigned int MacroTileHeight;
1884 unsigned int ExtraDPDEBytesFrame;
1885 unsigned int PDEAndMetaPTEBytesFrame;
1886 unsigned int PixelPTEReqHeightPTEs = 0;
1887 unsigned int HostVMDynamicLevels = 0;
1888 double FractionOfPTEReturnDrop;
1890 if (GPUVMEnable == true && HostVMEnable == true) {
1891 if (HostVMMinPageSize < 2048) {
1892 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1893 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1894 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1896 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1900 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1901 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1902 if (ScanDirection != dm_vert) {
1903 *meta_row_height = *MetaRequestHeight;
1904 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1905 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1907 *meta_row_height = *MetaRequestWidth;
1908 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1909 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1911 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1912 if (GPUVMEnable == true) {
1913 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1914 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1916 *MetaPTEBytesFrame = 0;
1920 if (DCCEnable != true) {
1921 *MetaPTEBytesFrame = 0;
1926 if (SurfaceTiling == dm_sw_linear) {
1927 MacroTileSizeBytes = 256;
1928 MacroTileHeight = BlockHeight256Bytes;
1930 MacroTileSizeBytes = 65536;
1931 MacroTileHeight = 16 * BlockHeight256Bytes;
1933 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1935 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1936 if (ScanDirection != dm_vert) {
1937 *DPDE0BytesFrame = 64
1939 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1943 *DPDE0BytesFrame = 64
1945 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1949 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1951 *DPDE0BytesFrame = 0;
1952 ExtraDPDEBytesFrame = 0;
1955 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1957 #ifdef __DML_VBA_DEBUG__
1958 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1959 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1960 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1961 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1962 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1965 if (HostVMEnable == true) {
1966 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1968 #ifdef __DML_VBA_DEBUG__
1969 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1972 if (SurfaceTiling == dm_sw_linear) {
1973 PixelPTEReqHeightPTEs = 1;
1974 *PixelPTEReqHeight = 1;
1975 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1976 *PTERequestSize = 64;
1977 FractionOfPTEReturnDrop = 0;
1978 } else if (MacroTileSizeBytes == 4096) {
1979 PixelPTEReqHeightPTEs = 1;
1980 *PixelPTEReqHeight = MacroTileHeight;
1981 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1982 *PTERequestSize = 64;
1983 if (ScanDirection != dm_vert)
1984 FractionOfPTEReturnDrop = 0;
1986 FractionOfPTEReturnDrop = 7 / 8;
1987 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1988 PixelPTEReqHeightPTEs = 16;
1989 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1990 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1991 *PTERequestSize = 128;
1992 FractionOfPTEReturnDrop = 0;
1994 PixelPTEReqHeightPTEs = 1;
1995 *PixelPTEReqHeight = MacroTileHeight;
1996 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1997 *PTERequestSize = 64;
1998 FractionOfPTEReturnDrop = 0;
2001 if (SurfaceTiling == dm_sw_linear) {
2002 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2003 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2004 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2005 } else if (ScanDirection != dm_vert) {
2006 *dpte_row_height = *PixelPTEReqHeight;
2007 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2008 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2010 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
2011 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
2012 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2015 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
2016 *PTEBufferSizeNotExceeded = true;
2018 *PTEBufferSizeNotExceeded = false;
2021 if (GPUVMEnable != true) {
2022 *PixelPTEBytesPerRow = 0;
2023 *PTEBufferSizeNotExceeded = true;
2026 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
2028 if (HostVMEnable == true) {
2029 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2032 if (HostVMEnable == true) {
2033 *vm_group_bytes = 512;
2034 *dpte_group_bytes = 512;
2035 } else if (GPUVMEnable == true) {
2036 *vm_group_bytes = 2048;
2037 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2038 *dpte_group_bytes = 512;
2040 *dpte_group_bytes = 2048;
2043 *vm_group_bytes = 0;
2044 *dpte_group_bytes = 0;
2046 return PDEAndMetaPTEBytesFrame;
2049 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2051 struct vba_vars_st *v = &mode_lib->vba;
2053 double HostVMInefficiencyFactor = 1.0;
2054 bool NoChromaPlanes = true;
2056 double VMDataOnlyReturnBW;
2057 double MaxTotalRDBandwidth = 0;
2058 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2060 v->WritebackDISPCLK = 0.0;
2061 v->DISPCLKWithRamping = 0;
2062 v->DISPCLKWithoutRamping = 0;
2063 v->GlobalDPPCLK = 0.0;
2064 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2066 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2067 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2068 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2069 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2070 if (v->HostVMEnable != true) {
2071 v->ReturnBW = dml_min(
2072 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2073 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2075 v->ReturnBW = dml_min(
2076 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2077 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2080 /* End DAL custom code */
2082 // DISPCLK and DPPCLK Calculation
2084 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2085 if (v->WritebackEnable[k]) {
2086 v->WritebackDISPCLK = dml_max(
2087 v->WritebackDISPCLK,
2088 dml31_CalculateWriteBackDISPCLK(
2089 v->WritebackPixelFormat[k],
2091 v->WritebackHRatio[k],
2092 v->WritebackVRatio[k],
2093 v->WritebackHTaps[k],
2094 v->WritebackVTaps[k],
2095 v->WritebackSourceWidth[k],
2096 v->WritebackDestinationWidth[k],
2098 v->WritebackLineBufferSize));
2102 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2103 if (v->HRatio[k] > 1) {
2104 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2105 v->MaxDCHUBToPSCLThroughput,
2106 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2108 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2111 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2113 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2114 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2116 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2117 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2120 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2121 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2122 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2123 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2125 if (v->HRatioChroma[k] > 1) {
2126 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2127 v->MaxDCHUBToPSCLThroughput,
2128 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2130 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2132 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2134 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2135 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2138 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2139 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2142 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2146 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2147 if (v->BlendingAndTiming[k] != k)
2149 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2150 v->DISPCLKWithRamping = dml_max(
2151 v->DISPCLKWithRamping,
2152 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2153 * (1 + v->DISPCLKRampingMargin / 100));
2154 v->DISPCLKWithoutRamping = dml_max(
2155 v->DISPCLKWithoutRamping,
2156 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2157 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2158 v->DISPCLKWithRamping = dml_max(
2159 v->DISPCLKWithRamping,
2160 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2161 * (1 + v->DISPCLKRampingMargin / 100));
2162 v->DISPCLKWithoutRamping = dml_max(
2163 v->DISPCLKWithoutRamping,
2164 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2166 v->DISPCLKWithRamping = dml_max(
2167 v->DISPCLKWithRamping,
2168 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2169 v->DISPCLKWithoutRamping = dml_max(
2170 v->DISPCLKWithoutRamping,
2171 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2175 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2176 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2178 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2179 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2180 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2181 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2182 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2183 v->DISPCLKDPPCLKVCOSpeed);
2184 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2185 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2186 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2187 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2189 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2191 v->DISPCLK = v->DISPCLK_calculated;
2192 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2194 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2195 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2196 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2198 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2199 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2200 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2201 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2204 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2205 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2208 // Urgent and B P-State/DRAM Clock Change Watermark
2209 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2210 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2212 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2213 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2214 v->SourcePixelFormat[k],
2215 v->SurfaceTiling[k],
2216 &v->BytePerPixelY[k],
2217 &v->BytePerPixelC[k],
2218 &v->BytePerPixelDETY[k],
2219 &v->BytePerPixelDETC[k],
2220 &v->BlockHeight256BytesY[k],
2221 &v->BlockHeight256BytesC[k],
2222 &v->BlockWidth256BytesY[k],
2223 &v->BlockWidth256BytesC[k]);
2226 CalculateSwathWidth(
2228 v->NumberOfActivePlanes,
2229 v->SourcePixelFormat,
2237 v->ODMCombineEnabled,
2240 v->BlockHeight256BytesY,
2241 v->BlockHeight256BytesC,
2242 v->BlockWidth256BytesY,
2243 v->BlockWidth256BytesC,
2244 v->BlendingAndTiming,
2248 v->SwathWidthSingleDPPY,
2249 v->SwathWidthSingleDPPC,
2254 v->swath_width_luma_ub,
2255 v->swath_width_chroma_ub);
2257 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2258 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2260 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2261 * v->VRatioChroma[k];
2262 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2265 // DCFCLK Deep Sleep
2266 CalculateDCFCLKDeepSleep(
2268 v->NumberOfActivePlanes,
2279 v->PSCL_THROUGHPUT_LUMA,
2280 v->PSCL_THROUGHPUT_CHROMA,
2282 v->ReadBandwidthPlaneLuma,
2283 v->ReadBandwidthPlaneChroma,
2285 &v->DCFCLKDeepSleep);
2288 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2289 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2290 v->DSCCLK_calculated[k] = 0.0;
2292 if (v->OutputFormat[k] == dm_420)
2293 v->DSCFormatFactor = 2;
2294 else if (v->OutputFormat[k] == dm_444)
2295 v->DSCFormatFactor = 1;
2296 else if (v->OutputFormat[k] == dm_n422)
2297 v->DSCFormatFactor = 2;
2299 v->DSCFormatFactor = 1;
2300 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2301 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2302 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2303 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2304 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2305 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2307 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2308 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2313 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2314 double BPP = v->OutputBpp[k];
2316 if (v->DSCEnabled[k] && BPP != 0) {
2317 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2318 v->DSCDelay[k] = dscceComputeDelay(
2319 v->DSCInputBitPerComponent[k],
2321 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2322 v->NumberOfDSCSlices[k],
2324 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2325 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2327 * (dscceComputeDelay(
2328 v->DSCInputBitPerComponent[k],
2330 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2331 v->NumberOfDSCSlices[k] / 2.0,
2333 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2336 * (dscceComputeDelay(
2337 v->DSCInputBitPerComponent[k],
2339 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2340 v->NumberOfDSCSlices[k] / 4.0,
2342 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2344 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2350 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2351 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2352 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2353 v->DSCDelay[k] = v->DSCDelay[j];
2356 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2357 unsigned int PDEAndMetaPTEBytesFrameY;
2358 unsigned int PixelPTEBytesPerRowY;
2359 unsigned int MetaRowByteY;
2360 unsigned int MetaRowByteC;
2361 unsigned int PDEAndMetaPTEBytesFrameC;
2362 unsigned int PixelPTEBytesPerRowC;
2363 bool PTEBufferSizeNotExceededY;
2364 bool PTEBufferSizeNotExceededC;
2366 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2367 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2368 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2369 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2370 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2372 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2373 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2376 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2379 v->BlockHeight256BytesC[k],
2380 v->BlockWidth256BytesC[k],
2381 v->SourcePixelFormat[k],
2382 v->SurfaceTiling[k],
2383 v->BytePerPixelC[k],
2386 v->ViewportHeightChroma[k],
2389 v->HostVMMaxNonCachedPageTableLevels,
2390 v->GPUVMMinPageSize,
2391 v->HostVMMinPageSize,
2392 v->PTEBufferSizeInRequestsForChroma,
2394 v->DCCMetaPitchC[k],
2395 &v->MacroTileWidthC[k],
2397 &PixelPTEBytesPerRowC,
2398 &PTEBufferSizeNotExceededC,
2399 &v->dpte_row_width_chroma_ub[k],
2400 &v->dpte_row_height_chroma[k],
2401 &v->meta_req_width_chroma[k],
2402 &v->meta_req_height_chroma[k],
2403 &v->meta_row_width_chroma[k],
2404 &v->meta_row_height_chroma[k],
2407 &v->PixelPTEReqWidthC[k],
2408 &v->PixelPTEReqHeightC[k],
2409 &v->PTERequestSizeC[k],
2410 &v->dpde0_bytes_per_frame_ub_c[k],
2411 &v->meta_pte_bytes_per_frame_ub_c[k]);
2413 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2418 v->ProgressiveToInterlaceUnitInOPP,
2420 v->ViewportYStartC[k],
2421 &v->VInitPreFillC[k],
2422 &v->MaxNumSwathC[k]);
2424 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2425 v->PTEBufferSizeInRequestsForChroma = 0;
2426 PixelPTEBytesPerRowC = 0;
2427 PDEAndMetaPTEBytesFrameC = 0;
2429 v->MaxNumSwathC[k] = 0;
2430 v->PrefetchSourceLinesC[k] = 0;
2433 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2436 v->BlockHeight256BytesY[k],
2437 v->BlockWidth256BytesY[k],
2438 v->SourcePixelFormat[k],
2439 v->SurfaceTiling[k],
2440 v->BytePerPixelY[k],
2443 v->ViewportHeight[k],
2446 v->HostVMMaxNonCachedPageTableLevels,
2447 v->GPUVMMinPageSize,
2448 v->HostVMMinPageSize,
2449 v->PTEBufferSizeInRequestsForLuma,
2451 v->DCCMetaPitchY[k],
2452 &v->MacroTileWidthY[k],
2454 &PixelPTEBytesPerRowY,
2455 &PTEBufferSizeNotExceededY,
2456 &v->dpte_row_width_luma_ub[k],
2457 &v->dpte_row_height[k],
2458 &v->meta_req_width[k],
2459 &v->meta_req_height[k],
2460 &v->meta_row_width[k],
2461 &v->meta_row_height[k],
2462 &v->vm_group_bytes[k],
2463 &v->dpte_group_bytes[k],
2464 &v->PixelPTEReqWidthY[k],
2465 &v->PixelPTEReqHeightY[k],
2466 &v->PTERequestSizeY[k],
2467 &v->dpde0_bytes_per_frame_ub_l[k],
2468 &v->meta_pte_bytes_per_frame_ub_l[k]);
2470 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2475 v->ProgressiveToInterlaceUnitInOPP,
2477 v->ViewportYStartY[k],
2478 &v->VInitPreFillY[k],
2479 &v->MaxNumSwathY[k]);
2480 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2481 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2482 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2484 CalculateRowBandwidth(
2486 v->SourcePixelFormat[k],
2490 v->HTotal[k] / v->PixelClock[k],
2493 v->meta_row_height[k],
2494 v->meta_row_height_chroma[k],
2495 PixelPTEBytesPerRowY,
2496 PixelPTEBytesPerRowC,
2497 v->dpte_row_height[k],
2498 v->dpte_row_height_chroma[k],
2500 &v->dpte_row_bw[k]);
2503 v->TotalDCCActiveDPP = 0;
2504 v->TotalActiveDPP = 0;
2505 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2506 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2507 if (v->DCCEnable[k])
2508 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2509 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2510 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2511 NoChromaPlanes = false;
2514 ReorderBytes = v->NumberOfChannels
2516 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2517 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2518 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2520 VMDataOnlyReturnBW = dml_min(
2521 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2522 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2523 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2524 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2526 #ifdef __DML_VBA_DEBUG__
2527 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2528 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2529 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2530 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2531 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2532 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2533 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2534 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2535 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2536 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2537 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2540 if (v->GPUVMEnable && v->HostVMEnable)
2541 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2543 v->UrgentExtraLatency = CalculateExtraLatency(
2544 v->RoundTripPingLatencyCycles,
2548 v->PixelChunkSizeInKByte,
2549 v->TotalDCCActiveDPP,
2554 v->NumberOfActivePlanes,
2556 v->dpte_group_bytes,
2557 HostVMInefficiencyFactor,
2558 v->HostVMMinPageSize,
2559 v->HostVMMaxNonCachedPageTableLevels);
2561 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2563 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2564 if (v->BlendingAndTiming[k] == k) {
2565 if (v->WritebackEnable[k] == true) {
2566 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2567 + CalculateWriteBackDelay(
2568 v->WritebackPixelFormat[k],
2569 v->WritebackHRatio[k],
2570 v->WritebackVRatio[k],
2571 v->WritebackVTaps[k],
2572 v->WritebackDestinationWidth[k],
2573 v->WritebackDestinationHeight[k],
2574 v->WritebackSourceHeight[k],
2575 v->HTotal[k]) / v->DISPCLK;
2577 v->WritebackDelay[v->VoltageLevel][k] = 0;
2578 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2579 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2580 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2581 v->WritebackDelay[v->VoltageLevel][k],
2583 + CalculateWriteBackDelay(
2584 v->WritebackPixelFormat[j],
2585 v->WritebackHRatio[j],
2586 v->WritebackVRatio[j],
2587 v->WritebackVTaps[j],
2588 v->WritebackDestinationWidth[j],
2589 v->WritebackDestinationHeight[j],
2590 v->WritebackSourceHeight[j],
2591 v->HTotal[k]) / v->DISPCLK);
2597 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2598 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2599 if (v->BlendingAndTiming[k] == j)
2600 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2602 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2603 v->MaxVStartupLines[k] =
2604 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2605 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2606 v->VTotal[k] - v->VActive[k]
2610 (double) v->WritebackDelay[v->VoltageLevel][k]
2611 / (v->HTotal[k] / v->PixelClock[k]),
2613 if (v->MaxVStartupLines[k] > 1023)
2614 v->MaxVStartupLines[k] = 1023;
2616 #ifdef __DML_VBA_DEBUG__
2617 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2618 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2619 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2623 v->MaximumMaxVStartupLines = 0;
2624 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2625 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2628 // We don't really care to iterate between the various prefetch modes
2629 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2631 v->UrgentLatency = CalculateUrgentLatency(
2632 v->UrgentLatencyPixelDataOnly,
2633 v->UrgentLatencyPixelMixedWithVMData,
2634 v->UrgentLatencyVMDataOnly,
2635 v->DoUrgentLatencyAdjustment,
2636 v->UrgentLatencyAdjustmentFabricClockComponent,
2637 v->UrgentLatencyAdjustmentFabricClockReference,
2640 v->FractionOfUrgentBandwidth = 0.0;
2641 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2643 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2646 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2647 bool DestinationLineTimesForPrefetchLessThan2 = false;
2648 bool VRatioPrefetchMoreThan4 = false;
2649 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2650 MaxTotalRDBandwidth = 0;
2652 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2654 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2657 myPipe.DPPCLK = v->DPPCLK[k];
2658 myPipe.DISPCLK = v->DISPCLK;
2659 myPipe.PixelClock = v->PixelClock[k];
2660 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2661 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2662 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2663 myPipe.VRatio = v->VRatio[k];
2664 myPipe.VRatioChroma = v->VRatioChroma[k];
2665 myPipe.SourceScan = v->SourceScan[k];
2666 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2667 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2668 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2669 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2670 myPipe.InterlaceEnable = v->Interlace[k];
2671 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2672 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2673 myPipe.HTotal = v->HTotal[k];
2674 myPipe.DCCEnable = v->DCCEnable[k];
2675 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2676 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2677 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2678 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2679 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2680 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2681 v->ErrorResult[k] = CalculatePrefetchSchedule(
2683 HostVMInefficiencyFactor,
2686 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2688 v->DPPCLKDelaySCLLBOnly,
2689 v->DPPCLKDelayCNVCCursor,
2690 v->DISPCLKDelaySubtotal,
2691 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2693 v->MaxInterDCNTileRepeaters,
2694 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2695 v->MaxVStartupLines[k],
2696 v->GPUVMMaxPageTableLevels,
2699 v->HostVMMaxNonCachedPageTableLevels,
2700 v->HostVMMinPageSize,
2701 v->DynamicMetadataEnable[k],
2702 v->DynamicMetadataVMEnabled,
2703 v->DynamicMetadataLinesBeforeActiveRequired[k],
2704 v->DynamicMetadataTransmittedBytes[k],
2706 v->UrgentExtraLatency,
2708 v->PDEAndMetaPTEBytesFrame[k],
2710 v->PixelPTEBytesPerRow[k],
2711 v->PrefetchSourceLinesY[k],
2713 v->VInitPreFillY[k],
2715 v->PrefetchSourceLinesC[k],
2717 v->VInitPreFillC[k],
2719 v->swath_width_luma_ub[k],
2720 v->swath_width_chroma_ub[k],
2724 &v->DSTXAfterScaler[k],
2725 &v->DSTYAfterScaler[k],
2726 &v->DestinationLinesForPrefetch[k],
2727 &v->PrefetchBandwidth[k],
2728 &v->DestinationLinesToRequestVMInVBlank[k],
2729 &v->DestinationLinesToRequestRowInVBlank[k],
2730 &v->VRatioPrefetchY[k],
2731 &v->VRatioPrefetchC[k],
2732 &v->RequiredPrefetchPixDataBWLuma[k],
2733 &v->RequiredPrefetchPixDataBWChroma[k],
2734 &v->NotEnoughTimeForDynamicMetadata[k],
2736 &v->prefetch_vmrow_bw[k],
2740 &v->VUpdateOffsetPix[k],
2741 &v->VUpdateWidthPix[k],
2742 &v->VReadyOffsetPix[k]);
2744 #ifdef __DML_VBA_DEBUG__
2745 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2747 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2750 v->NoEnoughUrgentLatencyHiding = false;
2751 v->NoEnoughUrgentLatencyHidingPre = false;
2753 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2754 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2755 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2756 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2757 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2759 CalculateUrgentBurstFactor(
2760 v->swath_width_luma_ub[k],
2761 v->swath_width_chroma_ub[k],
2764 v->HTotal[k] / v->PixelClock[k],
2766 v->CursorBufferSize,
2767 v->CursorWidth[k][0],
2771 v->BytePerPixelDETY[k],
2772 v->BytePerPixelDETC[k],
2773 v->DETBufferSizeY[k],
2774 v->DETBufferSizeC[k],
2775 &v->UrgBurstFactorCursor[k],
2776 &v->UrgBurstFactorLuma[k],
2777 &v->UrgBurstFactorChroma[k],
2778 &v->NoUrgentLatencyHiding[k]);
2780 CalculateUrgentBurstFactor(
2781 v->swath_width_luma_ub[k],
2782 v->swath_width_chroma_ub[k],
2785 v->HTotal[k] / v->PixelClock[k],
2787 v->CursorBufferSize,
2788 v->CursorWidth[k][0],
2790 v->VRatioPrefetchY[k],
2791 v->VRatioPrefetchC[k],
2792 v->BytePerPixelDETY[k],
2793 v->BytePerPixelDETC[k],
2794 v->DETBufferSizeY[k],
2795 v->DETBufferSizeC[k],
2796 &v->UrgBurstFactorCursorPre[k],
2797 &v->UrgBurstFactorLumaPre[k],
2798 &v->UrgBurstFactorChromaPre[k],
2799 &v->NoUrgentLatencyHidingPre[k]);
2801 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2803 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2804 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2805 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2806 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2807 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2809 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2810 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2811 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2813 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2815 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2816 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2817 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2818 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2819 + v->cursor_bw_pre[k]);
2821 #ifdef __DML_VBA_DEBUG__
2822 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2823 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2824 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2825 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2826 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2828 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2829 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2831 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2832 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2833 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2834 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2835 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2836 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2837 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2838 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2839 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2840 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2843 if (v->DestinationLinesForPrefetch[k] < 2)
2844 DestinationLineTimesForPrefetchLessThan2 = true;
2846 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2847 VRatioPrefetchMoreThan4 = true;
2849 if (v->NoUrgentLatencyHiding[k] == true)
2850 v->NoEnoughUrgentLatencyHiding = true;
2852 if (v->NoUrgentLatencyHidingPre[k] == true)
2853 v->NoEnoughUrgentLatencyHidingPre = true;
2856 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2858 #ifdef __DML_VBA_DEBUG__
2859 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2860 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2861 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2864 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2865 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2866 v->PrefetchModeSupported = true;
2868 v->PrefetchModeSupported = false;
2869 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2870 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2871 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2872 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2876 // This error result check was done after the PrefetchModeSupported. So we will
2877 // still try to calculate flip schedule even prefetch mode not supported
2878 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2879 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2880 v->PrefetchModeSupported = false;
2881 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2885 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2886 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2887 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2888 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2890 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2891 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2892 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2894 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2895 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2896 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2899 v->TotImmediateFlipBytes = 0;
2900 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2901 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2902 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2904 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2905 CalculateFlipSchedule(
2907 HostVMInefficiencyFactor,
2908 v->UrgentExtraLatency,
2910 v->GPUVMMaxPageTableLevels,
2912 v->HostVMMaxNonCachedPageTableLevels,
2914 v->HostVMMinPageSize,
2915 v->PDEAndMetaPTEBytesFrame[k],
2917 v->PixelPTEBytesPerRow[k],
2918 v->BandwidthAvailableForImmediateFlip,
2919 v->TotImmediateFlipBytes,
2920 v->SourcePixelFormat[k],
2921 v->HTotal[k] / v->PixelClock[k],
2926 v->dpte_row_height[k],
2927 v->meta_row_height[k],
2928 v->dpte_row_height_chroma[k],
2929 v->meta_row_height_chroma[k],
2930 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2931 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2932 &v->final_flip_bw[k],
2933 &v->ImmediateFlipSupportedForPipe[k]);
2936 v->total_dcn_read_bw_with_flip = 0.0;
2937 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2938 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2939 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2941 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2942 v->DPPPerPlane[k] * v->final_flip_bw[k]
2943 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2944 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2945 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2947 * (v->final_flip_bw[k]
2948 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2949 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2950 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2951 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2953 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2954 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2955 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2957 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2958 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2960 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2962 v->ImmediateFlipSupported = true;
2963 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2964 #ifdef __DML_VBA_DEBUG__
2965 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2967 v->ImmediateFlipSupported = false;
2968 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2970 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2971 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2972 #ifdef __DML_VBA_DEBUG__
2973 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2976 v->ImmediateFlipSupported = false;
2980 v->ImmediateFlipSupported = false;
2983 v->PrefetchAndImmediateFlipSupported =
2984 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2985 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2986 v->ImmediateFlipSupported)) ? true : false;
2987 #ifdef __DML_VBA_DEBUG__
2988 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2989 dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
2990 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2991 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2992 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2993 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2995 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2997 v->VStartupLines = v->VStartupLines + 1;
2998 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2999 ASSERT(v->PrefetchAndImmediateFlipSupported);
3001 // Unbounded Request Enabled
3002 CalculateUnboundedRequestAndCompressedBufferSize(
3003 v->DETBufferSizeInKByte[0],
3004 v->ConfigReturnBufferSizeInKByte,
3005 v->UseUnboundedRequesting,
3009 v->CompressedBufferSegmentSizeInkByte,
3011 &v->UnboundedRequestEnabled,
3012 &v->CompressedBufferSizeInkByte);
3014 //Watermarks and NB P-State/DRAM Clock Change Support
3016 enum clock_change_support DRAMClockChangeSupport; // dummy
3017 CalculateWatermarksAndDRAMSpeedChangeSupport(
3020 v->NumberOfActivePlanes,
3021 v->MaxLineBufferLines,
3023 v->WritebackInterfaceBufferSize,
3026 v->SynchronizedVBlank,
3027 v->dpte_group_bytes,
3030 v->UrgentExtraLatency,
3031 v->WritebackLatency,
3032 v->WritebackChunkSize,
3034 v->DRAMClockChangeLatency,
3036 v->SREnterPlusExitTime,
3038 v->SREnterPlusExitZ8Time,
3055 v->BlendingAndTiming,
3057 v->BytePerPixelDETY,
3058 v->BytePerPixelDETC,
3062 v->WritebackPixelFormat,
3063 v->WritebackDestinationWidth,
3064 v->WritebackDestinationHeight,
3065 v->WritebackSourceHeight,
3066 v->UnboundedRequestEnabled,
3067 v->CompressedBufferSizeInkByte,
3068 &DRAMClockChangeSupport,
3069 &v->UrgentWatermark,
3070 &v->WritebackUrgentWatermark,
3071 &v->DRAMClockChangeWatermark,
3072 &v->WritebackDRAMClockChangeWatermark,
3073 &v->StutterExitWatermark,
3074 &v->StutterEnterPlusExitWatermark,
3075 &v->Z8StutterExitWatermark,
3076 &v->Z8StutterEnterPlusExitWatermark,
3077 &v->MinActiveDRAMClockChangeLatencySupported);
3079 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3080 if (v->WritebackEnable[k] == true) {
3081 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
3083 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
3085 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3090 //Display Pipeline Delivery Time in Prefetch, Groups
3091 CalculatePixelDeliveryTimes(
3092 v->NumberOfActivePlanes,
3097 v->swath_width_luma_ub,
3098 v->swath_width_chroma_ub,
3103 v->PSCL_THROUGHPUT_LUMA,
3104 v->PSCL_THROUGHPUT_CHROMA,
3111 v->BlockWidth256BytesY,
3112 v->BlockHeight256BytesY,
3113 v->BlockWidth256BytesC,
3114 v->BlockHeight256BytesC,
3115 v->DisplayPipeLineDeliveryTimeLuma,
3116 v->DisplayPipeLineDeliveryTimeChroma,
3117 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3118 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3119 v->DisplayPipeRequestDeliveryTimeLuma,
3120 v->DisplayPipeRequestDeliveryTimeChroma,
3121 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3122 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3123 v->CursorRequestDeliveryTime,
3124 v->CursorRequestDeliveryTimePrefetch);
3126 CalculateMetaAndPTETimes(
3127 v->NumberOfActivePlanes,
3130 v->MinMetaChunkSizeBytes,
3134 v->DestinationLinesToRequestRowInVBlank,
3135 v->DestinationLinesToRequestRowInImmediateFlip,
3142 v->dpte_row_height_chroma,
3144 v->meta_row_width_chroma,
3146 v->meta_row_height_chroma,
3148 v->meta_req_width_chroma,
3150 v->meta_req_height_chroma,
3151 v->dpte_group_bytes,
3154 v->PixelPTEReqWidthY,
3155 v->PixelPTEReqHeightY,
3156 v->PixelPTEReqWidthC,
3157 v->PixelPTEReqHeightC,
3158 v->dpte_row_width_luma_ub,
3159 v->dpte_row_width_chroma_ub,
3160 v->DST_Y_PER_PTE_ROW_NOM_L,
3161 v->DST_Y_PER_PTE_ROW_NOM_C,
3162 v->DST_Y_PER_META_ROW_NOM_L,
3163 v->DST_Y_PER_META_ROW_NOM_C,
3164 v->TimePerMetaChunkNominal,
3165 v->TimePerChromaMetaChunkNominal,
3166 v->TimePerMetaChunkVBlank,
3167 v->TimePerChromaMetaChunkVBlank,
3168 v->TimePerMetaChunkFlip,
3169 v->TimePerChromaMetaChunkFlip,
3170 v->time_per_pte_group_nom_luma,
3171 v->time_per_pte_group_vblank_luma,
3172 v->time_per_pte_group_flip_luma,
3173 v->time_per_pte_group_nom_chroma,
3174 v->time_per_pte_group_vblank_chroma,
3175 v->time_per_pte_group_flip_chroma);
3177 CalculateVMGroupAndRequestTimes(
3178 v->NumberOfActivePlanes,
3180 v->GPUVMMaxPageTableLevels,
3183 v->DestinationLinesToRequestVMInVBlank,
3184 v->DestinationLinesToRequestVMInImmediateFlip,
3187 v->dpte_row_width_luma_ub,
3188 v->dpte_row_width_chroma_ub,
3190 v->dpde0_bytes_per_frame_ub_l,
3191 v->dpde0_bytes_per_frame_ub_c,
3192 v->meta_pte_bytes_per_frame_ub_l,
3193 v->meta_pte_bytes_per_frame_ub_c,
3194 v->TimePerVMGroupVBlank,
3195 v->TimePerVMGroupFlip,
3196 v->TimePerVMRequestVBlank,
3197 v->TimePerVMRequestFlip);
3200 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3201 if (PrefetchMode == 0) {
3202 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3203 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3204 v->MinTTUVBlank[k] = dml_max(
3205 v->DRAMClockChangeWatermark,
3206 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3207 } else if (PrefetchMode == 1) {
3208 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3209 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3210 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3212 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3213 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3214 v->MinTTUVBlank[k] = v->UrgentWatermark;
3216 if (!v->DynamicMetadataEnable[k])
3217 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3220 // DCC Configuration
3222 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3223 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3224 v->SourcePixelFormat[k],
3225 v->SurfaceWidthY[k],
3226 v->SurfaceWidthC[k],
3227 v->SurfaceHeightY[k],
3228 v->SurfaceHeightC[k],
3229 v->DETBufferSizeInKByte[0] * 1024,
3230 v->BlockHeight256BytesY[k],
3231 v->BlockHeight256BytesC[k],
3232 v->SurfaceTiling[k],
3233 v->BytePerPixelY[k],
3234 v->BytePerPixelC[k],
3235 v->BytePerPixelDETY[k],
3236 v->BytePerPixelDETC[k],
3238 &v->DCCYMaxUncompressedBlock[k],
3239 &v->DCCCMaxUncompressedBlock[k],
3240 &v->DCCYMaxCompressedBlock[k],
3241 &v->DCCCMaxCompressedBlock[k],
3242 &v->DCCYIndependentBlock[k],
3243 &v->DCCCIndependentBlock[k]);
3246 // VStartup Adjustment
3247 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3248 bool isInterlaceTiming;
3249 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3250 #ifdef __DML_VBA_DEBUG__
3251 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3254 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3256 #ifdef __DML_VBA_DEBUG__
3257 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3258 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3259 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3260 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3263 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3264 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3265 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3268 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3270 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3271 - v->VFrontPorch[k])
3272 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3273 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3275 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3277 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3278 <= (isInterlaceTiming ?
3279 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3280 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3281 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3283 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3285 #ifdef __DML_VBA_DEBUG__
3286 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3287 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3288 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3289 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3290 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3291 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3292 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3293 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3294 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3295 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3296 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3301 //Maximum Bandwidth Used
3302 double TotalWRBandwidth = 0;
3303 double MaxPerPlaneVActiveWRBandwidth = 0;
3304 double WRBandwidth = 0;
3305 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3306 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3307 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3308 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3309 } else if (v->WritebackEnable[k] == true) {
3310 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3311 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3313 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3314 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3317 v->TotalDataReadBandwidth = 0;
3318 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3319 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3322 // Stutter Efficiency
3323 CalculateStutterEfficiency(
3325 v->CompressedBufferSizeInkByte,
3326 v->UnboundedRequestEnabled,
3327 v->ConfigReturnBufferSizeInKByte,
3328 v->MetaFIFOSizeInKEntries,
3329 v->ZeroSizeBufferEntries,
3330 v->NumberOfActivePlanes,
3331 v->ROBBufferSizeInKByte,
3332 v->TotalDataReadBandwidth,
3335 v->COMPBUF_RESERVED_SPACE_64B,
3336 v->COMPBUF_RESERVED_SPACE_ZS,
3339 v->SynchronizedVBlank,
3340 v->StutterEnterPlusExitWatermark,
3341 v->Z8StutterEnterPlusExitWatermark,
3342 v->ProgressiveToInterlaceUnitInOPP,
3348 v->BytePerPixelDETY,
3354 v->DCCFractionOfZeroSizeRequestsLuma,
3355 v->DCCFractionOfZeroSizeRequestsChroma,
3361 v->BlockHeight256BytesY,
3362 v->BlockWidth256BytesY,
3363 v->BlockHeight256BytesC,
3364 v->BlockWidth256BytesC,
3365 v->DCCYMaxUncompressedBlock,
3366 v->DCCCMaxUncompressedBlock,
3370 v->ReadBandwidthPlaneLuma,
3371 v->ReadBandwidthPlaneChroma,
3374 &v->StutterEfficiencyNotIncludingVBlank,
3375 &v->StutterEfficiency,
3376 &v->NumberOfStutterBurstsPerFrame,
3377 &v->Z8StutterEfficiencyNotIncludingVBlank,
3378 &v->Z8StutterEfficiency,
3379 &v->Z8NumberOfStutterBurstsPerFrame,
/*
 * DisplayPipeConfiguration - per-plane 256-byte block lookup followed by one
 * swath/DET configuration pass.
 *
 * For every active plane, dml30_CalculateBytePerPixelAnd256BBlockSizes() is
 * called with that plane's source pixel format and surface tiling, and the
 * 256-byte block heights/widths it reports are stored in function-local
 * arrays.  Those arrays are then passed to a single
 * CalculateSwathAndDETConfiguration() call together with state read from
 * mode_lib->vba.
 *
 * @mode_lib: display mode library instance; accessed here only through
 *            &mode_lib->vba.
 */
3383 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3385 struct vba_vars_st *v = &mode_lib->vba;
3386 // Display Pipe Configuration
// Function-local arrays with DC__NUM_DPP__MAX entries each; the block-size
// arrays are indexed by plane (k) in the loop below.
3387 double BytePerPixDETY[DC__NUM_DPP__MAX];
3388 double BytePerPixDETC[DC__NUM_DPP__MAX];
3389 int BytePerPixY[DC__NUM_DPP__MAX];
3390 int BytePerPixC[DC__NUM_DPP__MAX];
3391 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3392 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3393 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3394 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
// NOTE(review): dummy1..dummy7 look like placeholders for outputs this
// function does not consume - confirm against the call below.
3395 double dummy1[DC__NUM_DPP__MAX];
3396 double dummy2[DC__NUM_DPP__MAX];
3397 double dummy3[DC__NUM_DPP__MAX];
3398 double dummy4[DC__NUM_DPP__MAX];
3399 int dummy5[DC__NUM_DPP__MAX];
3400 int dummy6[DC__NUM_DPP__MAX];
3401 bool dummy7[DC__NUM_DPP__MAX];
3402 bool dummysinglestring;
// Step 1: one helper call per active plane to fill the block-size arrays.
3406 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3408 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3409 v->SourcePixelFormat[k],
3410 v->SurfaceTiling[k],
3415 &Read256BytesBlockHeightY[k],
3416 &Read256BytesBlockHeightC[k],
3417 &Read256BytesBlockWidthY[k],
3418 &Read256BytesBlockWidthC[k]);
// Step 2: a single call covering all planes; it receives whole arrays rather
// than per-plane elements.  Only element [0] of DETBufferSizeInKByte is passed.
3421 CalculateSwathAndDETConfiguration(
3423 v->NumberOfActivePlanes,
3424 v->DETBufferSizeInKByte[0],
3428 v->SourcePixelFormat,
3436 Read256BytesBlockHeightY,
3437 Read256BytesBlockHeightC,
3438 Read256BytesBlockWidthY,
3439 Read256BytesBlockWidthC,
3440 v->ODMCombineEnabled,
3441 v->BlendingAndTiming,
// Final output lands in a local that is not read again before returning.
3459 &dummysinglestring);
/*
 * CalculateTWait - select the wait time that applies to a prefetch mode.
 *
 * PrefetchMode 0: dml_max() of (DRAMClockChangeLatency + UrgentLatency) and
 *                 dml_max(SREnterPlusExitTime, UrgentLatency).
 * PrefetchMode 1: dml_max(SREnterPlusExitTime, UrgentLatency).
 * any other mode: UrgentLatency unchanged.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	double SelfRefreshOrUrgent;

	switch (PrefetchMode) {
	case 0:
		SelfRefreshOrUrgent = dml_max(SREnterPlusExitTime, UrgentLatency);
		return dml_max(DRAMClockChangeLatency + UrgentLatency, SelfRefreshOrUrgent);
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
/*
 * dml31_CalculateWriteBackDISPCLK - largest of three DISPCLK candidates
 * computed from the writeback scaler configuration.
 *
 *   DISPCLK_H  = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio
 *   DISPCLK_V  = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal
 *   DISPCLK_HB = PixelClock * WritebackVTaps
 *                * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0)
 *                / 6.0 / WritebackSourceWidth
 *
 * Returns dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB).
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but are not referenced
 * by the statements below.
 * NOTE(review): the result scales linearly with PixelClock, so it is presumably
 * in the same unit as PixelClock - confirm with the caller.
 */
3473 double dml31_CalculateWriteBackDISPCLK(
3474 enum source_format_class WritebackPixelFormat,
3476 double WritebackHRatio,
3477 double WritebackVRatio,
3478 unsigned int WritebackHTaps,
3479 unsigned int WritebackVTaps,
3480 long WritebackSourceWidth,
3481 long WritebackDestinationWidth,
3482 unsigned int HTotal,
3483 unsigned int WritebackLineBufferSize)
3485 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
// Horizontal candidate: taps grouped by 8, divided by the horizontal ratio.
3487 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
// Vertical candidate: destination width grouped by 6, per vertical tap, over HTotal.
3488 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
// Third candidate: involves the line buffer size (divided by 57.0) and the source width.
3489 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3490 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3493 static double CalculateWriteBackDelay(
3494 enum source_format_class WritebackPixelFormat,
3495 double WritebackHRatio,
3496 double WritebackVRatio,
3497 unsigned int WritebackVTaps,
3498 int WritebackDestinationWidth,
3499 int WritebackDestinationHeight,
3500 int WritebackSourceHeight,
3501 unsigned int HTotal)
3503 double CalculateWriteBackDelay;
3505 double Output_lines_last_notclamped;
3506 double WritebackVInit;
3508 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3509 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3510 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3511 if (Output_lines_last_notclamped < 0) {
3512 CalculateWriteBackDelay = 0;
3514 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3516 return CalculateWriteBackDelay;
/*
 * CalculateVupdateAndDynamicMetadataParameters - derive the VUPDATE/VREADY
 * pixel offsets, the setup time and the dynamic-metadata timing terms.
 *
 * Outputs (all written through pointers):
 *   *VUpdateWidthPix  = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK
 *                                 + TotalRepeaterDelayTime) * PixelClock, 1.0)
 *   *VReadyOffsetPix  = dml_ceil(dml_max(150.0 / DPPCLK,
 *                                 TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep
 *                                 + 10.0 / DPPCLK) * PixelClock, 1.0)
 *   *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1)
 *   *TSetup           = sum of the three pixel counts above / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK
 *   *Tdmec            = HTotal / PixelClock (one line time)
 *   *Tdmsks           = half the vertical blank time when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many line times; halved again for
 *                       interlaced timing that is not converted in the OPP.
 *
 * where TotalRepeaterDelayTime =
 *   MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK).
 *
 * The debug prints use %f for *VUpdateWidthPix and *VReadyOffsetPix because
 * both are doubles; %d is kept for the int *VUpdateOffsetPix.
 */
3519 static void CalculateVupdateAndDynamicMetadataParameters(
3520 int MaxInterDCNTileRepeaters,
3523 double DCFClkDeepSleep,
3527 int DynamicMetadataTransmittedBytes,
3528 int DynamicMetadataLinesBeforeActiveRequired,
3529 int InterlaceEnable,
3530 bool ProgressiveToInterlaceUnitInOPP,
3535 int *VUpdateOffsetPix,
3536 double *VUpdateWidthPix,
3537 double *VReadyOffsetPix)
3539 double TotalRepeaterDelayTime;
3541 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
3542 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
3543 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
3544 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
3545 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
3546 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
3547 *Tdmec = HTotal / PixelClock;
// No explicit line requirement: use half of the vertical blank time.
3548 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
3549 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3551 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
// Interlaced timing without progressive-to-interlace conversion in the OPP
// halves the value again.
3553 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
3554 *Tdmsks = *Tdmsks / 2;
3556 #ifdef __DML_VBA_DEBUG__
// The first two values are doubles, so they must be printed with %f.
3557 dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
3558 dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
3559 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
/*
 * CalculateRowBandwidth - bandwidth for fetching DCC meta rows and page-table
 * (DPTE) rows of one surface.
 *
 * Each row's bandwidth is its byte count scaled by the vertical ratio and
 * divided by (row height * LineTime).
 *
 * *meta_row_bw (DCC enabled):
 *   two-plane formats (dm_420_8 / dm_420_10 / dm_420_12 / dm_rgbe_alpha):
 *     VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
 *     + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime)
 *   all other formats: the luma term only.
 *
 * *dpte_row_bw (GPUVM enabled): same shape, using PixelPTEBytesPerRow* and
 *   dpte_row_height_*.
 *
 * NOTE(review): when DCCEnable / GPUVMEnable is not true the corresponding
 * output is presumably set to zero - confirm.
 */
3563 static void CalculateRowBandwidth(
3565 enum source_format_class SourcePixelFormat,
3567 double VRatioChroma,
3570 unsigned int MetaRowByteLuma,
3571 unsigned int MetaRowByteChroma,
3572 unsigned int meta_row_height_luma,
3573 unsigned int meta_row_height_chroma,
3574 unsigned int PixelPTEBytesPerRowLuma,
3575 unsigned int PixelPTEBytesPerRowChroma,
3576 unsigned int dpte_row_height_luma,
3577 unsigned int dpte_row_height_chroma,
3578 double *meta_row_bw,
3579 double *dpte_row_bw)
// --- DCC meta row bandwidth ---
3581 if (DCCEnable != true) {
// Formats with a separate chroma plane add the chroma meta row term.
3583 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3584 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3586 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
// --- page-table row bandwidth ---
3589 if (GPUVMEnable != true) {
// Same format split as above, applied to the PTE row bytes and heights.
3591 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3592 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3593 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3595 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - decide whether one pipe can perform an immediate
 * flip and how many destination lines its VM and row fetches need.
 *
 * Steps, in order:
 *   1. HostVMDynamicLevelsTrips is HostVMMaxNonCachedPageTableLevels when both
 *      GPUVM and HostVM are enabled, otherwise 0.
 *   2. ImmediateFlipBW is this pipe's share of
 *      BandwidthAvailableForImmediateFlip, weighted by its
 *      (PDE/meta-PTE + meta row + DPTE row) bytes over TotImmediateFlipBytes.
 *      It is only computed, and only used, when GPUVM or DCC is enabled.
 *   3. The meta-PTE and row fetch times are each a dml_max3() of candidates
 *      (bandwidth-limited time, latency-limited time, ...), or 0 when the
 *      feature is disabled.  Each time is converted to destination lines as
 *      dml_ceil(4.0 * time / LineTime, 1) / 4.0, i.e. at quarter-line
 *      granularity.
 *   4. *final_flip_bw is the bandwidth needed to fetch those bytes within the
 *      line counts from step 3.
 *   5. min_row_time is the smallest of the applicable
 *      row_height * LineTime / VRatio terms (DPTE and/or meta, luma and/or
 *      chroma, depending on which features are enabled and on the format).
 *   6. The pipe does NOT support immediate flip when the VM line count is
 *      >= 32, the row line count is >= 16, or
 *      meta-PTE time + 2 * row time exceeds min_row_time.
 *
 * Outputs: *DestinationLinesToRequestVMInImmediateFlip,
 *          *DestinationLinesToRequestRowInImmediateFlip, *final_flip_bw,
 *          *ImmediateFlipSupportedForPipe.
 */
3599 static void CalculateFlipSchedule(
3600 struct display_mode_lib *mode_lib,
3601 double HostVMInefficiencyFactor,
3602 double UrgentExtraLatency,
3603 double UrgentLatency,
3604 unsigned int GPUVMMaxPageTableLevels,
3606 unsigned int HostVMMaxNonCachedPageTableLevels,
3608 double HostVMMinPageSize,
3609 double PDEAndMetaPTEBytesPerFrame,
3610 double MetaRowBytes,
3611 double DPTEBytesPerRow,
3612 double BandwidthAvailableForImmediateFlip,
3613 unsigned int TotImmediateFlipBytes,
3614 enum source_format_class SourcePixelFormat,
3617 double VRatioChroma,
3620 unsigned int dpte_row_height,
3621 unsigned int meta_row_height,
3622 unsigned int dpte_row_height_chroma,
3623 unsigned int meta_row_height_chroma,
3624 double *DestinationLinesToRequestVMInImmediateFlip,
3625 double *DestinationLinesToRequestRowInImmediateFlip,
3626 double *final_flip_bw,
3627 bool *ImmediateFlipSupportedForPipe)
3629 double min_row_time = 0.0;
3630 unsigned int HostVMDynamicLevelsTrips;
3631 double TimeForFetchingMetaPTEImmediateFlip;
3632 double TimeForFetchingRowInVBlankImmediateFlip;
3633 double ImmediateFlipBW;
// Step 1: extra page-table trips only apply when both GPUVM and HostVM are on.
3635 if (GPUVMEnable == true && HostVMEnable == true) {
3636 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3638 HostVMDynamicLevelsTrips = 0;
// Step 2: proportional share of the bandwidth available for immediate flip.
3641 if (GPUVMEnable == true || DCCEnable == true) {
3642 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
// Step 3a: time to fetch the PDE / meta-PTE data (GPUVM only).
3645 if (GPUVMEnable == true) {
3646 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3647 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3648 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3651 TimeForFetchingMetaPTEImmediateFlip = 0;
// Convert to destination lines at quarter-line granularity.
3654 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
// Step 3b: time to fetch one meta row plus one DPTE row.
3655 if ((GPUVMEnable == true || DCCEnable == true)) {
3656 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3657 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3658 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3661 TimeForFetchingRowInVBlankImmediateFlip = 0;
3664 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
// Step 4: bandwidth implied by fetching within the line counts above.
3666 if (GPUVMEnable == true) {
3667 *final_flip_bw = dml_max(
3668 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3669 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
// GPUVM is known to be false here, so this branch is effectively "DCC only".
3670 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3671 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
// Step 5: shortest row time among the rows that are actually fetched.
// NOTE(review): dm_420_12 is not listed here although CalculateRowBandwidth()
// treats it as a two-plane format - confirm whether the omission is intended.
3676 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3677 if (GPUVMEnable == true && DCCEnable != true) {
3678 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3679 } else if (GPUVMEnable != true && DCCEnable == true) {
3680 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3682 min_row_time = dml_min4(
3683 dpte_row_height * LineTime / VRatio,
3684 meta_row_height * LineTime / VRatio,
3685 dpte_row_height_chroma * LineTime / VRatioChroma,
3686 meta_row_height_chroma * LineTime / VRatioChroma);
// Single-plane formats: only the luma row heights matter.
3689 if (GPUVMEnable == true && DCCEnable != true) {
3690 min_row_time = dpte_row_height * LineTime / VRatio;
3691 } else if (GPUVMEnable != true && DCCEnable == true) {
3692 min_row_time = meta_row_height * LineTime / VRatio;
3694 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
// Step 6: reject when either line count reaches its limit (32 / 16) or the
// fetches cannot complete within one row time.
3698 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3699 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3700 *ImmediateFlipSupportedForPipe = false;
3702 *ImmediateFlipSupportedForPipe = true;
3705 #ifdef __DML_VBA_DEBUG__
3706 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
3707 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
3708 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3709 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3710 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3711 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
/*
 * TruncToValidBPP - pick or validate an output bits-per-pixel value against
 * the link bandwidth and the DSC / non-DSC limits of the output format.
 *
 * Outline:
 *   - Per-format limits are selected first.  The DSC upper bound is a multiple
 *     of DSCInputBitPerComponent (1.5x for dm_420, 3x for dm_444, 2x for
 *     dm_n422, 3x otherwise) minus 1/16 bpp.
 *   - MaxLinkBPP is LinkBitRate / 10 * 8 * Lanes / PixelClock, reduced by 2.4%
 *     when DSC is enabled on a dm_dp output.
 *   - MaxLinkBPP is tested against 16 for 4:1 ODM combine and against 32 for
 *     2:1 ODM combine.
 *   - DesiredBPP == 0 asks the function to choose a value from the link
 *     budget; with DSC the in-range result is MaxLinkBPP rounded down to a
 *     multiple of 1/16.
 *   - A non-zero DesiredBPP is checked against the non-DSC values or the DSC
 *     [MinDSCBPP, MaxDSCBPP] range.
 */
3716 static double TruncToValidBPP(
3724 enum output_encoder_class Output,
3725 enum output_format_class Format,
3726 unsigned int DSCInputBitPerComponent,
3730 enum odm_combine_mode ODMCombine)
3739 if (Format == dm_420) {
// 1.0 / 16, not 1 / 16: with two integer operands the division evaluates to 0
// and the 1/16 bpp margin applied to every other format would be lost here.
3744 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3745 } else if (Format == dm_444) {
3750 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3757 if (Format == dm_n422) {
3759 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3762 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
// Link budget in bits per pixel; DSC over DP gives up 2.4% of it.
3766 if (DSCEnable && Output == dm_dp) {
3767 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3769 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
// ODM combine thresholds on the link budget.
3772 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3774 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
// DesiredBPP == 0: choose a value from the link budget.
3778 if (DesiredBPP == 0) {
3780 if (MaxLinkBPP < MinDSCBPP) {
3782 } else if (MaxLinkBPP >= MaxDSCBPP) {
// Round the link budget down to a multiple of 1/16 bpp.
3785 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3788 if (MaxLinkBPP >= NonDSCBPP2) {
3790 } else if (MaxLinkBPP >= NonDSCBPP1) {
3792 } else if (MaxLinkBPP >= NonDSCBPP0) {
// Explicit DesiredBPP: test it against the non-DSC values or the DSC range.
3799 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3800 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
/*
 * CalculatePrefetchSchedulePerPlane - run CalculatePrefetchSchedule() for one
 * plane of one candidate configuration.
 *
 * The function fills a local pipe descriptor (myPipe) from mode_lib->vba,
 * then calls CalculatePrefetchSchedule() and stores its return value in
 * v->NoTimeForPrefetch[i][j][k].  Other results are written by the callee
 * directly into the per-plane ([k]) and per-configuration ([i][j][k]) arrays
 * passed by address.
 *
 * NOTE(review): k is used wherever a per-plane array is indexed; i and j
 * select the candidate configuration (presumably voltage state and a second
 * per-state dimension) - confirm against the caller.
 */
3809 static noinline void CalculatePrefetchSchedulePerPlane(
3810 struct display_mode_lib *mode_lib,
3811 double HostVMInefficiencyFactor,
3816 struct vba_vars_st *v = &mode_lib->vba;
// Clocks and DPP count come from the per-configuration arrays.
3819 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3820 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3821 myPipe.PixelClock = v->PixelClock[k];
3822 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3823 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3824 myPipe.ScalerEnabled = v->ScalerEnabled[k];
// mode_lib->vba is the same object as *v.
3825 myPipe.VRatio = mode_lib->vba.VRatio[k];
3826 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3828 myPipe.SourceScan = v->SourceScan[k];
3829 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3830 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3831 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3832 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3833 myPipe.InterlaceEnable = v->Interlace[k];
3834 myPipe.NumberOfCursors = v->NumberOfCursors[k];
// Vertical blank length in lines.
3835 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3836 myPipe.HTotal = v->HTotal[k];
3837 myPipe.DCCEnable = v->DCCEnable[k];
// True for either ODM combine mode (4:1 or 2:1).
3838 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3839 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3840 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3841 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3842 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3843 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
// The return value is recorded as the "no time for prefetch" flag.
3844 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3846 HostVMInefficiencyFactor,
3848 v->DSCDelayPerState[i][k],
3849 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3851 v->DPPCLKDelaySCLLBOnly,
3852 v->DPPCLKDelayCNVCCursor,
3853 v->DISPCLKDelaySubtotal,
3854 v->SwathWidthYThisState[k] / v->HRatio[k],
3856 v->MaxInterDCNTileRepeaters,
// The first VStartup argument is capped by v->MaxVStartup; the second is not.
3857 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3858 v->MaximumVStartup[i][j][k],
3859 v->GPUVMMaxPageTableLevels,
3862 v->HostVMMaxNonCachedPageTableLevels,
3863 v->HostVMMinPageSize,
3864 v->DynamicMetadataEnable[k],
3865 v->DynamicMetadataVMEnabled,
3866 v->DynamicMetadataLinesBeforeActiveRequired[k],
3867 v->DynamicMetadataTransmittedBytes[k],
3871 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3872 v->MetaRowBytes[i][j][k],
3873 v->DPTEBytesPerRow[i][j][k],
3874 v->PrefetchLinesY[i][j][k],
3875 v->SwathWidthYThisState[k],
3878 v->PrefetchLinesC[i][j][k],
3879 v->SwathWidthCThisState[k],
3882 v->swath_width_luma_ub_this_state[k],
3883 v->swath_width_chroma_ub_this_state[k],
3884 v->SwathHeightYThisState[k],
3885 v->SwathHeightCThisState[k],
// From here on the arguments are output locations filled by the callee.
3887 &v->DSTXAfterScaler[k],
3888 &v->DSTYAfterScaler[k],
3889 &v->LineTimesForPrefetch[k],
3891 &v->LinesForMetaPTE[k],
3892 &v->LinesForMetaAndDPTERow[k],
3893 &v->VRatioPreY[i][j][k],
3894 &v->VRatioPreC[i][j][k],
3895 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3896 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3897 &v->NoTimeForDynamicMetadata[i][j][k],
3899 &v->prefetch_vmrow_bw[k],
3903 &v->VUpdateOffsetPix[k],
3904 &v->VUpdateWidthPix[k],
3905 &v->VReadyOffsetPix[k]);
3908 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3910 struct vba_vars_st *v = &mode_lib->vba;
3914 int ReorderingBytes;
3915 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3916 bool NoChroma = true;
3917 bool EnoughWritebackUnits = true;
3918 bool P2IWith420 = false;
3919 bool DSCOnlyIfNecessaryWithBPP = false;
3920 bool DSC422NativeNotSupported = false;
3921 double MaxTotalVActiveRDBandwidth;
3922 bool ViewportExceedsSurface = false;
3923 bool FMTBufferExceeded = false;
3925 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3927 CalculateMinAndMaxPrefetchMode(
3928 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3929 &MinPrefetchMode, &MaxPrefetchMode);
3931 /*Scale Ratio, taps Support Check*/
3933 v->ScaleRatioAndTapsSupport = true;
3934 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3935 if (v->ScalerEnabled[k] == false
3936 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3937 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3938 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3939 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3940 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3941 v->ScaleRatioAndTapsSupport = false;
3942 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3943 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3944 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3945 || v->VRatio[k] > v->vtaps[k]
3946 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3947 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3948 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3949 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3950 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3951 || v->HRatioChroma[k] > v->MaxHSCLRatio
3952 || v->VRatioChroma[k] > v->MaxVSCLRatio
3953 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3954 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3955 v->ScaleRatioAndTapsSupport = false;
3958 /*Source Format, Pixel Format and Scan Support Check*/
3960 v->SourceFormatPixelAndScanSupport = true;
3961 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3962 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3963 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
3964 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
3965 v->SourceFormatPixelAndScanSupport = false;
3968 /*Bandwidth Support Check*/
3970 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3971 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3972 v->SourcePixelFormat[k],
3973 v->SurfaceTiling[k],
3974 &v->BytePerPixelY[k],
3975 &v->BytePerPixelC[k],
3976 &v->BytePerPixelInDETY[k],
3977 &v->BytePerPixelInDETC[k],
3978 &v->Read256BlockHeightY[k],
3979 &v->Read256BlockHeightC[k],
3980 &v->Read256BlockWidthY[k],
3981 &v->Read256BlockWidthC[k]);
3983 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3984 if (v->SourceScan[k] != dm_vert) {
3985 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3986 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3988 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3989 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3992 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3993 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3994 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3995 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3996 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3998 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3999 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
4000 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4001 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
4002 } else if (v->WritebackEnable[k] == true) {
4003 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4004 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
4006 v->WriteBandwidth[k] = 0.0;
4010 /*Writeback Latency support check*/
4012 v->WritebackLatencySupport = true;
4013 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4014 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
4015 v->WritebackLatencySupport = false;
4019 /*Writeback Mode Support Check*/
4021 v->TotalNumberOfActiveWriteback = 0;
4022 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4023 if (v->WritebackEnable[k] == true) {
4024 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4028 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4029 EnoughWritebackUnits = false;
4032 /*Writeback Scale Ratio and Taps Support Check*/
4034 v->WritebackScaleRatioAndTapsSupport = true;
4035 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4036 if (v->WritebackEnable[k] == true) {
4037 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4038 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4039 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4040 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4041 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4042 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4043 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4044 v->WritebackScaleRatioAndTapsSupport = false;
4046 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4047 v->WritebackScaleRatioAndTapsSupport = false;
4051 /*Maximum DISPCLK/DPPCLK Support check*/
4053 v->WritebackRequiredDISPCLK = 0.0;
4054 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4055 if (v->WritebackEnable[k] == true) {
4056 v->WritebackRequiredDISPCLK = dml_max(
4057 v->WritebackRequiredDISPCLK,
4058 dml31_CalculateWriteBackDISPCLK(
4059 v->WritebackPixelFormat[k],
4061 v->WritebackHRatio[k],
4062 v->WritebackVRatio[k],
4063 v->WritebackHTaps[k],
4064 v->WritebackVTaps[k],
4065 v->WritebackSourceWidth[k],
4066 v->WritebackDestinationWidth[k],
4068 v->WritebackLineBufferSize));
4071 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4072 if (v->HRatio[k] > 1.0) {
4073 v->PSCL_FACTOR[k] = dml_min(
4074 v->MaxDCHUBToPSCLThroughput,
4075 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4077 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4079 if (v->BytePerPixelC[k] == 0.0) {
4080 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4081 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4083 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4084 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4086 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4087 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4090 if (v->HRatioChroma[k] > 1.0) {
4091 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4092 v->MaxDCHUBToPSCLThroughput,
4093 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4095 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4097 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4099 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4100 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4101 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4102 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4104 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4105 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4106 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4110 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4111 int MaximumSwathWidthSupportLuma;
4112 int MaximumSwathWidthSupportChroma;
4114 if (v->SurfaceTiling[k] == dm_sw_linear) {
4115 MaximumSwathWidthSupportLuma = 8192.0;
4116 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4117 MaximumSwathWidthSupportLuma = 2880.0;
4118 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4119 MaximumSwathWidthSupportLuma = 3840.0;
4121 MaximumSwathWidthSupportLuma = 5760.0;
4124 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4125 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4127 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4129 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4130 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4131 if (v->BytePerPixelC[k] == 0.0) {
4132 v->MaximumSwathWidthInLineBufferChroma = 0;
4134 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4135 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4137 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4138 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4141 CalculateSwathAndDETConfiguration(
4143 v->NumberOfActivePlanes,
4144 v->DETBufferSizeInKByte[0],
4145 v->MaximumSwathWidthLuma,
4146 v->MaximumSwathWidthChroma,
4148 v->SourcePixelFormat,
4156 v->Read256BlockHeightY,
4157 v->Read256BlockHeightC,
4158 v->Read256BlockWidthY,
4159 v->Read256BlockWidthC,
4160 v->odm_combine_dummy,
4161 v->BlendingAndTiming,
4164 v->BytePerPixelInDETY,
4165 v->BytePerPixelInDETC,
4169 v->NoOfDPPThisState,
4170 v->swath_width_luma_ub_this_state,
4171 v->swath_width_chroma_ub_this_state,
4172 v->SwathWidthYThisState,
4173 v->SwathWidthCThisState,
4174 v->SwathHeightYThisState,
4175 v->SwathHeightCThisState,
4176 v->DETBufferSizeYThisState,
4177 v->DETBufferSizeCThisState,
4178 v->SingleDPPViewportSizeSupportPerPlane,
4179 &v->ViewportSizeSupport[0][0]);
4181 for (i = 0; i < v->soc.num_states; i++) {
4182 for (j = 0; j < 2; j++) {
4183 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4184 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4185 v->RequiredDISPCLK[i][j] = 0.0;
4186 v->DISPCLK_DPPCLK_Support[i][j] = true;
4187 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4188 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4189 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4190 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4191 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4192 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4193 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4194 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4196 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4197 * (1 + v->DISPCLKRampingMargin / 100.0);
4198 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4199 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4200 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4201 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4202 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4204 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4205 * (1 + v->DISPCLKRampingMargin / 100.0);
4206 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4207 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4208 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4209 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4210 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4213 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4214 || !(v->Output[k] == dm_dp ||
4215 v->Output[k] == dm_dp2p0 ||
4216 v->Output[k] == dm_edp)) {
4217 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4218 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4220 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4221 FMTBufferExceeded = true;
4222 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4223 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4224 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4225 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4226 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4227 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4228 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4229 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4230 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4231 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4233 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4234 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4236 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4237 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4238 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4239 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4240 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4242 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4243 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4246 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4247 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4248 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4249 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4250 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4252 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4253 FMTBufferExceeded = true;
4255 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4256 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4259 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4260 v->MPCCombine[i][j][k] = false;
4261 v->NoOfDPP[i][j][k] = 4;
4262 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4263 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4264 v->MPCCombine[i][j][k] = false;
4265 v->NoOfDPP[i][j][k] = 2;
4266 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4267 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4268 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4269 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4270 v->MPCCombine[i][j][k] = false;
4271 v->NoOfDPP[i][j][k] = 1;
4272 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4274 v->MPCCombine[i][j][k] = true;
4275 v->NoOfDPP[i][j][k] = 2;
4276 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4278 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4279 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4280 > v->MaxDppclkRoundedDownToDFSGranularity)
4281 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4282 v->DISPCLK_DPPCLK_Support[i][j] = false;
4285 v->TotalNumberOfActiveDPP[i][j] = 0;
4286 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4287 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4288 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4289 if (v->NoOfDPP[i][j][k] == 1)
4290 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4291 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4292 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4297 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4298 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4299 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4300 double BWOfNonSplitPlaneOfMaximumBandwidth;
4301 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4302 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4303 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4304 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4305 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4306 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4307 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4308 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4311 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4312 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4313 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4314 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4315 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4316 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4317 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4320 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4321 v->RequiredDISPCLK[i][j] = 0.0;
4322 v->DISPCLK_DPPCLK_Support[i][j] = true;
4323 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4324 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4325 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4326 v->MPCCombine[i][j][k] = true;
4327 v->NoOfDPP[i][j][k] = 2;
4328 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4329 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4331 v->MPCCombine[i][j][k] = false;
4332 v->NoOfDPP[i][j][k] = 1;
4333 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4334 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4336 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4337 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4338 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4339 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4341 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4343 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4344 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4345 > v->MaxDppclkRoundedDownToDFSGranularity)
4346 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4347 v->DISPCLK_DPPCLK_Support[i][j] = false;
4350 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4351 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4352 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4355 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4356 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4357 v->DISPCLK_DPPCLK_Support[i][j] = false;
4362 /*Total Available Pipes Support Check*/
4364 for (i = 0; i < v->soc.num_states; i++) {
4365 for (j = 0; j < 2; j++) {
4366 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4367 v->TotalAvailablePipesSupport[i][j] = true;
4369 v->TotalAvailablePipesSupport[i][j] = false;
4373 /*Display IO and DSC Support Check*/
4375 v->NonsupportedDSCInputBPC = false;
4376 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4377 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4378 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4379 v->NonsupportedDSCInputBPC = true;
4383 /*Number Of DSC Slices*/
4384 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4385 if (v->BlendingAndTiming[k] == k) {
4386 if (v->PixelClockBackEnd[k] > 3200) {
4387 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4388 } else if (v->PixelClockBackEnd[k] > 1360) {
4389 v->NumberOfDSCSlices[k] = 8;
4390 } else if (v->PixelClockBackEnd[k] > 680) {
4391 v->NumberOfDSCSlices[k] = 4;
4392 } else if (v->PixelClockBackEnd[k] > 340) {
4393 v->NumberOfDSCSlices[k] = 2;
4395 v->NumberOfDSCSlices[k] = 1;
4398 v->NumberOfDSCSlices[k] = 0;
4402 for (i = 0; i < v->soc.num_states; i++) {
4403 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4404 v->RequiresDSC[i][k] = false;
4405 v->RequiresFEC[i][k] = false;
4406 if (v->BlendingAndTiming[k] == k) {
4407 if (v->Output[k] == dm_hdmi) {
4408 v->RequiresDSC[i][k] = false;
4409 v->RequiresFEC[i][k] = false;
4410 v->OutputBppPerState[i][k] = TruncToValidBPP(
4411 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4415 v->PixelClockBackEnd[k],
4416 v->ForcedOutputLinkBPP[k],
4420 v->DSCInputBitPerComponent[k],
4421 v->NumberOfDSCSlices[k],
4422 v->AudioSampleRate[k],
4423 v->AudioSampleLayout[k],
4424 v->ODMCombineEnablePerState[i][k]);
4425 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4426 if (v->DSCEnable[k] == true) {
4427 v->RequiresDSC[i][k] = true;
4428 v->LinkDSCEnable = true;
4429 if (v->Output[k] == dm_dp) {
4430 v->RequiresFEC[i][k] = true;
4432 v->RequiresFEC[i][k] = false;
4435 v->RequiresDSC[i][k] = false;
4436 v->LinkDSCEnable = false;
4437 v->RequiresFEC[i][k] = false;
4440 v->Outbpp = BPP_INVALID;
4441 if (v->PHYCLKPerState[i] >= 270.0) {
4442 v->Outbpp = TruncToValidBPP(
4443 (1.0 - v->Downspreading / 100.0) * 2700,
4444 v->OutputLinkDPLanes[k],
4447 v->PixelClockBackEnd[k],
4448 v->ForcedOutputLinkBPP[k],
4452 v->DSCInputBitPerComponent[k],
4453 v->NumberOfDSCSlices[k],
4454 v->AudioSampleRate[k],
4455 v->AudioSampleLayout[k],
4456 v->ODMCombineEnablePerState[i][k]);
4457 v->OutputBppPerState[i][k] = v->Outbpp;
4458 // TODO: Need some other way to handle this nonsense
4459 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4461 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4462 v->Outbpp = TruncToValidBPP(
4463 (1.0 - v->Downspreading / 100.0) * 5400,
4464 v->OutputLinkDPLanes[k],
4467 v->PixelClockBackEnd[k],
4468 v->ForcedOutputLinkBPP[k],
4472 v->DSCInputBitPerComponent[k],
4473 v->NumberOfDSCSlices[k],
4474 v->AudioSampleRate[k],
4475 v->AudioSampleLayout[k],
4476 v->ODMCombineEnablePerState[i][k]);
4477 v->OutputBppPerState[i][k] = v->Outbpp;
4478 // TODO: Need some other way to handle this nonsense
4479 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4481 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4482 v->Outbpp = TruncToValidBPP(
4483 (1.0 - v->Downspreading / 100.0) * 8100,
4484 v->OutputLinkDPLanes[k],
4487 v->PixelClockBackEnd[k],
4488 v->ForcedOutputLinkBPP[k],
4492 v->DSCInputBitPerComponent[k],
4493 v->NumberOfDSCSlices[k],
4494 v->AudioSampleRate[k],
4495 v->AudioSampleLayout[k],
4496 v->ODMCombineEnablePerState[i][k]);
4497 v->OutputBppPerState[i][k] = v->Outbpp;
4498 // TODO: Need some other way to handle this nonsense
4499 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4501 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4502 v->Outbpp = TruncToValidBPP(
4503 (1.0 - v->Downspreading / 100.0) * 10000,
4507 v->PixelClockBackEnd[k],
4508 v->ForcedOutputLinkBPP[k],
4512 v->DSCInputBitPerComponent[k],
4513 v->NumberOfDSCSlices[k],
4514 v->AudioSampleRate[k],
4515 v->AudioSampleLayout[k],
4516 v->ODMCombineEnablePerState[i][k]);
4517 v->OutputBppPerState[i][k] = v->Outbpp;
4518 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4520 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4521 v->Outbpp = TruncToValidBPP(
4526 v->PixelClockBackEnd[k],
4527 v->ForcedOutputLinkBPP[k],
4531 v->DSCInputBitPerComponent[k],
4532 v->NumberOfDSCSlices[k],
4533 v->AudioSampleRate[k],
4534 v->AudioSampleLayout[k],
4535 v->ODMCombineEnablePerState[i][k]);
4536 v->OutputBppPerState[i][k] = v->Outbpp;
4537 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4541 v->OutputBppPerState[i][k] = 0;
4546 for (i = 0; i < v->soc.num_states; i++) {
4547 v->LinkCapacitySupport[i] = true;
4548 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4549 if (v->BlendingAndTiming[k] == k
4550 && (v->Output[k] == dm_dp ||
4551 v->Output[k] == dm_edp ||
4552 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4553 v->LinkCapacitySupport[i] = false;
4559 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4560 if (v->BlendingAndTiming[k] == k
4561 && (v->Output[k] == dm_dp ||
4562 v->Output[k] == dm_edp ||
4563 v->Output[k] == dm_hdmi)) {
4564 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4567 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4568 && !v->DSC422NativeSupport) {
4569 DSC422NativeNotSupported = true;
4574 for (i = 0; i < v->soc.num_states; ++i) {
4575 v->ODMCombine4To1SupportCheckOK[i] = true;
4576 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4577 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4578 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4579 || v->Output[k] == dm_hdmi)) {
4580 v->ODMCombine4To1SupportCheckOK[i] = false;
4585 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4587 for (i = 0; i < v->soc.num_states; i++) {
4588 v->NotEnoughDSCUnits[i] = false;
4589 v->TotalDSCUnitsRequired = 0.0;
4590 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4591 if (v->RequiresDSC[i][k] == true) {
4592 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4593 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4594 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4595 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4597 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4601 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4602 v->NotEnoughDSCUnits[i] = true;
4605 /*DSC Delay per state*/
4607 for (i = 0; i < v->soc.num_states; i++) {
4608 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4609 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4612 v->BPP = v->OutputBppPerState[i][k];
4614 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4615 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4616 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4617 v->DSCInputBitPerComponent[k],
4619 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4620 v->NumberOfDSCSlices[k],
4622 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4623 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4624 v->DSCDelayPerState[i][k] = 2.0
4625 * (dscceComputeDelay(
4626 v->DSCInputBitPerComponent[k],
4628 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4629 v->NumberOfDSCSlices[k] / 2,
4631 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4633 v->DSCDelayPerState[i][k] = 4.0
4634 * (dscceComputeDelay(
4635 v->DSCInputBitPerComponent[k],
4637 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4638 v->NumberOfDSCSlices[k] / 4,
4640 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4642 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4644 v->DSCDelayPerState[i][k] = 0.0;
4647 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4648 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4649 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4650 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4656 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4658 for (i = 0; i < v->soc.num_states; ++i) {
4659 for (j = 0; j <= 1; ++j) {
4660 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4661 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4662 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4663 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4666 CalculateSwathAndDETConfiguration(
4668 v->NumberOfActivePlanes,
4669 v->DETBufferSizeInKByte[0],
4670 v->MaximumSwathWidthLuma,
4671 v->MaximumSwathWidthChroma,
4673 v->SourcePixelFormat,
4681 v->Read256BlockHeightY,
4682 v->Read256BlockHeightC,
4683 v->Read256BlockWidthY,
4684 v->Read256BlockWidthC,
4685 v->ODMCombineEnableThisState,
4686 v->BlendingAndTiming,
4689 v->BytePerPixelInDETY,
4690 v->BytePerPixelInDETC,
4694 v->NoOfDPPThisState,
4695 v->swath_width_luma_ub_this_state,
4696 v->swath_width_chroma_ub_this_state,
4697 v->SwathWidthYThisState,
4698 v->SwathWidthCThisState,
4699 v->SwathHeightYThisState,
4700 v->SwathHeightCThisState,
4701 v->DETBufferSizeYThisState,
4702 v->DETBufferSizeCThisState,
4704 &v->ViewportSizeSupport[i][j]);
4706 CalculateDCFCLKDeepSleep(
4708 v->NumberOfActivePlanes,
4713 v->SwathWidthYThisState,
4714 v->SwathWidthCThisState,
4715 v->NoOfDPPThisState,
4720 v->PSCL_FACTOR_CHROMA,
4721 v->RequiredDPPCLKThisState,
4722 v->ReadBandwidthLuma,
4723 v->ReadBandwidthChroma,
4725 &v->ProjectedDCFCLKDeepSleep[i][j]);
4727 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4728 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4729 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4730 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4731 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4732 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4733 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4734 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4735 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4740 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4741 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4742 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4745 for (i = 0; i < v->soc.num_states; i++) {
4746 for (j = 0; j < 2; j++) {
4747 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4749 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4750 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4751 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4752 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4753 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4754 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4755 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4756 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4757 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4760 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4761 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4762 if (v->DCCEnable[k] == true) {
4763 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4767 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4768 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4769 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4771 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4772 && v->SourceScan[k] != dm_vert) {
4773 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4775 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4777 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4778 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4781 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4784 v->Read256BlockHeightC[k],
4785 v->Read256BlockWidthC[k],
4786 v->SourcePixelFormat[k],
4787 v->SurfaceTiling[k],
4788 v->BytePerPixelC[k],
4790 v->SwathWidthCThisState[k],
4791 v->ViewportHeightChroma[k],
4794 v->HostVMMaxNonCachedPageTableLevels,
4795 v->GPUVMMinPageSize,
4796 v->HostVMMinPageSize,
4797 v->PTEBufferSizeInRequestsForChroma,
4800 &v->MacroTileWidthC[k],
4802 &v->DPTEBytesPerRowC,
4803 &v->PTEBufferSizeNotExceededC[i][j][k],
4805 &v->dpte_row_height_chroma[k],
4809 &v->meta_row_height_chroma[k],
4816 &v->dummyinteger11);
4818 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4823 v->ProgressiveToInterlaceUnitInOPP,
4824 v->SwathHeightCThisState[k],
4825 v->ViewportYStartC[k],
4829 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4830 v->PTEBufferSizeInRequestsForChroma = 0;
4831 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4832 v->MetaRowBytesC = 0.0;
4833 v->DPTEBytesPerRowC = 0.0;
4834 v->PrefetchLinesC[i][j][k] = 0.0;
4835 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4837 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4840 v->Read256BlockHeightY[k],
4841 v->Read256BlockWidthY[k],
4842 v->SourcePixelFormat[k],
4843 v->SurfaceTiling[k],
4844 v->BytePerPixelY[k],
4846 v->SwathWidthYThisState[k],
4847 v->ViewportHeight[k],
4850 v->HostVMMaxNonCachedPageTableLevels,
4851 v->GPUVMMinPageSize,
4852 v->HostVMMinPageSize,
4853 v->PTEBufferSizeInRequestsForLuma,
4855 v->DCCMetaPitchY[k],
4856 &v->MacroTileWidthY[k],
4858 &v->DPTEBytesPerRowY,
4859 &v->PTEBufferSizeNotExceededY[i][j][k],
4861 &v->dpte_row_height[k],
4865 &v->meta_row_height[k],
4867 &v->dpte_group_bytes[k],
4873 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4878 v->ProgressiveToInterlaceUnitInOPP,
4879 v->SwathHeightYThisState[k],
4880 v->ViewportYStartY[k],
4883 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4884 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4885 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4887 CalculateRowBandwidth(
4889 v->SourcePixelFormat[k],
4893 v->HTotal[k] / v->PixelClock[k],
4896 v->meta_row_height[k],
4897 v->meta_row_height_chroma[k],
4898 v->DPTEBytesPerRowY,
4899 v->DPTEBytesPerRowC,
4900 v->dpte_row_height[k],
4901 v->dpte_row_height_chroma[k],
4902 &v->meta_row_bandwidth[i][j][k],
4903 &v->dpte_row_bandwidth[i][j][k]);
4905 /*DCCMetaBufferSizeSupport(i, j) = True
4906 For k = 0 To NumberOfActivePlanes - 1
4907 If MetaRowBytes(i, j, k) > 24064 Then
4908 DCCMetaBufferSizeSupport(i, j) = False
4911 v->DCCMetaBufferSizeSupport[i][j] = true;
4912 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4913 if (v->MetaRowBytes[i][j][k] > 24064)
4914 v->DCCMetaBufferSizeSupport[i][j] = false;
4916 v->UrgLatency[i] = CalculateUrgentLatency(
4917 v->UrgentLatencyPixelDataOnly,
4918 v->UrgentLatencyPixelMixedWithVMData,
4919 v->UrgentLatencyVMDataOnly,
4920 v->DoUrgentLatencyAdjustment,
4921 v->UrgentLatencyAdjustmentFabricClockComponent,
4922 v->UrgentLatencyAdjustmentFabricClockReference,
4923 v->FabricClockPerState[i]);
4925 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4926 CalculateUrgentBurstFactor(
4927 v->swath_width_luma_ub_this_state[k],
4928 v->swath_width_chroma_ub_this_state[k],
4929 v->SwathHeightYThisState[k],
4930 v->SwathHeightCThisState[k],
4931 v->HTotal[k] / v->PixelClock[k],
4933 v->CursorBufferSize,
4934 v->CursorWidth[k][0],
4938 v->BytePerPixelInDETY[k],
4939 v->BytePerPixelInDETC[k],
4940 v->DETBufferSizeYThisState[k],
4941 v->DETBufferSizeCThisState[k],
4942 &v->UrgentBurstFactorCursor[k],
4943 &v->UrgentBurstFactorLuma[k],
4944 &v->UrgentBurstFactorChroma[k],
4945 &NotUrgentLatencyHiding[k]);
4948 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4949 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4950 if (NotUrgentLatencyHiding[k]) {
4951 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4955 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4956 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4957 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4958 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4961 v->TotalVActivePixelBandwidth[i][j] = 0;
4962 v->TotalVActiveCursorBandwidth[i][j] = 0;
4963 v->TotalMetaRowBandwidth[i][j] = 0;
4964 v->TotalDPTERowBandwidth[i][j] = 0;
4965 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4966 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4967 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4968 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4969 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4974 //Calculate Return BW
4975 for (i = 0; i < v->soc.num_states; ++i) {
4976 for (j = 0; j <= 1; ++j) {
4977 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4978 if (v->BlendingAndTiming[k] == k) {
4979 if (v->WritebackEnable[k] == true) {
4980 v->WritebackDelayTime[k] = v->WritebackLatency
4981 + CalculateWriteBackDelay(
4982 v->WritebackPixelFormat[k],
4983 v->WritebackHRatio[k],
4984 v->WritebackVRatio[k],
4985 v->WritebackVTaps[k],
4986 v->WritebackDestinationWidth[k],
4987 v->WritebackDestinationHeight[k],
4988 v->WritebackSourceHeight[k],
4989 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4991 v->WritebackDelayTime[k] = 0.0;
4993 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4994 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4995 v->WritebackDelayTime[k] = dml_max(
4996 v->WritebackDelayTime[k],
4998 + CalculateWriteBackDelay(
4999 v->WritebackPixelFormat[m],
5000 v->WritebackHRatio[m],
5001 v->WritebackVRatio[m],
5002 v->WritebackVTaps[m],
5003 v->WritebackDestinationWidth[m],
5004 v->WritebackDestinationHeight[m],
5005 v->WritebackSourceHeight[m],
5006 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5011 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5012 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5013 if (v->BlendingAndTiming[k] == m) {
5014 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5018 v->MaxMaxVStartup[i][j] = 0;
5019 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5020 v->MaximumVStartup[i][j][k] =
5021 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5022 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5023 v->VTotal[k] - v->VActive[k]
5027 1.0 * v->WritebackDelayTime[k]
5029 / v->PixelClock[k]),
5031 if (v->MaximumVStartup[i][j][k] > 1023)
5032 v->MaximumVStartup[i][j][k] = 1023;
5033 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5038 ReorderingBytes = v->NumberOfChannels
5040 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5041 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5042 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5044 for (i = 0; i < v->soc.num_states; ++i) {
5045 for (j = 0; j <= 1; ++j) {
5046 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5050 if (v->UseMinimumRequiredDCFCLK == true)
5051 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5053 for (i = 0; i < v->soc.num_states; ++i) {
5054 for (j = 0; j <= 1; ++j) {
5055 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5056 v->ReturnBusWidth * v->DCFCLKState[i][j],
5057 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5058 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5059 double PixelDataOnlyReturnBWPerState = dml_min(
5060 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5061 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5062 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5063 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5064 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5066 if (v->HostVMEnable != true) {
5067 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5069 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5074 //Re-ordering Buffer Support Check
5075 for (i = 0; i < v->soc.num_states; ++i) {
5076 for (j = 0; j <= 1; ++j) {
5077 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5078 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5079 v->ROBSupport[i][j] = true;
5081 v->ROBSupport[i][j] = false;
5086 //Vertical Active BW support check
5088 MaxTotalVActiveRDBandwidth = 0;
5089 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5090 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5093 for (i = 0; i < v->soc.num_states; ++i) {
5094 for (j = 0; j <= 1; ++j) {
5095 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5097 v->ReturnBusWidth * v->DCFCLKState[i][j],
5098 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5099 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5100 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5101 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5103 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5104 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5106 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5111 v->UrgentLatency = CalculateUrgentLatency(
5112 v->UrgentLatencyPixelDataOnly,
5113 v->UrgentLatencyPixelMixedWithVMData,
5114 v->UrgentLatencyVMDataOnly,
5115 v->DoUrgentLatencyAdjustment,
5116 v->UrgentLatencyAdjustmentFabricClockComponent,
5117 v->UrgentLatencyAdjustmentFabricClockReference,
5120 for (i = 0; i < v->soc.num_states; ++i) {
5121 for (j = 0; j <= 1; ++j) {
5122 double VMDataOnlyReturnBWPerState;
5123 double HostVMInefficiencyFactor = 1;
5124 int NextPrefetchModeState = MinPrefetchMode;
5125 bool UnboundedRequestEnabledThisState = false;
5126 int CompressedBufferSizeInkByteThisState = 0;
5129 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5131 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5132 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5133 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5134 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5137 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5138 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5139 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5140 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5141 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5142 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5143 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5144 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5145 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5146 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5149 VMDataOnlyReturnBWPerState = dml_min(
5151 v->ReturnBusWidth * v->DCFCLKState[i][j],
5152 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5153 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5154 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5155 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5156 if (v->GPUVMEnable && v->HostVMEnable)
5157 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5159 v->ExtraLatency = CalculateExtraLatency(
5160 v->RoundTripPingLatencyCycles,
5162 v->DCFCLKState[i][j],
5163 v->TotalNumberOfActiveDPP[i][j],
5164 v->PixelChunkSizeInKByte,
5165 v->TotalNumberOfDCCActiveDPP[i][j],
5167 v->ReturnBWPerState[i][j],
5170 v->NumberOfActivePlanes,
5171 v->NoOfDPPThisState,
5172 v->dpte_group_bytes,
5173 HostVMInefficiencyFactor,
5174 v->HostVMMinPageSize,
5175 v->HostVMMaxNonCachedPageTableLevels);
5177 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5179 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5180 v->MaxVStartup = v->NextMaxVStartup;
5182 v->TWait = CalculateTWait(
5183 v->PrefetchModePerState[i][j],
5184 v->DRAMClockChangeLatency,
5186 v->SREnterPlusExitTime);
5188 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5189 CalculatePrefetchSchedulePerPlane(mode_lib,
5190 HostVMInefficiencyFactor,
5194 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5195 CalculateUrgentBurstFactor(
5196 v->swath_width_luma_ub_this_state[k],
5197 v->swath_width_chroma_ub_this_state[k],
5198 v->SwathHeightYThisState[k],
5199 v->SwathHeightCThisState[k],
5200 v->HTotal[k] / v->PixelClock[k],
5202 v->CursorBufferSize,
5203 v->CursorWidth[k][0],
5205 v->VRatioPreY[i][j][k],
5206 v->VRatioPreC[i][j][k],
5207 v->BytePerPixelInDETY[k],
5208 v->BytePerPixelInDETC[k],
5209 v->DETBufferSizeYThisState[k],
5210 v->DETBufferSizeCThisState[k],
5211 &v->UrgentBurstFactorCursorPre[k],
5212 &v->UrgentBurstFactorLumaPre[k],
5213 &v->UrgentBurstFactorChroma[k],
5214 &v->NotUrgentLatencyHidingPre[k]);
5217 v->MaximumReadBandwidthWithPrefetch = 0.0;
5218 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5219 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5220 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5222 v->MaximumReadBandwidthWithPrefetch =
5223 v->MaximumReadBandwidthWithPrefetch
5225 v->VActivePixelBandwidth[i][j][k]
5226 + v->VActiveCursorBandwidth[i][j][k]
5227 + v->NoOfDPP[i][j][k]
5228 * (v->meta_row_bandwidth[i][j][k]
5229 + v->dpte_row_bandwidth[i][j][k]),
5230 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5232 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5233 * v->UrgentBurstFactorLumaPre[k]
5234 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5235 * v->UrgentBurstFactorChromaPre[k])
5236 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5239 v->NotEnoughUrgentLatencyHidingPre = false;
5240 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5241 if (v->NotUrgentLatencyHidingPre[k] == true) {
5242 v->NotEnoughUrgentLatencyHidingPre = true;
5246 v->PrefetchSupported[i][j] = true;
5247 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5248 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5249 v->PrefetchSupported[i][j] = false;
5251 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5252 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5253 || v->NoTimeForPrefetch[i][j][k] == true) {
5254 v->PrefetchSupported[i][j] = false;
5258 v->DynamicMetadataSupported[i][j] = true;
5259 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5260 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5261 v->DynamicMetadataSupported[i][j] = false;
5265 v->VRatioInPrefetchSupported[i][j] = true;
5266 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5267 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5268 v->VRatioInPrefetchSupported[i][j] = false;
5271 v->AnyLinesForVMOrRowTooLarge = false;
5272 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5273 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5274 v->AnyLinesForVMOrRowTooLarge = true;
5278 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5280 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5281 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5282 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5283 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5285 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5287 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5288 * v->UrgentBurstFactorLumaPre[k]
5289 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5290 * v->UrgentBurstFactorChromaPre[k])
5291 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5293 v->TotImmediateFlipBytes = 0.0;
5294 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5295 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5296 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5297 + v->DPTEBytesPerRow[i][j][k];
5300 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5301 CalculateFlipSchedule(
5303 HostVMInefficiencyFactor,
5306 v->GPUVMMaxPageTableLevels,
5308 v->HostVMMaxNonCachedPageTableLevels,
5310 v->HostVMMinPageSize,
5311 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5312 v->MetaRowBytes[i][j][k],
5313 v->DPTEBytesPerRow[i][j][k],
5314 v->BandwidthAvailableForImmediateFlip,
5315 v->TotImmediateFlipBytes,
5316 v->SourcePixelFormat[k],
5317 v->HTotal[k] / v->PixelClock[k],
5322 v->dpte_row_height[k],
5323 v->meta_row_height[k],
5324 v->dpte_row_height_chroma[k],
5325 v->meta_row_height_chroma[k],
5326 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5327 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5328 &v->final_flip_bw[k],
5329 &v->ImmediateFlipSupportedForPipe[k]);
5331 v->total_dcn_read_bw_with_flip = 0.0;
5332 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5333 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5335 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5336 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5337 + v->VActiveCursorBandwidth[i][j][k],
5339 * (v->final_flip_bw[k]
5340 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5341 * v->UrgentBurstFactorLumaPre[k]
5342 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5343 * v->UrgentBurstFactorChromaPre[k])
5344 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5346 v->ImmediateFlipSupportedForState[i][j] = true;
5347 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5348 v->ImmediateFlipSupportedForState[i][j] = false;
5350 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5351 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5352 v->ImmediateFlipSupportedForState[i][j] = false;
5356 v->ImmediateFlipSupportedForState[i][j] = false;
5359 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5360 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5361 NextPrefetchModeState = NextPrefetchModeState + 1;
5363 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5365 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5366 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5367 && ((v->HostVMEnable == false &&
5368 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5369 || v->ImmediateFlipSupportedForState[i][j] == true))
5370 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5372 CalculateUnboundedRequestAndCompressedBufferSize(
5373 v->DETBufferSizeInKByte[0],
5374 v->ConfigReturnBufferSizeInKByte,
5375 v->UseUnboundedRequesting,
5376 v->TotalNumberOfActiveDPP[i][j],
5379 v->CompressedBufferSegmentSizeInkByte,
5381 &UnboundedRequestEnabledThisState,
5382 &CompressedBufferSizeInkByteThisState);
5384 CalculateWatermarksAndDRAMSpeedChangeSupport(
5386 v->PrefetchModePerState[i][j],
5387 v->NumberOfActivePlanes,
5388 v->MaxLineBufferLines,
5390 v->WritebackInterfaceBufferSize,
5391 v->DCFCLKState[i][j],
5392 v->ReturnBWPerState[i][j],
5393 v->SynchronizedVBlank,
5394 v->dpte_group_bytes,
5398 v->WritebackLatency,
5399 v->WritebackChunkSize,
5400 v->SOCCLKPerState[i],
5401 v->DRAMClockChangeLatency,
5403 v->SREnterPlusExitTime,
5405 v->SREnterPlusExitZ8Time,
5406 v->ProjectedDCFCLKDeepSleep[i][j],
5407 v->DETBufferSizeYThisState,
5408 v->DETBufferSizeCThisState,
5409 v->SwathHeightYThisState,
5410 v->SwathHeightCThisState,
5412 v->SwathWidthYThisState,
5413 v->SwathWidthCThisState,
5422 v->BlendingAndTiming,
5423 v->NoOfDPPThisState,
5424 v->BytePerPixelInDETY,
5425 v->BytePerPixelInDETC,
5429 v->WritebackPixelFormat,
5430 v->WritebackDestinationWidth,
5431 v->WritebackDestinationHeight,
5432 v->WritebackSourceHeight,
5433 UnboundedRequestEnabledThisState,
5434 CompressedBufferSizeInkByteThisState,
5435 &v->DRAMClockChangeSupport[i][j],
5436 &v->UrgentWatermark,
5437 &v->WritebackUrgentWatermark,
5438 &v->DRAMClockChangeWatermark,
5439 &v->WritebackDRAMClockChangeWatermark,
5444 &v->MinActiveDRAMClockChangeLatencySupported);
5448 /*PTE Buffer Size Check*/
5449 for (i = 0; i < v->soc.num_states; i++) {
5450 for (j = 0; j < 2; j++) {
5451 v->PTEBufferSizeNotExceeded[i][j] = true;
5452 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5453 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5454 v->PTEBufferSizeNotExceeded[i][j] = false;
5460 /*Cursor Support Check*/
5461 v->CursorSupport = true;
5462 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5463 if (v->CursorWidth[k][0] > 0.0) {
5464 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5465 v->CursorSupport = false;
5470 /*Valid Pitch Check*/
5471 v->PitchSupport = true;
5472 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5473 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5474 if (v->DCCEnable[k] == true) {
5475 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5477 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5479 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5480 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5481 && v->SourcePixelFormat[k] != dm_mono_8) {
5482 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5483 if (v->DCCEnable[k] == true) {
5484 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5485 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5486 64.0 * v->Read256BlockWidthC[k]);
5488 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5491 v->AlignedCPitch[k] = v->PitchC[k];
5492 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5494 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5495 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5496 v->PitchSupport = false;
5500 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5501 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5502 ViewportExceedsSurface = true;
5503 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5504 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5505 && v->SourcePixelFormat[k] != dm_rgbe) {
5506 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5507 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5508 ViewportExceedsSurface = true;
5514 /*Mode Support, Voltage State and SOC Configuration*/
5515 for (i = v->soc.num_states - 1; i >= 0; i--) {
5516 for (j = 0; j < 2; j++) {
5517 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5518 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5519 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5520 && v->DTBCLKRequiredMoreThanSupported[i] == false
5521 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5522 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5523 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5524 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5525 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5526 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5527 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5528 && ((v->HostVMEnable == false
5529 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5530 || v->ImmediateFlipSupportedForState[i][j] == true)
5531 && FMTBufferExceeded == false) {
5532 v->ModeSupport[i][j] = true;
5534 v->ModeSupport[i][j] = false;
5540 unsigned int MaximumMPCCombine = 0;
5541 for (i = v->soc.num_states; i >= 0; i--) {
5542 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5543 v->VoltageLevel = i;
5544 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5545 if (v->ModeSupport[i][0] == true) {
5546 MaximumMPCCombine = 0;
5548 MaximumMPCCombine = 1;
5552 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5553 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5554 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5555 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5557 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5558 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5559 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5560 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5561 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5562 v->maxMpcComb = MaximumMPCCombine;
5566 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5567 struct display_mode_lib *mode_lib,
5568 unsigned int PrefetchMode,
5569 unsigned int NumberOfActivePlanes,
5570 unsigned int MaxLineBufferLines,
5571 unsigned int LineBufferSize,
5572 unsigned int WritebackInterfaceBufferSize,
5575 bool SynchronizedVBlank,
5576 unsigned int dpte_group_bytes[],
5577 unsigned int MetaChunkSize,
5578 double UrgentLatency,
5579 double ExtraLatency,
5580 double WritebackLatency,
5581 double WritebackChunkSize,
5583 double DRAMClockChangeLatency,
5585 double SREnterPlusExitTime,
5586 double SRExitZ8Time,
5587 double SREnterPlusExitZ8Time,
5588 double DCFCLKDeepSleep,
5589 unsigned int DETBufferSizeY[],
5590 unsigned int DETBufferSizeC[],
5591 unsigned int SwathHeightY[],
5592 unsigned int SwathHeightC[],
5593 unsigned int LBBitPerPixel[],
5594 double SwathWidthY[],
5595 double SwathWidthC[],
5597 double HRatioChroma[],
5598 unsigned int vtaps[],
5599 unsigned int VTAPsChroma[],
5601 double VRatioChroma[],
5602 unsigned int HTotal[],
5603 double PixelClock[],
5604 unsigned int BlendingAndTiming[],
5605 unsigned int DPPPerPlane[],
5606 double BytePerPixelDETY[],
5607 double BytePerPixelDETC[],
5608 double DSTXAfterScaler[],
5609 double DSTYAfterScaler[],
5610 bool WritebackEnable[],
5611 enum source_format_class WritebackPixelFormat[],
5612 double WritebackDestinationWidth[],
5613 double WritebackDestinationHeight[],
5614 double WritebackSourceHeight[],
5615 bool UnboundedRequestEnabled,
5616 int unsigned CompressedBufferSizeInkByte,
5617 enum clock_change_support *DRAMClockChangeSupport,
5618 double *UrgentWatermark,
5619 double *WritebackUrgentWatermark,
5620 double *DRAMClockChangeWatermark,
5621 double *WritebackDRAMClockChangeWatermark,
5622 double *StutterExitWatermark,
5623 double *StutterEnterPlusExitWatermark,
5624 double *Z8StutterExitWatermark,
5625 double *Z8StutterEnterPlusExitWatermark,
5626 double *MinActiveDRAMClockChangeLatencySupported)
5628 struct vba_vars_st *v = &mode_lib->vba;
5629 double EffectiveLBLatencyHidingY;
5630 double EffectiveLBLatencyHidingC;
5631 double LinesInDETY[DC__NUM_DPP__MAX];
5633 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5634 unsigned int LinesInDETCRoundedDownToSwath;
5635 double FullDETBufferingTimeY;
5636 double FullDETBufferingTimeC;
5637 double ActiveDRAMClockChangeLatencyMarginY;
5638 double ActiveDRAMClockChangeLatencyMarginC;
5639 double WritebackDRAMClockChangeLatencyMargin;
5640 double PlaneWithMinActiveDRAMClockChangeMargin;
5641 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5642 double WritebackDRAMClockChangeLatencyHiding;
5643 double TotalPixelBW = 0.0;
5646 *UrgentWatermark = UrgentLatency + ExtraLatency;
5648 #ifdef __DML_VBA_DEBUG__
5649 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5650 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5651 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
5654 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5656 #ifdef __DML_VBA_DEBUG__
5657 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
5658 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
5661 v->TotalActiveWriteback = 0;
5662 for (k = 0; k < NumberOfActivePlanes; ++k) {
5663 if (WritebackEnable[k] == true) {
5664 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5668 if (v->TotalActiveWriteback <= 1) {
5669 *WritebackUrgentWatermark = WritebackLatency;
5671 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5674 if (v->TotalActiveWriteback <= 1) {
5675 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5677 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5680 for (k = 0; k < NumberOfActivePlanes; ++k) {
5681 TotalPixelBW = TotalPixelBW
5682 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
5683 / (HTotal[k] / PixelClock[k]);
5686 for (k = 0; k < NumberOfActivePlanes; ++k) {
5687 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5689 v->LBLatencyHidingSourceLinesY = dml_min(
5690 (double) MaxLineBufferLines,
5691 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5693 v->LBLatencyHidingSourceLinesC = dml_min(
5694 (double) MaxLineBufferLines,
5695 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
5697 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5699 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
5701 if (UnboundedRequestEnabled) {
5702 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5703 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
5706 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5707 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5708 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5709 if (BytePerPixelDETC[k] > 0) {
5710 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5711 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5712 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
5715 FullDETBufferingTimeC = 999999;
5718 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5719 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
5721 if (NumberOfActivePlanes > 1) {
5722 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5723 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5726 if (BytePerPixelDETC[k] > 0) {
5727 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5728 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
5730 if (NumberOfActivePlanes > 1) {
5731 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5732 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5734 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5736 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5739 if (WritebackEnable[k] == true) {
5740 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
5741 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5742 if (WritebackPixelFormat[k] == dm_444_64) {
5743 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5745 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5746 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5750 v->MinActiveDRAMClockChangeMargin = 999999;
5751 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5752 for (k = 0; k < NumberOfActivePlanes; ++k) {
5753 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5754 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5755 if (BlendingAndTiming[k] == k) {
5756 PlaneWithMinActiveDRAMClockChangeMargin = k;
5758 for (j = 0; j < NumberOfActivePlanes; ++j) {
5759 if (BlendingAndTiming[k] == j) {
5760 PlaneWithMinActiveDRAMClockChangeMargin = j;
5767 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5769 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5770 for (k = 0; k < NumberOfActivePlanes; ++k) {
5771 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5772 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5773 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5777 v->TotalNumberOfActiveOTG = 0;
5779 for (k = 0; k < NumberOfActivePlanes; ++k) {
5780 if (BlendingAndTiming[k] == k) {
5781 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5785 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5786 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5787 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5788 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5789 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5791 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5794 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5795 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5796 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5797 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5799 #ifdef __DML_VBA_DEBUG__
5800 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5801 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5802 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5803 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5807 static void CalculateDCFCLKDeepSleep(
5808 struct display_mode_lib *mode_lib,
5809 unsigned int NumberOfActivePlanes,
5810 int BytePerPixelY[],
5811 int BytePerPixelC[],
5813 double VRatioChroma[],
5814 double SwathWidthY[],
5815 double SwathWidthC[],
5816 unsigned int DPPPerPlane[],
5818 double HRatioChroma[],
5819 double PixelClock[],
5820 double PSCL_THROUGHPUT[],
5821 double PSCL_THROUGHPUT_CHROMA[],
5823 double ReadBandwidthLuma[],
5824 double ReadBandwidthChroma[],
5826 double *DCFCLKDeepSleep)
5828 struct vba_vars_st *v = &mode_lib->vba;
5829 double DisplayPipeLineDeliveryTimeLuma;
5830 double DisplayPipeLineDeliveryTimeChroma;
5831 double ReadBandwidth = 0.0;
5834 for (k = 0; k < NumberOfActivePlanes; ++k) {
5836 if (VRatio[k] <= 1) {
5837 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5839 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5841 if (BytePerPixelC[k] == 0) {
5842 DisplayPipeLineDeliveryTimeChroma = 0;
5844 if (VRatioChroma[k] <= 1) {
5845 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5847 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5851 if (BytePerPixelC[k] > 0) {
5852 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5853 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5855 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5857 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5861 for (k = 0; k < NumberOfActivePlanes; ++k) {
5862 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5865 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5867 for (k = 0; k < NumberOfActivePlanes; ++k) {
5868 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor
 *
 * Computes the cursor, luma and chroma urgent burst factors for one plane.
 *
 * For each of the three buffers the amount of buffering is converted to a
 * time (lines * LineTime / VRatio).  If that time does not exceed
 * UrgentLatency the factor is set to 0 and *NotEnoughUrgentLatencyHiding is
 * set; otherwise the factor is  time / (time - UrgentLatency).  When
 * VRatio <= 0 the factor is 1.
 *
 * Outputs that are left untouched:
 *  - *UrgentBurstFactorCursor when CursorWidth == 0;
 *  - *UrgentBurstFactorChroma when BytePerPixelInDETC <= 0.
 * Callers must therefore pre-initialise them if they read them afterwards.
 *
 * NOTE(review): VRatioC is accepted but not used; the chroma path divides by
 * VRatio.  Kept as-is because the formulas come from HW - confirm before
 * changing.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/* Largest power-of-two number of cursor lines that fits in the buffer. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths count towards latency hiding. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		if (VRatio > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5946 static void CalculatePixelDeliveryTimes(
5947 unsigned int NumberOfActivePlanes,
5949 double VRatioChroma[],
5950 double VRatioPrefetchY[],
5951 double VRatioPrefetchC[],
5952 unsigned int swath_width_luma_ub[],
5953 unsigned int swath_width_chroma_ub[],
5954 unsigned int DPPPerPlane[],
5956 double HRatioChroma[],
5957 double PixelClock[],
5958 double PSCL_THROUGHPUT[],
5959 double PSCL_THROUGHPUT_CHROMA[],
5961 int BytePerPixelC[],
5962 enum scan_direction_class SourceScan[],
5963 unsigned int NumberOfCursors[],
5964 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5965 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5966 unsigned int BlockWidth256BytesY[],
5967 unsigned int BlockHeight256BytesY[],
5968 unsigned int BlockWidth256BytesC[],
5969 unsigned int BlockHeight256BytesC[],
5970 double DisplayPipeLineDeliveryTimeLuma[],
5971 double DisplayPipeLineDeliveryTimeChroma[],
5972 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5973 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5974 double DisplayPipeRequestDeliveryTimeLuma[],
5975 double DisplayPipeRequestDeliveryTimeChroma[],
5976 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5977 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5978 double CursorRequestDeliveryTime[],
5979 double CursorRequestDeliveryTimePrefetch[])
5981 double req_per_swath_ub;
5984 for (k = 0; k < NumberOfActivePlanes; ++k) {
5985 if (VRatio[k] <= 1) {
5986 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5988 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5991 if (BytePerPixelC[k] == 0) {
5992 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5994 if (VRatioChroma[k] <= 1) {
5995 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5997 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6001 if (VRatioPrefetchY[k] <= 1) {
6002 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6004 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6007 if (BytePerPixelC[k] == 0) {
6008 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6010 if (VRatioPrefetchC[k] <= 1) {
6011 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6013 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6018 for (k = 0; k < NumberOfActivePlanes; ++k) {
6019 if (SourceScan[k] != dm_vert) {
6020 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6022 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6024 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6025 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6026 if (BytePerPixelC[k] == 0) {
6027 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6028 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6030 if (SourceScan[k] != dm_vert) {
6031 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6033 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6035 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6036 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6038 #ifdef __DML_VBA_DEBUG__
6039 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6040 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6041 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6042 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6043 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6044 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6045 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6046 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6047 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6048 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6049 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6050 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6054 for (k = 0; k < NumberOfActivePlanes; ++k) {
6055 int cursor_req_per_width;
6056 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6057 if (NumberOfCursors[k] > 0) {
6058 if (VRatio[k] <= 1) {
6059 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6061 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6063 if (VRatioPrefetchY[k] <= 1) {
6064 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6066 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6069 CursorRequestDeliveryTime[k] = 0;
6070 CursorRequestDeliveryTimePrefetch[k] = 0;
6072 #ifdef __DML_VBA_DEBUG__
6073 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6074 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6075 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6080 static void CalculateMetaAndPTETimes(
6081 int NumberOfActivePlanes,
6084 int MinMetaChunkSizeBytes,
6087 double VRatioChroma[],
6088 double DestinationLinesToRequestRowInVBlank[],
6089 double DestinationLinesToRequestRowInImmediateFlip[],
6091 double PixelClock[],
6092 int BytePerPixelY[],
6093 int BytePerPixelC[],
6094 enum scan_direction_class SourceScan[],
6095 int dpte_row_height[],
6096 int dpte_row_height_chroma[],
6097 int meta_row_width[],
6098 int meta_row_width_chroma[],
6099 int meta_row_height[],
6100 int meta_row_height_chroma[],
6101 int meta_req_width[],
6102 int meta_req_width_chroma[],
6103 int meta_req_height[],
6104 int meta_req_height_chroma[],
6105 int dpte_group_bytes[],
6106 int PTERequestSizeY[],
6107 int PTERequestSizeC[],
6108 int PixelPTEReqWidthY[],
6109 int PixelPTEReqHeightY[],
6110 int PixelPTEReqWidthC[],
6111 int PixelPTEReqHeightC[],
6112 int dpte_row_width_luma_ub[],
6113 int dpte_row_width_chroma_ub[],
6114 double DST_Y_PER_PTE_ROW_NOM_L[],
6115 double DST_Y_PER_PTE_ROW_NOM_C[],
6116 double DST_Y_PER_META_ROW_NOM_L[],
6117 double DST_Y_PER_META_ROW_NOM_C[],
6118 double TimePerMetaChunkNominal[],
6119 double TimePerChromaMetaChunkNominal[],
6120 double TimePerMetaChunkVBlank[],
6121 double TimePerChromaMetaChunkVBlank[],
6122 double TimePerMetaChunkFlip[],
6123 double TimePerChromaMetaChunkFlip[],
6124 double time_per_pte_group_nom_luma[],
6125 double time_per_pte_group_vblank_luma[],
6126 double time_per_pte_group_flip_luma[],
6127 double time_per_pte_group_nom_chroma[],
6128 double time_per_pte_group_vblank_chroma[],
6129 double time_per_pte_group_flip_chroma[])
6131 unsigned int meta_chunk_width;
6132 unsigned int min_meta_chunk_width;
6133 unsigned int meta_chunk_per_row_int;
6134 unsigned int meta_row_remainder;
6135 unsigned int meta_chunk_threshold;
6136 unsigned int meta_chunks_per_row_ub;
6137 unsigned int meta_chunk_width_chroma;
6138 unsigned int min_meta_chunk_width_chroma;
6139 unsigned int meta_chunk_per_row_int_chroma;
6140 unsigned int meta_row_remainder_chroma;
6141 unsigned int meta_chunk_threshold_chroma;
6142 unsigned int meta_chunks_per_row_ub_chroma;
6143 unsigned int dpte_group_width_luma;
6144 unsigned int dpte_groups_per_row_luma_ub;
6145 unsigned int dpte_group_width_chroma;
6146 unsigned int dpte_groups_per_row_chroma_ub;
6149 for (k = 0; k < NumberOfActivePlanes; ++k) {
6150 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6151 if (BytePerPixelC[k] == 0) {
6152 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6154 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6156 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6157 if (BytePerPixelC[k] == 0) {
6158 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6160 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6164 for (k = 0; k < NumberOfActivePlanes; ++k) {
6165 if (DCCEnable[k] == true) {
6166 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6167 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6168 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6169 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6170 if (SourceScan[k] != dm_vert) {
6171 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6173 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6175 if (meta_row_remainder <= meta_chunk_threshold) {
6176 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6178 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6180 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6181 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6182 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6183 if (BytePerPixelC[k] == 0) {
6184 TimePerChromaMetaChunkNominal[k] = 0;
6185 TimePerChromaMetaChunkVBlank[k] = 0;
6186 TimePerChromaMetaChunkFlip[k] = 0;
6188 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6189 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6190 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6191 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6192 if (SourceScan[k] != dm_vert) {
6193 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6195 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6197 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6198 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6200 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6202 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6203 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6204 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6207 TimePerMetaChunkNominal[k] = 0;
6208 TimePerMetaChunkVBlank[k] = 0;
6209 TimePerMetaChunkFlip[k] = 0;
6210 TimePerChromaMetaChunkNominal[k] = 0;
6211 TimePerChromaMetaChunkVBlank[k] = 0;
6212 TimePerChromaMetaChunkFlip[k] = 0;
6216 for (k = 0; k < NumberOfActivePlanes; ++k) {
6217 if (GPUVMEnable == true) {
6218 if (SourceScan[k] != dm_vert) {
6219 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6221 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6223 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6224 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6225 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6226 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6227 if (BytePerPixelC[k] == 0) {
6228 time_per_pte_group_nom_chroma[k] = 0;
6229 time_per_pte_group_vblank_chroma[k] = 0;
6230 time_per_pte_group_flip_chroma[k] = 0;
6232 if (SourceScan[k] != dm_vert) {
6233 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6235 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6237 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6238 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6239 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6240 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6243 time_per_pte_group_nom_luma[k] = 0;
6244 time_per_pte_group_vblank_luma[k] = 0;
6245 time_per_pte_group_flip_luma[k] = 0;
6246 time_per_pte_group_nom_chroma[k] = 0;
6247 time_per_pte_group_vblank_chroma[k] = 0;
6248 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes
 *
 * For every active plane k, fills TimePerVMGroup{VBlank,Flip}[k] and
 * TimePerVMRequest{VBlank,Flip}[k].
 *
 * The outputs are non-zero only when GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1; otherwise all four are
 * written as 0.
 *
 * In the non-zero case the time available (destination lines * HTotal /
 * PixelClock) is divided by
 *  - the number of VM groups: byte counts divided by vm_group_bytes[k] and
 *    rounded up, summed over the contributing tables, and
 *  - the number of VM requests: byte counts divided by 64 (integer
 *    division), summed over the contributing tables.
 * Which tables contribute:
 *  - DCC off:                      dpde0 only;
 *  - DCC on, 1 page-table level:   meta PTE only;
 *  - DCC on, >1 page-table level:  dpde0 + meta PTE (the group count also
 *    gets +1, or +2 when the plane has chroma).
 * Chroma tables are included only when BytePerPixelC[k] > 0.
 * With more than two page-table levels all four results are halved.
 *
 * dpte_row_width_luma_ub / dpte_row_width_chroma_ub are accepted but not
 * read here.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int num_group_per_lower_vm_stage;
	int num_req_per_lower_vm_stage;
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
			/* Number of VM groups in the lowest page-table stage. */
			if (DCCEnable[k] == false) {
				if (BytePerPixelC[k] > 0) {
					num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
							+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				} else {
					num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				}
			} else {
				if (GPUVMMaxPageTableLevels == 1) {
					if (BytePerPixelC[k] > 0) {
						num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
								+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
					} else {
						num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
					}
				} else {
					if (BytePerPixelC[k] > 0) {
						num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
								+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
								+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
								+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
					} else {
						num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
								+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
					}
				}
			}

			/* Number of 64-byte VM requests in the lowest page-table stage. */
			if (DCCEnable[k] == false) {
				if (BytePerPixelC[k] > 0) {
					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
				} else {
					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
				}
			} else {
				if (GPUVMMaxPageTableLevels == 1) {
					if (BytePerPixelC[k] > 0) {
						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
					} else {
						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
					}
				} else {
					if (BytePerPixelC[k] > 0) {
						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
								+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
					} else {
						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
					}
				}
			}

			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
				TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
				TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
				TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
			}

		} else {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		}
	}
}
6353 static void CalculateStutterEfficiency(
6354 struct display_mode_lib *mode_lib,
6355 int CompressedBufferSizeInkByte,
6356 bool UnboundedRequestEnabled,
6357 int ConfigReturnBufferSizeInKByte,
6358 int MetaFIFOSizeInKEntries,
6359 int ZeroSizeBufferEntries,
6360 int NumberOfActivePlanes,
6361 int ROBBufferSizeInKByte,
6362 double TotalDataReadBandwidth,
6365 double COMPBUF_RESERVED_SPACE_64B,
6366 double COMPBUF_RESERVED_SPACE_ZS,
6368 double SRExitZ8Time,
6369 bool SynchronizedVBlank,
6370 double Z8StutterEnterPlusExitWatermark,
6371 double StutterEnterPlusExitWatermark,
6372 bool ProgressiveToInterlaceUnitInOPP,
6374 double MinTTUVBlank[],
6376 unsigned int DETBufferSizeY[],
6377 int BytePerPixelY[],
6378 double BytePerPixelDETY[],
6379 double SwathWidthY[],
6382 double NetDCCRateLuma[],
6383 double NetDCCRateChroma[],
6384 double DCCFractionOfZeroSizeRequestsLuma[],
6385 double DCCFractionOfZeroSizeRequestsChroma[],
6388 double PixelClock[],
6390 enum scan_direction_class SourceScan[],
6391 int BlockHeight256BytesY[],
6392 int BlockWidth256BytesY[],
6393 int BlockHeight256BytesC[],
6394 int BlockWidth256BytesC[],
6395 int DCCYMaxUncompressedBlock[],
6396 int DCCCMaxUncompressedBlock[],
6399 bool WritebackEnable[],
6400 double ReadBandwidthPlaneLuma[],
6401 double ReadBandwidthPlaneChroma[],
6402 double meta_row_bw[],
6403 double dpte_row_bw[],
6404 double *StutterEfficiencyNotIncludingVBlank,
6405 double *StutterEfficiency,
6406 int *NumberOfStutterBurstsPerFrame,
6407 double *Z8StutterEfficiencyNotIncludingVBlank,
6408 double *Z8StutterEfficiency,
6409 int *Z8NumberOfStutterBurstsPerFrame,
6410 double *StutterPeriod)
6412 struct vba_vars_st *v = &mode_lib->vba;
6414 double DETBufferingTimeY;
6415 double SwathWidthYCriticalPlane = 0;
6416 double VActiveTimeCriticalPlane = 0;
6417 double FrameTimeCriticalPlane = 0;
6418 int BytePerPixelYCriticalPlane = 0;
6419 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6420 double MinTTUVBlankCriticalPlane = 0;
6421 double TotalCompressedReadBandwidth;
6422 double TotalRowReadBandwidth;
6423 double AverageDCCCompressionRate;
6424 double EffectiveCompressedBufferSize;
6425 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6426 double StutterBurstTime;
6427 int TotalActiveWriteback;
6429 double LinesInDETYRoundedDownToSwath;
6430 double MaximumEffectiveCompressionLuma;
6431 double MaximumEffectiveCompressionChroma;
6432 double TotalZeroSizeRequestReadBandwidth;
6433 double TotalZeroSizeCompressedReadBandwidth;
6434 double AverageDCCZeroSizeFraction;
6435 double AverageZeroSizeCompressionRate;
6436 int TotalNumberOfActiveOTG = 0;
6437 double LastStutterPeriod = 0.0;
6438 double LastZ8StutterPeriod = 0.0;
6441 TotalZeroSizeRequestReadBandwidth = 0;
6442 TotalZeroSizeCompressedReadBandwidth = 0;
6443 TotalRowReadBandwidth = 0;
6444 TotalCompressedReadBandwidth = 0;
6446 for (k = 0; k < NumberOfActivePlanes; ++k) {
6447 if (DCCEnable[k] == true) {
6448 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6449 || DCCYMaxUncompressedBlock[k] < 256) {
6450 MaximumEffectiveCompressionLuma = 2;
6452 MaximumEffectiveCompressionLuma = 4;
6454 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6455 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6456 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6457 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6458 if (ReadBandwidthPlaneChroma[k] > 0) {
6459 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6460 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6461 MaximumEffectiveCompressionChroma = 2;
6463 MaximumEffectiveCompressionChroma = 4;
6465 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6466 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6467 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6468 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6469 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6472 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6474 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6477 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6478 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6480 #ifdef __DML_VBA_DEBUG__
6481 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6482 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6483 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6484 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6485 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6486 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6487 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6488 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6491 if (AverageDCCZeroSizeFraction == 1) {
6492 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6493 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6494 } else if (AverageDCCZeroSizeFraction > 0) {
6495 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6496 EffectiveCompressedBufferSize = dml_min(
6497 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6498 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6499 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6500 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6501 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6503 "DML::%s: min 2 = %f\n",
6505 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6506 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6507 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6509 EffectiveCompressedBufferSize = dml_min(
6510 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6511 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6512 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6513 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6516 #ifdef __DML_VBA_DEBUG__
6517 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6518 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6519 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6523 for (k = 0; k < NumberOfActivePlanes; ++k) {
6524 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6525 / BytePerPixelDETY[k] / SwathWidthY[k];
6526 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6527 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6528 #ifdef __DML_VBA_DEBUG__
6529 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6530 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6531 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6532 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6533 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6534 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6535 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6536 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6537 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6538 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6539 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6540 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6543 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6544 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6546 *StutterPeriod = DETBufferingTimeY;
6547 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6548 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6549 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6550 SwathWidthYCriticalPlane = SwathWidthY[k];
6551 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6552 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6554 #ifdef __DML_VBA_DEBUG__
6555 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6556 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6557 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6558 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6559 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6560 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6561 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6566 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6567 #ifdef __DML_VBA_DEBUG__
6568 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6569 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6570 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6571 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6572 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6573 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6574 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6575 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6576 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6577 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6580 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6581 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6582 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6583 #ifdef __DML_VBA_DEBUG__
6584 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6585 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6586 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6587 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6588 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6590 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6593 "DML::%s: Time to finish residue swath=%f\n",
6595 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6597 TotalActiveWriteback = 0;
6598 for (k = 0; k < NumberOfActivePlanes; ++k) {
6599 if (WritebackEnable[k]) {
6600 TotalActiveWriteback = TotalActiveWriteback + 1;
6604 if (TotalActiveWriteback == 0) {
6605 #ifdef __DML_VBA_DEBUG__
6606 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6607 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6608 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6609 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6611 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6612 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6613 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6614 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6616 *StutterEfficiencyNotIncludingVBlank = 0.;
6617 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6618 *NumberOfStutterBurstsPerFrame = 0;
6619 *Z8NumberOfStutterBurstsPerFrame = 0;
6621 #ifdef __DML_VBA_DEBUG__
6622 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6623 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6624 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6625 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6626 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6629 for (k = 0; k < NumberOfActivePlanes; ++k) {
6630 if (v->BlendingAndTiming[k] == k) {
6631 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6635 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6636 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6638 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6639 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6640 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6642 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6645 *StutterEfficiency = 0;
6648 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6649 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6650 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6651 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6652 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6654 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6657 *Z8StutterEfficiency = 0.;
6660 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6661 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6662 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6663 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6664 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6665 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6666 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6667 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
/*
 * CalculateSwathAndDETConfiguration() - choose swath heights per plane, split
 * the DET buffer between luma and chroma, and report viewport support.
 *
 * Steps visible in this block:
 *   1. CalculateSwathWidth() is called with the local SwathWidthSingleDPP*,
 *      MaximumSwathHeight* arrays and the swath_width_*_ub arrays.
 *   2. For each plane k < NumberOfActivePlanes, MinimumSwathHeightY/C are
 *      derived from SourcePixelFormat[k], SurfaceTiling[k] and SourceScan[k]
 *      as either the maximum swath height or half of it.
 *   3. Max/min swath sizes in bytes are computed from the *_ub widths,
 *      BytePerPixDETY/C and the swath heights (passed through
 *      dml_ceil(..., 256) only for dm_420_10).
 *   4. SwathHeightY[k]/SwathHeightC[k] are chosen by comparing those sizes
 *      with DETBufferSizeInKByte * 1024 / 2.
 *   5. DETBufferSizeY[k]/DETBufferSizeC[k] and ViewportSizeSupportPerPlane[k]
 *      are written; *ViewportSizeSupport starts true and is set false when a
 *      plane fails the check.
 *
 * NOTE(review): SwathHeightY, SwathHeightC and the loop index k are used below
 * but their declarations are not present in the visible text, and the
 * "else" arms / closing braces of the branches are not visible either.
 * Confirm the exact branch nesting against the complete function before
 * relying on the descriptions in the comments below.
 */
6670 static void CalculateSwathAndDETConfiguration(
6671 bool ForceSingleDPP,
6672 int NumberOfActivePlanes,
6673 unsigned int DETBufferSizeInKByte,
6674 double MaximumSwathWidthLuma[],
6675 double MaximumSwathWidthChroma[],
6676 enum scan_direction_class SourceScan[],
6677 enum source_format_class SourcePixelFormat[],
6678 enum dm_swizzle_mode SurfaceTiling[],
6679 int ViewportWidth[],
6680 int ViewportHeight[],
6681 int SurfaceWidthY[],
6682 int SurfaceWidthC[],
6683 int SurfaceHeightY[],
6684 int SurfaceHeightC[],
6685 int Read256BytesBlockHeightY[],
6686 int Read256BytesBlockHeightC[],
6687 int Read256BytesBlockWidthY[],
6688 int Read256BytesBlockWidthC[],
6689 enum odm_combine_mode ODMCombineEnabled[],
6690 int BlendingAndTiming[],
6693 double BytePerPixDETY[],
6694 double BytePerPixDETC[],
6697 double HRatioChroma[],
6699 int swath_width_luma_ub[],
6700 int swath_width_chroma_ub[],
6701 double SwathWidth[],
6702 double SwathWidthChroma[],
6705 unsigned int DETBufferSizeY[],
6706 unsigned int DETBufferSizeC[],
6707 bool ViewportSizeSupportPerPlane[],
6708 bool *ViewportSizeSupport)
6710 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6711 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6712 int MinimumSwathHeightY;
6713 int MinimumSwathHeightC;
6714 int RoundedUpMaxSwathSizeBytesY;
6715 int RoundedUpMaxSwathSizeBytesC;
6716 int RoundedUpMinSwathSizeBytesY;
6717 int RoundedUpMinSwathSizeBytesC;
6718 int RoundedUpSwathSizeBytesY;
6719 int RoundedUpSwathSizeBytesC;
6720 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6721 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
/* Fill the swath width / maximum swath height arrays used by the loop below. */
6724 CalculateSwathWidth(
6726 NumberOfActivePlanes,
6738 Read256BytesBlockHeightY,
6739 Read256BytesBlockHeightC,
6740 Read256BytesBlockWidthY,
6741 Read256BytesBlockWidthC,
6746 SwathWidthSingleDPP,
6747 SwathWidthSingleDPPChroma,
6750 MaximumSwathHeightY,
6751 MaximumSwathHeightC,
6752 swath_width_luma_ub,
6753 swath_width_chroma_ub);
/* Assume support until a plane fails the check at the end of the loop body. */
6755 *ViewportSizeSupport = true;
6756 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Minimum swath heights: first test covers dm_444_64/32/16, dm_mono_16/8 and
 * dm_rgbe; the second group of tests (starting at the dm_sw_linear check
 * further down) is presumably the else arm for all other formats - confirm.
 */
6757 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6758 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6759 if (SurfaceTiling[k] == dm_sw_linear
6760 || (SourcePixelFormat[k] == dm_444_64
6761 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6762 && SourceScan[k] != dm_vert)) {
6763 MinimumSwathHeightY = MaximumSwathHeightY[k];
6764 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6765 MinimumSwathHeightY = MaximumSwathHeightY[k];
6767 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6769 MinimumSwathHeightC = MaximumSwathHeightC[k];
6771 if (SurfaceTiling[k] == dm_sw_linear) {
6772 MinimumSwathHeightY = MaximumSwathHeightY[k];
6773 MinimumSwathHeightC = MaximumSwathHeightC[k];
6774 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6775 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6776 MinimumSwathHeightC = MaximumSwathHeightC[k];
6777 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6778 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6779 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6780 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6781 MinimumSwathHeightY = MaximumSwathHeightY[k];
6782 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6784 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6785 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
/*
 * Swath sizes in bytes for the maximum and minimum heights. The products
 * involve a double (BytePerPixDET*) and are stored into int variables.
 */
6789 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6790 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6791 if (SourcePixelFormat[k] == dm_420_10) {
6792 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6793 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6795 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6796 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6797 if (SourcePixelFormat[k] == dm_420_10) {
6798 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6799 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
/*
 * Pick swath heights by comparing against half the DET size in bytes:
 * max Y + max C first; then min Y + max C when Y is at least 1.5x C; then
 * max Y + min C when Y is below 1.5x C. The last pair of assignments
 * (min Y + min C) has no visible size test - presumably the final else.
 */
6802 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6803 SwathHeightY[k] = MaximumSwathHeightY[k];
6804 SwathHeightC[k] = MaximumSwathHeightC[k];
6805 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6806 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6807 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6808 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6809 SwathHeightY[k] = MinimumSwathHeightY;
6810 SwathHeightC[k] = MaximumSwathHeightC[k];
6811 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6812 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6813 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6814 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6815 SwathHeightY[k] = MaximumSwathHeightY[k];
6816 SwathHeightC[k] = MinimumSwathHeightC;
6817 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6818 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6820 SwathHeightY[k] = MinimumSwathHeightY;
6821 SwathHeightC[k] = MinimumSwathHeightC;
6822 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6823 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
/*
 * DET split uses actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64)
 * (presumably the size rounded up to a multiple of 64 - confirm dml_ceil).
 * SwathHeightC == 0: everything goes to Y. Y swath <= 1.5x C swath: half each.
 * The remaining assignments give Y two thirds (passed through
 * dml_floor(..., 1024)) and C one third.
 */
6826 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6827 if (SwathHeightC[k] == 0) {
6828 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6829 DETBufferSizeC[k] = 0;
6830 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6831 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6832 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6834 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6835 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
/*
 * A plane is unsupported when even the minimum swaths exceed half of the
 * rounded DET size, or when a swath width exceeds its maximum (chroma is
 * only checked when SwathHeightC[k] > 0).
 */
6838 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6839 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6840 *ViewportSizeSupport = false;
6841 ViewportSizeSupportPerPlane[k] = false;
6843 ViewportSizeSupportPerPlane[k] = true;
/*
 * CalculateSwathWidth() - compute, for each plane k < NumberOfActivePlanes,
 * the single-DPP and per-pipe swath widths, the maximum swath heights and
 * the upper-bound swath widths (swath_width_luma_ub / swath_width_chroma_ub).
 *
 * Outputs written in the visible code: SwathWidthSingleDPPY/C, SwathWidthY/C,
 * MaximumSwathHeightY/C, swath_width_luma_ub, swath_width_chroma_ub.
 *
 * NOTE(review): HActive, HRatio, DPPPerPlane, BytePerPixC and the loop
 * indices k and j are used below but are not declared in the visible text,
 * and the "else" lines / closing braces of the branches are not visible.
 * Confirm against the complete signature and body.
 */
6849 static void CalculateSwathWidth(
6850 bool ForceSingleDPP,
6851 int NumberOfActivePlanes,
6852 enum source_format_class SourcePixelFormat[],
6853 enum scan_direction_class SourceScan[],
6854 int ViewportWidth[],
6855 int ViewportHeight[],
6856 int SurfaceWidthY[],
6857 int SurfaceWidthC[],
6858 int SurfaceHeightY[],
6859 int SurfaceHeightC[],
6860 enum odm_combine_mode ODMCombineEnabled[],
6863 int Read256BytesBlockHeightY[],
6864 int Read256BytesBlockHeightC[],
6865 int Read256BytesBlockWidthY[],
6866 int Read256BytesBlockWidthC[],
6867 int BlendingAndTiming[],
6871 double SwathWidthSingleDPPY[],
6872 double SwathWidthSingleDPPC[],
6873 double SwathWidthY[],
6874 double SwathWidthC[],
6875 int MaximumSwathHeightY[],
6876 int MaximumSwathHeightC[],
6877 int swath_width_luma_ub[],
6878 int swath_width_chroma_ub[])
6880 enum odm_combine_mode MainPlaneODMCombine;
6883 #ifdef __DML_VBA_DEBUG__
6884 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6887 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Single-DPP luma width: ViewportWidth when the scan is not dm_vert; the
 * ViewportHeight assignment is presumably the else arm (vertical scan).
 */
6888 if (SourceScan[k] != dm_vert) {
6889 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6891 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6894 #ifdef __DML_VBA_DEBUG__
6895 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6896 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
/*
 * Start from this plane's own ODM mode, then take the mode of plane j when
 * BlendingAndTiming[k] == j.
 */
6899 MainPlaneODMCombine = ODMCombineEnabled[k];
6900 for (j = 0; j < NumberOfActivePlanes; ++j) {
6901 if (BlendingAndTiming[k] == j) {
6902 MainPlaneODMCombine = ODMCombineEnabled[j];
/*
 * Per-pipe luma width: with 4to1 / 2to1 ODM combine it is capped at
 * dml_round(HActive / 4 or 2 * HRatio); with DPPPerPlane == 2 it is half
 * the single-DPP width; the last assignment keeps the single-DPP width.
 */
6906 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6907 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6908 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6909 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6910 } else if (DPPPerPlane[k] == 2) {
6911 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6913 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6916 #ifdef __DML_VBA_DEBUG__
6917 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6918 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
/* Chroma widths are half the luma widths for the dm_420_* formats, else equal. */
6921 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6922 SwathWidthC[k] = SwathWidthY[k] / 2;
6923 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6925 SwathWidthC[k] = SwathWidthY[k];
6926 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
/* ForceSingleDPP overrides the per-pipe widths with the single-DPP widths. */
6929 if (ForceSingleDPP == true) {
6930 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6931 SwathWidthC[k] = SwathWidthSingleDPPC[k];
/*
 * Surface dimensions passed through dml_ceil() with the 256-byte block
 * dimension as granularity (presumably rounding up to a whole number of
 * blocks - confirm dml_ceil semantics).
 */
6934 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6935 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6936 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6937 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6939 #ifdef __DML_VBA_DEBUG__
6940 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
/*
 * Non-vertical scan: maximum swath height is the block height and the
 * upper-bound width is computed from the block width, capped by the surface
 * upper bound. The second group of assignments swaps the roles of block
 * width and height (presumably the vertical-scan else arm). Chroma upper
 * bound is 0 when BytePerPixC[k] is not > 0.
 */
6943 if (SourceScan[k] != dm_vert) {
6944 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6945 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6946 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6947 if (BytePerPixC[k] > 0) {
6948 swath_width_chroma_ub[k] = dml_min(
6950 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6952 swath_width_chroma_ub[k] = 0;
6955 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6956 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6957 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6958 if (BytePerPixC[k] > 0) {
6959 swath_width_chroma_ub[k] = dml_min(
6960 surface_height_ub_c,
6961 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6963 swath_width_chroma_ub[k] = 0;
/*
 * CalculateExtraLatency() - return the extra latency as
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *   + ExtraLatencyBytes / ReturnBW
 * where ExtraLatencyBytes comes from CalculateExtraLatencyBytes().
 *
 * NOTE(review): DCFCLK and ReturnBW are used below but are not in the visible
 * parameter list, and several arguments of the CalculateExtraLatencyBytes()
 * call are not visible either; confirm against the complete signature.
 * Units are not established by this block.
 */
6970 static double CalculateExtraLatency(
6971 int RoundTripPingLatencyCycles,
6972 int ReorderingBytes,
6974 int TotalNumberOfActiveDPP,
6975 int PixelChunkSizeInKByte,
6976 int TotalNumberOfDCCActiveDPP,
6981 int NumberOfActivePlanes,
6983 int dpte_group_bytes[],
6984 double HostVMInefficiencyFactor,
6985 double HostVMMinPageSize,
6986 int HostVMMaxNonCachedPageTableLevels)
6988 double ExtraLatencyBytes;
6989 double ExtraLatency;
6991 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6993 TotalNumberOfActiveDPP,
6994 PixelChunkSizeInKByte,
6995 TotalNumberOfDCCActiveDPP,
6999 NumberOfActivePlanes,
7002 HostVMInefficiencyFactor,
7004 HostVMMaxNonCachedPageTableLevels);
/* Cycle-count term divided by DCFCLK plus byte-count term divided by ReturnBW. */
7006 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7008 #ifdef __DML_VBA_DEBUG__
7009 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7010 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7011 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7012 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7013 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7016 return ExtraLatency;
/*
 * CalculateExtraLatencyBytes() - accumulate a byte count into ret:
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable, for each plane k:
 *       NumberOfDPP[k] * dpte_group_bytes[k]
 *       * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor
 *
 * HostVMDynamicLevels is non-zero only when both GPUVMEnable and HostVMEnable
 * are true, and is then derived from HostVMMinPageSize and
 * HostVMMaxNonCachedPageTableLevels.
 *
 * NOTE(review): ret, GPUVMEnable, HostVMEnable, MetaChunkSize and NumberOfDPP
 * are used below but not declared in the visible text, and no return
 * statement is visible (presumably ret is returned - confirm).
 */
7019 static double CalculateExtraLatencyBytes(
7020 int ReorderingBytes,
7021 int TotalNumberOfActiveDPP,
7022 int PixelChunkSizeInKByte,
7023 int TotalNumberOfDCCActiveDPP,
7027 int NumberOfActivePlanes,
7029 int dpte_group_bytes[],
7030 double HostVMInefficiencyFactor,
7031 double HostVMMinPageSize,
7032 int HostVMMaxNonCachedPageTableLevels)
7035 int HostVMDynamicLevels = 0, k;
/*
 * Page size < 2048: all non-cached levels count. 2048 <= size < 1048576:
 * one level fewer, floored at 0. The remaining "- 2" assignment is
 * presumably the else arm for larger page sizes. The units of
 * HostVMMinPageSize are not established here.
 */
7037 if (GPUVMEnable == true && HostVMEnable == true) {
7038 if (HostVMMinPageSize < 2048) {
7039 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
7040 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
7041 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
7043 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
7046 HostVMDynamicLevels = 0;
/* Chunk sizes are scaled by 1024.0, which also makes the sum floating point. */
7049 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
7051 if (GPUVMEnable == true) {
7052 for (k = 0; k < NumberOfActivePlanes; ++k) {
7053 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * CalculateUrgentLatency() - start from dml_max3() of the three urgent latency
 * inputs (presumably their maximum - confirm dml_max3) and, when
 * DoUrgentLatencyAdjustment is true, add
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * NOTE(review): ret and FabricClock are used below but not declared in the
 * visible text, and no return statement is visible (presumably ret is
 * returned - confirm). FabricClock is used as a divisor with no zero check
 * in the visible code.
 */
7059 static double CalculateUrgentLatency(
7060 double UrgentLatencyPixelDataOnly,
7061 double UrgentLatencyPixelMixedWithVMData,
7062 double UrgentLatencyVMDataOnly,
7063 bool DoUrgentLatencyAdjustment,
7064 double UrgentLatencyAdjustmentFabricClockComponent,
7065 double UrgentLatencyAdjustmentFabricClockReference,
7070 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
/* The adjustment term is zero when FabricClock equals the reference clock. */
7071 if (DoUrgentLatencyAdjustment == true) {
7072 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
7077 static void UseMinimumDCFCLK(
7078 struct display_mode_lib *mode_lib,
7079 int MaxPrefetchMode,
7080 int ReorderingBytes)
7082 struct vba_vars_st *v = &mode_lib->vba;
7083 int dummy1, i, j, k;
7084 double NormalEfficiency, dummy2, dummy3;
7085 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7087 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7088 for (i = 0; i < v->soc.num_states; ++i) {
7089 for (j = 0; j <= 1; ++j) {
7090 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7091 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7092 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7093 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7094 double MinimumTWait;
7095 double NonDPTEBandwidth;
7096 double DPTEBandwidth;
7097 double DCFCLKRequiredForAverageBandwidth;
7098 double ExtraLatencyBytes;
7099 double ExtraLatencyCycles;
7100 double DCFCLKRequiredForPeakBandwidth;
7101 int NoOfDPPState[DC__NUM_DPP__MAX];
7102 double MinimumTvmPlus2Tr0;
7104 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7105 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7106 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7107 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7110 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7111 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7114 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7115 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7116 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7117 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7118 DCFCLKRequiredForAverageBandwidth = dml_max3(
7119 v->ProjectedDCFCLKDeepSleep[i][j],
7120 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7121 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7122 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7124 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7126 v->TotalNumberOfActiveDPP[i][j],
7127 v->PixelChunkSizeInKByte,
7128 v->TotalNumberOfDCCActiveDPP[i][j],
7132 v->NumberOfActivePlanes,
7134 v->dpte_group_bytes,
7136 v->HostVMMinPageSize,
7137 v->HostVMMaxNonCachedPageTableLevels);
7138 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7139 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7140 double DCFCLKCyclesRequiredInPrefetch;
7141 double ExpectedPrefetchBWAcceleration;
7142 double PrefetchTime;
7144 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7145 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7146 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7147 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7148 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7149 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7150 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7151 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7152 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7153 DynamicMetadataVMExtraLatency[k] =
7154 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7155 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7156 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7158 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7159 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7160 - DynamicMetadataVMExtraLatency[k];
7162 if (PrefetchTime > 0) {
7163 double ExpectedVRatioPrefetch;
7164 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7165 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7166 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7167 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7168 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7169 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7170 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7173 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7175 if (v->DynamicMetadataEnable[k] == true) {
7180 double AllowedTimeForUrgentExtraLatency;
7182 CalculateVupdateAndDynamicMetadataParameters(
7183 v->MaxInterDCNTileRepeaters,
7184 v->RequiredDPPCLK[i][j][k],
7185 v->RequiredDISPCLK[i][j],
7186 v->ProjectedDCFCLKDeepSleep[i][j],
7189 v->VTotal[k] - v->VActive[k],
7190 v->DynamicMetadataTransmittedBytes[k],
7191 v->DynamicMetadataLinesBeforeActiveRequired[k],
7193 v->ProgressiveToInterlaceUnitInOPP,
7201 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7202 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7203 if (AllowedTimeForUrgentExtraLatency > 0) {
7204 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7205 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7206 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7208 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7212 DCFCLKRequiredForPeakBandwidth = 0;
7213 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7214 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7216 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7217 * (v->GPUVMEnable == true ?
7218 (v->HostVMEnable == true ?
7219 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7221 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7222 double MaximumTvmPlus2Tr0PlusTsw;
7223 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7224 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7225 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7227 DCFCLKRequiredForPeakBandwidth = dml_max3(
7228 DCFCLKRequiredForPeakBandwidth,
7229 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7230 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7233 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * CalculateUnboundedRequestAndCompressedBufferSize - decide whether unbounded
 * requesting is enabled and derive the compressed buffer size from it.
 *
 * Inputs as read by the body:
 *   DETBufferSizeInKByte                    - passed through dml_ceil(..., 64) before use.
 *   ConfigReturnBufferSizeInKByte           - value the DET share is subtracted from.
 *   UseUnboundedRequestingFinal             - forwarded to UnboundedRequest().
 *   NoChromaPlanes                          - forwarded to UnboundedRequest().
 *   CompressedBufferSegmentSizeInkByteFinal - final scale factor (result * this / 64).
 *   Output                                  - only Output[0] is read.
 *   TotalActiveDPP, MaxNumDPP               - used below; their declarations are not
 *                                             among the lines visible in this view.
 *
 * Outputs:
 *   *UnboundedRequestEnabled     - return value of UnboundedRequest().
 *   *CompressedBufferSizeInkByte - ConfigReturnBufferSizeInKByte minus
 *                                  (TotalActiveDPP or MaxNumDPP) * rounded DET size,
 *                                  then multiplied by
 *                                  CompressedBufferSegmentSizeInkByteFinal and divided by 64.
 */
7238 static void CalculateUnboundedRequestAndCompressedBufferSize(
7239 unsigned int DETBufferSizeInKByte,
7240 int ConfigReturnBufferSizeInKByte,
7241 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7243 bool NoChromaPlanes,
7245 int CompressedBufferSegmentSizeInkByteFinal,
7246 enum output_encoder_class *Output,
7247 bool *UnboundedRequestEnabled,
7248 int *CompressedBufferSizeInkByte)
// presumably dml_ceil rounds the DET size up to a multiple of 64 - TODO confirm against the dml helper definition
7250 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
// The enable decision is delegated entirely to UnboundedRequest(); only the first Output entry is consulted.
7252 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
// When unbounded requesting is enabled the DET share is counted for TotalActiveDPP pipes,
// otherwise for MaxNumDPP pipes. The subtraction is done in double (actDETBufferSizeInKByte
// is a double) and converted to int when stored through the int pointer.
7253 *CompressedBufferSizeInkByte = (
7254 *UnboundedRequestEnabled == true ?
7255 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7256 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
// Both operands named on this line are int, so the multiply happens first and the
// division by 64 is integer division (assuming the stored value and the constant stay int).
7257 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
// Debug-only trace of the inputs and the two results; compiled in only when __DML_VBA_DEBUG__ is defined.
// NOTE(review): DETBufferSizeInKByte is declared unsigned int but is printed with %d below - confirm whether %u is intended.
7259 #ifdef __DML_VBA_DEBUG__
7260 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7261 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7262 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7263 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7264 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7265 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7266 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7270 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7272 bool ret_val = false;
7274 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7275 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {