2 * Copyright 2022 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
// dml32_dscceComputeDelay - estimate the dscce delay for one DSC configuration.
// A cycle count (Delay) is built from the initial transmit delay, the slice
// width as seen by DSC and the number of slices, and is then scaled to a
// pixel count (pixels = Delay * 3 * pixelsPerClock).
// NOTE(review): presumably `pixels` is the value handed back to the caller - confirm.
30 unsigned int dml32_dscceComputeDelay(
33 unsigned int sliceWidth,
34 unsigned int numSlices,
35 enum output_format_class pixelFormat,
36 enum output_encoder_class Output)
38 // valid bpc = source bits per component in the set of {8, 10, 12}
39 // valid bpp = increments of 1/16 of a bit
40 // min = 6/7/8 in N420/N422/444, respectively
41 // max = such that compression is 1:1
42 //valid sliceWidth = number of pixels per slice line,
43 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
44 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
45 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
// rate-control model size used to derive the initial transmit delay below
48 unsigned int rcModelSize = 8192;
50 // N422/N420 operate at 2 pixels per clock
51 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 if (pixelFormat == dm_420)
56 else if (pixelFormat == dm_n422)
58 // all other modes operate at 1 pixel per clock
62 //initial transmit delay as per PPS
// the 2.0 literal forces floating-point division before rounding
63 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
73 //divide by pixel per cycle to compute slice width as seen by DSC
// integer division: w is declared unsigned int, so any remainder is dropped
74 w = sliceWidth / pixelsPerClock;
76 //422 mode has an additional cycle of delay
// NOTE(review): the test below names dm_420, dm_444 and dm_n422, i.e. the
// formats that presumably do NOT get the extra cycle - confirm against `s`.
77 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
82 //main calculation for the dscce
83 ix = initalXmitDelay + 45;
// (a + 2) / 3 is an integer round-up of a / 3
88 ax = (a + 2) / 3 + D + 6 + 1;
// integer round-up of ax / wx
89 L = (ax + wx - 1) / wx;
// NOTE(review): w comes from an integer division; a sliceWidth smaller than
// pixelsPerClock would make w zero and this modulo undefined - callers
// presumably guarantee a valid slice width, confirm.
90 if ((ix % w) == 0 && p != 0)
// total delay in cycles: (numSlices - 1) slices of L * wx plus fixed terms
94 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
96 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
97 pixels = Delay * 3 * pixelsPerClock;
// debug-only dump of the inputs and the computed pixel delay
99 #ifdef __DML_VBA_DEBUG__
100 dml_print("DML::%s: bpc: %d\n", __func__, bpc);
101 dml_print("DML::%s: BPP: %f\n", __func__, BPP);
102 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
103 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
104 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
105 dml_print("DML::%s: Output: %d\n", __func__, Output);
106 dml_print("DML::%s: pixels: %d\n", __func__, pixels);
// dml32_dscComputeDelay - accumulate the dscc pipeline delay for a pixel format.
// Delay starts at 0 and one of three branches (dm_420, other non-444 formats,
// remaining formats) contributes the stages named in the comments below:
// input deserializer, input cdc fifo, cdc uncertainty, output cdc fifo and
// output serializer.
// NOTE(review): presumably returns Delay - confirm.
112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
114 unsigned int Delay = 0;
// branch 1: 4:2:0 output
116 if (pixelFormat == dm_420) {
121 // dscc - input deserializer
123 // dscc gets pixels every other cycle
125 // dscc - input cdc fifo
127 // dscc gets pixels every other cycle
129 // dscc - cdc uncertainty
131 // dscc - output cdc fifo
133 // dscc gets pixels every other cycle
135 // dscc - cdc uncertainty
137 // dscc - output serializer
// branch 2: every remaining format that is not dm_444.
// NOTE(review): assuming dm_n422 and dm_444 are distinct enumerators, the
// `pixelFormat == dm_n422` test is already implied by `pixelFormat != dm_444`.
141 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
146 // dscc - input deserializer
148 // dscc - input cdc fifo
150 // dscc - cdc uncertainty
152 // dscc - output cdc fifo
154 // dscc - cdc uncertainty
156 // dscc - output serializer
// branch 3 (remaining case, i.e. dm_444): same stage list
165 // dscc - input deserializer
167 // dscc - input cdc fifo
169 // dscc - cdc uncertainty
171 // dscc - output cdc fifo
173 // dscc - output serializer
175 // dscc - cdc uncertainty
// IsVertical - test whether a scan rotation is one of the four 90/270 variants
// (dm_rotation_90, dm_rotation_90m, dm_rotation_270, dm_rotation_270m).
// NOTE(review): presumably is_vert is set to true in that case and returned - confirm.
185 bool IsVertical(enum dm_rotation_angle Scan)
// default answer when none of the listed rotations matches
187 bool is_vert = false;
189 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
// dml32_CalculateSinglePipeDPPCLKAndSCLThroughput - compute the scaler
// throughput for luma and chroma and the DPP clock needed when a single DPP
// handles the surface.
// Outputs (written through pointers):
//   PSCL_THROUGHPUT         luma scaler throughput
//   PSCL_THROUGHPUT_CHROMA  chroma scaler throughput (0 for formats without chroma)
//   DPPCLKUsingSingleDPP    larger of the luma and chroma clock requirements
196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
201 double MaxDCHUBToPSCLThroughput,
202 double MaxPSCLToLBThroughput,
204 enum source_format_class SourcePixelFormat,
206 unsigned int HTapsChroma,
208 unsigned int VTapsChroma,
211 double *PSCL_THROUGHPUT,
212 double *PSCL_THROUGHPUT_CHROMA,
213 double *DPPCLKUsingSingleDPP)
215 double DPPCLKUsingSingleDPPLuma;
216 double DPPCLKUsingSingleDPPChroma;
// Luma throughput: two alternative formulas follow.
// NOTE(review): presumably selected by HRatio > 1, mirroring the chroma path - confirm.
// Scaled case: line-buffer throughput is derated by ceil(HTaps / 6).
219 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
220 dml_ceil((double) HTaps / 6.0, 1.0));
// Unscaled case: limited only by the two maximum throughputs.
222 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
// Luma DPP clock: pixel clock times the largest of three factors (never below 1).
225 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
226 *PSCL_THROUGHPUT, 1);
// More than 6 taps in either direction requires at least twice the pixel clock.
228 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
229 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
// Formats other than 4:2:0 and rgbe_alpha: no chroma throughput, luma alone sets the clock.
231 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
232 SourcePixelFormat != dm_rgbe_alpha)) {
233 *PSCL_THROUGHPUT_CHROMA = 0;
234 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
// Chroma path: same structure as luma, using the chroma ratios and taps.
236 if (HRatioChroma > 1) {
237 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
238 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
240 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
// NOTE(review): VTapsChroma is unsigned int, so VTapsChroma / 6 truncates
// (0 for fewer than 6 taps) before the multiply, unlike the (double) cast
// applied to HTapsChroma above - confirm this is intended.
242 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
243 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
244 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
245 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
// The DPP clock must satisfy both planes.
246 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
// dml32_CalculateBytePerPixelAndBlockSizes - derive per-plane byte sizes and
// tile geometry from the source pixel format and the surface tiling mode.
// Outputs (written through pointers):
//   BytePerPixelDETY/C          bytes per pixel for each plane (fractional values allowed)
//   BytePerPixelY/C             integer bytes per pixel for each plane
//   BlockHeight/Width256BytesY/C  dimensions of a 256-byte block
//   MacroTileHeight/WidthY/C    dimensions of a macro tile
// A plane-C value of 0 marks a format without a second plane.
250 void dml32_CalculateBytePerPixelAndBlockSizes(
251 enum source_format_class SourcePixelFormat,
252 enum dm_swizzle_mode SurfaceTiling,
255 unsigned int *BytePerPixelY,
256 unsigned int *BytePerPixelC,
257 double *BytePerPixelDETY,
258 double *BytePerPixelDETC,
259 unsigned int *BlockHeight256BytesY,
260 unsigned int *BlockHeight256BytesC,
261 unsigned int *BlockWidth256BytesY,
262 unsigned int *BlockWidth256BytesC,
263 unsigned int *MacroTileHeightY,
264 unsigned int *MacroTileHeightC,
265 unsigned int *MacroTileWidthY,
266 unsigned int *MacroTileWidthC)
// Step 1: bytes per pixel, one branch per source format.
268 if (SourcePixelFormat == dm_444_64) {
269 *BytePerPixelDETY = 8;
270 *BytePerPixelDETC = 0;
273 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
274 *BytePerPixelDETY = 4;
275 *BytePerPixelDETC = 0;
278 } else if (SourcePixelFormat == dm_444_16) {
279 *BytePerPixelDETY = 2;
280 *BytePerPixelDETC = 0;
283 } else if (SourcePixelFormat == dm_444_8) {
284 *BytePerPixelDETY = 1;
285 *BytePerPixelDETC = 0;
288 } else if (SourcePixelFormat == dm_rgbe_alpha) {
289 *BytePerPixelDETY = 4;
290 *BytePerPixelDETC = 1;
293 } else if (SourcePixelFormat == dm_420_8) {
294 *BytePerPixelDETY = 1;
295 *BytePerPixelDETC = 2;
298 } else if (SourcePixelFormat == dm_420_12) {
299 *BytePerPixelDETY = 2;
300 *BytePerPixelDETC = 4;
// Remaining formats: fractional sizes (4/3 and 8/3 bytes per pixel).
304 *BytePerPixelDETY = 4.0 / 3;
305 *BytePerPixelDETC = 8.0 / 3;
309 #ifdef __DML_VBA_DEBUG__
310 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
311 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
312 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
313 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
314 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
// Step 2: 256-byte block dimensions. This first group of formats sets only
// the plane-Y block size; the plane-C block is zeroed.
// NOTE(review): dm_mono_16 and dm_mono_8 are accepted here but have no
// dedicated branch in the bytes-per-pixel chain above - confirm which
// byte sizes they are meant to receive.
316 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
317 || SourcePixelFormat == dm_444_16
318 || SourcePixelFormat == dm_444_8
319 || SourcePixelFormat == dm_mono_16
320 || SourcePixelFormat == dm_mono_8
321 || SourcePixelFormat == dm_rgbe)) {
// Linear surfaces use a block height of one row.
322 if (SurfaceTiling == dm_sw_linear)
323 *BlockHeight256BytesY = 1;
324 else if (SourcePixelFormat == dm_444_64)
325 *BlockHeight256BytesY = 4;
326 else if (SourcePixelFormat == dm_444_8)
327 *BlockHeight256BytesY = 16;
329 *BlockHeight256BytesY = 8;
// width = 256 bytes / bytes per pixel / rows (integer division)
331 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
332 *BlockHeight256BytesC = 0;
333 *BlockWidth256BytesC = 0;
// Other formats: both planes get a block size.
335 if (SurfaceTiling == dm_sw_linear) {
336 *BlockHeight256BytesY = 1;
337 *BlockHeight256BytesC = 1;
338 } else if (SourcePixelFormat == dm_rgbe_alpha) {
339 *BlockHeight256BytesY = 8;
340 *BlockHeight256BytesC = 16;
341 } else if (SourcePixelFormat == dm_420_8) {
342 *BlockHeight256BytesY = 16;
343 *BlockHeight256BytesC = 8;
345 *BlockHeight256BytesY = 8;
346 *BlockHeight256BytesC = 8;
// NOTE(review): these divisions rely on *BytePerPixelY and *BytePerPixelC
// being non-zero for every format that reaches this branch - confirm.
348 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
349 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
351 #ifdef __DML_VBA_DEBUG__
352 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
353 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
354 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
355 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
// Step 3: macro tile dimensions. Linear: the macro tile equals the 256-byte block.
358 if (SurfaceTiling == dm_sw_linear) {
359 *MacroTileHeightY = *BlockHeight256BytesY;
360 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
361 *MacroTileHeightC = *BlockHeight256BytesC;
// A zero chroma height means no chroma plane; skip the division.
362 if (*MacroTileHeightC == 0)
363 *MacroTileWidthC = 0;
365 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
// 64 KB swizzle modes: tile height is 16 block heights, tile size 65536 bytes.
366 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
367 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
368 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
369 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
370 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
371 if (*MacroTileHeightC == 0)
372 *MacroTileWidthC = 0;
374 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
// Remaining tiling modes: tile height is 32 block heights, tile size 4 * 65536 bytes.
376 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
377 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
378 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
379 if (*MacroTileHeightC == 0)
380 *MacroTileWidthC = 0;
382 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 #ifdef __DML_VBA_DEBUG__
386 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
387 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
388 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
389 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
391 } // CalculateBytePerPixelAndBlockSizes
// dml32_CalculateSwathAndDETConfiguration - choose swath heights and split the
// DET buffer between the two planes of every active surface.
// Steps, in order:
//   1. dml32_CalculateSwathWidth() supplies swath widths and maximum swath heights.
//   2. The maximum swath size in bytes is computed per plane.
//   3. dml32_UnboundedRequest() and dml32_CalculateDETBufferSize() decide the
//      unbounded-request mode and the DET size per surface.
//   4. Per surface, the swath heights are reduced (halved) until two swaths fit
//      into half of the DET buffer, the viewport is flagged as supported or not,
//      and the DET buffer is divided between plane Y and plane C.
// Outputs are written to the arrays/pointers from swath_width_luma_ub onward.
393 void dml32_CalculateSwathAndDETConfiguration(
394 unsigned int DETSizeOverride[],
395 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
396 unsigned int ConfigReturnBufferSizeInKByte,
397 unsigned int MaxTotalDETInKByte,
398 unsigned int MinCompressedBufferSizeInKByte,
399 double ForceSingleDPP,
400 unsigned int NumberOfActiveSurfaces,
401 unsigned int nomDETInKByte,
402 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
403 unsigned int CompressedBufferSegmentSizeInkByteFinal,
404 enum output_encoder_class Output[],
405 double ReadBandwidthLuma[],
406 double ReadBandwidthChroma[],
407 double MaximumSwathWidthLuma[],
408 double MaximumSwathWidthChroma[],
409 enum dm_rotation_angle SourceRotation[],
410 bool ViewportStationary[],
411 enum source_format_class SourcePixelFormat[],
412 enum dm_swizzle_mode SurfaceTiling[],
413 unsigned int ViewportWidth[],
414 unsigned int ViewportHeight[],
415 unsigned int ViewportXStart[],
416 unsigned int ViewportYStart[],
417 unsigned int ViewportXStartC[],
418 unsigned int ViewportYStartC[],
419 unsigned int SurfaceWidthY[],
420 unsigned int SurfaceWidthC[],
421 unsigned int SurfaceHeightY[],
422 unsigned int SurfaceHeightC[],
423 unsigned int Read256BytesBlockHeightY[],
424 unsigned int Read256BytesBlockHeightC[],
425 unsigned int Read256BytesBlockWidthY[],
426 unsigned int Read256BytesBlockWidthC[],
427 enum odm_combine_mode ODMMode[],
428 unsigned int BlendingAndTiming[],
429 unsigned int BytePerPixY[],
430 unsigned int BytePerPixC[],
431 double BytePerPixDETY[],
432 double BytePerPixDETC[],
433 unsigned int HActive[],
435 double HRatioChroma[],
436 unsigned int DPPPerSurface[],
439 unsigned int swath_width_luma_ub[],
440 unsigned int swath_width_chroma_ub[],
442 double SwathWidthChroma[],
443 unsigned int SwathHeightY[],
444 unsigned int SwathHeightC[],
445 unsigned int DETBufferSizeInKByte[],
446 unsigned int DETBufferSizeY[],
447 unsigned int DETBufferSizeC[],
448 bool *UnboundedRequestEnabled,
449 unsigned int *CompressedBufferSizeInkByte,
450 bool ViewportSizeSupportPerSurface[],
451 bool *ViewportSizeSupport)
// Per-surface scratch arrays, sized for the maximum number of DPPs.
453 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
454 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
455 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
456 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
457 unsigned int RoundedUpSwathSizeBytesY;
458 unsigned int RoundedUpSwathSizeBytesC;
459 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
460 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
462 unsigned int TotalActiveDPP = 0;
463 bool NoChromaSurfaces = true;
464 unsigned int DETBufferSizeInKByteForSwathCalculation;
466 #ifdef __DML_VBA_DEBUG__
// NOTE(review): ForceSingleDPP is declared double in this signature but is
// printed with %d - the format and argument types do not match.
467 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
// Step 1: swath widths and maximum swath heights for every surface.
469 dml32_CalculateSwathWidth(ForceSingleDPP,
470 NumberOfActiveSurfaces,
487 Read256BytesBlockHeightY,
488 Read256BytesBlockHeightC,
489 Read256BytesBlockWidthY,
490 Read256BytesBlockWidthC,
498 SwathWidthdoubleDPPChroma,
504 swath_width_chroma_ub);
// Step 2: maximum swath size in bytes = width * bytes per pixel * max height.
// The product involves a double and is truncated when stored as unsigned int.
506 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
507 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
508 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
509 #ifdef __DML_VBA_DEBUG__
510 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
511 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
512 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
513 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
514 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
515 RoundedUpMaxSwathSizeBytesY[k]);
516 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
517 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
518 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
519 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
520 RoundedUpMaxSwathSizeBytesC[k]);
// dm_420_10 only: sizes go through dml_ceil(..., 256)
// (presumably rounding up to a multiple of 256 bytes - confirm).
523 if (SourcePixelFormat[k] == dm_420_10) {
524 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
525 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
// Step 3a: count active DPPs and detect whether any surface carries chroma.
529 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
530 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
531 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
532 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
533 NoChromaSurfaces = false;
// Step 3b: only the output type of surface 0 is passed to the decision.
537 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP,
538 NoChromaSurfaces, Output[0]);
// Step 3c: DET size per surface and the compressed buffer size.
540 dml32_CalculateDETBufferSize(DETSizeOverride,
541 UseMALLForPStateChange,
543 NumberOfActiveSurfaces,
544 *UnboundedRequestEnabled,
547 ConfigReturnBufferSizeInKByte,
548 MinCompressedBufferSizeInKByte,
549 CompressedBufferSegmentSizeInkByteFinal,
553 RoundedUpMaxSwathSizeBytesY,
554 RoundedUpMaxSwathSizeBytesC,
558 DETBufferSizeInKByte, // per hubp pipe
559 CompressedBufferSizeInkByte);
561 #ifdef __DML_VBA_DEBUG__
562 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
563 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
564 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
565 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
566 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
567 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
// Step 4: per-surface swath height selection; support is assumed until a
// surface fails one of the checks below.
570 *ViewportSizeSupport = true;
571 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// Phantom pipes are evaluated against a fixed 1024 KB instead of their own DET size.
573 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
574 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
575 #ifdef __DML_VBA_DEBUG__
576 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
577 DETBufferSizeInKByteForSwathCalculation);
// Case 1: full-height swaths of both planes fit in half of the DET buffer.
580 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
581 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
582 SwathHeightY[k] = MaximumSwathHeightY[k];
583 SwathHeightC[k] = MaximumSwathHeightC[k];
584 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
585 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
// Case 2: luma is at least 1.5x chroma and halving luma makes it fit.
586 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
587 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
588 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
589 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
590 SwathHeightC[k] = MaximumSwathHeightC[k];
591 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
592 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
// Case 3: luma is below 1.5x chroma and halving chroma makes it fit.
593 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
594 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
595 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
596 SwathHeightY[k] = MaximumSwathHeightY[k];
597 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
598 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
599 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
// Case 4: halve both planes.
601 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
602 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
603 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
604 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
// Unsupported when even half-height swaths overflow half of the DET buffer,
// or when a swath width exceeds its maximum (chroma checked only if present).
607 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
608 DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
609 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
610 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
611 *ViewportSizeSupport = false;
612 ViewportSizeSupportPerSurface[k] = false;
614 ViewportSizeSupportPerSurface[k] = true;
// DET split between planes: no chroma swath -> plane Y takes everything.
617 if (SwathHeightC[k] == 0) {
618 #ifdef __DML_VBA_DEBUG__
619 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
621 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
622 DETBufferSizeC[k] = 0;
// Luma swath at most 1.5x the chroma swath -> equal halves.
623 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
624 #ifdef __DML_VBA_DEBUG__
625 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
627 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
628 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
// Otherwise plane Y gets two thirds via dml_floor(..., 1024)
// (presumably rounded down to a 1024-byte multiple - confirm);
// plane C receives the remainder.
630 #ifdef __DML_VBA_DEBUG__
631 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
633 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
634 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
637 #ifdef __DML_VBA_DEBUG__
638 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
639 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
640 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
641 k, RoundedUpMaxSwathSizeBytesY[k]);
642 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
643 k, RoundedUpMaxSwathSizeBytesC[k]);
644 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
645 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
646 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
647 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
648 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
649 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
650 ViewportSizeSupportPerSurface[k]);
654 } // CalculateSwathAndDETConfiguration
// dml32_CalculateSwathWidth - compute, for every active surface, the swath
// width per surface and per pipe, the maximum swath heights, and the swath
// width upper bounds (swath_width_*_ub).
// Outputs are the arrays from SwathWidthdoubleDPPY onward.
// For non-vertical rotations the swath runs along the viewport width and the
// maximum swath height is the 256-byte block height; for vertical rotations
// the roles of width and height are swapped.
656 void dml32_CalculateSwathWidth(
658 unsigned int NumberOfActiveSurfaces,
659 enum source_format_class SourcePixelFormat[],
660 enum dm_rotation_angle SourceRotation[],
661 bool ViewportStationary[],
662 unsigned int ViewportWidth[],
663 unsigned int ViewportHeight[],
664 unsigned int ViewportXStart[],
665 unsigned int ViewportYStart[],
666 unsigned int ViewportXStartC[],
667 unsigned int ViewportYStartC[],
668 unsigned int SurfaceWidthY[],
669 unsigned int SurfaceWidthC[],
670 unsigned int SurfaceHeightY[],
671 unsigned int SurfaceHeightC[],
672 enum odm_combine_mode ODMMode[],
673 unsigned int BytePerPixY[],
674 unsigned int BytePerPixC[],
675 unsigned int Read256BytesBlockHeightY[],
676 unsigned int Read256BytesBlockHeightC[],
677 unsigned int Read256BytesBlockWidthY[],
678 unsigned int Read256BytesBlockWidthC[],
679 unsigned int BlendingAndTiming[],
680 unsigned int HActive[],
682 unsigned int DPPPerSurface[],
685 double SwathWidthdoubleDPPY[],
686 double SwathWidthdoubleDPPC[],
687 double SwathWidthY[], // per-pipe
688 double SwathWidthC[], // per-pipe
689 unsigned int MaximumSwathHeightY[],
690 unsigned int MaximumSwathHeightC[],
691 unsigned int swath_width_luma_ub[], // per-pipe
692 unsigned int swath_width_chroma_ub[]) // per-pipe
// ODM mode of the surface that drives timing for surface k
695 enum odm_combine_mode MainSurfaceODMMode;
// surface dimensions passed through dml_ceil with the 256-byte block size
697 unsigned int surface_width_ub_l;
698 unsigned int surface_height_ub_l;
699 unsigned int surface_width_ub_c;
700 unsigned int surface_height_ub_c;
702 #ifdef __DML_VBA_DEBUG__
703 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
704 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
707 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// Whole-surface swath width follows the scan direction.
708 if (!IsVertical(SourceRotation[k]))
709 SwathWidthdoubleDPPY[k] = ViewportWidth[k];
711 SwathWidthdoubleDPPY[k] = ViewportHeight[k];
713 #ifdef __DML_VBA_DEBUG__
714 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
715 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
// Default to this surface's own ODM mode, then take the mode of the surface
// whose index equals BlendingAndTiming[k].
718 MainSurfaceODMMode = ODMMode[k];
719 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
720 if (BlendingAndTiming[k] == j)
721 MainSurfaceODMMode = ODMMode[j];
// Per-pipe luma width: unchanged for a forced single DPP, otherwise capped by
// the ODM share of HActive * HRatio, or halved when two DPPs serve the surface.
724 if (ForceSingleDPP) {
725 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
727 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
728 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
729 dml_round(HActive[k] / 4.0 * HRatio[k]));
730 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
731 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
732 dml_round(HActive[k] / 2.0 * HRatio[k]));
733 } else if (DPPPerSurface[k] == 2) {
734 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
736 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
740 #ifdef __DML_VBA_DEBUG__
741 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
742 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
743 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
// NOTE(review): SwathWidthdoubleDPPY[] and SwathWidthY[] are double arrays but
// are printed with %d - the format and argument types do not match.
744 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
745 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
// Chroma width is half the luma width for the 4:2:0 formats, equal otherwise.
748 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
749 SourcePixelFormat[k] == dm_420_12) {
750 SwathWidthC[k] = SwathWidthY[k] / 2;
751 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
753 SwathWidthC[k] = SwathWidthY[k];
754 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
// Forced single DPP: per-pipe widths equal the whole-surface widths
// (the luma assignment repeats the one made above).
757 if (ForceSingleDPP == true) {
758 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
759 SwathWidthC[k] = SwathWidthdoubleDPPC[k];
// Surface dimensions via dml_ceil with the 256-byte block size
// (presumably rounding up to a whole number of blocks - confirm).
762 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
763 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
764 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
765 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
767 #ifdef __DML_VBA_DEBUG__
768 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
769 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
770 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
771 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
772 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
773 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
774 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
775 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
776 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
777 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
// Non-vertical scan: the swath is one block high and is measured along X.
780 if (!IsVertical(SourceRotation[k])) {
781 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
782 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
// Stationary viewport on a single DPP: bound derived from the actual
// viewport start position, capped at the surface width.
783 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
784 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
785 dml_floor(ViewportXStart[k] +
787 Read256BytesBlockWidthY[k] - 1,
788 Read256BytesBlockWidthY[k]) -
789 dml_floor(ViewportXStart[k],
790 Read256BytesBlockWidthY[k]));
// Otherwise: position-independent bound with one extra block of margin.
792 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
793 dml_ceil(SwathWidthY[k] - 1,
794 Read256BytesBlockWidthY[k]) +
795 Read256BytesBlockWidthY[k]);
// Same computation for chroma, only when a chroma plane exists.
797 if (BytePerPixC[k] > 0) {
798 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
799 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
800 dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
801 Read256BytesBlockWidthC[k] - 1,
802 Read256BytesBlockWidthC[k]) -
803 dml_floor(ViewportXStartC[k],
804 Read256BytesBlockWidthC[k]));
806 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
807 dml_ceil(SwathWidthC[k] - 1,
808 Read256BytesBlockWidthC[k]) +
809 Read256BytesBlockWidthC[k]);
812 swath_width_chroma_ub[k] = 0;
// Vertical scan: the swath is one block wide and is measured along Y, so
// block width/height and X/Y start positions swap roles.
815 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
816 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
818 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
819 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
820 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
821 Read256BytesBlockHeightY[k]) -
822 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
824 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
825 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
827 if (BytePerPixC[k] > 0) {
828 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
829 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
830 dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
831 Read256BytesBlockHeightC[k] - 1,
832 Read256BytesBlockHeightC[k]) -
833 dml_floor(ViewportYStartC[k],
834 Read256BytesBlockHeightC[k]));
836 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
837 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
838 Read256BytesBlockHeightC[k]);
841 swath_width_chroma_ub[k] = 0;
845 #ifdef __DML_VBA_DEBUG__
846 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
847 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
848 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
849 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
853 } // CalculateSwathWidth
// dml32_UnboundedRequest - decide whether unbounded requesting can be used.
// The base answer is true only when the policy is not
// dm_unbounded_requesting_disable, exactly one DPP is active, and NoChroma is set.
// NOTE(review): presumably returns ret_val - confirm.
855 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
856 unsigned int TotalNumberOfActiveDPP,
858 enum output_encoder_class Output)
860 bool ret_val = false;
862 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
863 TotalNumberOfActiveDPP == 1 && NoChroma);
// eDP-only policy with a non-eDP output is a special case.
// NOTE(review): presumably it forces ret_val to false - confirm.
864 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
869 void dml32_CalculateDETBufferSize(
870 unsigned int DETSizeOverride[],
871 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
873 unsigned int NumberOfActiveSurfaces,
874 bool UnboundedRequestEnabled,
875 unsigned int nomDETInKByte,
876 unsigned int MaxTotalDETInKByte,
877 unsigned int ConfigReturnBufferSizeInKByte,
878 unsigned int MinCompressedBufferSizeInKByte,
879 unsigned int CompressedBufferSegmentSizeInkByteFinal,
880 enum source_format_class SourcePixelFormat[],
881 double ReadBandwidthLuma[],
882 double ReadBandwidthChroma[],
883 unsigned int RoundedUpMaxSwathSizeBytesY[],
884 unsigned int RoundedUpMaxSwathSizeBytesC[],
885 unsigned int DPPPerSurface[],
887 unsigned int DETBufferSizeInKByte[],
888 unsigned int *CompressedBufferSizeInkByte)
890 unsigned int DETBufferSizePoolInKByte;
891 unsigned int NextDETBufferPieceInKByte;
892 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
893 bool NextPotentialSurfaceToAssignDETPieceFound;
894 unsigned int NextSurfaceToAssignDETPiece;
895 double TotalBandwidth;
896 double BandwidthOfSurfacesNotAssignedDETPiece;
897 unsigned int max_minDET;
899 unsigned int minDET_pipe;
902 #ifdef __DML_VBA_DEBUG__
903 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
904 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
905 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
906 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
907 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
908 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
909 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
910 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
911 CompressedBufferSegmentSizeInkByteFinal);
914 // Note: Will use default det size if that fits 2 swaths
915 if (UnboundedRequestEnabled) {
916 if (DETSizeOverride[0] > 0) {
917 DETBufferSizeInKByte[0] = DETSizeOverride[0];
919 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
920 ((double) RoundedUpMaxSwathSizeBytesY[0] +
921 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
923 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
925 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
926 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
927 DETBufferSizeInKByte[k] = nomDETInKByte;
928 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
929 SourcePixelFormat[k] == dm_420_12) {
930 max_minDET = nomDETInKByte - 64;
932 max_minDET = nomDETInKByte;
937 // add DET resource until can hold 2 full swaths
938 while (minDET <= max_minDET && minDET_pipe == 0) {
939 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
940 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
941 minDET_pipe = minDET;
942 minDET = minDET + 64;
945 #ifdef __DML_VBA_DEBUG__
946 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
947 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
948 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
949 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
950 RoundedUpMaxSwathSizeBytesY[k]);
951 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
952 RoundedUpMaxSwathSizeBytesC[k]);
955 if (minDET_pipe == 0) {
956 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
957 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
958 #ifdef __DML_VBA_DEBUG__
959 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
960 __func__, k, minDET_pipe);
964 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
965 DETBufferSizeInKByte[k] = 0;
966 } else if (DETSizeOverride[k] > 0) {
967 DETBufferSizeInKByte[k] = DETSizeOverride[k];
968 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
969 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
970 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
971 DETBufferSizeInKByte[k] = minDET_pipe;
972 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
973 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
976 #ifdef __DML_VBA_DEBUG__
977 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
978 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
979 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
980 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
985 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
986 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
987 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
989 #ifdef __DML_VBA_DEBUG__
990 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
991 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
992 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
993 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
994 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
996 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
997 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
999 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1000 DETPieceAssignedToThisSurfaceAlready[k] = true;
1001 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1002 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1003 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1004 DETPieceAssignedToThisSurfaceAlready[k] = true;
1005 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1006 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1008 DETPieceAssignedToThisSurfaceAlready[k] = false;
1010 #ifdef __DML_VBA_DEBUG__
1011 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1012 DETPieceAssignedToThisSurfaceAlready[k]);
1013 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1014 BandwidthOfSurfacesNotAssignedDETPiece);
1018 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1019 NextPotentialSurfaceToAssignDETPieceFound = false;
1020 NextSurfaceToAssignDETPiece = 0;
1022 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1023 #ifdef __DML_VBA_DEBUG__
1024 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1025 ReadBandwidthLuma[k]);
1026 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1027 ReadBandwidthChroma[k]);
1028 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1029 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1030 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1031 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1032 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1033 NextSurfaceToAssignDETPiece);
1035 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1036 (!NextPotentialSurfaceToAssignDETPieceFound ||
1037 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1038 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1039 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1040 NextSurfaceToAssignDETPiece = k;
1041 NextPotentialSurfaceToAssignDETPieceFound = true;
1043 #ifdef __DML_VBA_DEBUG__
1044 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1045 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1046 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1047 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1051 if (NextPotentialSurfaceToAssignDETPieceFound) {
1052 // Note: To show the banker's rounding behavior in VBA and also the fact
1053 // that the DET buffer size varies due to precision issue
1055 //double tmp1 = ((double) DETBufferSizePoolInKByte *
1056 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1057 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1058 // BandwidthOfSurfacesNotAssignedDETPiece /
1059 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1060 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
1061 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1062 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1063 //BandwidthOfSurfacesNotAssignedDETPiece /
1064 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1066 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1067 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1069 NextDETBufferPieceInKByte = dml_min(
1070 dml_round((double) DETBufferSizePoolInKByte *
1071 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1072 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1073 BandwidthOfSurfacesNotAssignedDETPiece /
1074 ((ForceSingleDPP ? 1 :
1075 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1076 (ForceSingleDPP ? 1 :
1077 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1078 dml_floor((double) DETBufferSizePoolInKByte,
1079 (ForceSingleDPP ? 1 :
1080 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1082 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1083 // We should limit the per-pipe DET size to the nominal / max per pipe.
1084 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1085 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1086 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1087 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1088 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1090 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1091 // already has the max per-pipe value
1092 NextDETBufferPieceInKByte = 0;
1096 #ifdef __DML_VBA_DEBUG__
1097 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1098 DETBufferSizePoolInKByte);
1099 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1100 NextSurfaceToAssignDETPiece);
1101 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1102 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1103 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1104 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1105 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1106 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1107 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1108 NextDETBufferPieceInKByte);
1109 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1110 __func__, j, NextSurfaceToAssignDETPiece,
1111 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1114 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1115 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1116 + NextDETBufferPieceInKByte
1117 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1118 #ifdef __DML_VBA_DEBUG__
1119 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1122 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1123 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1124 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1125 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1126 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1129 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1131 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1133 #ifdef __DML_VBA_DEBUG__
1134 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1135 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1136 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1137 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1138 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1141 } // CalculateDETBufferSize
/*
 * dml32_CalculateODMMode - pick the ODM combine mode for one surface and
 * report the DISPCLK that mode needs.
 *
 * Inputs visible in this listing:
 *   MaximumPixelsPerLinePerDSCUnit - HActive is compared against 1x and 2x
 *                                    this value when DSCEnable is set
 *   HActive, Output, ODMUse        - values tested by the mode conditions
 *   StateDispclk                   - DISPCLK value the per-mode requirements
 *                                    are compared against
 *   TotalNumberOfActiveDPP, MaxNumDPP - used by the "+4 / +2 / +1 DPPs still
 *                                    fit" checks
 *   DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin,
 *   DISPCLKDPPCLKVCOSpeed          - forwarded unchanged to
 *                                    dml32_CalculateRequiredDispclk()
 * Outputs written in the visible lines:
 *   *TotalAvailablePipesSupport, *ODMMode, *RequiredDISPCLKPerSurface
 *   (*NumberOfDPP is declared as an output; no assignment to it is visible.)
 *
 * NOTE(review): the embedded line numbers skip in several places (for example
 * 1148 -> 1151 and 1152 -> 1154 in the parameter list, and around the else
 * branches), so some lines of this function are not shown here. PixelClock
 * and DSCEnable are used in the body but their declarations are among the
 * omitted lines; confirm the details against the complete file.
 */
1143 void dml32_CalculateODMMode(
1144 unsigned int MaximumPixelsPerLinePerDSCUnit,
1145 unsigned int HActive,
1146 enum output_encoder_class Output,
1147 enum odm_combine_policy ODMUse,
1148 double StateDispclk,
1151 unsigned int TotalNumberOfActiveDPP,
1152 unsigned int MaxNumDPP,
1154 double DISPCLKDPPCLKDSCCLKDownSpreading,
1155 double DISPCLKRampingMargin,
1156 double DISPCLKDPPCLKVCOSpeed,
1159 bool *TotalAvailablePipesSupport,
1160 unsigned int *NumberOfDPP,
1161 enum odm_combine_mode *ODMMode,
1162 double *RequiredDISPCLKPerSurface)
1165 double SurfaceRequiredDISPCLKWithoutODMCombine;
1166 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1167 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
// DISPCLK requirement for each candidate mode: disabled, 2:1 and 4:1.
// (The final argument line of each call is not part of this listing.)
1169 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1170 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1172 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1173 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1175 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1176 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
// Defaults: enough pipes are assumed and ODM combine is off.
1178 *TotalAvailablePipesSupport = true;
1179 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
// Redundant with the default above: the "none" policy also yields disabled.
1181 if (ODMUse == dm_odm_combine_policy_none)
1182 *ODMMode = dm_odm_combine_mode_disabled;
// Default DISPCLK requirement is the non-combined one; the branches below
// overwrite it when 4:1 or 2:1 is selected.
1184 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1187 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1188 // (ODMUse == "" || ODMUse == "CombineAsNeeded")
// 4:1 candidate: Output is none of HDMI/DP/eDP, and either the policy asks
// for 4:1, or the 2:1 requirement exceeds StateDispclk, or DSC is enabled
// with HActive wider than two DSC units.
1190 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1191 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1192 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) {
// 4:1 is only taken when four more DPPs fit under MaxNumDPP.
1193 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1194 *ODMMode = dm_odm_combine_mode_4to1;
1195 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
// NOTE(review): presumably the else branch of the DPP check above (the
// "else" line itself is not shown).
1198 *TotalAvailablePipesSupport = false;
// 2:1 candidate: Output is not HDMI, and either the policy asks for 2:1, or
// only the 2:1 requirement (not the non-combined one) fits in StateDispclk,
// or DSC is enabled with HActive wider than one DSC unit.
1200 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1201 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1202 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1203 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) {
// 2:1 is only taken when two more DPPs fit under MaxNumDPP.
1204 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1205 *ODMMode = dm_odm_combine_mode_2to1;
1206 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1209 *TotalAvailablePipesSupport = false;
// Remaining case: checks that one more DPP fits. The statement guarded by
// this condition is not part of this listing.
1212 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1215 *TotalAvailablePipesSupport = false;
/*
 * dml32_CalculateRequiredDispclk - DISPCLK required for a given ODM combine
 * mode.
 *
 * The pixel clock is divided by 4 for dm_odm_combine_mode_4to1, by 2 for
 * dm_odm_combine_mode_2to1, and is otherwise used unchanged. Two candidates
 * are derived from it, both passed to dml32_RoundToDFSGranularity() with
 * round_up = 1:
 *   - with ramping:    clock * (1 + DownSpreading / 100) * (1 + RampingMargin / 100)
 *   - without ramping: clock * (1 + DownSpreading / 100)
 * MaxDispclk is passed through the same helper with round_up = 0.
 *
 * Return value:
 *   - the "without ramping" value if even that exceeds the rounded maximum;
 *   - the rounded maximum if only the "with ramping" value exceeds it;
 *   - the "with ramping" value otherwise.
 *
 * NOTE(review): PixelClock and MaxDispclk are used below but their parameter
 * declarations fall on lines that this listing omits (the embedded numbering
 * skips 1221 and 1225/1226); the "else" lines of the if-chains are likewise
 * not shown.
 */
1219 double dml32_CalculateRequiredDispclk(
1220 enum odm_combine_mode ODMMode,
1222 double DISPCLKDPPCLKDSCCLKDownSpreading,
1223 double DISPCLKRampingMargin,
1224 double DISPCLKDPPCLKVCOSpeed,
1227 double RequiredDispclk = 0.;
1228 double PixelClockAfterODM;
1229 double DISPCLKWithRampingRoundedToDFSGranularity;
1230 double DISPCLKWithoutRampingRoundedToDFSGranularity;
1231 double MaxDispclkRoundedDownToDFSGranularity;
// Per-pipe pixel clock after ODM combine splits the line across pipes.
1233 if (ODMMode == dm_odm_combine_mode_4to1)
1234 PixelClockAfterODM = PixelClock / 4;
1235 else if (ODMMode == dm_odm_combine_mode_2to1)
1236 PixelClockAfterODM = PixelClock / 2;
1238 PixelClockAfterODM = PixelClock;
// Down-spreading and ramping margin are percentages, hence the "/ 100".
1241 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1242 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1243 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1245 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1246 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1248 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
// Prefer the value with ramping margin, clamp it to the maximum when only
// the margin pushes it over, and report the unclamped no-margin value when
// the mode cannot fit at all.
1250 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1251 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1252 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1253 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1255 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1257 return RequiredDispclk;
/*
 * dml32_RoundToDFSGranularity - snap Clock to a value of the form
 * VCOSpeed * 4.0 / N, where N is VCOSpeed * 4.0 / Clock passed through
 * dml_floor() or dml_ceil() with a granularity argument of 1.0.
 *
 * Assuming dml_floor()/dml_ceil() round to a multiple of their second
 * argument and the inputs are positive, the dml_floor() form yields a result
 * >= Clock and the dml_ceil() form a result <= Clock.
 *
 * NOTE(review): only the signature and the two return statements are part of
 * this listing (embedded numbers 1261-1265 and 1267 are skipped). Presumably
 * round_up selects the dml_floor() form and there may be a guard for
 * non-positive Clock; confirm against the complete file.
 */
1260 double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
1266 return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
1268 return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);
/*
 * dml32_CalculateOutputLink - work out the output bpp and the DSC/FEC
 * requirements for one output, together with the link type and rate used.
 *
 * The outputs start as RequiresDSC = false, RequiresFEC = false and
 * type/rate "unknown"; they are only refined when
 * IsMainSurfaceUsingTheIndicatedTiming is set.
 *
 *  - dm_hdmi: no DSC/FEC; the bpp comes from dml32_TruncToValidBPP() with a
 *    link rate of dml_min(600, PHYCLKPerState) * 10 and 3 lanes.
 *  - dm_dp / dm_dp2p0 / dm_edp: candidate link rates are tried from lowest to
 *    highest, and a higher rate is only evaluated while *OutBpp is still 0.
 *      dm_dp2p0:  10000, 13500, 20000 (UHBR10 / UHBR13.5 / UHBR20), each
 *                 requiring PHYCLKD32PerState >= rate / 32.
 *      otherwise: 2700, 5400, 8100 (HBR / HBR2 / HBR3), requiring
 *                 PHYCLKPerState >= 270 / 540 / 810 respectively.
 *    Each candidate rate is scaled by (1 - Downspreading / 100), and
 *    OutputLinkDPRate must be dm_dp_rate_na or name that specific rate.
 *    When a rate yields 0 bpp, DSCEnable is set, no bpp is forced and the PHY
 *    clock cannot reach the next rate, the call is repeated with
 *    RequiresDSC / LinkDSCEnable set to true.
 *
 * Fix in this revision: the PHYCLKD32PerState thresholds were written as
 * 10000 / 32 and 13500 / 32, which C evaluates as integer divisions (312 and
 * 421 rather than 312.5 and 421.875) before comparing with the double
 * PHYCLKD32PerState. They are now written with floating-point literals;
 * 20000 / 32 is exact either way and was changed only for consistency.
 *
 * NOTE(review): RequiresFEC is declared "double *" although only true/false
 * is ever stored through it in the visible lines. PHYCLKD18PerState is not
 * used in the visible lines.
 * NOTE(review): the embedded line numbers skip in places (for example
 * 1291 -> 1295 in the parameter list), so RequiresDSC, OutBpp, DSCEnable,
 * LinkDSCEnable and dummy are used without a visible declaration, and the
 * closing braces / else lines are not shown; confirm against the full file.
 */
1271 void dml32_CalculateOutputLink(
1272 double PHYCLKPerState,
1273 double PHYCLKD18PerState,
1274 double PHYCLKD32PerState,
1275 double Downspreading,
1276 bool IsMainSurfaceUsingTheIndicatedTiming,
1277 enum output_encoder_class Output,
1278 enum output_format_class OutputFormat,
1279 unsigned int HTotal,
1280 unsigned int HActive,
1281 double PixelClockBackEnd,
1282 double ForcedOutputLinkBPP,
1283 unsigned int DSCInputBitPerComponent,
1284 unsigned int NumberOfDSCSlices,
1285 double AudioSampleRate,
1286 unsigned int AudioSampleLayout,
1287 enum odm_combine_mode ODMModeNoDSC,
1288 enum odm_combine_mode ODMModeDSC,
1290 unsigned int OutputLinkDPLanes,
1291 enum dm_output_link_dp_rate OutputLinkDPRate,
1295 double *RequiresFEC,
1297 enum dm_output_type *OutputType,
1298 enum dm_output_rate *OutputRate,
1299 unsigned int *RequiredSlots)
// Defaults reported when the surface does not drive this timing or no
// branch below matches.
1303 *RequiresDSC = false;
1304 *RequiresFEC = false;
1306 *OutputType = dm_output_type_unknown;
1307 *OutputRate = dm_output_rate_unknown;
1309 if (IsMainSurfaceUsingTheIndicatedTiming) {
// HDMI: never DSC/FEC; link rate capped via dml_min(600, PHYCLKPerState).
1310 if (Output == dm_hdmi) {
1311 *RequiresDSC = false;
1312 *RequiresFEC = false;
1313 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1314 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1315 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1316 ODMModeNoDSC, ODMModeDSC, &dummy);
1317 //OutputTypeAndRate = "HDMI";
1318 *OutputType = dm_output_type_hdmi;
1320 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
// Initial DSC/FEC state: with DSCEnable, DSC is on and FEC is required for
// dm_dp and dm_dp2p0; without it, FEC is required only for dm_dp2p0.
1321 if (DSCEnable == true) {
1322 *RequiresDSC = true;
1323 LinkDSCEnable = true;
1324 if (Output == dm_dp || Output == dm_dp2p0)
1325 *RequiresFEC = true;
1327 *RequiresFEC = false;
1329 *RequiresDSC = false;
1330 LinkDSCEnable = false;
1331 if (Output == dm_dp2p0)
1332 *RequiresFEC = true;
1334 *RequiresFEC = false;
1336 if (Output == dm_dp2p0) {
// UHBR10: needs a D32 PHY clock of at least 10000 / 32 = 312.5.
1338 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1339 PHYCLKD32PerState >= 10000.0 / 32) {
1340 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1341 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1342 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1343 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1344 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// Retry with DSC only if UHBR13.5 is out of reach for this PHY clock.
1345 if (*OutBpp == 0 && PHYCLKD32PerState < 13500.0 / 32 && DSCEnable == true &&
1346 ForcedOutputLinkBPP == 0) {
1347 *RequiresDSC = true;
1348 LinkDSCEnable = true;
1349 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1350 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1351 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1352 OutputFormat, DSCInputBitPerComponent,
1353 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1354 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1356 //OutputTypeAndRate = Output & " UHBR10";
1357 *OutputType = dm_output_type_dp2p0;
1358 *OutputRate = dm_output_rate_dp_rate_uhbr10;
// UHBR13.5: tried only while no bpp was found; needs 13500 / 32 = 421.875.
1360 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1361 *OutBpp == 0 && PHYCLKD32PerState >= 13500.0 / 32) {
1362 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1363 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1364 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1365 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1366 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// Retry with DSC only if UHBR20 is out of reach for this PHY clock.
1368 if (*OutBpp == 0 && PHYCLKD32PerState < 20000.0 / 32 && DSCEnable == true &&
1369 ForcedOutputLinkBPP == 0) {
1370 *RequiresDSC = true;
1371 LinkDSCEnable = true;
1372 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1373 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1374 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1375 OutputFormat, DSCInputBitPerComponent,
1376 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1377 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1379 //OutputTypeAndRate = Output & " UHBR13p5";
1380 *OutputType = dm_output_type_dp2p0;
1381 *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
// UHBR20: highest DP2.0 rate, so the DSC retry has no PHY clock condition.
1383 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1384 *OutBpp == 0 && PHYCLKD32PerState >= 20000.0 / 32) {
1385 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1386 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1387 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1388 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1389 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1390 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1391 *RequiresDSC = true;
1392 LinkDSCEnable = true;
1393 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1394 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1395 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1396 OutputFormat, DSCInputBitPerComponent,
1397 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1398 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1400 //OutputTypeAndRate = Output & " UHBR20";
1401 *OutputType = dm_output_type_dp2p0;
1402 *OutputRate = dm_output_rate_dp_rate_uhbr20;
// Non-DP2.0 path (presumably the else of the dm_dp2p0 test; that line is not
// shown). HBR: link rate 2700, needs PHYCLKPerState >= 270.
1406 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1407 PHYCLKPerState >= 270) {
1408 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1409 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1410 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1411 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1412 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// Retry with DSC (and FEC for dm_dp) only if HBR2 is out of reach.
1413 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1414 ForcedOutputLinkBPP == 0) {
1415 *RequiresDSC = true;
1416 LinkDSCEnable = true;
1417 if (Output == dm_dp)
1418 *RequiresFEC = true;
1419 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1420 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1421 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1422 OutputFormat, DSCInputBitPerComponent,
1423 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1424 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1426 //OutputTypeAndRate = Output & " HBR";
1427 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1428 *OutputRate = dm_output_rate_dp_rate_hbr;
// HBR2: link rate 5400, needs PHYCLKPerState >= 540.
1430 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1431 *OutBpp == 0 && PHYCLKPerState >= 540) {
1432 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1433 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1434 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1435 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1436 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// Retry with DSC (and FEC for dm_dp) only if HBR3 is out of reach.
1438 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1439 ForcedOutputLinkBPP == 0) {
1440 *RequiresDSC = true;
1441 LinkDSCEnable = true;
1442 if (Output == dm_dp)
1443 *RequiresFEC = true;
1445 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1446 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1447 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1448 OutputFormat, DSCInputBitPerComponent,
1449 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1450 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1452 //OutputTypeAndRate = Output & " HBR2";
1453 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1454 *OutputRate = dm_output_rate_dp_rate_hbr2;
// HBR3: link rate 8100, needs PHYCLKPerState >= 810; highest rate on this
// path, so the DSC retry has no PHY clock condition.
1456 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1457 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1458 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1459 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1460 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1461 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1464 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1465 *RequiresDSC = true;
1466 LinkDSCEnable = true;
1467 if (Output == dm_dp)
1468 *RequiresFEC = true;
1470 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1471 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1472 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1473 OutputFormat, DSCInputBitPerComponent,
1474 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1475 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1477 //OutputTypeAndRate = Output & " HBR3";
1478 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1479 *OutputRate = dm_output_rate_dp_rate_hbr3;
/*
 * dml32_CalculateDPPCLK - derive the per-surface DPPCLK values and the
 * global DPPCLK they are expressed against.
 *
 * Step 1: for every active surface,
 *           Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k]
 *                       * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
 *         and *GlobalDPPCLK accumulates the maximum of these values.
 * Step 2: *GlobalDPPCLK is passed through dml32_RoundToDFSGranularity()
 *         with round_up = 1.
 * Step 3: each Dppclk[k] is re-expressed as (*GlobalDPPCLK / 255) times
 *         dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0), i.e. in steps of
 *         1/255 of the global clock (assuming dml_ceil() rounds up to a
 *         multiple of its second argument).
 *
 * NOTE(review): the Dppclk[] parameter, the declaration of k and the initial
 * value of *GlobalDPPCLK are on lines this listing omits (embedded numbers
 * 1492-1498 are partly skipped). No guard against DPPPerSurface[k] == 0 or
 * *GlobalDPPCLK == 0 is visible; confirm whether callers guarantee that.
 */
1486 void dml32_CalculateDPPCLK(
1487 unsigned int NumberOfActiveSurfaces,
1488 double DISPCLKDPPCLKDSCCLKDownSpreading,
1489 double DISPCLKDPPCLKVCOSpeed,
1490 double DPPCLKUsingSingleDPP[],
1491 unsigned int DPPPerSurface[],
1494 double *GlobalDPPCLK,
// Per-surface clock with down-spreading applied; track the largest one.
1499 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1500 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
1501 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
// Round the global clock via the DFS helper, then quantize every surface
// clock to a multiple of GlobalDPPCLK / 255.
1503 *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
1504 for (k = 0; k < NumberOfActiveSurfaces; ++k)
1505 Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
/*
 * dml32_TruncToValidBPP - validate, or pick, an output bits-per-pixel value
 * that the link can carry.
 *
 * Per-format limits are set first; the visible lines show MaxDSCBPP:
 *   dm_420:  1.5 x DSCInputBitPerComponent - 1/16
 *   dm_444:  3   x DSCInputBitPerComponent - 1/16
 *   dm_n422: 2   x DSCInputBitPerComponent - 1/16
 *   last visible branch: 3 x DSCInputBitPerComponent - 1/16
 *
 * MaxLinkBPP is then derived from LinkBitRate, Lanes and PixelClock:
 *   dm_dp2p0:           rate x lanes / pixel clock, scaled by 128/132,
 *                       383/384 and 65536/65540
 *   DSCEnable && dm_dp: rate / 10 x 8 x lanes / pixel clock, scaled by
 *                       (1 - 2.4 / 100)
 *   last visible case:  rate / 10 x 8 x lanes / pixel clock
 * and adjusted for ODM: capped at 16 for 4:1, capped at 32 for 2:1, doubled
 * for dm_odm_split_mode_1to2. The adjustment appears twice, keyed on
 * ODMModeDSC and on ODMModeNoDSC (presumably selected by DSCEnable; the
 * selecting line is not shown).
 *
 * With DesiredBPP == 0 a value is chosen from MaxLinkBPP (the visible DSC
 * result is MaxLinkBPP rounded down to a multiple of 1/16; the non-DSC
 * path compares against NonDSCBPP3..NonDSCBPP0 from largest to smallest).
 * Otherwise DesiredBPP is checked: without DSC it must equal one of the
 * NonDSCBPP values, with DSC it must lie in [MinDSCBPP, MaxDSCBPP].
 * *RequiredSlots is set to dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1).
 *
 * Fix in this revision: the dm_420 limit subtracted "1 / 16", which is an
 * integer division in C and evaluates to 0, so the 1/16 bpp step was never
 * subtracted for 4:2:0. It now uses 1.0 / 16 like the other format branches.
 *
 * NOTE(review): several parameters (LinkBitRate, Lanes, PixelClock,
 * DesiredBPP, DSCEnable), the declarations of MaxLinkBPP/MaxDSCBPP, the
 * assignments to MinDSCBPP/NonDSCBPPn and every return statement but one
 * fall on lines this listing omits; confirm against the complete file.
 */
1508 double dml32_TruncToValidBPP(
1511 unsigned int HTotal,
1512 unsigned int HActive,
1516 enum output_encoder_class Output,
1517 enum output_format_class Format,
1518 unsigned int DSCInputBitPerComponent,
1519 unsigned int DSCSlices,
1520 unsigned int AudioRate,
1521 unsigned int AudioLayout,
1522 enum odm_combine_mode ODMModeNoDSC,
1523 enum odm_combine_mode ODMModeDSC,
1525 unsigned int *RequiredSlots)
1528 unsigned int MinDSCBPP;
1530 unsigned int NonDSCBPP0;
1531 unsigned int NonDSCBPP1;
1532 unsigned int NonDSCBPP2;
1533 unsigned int NonDSCBPP3;
// Per-format bpp limits.
1535 if (Format == dm_420) {
// 1.0 / 16 (not 1 / 16): the subtraction must happen in floating point.
1540 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
1541 } else if (Format == dm_444) {
1547 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1549 if (Output == dm_hdmi) {
1558 if (Format == dm_n422) {
1560 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1563 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
// Highest bpp the link itself can carry for this encoder type.
1566 if (Output == dm_dp2p0) {
1567 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1568 } else if (DSCEnable && Output == dm_dp) {
1569 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1571 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
// ODM adjustment using the DSC-case ODM mode.
1575 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1576 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1577 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1578 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1579 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1580 MaxLinkBPP = 2 * MaxLinkBPP;
// Same adjustment using the non-DSC ODM mode.
1582 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1583 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1584 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1585 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1586 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1587 MaxLinkBPP = 2 * MaxLinkBPP;
// No bpp requested: choose one from MaxLinkBPP. Most of the return
// statements of this section are not part of this listing.
1590 if (DesiredBPP == 0) {
1592 if (MaxLinkBPP < MinDSCBPP)
1594 else if (MaxLinkBPP >= MaxDSCBPP)
// In between: round MaxLinkBPP down to a multiple of 1/16 bpp.
1597 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1599 if (MaxLinkBPP >= NonDSCBPP3)
1601 else if (MaxLinkBPP >= NonDSCBPP2)
1603 else if (MaxLinkBPP >= NonDSCBPP1)
1605 else if (MaxLinkBPP >= NonDSCBPP0)
// A requested bpp is valid only if it is one of the fixed non-DSC values
// (DSC off) or lies within [MinDSCBPP, MaxDSCBPP] (DSC on).
1611 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1612 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1613 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
// Share of the link used by DesiredBPP, expressed in 64ths and rounded up.
1619 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1622 } // TruncToValidBPP
/*
 * dml32_RequiredDTBCLK - DTBCLK required for one output.
 *
 * Without DSC the result is dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0).
 *
 * With DSC:
 *   PixelWordRate      = PixelClock for dm_444, PixelClock / 2 otherwise
 *   HCActive           = ceil(DSCSlices * ceil(OutputBpp *
 *                             ceil(HActive / DSCSlices) / 8) / 3)
 *   AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal
 *   HActiveTribyteRate = PixelWordRate * HCActive / HActive
 *   result             = max(PixelWordRate / 4, AverageTribyteRate / 4,
 *                            HActiveTribyteRate / 4, 25.0) * 1.002
 *
 * Fix in this revision: HActive and DSCSlices are both unsigned int, so
 * "HActive / DSCSlices" truncated before dml_ceil() saw it and the ceil had
 * no effect. The quotient is now computed in double so the per-slice width
 * is rounded up when HActive is not a multiple of DSCSlices.
 *
 * NOTE(review): DSCEnable, OutputBpp and PixelClock are used below but their
 * parameter declarations, the declarations of HCActive/HCBlank and the start
 * of the HCBlank assignment are on lines this listing omits (the embedded
 * numbering skips 1625/1626, 1628, 1636/1637 and 1647); confirm against the
 * complete file.
 */
1624 double dml32_RequiredDTBCLK(
1627 enum output_format_class OutputFormat,
1629 unsigned int DSCSlices,
1630 unsigned int HTotal,
1631 unsigned int HActive,
1632 unsigned int AudioRate,
1633 unsigned int AudioLayout)
1635 double PixelWordRate;
1638 double AverageTribyteRate;
1639 double HActiveTribyteRate;
// Non-DSC case: scale by bpp relative to 24, with a floor of 25.0.
1641 if (DSCEnable != true)
1642 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1644 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
// Slice width is rounded up, converted to bytes (/ 8.0, rounded up), summed
// over all slices and expressed in 3-byte units (/ 3.0, rounded up).
1645 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1646 dml_ceil((double) HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
// Tail of the HCBlank expression; its first line is not part of this listing.
1648 dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1649 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1650 HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1651 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
/*
 * dml32_DSCDelayRequirement - DSC pipeline delay for one output.
 *
 * When DSCEnabled is set and OutputBpp is non-zero, the delay is
 *   dml32_dscceComputeDelay(bpc, bpp, slice width, slices per engine,
 *                           format, output)
 *   + dml32_dscComputeDelay(format, output)
 * where the slice width is ceil(HActive / NumberOfDSCSlices). For ODM 4:1
 * (2:1) the slice count passed is NumberOfDSCSlices / 4 (/ 2) and the sum is
 * multiplied by 4 (2). The delay is then extended by (HTotal - HActive) for
 * every started multiple of HActive it contains, and finally rescaled by
 * PixelClock / PixelClockBackEnd. In the remaining visible case the result
 * is 0.
 *
 * Fix in this revision: "HActive / NumberOfDSCSlices" and
 * "DSCDelayRequirement_val / HActive" were unsigned integer divisions, so
 * they truncated before dml_ceil() was applied and the ceil had no effect.
 * Both quotients are now computed in double. In particular a delay shorter
 * than HActive now adds one (HTotal - HActive) term instead of none.
 *
 * Returns the delay as an unsigned int.
 *
 * NOTE(review): OutputBpp and PixelClock are used below but their parameter
 * declarations, the else lines and the closing #endif are on lines this
 * listing omits (the embedded numbering skips 1657 and 1663, among others);
 * confirm against the complete file.
 */
1654 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1655 enum odm_combine_mode ODMMode,
1656 unsigned int DSCInputBitPerComponent,
1658 unsigned int HActive,
1659 unsigned int HTotal,
1660 unsigned int NumberOfDSCSlices,
1661 enum output_format_class OutputFormat,
1662 enum output_encoder_class Output,
1664 double PixelClockBackEnd)
1666 unsigned int DSCDelayRequirement_val;
1668 if (DSCEnabled == true && OutputBpp != 0) {
// ODM 4:1: a quarter of the slices per call, four times the delay.
1669 if (ODMMode == dm_odm_combine_mode_4to1) {
1670 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1671 dml_ceil((double) HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1672 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
// ODM 2:1: half of the slices per call, twice the delay.
1673 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1674 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1675 dml_ceil((double) HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1676 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
// Remaining ODM modes: all slices in one call.
1678 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1679 dml_ceil((double) HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1680 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
// Add (HTotal - HActive) once per started multiple of HActive in the delay.
1683 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1684 dml_ceil((double) DSCDelayRequirement_val / HActive, 1);
// Rescale by the ratio of PixelClock to PixelClockBackEnd.
1686 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
// DSC off or no bpp: no delay (presumably the else branch; line not shown).
1689 DSCDelayRequirement_val = 0;
1692 #ifdef __DML_VBA_DEBUG__
1693 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
1694 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
1695 dml_print("DML::%s: HActive = %d\n", __func__, HActive);
1696 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
1697 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1698 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
1699 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1702 return DSCDelayRequirement_val;
1705 void dml32_CalculateSurfaceSizeInMall(
1706 unsigned int NumberOfActiveSurfaces,
1707 unsigned int MALLAllocatedForDCN,
1708 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1710 bool ViewportStationary[],
1711 unsigned int ViewportXStartY[],
1712 unsigned int ViewportYStartY[],
1713 unsigned int ViewportXStartC[],
1714 unsigned int ViewportYStartC[],
1715 unsigned int ViewportWidthY[],
1716 unsigned int ViewportHeightY[],
1717 unsigned int BytesPerPixelY[],
1718 unsigned int ViewportWidthC[],
1719 unsigned int ViewportHeightC[],
1720 unsigned int BytesPerPixelC[],
1721 unsigned int SurfaceWidthY[],
1722 unsigned int SurfaceWidthC[],
1723 unsigned int SurfaceHeightY[],
1724 unsigned int SurfaceHeightC[],
1725 unsigned int Read256BytesBlockWidthY[],
1726 unsigned int Read256BytesBlockWidthC[],
1727 unsigned int Read256BytesBlockHeightY[],
1728 unsigned int Read256BytesBlockHeightC[],
1729 unsigned int ReadBlockWidthY[],
1730 unsigned int ReadBlockWidthC[],
1731 unsigned int ReadBlockHeightY[],
1732 unsigned int ReadBlockHeightC[],
1735 unsigned int SurfaceSizeInMALL[],
1736 bool *ExceededMALLSize)
1738 unsigned int TotalSurfaceSizeInMALL = 0;
1741 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1742 if (ViewportStationary[k]) {
1743 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1744 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1745 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1746 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1747 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1748 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1749 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1751 if (ReadBlockWidthC[k] > 0) {
1752 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1753 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1754 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1755 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1756 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1757 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1758 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1759 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1760 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1763 if (DCCEnable[k] == true) {
1764 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1765 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1766 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1767 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1768 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1769 * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1770 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1771 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1772 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1773 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1774 if (Read256BytesBlockWidthC[k] > 0) {
1775 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1776 dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1777 Read256BytesBlockWidthC[k]),
1778 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1779 * Read256BytesBlockWidthC[k] - 1, 8 *
1780 Read256BytesBlockWidthC[k]) -
1781 dml_floor(ViewportXStartC[k], 8 *
1782 Read256BytesBlockWidthC[k])) *
1783 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1784 Read256BytesBlockHeightC[k]),
1785 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1786 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1787 Read256BytesBlockHeightC[k]) -
1788 dml_floor(ViewportYStartC[k], 8 *
1789 Read256BytesBlockHeightC[k])) *
1790 BytesPerPixelC[k] / 256;
1794 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1795 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1796 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1797 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1799 if (ReadBlockWidthC[k] > 0) {
1800 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1801 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1802 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1803 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1804 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1807 if (DCCEnable[k] == true) {
1808 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1809 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1810 Read256BytesBlockWidthY[k] - 1), 8 *
1811 Read256BytesBlockWidthY[k]) *
1812 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1813 Read256BytesBlockHeightY[k] - 1), 8 *
1814 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1816 if (Read256BytesBlockWidthC[k] > 0) {
1817 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1818 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1819 Read256BytesBlockWidthC[k] - 1), 8 *
1820 Read256BytesBlockWidthC[k]) *
1821 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1822 Read256BytesBlockHeightC[k] - 1), 8 *
1823 Read256BytesBlockHeightC[k]) *
1824 BytesPerPixelC[k] / 256;
1830 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1831 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1832 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1834 *ExceededMALLSize = (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
1835 } // CalculateSurfaceSizeInMall
/*
 * dml32_CalculateVMRowAndSwath() - per-surface page-table (PTE/PDE) and DCC
 * meta-row sizing.
 *
 * Flow, as far as the statements below show:
 *   1. For every k < NumberOfActiveSurfaces: set vm_group_bytes[k] and
 *      dpte_group_bytes[k], split the PTE request budget between luma and
 *      chroma, call dml32_CalculateVMAndRowBytes() and
 *      dml32_CalculatePrefetchSourceLines() for chroma (only for the 420 and
 *      rgbe_alpha source formats) and for luma, then record whether the PTE
 *      rows fit in the request budget.
 *   2. Call dml32_CalculateMALLUseForStaticScreen() once for all surfaces.
 *   3. Fill PTE_BUFFER_MODE[] and BIGK_FRAGMENT_SIZE[].
 *   4. Per surface, switch to the "one row per frame" values when
 *      use_one_row_for_frame[k] is set, check the DCC meta buffer size, and
 *      call dml32_CalculateRowBandwidth().
 *
 * Nothing is returned; results are written through the array parameters.
 */
1837 void dml32_CalculateVMRowAndSwath(
1838 unsigned int NumberOfActiveSurfaces,
1840 unsigned int SurfaceSizeInMALL[],
1841 unsigned int PTEBufferSizeInRequestsLuma,
1842 unsigned int PTEBufferSizeInRequestsChroma,
1843 unsigned int DCCMetaBufferSizeBytes,
1844 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1845 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1846 unsigned int MALLAllocatedForDCN,
1847 double SwathWidthY[],
1848 double SwathWidthC[],
1851 unsigned int HostVMMaxNonCachedPageTableLevels,
1852 unsigned int GPUVMMaxPageTableLevels,
1853 unsigned int GPUVMMinPageSizeKBytes[],
1854 unsigned int HostVMMinPageSize,
/*
 * The parameters from here on are written below, or have their elements
 * passed by address to the helpers -- presumably all outputs; TODO confirm
 * for the ones whose use is not spelled out in this function body.
 */
1857 bool PTEBufferSizeNotExceeded[],
1858 bool DCCMetaBufferSizeNotExceeded[],
1859 unsigned int dpte_row_width_luma_ub[],
1860 unsigned int dpte_row_width_chroma_ub[],
1861 unsigned int dpte_row_height_luma[],
1862 unsigned int dpte_row_height_chroma[],
1863 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
1864 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
1865 unsigned int meta_req_width[],
1866 unsigned int meta_req_width_chroma[],
1867 unsigned int meta_req_height[],
1868 unsigned int meta_req_height_chroma[],
1869 unsigned int meta_row_width[],
1870 unsigned int meta_row_width_chroma[],
1871 unsigned int meta_row_height[],
1872 unsigned int meta_row_height_chroma[],
1873 unsigned int vm_group_bytes[],
1874 unsigned int dpte_group_bytes[],
1875 unsigned int PixelPTEReqWidthY[],
1876 unsigned int PixelPTEReqHeightY[],
1877 unsigned int PTERequestSizeY[],
1878 unsigned int PixelPTEReqWidthC[],
1879 unsigned int PixelPTEReqHeightC[],
1880 unsigned int PTERequestSizeC[],
1881 unsigned int dpde0_bytes_per_frame_ub_l[],
1882 unsigned int meta_pte_bytes_per_frame_ub_l[],
1883 unsigned int dpde0_bytes_per_frame_ub_c[],
1884 unsigned int meta_pte_bytes_per_frame_ub_c[],
1885 double PrefetchSourceLinesY[],
1886 double PrefetchSourceLinesC[],
1887 double VInitPreFillY[],
1888 double VInitPreFillC[],
1889 unsigned int MaxNumSwathY[],
1890 unsigned int MaxNumSwathC[],
1891 double meta_row_bw[],
1892 double dpte_row_bw[],
1893 double PixelPTEBytesPerRow[],
1894 double PDEAndMetaPTEBytesFrame[],
1895 double MetaRowByte[],
1896 bool use_one_row_for_frame[],
1897 bool use_one_row_for_frame_flip[],
1898 bool UsesMALLForStaticScreen[],
1899 bool PTE_BUFFER_MODE[],
1900 unsigned int BIGK_FRAGMENT_SIZE[])
/*
 * Per-surface scratch values. The *_one_row_per_frame arrays hold the
 * alternative results that replace the regular ones in the last loop when
 * use_one_row_for_frame[k] is set.
 */
1903 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1904 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1905 unsigned int PDEAndMetaPTEBytesFrameY;
1906 unsigned int PDEAndMetaPTEBytesFrameC;
1907 unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1908 unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1909 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1910 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1911 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1912 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1913 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1914 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1915 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1916 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1917 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
/* Loop 1: group sizes, PTE budget split, and luma/chroma VM + prefetch sizing. */
1919 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
/*
 * Group sizes: 512/512 with HostVM; with GPUVM only, VM groups are 2048 and
 * dpte groups are 512 for pages >= 64 KB with vertical rotation, else 2048
 * (presumably); 0/0 when neither is enabled (presumably the final branch).
 */
1920 if (HostVMEnable == true) {
1921 vm_group_bytes[k] = 512;
1922 dpte_group_bytes[k] = 512;
1923 } else if (GPUVMEnable == true) {
1924 vm_group_bytes[k] = 2048;
1925 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1926 dpte_group_bytes[k] = 512;
1928 dpte_group_bytes[k] = 2048;
1930 vm_group_bytes[k] = 0;
1931 dpte_group_bytes[k] = 0;
/* Source formats for which chroma-side values are computed. */
1934 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
1935 myPipe[k].SourcePixelFormat == dm_420_12 ||
1936 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
/*
 * dm_420_10 / dm_420_12 without vertical rotation: luma and chroma each get
 * half of the combined request budget; the other assignments keep the two
 * budgets separate (presumably the else branch).
 */
1937 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
1938 !IsVertical(myPipe[k].SourceRotation)) {
1939 PTEBufferSizeInRequestsForLuma[k] =
1940 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
1941 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
1943 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
1944 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
/* Chroma plane: VM/row byte sizing, using the ...C members of myPipe[k]. */
1947 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
1948 myPipe[k].ViewportStationary,
1949 myPipe[k].DCCEnable,
1950 myPipe[k].DPPPerSurface,
1951 myPipe[k].BlockHeight256BytesC,
1952 myPipe[k].BlockWidth256BytesC,
1953 myPipe[k].SourcePixelFormat,
1954 myPipe[k].SurfaceTiling,
1955 myPipe[k].BytePerPixelC,
1956 myPipe[k].SourceRotation,
1958 myPipe[k].ViewportHeightChroma,
1959 myPipe[k].ViewportXStartC,
1960 myPipe[k].ViewportYStartC,
1963 HostVMMaxNonCachedPageTableLevels,
1964 GPUVMMaxPageTableLevels,
1965 GPUVMMinPageSizeKBytes[k],
1967 PTEBufferSizeInRequestsForChroma[k],
1969 myPipe[k].DCCMetaPitchC,
1970 myPipe[k].BlockWidthC,
1971 myPipe[k].BlockHeightC,
1975 &PixelPTEBytesPerRowC[k],
1976 &dpte_row_width_chroma_ub[k],
1977 &dpte_row_height_chroma[k],
1978 &dpte_row_height_linear_chroma[k],
1979 &PixelPTEBytesPerRowC_one_row_per_frame[k],
1980 &dpte_row_width_chroma_ub_one_row_per_frame[k],
1981 &dpte_row_height_chroma_one_row_per_frame[k],
1982 &meta_req_width_chroma[k],
1983 &meta_req_height_chroma[k],
1984 &meta_row_width_chroma[k],
1985 &meta_row_height_chroma[k],
1986 &PixelPTEReqWidthC[k],
1987 &PixelPTEReqHeightC[k],
1988 &PTERequestSizeC[k],
1989 &dpde0_bytes_per_frame_ub_c[k],
1990 &meta_pte_bytes_per_frame_ub_c[k]);
/* Chroma plane: prefetch source lines. */
1992 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
1993 myPipe[k].VRatioChroma,
1994 myPipe[k].VTapsChroma,
1995 myPipe[k].InterlaceEnable,
1996 myPipe[k].ProgressiveToInterlaceUnitInOPP,
1997 myPipe[k].SwathHeightC,
1998 myPipe[k].SourceRotation,
1999 myPipe[k].ViewportStationary,
2001 myPipe[k].ViewportHeightChroma,
2002 myPipe[k].ViewportXStartC,
2003 myPipe[k].ViewportYStartC,
/*
 * Presumably the branch for all other formats: luma gets the combined request
 * budget and every chroma-side value is zeroed.
 */
2009 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2010 PTEBufferSizeInRequestsForChroma[k] = 0;
2011 PixelPTEBytesPerRowC[k] = 0;
2012 PDEAndMetaPTEBytesFrameC = 0;
2013 MetaRowByteC[k] = 0;
2014 MaxNumSwathC[k] = 0;
2015 PrefetchSourceLinesC[k] = 0;
2016 dpte_row_height_chroma_one_row_per_frame[k] = 0;
2017 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2018 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
/* Luma plane: VM/row byte sizing, using the ...Y / unsuffixed members of myPipe[k]. */
2021 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2022 myPipe[k].ViewportStationary,
2023 myPipe[k].DCCEnable,
2024 myPipe[k].DPPPerSurface,
2025 myPipe[k].BlockHeight256BytesY,
2026 myPipe[k].BlockWidth256BytesY,
2027 myPipe[k].SourcePixelFormat,
2028 myPipe[k].SurfaceTiling,
2029 myPipe[k].BytePerPixelY,
2030 myPipe[k].SourceRotation,
2032 myPipe[k].ViewportHeight,
2033 myPipe[k].ViewportXStart,
2034 myPipe[k].ViewportYStart,
2037 HostVMMaxNonCachedPageTableLevels,
2038 GPUVMMaxPageTableLevels,
2039 GPUVMMinPageSizeKBytes[k],
2041 PTEBufferSizeInRequestsForLuma[k],
2043 myPipe[k].DCCMetaPitchY,
2044 myPipe[k].BlockWidthY,
2045 myPipe[k].BlockHeightY,
2049 &PixelPTEBytesPerRowY[k],
2050 &dpte_row_width_luma_ub[k],
2051 &dpte_row_height_luma[k],
2052 &dpte_row_height_linear_luma[k],
2053 &PixelPTEBytesPerRowY_one_row_per_frame[k],
2054 &dpte_row_width_luma_ub_one_row_per_frame[k],
2055 &dpte_row_height_luma_one_row_per_frame[k],
2057 &meta_req_height[k],
2059 &meta_row_height[k],
2060 &PixelPTEReqWidthY[k],
2061 &PixelPTEReqHeightY[k],
2062 &PTERequestSizeY[k],
2063 &dpde0_bytes_per_frame_ub_l[k],
2064 &meta_pte_bytes_per_frame_ub_l[k]);
/* Luma plane: prefetch source lines. */
2066 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2069 myPipe[k].InterlaceEnable,
2070 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2071 myPipe[k].SwathHeightY,
2072 myPipe[k].SourceRotation,
2073 myPipe[k].ViewportStationary,
2075 myPipe[k].ViewportHeight,
2076 myPipe[k].ViewportXStart,
2077 myPipe[k].ViewportYStart,
/* Per-surface totals are the luma + chroma sums. */
2083 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2084 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
/*
 * Fit test: PTE bytes per row must not exceed 64 * the request budget, for
 * luma and chroma alike.
 */
2086 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2087 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2088 PTEBufferSizeNotExceeded[k] = true;
2090 PTEBufferSizeNotExceeded[k] = false;
/* Same test for the one-row-per-frame sizes, against 64 * 2 * the budget. */
2093 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2094 PTEBufferSizeInRequestsForLuma[k] &&
2095 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
/* Decide which surfaces use MALL for static screen; fills UsesMALLForStaticScreen[]. */
2098 dml32_CalculateMALLUseForStaticScreen(
2099 NumberOfActiveSurfaces,
2100 MALLAllocatedForDCN,
2101 UseMALLForStaticScreen, // mode
2103 one_row_per_frame_fits_in_buffer,
2105 UsesMALLForStaticScreen); // boolean
/* Loop 2: PTE buffer mode and fragment size per surface. */
2107 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2108 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2109 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2110 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2111 (GPUVMMinPageSizeKBytes[k] > 64);
/* log2 of the minimum page size in bytes, minus 12. */
2112 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
/* Loop 3: select final row values, check the meta buffer, compute row bandwidth. */
2115 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2116 #ifdef __DML_VBA_DEBUG__
2117 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
2118 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
/*
 * Same condition as PTE_BUFFER_MODE[k] except for the last term, which here
 * also requires vertical rotation.
 */
2120 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2121 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2122 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2123 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
/* Flip variant: additionally requires that the P-state mode is not full_frame. */
2125 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2126 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
/* Replace the regular row values with the one-row-per-frame ones. */
2128 if (use_one_row_for_frame[k]) {
2129 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2130 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2131 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2132 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2133 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2134 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2135 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
/* DCC meta buffer check against the combined luma + chroma meta row bytes. */
2138 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2139 DCCMetaBufferSizeNotExceeded[k] = true;
2141 DCCMetaBufferSizeNotExceeded[k] = false;
/* Combined PTE bytes per row; halved when one row is used for the frame. */
2143 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2144 if (use_one_row_for_frame[k])
2145 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
/* Row bandwidth; the line time passed in is HTotal / PixelClock. */
2147 dml32_CalculateRowBandwidth(
2149 myPipe[k].SourcePixelFormat,
2151 myPipe[k].VRatioChroma,
2152 myPipe[k].DCCEnable,
2153 myPipe[k].HTotal / myPipe[k].PixelClock,
2154 MetaRowByteY[k], MetaRowByteC[k],
2156 meta_row_height_chroma[k],
2157 PixelPTEBytesPerRowY[k],
2158 PixelPTEBytesPerRowC[k],
2159 dpte_row_height_luma[k],
2160 dpte_row_height_chroma[k],
2165 #ifdef __DML_VBA_DEBUG__
2166 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
2167 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
2168 __func__, k, use_one_row_for_frame_flip[k]);
2169 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
2170 __func__, k, UseMALLForPStateChange[k]);
2171 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
2172 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
2173 __func__, k, dpte_row_width_luma_ub[k]);
2174 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
2175 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
2176 __func__, k, dpte_row_height_chroma[k]);
2177 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
2178 __func__, k, dpte_row_width_chroma_ub[k]);
2179 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
/*
 * NOTE(review): PixelPTEBytesPerRow[] is declared double in this signature
 * but is printed with %d on the next line -- the specifier should match.
 */
2180 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
2181 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
2182 __func__, k, PTEBufferSizeNotExceeded[k]);
2183 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2184 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2187 } // CalculateVMRowAndSwath
/*
 * dml32_CalculateVMAndRowBytes() - page-table and DCC-meta sizing for one
 * plane of one surface.
 *
 * Writes the meta request/row geometry, the PTE request geometry, and the PTE
 * bytes per row (regular and "one row per frame" variants) through the
 * pointer parameters.
 *
 * Return: PDEAndMetaPTEBytesFrame, i.e.
 *   *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame,
 * multiplied by (1 + 8 * HostVMDynamicLevels) when HostVMEnable is true.
 *
 * NOTE(review): divides by Pitch, BytePerPixel, MacroTileHeight and the
 * computed request sizes without checking for zero -- assumes callers pass
 * non-zero values; TODO confirm.
 */
2189 unsigned int dml32_CalculateVMAndRowBytes(
2190 bool ViewportStationary,
2192 unsigned int NumberOfDPPs,
2193 unsigned int BlockHeight256Bytes,
2194 unsigned int BlockWidth256Bytes,
2195 enum source_format_class SourcePixelFormat,
2196 unsigned int SurfaceTiling,
2197 unsigned int BytePerPixel,
2198 enum dm_rotation_angle SourceRotation,
2200 unsigned int ViewportHeight,
2201 unsigned int ViewportXStart,
2202 unsigned int ViewportYStart,
2205 unsigned int HostVMMaxNonCachedPageTableLevels,
2206 unsigned int GPUVMMaxPageTableLevels,
2207 unsigned int GPUVMMinPageSizeKBytes,
2208 unsigned int HostVMMinPageSize,
2209 unsigned int PTEBufferSizeInRequests,
2211 unsigned int DCCMetaPitch,
2212 unsigned int MacroTileWidth,
2213 unsigned int MacroTileHeight,
/* Pointer parameters below are written by this function. */
2216 unsigned int *MetaRowByte,
2217 unsigned int *PixelPTEBytesPerRow,
2218 unsigned int *dpte_row_width_ub,
2219 unsigned int *dpte_row_height,
2220 unsigned int *dpte_row_height_linear,
2221 unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
2222 unsigned int *dpte_row_width_ub_one_row_per_frame,
2223 unsigned int *dpte_row_height_one_row_per_frame,
2224 unsigned int *MetaRequestWidth,
2225 unsigned int *MetaRequestHeight,
2226 unsigned int *meta_row_width,
2227 unsigned int *meta_row_height,
2228 unsigned int *PixelPTEReqWidth,
2229 unsigned int *PixelPTEReqHeight,
2230 unsigned int *PTERequestSize,
2231 unsigned int *DPDE0BytesFrame,
2232 unsigned int *MetaPTEBytesFrame)
/*
 * NOTE(review): MPDEBytesFrame has no initializer and the only assignment in
 * this body sits under GPUVMEnable == true; confirm every path sets it before
 * it is summed into PDEAndMetaPTEBytesFrame.
 */
2234 unsigned int MPDEBytesFrame;
2235 unsigned int DCCMetaSurfaceBytes;
2236 unsigned int ExtraDPDEBytesFrame;
2237 unsigned int PDEAndMetaPTEBytesFrame;
2238 unsigned int HostVMDynamicLevels = 0;
2239 unsigned int MacroTileSizeBytes;
2240 unsigned int vp_height_meta_ub;
2241 unsigned int vp_height_dpte_ub;
2242 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
/*
 * HostVM dynamic levels: the full non-cached level count for page sizes below
 * 2048, one fewer below 1048576, and (presumably otherwise) two fewer; the
 * reduced counts are clamped at 0.
 */
2244 if (GPUVMEnable == true && HostVMEnable == true) {
2245 if (HostVMMinPageSize < 2048)
2246 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2247 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2248 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2250 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
/* A meta request is 8x the 256-byte block in each dimension. */
2253 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2254 *MetaRequestWidth = 8 * BlockWidth256Bytes;
/*
 * Meta row geometry:
 *  - linear tiling: row height fixed at 32, width is the viewport span
 *    expanded to MetaRequestWidth boundaries;
 *  - non-vertical rotation: row height is one request high; width is the
 *    aligned viewport span when stationary with a single DPP, and
 *    ceil(SwathWidth - 1) plus one request otherwise (presumably the else);
 *  - remaining case: same scheme with width/height roles swapped.
 */
2255 if (SurfaceTiling == dm_sw_linear) {
2256 *meta_row_height = 32;
2257 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2258 - dml_floor(ViewportXStart, *MetaRequestWidth);
2259 } else if (!IsVertical(SourceRotation)) {
2260 *meta_row_height = *MetaRequestHeight;
2261 if (ViewportStationary && NumberOfDPPs == 1) {
2262 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2263 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2265 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2267 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2269 *meta_row_height = *MetaRequestWidth;
2270 if (ViewportStationary && NumberOfDPPs == 1) {
2271 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2272 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2274 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2276 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
/* Viewport height upper bound for meta data, in steps of 64 * BlockHeight256Bytes. */
2279 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2280 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2281 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2282 } else if (!IsVertical(SourceRotation)) {
2283 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2285 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2288 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
/* Meta PTE bytes per frame and MPDE bytes; only computed with GPUVM enabled. */
2290 if (GPUVMEnable == true) {
2291 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2292 (8 * 4.0 * 1024), 1) + 1) * 64;
2293 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2295 *MetaPTEBytesFrame = 0;
/* Without DCC there is no meta PTE traffic. */
2299 if (DCCEnable != true) {
2300 *MetaPTEBytesFrame = 0;
2305 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
/*
 * DPDE0 bytes and the data-PTE viewport height upper bound (in MacroTileHeight
 * steps); only computed with GPUVM enabled and more than one page-table
 * level, zeroed otherwise (presumably the else branch).
 */
2307 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2308 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2309 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2310 MacroTileHeight - 1, MacroTileHeight) -
2311 dml_floor(ViewportYStart, MacroTileHeight);
2312 } else if (!IsVertical(SourceRotation)) {
2313 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2315 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2317 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2318 (8 * 2097152), 1) + 1);
2319 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2321 *DPDE0BytesFrame = 0;
2322 ExtraDPDEBytesFrame = 0;
2323 vp_height_dpte_ub = 0;
2326 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2328 #ifdef __DML_VBA_DEBUG__
2329 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2330 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2331 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2332 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2333 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2334 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2335 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2336 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2337 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2338 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2339 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2340 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2341 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2342 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2343 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
/* NOTE(review): confirm %d matches the declared type of SwathWidth. */
2344 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2345 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
/* HostVM scales the per-frame bytes by (1 + 8 * HostVMDynamicLevels). */
2348 if (HostVMEnable == true)
2349 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
/*
 * PTE request shape:
 *  - linear: 1 line high, width derived from the page size, 64-byte request;
 *  - 4 KB pages: 16x the 256-byte block in each dimension, 128-byte request;
 *  - remaining case: one macro tile high, width derived from the page size,
 *    64-byte request.
 */
2351 if (SurfaceTiling == dm_sw_linear) {
2352 *PixelPTEReqHeight = 1;
2353 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2354 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2355 *PTERequestSize = 64;
2356 } else if (GPUVMMinPageSizeKBytes == 4) {
2357 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2358 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2359 *PTERequestSize = 128;
2361 *PixelPTEReqHeight = MacroTileHeight;
2362 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2363 *PTERequestSize = 64;
2365 #ifdef __DML_VBA_DEBUG__
2366 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2367 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2368 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2369 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2370 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2371 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2372 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
/*
 * "One row per frame" variant: the row height is the whole data-PTE viewport
 * height upper bound computed above.
 */
2375 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2376 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2377 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2378 (double) *PixelPTEReqWidth;
2379 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
/*
 * Regular row geometry.
 * Linear: row height is 2^floor(log2(PTEBufferSizeInRequests *
 * *PixelPTEReqWidth / Pitch)), capped at 128.
 * NOTE(review): the shift count comes straight from dml_log2(); assumes the
 * quotient is >= 1 so the count is non-negative and in range -- TODO confirm.
 */
2382 if (SurfaceTiling == dm_sw_linear) {
2383 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2384 *PixelPTEReqWidth / Pitch), 1));
2385 #ifdef __DML_VBA_DEBUG__
2386 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2387 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2388 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2389 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2390 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2391 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2392 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2393 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2394 *PixelPTEReqWidth / Pitch), 1));
2395 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2397 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2398 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2399 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2401 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2402 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2403 PixelPTEReqWidth_linear / Pitch), 1);
2404 if (*dpte_row_height_linear > 128)
2405 *dpte_row_height_linear = 128;
/*
 * Tiled, non-vertical rotation: one PTE request high. Width is pitch-based
 * for pages > 64 KB, the aligned viewport span when stationary with a single
 * DPP, and swath-based otherwise (presumably the else branch).
 */
2407 } else if (!IsVertical(SourceRotation)) {
2408 *dpte_row_height = *PixelPTEReqHeight;
2410 if (GPUVMMinPageSizeKBytes > 64) {
2411 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2412 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2413 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2414 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2415 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2416 dml_floor(ViewportXStart, *PixelPTEReqWidth);
2418 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2422 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
/* Remaining case: row height is min(request width, macro tile width); widths are measured in request heights. */
2424 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2426 if (ViewportStationary && (NumberOfDPPs == 1)) {
2427 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2428 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2430 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2431 * *PixelPTEReqHeight;
2434 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
/* No PTE row traffic without GPUVM; HostVM scales it like the per-frame bytes. */
2437 if (GPUVMEnable != true)
2438 *PixelPTEBytesPerRow = 0;
2439 if (HostVMEnable == true)
2440 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2442 #ifdef __DML_VBA_DEBUG__
2443 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2444 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2445 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2446 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2447 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2448 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2449 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2450 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2451 __func__, *dpte_row_width_ub_one_row_per_frame);
2452 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2453 __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2454 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2455 *MetaPTEBytesFrame);
2458 return PDEAndMetaPTEBytesFrame;
2459 } // CalculateVMAndRowBytes
/*
 * dml32_CalculatePrefetchSourceLines() - number of source lines to prefetch
 * for one plane.
 *
 * Writes *VInitPreFill and *MaxNumSwath and computes
 *   numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath
 * (presumably the value returned -- TODO confirm).
 *
 * NOTE(review): SwathHeight is used as a divisor and modulus throughout;
 * assumes callers never pass 0 -- TODO confirm.
 */
2461 double dml32_CalculatePrefetchSourceLines(
2465 bool ProgressiveToInterlaceUnitInOPP,
2466 unsigned int SwathHeight,
2467 enum dm_rotation_angle SourceRotation,
2468 bool ViewportStationary,
2470 unsigned int ViewportHeight,
2471 unsigned int ViewportXStart,
2472 unsigned int ViewportYStart,
/* Written by this function. */
2475 double *VInitPreFill,
2476 unsigned int *MaxNumSwath)
2479 unsigned int vp_start_rot;
2480 unsigned int sw0_tmp;
2481 unsigned int MaxPartialSwath;
2484 #ifdef __DML_VBA_DEBUG__
2485 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2486 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2487 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2488 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2489 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2490 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
/*
 * Initial prefill: floor((VRatio + VTaps + 1) / 2); the second form adds
 * Interlace * 0.5 * VRatio to the numerator (presumably the else branch, used
 * when ProgressiveToInterlaceUnitInOPP is false).
 */
2492 if (ProgressiveToInterlaceUnitInOPP)
2493 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2495 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
/*
 * Stationary viewport: derive the start position within a swath from the
 * rotation, then count how many swaths the prefill spans beyond the lines
 * left in the first swath (sw0_tmp).
 */
2497 if (ViewportStationary) {
2498 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2499 vp_start_rot = SwathHeight -
2500 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2501 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2502 vp_start_rot = ViewportXStart;
2503 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2504 vp_start_rot = SwathHeight -
2505 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2507 vp_start_rot = ViewportYStart;
2509 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2510 if (sw0_tmp < *VInitPreFill)
2511 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2514 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
/*
 * Presumably the non-stationary branch: the swath count and partial swath
 * depend only on *VInitPreFill and SwathHeight.
 */
2516 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2517 if (*VInitPreFill > 1)
2518 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2520 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
/* Whole swaths plus the partial swath (at least 1 line). */
2522 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2524 #ifdef __DML_VBA_DEBUG__
2525 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
/*
 * NOTE(review): *VInitPreFill is a double (see the parameter list) but is
 * printed with %d on the next line -- the specifier should be %f.
 */
2526 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2527 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2528 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2529 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2533 } // CalculatePrefetchSourceLines
/*
 * dml32_CalculateMALLUseForStaticScreen() - choose which surfaces use MALL
 * for static screen.
 *
 * Step 1: every surface whose mode is dm_use_mall_static_screen_enable is
 * selected and its SurfaceSizeInMALL[k] is added to the running total.
 * Step 2: repeatedly pick, among the surfaces that are not yet selected, not
 * set to dm_use_mall_static_screen_disable, and whose one-row-per-frame PTEs
 * fit, the smallest one that still keeps the total within
 * MALLAllocatedForDCNFinal * 1024 * 1024; select it and add its size. Stop
 * when no surface qualifies.
 *
 * Result is written to UsesMALLForStaticScreen[].
 */
2535 void dml32_CalculateMALLUseForStaticScreen(
2536 unsigned int NumberOfActiveSurfaces,
2537 unsigned int MALLAllocatedForDCNFinal,
2538 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2539 unsigned int SurfaceSizeInMALL[],
2540 bool one_row_per_frame_fits_in_buffer[],
2543 bool UsesMALLForStaticScreen[])
2546 unsigned int SurfaceToAddToMALL;
2547 bool CanAddAnotherSurfaceToMALL;
2548 unsigned int TotalSurfaceSizeInMALL;
/* Step 1: surfaces explicitly enabled for MALL are always selected. */
2550 TotalSurfaceSizeInMALL = 0;
2551 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2552 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2553 if (UsesMALLForStaticScreen[k])
2554 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2555 #ifdef __DML_VBA_DEBUG__
2556 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2557 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
/* Step 2: keep adding the smallest remaining candidate while one still fits. */
2561 SurfaceToAddToMALL = 0;
2562 CanAddAnotherSurfaceToMALL = true;
2563 while (CanAddAnotherSurfaceToMALL) {
2564 CanAddAnotherSurfaceToMALL = false;
2565 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
/*
 * The last term accepts the first qualifying surface unconditionally and
 * afterwards only a strictly smaller one, so the scan ends on the smallest
 * candidate.
 */
2566 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2567 !UsesMALLForStaticScreen[k] &&
2568 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2569 one_row_per_frame_fits_in_buffer[k] &&
2570 (!CanAddAnotherSurfaceToMALL ||
2571 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2572 CanAddAnotherSurfaceToMALL = true;
2573 SurfaceToAddToMALL = k;
2574 #ifdef __DML_VBA_DEBUG__
2575 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2576 __func__, k, UseMALLForStaticScreen[k]);
/* Commit the chosen surface and account for its size. */
2580 if (CanAddAnotherSurfaceToMALL) {
2581 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2582 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2584 #ifdef __DML_VBA_DEBUG__
2585 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
2586 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
/*
 * dml32_CalculateRowBandwidth - compute the meta row and DPTE row bandwidth
 * of one surface.
 *
 * For both outputs the visible formula is
 *     VRatio * bytes_per_row / (row_height * LineTime)
 * for luma, plus the same term built from the chroma inputs (VRatioChroma,
 * chroma bytes and chroma row height) when SourcePixelFormat is dm_420_8,
 * dm_420_10, dm_420_12 or dm_rgbe_alpha.
 *
 * Outputs: *meta_row_bw (selected by DCCEnable), *dpte_row_bw (selected by
 *          GPUVMEnable).
 *
 * NOTE(review): the statements executed when DCCEnable / GPUVMEnable is not
 * true, the "else" lines, and the declarations of DCCEnable, GPUVMEnable,
 * VRatio and LineTime are not visible in this listing - presumably the
 * disabled case stores 0; confirm against the full source.
 * NOTE(review): no guard against a zero row height or LineTime is visible.
 */
2593 void dml32_CalculateRowBandwidth(
2595 enum source_format_class SourcePixelFormat,
2597 double VRatioChroma,
2600 unsigned int MetaRowByteLuma,
2601 unsigned int MetaRowByteChroma,
2602 unsigned int meta_row_height_luma,
2603 unsigned int meta_row_height_chroma,
2604 unsigned int PixelPTEBytesPerRowLuma,
2605 unsigned int PixelPTEBytesPerRowChroma,
2606 unsigned int dpte_row_height_luma,
2607 unsigned int dpte_row_height_chroma,
2609 double *meta_row_bw,
2610 double *dpte_row_bw)
/* Meta row bandwidth: luma + chroma for the listed formats, else luma only. */
2612 if (DCCEnable != true) {
2614 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2615 SourcePixelFormat == dm_rgbe_alpha) {
2616 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2617 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2619 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
/* DPTE row bandwidth: same structure, using the PTE bytes and row heights. */
2622 if (GPUVMEnable != true) {
2624 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2625 SourcePixelFormat == dm_rgbe_alpha) {
2626 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2627 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2629 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * dml32_CalculateUrgentLatency - combine the three urgent latency figures
 * into one value.
 *
 * ret starts as the largest of UrgentLatencyPixelDataOnly,
 * UrgentLatencyPixelMixedWithVMData and UrgentLatencyVMDataOnly.  When
 * DoUrgentLatencyAdjustment is true it is increased by
 *     UrgentLatencyAdjustmentFabricClockComponent *
 *     (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * NOTE(review): the FabricClock parameter, the declaration of ret and the
 * return statement are not visible in this listing; presumably ret is the
 * return value.  No guard against FabricClock == 0 is visible.
 */
2633 double dml32_CalculateUrgentLatency(
2634 double UrgentLatencyPixelDataOnly,
2635 double UrgentLatencyPixelMixedWithVMData,
2636 double UrgentLatencyVMDataOnly,
2637 bool DoUrgentLatencyAdjustment,
2638 double UrgentLatencyAdjustmentFabricClockComponent,
2639 double UrgentLatencyAdjustmentFabricClockReference,
2644 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
/* The adjustment term is negative when FabricClock exceeds the reference. */
2645 if (DoUrgentLatencyAdjustment == true) {
2646 ret = ret + UrgentLatencyAdjustmentFabricClockComponent *
2647 (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * dml32_CalculateUrgentBurstFactor - derive the urgent burst factors for the
 * cursor, luma and chroma paths and flag insufficient latency hiding.
 *
 * For each path the code converts the available buffering into a time
 * (lines * LineTime / VRatio).  If that time minus UrgentLatency is <= 0 the
 * factor is set to 0 and *NotEnoughUrgentLatencyHiding is set to 1;
 * otherwise the factor is  time / (time - UrgentLatency).
 *
 * Outputs: *UrgentBurstFactorCursor, *UrgentBurstFactorLuma,
 *          *UrgentBurstFactorChroma, *NotEnoughUrgentLatencyHiding
 *
 * NOTE(review): several short lines (else branches, closing braces, possibly
 * further guards, and the declarations of LineTime and VRatio) are not
 * visible in this listing, so the exact condition that selects each "= 1"
 * fallback cannot be confirmed from here.
 */
2652 void dml32_CalculateUrgentBurstFactor(
2653 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2654 unsigned int swath_width_luma_ub,
2655 unsigned int swath_width_chroma_ub,
2656 unsigned int SwathHeightY,
2657 unsigned int SwathHeightC,
2659 double UrgentLatency,
2660 double CursorBufferSize,
2661 unsigned int CursorWidth,
2662 unsigned int CursorBPP,
2665 double BytePerPixelInDETY,
2666 double BytePerPixelInDETC,
2667 unsigned int DETBufferSizeY,
2668 unsigned int DETBufferSizeC,
2670 double *UrgentBurstFactorCursor,
2671 double *UrgentBurstFactorLuma,
2672 double *UrgentBurstFactorChroma,
2673 bool *NotEnoughUrgentLatencyHiding)
2675 double LinesInDETLuma;
2676 double LinesInDETChroma;
2677 unsigned int LinesInCursorBuffer;
2678 double CursorBufferSizeInTime;
2679 double DETBufferSizeInTimeLuma;
2680 double DETBufferSizeInTimeChroma;
2682 *NotEnoughUrgentLatencyHiding = 0;
/*
 * Cursor: lines in the cursor buffer are rounded down to a power of two of
 * (CursorBufferSize * 1024) / (bytes per cursor line).
 * NOTE(review): assumes dml_log2 is a base-2 logarithm and that the ratio is
 * >= 1; a negative floor() result converted to unsigned int, or a shift
 * count >= the width of int, would be undefined - confirm caller ranges.
 */
2684 if (CursorWidth > 0) {
2685 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2686 (CursorWidth * CursorBPP / 8.0)), 1.0);
2688 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2689 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2690 *NotEnoughUrgentLatencyHiding = 1;
2691 *UrgentBurstFactorCursor = 0;
2693 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2694 (CursorBufferSizeInTime - UrgentLatency);
2697 *UrgentBurstFactorCursor = 1;
/*
 * Luma: a fixed 1024*1024 replaces DETBufferSizeY when the surface is a
 * phantom pipe; the line count is then rounded down to a multiple of
 * SwathHeightY before being converted to time.
 */
2701 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2702 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2705 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2706 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2707 *NotEnoughUrgentLatencyHiding = 1;
2708 *UrgentBurstFactorLuma = 0;
2710 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2713 *UrgentBurstFactorLuma = 1;
/*
 * Chroma: only evaluated when the surface has a chroma plane.
 * NOTE(review): the chroma time is divided by VRatio, the same ratio used
 * for luma - confirm whether a chroma-specific ratio was intended.
 */
2716 if (BytePerPixelInDETC > 0) {
2717 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2718 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2719 / swath_width_chroma_ub;
2722 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2723 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2724 *NotEnoughUrgentLatencyHiding = 1;
2725 *UrgentBurstFactorChroma = 0;
2727 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2728 / (DETBufferSizeInTimeChroma - UrgentLatency);
2731 *UrgentBurstFactorChroma = 1;
2734 } // CalculateUrgentBurstFactor
/*
 * dml32_CalculateDCFCLKDeepSleep - compute the deep-sleep DCFCLK value.
 *
 * Per surface k:
 *   - a luma and a chroma "display pipe line delivery time" is derived from
 *     the swath width, using the HRatio / PixelClock form when the vertical
 *     ratio is <= 1 and the PSCL throughput form otherwise (chroma time is 0
 *     when BytePerPixelC[k] == 0);
 *   - DCFClkDeepSleepPerSurface[k] is __DML_MIN_DCFCLK_FACTOR__ times the
 *     bytes per line divided by 32 (two-plane case, max of luma and chroma)
 *     or by 64 (luma only) and by the delivery time, and is then raised to
 *     at least PixelClock[k] / 16.
 * Globally:
 *   - *DCFClkDeepSleep starts as
 *     max(8.0, __DML_MIN_DCFCLK_FACTOR__ * total read bandwidth / ReturnBusWidth)
 *     and is then raised to the largest per-surface value.
 *
 * Output: *DCFClkDeepSleep
 *
 * NOTE(review): DCFClkDeepSleepPerSurface has DC__NUM_DPP__MAX entries; this
 * assumes NumberOfActiveSurfaces <= DC__NUM_DPP__MAX - confirm.
 * NOTE(review): some short lines (declarations of k, VRatio[], HRatio[],
 * Dppclk[], expression tails, else / #endif / braces) are not visible in
 * this listing.
 */
2736 void dml32_CalculateDCFCLKDeepSleep(
2737 unsigned int NumberOfActiveSurfaces,
2738 unsigned int BytePerPixelY[],
2739 unsigned int BytePerPixelC[],
2741 double VRatioChroma[],
2742 double SwathWidthY[],
2743 double SwathWidthC[],
2744 unsigned int DPPPerSurface[],
2746 double HRatioChroma[],
2747 double PixelClock[],
2748 double PSCL_THROUGHPUT[],
2749 double PSCL_THROUGHPUT_CHROMA[],
2751 double ReadBandwidthLuma[],
2752 double ReadBandwidthChroma[],
2753 unsigned int ReturnBusWidth,
2756 double *DCFClkDeepSleep)
2759 double DisplayPipeLineDeliveryTimeLuma;
2760 double DisplayPipeLineDeliveryTimeChroma;
2761 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2762 double ReadBandwidth = 0.0;
/* Per-surface requirement. */
2764 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2766 if (VRatio[k] <= 1) {
2767 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2770 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2772 if (BytePerPixelC[k] == 0) {
2773 DisplayPipeLineDeliveryTimeChroma = 0;
2775 if (VRatioChroma[k] <= 1) {
2776 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2777 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2779 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
/* Two-plane surfaces divide by 32.0 per plane; single-plane ones by 64.0. */
2784 if (BytePerPixelC[k] > 0) {
2785 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2786 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2787 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2788 32.0 / DisplayPipeLineDeliveryTimeChroma);
2790 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2791 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* Floor: never below one sixteenth of the pixel clock. */
2793 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2795 #ifdef __DML_VBA_DEBUG__
2796 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2797 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
/* Bandwidth-based requirement over all surfaces, with a floor of 8.0. */
2801 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2802 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2804 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2806 #ifdef __DML_VBA_DEBUG__
2807 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2808 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2809 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2810 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
/* The final value is the maximum of the global and all per-surface values. */
2813 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2814 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2815 #ifdef __DML_VBA_DEBUG__
2816 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2818 } // CalculateDCFCLKDeepSleep
/*
 * dml32_CalculateWriteBackDelay - compute the writeback delay value.
 *
 * Steps visible here:
 *   WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2
 *   Line_length    = max(WritebackDestinationWidth,
 *                        ceil(WritebackDestinationWidth / 6) * WritebackVTaps)
 *   Output_lines_last_notclamped =
 *        WritebackDestinationHeight - 1 -
 *        ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio)
 * The result is 0 when Output_lines_last_notclamped is negative, otherwise
 *   Output_lines_last_notclamped * Line_length +
 *   (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are not read by any visible line.
 *
 * NOTE(review): "HTotal - WritebackDestinationWidth" and
 * "WritebackDestinationHeight - 1" are evaluated in unsigned int and wrap if
 * the destination width exceeds HTotal or the height is 0 - confirm callers
 * exclude those inputs.
 * NOTE(review): the declaration of Line_length and the else line are not
 * visible in this listing.
 */
2820 double dml32_CalculateWriteBackDelay(
2821 enum source_format_class WritebackPixelFormat,
2822 double WritebackHRatio,
2823 double WritebackVRatio,
2824 unsigned int WritebackVTaps,
2825 unsigned int WritebackDestinationWidth,
2826 unsigned int WritebackDestinationHeight,
2827 unsigned int WritebackSourceHeight,
2828 unsigned int HTotal)
2830 double CalculateWriteBackDelay;
2832 double Output_lines_last_notclamped;
2833 double WritebackVInit;
2835 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2836 Line_length = dml_max((double) WritebackDestinationWidth,
2837 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2838 Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2839 dml_ceil(((double)WritebackSourceHeight -
2840 (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
/* A negative line count is clamped to a delay of 0. */
2841 if (Output_lines_last_notclamped < 0) {
2842 CalculateWriteBackDelay = 0;
2844 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2845 (HTotal - WritebackDestinationWidth) + 80;
2847 return CalculateWriteBackDelay;
/*
 * dml32_UseMinimumDCFCLK - compute, for every voltage state i and every
 * j in {0, 1}, the DCFCLK value stored in DCFCLKState[i][j].
 *
 * For each (i, j) the visible code:
 *   1. sums the DPTE row bandwidth of all surfaces
 *      (TotalMaxPrefetchFlipDPTERowBandwidth[i][j]);
 *   2. derives DCFCLKRequiredForAverageBandwidth as the larger of
 *      ProjectedDCFClkDeepSleep[i][j] and that bandwidth scaled by
 *      NormalEfficiency and ReturnBusWidth;
 *   3. derives ExtraLatencyCycles from RoundTripPingLatencyCycles,
 *      __DML_ARB_TO_RET_DELAY__ and dml32_CalculateExtraLatencyBytes();
 *   4. computes a per-surface peak requirement from the prefetch time that
 *      is available, falling back to DCFCLKPerState[i] on the assignments
 *      shown below (their guarding else lines are not visible);
 *   5. sums the per-surface values, applies the Tvm + 2*Tr0 check per
 *      surface, and finally stores
 *      min(DCFCLKPerState[i], 1.05 * max(average, peak)).
 *
 * Output: DCFCLKState[][2]
 *
 * All surface loops are written as "k < NumberOfActiveSurfaces".  The count
 * is unsigned, so the form "k <= NumberOfActiveSurfaces - 1" would wrap to
 * UINT_MAX for a count of 0 and run past the arrays.
 *
 * NOTE(review): MinimumTWait is assigned inside the per-surface loop and is
 * read again by the later Tvm + 2*Tr0 loop, where it holds the value from
 * the last surface - confirm that this is intended.
 * NOTE(review): several short lines (some parameters, local declarations
 * such as TSetupPipe/TdmbfPipe/TdmecPipe/TdmsksPipe, call arguments, else
 * lines and braces) are not visible in this listing; dummy1..dummy3 are not
 * referenced by any visible line.
 */
2850 void dml32_UseMinimumDCFCLK(
2851 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2853 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2854 unsigned int MaxInterDCNTileRepeaters,
2855 unsigned int MaxPrefetchMode,
2856 double DRAMClockChangeLatencyFinal,
2857 double FCLKChangeLatency,
2858 double SREnterPlusExitTime,
2859 unsigned int ReturnBusWidth,
2860 unsigned int RoundTripPingLatencyCycles,
2861 unsigned int ReorderingBytes,
2862 unsigned int PixelChunkSizeInKByte,
2863 unsigned int MetaChunkSize,
2865 unsigned int GPUVMMaxPageTableLevels,
2867 unsigned int NumberOfActiveSurfaces,
2868 double HostVMMinPageSize,
2869 unsigned int HostVMMaxNonCachedPageTableLevels,
2870 bool DynamicMetadataVMEnabled,
2871 bool ImmediateFlipRequirement,
2872 bool ProgressiveToInterlaceUnitInOPP,
2873 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2874 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2875 unsigned int VTotal[],
2876 unsigned int VActive[],
2877 unsigned int DynamicMetadataTransmittedBytes[],
2878 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2880 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2881 double RequiredDISPCLK[][2],
2882 double UrgLatency[],
2883 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2884 double ProjectedDCFClkDeepSleep[][2],
2885 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2886 unsigned int TotalNumberOfActiveDPP[][2],
2887 unsigned int TotalNumberOfDCCActiveDPP[][2],
2888 unsigned int dpte_group_bytes[],
2889 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2890 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2891 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2892 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2893 unsigned int BytePerPixelY[],
2894 unsigned int BytePerPixelC[],
2895 unsigned int HTotal[],
2896 double PixelClock[],
2897 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2898 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2899 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2900 bool DynamicMetadataEnable[],
2901 double ReadBandwidthLuma[],
2902 double ReadBandwidthChroma[],
2903 double DCFCLKPerState[],
2905 double DCFCLKState[][2])
2907 unsigned int i, j, k;
2908 unsigned int dummy1;
2909 double dummy2, dummy3;
2910 double NormalEfficiency;
2911 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
/* The percentage input is converted to a fraction once, up front. */
2913 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2914 for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2915 for (j = 0; j <= 1; ++j) {
2916 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2917 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2918 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2919 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2920 double MinimumTWait = 0.0;
2921 double DPTEBandwidth;
2922 double DCFCLKRequiredForAverageBandwidth;
2923 unsigned int ExtraLatencyBytes;
2924 double ExtraLatencyCycles;
2925 double DCFCLKRequiredForPeakBandwidth;
2926 unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2927 double MinimumTvmPlus2Tr0;
/* Step 1: DPTE row bytes of every surface spread over 15.75 * HTotal / PixelClock. */
2929 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2930 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2931 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2932 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2933 / (15.75 * HTotal[k] / PixelClock[k]);
/* Flat per-surface copy of the DPP counts for this (i, j). */
2936 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2937 NoOfDPPState[k] = NoOfDPP[i][j][k];
/* Step 2: average-bandwidth requirement. */
2939 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
2940 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
/* Step 3: extra latency, first in bytes and then in DCFCLK cycles. */
2942 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
2943 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
2944 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
2945 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
2946 HostVMMaxNonCachedPageTableLevels);
2947 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
2948 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
/* Step 4: per-surface peak-bandwidth requirement. */
2949 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2950 double DCFCLKCyclesRequiredInPrefetch;
2951 double PrefetchTime;
2953 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
2954 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
2955 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
2956 * BytePerPixelC[k]) / NormalEfficiency
2958 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
2959 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
2960 / NormalEfficiency / ReturnBusWidth
2961 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
2962 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
2964 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
2965 + PixelDCFCLKCyclesRequiredInPrefetch[k];
2966 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
2967 * HTotal[k] / PixelClock[k];
/* Non-zero only when GPUVM, dynamic metadata and its VM path are all enabled. */
2968 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
2969 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
2970 UrgLatency[i] * GPUVMMaxPageTableLevels *
2971 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
2973 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
2974 UseMALLForPStateChange[k],
2975 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2977 DRAMClockChangeLatencyFinal,
2980 SREnterPlusExitTime);
/* Time left for prefetch after the wait, page-table trips and metadata latency. */
2982 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
2983 MinimumTWait - UrgLatency[i] *
2984 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
2985 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
2986 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
2987 DynamicMetadataVMExtraLatency[k];
2989 if (PrefetchTime > 0) {
2990 double ExpectedVRatioPrefetch;
2992 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
2993 PixelDCFCLKCyclesRequiredInPrefetch[k] /
2994 DCFCLKCyclesRequiredInPrefetch);
2995 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
2996 PixelDCFCLKCyclesRequiredInPrefetch[k] /
2997 PrefetchPixelLinesTime[k] *
2998 dml_max(1.0, ExpectedVRatioPrefetch) *
2999 dml_max(1.0, ExpectedVRatioPrefetch / 4);
3000 if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3001 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3002 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3003 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3004 NormalEfficiency / ReturnBusWidth;
3007 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
/* Dynamic metadata adds a further bound from the time left for urgent extra latency. */
3009 if (DynamicMetadataEnable[k] == true) {
3014 double AllowedTimeForUrgentExtraLatency;
3016 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3017 MaxInterDCNTileRepeaters,
3018 RequiredDPPCLKPerSurface[i][j][k],
3019 RequiredDISPCLK[i][j],
3020 ProjectedDCFClkDeepSleep[i][j],
3023 VTotal[k] - VActive[k],
3024 DynamicMetadataTransmittedBytes[k],
3025 DynamicMetadataLinesBeforeActiveRequired[k],
3027 ProgressiveToInterlaceUnitInOPP,
3037 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3038 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3039 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3040 if (AllowedTimeForUrgentExtraLatency > 0)
3041 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3042 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3043 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3045 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
/* Step 5: total peak requirement is the sum over all surfaces. */
3048 DCFCLKRequiredForPeakBandwidth = 0;
3049 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3050 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3051 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3053 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3054 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3055 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
/* Any surface without enough time forces the state's full DCFCLK. */
3056 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3057 double MaximumTvmPlus2Tr0PlusTsw;
3059 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3060 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3061 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3062 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3064 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3065 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3066 MinimumTvmPlus2Tr0 -
3067 PrefetchPixelLinesTime[k] / 4),
3068 (2 * ExtraLatencyCycles +
3069 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3070 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
/* Result: 5% margin over the larger requirement, capped at the state's DCFCLK. */
3073 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3074 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * dml32_CalculateExtraLatencyBytes - accumulate the extra latency byte count.
 *
 * ret = ReorderingBytes
 *     + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *        + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 * and, when GPUVMEnable is true, for every surface k additionally
 *     NumberOfDPP[k] * dpte_group_bytes[k]
 *       * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is chosen from HostVMMinPageSize when both GPUVM and
 * HostVM are enabled:
 *     < 2048              -> HostVMMaxNonCachedPageTableLevels
 *     [2048, 1048576)     -> that value - 1, clamped at 0
 *     otherwise           -> that value - 2, clamped at 0
 * The remaining "= 0" assignment is presumably the branch taken when GPUVM
 * or HostVM is disabled (its else line is not visible).
 *
 * NOTE(review): the GPUVMEnable / HostVMEnable parameters, the declarations
 * of k and ret, and the return statement are not visible in this listing;
 * presumably ret is returned and converted to the unsigned int return type.
 * NOTE(review): the unit of HostVMMinPageSize is not established here.
 */
3079 unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
3080 unsigned int TotalNumberOfActiveDPP,
3081 unsigned int PixelChunkSizeInKByte,
3082 unsigned int TotalNumberOfDCCActiveDPP,
3083 unsigned int MetaChunkSize,
3086 unsigned int NumberOfActiveSurfaces,
3087 unsigned int NumberOfDPP[],
3088 unsigned int dpte_group_bytes[],
3089 double HostVMInefficiencyFactor,
3090 double HostVMMinPageSize,
3091 unsigned int HostVMMaxNonCachedPageTableLevels)
3095 unsigned int HostVMDynamicLevels;
/* The (int) cast lets the subtraction go negative so dml_max can clamp it to 0. */
3097 if (GPUVMEnable == true && HostVMEnable == true) {
3098 if (HostVMMinPageSize < 2048)
3099 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
3100 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
3101 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
3103 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
3105 HostVMDynamicLevels = 0;
/* Chunk sizes are multiplied by 1024.0 before being added to ReorderingBytes. */
3108 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
3109 TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
/* Page-table traffic is only counted when GPUVM is enabled. */
3111 if (GPUVMEnable == true) {
3112 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3113 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] *
3114 (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - compute the VUpdate /
 * VReady pixel offsets and the dynamic metadata timing terms.
 *
 * Values written by the visible lines:
 *   *VReadyOffsetPix  = ceil(max(150 / Dppclk,
 *                                TotalRepeaterDelayTime + 20 / DCFClkDeepSleep
 *                                + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix = ceil(HTotal / 4)
 *   *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix)
 *             / PixelClock
 *   *Tdmbf  = DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec  = HTotal / PixelClock
 *   *Tdmsks = VBlank * HTotal / PixelClock / 2 when
 *             DynamicMetadataLinesBeforeActiveRequired == 0, and
 *             DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock
 *             on the other assignment; it is halved when InterlaceEnable == 1
 *             and ProgressiveToInterlaceUnitInOPP is false.
 * where TotalRepeaterDelayTime =
 *             MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 *
 * The debug prints use %f for *VUpdateWidthPix and *VReadyOffsetPix because
 * both are double; %d would not match the argument type.
 *
 * NOTE(review): the Dppclk, Dispclk, PixelClock, TSetup, Tdmbf, Tdmec and
 * Tdmsks parameter lines, the line naming the target of the first dml_ceil()
 * expression (presumably *VUpdateWidthPix), and the else / #endif / brace
 * lines are not visible in this listing.
 */
3120 void dml32_CalculateVUpdateAndDynamicMetadataParameters(
3121 unsigned int MaxInterDCNTileRepeaters,
3124 double DCFClkDeepSleep,
3126 unsigned int HTotal,
3127 unsigned int VBlank,
3128 unsigned int DynamicMetadataTransmittedBytes,
3129 unsigned int DynamicMetadataLinesBeforeActiveRequired,
3130 unsigned int InterlaceEnable,
3131 bool ProgressiveToInterlaceUnitInOPP,
3138 unsigned int *VUpdateOffsetPix,
3139 double *VUpdateWidthPix,
3140 double *VReadyOffsetPix)
3142 double TotalRepeaterDelayTime;
3144 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
3146 dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
3147 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
3148 TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
3149 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
/* The three pixel counts are converted to time by dividing by PixelClock. */
3150 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
3151 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
3152 *Tdmec = HTotal / PixelClock;
/* With no explicit line count, half of the vertical blank is used. */
3154 if (DynamicMetadataLinesBeforeActiveRequired == 0)
3155 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3157 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
3159 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
3160 *Tdmsks = *Tdmsks / 2;
3161 #ifdef __DML_VBA_DEBUG__
3162 dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
3163 dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
3164 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
3166 dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
3167 __func__, DynamicMetadataLinesBeforeActiveRequired);
3168 dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
3169 dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
3170 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
3171 dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
/*
 * dml32_CalculateTWait - select the wait time from the prefetch mode and the
 * MALL / DRR configuration.
 *
 * Visible selection order:
 *   1. PrefetchMode == 0, UseMALLForPStateChange is none of full_frame,
 *      sub_viewport or phantom_pipe, and not
 *      (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay):
 *        max3(DRAMClockChangeLatency + UrgentLatency,
 *             SREnterPlusExitTime, UrgentLatency)
 *   2. PrefetchMode <= 1 and not phantom_pipe:
 *        max3(FCLKChangeLatency + UrgentLatency,
 *             SREnterPlusExitTime, UrgentLatency)
 *   3. PrefetchMode <= 2 and not phantom_pipe:
 *        max(SREnterPlusExitTime, UrgentLatency)
 *   4. remaining assignment: UrgentLatency
 *
 * NOTE(review): the DRRDisplay parameter, the declaration of TWait, the
 * final else line and the return statement are not visible in this listing;
 * presumably TWait is returned.
 */
3175 double dml32_CalculateTWait(
3176 unsigned int PrefetchMode,
3177 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3178 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3180 double DRAMClockChangeLatency,
3181 double FCLKChangeLatency,
3182 double UrgentLatency,
3183 double SREnterPlusExitTime)
3187 if (PrefetchMode == 0 &&
3188 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3189 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3190 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3191 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3192 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3193 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3194 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3195 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3196 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3198 TWait = UrgentLatency;
3201 #ifdef __DML_VBA_DEBUG__
3202 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3203 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3208 // Function: get_return_bw_mbps
3209 // Megabyte per second
/*
 * Computes the return bandwidth as the minimum of three limits, each scaled
 * by a percentage taken from *soc:
 *   SDP port : soc->return_bus_width_bytes * DCFCLK
 *              * pct_ideal_sdp_bw_after_urgent / 100
 *   fabric   : FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
 *              * pct_ideal_fabric_bw_after_urgent / 100
 *   DRAM     : DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
 *              * (pct_ideal_dram_bw_after_urgent_strobe when VoltageLevel < 2,
 *                 otherwise pct_ideal_dram_sdp_bw_after_urgent_pixel_only)
 *              / 100
 * ReturnBW is PixelDataOnlyReturnBW when HostVMEnable is not true and
 * PixelMixedWithVMDataReturnBW on the other assignment.
 *
 * NOTE(review): PixelDataOnlyReturnBW and PixelMixedWithVMDataReturnBW are
 * built from identical expressions (both use the *_pixel_only percentage),
 * so HostVMEnable does not change the result as written - confirm whether
 * the mixed case should use a different percentage field.
 * NOTE(review): the else line and the return statement are not visible in
 * this listing; presumably ReturnBW is returned.
 */
3210 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3211 const int VoltageLevel,
3212 const bool HostVMEnable,
3213 const double DCFCLK,
3214 const double FabricClock,
3215 const double DRAMSpeed)
3217 double ReturnBW = 0.;
3218 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3219 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3220 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3221 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3222 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3223 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3224 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3225 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3226 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3227 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3228 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
/* Select the figure that matches the HostVM setting. */
3230 if (HostVMEnable != true)
3231 ReturnBW = PixelDataOnlyReturnBW;
3233 ReturnBW = PixelMixedWithVMDataReturnBW;
3235 #ifdef __DML_VBA_DEBUG__
3236 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3237 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3238 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3239 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3240 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3241 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
3242 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
3243 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
3244 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
3245 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3246 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
3251 // Function: get_return_bw_mbps_vm_only
3252 // Megabyte per second
/*
 * Returns the VM-data-only return bandwidth: the minimum of
 *   SDP port : soc->return_bus_width_bytes * DCFCLK
 *              * pct_ideal_sdp_bw_after_urgent / 100
 *   fabric   : FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
 *              * pct_ideal_sdp_bw_after_urgent / 100
 *   DRAM     : DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
 *              * (pct_ideal_dram_bw_after_urgent_strobe when VoltageLevel < 2,
 *                 otherwise pct_ideal_dram_sdp_bw_after_urgent_vm_only)
 *              / 100
 *
 * NOTE(review): the fabric term is scaled by pct_ideal_sdp_bw_after_urgent,
 * whereas dml32_get_return_bw_mbps() scales its fabric term by
 * pct_ideal_fabric_bw_after_urgent - confirm which percentage is intended.
 */
3253 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3254 const int VoltageLevel,
3255 const double DCFCLK,
3256 const double FabricClock,
3257 const double DRAMSpeed)
3259 double VMDataOnlyReturnBW = dml_min3(
3260 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3261 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3262 * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3263 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3264 * (VoltageLevel < 2 ?
3265 soc->pct_ideal_dram_bw_after_urgent_strobe :
3266 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3267 #ifdef __DML_VBA_DEBUG__
3268 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3269 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3270 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3271 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3272 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3274 return VMDataOnlyReturnBW;
/*
 * dml32_CalculateExtraLatency - convert the extra latency byte count into a
 * latency value.
 *
 * The byte count comes from dml32_CalculateExtraLatencyBytes(); the result is
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *   + ExtraLatencyBytes / ReturnBW
 *
 * Returns: ExtraLatency.
 *
 * NOTE(review): the DCFCLK, ReturnBW, GPUVMEnable and HostVMEnable parameter
 * lines and several arguments of the helper call are not visible in this
 * listing.  No guard against DCFCLK == 0 or ReturnBW == 0 is visible.
 */
3277 double dml32_CalculateExtraLatency(
3278 unsigned int RoundTripPingLatencyCycles,
3279 unsigned int ReorderingBytes,
3281 unsigned int TotalNumberOfActiveDPP,
3282 unsigned int PixelChunkSizeInKByte,
3283 unsigned int TotalNumberOfDCCActiveDPP,
3284 unsigned int MetaChunkSize,
3288 unsigned int NumberOfActiveSurfaces,
3289 unsigned int NumberOfDPP[],
3290 unsigned int dpte_group_bytes[],
3291 double HostVMInefficiencyFactor,
3292 double HostVMMinPageSize,
3293 unsigned int HostVMMaxNonCachedPageTableLevels)
3295 double ExtraLatencyBytes;
3296 double ExtraLatency;
3298 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3300 TotalNumberOfActiveDPP,
3301 PixelChunkSizeInKByte,
3302 TotalNumberOfDCCActiveDPP,
3306 NumberOfActiveSurfaces,
3309 HostVMInefficiencyFactor,
3311 HostVMMaxNonCachedPageTableLevels);
/* Cycle-based part is divided by DCFCLK, byte-based part by ReturnBW. */
3313 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3315 #ifdef __DML_VBA_DEBUG__
3316 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3317 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3318 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3319 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3320 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3323 return ExtraLatency;
3324 } // CalculateExtraLatency
3326 bool dml32_CalculatePrefetchSchedule(
3327 double HostVMInefficiencyFactor,
3329 unsigned int DSCDelay,
3330 double DPPCLKDelaySubtotalPlusCNVCFormater,
3331 double DPPCLKDelaySCL,
3332 double DPPCLKDelaySCLLBOnly,
3333 double DPPCLKDelayCNVCCursor,
3334 double DISPCLKDelaySubtotal,
3335 unsigned int DPP_RECOUT_WIDTH,
3336 enum output_format_class OutputFormat,
3337 unsigned int MaxInterDCNTileRepeaters,
3338 unsigned int VStartup,
3339 unsigned int MaxVStartup,
3340 unsigned int GPUVMPageTableLevels,
3343 unsigned int HostVMMaxNonCachedPageTableLevels,
3344 double HostVMMinPageSize,
3345 bool DynamicMetadataEnable,
3346 bool DynamicMetadataVMEnabled,
3347 int DynamicMetadataLinesBeforeActiveRequired,
3348 unsigned int DynamicMetadataTransmittedBytes,
3349 double UrgentLatency,
3350 double UrgentExtraLatency,
3352 unsigned int PDEAndMetaPTEBytesFrame,
3353 unsigned int MetaRowByte,
3354 unsigned int PixelPTEBytesPerRow,
3355 double PrefetchSourceLinesY,
3356 unsigned int SwathWidthY,
3357 unsigned int VInitPreFillY,
3358 unsigned int MaxNumSwathY,
3359 double PrefetchSourceLinesC,
3360 unsigned int SwathWidthC,
3361 unsigned int VInitPreFillC,
3362 unsigned int MaxNumSwathC,
3363 unsigned int swath_width_luma_ub,
3364 unsigned int swath_width_chroma_ub,
3365 unsigned int SwathHeightY,
3366 unsigned int SwathHeightC,
3369 double *DSTXAfterScaler,
3370 double *DSTYAfterScaler,
3371 double *DestinationLinesForPrefetch,
3372 double *PrefetchBandwidth,
3373 double *DestinationLinesToRequestVMInVBlank,
3374 double *DestinationLinesToRequestRowInVBlank,
3375 double *VRatioPrefetchY,
3376 double *VRatioPrefetchC,
3377 double *RequiredPrefetchPixDataBWLuma,
3378 double *RequiredPrefetchPixDataBWChroma,
3379 bool *NotEnoughTimeForDynamicMetadata,
3381 double *prefetch_vmrow_bw,
3385 unsigned int *VUpdateOffsetPix,
3386 double *VUpdateWidthPix,
3387 double *VReadyOffsetPix)
3389 bool MyError = false;
3390 unsigned int DPPCycles, DISPCLKCycles;
3391 double DSTTotalPixelsAfterScaler;
3393 double dst_y_prefetch_equ;
3394 double prefetch_bw_oto;
3397 double Tvm_oto_lines;
3398 double Tr0_oto_lines;
3399 double dst_y_prefetch_oto;
3400 double TimeForFetchingMetaPTE = 0;
3401 double TimeForFetchingRowInVBlank = 0;
3402 double LinesToRequestPrefetchPixelData = 0;
3403 unsigned int HostVMDynamicLevelsTrips;
3407 double Tvm_trips_rounded;
3408 double Tr0_trips_rounded;
3410 double Tpre_rounded;
3411 double prefetch_bw_equ;
3417 double prefetch_sw_bytes;
3420 unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3422 double Tsw_est1 = 0;
3423 double Tsw_est3 = 0;
3425 if (GPUVMEnable == true && HostVMEnable == true)
3426 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3428 HostVMDynamicLevelsTrips = 0;
3429 #ifdef __DML_VBA_DEBUG__
3430 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
3431 dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
3432 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3433 dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3434 __func__, HostVMEnable, HostVMInefficiencyFactor);
3436 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3437 MaxInterDCNTileRepeaters,
3440 myPipe->DCFClkDeepSleep,
3444 DynamicMetadataTransmittedBytes,
3445 DynamicMetadataLinesBeforeActiveRequired,
3446 myPipe->InterlaceEnable,
3447 myPipe->ProgressiveToInterlaceUnitInOPP,
3458 LineTime = myPipe->HTotal / myPipe->PixelClock;
3459 trip_to_mem = UrgentLatency;
3460 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3462 if (DynamicMetadataVMEnabled == true)
3463 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
3465 *Tdmdl = TWait + UrgentExtraLatency;
3467 #ifdef __DML_VBA_ALLOW_DELTA__
3468 if (DynamicMetadataEnable == false)
3472 if (DynamicMetadataEnable == true) {
3473 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3474 *NotEnoughTimeForDynamicMetadata = true;
3475 #ifdef __DML_VBA_DEBUG__
3476 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3477 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3479 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3480 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3482 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3486 *NotEnoughTimeForDynamicMetadata = false;
3489 *NotEnoughTimeForDynamicMetadata = false;
3492 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
3493 GPUVMEnable == true ? TWait + Tvm_trips : 0);
3495 if (myPipe->ScalerEnabled)
3496 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
3498 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
3500 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
3502 DISPCLKCycles = DISPCLKDelaySubtotal;
3504 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3507 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3508 myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3510 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3511 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3512 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3513 myPipe->HActive / 2 : 0)
3514 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3516 #ifdef __DML_VBA_DEBUG__
3517 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3518 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3519 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3520 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3521 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
3522 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
3523 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
3524 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3525 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
3528 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3529 *DSTYAfterScaler = 1;
3531 *DSTYAfterScaler = 0;
3533 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3534 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3535 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3536 #ifdef __DML_VBA_DEBUG__
3537 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
3538 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3543 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3545 if (GPUVMEnable == true) {
3546 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3547 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3548 if (GPUVMPageTableLevels >= 3) {
3549 *Tno_bw = UrgentExtraLatency + trip_to_mem *
3550 (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3551 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
3552 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3553 4.0 * LineTime; // VBA_ERROR
3554 *Tno_bw = UrgentExtraLatency;
3558 } else if (myPipe->DCCEnable == true) {
3559 Tvm_trips_rounded = LineTime / 4.0;
3560 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3563 Tvm_trips_rounded = LineTime / 4.0;
3564 Tr0_trips_rounded = LineTime / 2.0;
3567 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3568 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3570 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3571 || myPipe->SourcePixelFormat == dm_420_12) {
3572 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3574 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3577 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3578 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3579 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3580 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3582 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3583 min_Lsw = dml_max(min_Lsw, 1.0);
3584 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3586 if (GPUVMEnable == true) {
3589 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3592 Tvm_oto = LineTime / 4.0;
3594 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3597 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3598 (LineTime - Tvm_oto)/2.0,
3600 #ifdef __DML_VBA_DEBUG__
3601 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3602 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3603 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3604 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3605 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3608 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3610 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3611 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3612 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3614 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3615 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3617 #ifdef __DML_VBA_DEBUG__
3618 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3619 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3620 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3621 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3622 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3623 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3624 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3625 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3626 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3627 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3628 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3629 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3630 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3631 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3632 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3633 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3634 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3635 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3636 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3637 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3638 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3639 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3640 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3641 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3642 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3643 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3644 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3647 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3648 Tpre_rounded = dst_y_prefetch_equ * LineTime;
3649 #ifdef __DML_VBA_DEBUG__
3650 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3651 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3652 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3653 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3654 __func__, VStartup * LineTime);
3655 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3656 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3657 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3658 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3659 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3660 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3661 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3662 __func__, *DSTYAfterScaler);
3664 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3665 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3667 if (prefetch_sw_bytes < dep_bytes)
3668 prefetch_sw_bytes = 2 * dep_bytes;
3670 *PrefetchBandwidth = 0;
3671 *DestinationLinesToRequestVMInVBlank = 0;
3672 *DestinationLinesToRequestRowInVBlank = 0;
3673 *VRatioPrefetchY = 0;
3674 *VRatioPrefetchC = 0;
3675 *RequiredPrefetchPixDataBWLuma = 0;
3676 if (dst_y_prefetch_equ > 1) {
3677 double PrefetchBandwidth1;
3678 double PrefetchBandwidth2;
3679 double PrefetchBandwidth3;
3680 double PrefetchBandwidth4;
3682 if (Tpre_rounded - *Tno_bw > 0) {
3683 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3684 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3685 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3686 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3688 PrefetchBandwidth1 = 0;
3690 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3691 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3692 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3693 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3694 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3697 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3698 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3699 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3701 PrefetchBandwidth2 = 0;
3703 if (Tpre_rounded - Tvm_trips_rounded > 0) {
3704 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3705 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3706 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3708 PrefetchBandwidth3 = 0;
3711 if (VStartup == MaxVStartup &&
3712 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3713 LineTime - Tvm_trips_rounded > 0) {
3714 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3715 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3718 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3719 PrefetchBandwidth4 = prefetch_sw_bytes /
3720 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3722 PrefetchBandwidth4 = 0;
3725 #ifdef __DML_VBA_DEBUG__
3726 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3727 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3728 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3729 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3730 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3731 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3732 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3733 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3734 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3741 if (PrefetchBandwidth1 > 0) {
3742 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3743 >= Tvm_trips_rounded
3744 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3745 / PrefetchBandwidth1 >= Tr0_trips_rounded) {
3754 if (PrefetchBandwidth2 > 0) {
3755 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3756 >= Tvm_trips_rounded
3757 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3758 / PrefetchBandwidth2 < Tr0_trips_rounded) {
3767 if (PrefetchBandwidth3 > 0) {
3768 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3769 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3770 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3771 Tr0_trips_rounded) {
3781 prefetch_bw_equ = PrefetchBandwidth1;
3783 prefetch_bw_equ = PrefetchBandwidth2;
3785 prefetch_bw_equ = PrefetchBandwidth3;
3787 prefetch_bw_equ = PrefetchBandwidth4;
3789 #ifdef __DML_VBA_DEBUG__
3790 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3791 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3792 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3793 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3796 if (prefetch_bw_equ > 0) {
3797 if (GPUVMEnable == true) {
3798 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3799 HostVMInefficiencyFactor / prefetch_bw_equ,
3800 Tvm_trips, LineTime / 4);
3802 Tvm_equ = LineTime / 4;
3805 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3806 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3807 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3808 (LineTime - Tvm_equ) / 2, LineTime / 4);
3810 Tr0_equ = (LineTime - Tvm_equ) / 2;
3815 #ifdef __DML_VBA_DEBUG__
3816 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3821 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3822 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
3823 TimeForFetchingMetaPTE = Tvm_oto;
3824 TimeForFetchingRowInVBlank = Tr0_oto;
3825 *PrefetchBandwidth = prefetch_bw_oto;
3827 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3828 TimeForFetchingMetaPTE = Tvm_equ;
3829 TimeForFetchingRowInVBlank = Tr0_equ;
3830 *PrefetchBandwidth = prefetch_bw_equ;
3833 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3835 *DestinationLinesToRequestRowInVBlank =
3836 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3838 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3839 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3841 #ifdef __DML_VBA_DEBUG__
3842 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3843 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3844 __func__, *DestinationLinesToRequestVMInVBlank);
3845 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3846 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3847 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3848 __func__, *DestinationLinesToRequestRowInVBlank);
3849 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3850 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3853 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3854 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3855 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3856 #ifdef __DML_VBA_DEBUG__
3857 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3858 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3859 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3861 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3862 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3864 dml_max((double) PrefetchSourceLinesY /
3865 LinesToRequestPrefetchPixelData,
3866 (double) MaxNumSwathY * SwathHeightY /
3867 (LinesToRequestPrefetchPixelData -
3868 (VInitPreFillY - 3.0) / 2.0));
3869 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3872 *VRatioPrefetchY = 0;
3874 #ifdef __DML_VBA_DEBUG__
3875 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3876 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3877 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3881 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3882 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3884 #ifdef __DML_VBA_DEBUG__
3885 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3886 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3887 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3889 if ((SwathHeightC > 4)) {
3890 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3892 dml_max(*VRatioPrefetchC,
3893 (double) MaxNumSwathC * SwathHeightC /
3894 (LinesToRequestPrefetchPixelData -
3895 (VInitPreFillC - 3.0) / 2.0));
3896 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3899 *VRatioPrefetchC = 0;
3901 #ifdef __DML_VBA_DEBUG__
3902 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3903 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3904 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3908 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3909 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3912 #ifdef __DML_VBA_DEBUG__
3913 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3914 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3915 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3916 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3917 __func__, *RequiredPrefetchPixDataBWLuma);
3919 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3920 LinesToRequestPrefetchPixelData
3921 * myPipe->BytePerPixelC
3922 * swath_width_chroma_ub / LineTime;
3925 #ifdef __DML_VBA_DEBUG__
3926 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3927 __func__, LinesToRequestPrefetchPixelData);
3929 *VRatioPrefetchY = 0;
3930 *VRatioPrefetchC = 0;
3931 *RequiredPrefetchPixDataBWLuma = 0;
3932 *RequiredPrefetchPixDataBWChroma = 0;
3934 #ifdef __DML_VBA_DEBUG__
3935 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3936 (double)LinesToRequestPrefetchPixelData * LineTime +
3937 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3938 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
3939 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
3940 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
3941 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
3942 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
3943 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
3944 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
3945 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
3946 PixelPTEBytesPerRow);
3950 #ifdef __DML_VBA_DEBUG__
3951 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
3952 __func__, dst_y_prefetch_equ);
3957 double prefetch_vm_bw;
3958 double prefetch_row_bw;
3960 if (PDEAndMetaPTEBytesFrame == 0) {
3962 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
3963 #ifdef __DML_VBA_DEBUG__
3964 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3965 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3966 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3967 __func__, *DestinationLinesToRequestVMInVBlank);
3968 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3970 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
3971 (*DestinationLinesToRequestVMInVBlank * LineTime);
3972 #ifdef __DML_VBA_DEBUG__
3973 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
3978 #ifdef __DML_VBA_DEBUG__
3979 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
3980 __func__, *DestinationLinesToRequestVMInVBlank);
3984 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
3985 prefetch_row_bw = 0;
3986 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
3987 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
3988 (*DestinationLinesToRequestRowInVBlank * LineTime);
3990 #ifdef __DML_VBA_DEBUG__
3991 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3992 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3993 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3994 __func__, *DestinationLinesToRequestRowInVBlank);
3995 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
3998 prefetch_row_bw = 0;
4000 #ifdef __DML_VBA_DEBUG__
4001 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4002 __func__, *DestinationLinesToRequestRowInVBlank);
4006 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4010 *PrefetchBandwidth = 0;
4011 TimeForFetchingMetaPTE = 0;
4012 TimeForFetchingRowInVBlank = 0;
4013 *DestinationLinesToRequestVMInVBlank = 0;
4014 *DestinationLinesToRequestRowInVBlank = 0;
4015 *DestinationLinesForPrefetch = 0;
4016 LinesToRequestPrefetchPixelData = 0;
4017 *VRatioPrefetchY = 0;
4018 *VRatioPrefetchC = 0;
4019 *RequiredPrefetchPixDataBWLuma = 0;
4020 *RequiredPrefetchPixDataBWChroma = 0;
4024 } // CalculatePrefetchSchedule
/*
 * dml32_CalculateFlipSchedule - derive the immediate-flip fetch schedule and
 * decide whether immediate flip is supported.
 *
 * When TotImmediateFlipBytes is non-zero, a share of
 * BandwidthAvailableForImmediateFlip proportional to this call's byte counts
 * is used to compute how long the PDE/meta-PTE data and the meta-row/DPTE-row
 * data take to fetch. Those times are converted to destination lines and to a
 * flip bandwidth, and are then checked against fixed line limits and a row
 * time budget.
 *
 * Inputs, as used by the body:
 *   HostVMInefficiencyFactor - multiplier applied to PDEAndMetaPTEBytesPerFrame
 *       and DPTEBytesPerRow in the time and bandwidth expressions.
 *   UrgentExtraLatency, UrgentLatency - latency terms that act as lower bounds
 *       on the fetch times.
 *   GPUVMMaxPageTableLevels, HostVMMaxNonCachedPageTableLevels - scale the
 *       UrgentLatency term of the PDE/meta-PTE fetch time.
 *   PDEAndMetaPTEBytesPerFrame, MetaRowBytes, DPTEBytesPerRow - byte counts
 *       to be fetched.
 *   BandwidthAvailableForImmediateFlip, TotImmediateFlipBytes - bandwidth that
 *       is split in proportion to bytes, and the byte total it is split over.
 *   SourcePixelFormat - selects whether the chroma row heights take part in
 *       min_row_time.
 *   VRatioChroma, dpte_row_height, meta_row_height, dpte_row_height_chroma,
 *       meta_row_height_chroma - used to build min_row_time.
 *   use_one_row_for_frame_flip - when set, DPTEBytesPerRow is counted twice in
 *       the bandwidth share.
 *   The body additionally reads GPUVMEnable, HostVMEnable, DCCEnable, Tno_bw,
 *   LineTime and VRatio.
 *
 * Outputs (written through pointers):
 *   DestinationLinesToRequestVMInImmediateFlip  - PDE/meta-PTE fetch time in lines.
 *   DestinationLinesToRequestRowInImmediateFlip - row fetch time in lines.
 *   final_flip_bw                 - bandwidth implied by the rounded line counts.
 *   ImmediateFlipSupportedForPipe - result of the final limit checks.
 */
4026 void dml32_CalculateFlipSchedule(
4027 double HostVMInefficiencyFactor,
4028 double UrgentExtraLatency,
4029 double UrgentLatency,
4030 unsigned int GPUVMMaxPageTableLevels,
4032 unsigned int HostVMMaxNonCachedPageTableLevels,
4034 double HostVMMinPageSize,
4035 double PDEAndMetaPTEBytesPerFrame,
4036 double MetaRowBytes,
4037 double DPTEBytesPerRow,
4038 double BandwidthAvailableForImmediateFlip,
4039 unsigned int TotImmediateFlipBytes,
4040 enum source_format_class SourcePixelFormat,
4043 double VRatioChroma,
4046 unsigned int dpte_row_height,
4047 unsigned int meta_row_height,
4048 unsigned int dpte_row_height_chroma,
4049 unsigned int meta_row_height_chroma,
4050 bool use_one_row_for_frame_flip,
4053 double *DestinationLinesToRequestVMInImmediateFlip,
4054 double *DestinationLinesToRequestRowInImmediateFlip,
4055 double *final_flip_bw,
4056 bool *ImmediateFlipSupportedForPipe)
4058 double min_row_time = 0.0;
4059 unsigned int HostVMDynamicLevelsTrips;
4060 double TimeForFetchingMetaPTEImmediateFlip;
4061 double TimeForFetchingRowInVBlankImmediateFlip;
4062 double ImmediateFlipBW;
/*
 * Host VM trips take the HostVMMaxNonCachedPageTableLevels value only when
 * both GPUVM and HostVM are enabled.
 */
4064 if (GPUVMEnable == true && HostVMEnable == true)
4065 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4067 HostVMDynamicLevelsTrips = 0;
4069 #ifdef __DML_VBA_DEBUG__
4070 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4071 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
/*
 * The schedule is only computed when there are immediate-flip bytes; this
 * guard also avoids dividing by a zero TotImmediateFlipBytes below.
 */
4074 if (TotImmediateFlipBytes > 0) {
/*
 * Bandwidth share = (bytes for this call / TotImmediateFlipBytes) *
 * BandwidthAvailableForImmediateFlip. DPTEBytesPerRow is counted twice when
 * use_one_row_for_frame_flip is set.
 */
4075 if (use_one_row_for_frame_flip) {
4076 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4077 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4079 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4080 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
/*
 * PDE/meta-PTE fetch time: the largest of the dml_max3() candidates, which
 * include a bandwidth-limited term (Tno_bw plus bytes over ImmediateFlipBW)
 * and a latency term built from UrgentExtraLatency and UrgentLatency scaled
 * by the page table level counts.
 */
4082 if (GPUVMEnable == true) {
4083 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4084 HostVMInefficiencyFactor / ImmediateFlipBW,
4085 UrgentExtraLatency + UrgentLatency *
4086 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4089 TimeForFetchingMetaPTEImmediateFlip = 0;
/*
 * Row fetch time: the largest of the bandwidth-limited transfer time,
 * UrgentLatency * (HostVMDynamicLevelsTrips + 1), and a quarter of LineTime.
 */
4091 if ((GPUVMEnable == true || DCCEnable == true)) {
4092 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4093 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4094 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4096 TimeForFetchingRowInVBlankImmediateFlip = 0;
/*
 * Convert both times to line counts. The 4.0 * x ... / 4.0 pattern presumably
 * rounds up to quarter-line granularity; this relies on dml_ceil(x, 1.0)
 * rounding up to a whole number - TODO confirm against dml_ceil().
 */
4099 *DestinationLinesToRequestVMInImmediateFlip =
4100 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4101 *DestinationLinesToRequestRowInImmediateFlip =
4102 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
/*
 * Final flip bandwidth is recomputed from the rounded line counts. With GPUVM
 * it is the larger of the PDE/meta-PTE and row bandwidths. The "else if" is
 * only reached when GPUVMEnable is false, so its condition effectively tests
 * DCCEnable alone and only the row bandwidth applies.
 */
4104 if (GPUVMEnable == true) {
4105 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4106 (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4107 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4108 (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4109 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4110 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4111 (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
/* No schedule to compute: fetch times and line counts are zeroed. */
4116 TimeForFetchingMetaPTEImmediateFlip = 0;
4117 TimeForFetchingRowInVBlankImmediateFlip = 0;
4118 *DestinationLinesToRequestVMInImmediateFlip = 0;
4119 *DestinationLinesToRequestRowInImmediateFlip = 0;
/*
 * min_row_time: time budget derived from row heights (row height * LineTime /
 * vertical ratio). For dm_420_8, dm_420_10 and dm_rgbe_alpha the chroma row
 * heights (with VRatioChroma) are considered alongside the non-chroma ones.
 * GPUVM without DCC uses only dpte row heights, DCC without GPUVM uses only
 * meta row heights, and the remaining case takes the minimum over both kinds.
 */
4123 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4124 if (GPUVMEnable == true && DCCEnable != true) {
4125 min_row_time = dml_min(dpte_row_height *
4126 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4127 } else if (GPUVMEnable != true && DCCEnable == true) {
4128 min_row_time = dml_min(meta_row_height *
4129 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4131 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4132 LineTime / VRatio, dpte_row_height_chroma * LineTime /
4133 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4136 if (GPUVMEnable == true && DCCEnable != true) {
4137 min_row_time = dpte_row_height * LineTime / VRatio;
4138 } else if (GPUVMEnable != true && DCCEnable == true) {
4139 min_row_time = meta_row_height * LineTime / VRatio;
4142 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
/*
 * Immediate flip is rejected when the PDE/meta-PTE request needs 32 or more
 * lines, when the row request needs 16 or more lines, or when the combined
 * fetch time (PDE/meta-PTE time plus two row times) fails its check.
 * NOTE(review): that combined time is presumably compared against
 * min_row_time - confirm.
 */
4146 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4147 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4149 *ImmediateFlipSupportedForPipe = false;
4151 *ImmediateFlipSupportedForPipe = true;
4154 #ifdef __DML_VBA_DEBUG__
4155 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4156 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4157 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4158 __func__, *DestinationLinesToRequestVMInImmediateFlip);
4159 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4160 __func__, *DestinationLinesToRequestRowInImmediateFlip);
4161 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4162 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4163 __func__, TimeForFetchingRowInVBlankImmediateFlip);
4164 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4165 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4167 } // CalculateFlipSchedule
4169 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4170 bool USRRetrainingRequiredFinal,
4171 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4172 unsigned int PrefetchMode,
4173 unsigned int NumberOfActiveSurfaces,
4174 unsigned int MaxLineBufferLines,
4175 unsigned int LineBufferSize,
4176 unsigned int WritebackInterfaceBufferSize,
4179 bool SynchronizeTimingsFinal,
4180 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4182 unsigned int dpte_group_bytes[],
4183 unsigned int meta_row_height[],
4184 unsigned int meta_row_height_chroma[],
4185 SOCParametersList mmSOCParameters,
4186 unsigned int WritebackChunkSize,
4188 double DCFClkDeepSleep,
4189 unsigned int DETBufferSizeY[],
4190 unsigned int DETBufferSizeC[],
4191 unsigned int SwathHeightY[],
4192 unsigned int SwathHeightC[],
4193 unsigned int LBBitPerPixel[],
4194 double SwathWidthY[],
4195 double SwathWidthC[],
4197 double HRatioChroma[],
4198 unsigned int VTaps[],
4199 unsigned int VTapsChroma[],
4201 double VRatioChroma[],
4202 unsigned int HTotal[],
4203 unsigned int VTotal[],
4204 unsigned int VActive[],
4205 double PixelClock[],
4206 unsigned int BlendingAndTiming[],
4207 unsigned int DPPPerSurface[],
4208 double BytePerPixelDETY[],
4209 double BytePerPixelDETC[],
4210 double DSTXAfterScaler[],
4211 double DSTYAfterScaler[],
4212 bool WritebackEnable[],
4213 enum source_format_class WritebackPixelFormat[],
4214 double WritebackDestinationWidth[],
4215 double WritebackDestinationHeight[],
4216 double WritebackSourceHeight[],
4217 bool UnboundedRequestEnabled,
4218 unsigned int CompressedBufferSizeInkByte,
4221 Watermarks *Watermark,
4222 enum clock_change_support *DRAMClockChangeSupport,
4223 double MaxActiveDRAMClockChangeLatencySupported[],
4224 unsigned int SubViewportLinesNeededInMALL[],
4225 enum dm_fclock_change_support *FCLKChangeSupport,
4226 double *MinActiveFCLKChangeLatencySupported,
4227 bool *USRRetrainingSupport,
4228 double ActiveDRAMClockChangeLatencyMargin[])
4230 unsigned int i, j, k;
4231 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4232 unsigned int DRAMClockChangeSupportNumber = 0;
4233 unsigned int LastSurfaceWithoutMargin;
4234 unsigned int DRAMClockChangeMethod = 0;
4235 bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4236 double MinActiveFCLKChangeMargin = 0.;
4237 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4238 double ActiveClockChangeLatencyHidingY;
4239 double ActiveClockChangeLatencyHidingC;
4240 double ActiveClockChangeLatencyHiding;
4241 double EffectiveDETBufferSizeY;
4242 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4243 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4244 double TotalPixelBW = 0.0;
4245 bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4246 double EffectiveLBLatencyHidingY;
4247 double EffectiveLBLatencyHidingC;
4248 double LinesInDETY[DC__NUM_DPP__MAX];
4249 double LinesInDETC[DC__NUM_DPP__MAX];
4250 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4251 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4252 double FullDETBufferingTimeY;
4253 double FullDETBufferingTimeC;
4254 double WritebackDRAMClockChangeLatencyMargin;
4255 double WritebackFCLKChangeLatencyMargin;
4256 double WritebackLatencyHiding;
4257 bool SameTimingForFCLKChange;
4259 unsigned int TotalActiveWriteback = 0;
4260 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4261 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4263 Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4264 Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4265 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4266 Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
4267 Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
4268 Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4269 + 10 / DCFClkDeepSleep;
4270 Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4271 + 10 / DCFClkDeepSleep;
4272 Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4273 + 10 / DCFClkDeepSleep;
4274 Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4275 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4277 #ifdef __DML_VBA_DEBUG__
4278 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4279 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4280 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4281 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
4282 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
4283 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
4284 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
4285 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
4286 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
4287 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
4288 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4289 __func__, Watermark->Z8StutterEnterPlusExitWatermark);
4293 TotalActiveWriteback = 0;
4294 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4295 if (WritebackEnable[k] == true)
4296 TotalActiveWriteback = TotalActiveWriteback + 1;
4299 if (TotalActiveWriteback <= 1) {
4300 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4302 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4303 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4305 if (USRRetrainingRequiredFinal)
4306 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
4307 + mmSOCParameters.USRRetrainingLatency;
4309 if (TotalActiveWriteback <= 1) {
4310 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4311 + mmSOCParameters.WritebackLatency;
4312 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4313 + mmSOCParameters.WritebackLatency;
4315 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4316 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4317 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4318 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
4321 if (USRRetrainingRequiredFinal)
4322 Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
4323 + mmSOCParameters.USRRetrainingLatency;
4325 if (USRRetrainingRequiredFinal)
4326 Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
4327 + mmSOCParameters.USRRetrainingLatency;
4329 #ifdef __DML_VBA_DEBUG__
4330 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4331 __func__, Watermark->WritebackDRAMClockChangeWatermark);
4332 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
4333 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
4334 dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
4335 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4338 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4339 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
4340 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
4343 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4345 LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
4346 LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
4349 #ifdef __DML_VBA_DEBUG__
4350 dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
4351 dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize);
4352 dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]);
4353 dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]);
4354 dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]);
4357 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
4358 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
4359 EffectiveDETBufferSizeY = DETBufferSizeY[k];
4361 if (UnboundedRequestEnabled) {
4362 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4363 + CompressedBufferSizeInkByte * 1024
4364 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
4365 / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
4368 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4369 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4370 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
4372 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4373 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
4375 if (NumberOfActiveSurfaces > 1) {
4376 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4377 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
4378 / PixelClock[k] / VRatio[k];
4381 if (BytePerPixelDETC[k] > 0) {
4382 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4383 LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4384 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
4386 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4387 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
4389 if (NumberOfActiveSurfaces > 1) {
4390 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4391 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
4392 / PixelClock[k] / VRatioChroma[k];
4394 ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4395 ActiveClockChangeLatencyHidingC);
4397 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4400 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4401 - Watermark->DRAMClockChangeWatermark;
4402 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4403 - Watermark->FCLKChangeWatermark;
4404 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
4406 if (WritebackEnable[k]) {
4407 WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
4408 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
4409 / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
4410 if (WritebackPixelFormat[k] == dm_444_64)
4411 WritebackLatencyHiding = WritebackLatencyHiding / 2;
4413 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4414 - Watermark->WritebackDRAMClockChangeWatermark;
4416 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4417 - Watermark->WritebackFCLKChangeWatermark;
4419 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4420 WritebackFCLKChangeLatencyMargin);
4421 ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4422 WritebackDRAMClockChangeLatencyMargin);
4424 MaxActiveDRAMClockChangeLatencySupported[k] =
4425 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4427 (ActiveDRAMClockChangeLatencyMargin[k]
4428 + mmSOCParameters.DRAMClockChangeLatency);
4431 for (i = 0; i < NumberOfActiveSurfaces; ++i) {
4432 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
4434 (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
4435 (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
4436 (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
4437 (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
4438 HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
4439 VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4440 (DRRDisplay[i] || DRRDisplay[j]))) {
4441 SynchronizedSurfaces[i][j] = true;
4443 SynchronizedSurfaces[i][j] = false;
4448 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4449 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4450 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4451 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4452 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4453 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4454 SurfaceWithMinActiveFCLKChangeMargin = k;
4458 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4460 SameTimingForFCLKChange = true;
4461 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4462 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4463 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4464 (SameTimingForFCLKChange ||
4465 ActiveFCLKChangeLatencyMargin[k] <
4466 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4467 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4469 SameTimingForFCLKChange = false;
4473 if (MinActiveFCLKChangeMargin > 0) {
4474 *FCLKChangeSupport = dm_fclock_change_vactive;
4475 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4476 (PrefetchMode <= 1)) {
4477 *FCLKChangeSupport = dm_fclock_change_vblank;
4479 *FCLKChangeSupport = dm_fclock_change_unsupported;
4482 *USRRetrainingSupport = true;
4483 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4484 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4485 (USRRetrainingLatencyMargin[k] < 0)) {
4486 *USRRetrainingSupport = false;
4490 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4491 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4492 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4493 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4494 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4495 if (PrefetchMode > 0) {
4496 DRAMClockChangeSupportNumber = 2;
4497 } else if (DRAMClockChangeSupportNumber == 0) {
4498 DRAMClockChangeSupportNumber = 1;
4499 LastSurfaceWithoutMargin = k;
4500 } else if (DRAMClockChangeSupportNumber == 1 &&
4501 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4502 DRAMClockChangeSupportNumber = 2;
4507 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4508 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4509 DRAMClockChangeMethod = 1;
4510 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4511 DRAMClockChangeMethod = 2;
4514 if (DRAMClockChangeMethod == 0) {
4515 if (DRAMClockChangeSupportNumber == 0)
4516 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4517 else if (DRAMClockChangeSupportNumber == 1)
4518 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4520 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4521 } else if (DRAMClockChangeMethod == 1) {
4522 if (DRAMClockChangeSupportNumber == 0)
4523 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4524 else if (DRAMClockChangeSupportNumber == 1)
4525 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4527 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4529 if (DRAMClockChangeSupportNumber == 0)
4530 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4531 else if (DRAMClockChangeSupportNumber == 1)
4532 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4534 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4537 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4538 unsigned int dst_y_pstate;
4539 unsigned int src_y_pstate_l;
4540 unsigned int src_y_pstate_c;
4541 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4543 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
4544 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
4545 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4546 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
4548 #ifdef __DML_VBA_DEBUG__
4549 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
4550 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
4551 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
4552 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
4553 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4554 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
4555 dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
4556 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
4557 dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]);
4558 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
4560 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4562 if (BytePerPixelDETC[k] > 0) {
4563 src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
4564 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4565 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
4566 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4568 #ifdef __DML_VBA_DEBUG__
4569 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
4570 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
4571 dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]);
4572 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
4576 #ifdef __DML_VBA_DEBUG__
4577 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4578 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4579 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4580 __func__, *MinActiveFCLKChangeLatencySupported);
4581 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4583 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4585 double dml32_CalculateWriteBackDISPCLK(
4586 enum source_format_class WritebackPixelFormat,
4588 double WritebackHRatio,
4589 double WritebackVRatio,
4590 unsigned int WritebackHTaps,
4591 unsigned int WritebackVTaps,
4592 unsigned int WritebackSourceWidth,
4593 unsigned int WritebackDestinationWidth,
4594 unsigned int HTotal,
4595 unsigned int WritebackLineBufferSize,
4596 double DISPCLKDPPCLKVCOSpeed)
4598 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4600 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4601 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4602 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4603 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4604 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4607 void dml32_CalculateMinAndMaxPrefetchMode(
4608 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
4609 unsigned int *MinPrefetchMode,
4610 unsigned int *MaxPrefetchMode)
4612 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4613 *MinPrefetchMode = 3;
4614 *MaxPrefetchMode = 3;
4615 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4616 *MinPrefetchMode = 2;
4617 *MaxPrefetchMode = 2;
4618 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4619 *MinPrefetchMode = 1;
4620 *MaxPrefetchMode = 1;
4621 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4622 *MinPrefetchMode = 0;
4623 *MaxPrefetchMode = 0;
4624 } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4625 dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4626 *MinPrefetchMode = 0;
4627 *MaxPrefetchMode = 3;
4629 *MinPrefetchMode = 0;
4630 *MaxPrefetchMode = 3;
4632 } // CalculateMinAndMaxPrefetchMode
/*
 * dml32_CalculatePixelDeliveryTimes - fill the per-surface delivery-time
 * output arrays for surfaces 0 .. NumberOfActiveSurfaces - 1.
 *
 * Three passes are made over the surfaces:
 *   1. line delivery times (luma/chroma, active and prefetch);
 *   2. request delivery times = line delivery time / requests per swath;
 *   3. cursor request delivery times (active and prefetch).
 *
 * Inputs are the arrays up to BlockHeight256BytesC[]; the arrays from
 * DisplayPipeLineDeliveryTimeLuma[] onwards are written by this function.
 *
 * NOTE(review): in this copy every line carries a leading number and the
 * numbering has gaps (e.g. 4635 -> 4637, 4671 -> 4674, 4783 -> 4785). The
 * declarations of VRatio, HRatio, Dppclk and k, the tail of the dml_ceil()
 * call in the cursor pass, and the brace / else / #endif lines are not
 * visible here - confirm against the pristine file before relying on the
 * branch structure described below.
 */
4634 void dml32_CalculatePixelDeliveryTimes(
4635 unsigned int NumberOfActiveSurfaces,
4637 double VRatioChroma[],
4638 double VRatioPrefetchY[],
4639 double VRatioPrefetchC[],
4640 unsigned int swath_width_luma_ub[],
4641 unsigned int swath_width_chroma_ub[],
4642 unsigned int DPPPerSurface[],
4644 double HRatioChroma[],
4645 double PixelClock[],
4646 double PSCL_THROUGHPUT[],
4647 double PSCL_THROUGHPUT_CHROMA[],
4649 unsigned int BytePerPixelC[],
4650 enum dm_rotation_angle SourceRotation[],
4651 unsigned int NumberOfCursors[],
4652 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
4653 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
4654 unsigned int BlockWidth256BytesY[],
4655 unsigned int BlockHeight256BytesY[],
4656 unsigned int BlockWidth256BytesC[],
4657 unsigned int BlockHeight256BytesC[],
/* Everything below this point in the parameter list is written by the function. */
4660 double DisplayPipeLineDeliveryTimeLuma[],
4661 double DisplayPipeLineDeliveryTimeChroma[],
4662 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
4663 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
4664 double DisplayPipeRequestDeliveryTimeLuma[],
4665 double DisplayPipeRequestDeliveryTimeChroma[],
4666 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4667 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4668 double CursorRequestDeliveryTime[],
4669 double CursorRequestDeliveryTimePrefetch[])
4671 double req_per_swath_ub;
/*
 * Pass 1: line delivery times. Each quantity has two formulas: one based on
 * swath width * DPPPerSurface / horizontal ratio / PixelClock, selected when
 * the relevant vertical ratio is <= 1, and one based on swath width /
 * PSCL throughput / Dppclk (presumably the else branch - the else lines are
 * not visible in this copy).
 */
4674 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4676 #ifdef __DML_VBA_DEBUG__
4677 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4678 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4679 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4680 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4681 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4682 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4683 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4684 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4685 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4686 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4687 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
/* Luma, active: selected by VRatio[k]. */
4690 if (VRatio[k] <= 1) {
4691 DisplayPipeLineDeliveryTimeLuma[k] =
4692 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4694 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
/* Chroma, active: forced to 0 when the surface has no chroma bytes, else selected by VRatioChroma[k]. */
4697 if (BytePerPixelC[k] == 0) {
4698 DisplayPipeLineDeliveryTimeChroma[k] = 0;
4700 if (VRatioChroma[k] <= 1) {
4701 DisplayPipeLineDeliveryTimeChroma[k] =
4702 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4704 DisplayPipeLineDeliveryTimeChroma[k] =
4705 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
/* Luma, prefetch: same two formulas as the active case, selected by VRatioPrefetchY[k]. */
4709 if (VRatioPrefetchY[k] <= 1) {
4710 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4711 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4713 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4714 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
/* Chroma, prefetch: 0 without chroma, else selected by VRatioPrefetchC[k]. */
4717 if (BytePerPixelC[k] == 0) {
4718 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4720 if (VRatioPrefetchC[k] <= 1) {
4721 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4722 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4724 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4725 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4728 #ifdef __DML_VBA_DEBUG__
4729 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4730 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4731 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4732 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4733 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4734 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4735 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4736 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
/*
 * Pass 2: request delivery times. req_per_swath_ub is the swath width divided
 * by the 256-byte block width, or by the block height when
 * IsVertical(SourceRotation[k]) is true. Both operands are unsigned int, so
 * the quotient is truncated before it is stored in the double.
 * NOTE(review): if the swath width is smaller than the chosen block dimension
 * the quotient is 0 and the divisions below divide by zero - confirm callers
 * rule this out.
 */
4740 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4741 if (!IsVertical(SourceRotation[k]))
4742 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4744 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4745 #ifdef __DML_VBA_DEBUG__
4746 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4749 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4750 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4751 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
/* Chroma request times are 0 without chroma; otherwise req_per_swath_ub is recomputed from the chroma swath/block sizes. */
4752 if (BytePerPixelC[k] == 0) {
4753 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4754 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4756 if (!IsVertical(SourceRotation[k]))
4757 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4759 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4760 #ifdef __DML_VBA_DEBUG__
4761 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4763 DisplayPipeRequestDeliveryTimeChroma[k] =
4764 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4765 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4766 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4768 #ifdef __DML_VBA_DEBUG__
4769 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4770 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4771 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4772 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4773 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4774 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4775 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4776 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
/*
 * Pass 3: cursor request delivery times. Only cursor index 0 of each surface
 * is consulted (CursorWidth[k][0], CursorBPP[k][0]). cursor_req_per_width is
 * computed before NumberOfCursors[k] is checked; the outputs are set to 0
 * when the surface has no cursor.
 */
4780 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4781 unsigned int cursor_req_per_width;
4783 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4785 if (NumberOfCursors[k] > 0) {
/* Active: cursor width over HRatio/PixelClock when VRatio[k] <= 1, else over PSCL_THROUGHPUT/Dppclk. */
4786 if (VRatio[k] <= 1) {
4787 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4788 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4790 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4791 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
/* Prefetch: same formulas, selected by VRatioPrefetchY[k]. */
4793 if (VRatioPrefetchY[k] <= 1) {
4794 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4795 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4797 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4798 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4801 CursorRequestDeliveryTime[k] = 0;
4802 CursorRequestDeliveryTimePrefetch[k] = 0;
4804 #ifdef __DML_VBA_DEBUG__
4805 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4806 __func__, k, NumberOfCursors[k]);
4807 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4808 __func__, k, CursorRequestDeliveryTime[k]);
4809 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4810 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4813 } // CalculatePixelDeliveryTimes
4815 void dml32_CalculateMetaAndPTETimes(
4816 bool use_one_row_for_frame[],
4817 unsigned int NumberOfActiveSurfaces,
4819 unsigned int MetaChunkSize,
4820 unsigned int MinMetaChunkSizeBytes,
4821 unsigned int HTotal[],
4823 double VRatioChroma[],
4824 double DestinationLinesToRequestRowInVBlank[],
4825 double DestinationLinesToRequestRowInImmediateFlip[],
4827 double PixelClock[],
4828 unsigned int BytePerPixelY[],
4829 unsigned int BytePerPixelC[],
4830 enum dm_rotation_angle SourceRotation[],
4831 unsigned int dpte_row_height[],
4832 unsigned int dpte_row_height_chroma[],
4833 unsigned int meta_row_width[],
4834 unsigned int meta_row_width_chroma[],
4835 unsigned int meta_row_height[],
4836 unsigned int meta_row_height_chroma[],
4837 unsigned int meta_req_width[],
4838 unsigned int meta_req_width_chroma[],
4839 unsigned int meta_req_height[],
4840 unsigned int meta_req_height_chroma[],
4841 unsigned int dpte_group_bytes[],
4842 unsigned int PTERequestSizeY[],
4843 unsigned int PTERequestSizeC[],
4844 unsigned int PixelPTEReqWidthY[],
4845 unsigned int PixelPTEReqHeightY[],
4846 unsigned int PixelPTEReqWidthC[],
4847 unsigned int PixelPTEReqHeightC[],
4848 unsigned int dpte_row_width_luma_ub[],
4849 unsigned int dpte_row_width_chroma_ub[],
4852 double DST_Y_PER_PTE_ROW_NOM_L[],
4853 double DST_Y_PER_PTE_ROW_NOM_C[],
4854 double DST_Y_PER_META_ROW_NOM_L[],
4855 double DST_Y_PER_META_ROW_NOM_C[],
4856 double TimePerMetaChunkNominal[],
4857 double TimePerChromaMetaChunkNominal[],
4858 double TimePerMetaChunkVBlank[],
4859 double TimePerChromaMetaChunkVBlank[],
4860 double TimePerMetaChunkFlip[],
4861 double TimePerChromaMetaChunkFlip[],
4862 double time_per_pte_group_nom_luma[],
4863 double time_per_pte_group_vblank_luma[],
4864 double time_per_pte_group_flip_luma[],
4865 double time_per_pte_group_nom_chroma[],
4866 double time_per_pte_group_vblank_chroma[],
4867 double time_per_pte_group_flip_chroma[])
4869 unsigned int meta_chunk_width;
4870 unsigned int min_meta_chunk_width;
4871 unsigned int meta_chunk_per_row_int;
4872 unsigned int meta_row_remainder;
4873 unsigned int meta_chunk_threshold;
4874 unsigned int meta_chunks_per_row_ub;
4875 unsigned int meta_chunk_width_chroma;
4876 unsigned int min_meta_chunk_width_chroma;
4877 unsigned int meta_chunk_per_row_int_chroma;
4878 unsigned int meta_row_remainder_chroma;
4879 unsigned int meta_chunk_threshold_chroma;
4880 unsigned int meta_chunks_per_row_ub_chroma;
4881 unsigned int dpte_group_width_luma;
4882 unsigned int dpte_groups_per_row_luma_ub;
4883 unsigned int dpte_group_width_chroma;
4884 unsigned int dpte_groups_per_row_chroma_ub;
4887 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4888 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4889 if (BytePerPixelC[k] == 0)
4890 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4892 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4893 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4894 if (BytePerPixelC[k] == 0)
4895 DST_Y_PER_META_ROW_NOM_C[k] = 0;
4897 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4900 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4901 if (DCCEnable[k] == true) {
4902 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4903 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4904 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4905 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4906 if (!IsVertical(SourceRotation[k]))
4907 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4909 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4911 if (meta_row_remainder <= meta_chunk_threshold)
4912 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4914 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4916 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4917 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4918 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4919 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4920 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4921 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4922 if (BytePerPixelC[k] == 0) {
4923 TimePerChromaMetaChunkNominal[k] = 0;
4924 TimePerChromaMetaChunkVBlank[k] = 0;
4925 TimePerChromaMetaChunkFlip[k] = 0;
4927 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4928 meta_row_height_chroma[k];
4929 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4930 meta_row_height_chroma[k];
4931 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4932 meta_chunk_width_chroma;
4933 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4934 if (!IsVertical(SourceRotation[k])) {
4935 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4936 meta_req_width_chroma[k];
4938 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4939 meta_req_height_chroma[k];
4941 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4942 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4944 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4946 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4947 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4948 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4949 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4950 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4951 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4954 TimePerMetaChunkNominal[k] = 0;
4955 TimePerMetaChunkVBlank[k] = 0;
4956 TimePerMetaChunkFlip[k] = 0;
4957 TimePerChromaMetaChunkNominal[k] = 0;
4958 TimePerChromaMetaChunkVBlank[k] = 0;
4959 TimePerChromaMetaChunkFlip[k] = 0;
4963 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4964 if (GPUVMEnable == true) {
4965 if (!IsVertical(SourceRotation[k])) {
4966 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4967 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4969 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4970 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
4973 if (use_one_row_for_frame[k]) {
4974 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4975 (double) dpte_group_width_luma / 2.0, 1.0);
4977 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4978 (double) dpte_group_width_luma, 1.0);
4980 #ifdef __DML_VBA_DEBUG__
4981 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
4982 __func__, k, use_one_row_for_frame[k]);
4983 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
4984 __func__, k, dpte_group_bytes[k]);
4985 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
4986 __func__, k, PTERequestSizeY[k]);
4987 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
4988 __func__, k, PixelPTEReqWidthY[k]);
4989 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
4990 __func__, k, PixelPTEReqHeightY[k]);
4991 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
4992 __func__, k, dpte_row_width_luma_ub[k]);
4993 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
4994 __func__, k, dpte_group_width_luma);
4995 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
4996 __func__, k, dpte_groups_per_row_luma_ub);
4999 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5000 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5001 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5002 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5003 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5004 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5005 if (BytePerPixelC[k] == 0) {
5006 time_per_pte_group_nom_chroma[k] = 0;
5007 time_per_pte_group_vblank_chroma[k] = 0;
5008 time_per_pte_group_flip_chroma[k] = 0;
5010 if (!IsVertical(SourceRotation[k])) {
5011 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5012 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5014 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5015 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5018 if (use_one_row_for_frame[k]) {
5019 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5020 (double) dpte_group_width_chroma / 2.0, 1.0);
5022 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5023 (double) dpte_group_width_chroma, 1.0);
5025 #ifdef __DML_VBA_DEBUG__
5026 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
5027 __func__, k, dpte_row_width_chroma_ub[k]);
5028 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
5029 __func__, k, dpte_group_width_chroma);
5030 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
5031 __func__, k, dpte_groups_per_row_chroma_ub);
5033 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5034 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5035 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5036 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5037 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5038 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5041 time_per_pte_group_nom_luma[k] = 0;
5042 time_per_pte_group_vblank_luma[k] = 0;
5043 time_per_pte_group_flip_luma[k] = 0;
5044 time_per_pte_group_nom_chroma[k] = 0;
5045 time_per_pte_group_vblank_chroma[k] = 0;
5046 time_per_pte_group_flip_chroma[k] = 0;
5048 #ifdef __DML_VBA_DEBUG__
5049 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
5050 __func__, k, DestinationLinesToRequestRowInVBlank[k]);
5051 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
5052 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5053 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
5054 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5055 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
5056 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5057 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
5058 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5059 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
5060 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5061 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
5062 __func__, k, TimePerMetaChunkNominal[k]);
5063 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
5064 __func__, k, TimePerMetaChunkVBlank[k]);
5065 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
5066 __func__, k, TimePerMetaChunkFlip[k]);
5067 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
5068 __func__, k, TimePerChromaMetaChunkNominal[k]);
5069 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
5070 __func__, k, TimePerChromaMetaChunkVBlank[k]);
5071 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
5072 __func__, k, TimePerChromaMetaChunkFlip[k]);
5073 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
5074 __func__, k, time_per_pte_group_nom_luma[k]);
5075 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
5076 __func__, k, time_per_pte_group_vblank_luma[k]);
5077 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
5078 __func__, k, time_per_pte_group_flip_luma[k]);
5079 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
5080 __func__, k, time_per_pte_group_nom_chroma[k]);
5081 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5082 __func__, k, time_per_pte_group_vblank_chroma[k]);
5083 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
5084 __func__, k, time_per_pte_group_flip_chroma[k]);
5087 } // CalculateMetaAndPTETimes
/*
 * dml32_CalculateVMGroupAndRequestTimes - per-surface time budget for each
 * VM group and each VM request, for the VBlank and immediate-flip cases.
 *
 * For every surface k in [0, NumberOfActiveSurfaces):
 *
 *  - When GPUVMEnable is set and either DCCEnable[k] is set or
 *    GPUVMMaxPageTableLevels > 1, the function derives
 *      * a group count: the dpde0 and/or meta-PTE per-frame byte counts,
 *        each divided by vm_group_bytes[k] and rounded up with dml_ceil();
 *        when both DCC and more than one page table level are in use, one
 *        extra group is added for luma and one more when chroma is present;
 *      * a request count: the same byte counts, each divided by 64 with
 *        integer (truncating) division.
 *    Which byte counts take part depends on DCCEnable[k], on
 *    GPUVMMaxPageTableLevels == 1, and on BytePerPixelC[k] > 0 (chroma
 *    plane present).
 *    Each output is then
 *      DestinationLinesToRequestVM*[k] * HTotal[k] / PixelClock[k] / count
 *    and is halved when GPUVMMaxPageTableLevels > 2.
 *
 *  - Otherwise all four outputs for surface k are set to 0.
 *
 * NOTE(review): a request count of 0 (all contributing byte counts < 64)
 * makes the TimePerVMRequest* divisions a floating-point divide by zero;
 * the function does not guard against it.
 *
 * dpte_row_width_luma_ub[] and dpte_row_width_chroma_ub[] are accepted but
 * not read by this function.
 *
 * Outputs (arrays indexed by surface): TimePerVMGroupVBlank,
 * TimePerVMGroupFlip, TimePerVMRequestVBlank, TimePerVMRequestFlip.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double    DestinationLinesToRequestVMInVBlank[],
		double    DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double    PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int        vm_group_bytes[],
		unsigned int        dpde0_bytes_per_frame_ub_l[],
		unsigned int        dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double    TimePerVMGroupVBlank[],
		double    TimePerVMGroupFlip[],
		double    TimePerVMRequestVBlank[],
		double    TimePerVMRequestFlip[])
{
	unsigned int k;
	unsigned int   num_group_per_lower_vm_stage;
	unsigned int   num_req_per_lower_vm_stage;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
			/* Number of VM groups: byte counts rounded up to whole groups. */
			if (DCCEnable[k] == false) {
				/* No DCC: only the dpde0 bytes contribute. */
				if (BytePerPixelC[k] > 0) {
					num_group_per_lower_vm_stage = dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0) +
							dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
				} else {
					num_group_per_lower_vm_stage = dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0);
				}
			} else {
				if (GPUVMMaxPageTableLevels == 1) {
					/* DCC with a single level: only the meta PTE bytes contribute. */
					if (BytePerPixelC[k] > 0) {
						num_group_per_lower_vm_stage = dml_ceil(
								(double) (meta_pte_bytes_per_frame_ub_l[k]) /
								(double) (vm_group_bytes[k]), 1.0) +
								dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
								(double) (vm_group_bytes[k]), 1.0);
					} else {
						num_group_per_lower_vm_stage = dml_ceil(
								(double) (meta_pte_bytes_per_frame_ub_l[k]) /
								(double) (vm_group_bytes[k]), 1.0);
					}
				} else {
					/*
					 * DCC with several levels: dpde0 and meta PTE bytes
					 * both contribute, plus one extra group per plane.
					 */
					if (BytePerPixelC[k] > 0) {
						num_group_per_lower_vm_stage = 2 + dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) +
							dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1) +
							dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) +
							dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1);
					} else {
						num_group_per_lower_vm_stage = 1 + dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) + dml_ceil(
							(double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1);
					}
				}
			}

			/* Number of VM requests: byte counts / 64, truncating. */
			if (DCCEnable[k] == false) {
				if (BytePerPixelC[k] > 0) {
					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
							dpde0_bytes_per_frame_ub_c[k] / 64;
				} else {
					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
				}
			} else {
				if (GPUVMMaxPageTableLevels == 1) {
					if (BytePerPixelC[k] > 0) {
						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
							meta_pte_bytes_per_frame_ub_c[k] / 64;
					} else {
						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
					}
				} else {
					if (BytePerPixelC[k] > 0) {
						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
								64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
								meta_pte_bytes_per_frame_ub_l[k] / 64 +
								meta_pte_bytes_per_frame_ub_c[k] / 64;
					} else {
						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
								64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
					}
				}
			}

			/* lines * (HTotal / PixelClock) spread evenly over the groups/requests. */
			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;

			/* More than two page table levels: every budget is halved. */
			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
				TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
				TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
				TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
			}

		} else {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5243 void dml32_CalculateDCCConfiguration(
5245 bool DCCProgrammingAssumesScanDirectionUnknown,
5246 enum source_format_class SourcePixelFormat,
5247 unsigned int SurfaceWidthLuma,
5248 unsigned int SurfaceWidthChroma,
5249 unsigned int SurfaceHeightLuma,
5250 unsigned int SurfaceHeightChroma,
5251 unsigned int nomDETInKByte,
5252 unsigned int RequestHeight256ByteLuma,
5253 unsigned int RequestHeight256ByteChroma,
5254 enum dm_swizzle_mode TilingFormat,
5255 unsigned int BytePerPixelY,
5256 unsigned int BytePerPixelC,
5257 double BytePerPixelDETY,
5258 double BytePerPixelDETC,
5259 enum dm_rotation_angle SourceRotation,
5261 unsigned int *MaxUncompressedBlockLuma,
5262 unsigned int *MaxUncompressedBlockChroma,
5263 unsigned int *MaxCompressedBlockLuma,
5264 unsigned int *MaxCompressedBlockChroma,
5265 unsigned int *IndependentBlockLuma,
5266 unsigned int *IndependentBlockChroma)
5270 REQ_128BytesNonContiguous,
5271 REQ_128BytesContiguous,
5275 RequestType RequestLuma;
5276 RequestType RequestChroma;
5278 unsigned int segment_order_horz_contiguous_luma;
5279 unsigned int segment_order_horz_contiguous_chroma;
5280 unsigned int segment_order_vert_contiguous_luma;
5281 unsigned int segment_order_vert_contiguous_chroma;
5282 unsigned int req128_horz_wc_l;
5283 unsigned int req128_horz_wc_c;
5284 unsigned int req128_vert_wc_l;
5285 unsigned int req128_vert_wc_c;
5286 unsigned int MAS_vp_horz_limit;
5287 unsigned int MAS_vp_vert_limit;
5288 unsigned int max_vp_horz_width;
5289 unsigned int max_vp_vert_height;
5290 unsigned int eff_surf_width_l;
5291 unsigned int eff_surf_width_c;
5292 unsigned int eff_surf_height_l;
5293 unsigned int eff_surf_height_c;
5294 unsigned int full_swath_bytes_horz_wc_l;
5295 unsigned int full_swath_bytes_horz_wc_c;
5296 unsigned int full_swath_bytes_vert_wc_l;
5297 unsigned int full_swath_bytes_vert_wc_c;
5298 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5300 unsigned int yuv420;
5301 unsigned int horz_div_l;
5302 unsigned int horz_div_c;
5303 unsigned int vert_div_l;
5304 unsigned int vert_div_c;
5306 unsigned int swath_buf_size;
5307 double detile_buf_vp_horz_limit;
5308 double detile_buf_vp_vert_limit;
5310 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5311 SourcePixelFormat == dm_420_12) ? 1 : 0);
5317 if (BytePerPixelY == 1)
5319 if (BytePerPixelC == 1)
5322 if (BytePerPixelC == 0) {
5323 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5324 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5325 BytePerPixelY / (1 + horz_div_l));
5326 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5329 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5330 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5331 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5332 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5333 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5334 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5335 (1 + vert_div_c) / (1 + yuv420));
5338 if (SourcePixelFormat == dm_420_10) {
5339 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5340 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5343 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5344 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5346 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5347 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5348 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5349 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5350 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5351 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
5352 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5353 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
5355 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5356 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5357 if (BytePerPixelC > 0) {
5358 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5359 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5361 full_swath_bytes_horz_wc_c = 0;
5362 full_swath_bytes_vert_wc_c = 0;
5365 if (SourcePixelFormat == dm_420_10) {
5366 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5367 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5368 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5369 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5372 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5373 req128_horz_wc_l = 0;
5374 req128_horz_wc_c = 0;
5375 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5376 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5377 req128_horz_wc_l = 0;
5378 req128_horz_wc_c = 1;
5379 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5380 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5381 req128_horz_wc_l = 1;
5382 req128_horz_wc_c = 0;
5384 req128_horz_wc_l = 1;
5385 req128_horz_wc_c = 1;
5388 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5389 req128_vert_wc_l = 0;
5390 req128_vert_wc_c = 0;
5391 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5392 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5393 req128_vert_wc_l = 0;
5394 req128_vert_wc_c = 1;
5395 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5396 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5397 req128_vert_wc_l = 1;
5398 req128_vert_wc_c = 0;
5400 req128_vert_wc_l = 1;
5401 req128_vert_wc_c = 1;
5404 if (BytePerPixelY == 2) {
5405 segment_order_horz_contiguous_luma = 0;
5406 segment_order_vert_contiguous_luma = 1;
5408 segment_order_horz_contiguous_luma = 1;
5409 segment_order_vert_contiguous_luma = 0;
5412 if (BytePerPixelC == 2) {
5413 segment_order_horz_contiguous_chroma = 0;
5414 segment_order_vert_contiguous_chroma = 1;
5416 segment_order_horz_contiguous_chroma = 1;
5417 segment_order_vert_contiguous_chroma = 0;
5419 #ifdef __DML_VBA_DEBUG__
5420 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5421 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5422 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5423 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5424 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5425 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5426 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5427 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5428 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5429 __func__, segment_order_horz_contiguous_chroma);
5432 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5433 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5434 RequestLuma = REQ_256Bytes;
5435 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5436 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5437 RequestLuma = REQ_128BytesNonContiguous;
5439 RequestLuma = REQ_128BytesContiguous;
5441 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5442 RequestChroma = REQ_256Bytes;
5443 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5444 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5445 RequestChroma = REQ_128BytesNonContiguous;
5447 RequestChroma = REQ_128BytesContiguous;
5449 } else if (!IsVertical(SourceRotation)) {
5450 if (req128_horz_wc_l == 0)
5451 RequestLuma = REQ_256Bytes;
5452 else if (segment_order_horz_contiguous_luma == 0)
5453 RequestLuma = REQ_128BytesNonContiguous;
5455 RequestLuma = REQ_128BytesContiguous;
5457 if (req128_horz_wc_c == 0)
5458 RequestChroma = REQ_256Bytes;
5459 else if (segment_order_horz_contiguous_chroma == 0)
5460 RequestChroma = REQ_128BytesNonContiguous;
5462 RequestChroma = REQ_128BytesContiguous;
5465 if (req128_vert_wc_l == 0)
5466 RequestLuma = REQ_256Bytes;
5467 else if (segment_order_vert_contiguous_luma == 0)
5468 RequestLuma = REQ_128BytesNonContiguous;
5470 RequestLuma = REQ_128BytesContiguous;
5472 if (req128_vert_wc_c == 0)
5473 RequestChroma = REQ_256Bytes;
5474 else if (segment_order_vert_contiguous_chroma == 0)
5475 RequestChroma = REQ_128BytesNonContiguous;
5477 RequestChroma = REQ_128BytesContiguous;
5480 if (RequestLuma == REQ_256Bytes) {
5481 *MaxUncompressedBlockLuma = 256;
5482 *MaxCompressedBlockLuma = 256;
5483 *IndependentBlockLuma = 0;
5484 } else if (RequestLuma == REQ_128BytesContiguous) {
5485 *MaxUncompressedBlockLuma = 256;
5486 *MaxCompressedBlockLuma = 128;
5487 *IndependentBlockLuma = 128;
5489 *MaxUncompressedBlockLuma = 256;
5490 *MaxCompressedBlockLuma = 64;
5491 *IndependentBlockLuma = 64;
5494 if (RequestChroma == REQ_256Bytes) {
5495 *MaxUncompressedBlockChroma = 256;
5496 *MaxCompressedBlockChroma = 256;
5497 *IndependentBlockChroma = 0;
5498 } else if (RequestChroma == REQ_128BytesContiguous) {
5499 *MaxUncompressedBlockChroma = 256;
5500 *MaxCompressedBlockChroma = 128;
5501 *IndependentBlockChroma = 128;
5503 *MaxUncompressedBlockChroma = 256;
5504 *MaxCompressedBlockChroma = 64;
5505 *IndependentBlockChroma = 64;
5508 if (DCCEnabled != true || BytePerPixelC == 0) {
5509 *MaxUncompressedBlockChroma = 0;
5510 *MaxCompressedBlockChroma = 0;
5511 *IndependentBlockChroma = 0;
5514 if (DCCEnabled != true) {
5515 *MaxUncompressedBlockLuma = 0;
5516 *MaxCompressedBlockLuma = 0;
5517 *IndependentBlockLuma = 0;
5520 #ifdef __DML_VBA_DEBUG__
5521 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5522 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5523 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5524 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5525 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5526 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5529 } // CalculateDCCConfiguration
5531 void dml32_CalculateStutterEfficiency(
5532 unsigned int CompressedBufferSizeInkByte,
5533 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5534 bool UnboundedRequestEnabled,
5535 unsigned int MetaFIFOSizeInKEntries,
5536 unsigned int ZeroSizeBufferEntries,
5537 unsigned int PixelChunkSizeInKByte,
5538 unsigned int NumberOfActiveSurfaces,
5539 unsigned int ROBBufferSizeInKByte,
5540 double TotalDataReadBandwidth,
5543 unsigned int CompbufReservedSpace64B,
5544 unsigned int CompbufReservedSpaceZs,
5546 double SRExitZ8Time,
5547 bool SynchronizeTimingsFinal,
5548 unsigned int BlendingAndTiming[],
5549 double StutterEnterPlusExitWatermark,
5550 double Z8StutterEnterPlusExitWatermark,
5551 bool ProgressiveToInterlaceUnitInOPP,
5553 double MinTTUVBlank[],
5554 unsigned int DPPPerSurface[],
5555 unsigned int DETBufferSizeY[],
5556 unsigned int BytePerPixelY[],
5557 double BytePerPixelDETY[],
5558 double SwathWidthY[],
5559 unsigned int SwathHeightY[],
5560 unsigned int SwathHeightC[],
5561 double NetDCCRateLuma[],
5562 double NetDCCRateChroma[],
5563 double DCCFractionOfZeroSizeRequestsLuma[],
5564 double DCCFractionOfZeroSizeRequestsChroma[],
5565 unsigned int HTotal[],
5566 unsigned int VTotal[],
5567 double PixelClock[],
5569 enum dm_rotation_angle SourceRotation[],
5570 unsigned int BlockHeight256BytesY[],
5571 unsigned int BlockWidth256BytesY[],
5572 unsigned int BlockHeight256BytesC[],
5573 unsigned int BlockWidth256BytesC[],
5574 unsigned int DCCYMaxUncompressedBlock[],
5575 unsigned int DCCCMaxUncompressedBlock[],
5576 unsigned int VActive[],
5578 bool WritebackEnable[],
5579 double ReadBandwidthSurfaceLuma[],
5580 double ReadBandwidthSurfaceChroma[],
5581 double meta_row_bw[],
5582 double dpte_row_bw[],
5585 double *StutterEfficiencyNotIncludingVBlank,
5586 double *StutterEfficiency,
5587 unsigned int *NumberOfStutterBurstsPerFrame,
5588 double *Z8StutterEfficiencyNotIncludingVBlank,
5589 double *Z8StutterEfficiency,
5590 unsigned int *Z8NumberOfStutterBurstsPerFrame,
5591 double *StutterPeriod,
5592 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5595 bool FoundCriticalSurface = false;
5596 unsigned int SwathSizeCriticalSurface = 0;
5597 unsigned int LastChunkOfSwathSize;
5598 unsigned int MissingPartOfLastSwathOfDETSize;
5599 double LastZ8StutterPeriod = 0.0;
5600 double LastStutterPeriod = 0.0;
5601 unsigned int TotalNumberOfActiveOTG = 0;
5602 double doublePixelClock;
5603 unsigned int doubleHTotal;
5604 unsigned int doubleVTotal;
5605 bool SameTiming = true;
5606 double DETBufferingTimeY;
5607 double SwathWidthYCriticalSurface = 0.0;
5608 double SwathHeightYCriticalSurface = 0.0;
5609 double VActiveTimeCriticalSurface = 0.0;
5610 double FrameTimeCriticalSurface = 0.0;
5611 unsigned int BytePerPixelYCriticalSurface = 0;
5612 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5613 unsigned int DETBufferSizeYCriticalSurface = 0;
5614 double MinTTUVBlankCriticalSurface = 0.0;
5615 unsigned int BlockWidth256BytesYCriticalSurface = 0;
5616 bool doublePlaneCriticalSurface = 0;
5617 bool doublePipeCriticalSurface = 0;
5618 double TotalCompressedReadBandwidth;
5619 double TotalRowReadBandwidth;
5620 double AverageDCCCompressionRate;
5621 double EffectiveCompressedBufferSize;
5622 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5623 double StutterBurstTime;
5624 unsigned int TotalActiveWriteback;
5626 double LinesInDETYRoundedDownToSwath;
5627 double MaximumEffectiveCompressionLuma;
5628 double MaximumEffectiveCompressionChroma;
5629 double TotalZeroSizeRequestReadBandwidth;
5630 double TotalZeroSizeCompressedReadBandwidth;
5631 double AverageDCCZeroSizeFraction;
5632 double AverageZeroSizeCompressionRate;
5635 TotalZeroSizeRequestReadBandwidth = 0;
5636 TotalZeroSizeCompressedReadBandwidth = 0;
5637 TotalRowReadBandwidth = 0;
5638 TotalCompressedReadBandwidth = 0;
5640 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5641 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5642 if (DCCEnable[k] == true) {
5643 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5644 || (!IsVertical(SourceRotation[k])
5645 && BlockHeight256BytesY[k] > SwathHeightY[k])
5646 || DCCYMaxUncompressedBlock[k] < 256) {
5647 MaximumEffectiveCompressionLuma = 2;
5649 MaximumEffectiveCompressionLuma = 4;
5651 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5652 + ReadBandwidthSurfaceLuma[k]
5653 / dml_min(NetDCCRateLuma[k],
5654 MaximumEffectiveCompressionLuma);
5655 #ifdef __DML_VBA_DEBUG__
5656 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5657 __func__, k, ReadBandwidthSurfaceLuma[k]);
5658 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5659 __func__, k, NetDCCRateLuma[k]);
5660 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5661 __func__, k, MaximumEffectiveCompressionLuma);
5663 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5664 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5665 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5666 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5667 / MaximumEffectiveCompressionLuma;
5669 if (ReadBandwidthSurfaceChroma[k] > 0) {
5670 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5671 || (!IsVertical(SourceRotation[k])
5672 && BlockHeight256BytesC[k] > SwathHeightC[k])
5673 || DCCCMaxUncompressedBlock[k] < 256) {
5674 MaximumEffectiveCompressionChroma = 2;
5676 MaximumEffectiveCompressionChroma = 4;
5678 TotalCompressedReadBandwidth =
5679 TotalCompressedReadBandwidth
5680 + ReadBandwidthSurfaceChroma[k]
5681 / dml_min(NetDCCRateChroma[k],
5682 MaximumEffectiveCompressionChroma);
5683 #ifdef __DML_VBA_DEBUG__
5684 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5685 __func__, k, ReadBandwidthSurfaceChroma[k]);
5686 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5687 __func__, k, NetDCCRateChroma[k]);
5688 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5689 __func__, k, MaximumEffectiveCompressionChroma);
5691 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5692 + ReadBandwidthSurfaceChroma[k]
5693 * DCCFractionOfZeroSizeRequestsChroma[k];
5694 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5695 + ReadBandwidthSurfaceChroma[k]
5696 * DCCFractionOfZeroSizeRequestsChroma[k]
5697 / MaximumEffectiveCompressionChroma;
5700 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5701 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5703 TotalRowReadBandwidth = TotalRowReadBandwidth
5704 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5708 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5709 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5711 #ifdef __DML_VBA_DEBUG__
5712 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5713 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5714 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5715 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5716 __func__, TotalZeroSizeCompressedReadBandwidth);
5717 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5718 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5719 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5720 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5721 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5722 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5723 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5725 if (AverageDCCZeroSizeFraction == 1) {
5726 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5727 / TotalZeroSizeCompressedReadBandwidth;
5728 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5729 * AverageZeroSizeCompressionRate
5730 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5731 * AverageZeroSizeCompressionRate;
5732 } else if (AverageDCCZeroSizeFraction > 0) {
5733 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5734 / TotalZeroSizeCompressedReadBandwidth;
5735 EffectiveCompressedBufferSize = dml_min(
5736 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5737 (double) MetaFIFOSizeInKEntries * 1024 * 64
5738 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5739 + 1 / AverageDCCCompressionRate))
5740 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5741 * AverageDCCCompressionRate,
5742 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5743 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5745 #ifdef __DML_VBA_DEBUG__
5746 dml_print("DML::%s: min 1 = %f\n", __func__,
5747 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5748 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5749 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5750 AverageDCCCompressionRate));
5751 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5752 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5753 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5754 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5757 EffectiveCompressedBufferSize = dml_min(
5758 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5759 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5760 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5761 * AverageDCCCompressionRate;
5763 #ifdef __DML_VBA_DEBUG__
5764 dml_print("DML::%s: min 1 = %f\n", __func__,
5765 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5766 dml_print("DML::%s: min 2 = %f\n", __func__,
5767 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5771 #ifdef __DML_VBA_DEBUG__
5772 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5773 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5774 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5779 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5780 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5781 LinesInDETY = ((double) DETBufferSizeY[k]
5782 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5783 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5784 / BytePerPixelDETY[k] / SwathWidthY[k];
5785 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5786 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5788 #ifdef __DML_VBA_DEBUG__
5789 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5790 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5791 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5792 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5793 __func__, k, ReadBandwidthSurfaceLuma[k]);
5794 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5795 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5796 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5797 __func__, k, LinesInDETYRoundedDownToSwath);
5798 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5799 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5800 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5801 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5802 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5805 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5806 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5808 FoundCriticalSurface = true;
5809 *StutterPeriod = DETBufferingTimeY;
5810 FrameTimeCriticalSurface = (
5812 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5813 * (double) HTotal[k] / PixelClock[k];
5814 VActiveTimeCriticalSurface = (
5816 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5817 * (double) HTotal[k] / PixelClock[k];
5818 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5819 SwathWidthYCriticalSurface = SwathWidthY[k];
5820 SwathHeightYCriticalSurface = SwathHeightY[k];
5821 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5822 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5823 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5824 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5825 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5826 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5827 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5829 #ifdef __DML_VBA_DEBUG__
5830 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
5831 __func__, k, FoundCriticalSurface);
5832 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
5833 __func__, k, *StutterPeriod);
5834 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
5835 __func__, k, MinTTUVBlankCriticalSurface);
5836 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
5837 __func__, k, FrameTimeCriticalSurface);
5838 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
5839 __func__, k, VActiveTimeCriticalSurface);
5840 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
5841 __func__, k, BytePerPixelYCriticalSurface);
5842 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
5843 __func__, k, SwathWidthYCriticalSurface);
5844 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
5845 __func__, k, SwathHeightYCriticalSurface);
5846 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
5847 __func__, k, BlockWidth256BytesYCriticalSurface);
5848 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
5849 __func__, k, doublePlaneCriticalSurface);
5850 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
5851 __func__, k, doublePipeCriticalSurface);
5852 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5853 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5859 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5860 EffectiveCompressedBufferSize);
5861 #ifdef __DML_VBA_DEBUG__
5862 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5863 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5864 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5865 __func__, *StutterPeriod * TotalDataReadBandwidth);
5866 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5867 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5868 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5869 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5870 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5871 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5872 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5875 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5877 + (*StutterPeriod * TotalDataReadBandwidth
5878 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5879 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5880 #ifdef __DML_VBA_DEBUG__
5881 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5882 AverageDCCCompressionRate / ReturnBW);
5883 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5884 __func__, (*StutterPeriod * TotalDataReadBandwidth));
5885 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5886 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5887 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5888 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5890 StutterBurstTime = dml_max(StutterBurstTime,
5891 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5892 * SwathWidthYCriticalSurface / ReturnBW);
5894 #ifdef __DML_VBA_DEBUG__
5895 dml_print("DML::%s: Time to finish residue swath=%f\n",
5897 LinesToFinishSwathTransferStutterCriticalSurface *
5898 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5901 TotalActiveWriteback = 0;
5902 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5903 if (WritebackEnable[k])
5904 TotalActiveWriteback = TotalActiveWriteback + 1;
5907 if (TotalActiveWriteback == 0) {
5908 #ifdef __DML_VBA_DEBUG__
5909 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5910 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5911 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5912 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5914 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5915 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5916 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5917 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5918 *NumberOfStutterBurstsPerFrame = (
5919 *StutterEfficiencyNotIncludingVBlank > 0 ?
5920 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5921 *Z8NumberOfStutterBurstsPerFrame = (
5922 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5923 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5925 *StutterEfficiencyNotIncludingVBlank = 0.;
5926 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5927 *NumberOfStutterBurstsPerFrame = 0;
5928 *Z8NumberOfStutterBurstsPerFrame = 0;
5930 #ifdef __DML_VBA_DEBUG__
5931 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5932 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5933 __func__, *StutterEfficiencyNotIncludingVBlank);
5934 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5935 __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5936 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5937 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5940 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5941 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5942 if (BlendingAndTiming[k] == k) {
5943 if (TotalNumberOfActiveOTG == 0) {
5944 doublePixelClock = PixelClock[k];
5945 doubleHTotal = HTotal[k];
5946 doubleVTotal = VTotal[k];
5947 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5948 || doubleVTotal != VTotal[k]) {
5951 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5956 if (*StutterEfficiencyNotIncludingVBlank > 0) {
5957 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5959 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5960 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5961 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5962 + StutterBurstTime * VActiveTimeCriticalSurface
5963 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5965 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
5968 *StutterEfficiency = 0;
5971 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
5972 LastZ8StutterPeriod = VActiveTimeCriticalSurface
5973 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5974 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
5975 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
5976 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
5977 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5979 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
5982 *Z8StutterEfficiency = 0.;
5985 #ifdef __DML_VBA_DEBUG__
5986 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
5987 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
5988 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5989 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5990 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
5991 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
5992 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5993 __func__, *StutterEfficiencyNotIncludingVBlank);
5994 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5997 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
5998 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
5999 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6000 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6001 - DETBufferSizeYCriticalSurface;
6003 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6004 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6005 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6006 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6008 #ifdef __DML_VBA_DEBUG__
6009 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6010 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6011 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6012 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6014 } // CalculateStutterEfficiency
/*
 * dml32_CalculateMaxDETAndMinCompressedBufferSize - split the return buffer
 * into a DET budget and a remaining compressed-buffer size.
 *
 * Inputs:
 *   ConfigReturnBufferSizeInKByte - configured return buffer size
 *   ROBBufferSizeInKByte          - ROB buffer size
 *   MaxNumDPP                     - divisor used for the nominal per-DPP DET size
 *   nomDETInKByteOverrideEnable   - VBA_DELTA, allow DV to override default DET size
 *   nomDETInKByteOverrideValue    - VBA_DELTA, value stored when the override is on
 *
 * Outputs:
 *   *MaxTotalDETInKByte             - dml_ceil(4/5 * (return buffer + ROB), 64)
 *   *nomDETInKByte                  - dml_floor(*MaxTotalDETInKByte / MaxNumDPP, 64),
 *                                     replaced by the override value when enabled
 *   *MinCompressedBufferSizeInKByte - return buffer size minus *MaxTotalDETInKByte
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable,
		unsigned int nomDETInKByteOverrideValue,

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double return_and_rob_kbytes =
			(double) ConfigReturnBufferSizeInKByte + (double) ROBBufferSizeInKByte;

	/* Default sizing; the later values are derived from the stored total. */
	*MaxTotalDETInKByte = dml_ceil(return_and_rob_kbytes * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n",
			__func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n",
			__func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n",
			__func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n",
			__func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n",
			__func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n",
			__func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Nothing more to do unless the caller forces a nominal DET size. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
/*
 * dml32_CalculateVActiveBandwithSupport - check whether the summed per-surface
 * bandwidth fits within ReturnBW.
 *
 * For each of the NumberOfActiveSurfaces surfaces the body adds up:
 *     ReadBandwidthLuma[k]   * UrgentBurstFactorLuma[k]
 *   + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 *   + cursor_bw[k]           * UrgentBurstFactorCursor[k]
 *   + NumberOfDPP[k] * meta_row_bandwidth[k]
 *   + NumberOfDPP[k] * dpte_row_bandwidth[k]
 *
 * Return: true only when that total is <= ReturnBW and no entry of
 * NotUrgentLatencyHiding[] is set; false otherwise.
 *
 * NOTE(review): k, ReturnBW and cursor_bw are used in the body but their
 * declarations are not part of the text shown here; confirm them against the
 * full signature before relying on this description.
 */
6053 bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
6055 bool NotUrgentLatencyHiding[],
6056 double ReadBandwidthLuma[],
6057 double ReadBandwidthChroma[],
6059 double meta_row_bandwidth[],
6060 double dpte_row_bandwidth[],
6061 unsigned int NumberOfDPP[],
6062 double UrgentBurstFactorLuma[],
6063 double UrgentBurstFactorChroma[],
6064 double UrgentBurstFactorCursor[])
6067 bool NotEnoughUrgentLatencyHiding = false;
6068 bool CalculateVActiveBandwithSupport_val = false;
6069 double VActiveBandwith = 0;
/* Any flagged surface makes the whole configuration unsupported. */
6071 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6072 if (NotUrgentLatencyHiding[k]) {
6073 NotEnoughUrgentLatencyHiding = true;
/* Accumulate the burst-factor-scaled read traffic plus per-DPP row traffic. */
6077 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6078 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k];
/* Supported only if the total fits in ReturnBW and no surface was flagged. */
6081 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6083 #ifdef __DML_VBA_DEBUG__
6084 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding);
6085 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith);
6086 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6087 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val);
6089 return CalculateVActiveBandwithSupport_val;
/*
 * dml32_CalculatePrefetchBandwithSupport - total the per-surface bandwidth
 * and compare it against ReturnBW.
 *
 * Each surface contributes dml_max3() of three candidate terms:
 *   1. NumberOfDPP[k] * prefetch_vmrow_bw[k]
 *   2. luma, chroma and cursor read bandwidth, each scaled by its
 *      UrgentBurstFactor*, plus NumberOfDPP[k] * (meta + dpte row bandwidth)
 *   3. NumberOfDPP[k] * (prefetch luma and chroma bandwidth, each scaled by its
 *      UrgentBurstFactor*Pre) plus cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]
 *
 * Outputs:
 *   *PrefetchBandwidth         - sum of the per-surface terms
 *   *PrefetchBandwidthSupport  - true when the sum is <= ReturnBW and no entry
 *                                of NotUrgentLatencyHiding[] is set
 *   *FractionOfUrgentBandwidth - *PrefetchBandwidth / ReturnBW
 *
 * NOTE(review): k, ReturnBW and cursor_bw are used in the body but their
 * declarations are not part of the text shown here; confirm them against the
 * full signature. The division by ReturnBW has no visible zero guard.
 */
6092 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6094 bool NotUrgentLatencyHiding[],
6095 double ReadBandwidthLuma[],
6096 double ReadBandwidthChroma[],
6097 double PrefetchBandwidthLuma[],
6098 double PrefetchBandwidthChroma[],
6100 double meta_row_bandwidth[],
6101 double dpte_row_bandwidth[],
6102 double cursor_bw_pre[],
6103 double prefetch_vmrow_bw[],
6104 unsigned int NumberOfDPP[],
6105 double UrgentBurstFactorLuma[],
6106 double UrgentBurstFactorChroma[],
6107 double UrgentBurstFactorCursor[],
6108 double UrgentBurstFactorLumaPre[],
6109 double UrgentBurstFactorChromaPre[],
6110 double UrgentBurstFactorCursorPre[],
6113 double *PrefetchBandwidth,
6114 double *FractionOfUrgentBandwidth,
6115 bool *PrefetchBandwidthSupport)
6118 bool NotEnoughUrgentLatencyHiding = false;
/* Any flagged surface makes the prefetch configuration unsupported. */
6119 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6120 if (NotUrgentLatencyHiding[k]) {
6121 NotEnoughUrgentLatencyHiding = true;
/* Sum, per surface, the dml_max3() of the three candidate terms. */
6125 *PrefetchBandwidth = 0;
6126 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6127 *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6128 ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]),
6129 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
/* Report the support verdict and the total as a fraction of ReturnBW. */
6132 *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6133 *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
/*
 * dml32_CalculateBandwidthAvailableForImmediateFlip - bandwidth remaining
 * out of ReturnBW after each surface's contribution is subtracted.
 *
 * The running value starts at ReturnBW. For every surface, dml_max() of the
 * following two terms is subtracted:
 *   1. luma, chroma and cursor read bandwidth, each scaled by its
 *      UrgentBurstFactor*
 *   2. NumberOfDPP[k] * (prefetch luma and chroma bandwidth, each scaled by its
 *      UrgentBurstFactor*Pre) plus cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]
 *
 * Return: the remaining value. The visible code applies no clamp, so the
 * result is whatever the subtraction yields.
 *
 * NOTE(review): k, ReturnBW and cursor_bw are used in the body but their
 * declarations are not part of the text shown here; confirm them against the
 * full signature.
 */
6136 double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
6138 double ReadBandwidthLuma[],
6139 double ReadBandwidthChroma[],
6140 double PrefetchBandwidthLuma[],
6141 double PrefetchBandwidthChroma[],
6143 double cursor_bw_pre[],
6144 unsigned int NumberOfDPP[],
6145 double UrgentBurstFactorLuma[],
6146 double UrgentBurstFactorChroma[],
6147 double UrgentBurstFactorCursor[],
6148 double UrgentBurstFactorLumaPre[],
6149 double UrgentBurstFactorChromaPre[],
6150 double UrgentBurstFactorCursorPre[])
6153 double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW;
/* Subtract the larger of the two per-surface terms from the running value. */
6155 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6156 CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6157 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6160 return CalculateBandwidthAvailableForImmediateFlip_val;
6163 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6165 enum immediate_flip_requirement ImmediateFlipRequirement[],
6166 double final_flip_bw[],
6167 double ReadBandwidthLuma[],
6168 double ReadBandwidthChroma[],
6169 double PrefetchBandwidthLuma[],
6170 double PrefetchBandwidthChroma[],
6172 double meta_row_bandwidth[],
6173 double dpte_row_bandwidth[],
6174 double cursor_bw_pre[],
6175 double prefetch_vmrow_bw[],
6176 unsigned int NumberOfDPP[],
6177 double UrgentBurstFactorLuma[],
6178 double UrgentBurstFactorChroma[],
6179 double UrgentBurstFactorCursor[],
6180 double UrgentBurstFactorLumaPre[],
6181 double UrgentBurstFactorChromaPre[],
6182 double UrgentBurstFactorCursorPre[],
6185 double *TotalBandwidth,
6186 double *FractionOfUrgentBandwidth,
6187 bool *ImmediateFlipBandwidthSupport)
6190 *TotalBandwidth = 0;
6191 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6192 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6193 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6194 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6195 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6197 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6198 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6199 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6202 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6203 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;