2 * Copyright 2022 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
/*
 * Estimate the DSC encoder (dscce) delay for one DSC engine.
 *
 * The code derives an initial transmit delay from the rate-control model
 * size, BPP and the pixels-per-clock of the format, combines it with the
 * slice geometry into a cycle count (Delay), and converts that count into
 * pixels (Delay * 3 * pixelsPerClock).
 *
 * sliceWidth:  number of pixels per slice line (valid range listed below)
 * numSlices:   number of horizontal slices per DSC engine
 * pixelFormat: selects pixelsPerClock and the extra-cycle term `s`
 * Output:      only passed to the debug print in this function body
 */
30 unsigned int dml32_dscceComputeDelay(
33 unsigned int sliceWidth,
34 unsigned int numSlices,
35 enum output_format_class pixelFormat,
36 enum output_encoder_class Output)
38 // valid bpc = source bits per component in the set of {8, 10, 12}
39 // valid bpp = increments of 1/16 of a bit
40 // min = 6/7/8 in N420/N422/444, respectively
41 // max = such that compression is 1:1
42 //valid sliceWidth = number of pixels per slice line,
43 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
44 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
45 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48 unsigned int rcModelSize = 8192;
50 // N422/N420 operate at 2 pixels per clock
51 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 if (pixelFormat == dm_420)
56 else if (pixelFormat == dm_n422)
58 // #all other modes operate at 1 pixel per clock
62 //initial transmit delay as per PPS
// Half of the rate-control model size, expressed in clocks at the given BPP.
63 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
73 //divide by pixel per cycle to compute slice width as seen by DSC
// NOTE(review): `w` is later used as a divisor (`ix % w`); a sliceWidth
// smaller than pixelsPerClock would make it zero — presumably callers
// guarantee a sane slice width, confirm.
74 w = sliceWidth / pixelsPerClock;
76 //422 mode has an additional cycle of delay
77 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
82 //main calculation for the dscce
// The fixed 45 is added on top of the initial transmit delay.
83 ix = initalXmitDelay + 45;
// (a + 2) / 3 is an integer ceiling division by 3.
88 ax = (a + 2) / 3 + D + 6 + 1;
// Integer ceiling of ax / wx.
89 L = (ax + wx - 1) / wx;
// Selects the lstall term: true when ix is an exact multiple of w and p is non-zero.
90 if ((ix % w) == 0 && p != 0)
// Total delay: L * wx for every slice after the first, plus the fixed and
// per-format terms.
94 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
96 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
97 pixels = Delay * 3 * pixelsPerClock;
99 #ifdef __DML_VBA_DEBUG__
100 dml_print("DML::%s: bpc: %d\n", __func__, bpc);
101 dml_print("DML::%s: BPP: %f\n", __func__, BPP);
102 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
103 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
104 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
105 dml_print("DML::%s: Output: %d\n", __func__, Output);
106 dml_print("DML::%s: pixels: %d\n", __func__, pixels);
/*
 * Accumulate the fixed DSC (dscc) pipeline delay for a pixel format.
 *
 * Delay starts at 0 and one of three format-dependent branches adds the
 * per-stage contributions named in the comments (deserializer, CDC FIFOs,
 * CDC uncertainty, serializer).
 *
 * pixelFormat: selects which of the three branches is taken
 * Output:      not referenced in the statements shown in this function
 */
112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
114 unsigned int Delay = 0;
// Branch 1: 4:2:0 — several stages are noted as receiving pixels every other cycle.
116 if (pixelFormat == dm_420) {
121 // dscc - input deserializer
123 // dscc gets pixels every other cycle
125 // dscc - input cdc fifo
127 // dscc gets pixels every other cycle
129 // dscc - cdc uncertainty
131 // dscc - output cdc fifo
133 // dscc gets pixels every other cycle
135 // dscc - cdc uncertainty
137 // dscc - output serializer
// Branch 2: every remaining format except dm_444.
// NOTE(review): the `dm_n422` test looks redundant — assuming dm_n422 and
// dm_444 are distinct enumerators, the condition reduces to
// `pixelFormat != dm_444`. Confirm the intent.
141 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
146 // dscc - input deserializer
148 // dscc - input cdc fifo
150 // dscc - cdc uncertainty
152 // dscc - output cdc fifo
154 // dscc - cdc uncertainty
156 // dscc - output serializer
// Branch 3 (remaining case, i.e. dm_444 given the condition above).
165 // dscc - input deserializer
167 // dscc - input cdc fifo
169 // dscc - cdc uncertainty
171 // dscc - output cdc fifo
173 // dscc - output serializer
175 // dscc - cdc uncertainty
/*
 * Classify a scan/rotation angle as vertical.
 *
 * The test below matches the 90 and 270 degree rotations and their `m`
 * variants; is_vert defaults to false.
 */
185 bool IsVertical(enum dm_rotation_angle Scan)
187 bool is_vert = false;
189 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
/*
 * Compute the scaler (PSCL) throughput for luma and chroma and the DPP clock
 * needed when a single DPP handles the surface.
 *
 * MaxDCHUBToPSCLThroughput: upper bound applied to both throughput results
 * MaxPSCLToLBThroughput:    second bound, optionally scaled by the H ratio
 *                           and divided by ceil(HTaps / 6)
 * SourcePixelFormat:        decides whether a chroma plane is evaluated
 * HTapsChroma, VTapsChroma: chroma scaler tap counts
 * PSCL_THROUGHPUT:          [out] luma throughput
 * PSCL_THROUGHPUT_CHROMA:   [out] chroma throughput, 0 when no chroma plane
 * DPPCLKUsingSingleDPP:     [out] max of the luma and chroma clock demands
 */
196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
201 double MaxDCHUBToPSCLThroughput,
202 double MaxPSCLToLBThroughput,
204 enum source_format_class SourcePixelFormat,
206 unsigned int HTapsChroma,
208 unsigned int VTapsChroma,
211 double *PSCL_THROUGHPUT,
212 double *PSCL_THROUGHPUT_CHROMA,
213 double *DPPCLKUsingSingleDPP)
215 double DPPCLKUsingSingleDPPLuma;
216 double DPPCLKUsingSingleDPPChroma;
// Luma throughput, two variants: scaled by HRatio / ceil(HTaps / 6), or
// unscaled. Presumably selected by HRatio > 1, mirroring the chroma path
// below — confirm.
219 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
220 dml_ceil((double) HTaps / 6.0, 1.0));
222 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
// Luma clock demand: pixel clock times the largest of the vertical-tap term,
// the ratio/throughput term, and 1.
// NOTE(review): if VTaps is an integer type like VTapsChroma, `VTaps / 6`
// truncates before the multiplication (contrast with the explicit
// `(double) HTaps / 6.0` above) — confirm this is intended.
225 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
226 *PSCL_THROUGHPUT, 1);
// More than 6 taps in either direction forces at least twice the pixel clock.
228 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
229 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
// Formats other than 4:2:0 (8/10/12) and rgbe_alpha have no chroma demand.
231 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
232 SourcePixelFormat != dm_rgbe_alpha)) {
233 *PSCL_THROUGHPUT_CHROMA = 0;
234 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
// Chroma path: same formulas as luma, using the chroma ratios and taps.
236 if (HRatioChroma > 1) {
237 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
238 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
240 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
// NOTE(review): VTapsChroma is unsigned int, so `VTapsChroma / 6` is an
// integer division and evaluates to 0 for fewer than 6 taps.
242 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
243 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
244 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
245 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
// The single DPP must satisfy whichever plane is more demanding.
246 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
/*
 * Derive per-plane bytes-per-pixel, 256-byte block dimensions and macro-tile
 * dimensions from the source pixel format and the surface tiling mode.
 *
 * SourcePixelFormat:       selects bytes per pixel and block heights
 * SurfaceTiling:           linear vs. 64KB swizzles vs. the remaining modes
 * BytePerPixelY/C:         [out] integer bytes per pixel per plane
 * BytePerPixelDETY/DETC:   [out] bytes per pixel as doubles (fractional in
 *                          the final fallback branch: 4/3 and 8/3)
 * BlockHeight256BytesY/C,
 * BlockWidth256BytesY/C:   [out] 256-byte block dimensions per plane
 * MacroTileHeightY/C,
 * MacroTileWidthY/C:       [out] macro-tile dimensions per plane
 *
 * All outputs for the C plane are 0 for the single-plane formats.
 */
250 void dml32_CalculateBytePerPixelAndBlockSizes(
251 enum source_format_class SourcePixelFormat,
252 enum dm_swizzle_mode SurfaceTiling,
255 unsigned int *BytePerPixelY,
256 unsigned int *BytePerPixelC,
257 double *BytePerPixelDETY,
258 double *BytePerPixelDETC,
259 unsigned int *BlockHeight256BytesY,
260 unsigned int *BlockHeight256BytesC,
261 unsigned int *BlockWidth256BytesY,
262 unsigned int *BlockWidth256BytesC,
263 unsigned int *MacroTileHeightY,
264 unsigned int *MacroTileHeightC,
265 unsigned int *MacroTileWidthY,
266 unsigned int *MacroTileWidthC)
// Step 1: bytes per pixel per plane, keyed on the pixel format.
// NOTE(review): dm_mono_8 / dm_mono_16 are treated as single-plane in step 2
// but have no dedicated branch in this chain — confirm which branch they are
// expected to take.
268 if (SourcePixelFormat == dm_444_64) {
269 *BytePerPixelDETY = 8;
270 *BytePerPixelDETC = 0;
273 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
274 *BytePerPixelDETY = 4;
275 *BytePerPixelDETC = 0;
278 } else if (SourcePixelFormat == dm_444_16) {
279 *BytePerPixelDETY = 2;
280 *BytePerPixelDETC = 0;
283 } else if (SourcePixelFormat == dm_444_8) {
284 *BytePerPixelDETY = 1;
285 *BytePerPixelDETC = 0;
288 } else if (SourcePixelFormat == dm_rgbe_alpha) {
289 *BytePerPixelDETY = 4;
290 *BytePerPixelDETC = 1;
293 } else if (SourcePixelFormat == dm_420_8) {
294 *BytePerPixelDETY = 1;
295 *BytePerPixelDETC = 2;
298 } else if (SourcePixelFormat == dm_420_12) {
299 *BytePerPixelDETY = 2;
300 *BytePerPixelDETC = 4;
// Fallback values are fractional: 4/3 bytes for Y and 8/3 bytes for C.
304 *BytePerPixelDETY = 4.0 / 3;
305 *BytePerPixelDETC = 8.0 / 3;
309 #ifdef __DML_VBA_DEBUG__
310 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
311 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
312 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
313 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
314 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
// Step 2: 256-byte block height/width. Single-plane formats first.
316 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
317 || SourcePixelFormat == dm_444_16
318 || SourcePixelFormat == dm_444_8
319 || SourcePixelFormat == dm_mono_16
320 || SourcePixelFormat == dm_mono_8
321 || SourcePixelFormat == dm_rgbe)) {
322 if (SurfaceTiling == dm_sw_linear)
323 *BlockHeight256BytesY = 1;
324 else if (SourcePixelFormat == dm_444_64)
325 *BlockHeight256BytesY = 4;
326 else if (SourcePixelFormat == dm_444_8)
327 *BlockHeight256BytesY = 16;
329 *BlockHeight256BytesY = 8;
// Width follows from 256 bytes = width * height * bytes-per-pixel.
// NOTE(review): relies on *BytePerPixelY having been set non-zero above.
331 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
332 *BlockHeight256BytesC = 0;
333 *BlockWidth256BytesC = 0;
// Two-plane formats: both planes get a block height.
335 if (SurfaceTiling == dm_sw_linear) {
336 *BlockHeight256BytesY = 1;
337 *BlockHeight256BytesC = 1;
338 } else if (SourcePixelFormat == dm_rgbe_alpha) {
339 *BlockHeight256BytesY = 8;
340 *BlockHeight256BytesC = 16;
341 } else if (SourcePixelFormat == dm_420_8) {
342 *BlockHeight256BytesY = 16;
343 *BlockHeight256BytesC = 8;
345 *BlockHeight256BytesY = 8;
346 *BlockHeight256BytesC = 8;
// NOTE(review): both divisions rely on non-zero bytes-per-pixel values.
348 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
349 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
351 #ifdef __DML_VBA_DEBUG__
352 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
353 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
354 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
355 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
// Step 3: macro-tile size. Linear surfaces use the 256-byte block itself.
358 if (SurfaceTiling == dm_sw_linear) {
359 *MacroTileHeightY = *BlockHeight256BytesY;
360 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
361 *MacroTileHeightC = *BlockHeight256BytesC;
// A zero chroma height means no chroma plane; skip the division.
362 if (*MacroTileHeightC == 0)
363 *MacroTileWidthC = 0;
365 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
// 64KB swizzles: tile is 16 blocks tall and holds 65536 bytes.
366 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
367 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
368 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
369 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
370 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
371 if (*MacroTileHeightC == 0)
372 *MacroTileWidthC = 0;
374 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
// Remaining tiling modes: tile is 32 blocks tall and holds 4 * 65536 bytes.
376 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
377 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
378 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
379 if (*MacroTileHeightC == 0)
380 *MacroTileWidthC = 0;
382 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 #ifdef __DML_VBA_DEBUG__
386 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
387 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
388 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
389 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
391 } // CalculateBytePerPixelAndBlockSizes
/*
 * Choose per-surface swath heights, split the DET buffer between luma and
 * chroma, and report whether each surface's viewport is supportable.
 *
 * Steps performed here:
 *  1. dml32_CalculateSwathWidth() is called with the swath-width, *_ub width
 *     and maximum swath height arrays.
 *  2. Worst-case swath sizes in bytes are computed per surface
 *     (ub width * bytes per pixel * max swath height; rounded up to a
 *     multiple of 256 for dm_420_10).
 *  3. Active DPPs are counted and chroma presence is detected.
 *  4. The compressed-buffer reserved space and the unbounded-request decision
 *     are computed, then dml32_CalculateDETBufferSize() is called with the
 *     per-surface DET sizes and the compressed buffer size.
 *  5. Per surface: pick full or half swath height per plane so the luma +
 *     chroma swath bytes fit in half of the DET, split the DET between the
 *     planes, and set the viewport-support flags.
 *
 * st_vars holds the intermediate values. The following are written directly
 * by this function: SwathHeightY/C, DETBufferSizeY/C,
 * ViewportSizeSupportPerSurface, *ViewportSizeSupport,
 * *UnboundedRequestEnabled, *CompBufReservedSpaceKBytes and
 * *CompBufReservedSpaceNeedAdjustment.
 */
393 void dml32_CalculateSwathAndDETConfiguration(
394 struct dml32_CalculateSwathAndDETConfiguration *st_vars,
395 unsigned int DETSizeOverride[],
396 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
397 unsigned int ConfigReturnBufferSizeInKByte,
398 unsigned int MaxTotalDETInKByte,
399 unsigned int MinCompressedBufferSizeInKByte,
400 double ForceSingleDPP,
401 unsigned int NumberOfActiveSurfaces,
402 unsigned int nomDETInKByte,
403 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
404 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
405 unsigned int PixelChunkSizeKBytes,
406 unsigned int ROBSizeKBytes,
407 unsigned int CompressedBufferSegmentSizeInkByteFinal,
408 enum output_encoder_class Output[],
409 double ReadBandwidthLuma[],
410 double ReadBandwidthChroma[],
411 double MaximumSwathWidthLuma[],
412 double MaximumSwathWidthChroma[],
413 enum dm_rotation_angle SourceRotation[],
414 bool ViewportStationary[],
415 enum source_format_class SourcePixelFormat[],
416 enum dm_swizzle_mode SurfaceTiling[],
417 unsigned int ViewportWidth[],
418 unsigned int ViewportHeight[],
419 unsigned int ViewportXStart[],
420 unsigned int ViewportYStart[],
421 unsigned int ViewportXStartC[],
422 unsigned int ViewportYStartC[],
423 unsigned int SurfaceWidthY[],
424 unsigned int SurfaceWidthC[],
425 unsigned int SurfaceHeightY[],
426 unsigned int SurfaceHeightC[],
427 unsigned int Read256BytesBlockHeightY[],
428 unsigned int Read256BytesBlockHeightC[],
429 unsigned int Read256BytesBlockWidthY[],
430 unsigned int Read256BytesBlockWidthC[],
431 enum odm_combine_mode ODMMode[],
432 unsigned int BlendingAndTiming[],
433 unsigned int BytePerPixY[],
434 unsigned int BytePerPixC[],
435 double BytePerPixDETY[],
436 double BytePerPixDETC[],
437 unsigned int HActive[],
439 double HRatioChroma[],
440 unsigned int DPPPerSurface[],
443 unsigned int swath_width_luma_ub[],
444 unsigned int swath_width_chroma_ub[],
446 double SwathWidthChroma[],
447 unsigned int SwathHeightY[],
448 unsigned int SwathHeightC[],
449 unsigned int DETBufferSizeInKByte[],
450 unsigned int DETBufferSizeY[],
451 unsigned int DETBufferSizeC[],
452 bool *UnboundedRequestEnabled,
453 unsigned int *CompressedBufferSizeInkByte,
454 unsigned int *CompBufReservedSpaceKBytes,
455 bool *CompBufReservedSpaceNeedAdjustment,
456 bool ViewportSizeSupportPerSurface[],
457 bool *ViewportSizeSupport)
// Reset the accumulators that the loops below update.
461 st_vars->TotalActiveDPP = 0;
462 st_vars->NoChromaSurfaces = true;
464 #ifdef __DML_VBA_DEBUG__
// NOTE(review): ForceSingleDPP is declared `double` in this signature but is
// printed with %d — the specifier does not match the argument type.
465 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
466 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
467 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
// Step 1: swath widths, *_ub widths and maximum swath heights.
469 dml32_CalculateSwathWidth(ForceSingleDPP,
470 NumberOfActiveSurfaces,
487 Read256BytesBlockHeightY,
488 Read256BytesBlockHeightC,
489 Read256BytesBlockWidthY,
490 Read256BytesBlockWidthC,
497 st_vars->SwathWidthdoubleDPP,
498 st_vars->SwathWidthdoubleDPPChroma,
501 st_vars->MaximumSwathHeightY,
502 st_vars->MaximumSwathHeightC,
504 swath_width_chroma_ub);
// Step 2: worst-case (maximum swath height) swath size in bytes per plane.
506 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
507 st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k];
508 st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k];
509 #ifdef __DML_VBA_DEBUG__
510 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
511 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
512 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
513 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]);
514 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
515 st_vars->RoundedUpMaxSwathSizeBytesY[k]);
516 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
517 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
518 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]);
519 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
520 st_vars->RoundedUpMaxSwathSizeBytesC[k]);
// dm_420_10 only: sizes are passed through dml_ceil with a granularity of
// 256 (presumably rounding up to a multiple of 256 bytes — confirm dml_ceil).
523 if (SourcePixelFormat[k] == dm_420_10) {
524 st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256);
525 st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256);
// Step 3: count active DPPs (one per surface when ForceSingleDPP is set) and
// record whether any surface carries a chroma plane.
529 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
530 st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
531 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
532 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
533 st_vars->NoChromaSurfaces = false;
537 // By default, just set the reserved space to 2 pixel chunks size
538 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
540 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
541 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
542 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
// bytes / 512 == 8 swaths / compression ratio 4, converted to KBytes (/1024).
// Only surface 0's luma swath size is considered here.
543 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512);
545 if (*CompBufReservedSpaceNeedAdjustment == 1) {
546 *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512;
549 #ifdef __DML_VBA_DEBUG__
550 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
551 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
// Step 4: unbounded-request decision, based on surface 0's output and tiling.
554 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
556 dml32_CalculateDETBufferSize(DETSizeOverride,
557 UseMALLForPStateChange,
559 NumberOfActiveSurfaces,
560 *UnboundedRequestEnabled,
563 ConfigReturnBufferSizeInKByte,
564 MinCompressedBufferSizeInKByte,
565 CompressedBufferSegmentSizeInkByteFinal,
569 st_vars->RoundedUpMaxSwathSizeBytesY,
570 st_vars->RoundedUpMaxSwathSizeBytesC,
574 DETBufferSizeInKByte, // per hubp pipe
575 CompressedBufferSizeInkByte);
577 #ifdef __DML_VBA_DEBUG__
578 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP);
579 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
580 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
581 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
582 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
583 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
// Step 5: per-surface swath height selection and DET split. The global flag
// starts true and is cleared by any unsupported surface.
586 *ViewportSizeSupport = true;
587 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// Phantom pipes use a fixed 1024 KB for the swath calculation instead of
// their assigned DET size.
589 st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
590 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
591 #ifdef __DML_VBA_DEBUG__
592 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
593 st_vars->DETBufferSizeInKByteForSwathCalculation);
// Case A: full-height luma and chroma swaths both fit in half the DET.
596 if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
597 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
598 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
599 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
600 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
601 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
// Case B: luma dominates (>= 1.5x chroma) — halve the luma swath only.
602 } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
603 st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
604 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
605 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
606 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
607 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
608 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
// Case C: luma is under 1.5x chroma — halve the chroma swath only.
609 } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
610 st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <=
611 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
612 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
613 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
614 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
615 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
// Case D: halve both planes.
617 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
618 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
619 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
620 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
// Unsupported when even half-height swaths exceed half the DET, or when a
// swath is wider than the allowed maximum for its plane.
623 if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 >
624 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
625 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
626 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
627 *ViewportSizeSupport = false;
628 ViewportSizeSupportPerSurface[k] = false;
630 ViewportSizeSupportPerSurface[k] = true;
// DET split: all for luma when there is no chroma swath; half/half when luma
// is at most 1.5x chroma; otherwise about 2/3 luma and the rest chroma.
633 if (SwathHeightC[k] == 0) {
634 #ifdef __DML_VBA_DEBUG__
635 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
637 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
638 DETBufferSizeC[k] = 0;
639 } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) {
640 #ifdef __DML_VBA_DEBUG__
641 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
643 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
644 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
646 #ifdef __DML_VBA_DEBUG__
647 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
// The luma share goes through dml_floor with granularity 1024 (presumably
// rounding down to a 1 KB multiple); chroma receives the remainder.
649 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
650 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
653 #ifdef __DML_VBA_DEBUG__
654 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
655 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
656 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
657 k, st_vars->RoundedUpMaxSwathSizeBytesY[k]);
658 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
659 k, st_vars->RoundedUpMaxSwathSizeBytesC[k]);
660 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY);
661 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC);
662 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
663 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
664 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
665 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
666 ViewportSizeSupportPerSurface[k]);
670 } // CalculateSwathAndDETConfiguration
/*
 * Compute, per surface, the swath widths (whole surface and per pipe), the
 * maximum swath heights and the block-aligned upper-bound swath widths.
 *
 * For each surface k:
 *  - SwathWidthdoubleDPPY[k] is the viewport width, or the viewport height
 *    when the rotation is vertical.
 *  - SwathWidthY[k] (per pipe) is reduced for ODM combine (4:1, 2:1, taken
 *    from the surface selected by BlendingAndTiming[k]) or for a 2-DPP
 *    surface, unless ForceSingleDPP is set.
 *  - Chroma widths are half the luma widths for the 4:2:0 formats, otherwise
 *    equal to luma.
 *  - MaximumSwathHeightY/C[k] is the 256-byte block height, or the block
 *    width when the rotation is vertical.
 *  - swath_width_luma_ub / swath_width_chroma_ub[k] is the swath width
 *    aligned to the 256-byte block dimension along the scan direction,
 *    capped by the block-aligned surface dimension; chroma is 0 when
 *    BytePerPixC[k] is 0.
 *
 * NOTE(review): the alignment description assumes dml_ceil(x, g) and
 * dml_floor(x, g) round x up/down to a multiple of g — confirm against the
 * helpers' definitions.
 */
672 void dml32_CalculateSwathWidth(
674 unsigned int NumberOfActiveSurfaces,
675 enum source_format_class SourcePixelFormat[],
676 enum dm_rotation_angle SourceRotation[],
677 bool ViewportStationary[],
678 unsigned int ViewportWidth[],
679 unsigned int ViewportHeight[],
680 unsigned int ViewportXStart[],
681 unsigned int ViewportYStart[],
682 unsigned int ViewportXStartC[],
683 unsigned int ViewportYStartC[],
684 unsigned int SurfaceWidthY[],
685 unsigned int SurfaceWidthC[],
686 unsigned int SurfaceHeightY[],
687 unsigned int SurfaceHeightC[],
688 enum odm_combine_mode ODMMode[],
689 unsigned int BytePerPixY[],
690 unsigned int BytePerPixC[],
691 unsigned int Read256BytesBlockHeightY[],
692 unsigned int Read256BytesBlockHeightC[],
693 unsigned int Read256BytesBlockWidthY[],
694 unsigned int Read256BytesBlockWidthC[],
695 unsigned int BlendingAndTiming[],
696 unsigned int HActive[],
698 unsigned int DPPPerSurface[],
701 double SwathWidthdoubleDPPY[],
702 double SwathWidthdoubleDPPC[],
703 double SwathWidthY[], // per-pipe
704 double SwathWidthC[], // per-pipe
705 unsigned int MaximumSwathHeightY[],
706 unsigned int MaximumSwathHeightC[],
707 unsigned int swath_width_luma_ub[], // per-pipe
708 unsigned int swath_width_chroma_ub[]) // per-pipe
711 enum odm_combine_mode MainSurfaceODMMode;
713 unsigned int surface_width_ub_l;
714 unsigned int surface_height_ub_l;
715 unsigned int surface_width_ub_c;
716 unsigned int surface_height_ub_c;
718 #ifdef __DML_VBA_DEBUG__
719 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
720 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
723 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// The swath runs along the viewport width, or along its height when rotated
// by 90/270 degrees.
724 if (!IsVertical(SourceRotation[k]))
725 SwathWidthdoubleDPPY[k] = ViewportWidth[k];
727 SwathWidthdoubleDPPY[k] = ViewportHeight[k];
729 #ifdef __DML_VBA_DEBUG__
730 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
731 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
// Use the ODM mode of the surface that BlendingAndTiming[k] points at; fall
// back to this surface's own mode when no index matches.
734 MainSurfaceODMMode = ODMMode[k];
735 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
736 if (BlendingAndTiming[k] == j)
737 MainSurfaceODMMode = ODMMode[j];
// Per-pipe luma swath width.
740 if (ForceSingleDPP) {
741 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
// ODM combine: each pipe covers 1/4 or 1/2 of HActive, scaled by HRatio, but
// never more than the whole swath.
743 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
744 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
745 dml_round(HActive[k] / 4.0 * HRatio[k]));
746 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
747 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
748 dml_round(HActive[k] / 2.0 * HRatio[k]));
// Two DPPs without ODM combine: split the swath in half.
749 } else if (DPPPerSurface[k] == 2) {
750 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
752 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
756 #ifdef __DML_VBA_DEBUG__
757 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
758 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
759 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
// NOTE(review): SwathWidthdoubleDPPY[] and SwathWidthY[] are `double` arrays
// but are printed with %d — the specifier does not match the argument type.
760 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
761 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
// 4:2:0 chroma is subsampled horizontally by 2; other formats reuse the luma
// widths.
764 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
765 SourcePixelFormat[k] == dm_420_12) {
766 SwathWidthC[k] = SwathWidthY[k] / 2;
767 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
769 SwathWidthC[k] = SwathWidthY[k];
770 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
// With a forced single DPP the per-pipe widths equal the whole-surface
// widths. The luma assignment repeats the one made above; the chroma one is
// needed because SwathWidthC was just derived from SwathWidthY.
773 if (ForceSingleDPP == true) {
774 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
775 SwathWidthC[k] = SwathWidthdoubleDPPC[k];
// Surface dimensions passed through dml_ceil with the 256-byte block
// dimensions as granularity; used as caps below.
778 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
779 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
780 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
781 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
783 #ifdef __DML_VBA_DEBUG__
784 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
785 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
786 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
787 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
788 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
789 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
790 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
791 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
792 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
793 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
// Non-rotated scan: swath height is the block height and widths are aligned
// to the block width.
796 if (!IsVertical(SourceRotation[k])) {
797 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
798 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
// Stationary single-DPP viewport: the exact span is known, so use the
// aligned end minus the aligned start.
799 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
800 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
801 dml_floor(ViewportXStart[k] +
803 Read256BytesBlockWidthY[k] - 1,
804 Read256BytesBlockWidthY[k]) -
805 dml_floor(ViewportXStart[k],
806 Read256BytesBlockWidthY[k]));
// Otherwise the start is unknown: take the aligned width plus one extra
// block.
808 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
809 dml_ceil(SwathWidthY[k] - 1,
810 Read256BytesBlockWidthY[k]) +
811 Read256BytesBlockWidthY[k]);
// Chroma upper bound is computed only when a chroma plane exists.
813 if (BytePerPixC[k] > 0) {
814 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
815 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
816 dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
817 Read256BytesBlockWidthC[k] - 1,
818 Read256BytesBlockWidthC[k]) -
819 dml_floor(ViewportXStartC[k],
820 Read256BytesBlockWidthC[k]));
822 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
823 dml_ceil(SwathWidthC[k] - 1,
824 Read256BytesBlockWidthC[k]) +
825 Read256BytesBlockWidthC[k]);
828 swath_width_chroma_ub[k] = 0;
// Rotated scan: the roles of block width and height swap, and the Y start
// and surface height are used instead of the X start and surface width.
831 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
832 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
834 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
836 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
837 Read256BytesBlockHeightY[k]) -
838 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
840 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
841 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
843 if (BytePerPixC[k] > 0) {
844 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
845 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
846 dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
847 Read256BytesBlockHeightC[k] - 1,
848 Read256BytesBlockHeightC[k]) -
849 dml_floor(ViewportYStartC[k],
850 Read256BytesBlockHeightC[k]));
852 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
853 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
854 Read256BytesBlockHeightC[k]);
857 swath_width_chroma_ub[k] = 0;
861 #ifdef __DML_VBA_DEBUG__
862 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
863 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
864 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
865 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
869 } // CalculateSwathWidth
/*
 * Decide whether unbounded requesting may be enabled.
 *
 * The base condition requires that the policy is not "disable", that exactly
 * one DPP is active, and that NoChroma is set. Three further checks follow:
 * the eDP-only policy with a non-eDP output, linear surface tiling, and a
 * compressed-buffer reserved-space adjustment combined with the
 * corresponding disable flag. Each of these presumably clears ret_val —
 * confirm against the statements guarded by the conditions.
 *
 * UseUnboundedRequestingFinal: requested policy
 * TotalNumberOfActiveDPP:      number of active DPPs across all surfaces
 * Output:                      output encoder class tested against dm_edp
 * SurfaceTiling:               tiling mode tested against dm_sw_linear
 * CompBufReservedSpaceNeedAdjustment,
 * DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment:
 *                              both must be true for the last check to fire
 */
871 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
872 unsigned int TotalNumberOfActiveDPP,
874 enum output_encoder_class Output,
875 enum dm_swizzle_mode SurfaceTiling,
876 bool CompBufReservedSpaceNeedAdjustment,
877 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
879 bool ret_val = false;
// Base eligibility: policy not disabled, a single active DPP, no chroma.
881 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
882 TotalNumberOfActiveDPP == 1 && NoChroma);
// eDP-only policy on a non-eDP output.
883 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
// Linear (untiled) surface.
886 if (SurfaceTiling == dm_sw_linear)
// Reserved-space adjustment is needed and the caller asked to disable
// unbounded requests in that case.
889 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
892 #ifdef __DML_VBA_DEBUG__
893 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
894 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
895 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val);
/*
 * dml32_CalculateDETBufferSize() - split the DET buffer between the active
 * surfaces and report the compressed buffer size.
 *
 * With UnboundedRequestEnabled only surface 0 is sized: its override if one
 * is given, otherwise the larger of nomDETInKByte and twice its rounded-up
 * swath bytes (luma + chroma) converted to KB and rounded up to a multiple
 * of 64. The compressed buffer is then ConfigReturnBufferSizeInKByte minus
 * that DET size.
 *
 * Otherwise a pool of MaxTotalDETInKByte is handed out in two passes:
 *  1. every surface gets a minimum: 0 for phantom pipes, the override when
 *     one is set, else minDET_pipe if the pool still covers it for all of the
 *     surface's DPPs (a single DPP when ForceSingleDPP is set);
 *  2. surfaces whose fraction of MaxTotalDETInKByte is still below their
 *     fraction of the total read bandwidth are visited lowest bandwidth
 *     first and given a bandwidth-proportional piece of what is left in the
 *     pool, in multiples of 64 KB per DPP, limited by nomDETInKByte per DPP.
 *
 * Outputs: DETBufferSizeInKByte[] (one entry per surface) and
 * *CompressedBufferSizeInkByte, which is finally rescaled by
 * CompressedBufferSegmentSizeInkByteFinal / 64.
 *
 * NOTE(review): this listing omits a number of short lines (braces, else,
 * short declarations/initialisers such as those of minDET, j and k); the
 * comments below describe only the statements that are visible.
 */
901 void dml32_CalculateDETBufferSize(
902 unsigned int DETSizeOverride[],
903 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
905 unsigned int NumberOfActiveSurfaces,
906 bool UnboundedRequestEnabled,
907 unsigned int nomDETInKByte,
908 unsigned int MaxTotalDETInKByte,
909 unsigned int ConfigReturnBufferSizeInKByte,
910 unsigned int MinCompressedBufferSizeInKByte,
911 unsigned int CompressedBufferSegmentSizeInkByteFinal,
912 enum source_format_class SourcePixelFormat[],
913 double ReadBandwidthLuma[],
914 double ReadBandwidthChroma[],
915 unsigned int RoundedUpMaxSwathSizeBytesY[],
916 unsigned int RoundedUpMaxSwathSizeBytesC[],
917 unsigned int DPPPerSurface[],
919 unsigned int DETBufferSizeInKByte[],
920 unsigned int *CompressedBufferSizeInkByte)
922 unsigned int DETBufferSizePoolInKByte;
923 unsigned int NextDETBufferPieceInKByte;
924 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
925 bool NextPotentialSurfaceToAssignDETPieceFound;
926 unsigned int NextSurfaceToAssignDETPiece;
927 double TotalBandwidth;
928 double BandwidthOfSurfacesNotAssignedDETPiece;
929 unsigned int max_minDET;
931 unsigned int minDET_pipe;
934 #ifdef __DML_VBA_DEBUG__
935 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
936 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
937 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
938 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
939 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
940 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
941 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
942 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
943 CompressedBufferSegmentSizeInkByteFinal);
946 // Note: Will use default det size if that fits 2 swaths
947 if (UnboundedRequestEnabled) {
// Unbounded request: only surface 0 is sized.
948 if (DETSizeOverride[0] > 0) {
949 DETBufferSizeInKByte[0] = DETSizeOverride[0];
// No override: room for two swaths (bytes -> KB, 64 KB granularity), at least the nominal size.
951 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
952 ((double) RoundedUpMaxSwathSizeBytesY[0] +
953 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
// Whatever the return buffer has left after the DET goes to the compressed buffer.
955 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
// Pass 1: hand every surface its minimum out of the shared pool.
957 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
958 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
959 DETBufferSizeInKByte[k] = nomDETInKByte;
// 4:2:0 formats search for the minimum up to one 64 KB step below nominal.
960 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
961 SourcePixelFormat[k] == dm_420_12) {
962 max_minDET = nomDETInKByte - 64;
964 max_minDET = nomDETInKByte;
969 // add DET resource until can hold 2 full swaths
970 while (minDET <= max_minDET && minDET_pipe == 0) {
971 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
972 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
973 minDET_pipe = minDET;
974 minDET = minDET + 64;
977 #ifdef __DML_VBA_DEBUG__
978 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
979 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
980 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
981 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
982 RoundedUpMaxSwathSizeBytesY[k]);
983 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
984 RoundedUpMaxSwathSizeBytesC[k]);
// No size up to max_minDET holds two swaths: fall back to one swath's worth, at least 128 KB.
987 if (minDET_pipe == 0) {
988 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
989 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
990 #ifdef __DML_VBA_DEBUG__
991 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
992 __func__, k, minDET_pipe);
// Phantom pipes get no DET; overrides are charged to the pool unconditionally;
// otherwise the minimum is granted only if the pool can cover every DPP of the surface.
996 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
997 DETBufferSizeInKByte[k] = 0;
998 } else if (DETSizeOverride[k] > 0) {
999 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1000 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1001 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1002 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1003 DETBufferSizeInKByte[k] = minDET_pipe;
1004 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1005 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1008 #ifdef __DML_VBA_DEBUG__
1009 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1010 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1011 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1012 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
// Total read bandwidth of all non-phantom surfaces.
1017 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1018 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1019 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1021 #ifdef __DML_VBA_DEBUG__
1022 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1023 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1024 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1025 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1026 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
// Mark the surfaces that need no extra piece: phantom pipes, overridden surfaces and
// surfaces whose fraction of MaxTotalDETInKByte already matches their bandwidth fraction.
1028 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1029 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1031 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1032 DETPieceAssignedToThisSurfaceAlready[k] = true;
1033 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1034 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1035 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1036 DETPieceAssignedToThisSurfaceAlready[k] = true;
1037 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1038 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1040 DETPieceAssignedToThisSurfaceAlready[k] = false;
1042 #ifdef __DML_VBA_DEBUG__
1043 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1044 DETPieceAssignedToThisSurfaceAlready[k]);
1045 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1046 BandwidthOfSurfacesNotAssignedDETPiece);
// Pass 2: at most one surface is served per j iteration.
1050 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1051 NextPotentialSurfaceToAssignDETPieceFound = false;
1052 NextSurfaceToAssignDETPiece = 0;
// Search for the not-yet-served surface with the lowest total read bandwidth.
1054 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1055 #ifdef __DML_VBA_DEBUG__
1056 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1057 ReadBandwidthLuma[k]);
1058 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1059 ReadBandwidthChroma[k]);
1060 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1061 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1063 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1064 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1065 NextSurfaceToAssignDETPiece);
1067 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1068 (!NextPotentialSurfaceToAssignDETPieceFound ||
1069 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1070 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1071 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1072 NextSurfaceToAssignDETPiece = k;
1073 NextPotentialSurfaceToAssignDETPieceFound = true;
1075 #ifdef __DML_VBA_DEBUG__
1076 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1077 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1078 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1079 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1083 if (NextPotentialSurfaceToAssignDETPieceFound) {
1084 // Note: To show the banker's rounding behavior in VBA and also the fact
1085 // that the DET buffer size varies due to precision issue
1087 //double tmp1 = ((double) DETBufferSizePoolInKByte *
1088 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1089 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1090 // BandwidthOfSurfacesNotAssignedDETPiece /
1091 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1092 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
1093 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1094 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1095 //BandwidthOfSurfacesNotAssignedDETPiece /
1096 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1099 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
// Piece = pool * (surface bandwidth / unserved bandwidth), rounded to a multiple of
// (64 KB * DPP count) and never more than the pool rounded down to that granularity.
1101 NextDETBufferPieceInKByte = dml_min(
1102 dml_round((double) DETBufferSizePoolInKByte *
1103 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1104 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1105 BandwidthOfSurfacesNotAssignedDETPiece /
1106 ((ForceSingleDPP ? 1 :
1107 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1108 (ForceSingleDPP ? 1 :
1109 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1110 dml_floor((double) DETBufferSizePoolInKByte,
1111 (ForceSingleDPP ? 1 :
1112 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1114 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1115 // We should limit the per-pipe DET size to the nominal / max per pipe.
// The cap uses the DPP count of the surface selected above. It used to index
// DPPPerSurface[] with k, which is the search loop's scan index rather than the
// chosen surface, unlike every other expression in this step.
1116 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece])) {
1117 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1118 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece])) {
1119 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) -
1120 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1122 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1123 // already has the max per-pipe value
1124 NextDETBufferPieceInKByte = 0;
1128 #ifdef __DML_VBA_DEBUG__
1129 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1130 DETBufferSizePoolInKByte);
1131 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1132 NextSurfaceToAssignDETPiece);
1133 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1134 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1135 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1136 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1137 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1138 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1139 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1140 NextDETBufferPieceInKByte);
1141 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1142 __func__, j, NextSurfaceToAssignDETPiece,
1143 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
// The piece covers all DPPs of the surface; the stored size is per DPP.
1146 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1147 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1148 + NextDETBufferPieceInKByte
1149 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1150 #ifdef __DML_VBA_DEBUG__
1151 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
// Book-keeping: shrink the pool, mark the surface served, drop its bandwidth from the remainder.
1154 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1155 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1156 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1157 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1158 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1161 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
// Rescale the compressed buffer size by segment size / 64.
1163 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1165 #ifdef __DML_VBA_DEBUG__
1166 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1167 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1168 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1169 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1170 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1173 } // CalculateDETBufferSize
/*
 * dml32_CalculateODMMode() - pick the ODM combine mode for one surface.
 *
 * The DISPCLK needed without ODM combine, with 2:1 combine and with 4:1
 * combine is obtained from dml32_CalculateRequiredDispclk(). The outputs start
 * as "pipes available" / ODM disabled / DISPCLK without combine.
 *
 * 4:1 is considered when the output is not HDMI, DP or eDP and either the
 * policy asks for 4:1, the 2:1 clock exceeds StateDispclk, or DSC is enabled
 * and HActive is more than twice MaximumPixelsPerLinePerDSCUnit. 2:1 is
 * considered when the output is not HDMI and either the policy asks for 2:1,
 * only the 2:1 clock fits within StateDispclk, or DSC is enabled and HActive
 * exceeds MaximumPixelsPerLinePerDSCUnit. Each case is accepted only if 4 / 2
 * (or, in the remaining case, 1) more DPPs fit within MaxNumDPP.
 *
 * Outputs: *TotalAvailablePipesSupport, *NumberOfDPP, *ODMMode,
 * *RequiredDISPCLKPerSurface.
 *
 * NOTE(review): several short lines (else branches, the *NumberOfDPP
 * assignments, trailing call arguments) are not visible in this listing.
 */
1175 void dml32_CalculateODMMode(
1176 unsigned int MaximumPixelsPerLinePerDSCUnit,
1177 unsigned int HActive,
1178 enum output_encoder_class Output,
1179 enum odm_combine_policy ODMUse,
1180 double StateDispclk,
1183 unsigned int TotalNumberOfActiveDPP,
1184 unsigned int MaxNumDPP,
1186 double DISPCLKDPPCLKDSCCLKDownSpreading,
1187 double DISPCLKRampingMargin,
1188 double DISPCLKDPPCLKVCOSpeed,
1191 bool *TotalAvailablePipesSupport,
1192 unsigned int *NumberOfDPP,
1193 enum odm_combine_mode *ODMMode,
1194 double *RequiredDISPCLKPerSurface)
1197 double SurfaceRequiredDISPCLKWithoutODMCombine;
1198 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1199 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
// Required DISPCLK for each of the three candidate ODM modes.
1201 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1202 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1204 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1205 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1207 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1208 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1210 *TotalAvailablePipesSupport = true;
1211 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1213 if (ODMUse == dm_odm_combine_policy_none)
1214 *ODMMode = dm_odm_combine_mode_disabled;
1216 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1219 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1220 // (ODMUse == "" || ODMUse == "CombineAsNeeded")
// 4:1 candidate: needs four free DPPs.
1222 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1223 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1224 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) {
1225 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1226 *ODMMode = dm_odm_combine_mode_4to1;
1227 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1230 *TotalAvailablePipesSupport = false;
// 2:1 candidate: needs two free DPPs.
1232 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1233 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1234 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1235 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) {
1236 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1237 *ODMMode = dm_odm_combine_mode_2to1;
1238 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1241 *TotalAvailablePipesSupport = false;
// Remaining case: a single additional DPP must fit.
1244 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1247 *TotalAvailablePipesSupport = false;
/*
 * dml32_CalculateRequiredDispclk() - DISPCLK required for a given ODM mode.
 *
 * The pixel clock is divided by 4 for 4:1 combine and by 2 for 2:1 combine.
 * Two candidates are derived from it, both rounded with round_up = 1 by
 * dml32_RoundToDFSGranularity(): one including down-spread and ramping
 * margin, one including down-spread only. MaxDispclk is rounded with
 * round_up = 0.
 *
 * Return: the down-spread-only value when even that exceeds the rounded
 * maximum; the rounded maximum when only the value with ramping margin
 * exceeds it; the value with ramping margin is the remaining assignment.
 *
 * NOTE(review): the PixelClock and MaxDispclk parameters and the else
 * keywords are not visible in this listing.
 */
1251 double dml32_CalculateRequiredDispclk(
1252 enum odm_combine_mode ODMMode,
1254 double DISPCLKDPPCLKDSCCLKDownSpreading,
1255 double DISPCLKRampingMargin,
1256 double DISPCLKDPPCLKVCOSpeed,
1259 double RequiredDispclk = 0.;
1260 double PixelClockAfterODM;
1261 double DISPCLKWithRampingRoundedToDFSGranularity;
1262 double DISPCLKWithoutRampingRoundedToDFSGranularity;
1263 double MaxDispclkRoundedDownToDFSGranularity;
// ODM combine splits the line across pipes, so each pipe sees a fraction of the pixel clock.
1265 if (ODMMode == dm_odm_combine_mode_4to1)
1266 PixelClockAfterODM = PixelClock / 4;
1267 else if (ODMMode == dm_odm_combine_mode_2to1)
1268 PixelClockAfterODM = PixelClock / 2;
1270 PixelClockAfterODM = PixelClock;
// Down-spread and ramping margin are given in percent.
1273 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1274 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1275 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1277 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1278 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1280 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
// Give up the ramping margin before exceeding the maximum clock.
1282 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1283 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1284 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1285 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1287 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1289 return RequiredDispclk;
/*
 * dml32_RoundToDFSGranularity() - snap a clock to a value of the form
 * (4 * VCOSpeed) / integer.
 *
 * The first return floors the divider, which yields a clock not lower than
 * Clock; the second one takes the ceiling of the divider, which yields a
 * clock not higher than Clock.
 *
 * NOTE(review): the statements between the signature and the two returns are
 * not visible in this listing; presumably round_up selects the first form -
 * confirm against the full file.
 */
1292 double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
1298 return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
1300 return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);
/*
 * dml32_CalculateOutputLink() - work out DSC/FEC need, output bpp, output
 * type and link rate for one timing.
 *
 * The outputs are first reset (no DSC, no FEC, unknown type and rate);
 * everything else only runs when IsMainSurfaceUsingTheIndicatedTiming is set.
 *
 * HDMI: no DSC, no FEC; the bpp comes from dml32_TruncToValidBPP() with a
 * link rate of min(600, PHYCLKPerState) * 10 and 3 lanes.
 *
 * DP / DP2.0 / eDP: RequiresDSC and LinkDSCEnable follow DSCEnable, then the
 * link rates are tried in ascending order (UHBR10, UHBR13.5, UHBR20 for
 * DP2.0, driven by PHYCLKD32PerState; HBR, HBR2, HBR3 otherwise, driven by
 * PHYCLKPerState). A rate is tried when OutputLinkDPRate is dm_dp_rate_na or
 * names that rate, the PHY clock reaches the rate's threshold and, for all
 * but the first rate, no bpp has been found yet. If a rate gives no bpp, the
 * PHY clock cannot reach the next rate (always true for the last one), DSC is
 * allowed and no bpp is forced, the same rate is retried with DSC on; in the
 * non-DP2.0 path that also sets FEC for dm_dp. Down-spread (percent) is
 * removed from the nominal link rate passed to dml32_TruncToValidBPP().
 *
 * Outputs: *RequiresDSC, *RequiresFEC, *OutBpp, *OutputType, *OutputRate,
 * *RequiredSlots.
 *
 * NOTE(review): some short lines (e.g. the DSCEnable parameter, else
 * keywords, closing braces) are not visible in this listing.
 * NOTE(review): 10000 / 32 and 13500 / 32 below are integer divisions, so the
 * thresholds are 312 and 421 rather than 312.5 and 421.875 - confirm whether
 * that truncation is intended.
 */
1303 void dml32_CalculateOutputLink(
1304 double PHYCLKPerState,
1305 double PHYCLKD18PerState,
1306 double PHYCLKD32PerState,
1307 double Downspreading,
1308 bool IsMainSurfaceUsingTheIndicatedTiming,
1309 enum output_encoder_class Output,
1310 enum output_format_class OutputFormat,
1311 unsigned int HTotal,
1312 unsigned int HActive,
1313 double PixelClockBackEnd,
1314 double ForcedOutputLinkBPP,
1315 unsigned int DSCInputBitPerComponent,
1316 unsigned int NumberOfDSCSlices,
1317 double AudioSampleRate,
1318 unsigned int AudioSampleLayout,
1319 enum odm_combine_mode ODMModeNoDSC,
1320 enum odm_combine_mode ODMModeDSC,
1322 unsigned int OutputLinkDPLanes,
1323 enum dm_output_link_dp_rate OutputLinkDPRate,
1327 double *RequiresFEC,
1329 enum dm_output_type *OutputType,
1330 enum dm_output_rate *OutputRate,
1331 unsigned int *RequiredSlots)
// Defaults reported when the surface does not use this timing or the output is not handled.
1335 *RequiresDSC = false;
1336 *RequiresFEC = false;
1338 *OutputType = dm_output_type_unknown;
1339 *OutputRate = dm_output_rate_unknown;
1341 if (IsMainSurfaceUsingTheIndicatedTiming) {
1342 if (Output == dm_hdmi) {
1343 *RequiresDSC = false;
1344 *RequiresFEC = false;
// HDMI: link rate capped at 600 * 10, 3 lanes, DSC off; the slot count goes to a dummy.
1345 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1346 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1347 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1348 ODMModeNoDSC, ODMModeDSC, &dummy);
1349 //OutputTypeAndRate = "HDMI";
1350 *OutputType = dm_output_type_hdmi;
1352 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
// With DSC requested, FEC is set for DP and DP2.0 outputs.
1353 if (DSCEnable == true) {
1354 *RequiresDSC = true;
1355 LinkDSCEnable = true;
1356 if (Output == dm_dp || Output == dm_dp2p0)
1357 *RequiresFEC = true;
1359 *RequiresFEC = false;
// Without DSC, FEC is set for DP2.0 only.
1361 *RequiresDSC = false;
1362 LinkDSCEnable = false;
1363 if (Output == dm_dp2p0)
1364 *RequiresFEC = true;
1366 *RequiresFEC = false;
1368 if (Output == dm_dp2p0) {
// UHBR10
1370 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1371 PHYCLKD32PerState >= 10000 / 32) {
1372 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1373 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1374 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1375 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1376 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// No valid bpp and UHBR13.5 is out of reach: retry this rate with DSC.
1377 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1378 ForcedOutputLinkBPP == 0) {
1379 *RequiresDSC = true;
1380 LinkDSCEnable = true;
1381 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1382 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1383 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1384 OutputFormat, DSCInputBitPerComponent,
1385 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1386 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1388 //OutputTypeAndRate = Output & " UHBR10";
1389 *OutputType = dm_output_type_dp2p0;
1390 *OutputRate = dm_output_rate_dp_rate_uhbr10;
// UHBR13.5, only if nothing valid was found so far
1392 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1393 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1394 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1395 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1396 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1397 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1398 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1400 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1401 ForcedOutputLinkBPP == 0) {
1402 *RequiresDSC = true;
1403 LinkDSCEnable = true;
1404 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1405 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1406 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1407 OutputFormat, DSCInputBitPerComponent,
1408 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1409 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1411 //OutputTypeAndRate = Output & " UHBR13p5";
1412 *OutputType = dm_output_type_dp2p0;
1413 *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
// UHBR20: last rate, so the DSC retry has no PHY clock condition
1415 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1416 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1417 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1418 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1419 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1420 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1421 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1422 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1423 *RequiresDSC = true;
1424 LinkDSCEnable = true;
1425 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1426 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1427 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1428 OutputFormat, DSCInputBitPerComponent,
1429 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1430 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432 //OutputTypeAndRate = Output & " UHBR20";
1433 *OutputType = dm_output_type_dp2p0;
1434 *OutputRate = dm_output_rate_dp_rate_uhbr20;
// HBR (DP / eDP path, thresholds on PHYCLKPerState)
1438 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1439 PHYCLKPerState >= 270) {
1440 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1441 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1442 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1443 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1444 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// No valid bpp and HBR2 is out of reach: retry with DSC (and FEC on DP).
1445 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1446 ForcedOutputLinkBPP == 0) {
1447 *RequiresDSC = true;
1448 LinkDSCEnable = true;
1449 if (Output == dm_dp)
1450 *RequiresFEC = true;
1451 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1452 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1453 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1454 OutputFormat, DSCInputBitPerComponent,
1455 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1456 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1458 //OutputTypeAndRate = Output & " HBR";
1459 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1460 *OutputRate = dm_output_rate_dp_rate_hbr;
// HBR2
1462 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1463 *OutBpp == 0 && PHYCLKPerState >= 540) {
1464 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1465 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1466 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1467 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1468 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1470 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1471 ForcedOutputLinkBPP == 0) {
1472 *RequiresDSC = true;
1473 LinkDSCEnable = true;
1474 if (Output == dm_dp)
1475 *RequiresFEC = true;
1477 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1478 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1479 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1480 OutputFormat, DSCInputBitPerComponent,
1481 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1482 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1484 //OutputTypeAndRate = Output & " HBR2";
1485 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1486 *OutputRate = dm_output_rate_dp_rate_hbr2;
// HBR3: last rate, so the DSC retry has no PHY clock condition
1488 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1489 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1490 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1491 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1492 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1493 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1496 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1497 *RequiresDSC = true;
1498 LinkDSCEnable = true;
1499 if (Output == dm_dp)
1500 *RequiresFEC = true;
1502 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1503 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1504 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1505 OutputFormat, DSCInputBitPerComponent,
1506 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1507 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1509 //OutputTypeAndRate = Output & " HBR3";
1510 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1511 *OutputRate = dm_output_rate_dp_rate_hbr3;
/*
 * dml32_CalculateDPPCLK() - derive the global DPPCLK and per-surface DPPCLKs.
 *
 * Each surface's clock is its single-DPP clock divided by its DPP count,
 * increased by the down-spread percentage. *GlobalDPPCLK becomes the maximum
 * of those, rounded by dml32_RoundToDFSGranularity() with round_up = 1.
 * Every per-surface clock is then rounded up to a multiple of
 * *GlobalDPPCLK / 255.
 *
 * Outputs: *GlobalDPPCLK and Dppclk[] (one entry per active surface).
 *
 * NOTE(review): the initial value of *GlobalDPPCLK is set on a line that is
 * not visible in this listing.
 */
1518 void dml32_CalculateDPPCLK(
1519 unsigned int NumberOfActiveSurfaces,
1520 double DISPCLKDPPCLKDSCCLKDownSpreading,
1521 double DISPCLKDPPCLKVCOSpeed,
1522 double DPPCLKUsingSingleDPP[],
1523 unsigned int DPPPerSurface[],
1526 double *GlobalDPPCLK,
1531 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1532 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
1533 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
1535 *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
// Express each surface clock in 1/255 steps of the global clock, rounding up.
1536 for (k = 0; k < NumberOfActiveSurfaces; ++k)
1537 Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
/*
 * dml32_TruncToValidBPP() - reduce a link's capacity to a valid output bpp.
 *
 * The format selects the DSC bpp limits (the upper limit is one 1/16 bpp step
 * below 1.5x, 2x or 3x the DSC input bits per component). MaxLinkBPP is
 * derived from LinkBitRate, Lanes and PixelClock: DP2.0 applies the
 * 128/132, 383/384 and 65536/65540 factors, the other outputs use
 * LinkBitRate / 10 * 8, and DP with DSC additionally loses 2.4 %. The ODM
 * mode then caps MaxLinkBPP at 16 (4:1) or 32 (2:1), or doubles it (1:2
 * split).
 *
 * With DesiredBPP == 0 the result is picked from MaxLinkBPP: for DSC it is
 * compared against the DSC limits and otherwise truncated to 1/16 bpp; for
 * non-DSC it is compared against the NonDSCBPP3..0 steps. A non-zero
 * DesiredBPP is validated against the same steps / DSC range.
 *
 * Output: *RequiredSlots = ceil(DesiredBPP / MaxLinkBPP * 64).
 *
 * NOTE(review): several short lines (some parameters, the NonDSCBPPx and
 * MinDSCBPP assignments, the return statements of the comparison chains) are
 * not visible in this listing.
 */
1540 double dml32_TruncToValidBPP(
1543 unsigned int HTotal,
1544 unsigned int HActive,
1548 enum output_encoder_class Output,
1549 enum output_format_class Format,
1550 unsigned int DSCInputBitPerComponent,
1551 unsigned int DSCSlices,
1552 unsigned int AudioRate,
1553 unsigned int AudioLayout,
1554 enum odm_combine_mode ODMModeNoDSC,
1555 enum odm_combine_mode ODMModeDSC,
1557 unsigned int *RequiredSlots)
1560 unsigned int MinDSCBPP;
1562 unsigned int NonDSCBPP0;
1563 unsigned int NonDSCBPP1;
1564 unsigned int NonDSCBPP2;
1565 unsigned int NonDSCBPP3;
1567 if (Format == dm_420) {
// 1.0 / 16, not 1 / 16: the integer form evaluates to 0 and drops the 1/16 bpp
// step that the other formats below subtract.
1572 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
1573 } else if (Format == dm_444) {
1579 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1581 if (Output == dm_hdmi) {
1590 if (Format == dm_n422) {
1592 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1595 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
// Link capacity in bits per pixel for the given encoder.
1598 if (Output == dm_dp2p0) {
1599 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1600 } else if (DSCEnable && Output == dm_dp) {
1601 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1603 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
// ODM adjustment, first using the DSC ODM mode, then the non-DSC ODM mode.
1607 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1608 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1609 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1610 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1611 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1612 MaxLinkBPP = 2 * MaxLinkBPP;
1614 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1615 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1616 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1617 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1618 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1619 MaxLinkBPP = 2 * MaxLinkBPP;
// No bpp requested: derive one from the link capacity.
1622 if (DesiredBPP == 0) {
1624 if (MaxLinkBPP < MinDSCBPP)
1626 else if (MaxLinkBPP >= MaxDSCBPP)
// DSC bpp is expressed in 1/16 steps; truncate the link capacity to that grid.
1629 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1631 if (MaxLinkBPP >= NonDSCBPP3)
1633 else if (MaxLinkBPP >= NonDSCBPP2)
1635 else if (MaxLinkBPP >= NonDSCBPP1)
1637 else if (MaxLinkBPP >= NonDSCBPP0)
// A requested bpp must be one of the non-DSC steps, or lie within the DSC range.
1643 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1644 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1645 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1651 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1654 } // TruncToValidBPP
/*
 * dml32_RequiredDTBCLK() - DTBCLK required for an output.
 *
 * Without DSC the result is PixelClock / 4 * OutputBpp / 24, but at least 25.
 *
 * With DSC the pixel word rate is PixelClock for dm_444 and PixelClock / 2
 * otherwise. HCActive is the per-line active payload: bytes per slice
 * (OutputBpp * slice width / 8, rounded up) times the slice count, divided by
 * 3 and rounded up. The result is the largest of the pixel word rate, the
 * average tribyte rate over the whole line, the tribyte rate during HActive
 * (each divided by 4) and 25, multiplied by 1.002.
 *
 * NOTE(review): a few short lines (e.g. the DSCEnable, PixelClock and
 * OutputBpp parameters, the HCActive/HCBlank declarations and the left-hand
 * side of the HCBlank assignment) are not visible in this listing.
 */
1656 double dml32_RequiredDTBCLK(
1659 enum output_format_class OutputFormat,
1661 unsigned int DSCSlices,
1662 unsigned int HTotal,
1663 unsigned int HActive,
1664 unsigned int AudioRate,
1665 unsigned int AudioLayout)
1667 double PixelWordRate;
1670 double AverageTribyteRate;
1671 double HActiveTribyteRate;
1673 if (DSCEnable != true)
1674 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1676 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
// Slice width must be divided in floating point: HActive / DSCSlices on two
// unsigned ints truncates before dml_ceil() can round up.
1677 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1678 dml_ceil((double) HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
// Audio contribution; a layout other than 1 counts a quarter of the audio rate.
1680 dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1681 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1682 HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1683 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
/*
 * dml32_DSCDelayRequirement() - DSC pipeline delay for one output.
 *
 * When DSC is enabled and OutputBpp is non-zero, the delay is the sum of
 * dml32_dscceComputeDelay() and dml32_dscComputeDelay(). For 4:1 / 2:1 ODM
 * combine the slice count passed down is divided by 4 / 2 and the sum is
 * multiplied by 4 / 2. The horizontal blank (HTotal - HActive) is then added
 * once for every active line the delay spans, and the result is scaled by
 * PixelClock / PixelClockBackEnd. In the other visible case the delay is 0.
 *
 * Return: DSCDelayRequirement_val.
 *
 * NOTE(review): the OutputBpp and PixelClock parameters and the else
 * keywords are not visible in this listing.
 */
1686 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1687 enum odm_combine_mode ODMMode,
1688 unsigned int DSCInputBitPerComponent,
1690 unsigned int HActive,
1691 unsigned int HTotal,
1692 unsigned int NumberOfDSCSlices,
1693 enum output_format_class OutputFormat,
1694 enum output_encoder_class Output,
1696 double PixelClockBackEnd)
1698 unsigned int DSCDelayRequirement_val;
1700 if (DSCEnabled == true && OutputBpp != 0) {
// Slice width = ceil(HActive / slices). The division is done in floating
// point; on two unsigned ints it truncates before dml_ceil() can round up.
1701 if (ODMMode == dm_odm_combine_mode_4to1) {
1702 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1703 dml_ceil((double) HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1704 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1705 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1706 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1707 dml_ceil((double) HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1708 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1710 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1711 dml_ceil((double) HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1712 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
// Add one horizontal blank per (partially) spanned active line; the cast keeps
// the fractional line so dml_ceil() actually rounds up.
1715 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1716 dml_ceil((double) DSCDelayRequirement_val / HActive, 1);
// Rescale by the ratio of pixel clock to back-end pixel clock.
1718 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1721 DSCDelayRequirement_val = 0;
1724 #ifdef __DML_VBA_DEBUG__
1725 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
1726 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
1727 dml_print("DML::%s: HActive = %d\n", __func__, HActive);
1728 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
1729 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1730 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
1731 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1734 return DSCDelayRequirement_val;
/*
 * dml32_CalculateSurfaceSizeInMall - estimate how much MALL each surface needs.
 *
 * For every surface k in [0, NumberOfActiveSurfaces) SurfaceSizeInMALL[k] is
 * built up from:
 *   - a luma term: width times height, each rounded with dml_ceil/dml_floor
 *     to the ReadBlock{Width,Height}Y size, scaled by the bytes per pixel;
 *   - a chroma term of the same shape, added only when ReadBlockWidthC[k] > 0;
 *   - when DCCEnable[k] is true, extra luma (and, when
 *     Read256BytesBlockWidthC[k] > 0, chroma) terms that use
 *     8 * Read256BytesBlock{Width,Height} as the rounding size and are
 *     divided by 256.
 *
 * When ViewportStationary[k] is set, the viewport start coordinates are used
 * to count the blocks the viewport touches, and that count is clamped to the
 * block-rounded surface size. The second group of assignments ignores the
 * start coordinates: the viewport is padded by (block size - 1), clamped to
 * the surface size and then rounded up.
 *
 * Outputs:
 *   SurfaceSizeInMALL[]  per-surface size as described above.
 *   *ExceededMALLSize    true when the sum of SurfaceSizeInMALL[k] over the
 *                        surfaces with UseMALLForStaticScreen[k] ==
 *                        dm_use_mall_static_screen_enable is larger than
 *                        MALLAllocatedForDCN * 1024 * 1024.
 *
 * NOTE(review): DCCEnable[] and the loop index k are used below, but their
 * declarations (and brace/else-only lines and a few trailing factors) are not
 * visible in this copy of the file -- confirm against the full source.
 * NOTE(review): the "* 1024 * 1024" scaling suggests MALLAllocatedForDCN is
 * expressed in MiB and the sizes in bytes -- TODO confirm.
 */
1737 void dml32_CalculateSurfaceSizeInMall(
1738 unsigned int NumberOfActiveSurfaces,
1739 unsigned int MALLAllocatedForDCN,
1740 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1742 bool ViewportStationary[],
1743 unsigned int ViewportXStartY[],
1744 unsigned int ViewportYStartY[],
1745 unsigned int ViewportXStartC[],
1746 unsigned int ViewportYStartC[],
1747 unsigned int ViewportWidthY[],
1748 unsigned int ViewportHeightY[],
1749 unsigned int BytesPerPixelY[],
1750 unsigned int ViewportWidthC[],
1751 unsigned int ViewportHeightC[],
1752 unsigned int BytesPerPixelC[],
1753 unsigned int SurfaceWidthY[],
1754 unsigned int SurfaceWidthC[],
1755 unsigned int SurfaceHeightY[],
1756 unsigned int SurfaceHeightC[],
1757 unsigned int Read256BytesBlockWidthY[],
1758 unsigned int Read256BytesBlockWidthC[],
1759 unsigned int Read256BytesBlockHeightY[],
1760 unsigned int Read256BytesBlockHeightC[],
1761 unsigned int ReadBlockWidthY[],
1762 unsigned int ReadBlockWidthC[],
1763 unsigned int ReadBlockHeightY[],
1764 unsigned int ReadBlockHeightC[],
// The two parameters below are written by this function.
1767 unsigned int SurfaceSizeInMALL[],
1768 bool *ExceededMALLSize)
1770 unsigned int TotalSurfaceSizeInMALL = 0;
// Pass 1: compute SurfaceSizeInMALL[k] for every active surface.
1773 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1774 if (ViewportStationary[k]) {
// Stationary viewport, luma: blocks touched by the viewport in X and Y,
// each clamped to the block-rounded surface dimension.
1775 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1776 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1777 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1778 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1779 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1780 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1781 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
// Chroma plane is added only when a chroma read block width is reported.
1783 if (ReadBlockWidthC[k] > 0) {
1784 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1785 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1786 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1787 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1788 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1789 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1790 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1791 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1792 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
// NOTE(review): the last factor of the product above is not visible in this copy.
// DCC: add luma meta term, rounded to 8x the 256-byte block size and divided by 256.
1795 if (DCCEnable[k] == true) {
1796 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1797 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1798 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1799 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1800 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1801 * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1802 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1803 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1804 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1805 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
// Same DCC term for chroma, when a chroma 256-byte block width is reported.
1806 if (Read256BytesBlockWidthC[k] > 0) {
1807 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1808 dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1809 Read256BytesBlockWidthC[k]),
1810 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1811 * Read256BytesBlockWidthC[k] - 1, 8 *
1812 Read256BytesBlockWidthC[k]) -
1813 dml_floor(ViewportXStartC[k], 8 *
1814 Read256BytesBlockWidthC[k])) *
1815 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1816 Read256BytesBlockHeightC[k]),
1817 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1818 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1819 Read256BytesBlockHeightC[k]) -
1820 dml_floor(ViewportYStartC[k], 8 *
1821 Read256BytesBlockHeightC[k])) *
1822 BytesPerPixelC[k] / 256;
// Presumably the non-stationary branch (its else line is not visible here):
// the viewport start is ignored and the viewport is padded by (block - 1)
// before being clamped to the surface and rounded up to whole blocks.
1826 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1827 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1828 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1829 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
// NOTE(review): the last factor of the product above is not visible in this copy.
1831 if (ReadBlockWidthC[k] > 0) {
1832 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1833 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1834 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1835 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1836 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
// DCC terms for this branch, again at 8x the 256-byte block size and divided by 256.
1839 if (DCCEnable[k] == true) {
1840 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1841 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1842 Read256BytesBlockWidthY[k] - 1), 8 *
1843 Read256BytesBlockWidthY[k]) *
1844 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1845 Read256BytesBlockHeightY[k] - 1), 8 *
1846 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1848 if (Read256BytesBlockWidthC[k] > 0) {
1849 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1850 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1851 Read256BytesBlockWidthC[k] - 1), 8 *
1852 Read256BytesBlockWidthC[k]) *
1853 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1854 Read256BytesBlockHeightC[k] - 1), 8 *
1855 Read256BytesBlockHeightC[k]) *
1856 BytesPerPixelC[k] / 256;
// Pass 2: total only the surfaces that have static-screen MALL use enabled.
1862 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1863 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1864 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
// Exceeded only when the total is strictly larger than the scaled allocation.
1866 *ExceededMALLSize = (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
1867 } // CalculateSurfaceSizeInMall
/*
 * dml32_CalculateVMRowAndSwath - per-surface page-table (PTE) row, DCC meta
 * row and prefetch source line calculation.
 *
 * The function makes three passes over k in [0, NumberOfActiveSurfaces):
 *  1. Choose vm_group_bytes[k] / dpte_group_bytes[k] from HostVMEnable and
 *     GPUVMEnable, split the PTE request buffer between luma and chroma,
 *     call dml32_CalculateVMAndRowBytes() and
 *     dml32_CalculatePrefetchSourceLines() for chroma (only for dm_420_8,
 *     dm_420_10, dm_420_12 and dm_rgbe_alpha) and for luma, then fill
 *     PDEAndMetaPTEBytesFrame[k], MetaRowByte[k], PTEBufferSizeNotExceeded[k]
 *     and st_vars->one_row_per_frame_fits_in_buffer[k].
 *  2. After dml32_CalculateMALLUseForStaticScreen() has filled
 *     UsesMALLForStaticScreen[], derive PTE_BUFFER_MODE[k] and
 *     BIGK_FRAGMENT_SIZE[k].
 *  3. Decide use_one_row_for_frame[k] and use_one_row_for_frame_flip[k],
 *     switch the dpte row outputs to their one-row-per-frame variants when
 *     selected, set DCCMetaBufferSizeNotExceeded[k] and
 *     PixelPTEBytesPerRow[k], and call dml32_CalculateRowBandwidth().
 *
 * st_vars carries per-surface intermediate values between the passes.
 *
 * NOTE(review): myPipe, GPUVMEnable, HostVMEnable and k are used below, but
 * their declarations, several call arguments and brace/else/#endif-only lines
 * are not visible in this copy of the file -- confirm against the full source.
 */
1869 void dml32_CalculateVMRowAndSwath(
1870 struct dml32_CalculateVMRowAndSwath *st_vars,
1871 unsigned int NumberOfActiveSurfaces,
1873 unsigned int SurfaceSizeInMALL[],
1874 unsigned int PTEBufferSizeInRequestsLuma,
1875 unsigned int PTEBufferSizeInRequestsChroma,
1876 unsigned int DCCMetaBufferSizeBytes,
1877 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1878 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1879 unsigned int MALLAllocatedForDCN,
1880 double SwathWidthY[],
1881 double SwathWidthC[],
1884 unsigned int HostVMMaxNonCachedPageTableLevels,
1885 unsigned int GPUVMMaxPageTableLevels,
1886 unsigned int GPUVMMinPageSizeKBytes[],
1887 unsigned int HostVMMinPageSize,
// The array parameters from here to the end of the list are written by this function.
1890 bool PTEBufferSizeNotExceeded[],
1891 bool DCCMetaBufferSizeNotExceeded[],
1892 unsigned int dpte_row_width_luma_ub[],
1893 unsigned int dpte_row_width_chroma_ub[],
1894 unsigned int dpte_row_height_luma[],
1895 unsigned int dpte_row_height_chroma[],
1896 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
1897 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
1898 unsigned int meta_req_width[],
1899 unsigned int meta_req_width_chroma[],
1900 unsigned int meta_req_height[],
1901 unsigned int meta_req_height_chroma[],
1902 unsigned int meta_row_width[],
1903 unsigned int meta_row_width_chroma[],
1904 unsigned int meta_row_height[],
1905 unsigned int meta_row_height_chroma[],
1906 unsigned int vm_group_bytes[],
1907 unsigned int dpte_group_bytes[],
1908 unsigned int PixelPTEReqWidthY[],
1909 unsigned int PixelPTEReqHeightY[],
1910 unsigned int PTERequestSizeY[],
1911 unsigned int PixelPTEReqWidthC[],
1912 unsigned int PixelPTEReqHeightC[],
1913 unsigned int PTERequestSizeC[],
1914 unsigned int dpde0_bytes_per_frame_ub_l[],
1915 unsigned int meta_pte_bytes_per_frame_ub_l[],
1916 unsigned int dpde0_bytes_per_frame_ub_c[],
1917 unsigned int meta_pte_bytes_per_frame_ub_c[],
1918 double PrefetchSourceLinesY[],
1919 double PrefetchSourceLinesC[],
1920 double VInitPreFillY[],
1921 double VInitPreFillC[],
1922 unsigned int MaxNumSwathY[],
1923 unsigned int MaxNumSwathC[],
1924 double meta_row_bw[],
1925 double dpte_row_bw[],
1926 double PixelPTEBytesPerRow[],
1927 double PDEAndMetaPTEBytesFrame[],
1928 double MetaRowByte[],
1929 bool use_one_row_for_frame[],
1930 bool use_one_row_for_frame_flip[],
1931 bool UsesMALLForStaticScreen[],
1932 bool PTE_BUFFER_MODE[],
1933 unsigned int BIGK_FRAGMENT_SIZE[])
// Pass 1: group sizes, PTE buffer split, VM/row bytes and prefetch lines per surface.
1937 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// Group sizes: 512/512 with HostVM; with GPUVM only, 2048 for the VM group and
// 512 or 2048 for the dpte group; the last pair of assignments zeroes both.
1938 if (HostVMEnable == true) {
1939 vm_group_bytes[k] = 512;
1940 dpte_group_bytes[k] = 512;
1941 } else if (GPUVMEnable == true) {
1942 vm_group_bytes[k] = 2048;
1943 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1944 dpte_group_bytes[k] = 512;
1946 dpte_group_bytes[k] = 2048;
1948 vm_group_bytes[k] = 0;
1949 dpte_group_bytes[k] = 0;
// Formats handled with a separate chroma pass.
1952 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
1953 myPipe[k].SourcePixelFormat == dm_420_12 ||
1954 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
// 10/12-bit 4:2:0 without vertical rotation: luma and chroma each get half of
// the combined PTE request buffer; the assignments after that keep the
// separate luma and chroma sizes.
1955 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
1956 !IsVertical(myPipe[k].SourceRotation)) {
1957 st_vars->PTEBufferSizeInRequestsForLuma[k] =
1958 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
1959 st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k];
1961 st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
1962 st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
// Chroma plane: page-table / meta byte counts and row geometry.
1965 st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
1966 myPipe[k].ViewportStationary,
1967 myPipe[k].DCCEnable,
1968 myPipe[k].DPPPerSurface,
1969 myPipe[k].BlockHeight256BytesC,
1970 myPipe[k].BlockWidth256BytesC,
1971 myPipe[k].SourcePixelFormat,
1972 myPipe[k].SurfaceTiling,
1973 myPipe[k].BytePerPixelC,
1974 myPipe[k].SourceRotation,
1976 myPipe[k].ViewportHeightChroma,
1977 myPipe[k].ViewportXStartC,
1978 myPipe[k].ViewportYStartC,
1981 HostVMMaxNonCachedPageTableLevels,
1982 GPUVMMaxPageTableLevels,
1983 GPUVMMinPageSizeKBytes[k],
1985 st_vars->PTEBufferSizeInRequestsForChroma[k],
1987 myPipe[k].DCCMetaPitchC,
1988 myPipe[k].BlockWidthC,
1989 myPipe[k].BlockHeightC,
1992 &st_vars->MetaRowByteC[k],
1993 &st_vars->PixelPTEBytesPerRowC[k],
1994 &dpte_row_width_chroma_ub[k],
1995 &dpte_row_height_chroma[k],
1996 &dpte_row_height_linear_chroma[k],
1997 &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k],
1998 &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k],
1999 &st_vars->dpte_row_height_chroma_one_row_per_frame[k],
2000 &meta_req_width_chroma[k],
2001 &meta_req_height_chroma[k],
2002 &meta_row_width_chroma[k],
2003 &meta_row_height_chroma[k],
2004 &PixelPTEReqWidthC[k],
2005 &PixelPTEReqHeightC[k],
2006 &PTERequestSizeC[k],
2007 &dpde0_bytes_per_frame_ub_c[k],
2008 &meta_pte_bytes_per_frame_ub_c[k]);
// Chroma plane: number of source lines to prefetch.
2010 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2011 myPipe[k].VRatioChroma,
2012 myPipe[k].VTapsChroma,
2013 myPipe[k].InterlaceEnable,
2014 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2015 myPipe[k].SwathHeightC,
2016 myPipe[k].SourceRotation,
2017 myPipe[k].ViewportStationary,
2019 myPipe[k].ViewportHeightChroma,
2020 myPipe[k].ViewportXStartC,
2021 myPipe[k].ViewportYStartC,
// Formats without the chroma pass: luma gets the whole PTE request buffer and
// every chroma result is zeroed.
2027 st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2028 st_vars->PTEBufferSizeInRequestsForChroma[k] = 0;
2029 st_vars->PixelPTEBytesPerRowC[k] = 0;
2030 st_vars->PDEAndMetaPTEBytesFrameC = 0;
2031 st_vars->MetaRowByteC[k] = 0;
2032 MaxNumSwathC[k] = 0;
2033 PrefetchSourceLinesC[k] = 0;
2034 st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0;
2035 st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2036 st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
// Luma plane: page-table / meta byte counts and row geometry.
2039 st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2040 myPipe[k].ViewportStationary,
2041 myPipe[k].DCCEnable,
2042 myPipe[k].DPPPerSurface,
2043 myPipe[k].BlockHeight256BytesY,
2044 myPipe[k].BlockWidth256BytesY,
2045 myPipe[k].SourcePixelFormat,
2046 myPipe[k].SurfaceTiling,
2047 myPipe[k].BytePerPixelY,
2048 myPipe[k].SourceRotation,
2050 myPipe[k].ViewportHeight,
2051 myPipe[k].ViewportXStart,
2052 myPipe[k].ViewportYStart,
2055 HostVMMaxNonCachedPageTableLevels,
2056 GPUVMMaxPageTableLevels,
2057 GPUVMMinPageSizeKBytes[k],
2059 st_vars->PTEBufferSizeInRequestsForLuma[k],
2061 myPipe[k].DCCMetaPitchY,
2062 myPipe[k].BlockWidthY,
2063 myPipe[k].BlockHeightY,
2066 &st_vars->MetaRowByteY[k],
2067 &st_vars->PixelPTEBytesPerRowY[k],
2068 &dpte_row_width_luma_ub[k],
2069 &dpte_row_height_luma[k],
2070 &dpte_row_height_linear_luma[k],
2071 &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k],
2072 &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k],
2073 &st_vars->dpte_row_height_luma_one_row_per_frame[k],
2075 &meta_req_height[k],
2077 &meta_row_height[k],
2078 &PixelPTEReqWidthY[k],
2079 &PixelPTEReqHeightY[k],
2080 &PTERequestSizeY[k],
2081 &dpde0_bytes_per_frame_ub_l[k],
2082 &meta_pte_bytes_per_frame_ub_l[k]);
// Luma plane: number of source lines to prefetch.
2084 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2087 myPipe[k].InterlaceEnable,
2088 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2089 myPipe[k].SwathHeightY,
2090 myPipe[k].SourceRotation,
2091 myPipe[k].ViewportStationary,
2093 myPipe[k].ViewportHeight,
2094 myPipe[k].ViewportXStart,
2095 myPipe[k].ViewportYStart,
// Per-surface totals are the luma and chroma results added together.
2101 PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC;
2102 MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k];
// The PTE buffer fits when each plane's row bytes are at most 64 times its
// request budget.
2104 if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] &&
2105 st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) {
2106 PTEBufferSizeNotExceeded[k] = true;
2108 PTEBufferSizeNotExceeded[k] = false;
// The one-row-per-frame variant is checked against twice that limit (64 * 2).
2111 st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2112 st_vars->PTEBufferSizeInRequestsForLuma[k] &&
2113 st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]);
// Fills UsesMALLForStaticScreen[] for all surfaces; it takes the fit flags
// computed above as an input.
2116 dml32_CalculateMALLUseForStaticScreen(
2117 NumberOfActiveSurfaces,
2118 MALLAllocatedForDCN,
2119 UseMALLForStaticScreen, // mode
2121 st_vars->one_row_per_frame_fits_in_buffer,
2123 UsesMALLForStaticScreen); // boolean
// Pass 2: PTE buffer mode and fragment size per surface.
2125 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2126 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2127 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2128 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2129 (GPUVMMinPageSizeKBytes[k] > 64);
// log2 of the minimum page size in bytes, minus 12 (presumably relative to a
// 4 KiB page -- TODO confirm).
2130 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
// Pass 3: one-row-per-frame decision, buffer checks and row bandwidth.
2133 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2134 #ifdef __DML_VBA_DEBUG__
2135 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
2136 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
// Same conditions as PTE_BUFFER_MODE, except that the page-size term also
// requires a vertical source rotation.
2138 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2139 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2140 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2141 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
// The flip variant is additionally disabled for the full-frame P-state MALL mode.
2143 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2144 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
// Replace the regular dpte row results with the one-row-per-frame values.
2146 if (use_one_row_for_frame[k]) {
2147 dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k];
2148 dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k];
2149 st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k];
2150 dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k];
2151 dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k];
2152 st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k];
2153 PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k];
// DCC meta buffer check uses the combined luma + chroma meta row bytes.
2156 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2157 DCCMetaBufferSizeNotExceeded[k] = true;
2159 DCCMetaBufferSizeNotExceeded[k] = false;
// Reported PTE bytes per row: luma + chroma, halved in one-row-per-frame mode.
2161 PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k];
2162 if (use_one_row_for_frame[k])
2163 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
// Row bandwidth helper; its output arguments are not visible in this copy
// (presumably meta_row_bw[k] and dpte_row_bw[k] -- TODO confirm).
2165 dml32_CalculateRowBandwidth(
2167 myPipe[k].SourcePixelFormat,
2169 myPipe[k].VRatioChroma,
2170 myPipe[k].DCCEnable,
2171 myPipe[k].HTotal / myPipe[k].PixelClock,
2172 st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k],
2174 meta_row_height_chroma[k],
2175 st_vars->PixelPTEBytesPerRowY[k],
2176 st_vars->PixelPTEBytesPerRowC[k],
2177 dpte_row_height_luma[k],
2178 dpte_row_height_chroma[k],
2183 #ifdef __DML_VBA_DEBUG__
2184 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
2185 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
2186 __func__, k, use_one_row_for_frame_flip[k]);
2187 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
2188 __func__, k, UseMALLForPStateChange[k]);
2189 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
2190 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
2191 __func__, k, dpte_row_width_luma_ub[k]);
2192 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]);
2193 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
2194 __func__, k, dpte_row_height_chroma[k]);
2195 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
2196 __func__, k, dpte_row_width_chroma_ub[k]);
2197 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]);
// NOTE(review): PixelPTEBytesPerRow[] is declared double but is printed with %d below.
2198 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
2199 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
2200 __func__, k, PTEBufferSizeNotExceeded[k]);
2201 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2202 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2205 } // CalculateVMRowAndSwath
/*
 * dml32_CalculateVMAndRowBytes - page-table and DCC meta byte counts and row
 * geometry for a single plane.
 *
 * Return value: PDEAndMetaPTEBytesFrame, which is
 *   *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame
 * and is multiplied by (1 + 8 * HostVMDynamicLevels) when HostVMEnable is set.
 *
 * Values written through the pointer parameters:
 *   MetaRequestWidth/Height   8 times the 256-byte block width/height.
 *   meta_row_width/height,
 *   MetaRowByte               DCC meta row geometry; selected by SurfaceTiling
 *                             and IsVertical(SourceRotation).
 *   MetaPTEBytesFrame,
 *   DPDE0BytesFrame           per-frame page-table byte counts.
 *   PixelPTEReqWidth/Height,
 *   PTERequestSize            PTE request shape; selected by SurfaceTiling and
 *                             GPUVMMinPageSizeKBytes.
 *   dpte_row_height,
 *   dpte_row_width_ub,
 *   PixelPTEBytesPerRow       regular dpte row results.
 *   dpte_row_height_linear    written only in the dm_sw_linear branch.
 *   *_one_row_per_frame       the same row results when the whole
 *                             vp_height_dpte_ub is treated as a single row.
 *
 * NOTE(review): DCCEnable, SwathWidth, GPUVMEnable, HostVMEnable and Pitch are
 * used below, but their declarations, and brace/else/#endif-only lines, are
 * not visible in this copy of the file -- confirm against the full source.
 */
2207 unsigned int dml32_CalculateVMAndRowBytes(
2208 bool ViewportStationary,
2210 unsigned int NumberOfDPPs,
2211 unsigned int BlockHeight256Bytes,
2212 unsigned int BlockWidth256Bytes,
2213 enum source_format_class SourcePixelFormat,
2214 unsigned int SurfaceTiling,
2215 unsigned int BytePerPixel,
2216 enum dm_rotation_angle SourceRotation,
2218 unsigned int ViewportHeight,
2219 unsigned int ViewportXStart,
2220 unsigned int ViewportYStart,
2223 unsigned int HostVMMaxNonCachedPageTableLevels,
2224 unsigned int GPUVMMaxPageTableLevels,
2225 unsigned int GPUVMMinPageSizeKBytes,
2226 unsigned int HostVMMinPageSize,
2227 unsigned int PTEBufferSizeInRequests,
2229 unsigned int DCCMetaPitch,
2230 unsigned int MacroTileWidth,
2231 unsigned int MacroTileHeight,
// The pointer parameters below are outputs.
2234 unsigned int *MetaRowByte,
2235 unsigned int *PixelPTEBytesPerRow,
2236 unsigned int *dpte_row_width_ub,
2237 unsigned int *dpte_row_height,
2238 unsigned int *dpte_row_height_linear,
2239 unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
2240 unsigned int *dpte_row_width_ub_one_row_per_frame,
2241 unsigned int *dpte_row_height_one_row_per_frame,
2242 unsigned int *MetaRequestWidth,
2243 unsigned int *MetaRequestHeight,
2244 unsigned int *meta_row_width,
2245 unsigned int *meta_row_height,
2246 unsigned int *PixelPTEReqWidth,
2247 unsigned int *PixelPTEReqHeight,
2248 unsigned int *PTERequestSize,
2249 unsigned int *DPDE0BytesFrame,
2250 unsigned int *MetaPTEBytesFrame)
2252 unsigned int MPDEBytesFrame;
2253 unsigned int DCCMetaSurfaceBytes;
2254 unsigned int ExtraDPDEBytesFrame;
2255 unsigned int PDEAndMetaPTEBytesFrame;
2256 unsigned int HostVMDynamicLevels = 0;
2257 unsigned int MacroTileSizeBytes;
2258 unsigned int vp_height_meta_ub;
2259 unsigned int vp_height_dpte_ub;
2260 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
// HostVM dynamic levels: the full non-cached level count for page sizes below
// 2048, one level fewer below 1048576, two fewer in the last assignment;
// never below zero. Stays 0 unless both GPUVM and HostVM are enabled.
2262 if (GPUVMEnable == true && HostVMEnable == true) {
2263 if (HostVMMinPageSize < 2048)
2264 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2265 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2266 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2268 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
// A meta request covers 8 x 8 of the 256-byte blocks.
2271 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2272 *MetaRequestWidth = 8 * BlockWidth256Bytes;
// Meta row geometry. Linear tiling uses a fixed row height of 32.
2273 if (SurfaceTiling == dm_sw_linear) {
2274 *meta_row_height = 32;
2275 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2276 - dml_floor(ViewportXStart, *MetaRequestWidth);
// Non-vertical rotation: a row is one meta request tall. With a stationary
// viewport on a single DPP the exact span from ViewportXStart is used; the
// other assignment rounds SwathWidth - 1 up and adds one more request width.
2277 } else if (!IsVertical(SourceRotation)) {
2278 *meta_row_height = *MetaRequestHeight;
2279 if (ViewportStationary && NumberOfDPPs == 1) {
2280 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2281 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2283 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2285 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
// Remaining case (presumably vertical rotation): the roles of the request
// width and height are swapped.
2287 *meta_row_height = *MetaRequestWidth;
2288 if (ViewportStationary && NumberOfDPPs == 1) {
2289 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2290 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2292 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2294 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
// Upper bound of the viewport height for the meta surface, in units of
// 64 * BlockHeight256Bytes.
2297 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2298 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2299 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2300 } else if (!IsVertical(SourceRotation)) {
2301 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2303 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2306 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
// With GPUVM: 64 bytes per (8 * 4 KiB) of meta surface beyond the first 4 KiB,
// plus one; and 128 bytes for each page-table level above the first.
2308 if (GPUVMEnable == true) {
2309 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2310 (8 * 4.0 * 1024), 1) + 1) * 64;
2311 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2313 *MetaPTEBytesFrame = 0;
// Without DCC there are no meta PTE bytes (any further resets in this branch
// are not visible in this copy).
2317 if (DCCEnable != true) {
2318 *MetaPTEBytesFrame = 0;
2323 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
// PDE bytes are only counted with GPUVM and more than one page-table level.
2325 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
// Upper bound of the viewport height for the dpte surface, in macro-tile rows.
2326 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2327 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2328 MacroTileHeight - 1, MacroTileHeight) -
2329 dml_floor(ViewportYStart, MacroTileHeight);
2330 } else if (!IsVertical(SourceRotation)) {
2331 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2333 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
// NOTE(review): if Pitch is an integer type (its declaration is not visible
// here), the division below truncates before dml_ceil() sees it, and the
// subtraction can wrap when the product is smaller than MacroTileSizeBytes.
2335 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2336 (8 * 2097152), 1) + 1);
2337 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2339 *DPDE0BytesFrame = 0;
2340 ExtraDPDEBytesFrame = 0;
2341 vp_height_dpte_ub = 0;
2344 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2346 #ifdef __DML_VBA_DEBUG__
2347 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2348 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2349 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2350 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2351 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2352 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2353 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2354 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2355 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2356 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2357 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2358 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2359 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2360 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2361 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2362 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2363 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
// HostVM scales the per-frame byte count by (1 + 8 * HostVMDynamicLevels).
2366 if (HostVMEnable == true)
2367 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
// PTE request shape: linear surfaces use a one-line-tall request, a 4 KB
// minimum page uses 16 x 16 of the 256-byte blocks with a 128-byte request,
// and the remaining case uses one macro tile row.
2369 if (SurfaceTiling == dm_sw_linear) {
2370 *PixelPTEReqHeight = 1;
2371 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2372 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2373 *PTERequestSize = 64;
2374 } else if (GPUVMMinPageSizeKBytes == 4) {
2375 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2376 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2377 *PTERequestSize = 128;
2379 *PixelPTEReqHeight = MacroTileHeight;
2380 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2381 *PTERequestSize = 64;
2383 #ifdef __DML_VBA_DEBUG__
2384 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2385 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2386 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2387 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2388 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2389 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2390 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
// One-row-per-frame variant: the whole vp_height_dpte_ub is a single dpte row.
2393 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2394 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2395 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2396 (double) *PixelPTEReqWidth;
2397 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
// NOTE(review): the last factor of the product above is not visible in this copy.
// Regular dpte row. Linear: the row height is a power of two derived from
// PTEBufferSizeInRequests * PixelPTEReqWidth / Pitch, capped at 128.
2400 if (SurfaceTiling == dm_sw_linear) {
2401 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2402 *PixelPTEReqWidth / Pitch), 1));
2403 #ifdef __DML_VBA_DEBUG__
2404 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2405 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2406 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2407 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2408 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2409 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2410 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2411 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2412 *PixelPTEReqWidth / Pitch), 1));
2413 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2415 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2416 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2417 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2419 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2420 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2421 PixelPTEReqWidth_linear / Pitch), 1);
2422 if (*dpte_row_height_linear > 128)
2423 *dpte_row_height_linear = 128;
// Tiled, non-vertical rotation: the row is one PTE request tall. The width is
// computed from Pitch for page sizes above 64 KB, from the exact viewport
// span when stationary on a single DPP, or from SwathWidth.
2425 } else if (!IsVertical(SourceRotation)) {
2426 *dpte_row_height = *PixelPTEReqHeight;
2428 if (GPUVMMinPageSizeKBytes > 64) {
2429 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2430 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2431 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2432 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2433 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2434 dml_floor(ViewportXStart, *PixelPTEReqWidth);
2436 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2440 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
// Remaining case (presumably tiled with vertical rotation): the row height is
// limited by the request width and the macro tile width, and the row width is
// measured along Y in units of the request height.
2442 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2444 if (ViewportStationary && (NumberOfDPPs == 1)) {
2445 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2446 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2448 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2449 * *PixelPTEReqHeight;
2452 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
// No PTE bytes without GPUVM; HostVM applies the same scaling as for the
// per-frame bytes.
2455 if (GPUVMEnable != true)
2456 *PixelPTEBytesPerRow = 0;
2457 if (HostVMEnable == true)
2458 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2460 #ifdef __DML_VBA_DEBUG__
2461 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2462 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
// NOTE(review): *dpte_row_height_linear is only assigned in the dm_sw_linear
// branch above, so on other paths this prints whatever the caller passed in.
2463 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2464 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2465 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2466 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2467 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2468 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2469 __func__, *dpte_row_width_ub_one_row_per_frame);
2470 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2471 __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2472 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2473 *MetaPTEBytesFrame);
2476 return PDEAndMetaPTEBytesFrame;
2477 } // CalculateVMAndRowBytes
/*
 * dml32_CalculatePrefetchSourceLines - number of source lines to prefetch for
 * one plane.
 *
 * Computes *VInitPreFill from VRatio, VTaps and the interlace settings, then
 * *MaxNumSwath and a local MaxPartialSwath, and combines them as
 *   numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath.
 * With a stationary viewport, the starting line inside the first swath
 * (vp_start_rot, chosen by SourceRotation) is taken into account; otherwise
 * only *VInitPreFill and SwathHeight are used.
 *
 * Outputs: *VInitPreFill and *MaxNumSwath.
 *
 * NOTE(review): VRatio, VTaps, Interlace, SwathWidth and numLines are used
 * below, but their declarations, the else-only lines and the return statement
 * are not visible in this copy of the file; the function presumably returns
 * numLines -- confirm against the full source.
 */
2479 double dml32_CalculatePrefetchSourceLines(
2483 bool ProgressiveToInterlaceUnitInOPP,
2484 unsigned int SwathHeight,
2485 enum dm_rotation_angle SourceRotation,
2486 bool ViewportStationary,
2488 unsigned int ViewportHeight,
2489 unsigned int ViewportXStart,
2490 unsigned int ViewportYStart,
// The two pointer parameters below are outputs.
2493 double *VInitPreFill,
2494 unsigned int *MaxNumSwath)
2497 unsigned int vp_start_rot;
2498 unsigned int sw0_tmp;
2499 unsigned int MaxPartialSwath;
2502 #ifdef __DML_VBA_DEBUG__
2503 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2504 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2505 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2506 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2507 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2508 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
// Initial prefill, floored to a whole number of lines. The second form adds
// an Interlace * 0.5 * VRatio term before halving.
2510 if (ProgressiveToInterlaceUnitInOPP)
2511 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2513 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2515 if (ViewportStationary) {
// Starting position in scan order: for the 180 and 90/270m rotations it is
// measured back from the end of the viewport within a swath.
2516 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2517 vp_start_rot = SwathHeight -
2518 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2519 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2520 vp_start_rot = ViewportXStart;
2521 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2522 vp_start_rot = SwathHeight -
2523 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2525 vp_start_rot = ViewportYStart;
// sw0_tmp: lines left in the first swath from the starting position.
2527 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
// When the prefill does not fit in the first swath, count the additional
// swaths needed (the alternative branch is not visible in this copy).
2528 if (sw0_tmp < *VInitPreFill)
2529 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2532 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
// Viewport not stationary: swath counts depend only on the prefill and the
// swath height.
2534 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2535 if (*VInitPreFill > 1)
2536 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2538 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
// Total lines: whole swaths plus the partial swath (at least 1 line).
2540 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2542 #ifdef __DML_VBA_DEBUG__
2543 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
// NOTE(review): *VInitPreFill is a double but is printed with %d below.
2544 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2545 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2546 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2547 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2551 } // CalculatePrefetchSourceLines
2553 void dml32_CalculateMALLUseForStaticScreen(
2554 unsigned int NumberOfActiveSurfaces,
2555 unsigned int MALLAllocatedForDCNFinal,
2556 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2557 unsigned int SurfaceSizeInMALL[],
2558 bool one_row_per_frame_fits_in_buffer[],
2561 bool UsesMALLForStaticScreen[])
2564 unsigned int SurfaceToAddToMALL;
2565 bool CanAddAnotherSurfaceToMALL;
2566 unsigned int TotalSurfaceSizeInMALL;
2568 TotalSurfaceSizeInMALL = 0;
2569 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2570 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2571 if (UsesMALLForStaticScreen[k])
2572 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2573 #ifdef __DML_VBA_DEBUG__
2574 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2575 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
2579 SurfaceToAddToMALL = 0;
2580 CanAddAnotherSurfaceToMALL = true;
2581 while (CanAddAnotherSurfaceToMALL) {
2582 CanAddAnotherSurfaceToMALL = false;
2583 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2584 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2585 !UsesMALLForStaticScreen[k] &&
2586 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2587 one_row_per_frame_fits_in_buffer[k] &&
2588 (!CanAddAnotherSurfaceToMALL ||
2589 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2590 CanAddAnotherSurfaceToMALL = true;
2591 SurfaceToAddToMALL = k;
2592 #ifdef __DML_VBA_DEBUG__
2593 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2594 __func__, k, UseMALLForStaticScreen[k]);
2598 if (CanAddAnotherSurfaceToMALL) {
2599 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2600 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2602 #ifdef __DML_VBA_DEBUG__
2603 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
2604 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
2611 void dml32_CalculateRowBandwidth(
2613 enum source_format_class SourcePixelFormat,
2615 double VRatioChroma,
2618 unsigned int MetaRowByteLuma,
2619 unsigned int MetaRowByteChroma,
2620 unsigned int meta_row_height_luma,
2621 unsigned int meta_row_height_chroma,
2622 unsigned int PixelPTEBytesPerRowLuma,
2623 unsigned int PixelPTEBytesPerRowChroma,
2624 unsigned int dpte_row_height_luma,
2625 unsigned int dpte_row_height_chroma,
2627 double *meta_row_bw,
2628 double *dpte_row_bw)
2630 if (DCCEnable != true) {
2632 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2633 SourcePixelFormat == dm_rgbe_alpha) {
2634 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2635 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2637 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2640 if (GPUVMEnable != true) {
2642 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2643 SourcePixelFormat == dm_rgbe_alpha) {
2644 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2645 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2647 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * Return the urgent latency: the largest of the three supplied latencies,
 * plus - when DoUrgentLatencyAdjustment is set - the fabric-clock adjustment
 *   Component * (Reference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_case = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment != true)
		return worst_case;

	return worst_case + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2670 void dml32_CalculateUrgentBurstFactor(
2671 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2672 unsigned int swath_width_luma_ub,
2673 unsigned int swath_width_chroma_ub,
2674 unsigned int SwathHeightY,
2675 unsigned int SwathHeightC,
2677 double UrgentLatency,
2678 double CursorBufferSize,
2679 unsigned int CursorWidth,
2680 unsigned int CursorBPP,
2683 double BytePerPixelInDETY,
2684 double BytePerPixelInDETC,
2685 unsigned int DETBufferSizeY,
2686 unsigned int DETBufferSizeC,
2688 double *UrgentBurstFactorCursor,
2689 double *UrgentBurstFactorLuma,
2690 double *UrgentBurstFactorChroma,
2691 bool *NotEnoughUrgentLatencyHiding)
2693 double LinesInDETLuma;
2694 double LinesInDETChroma;
2695 unsigned int LinesInCursorBuffer;
2696 double CursorBufferSizeInTime;
2697 double DETBufferSizeInTimeLuma;
2698 double DETBufferSizeInTimeChroma;
2700 *NotEnoughUrgentLatencyHiding = 0;
2702 if (CursorWidth > 0) {
2703 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2704 (CursorWidth * CursorBPP / 8.0)), 1.0);
2706 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2707 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2708 *NotEnoughUrgentLatencyHiding = 1;
2709 *UrgentBurstFactorCursor = 0;
2711 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2712 (CursorBufferSizeInTime - UrgentLatency);
2715 *UrgentBurstFactorCursor = 1;
2719 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2720 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2723 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2724 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2725 *NotEnoughUrgentLatencyHiding = 1;
2726 *UrgentBurstFactorLuma = 0;
2728 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2731 *UrgentBurstFactorLuma = 1;
2734 if (BytePerPixelInDETC > 0) {
2735 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2736 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2737 / swath_width_chroma_ub;
2740 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2741 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2742 *NotEnoughUrgentLatencyHiding = 1;
2743 *UrgentBurstFactorChroma = 0;
2745 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2746 / (DETBufferSizeInTimeChroma - UrgentLatency);
2749 *UrgentBurstFactorChroma = 1;
2752 } // CalculateUrgentBurstFactor
2754 void dml32_CalculateDCFCLKDeepSleep(
2755 unsigned int NumberOfActiveSurfaces,
2756 unsigned int BytePerPixelY[],
2757 unsigned int BytePerPixelC[],
2759 double VRatioChroma[],
2760 double SwathWidthY[],
2761 double SwathWidthC[],
2762 unsigned int DPPPerSurface[],
2764 double HRatioChroma[],
2765 double PixelClock[],
2766 double PSCL_THROUGHPUT[],
2767 double PSCL_THROUGHPUT_CHROMA[],
2769 double ReadBandwidthLuma[],
2770 double ReadBandwidthChroma[],
2771 unsigned int ReturnBusWidth,
2774 double *DCFClkDeepSleep)
2777 double DisplayPipeLineDeliveryTimeLuma;
2778 double DisplayPipeLineDeliveryTimeChroma;
2779 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2780 double ReadBandwidth = 0.0;
2782 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2784 if (VRatio[k] <= 1) {
2785 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2788 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2790 if (BytePerPixelC[k] == 0) {
2791 DisplayPipeLineDeliveryTimeChroma = 0;
2793 if (VRatioChroma[k] <= 1) {
2794 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2795 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2797 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2802 if (BytePerPixelC[k] > 0) {
2803 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2804 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2805 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2806 32.0 / DisplayPipeLineDeliveryTimeChroma);
2808 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2809 64.0 / DisplayPipeLineDeliveryTimeLuma;
2811 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2813 #ifdef __DML_VBA_DEBUG__
2814 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2815 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2819 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2820 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2822 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2824 #ifdef __DML_VBA_DEBUG__
2825 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2826 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2827 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2828 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2831 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2832 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2833 #ifdef __DML_VBA_DEBUG__
2834 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2836 } // CalculateDCFCLKDeepSleep
2838 double dml32_CalculateWriteBackDelay(
2839 enum source_format_class WritebackPixelFormat,
2840 double WritebackHRatio,
2841 double WritebackVRatio,
2842 unsigned int WritebackVTaps,
2843 unsigned int WritebackDestinationWidth,
2844 unsigned int WritebackDestinationHeight,
2845 unsigned int WritebackSourceHeight,
2846 unsigned int HTotal)
2848 double CalculateWriteBackDelay;
2850 double Output_lines_last_notclamped;
2851 double WritebackVInit;
2853 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2854 Line_length = dml_max((double) WritebackDestinationWidth,
2855 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2856 Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2857 dml_ceil(((double)WritebackSourceHeight -
2858 (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2859 if (Output_lines_last_notclamped < 0) {
2860 CalculateWriteBackDelay = 0;
2862 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2863 (HTotal - WritebackDestinationWidth) + 80;
2865 return CalculateWriteBackDelay;
2868 void dml32_UseMinimumDCFCLK(
2869 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2871 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2872 unsigned int MaxInterDCNTileRepeaters,
2873 unsigned int MaxPrefetchMode,
2874 double DRAMClockChangeLatencyFinal,
2875 double FCLKChangeLatency,
2876 double SREnterPlusExitTime,
2877 unsigned int ReturnBusWidth,
2878 unsigned int RoundTripPingLatencyCycles,
2879 unsigned int ReorderingBytes,
2880 unsigned int PixelChunkSizeInKByte,
2881 unsigned int MetaChunkSize,
2883 unsigned int GPUVMMaxPageTableLevels,
2885 unsigned int NumberOfActiveSurfaces,
2886 double HostVMMinPageSize,
2887 unsigned int HostVMMaxNonCachedPageTableLevels,
2888 bool DynamicMetadataVMEnabled,
2889 bool ImmediateFlipRequirement,
2890 bool ProgressiveToInterlaceUnitInOPP,
2891 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2892 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2893 unsigned int VTotal[],
2894 unsigned int VActive[],
2895 unsigned int DynamicMetadataTransmittedBytes[],
2896 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2898 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2899 double RequiredDISPCLK[][2],
2900 double UrgLatency[],
2901 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2902 double ProjectedDCFClkDeepSleep[][2],
2903 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2904 unsigned int TotalNumberOfActiveDPP[][2],
2905 unsigned int TotalNumberOfDCCActiveDPP[][2],
2906 unsigned int dpte_group_bytes[],
2907 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2908 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2909 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2910 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2911 unsigned int BytePerPixelY[],
2912 unsigned int BytePerPixelC[],
2913 unsigned int HTotal[],
2914 double PixelClock[],
2915 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2916 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2917 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2918 bool DynamicMetadataEnable[],
2919 double ReadBandwidthLuma[],
2920 double ReadBandwidthChroma[],
2921 double DCFCLKPerState[],
2923 double DCFCLKState[][2])
2925 unsigned int i, j, k;
2926 unsigned int dummy1;
2927 double dummy2, dummy3;
2928 double NormalEfficiency;
2929 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2931 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2932 for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2933 for (j = 0; j <= 1; ++j) {
2934 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2935 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2936 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2937 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2938 double MinimumTWait = 0.0;
2939 double DPTEBandwidth;
2940 double DCFCLKRequiredForAverageBandwidth;
2941 unsigned int ExtraLatencyBytes;
2942 double ExtraLatencyCycles;
2943 double DCFCLKRequiredForPeakBandwidth;
2944 unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2945 double MinimumTvmPlus2Tr0;
2947 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2948 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2949 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2950 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2951 / (15.75 * HTotal[k] / PixelClock[k]);
2954 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
2955 NoOfDPPState[k] = NoOfDPP[i][j][k];
2957 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
2958 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
2960 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
2961 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
2962 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
2963 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
2964 HostVMMaxNonCachedPageTableLevels);
2965 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
2966 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
2967 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2968 double DCFCLKCyclesRequiredInPrefetch;
2969 double PrefetchTime;
2971 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
2972 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
2973 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
2974 * BytePerPixelC[k]) / NormalEfficiency
2976 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
2977 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
2978 / NormalEfficiency / ReturnBusWidth
2979 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
2980 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
2982 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
2983 + PixelDCFCLKCyclesRequiredInPrefetch[k];
2984 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
2985 * HTotal[k] / PixelClock[k];
2986 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
2987 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
2988 UrgLatency[i] * GPUVMMaxPageTableLevels *
2989 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
2991 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
2992 UseMALLForPStateChange[k],
2993 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2995 DRAMClockChangeLatencyFinal,
2998 SREnterPlusExitTime);
3000 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3001 MinimumTWait - UrgLatency[i] *
3002 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3003 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
3004 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3005 DynamicMetadataVMExtraLatency[k];
3007 if (PrefetchTime > 0) {
3008 double ExpectedVRatioPrefetch;
3010 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3011 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3012 DCFCLKCyclesRequiredInPrefetch);
3013 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3014 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3015 PrefetchPixelLinesTime[k] *
3016 dml_max(1.0, ExpectedVRatioPrefetch) *
3017 dml_max(1.0, ExpectedVRatioPrefetch / 4);
3018 if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3019 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3020 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3021 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3022 NormalEfficiency / ReturnBusWidth;
3025 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3027 if (DynamicMetadataEnable[k] == true) {
3032 double AllowedTimeForUrgentExtraLatency;
3034 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3035 MaxInterDCNTileRepeaters,
3036 RequiredDPPCLKPerSurface[i][j][k],
3037 RequiredDISPCLK[i][j],
3038 ProjectedDCFClkDeepSleep[i][j],
3041 VTotal[k] - VActive[k],
3042 DynamicMetadataTransmittedBytes[k],
3043 DynamicMetadataLinesBeforeActiveRequired[k],
3045 ProgressiveToInterlaceUnitInOPP,
3055 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3056 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3057 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3058 if (AllowedTimeForUrgentExtraLatency > 0)
3059 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3060 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3061 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3063 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3066 DCFCLKRequiredForPeakBandwidth = 0;
3067 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3068 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3069 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3071 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3072 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3073 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3074 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3075 double MaximumTvmPlus2Tr0PlusTsw;
3077 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3078 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3079 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3080 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3082 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3083 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3084 MinimumTvmPlus2Tr0 -
3085 PrefetchPixelLinesTime[k] / 4),
3086 (2 * ExtraLatencyCycles +
3087 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3088 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3091 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3092 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * Return the number of extra-latency bytes.
 *
 * The base amount is ReorderingBytes plus the pixel and meta chunk sizes
 * (scaled by 1024.0) for the active / DCC-active DPPs.  When GPUVMEnable is
 * set, each surface additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) * HostVMInefficiencyFactor
 * where "levels" is 0 unless both GPUVM and HostVM are enabled, in which case
 * it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (never below 0)
 * depending on HostVMMinPageSize (< 2048, < 1048576, otherwise).
 *
 * The sum is accumulated in a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int surface;
	unsigned int dynamic_levels = 0;
	double total_bytes;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			dynamic_levels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable != true)
		return total_bytes;

	for (surface = 0; surface < NumberOfActiveSurfaces; ++surface) {
		total_bytes = total_bytes + NumberOfDPP[surface] * dpte_group_bytes[surface] *
				(1 + 8 * dynamic_levels) * HostVMInefficiencyFactor;
	}

	return total_bytes;
}
/*
 * Compute the VUpdate / VReady pixel offsets and the dynamic-metadata timing
 * terms for one pipe.
 *
 * Outputs:
 *  *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk + repeater delay) * PixelClock)
 *  *VReadyOffsetPix  - ceil(max(150 / Dppclk,
 *                      repeater delay + 20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *  *VUpdateOffsetPix - ceil(HTotal / 4)
 *  *TSetup           - sum of the three values above divided by PixelClock
 *  *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *  *Tdmec            - HTotal / PixelClock
 *  *Tdmsks           - half of the vertical blank time when
 *                      DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *                      that many line times; halved again when InterlaceEnable
 *                      is 1 and ProgressiveToInterlaceUnitInOPP is false.
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f, not %d. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3193 double dml32_CalculateTWait(
3194 unsigned int PrefetchMode,
3195 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3196 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3198 double DRAMClockChangeLatency,
3199 double FCLKChangeLatency,
3200 double UrgentLatency,
3201 double SREnterPlusExitTime)
3205 if (PrefetchMode == 0 &&
3206 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3207 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3208 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3209 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3210 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3211 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3212 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3213 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3214 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3216 TWait = UrgentLatency;
3219 #ifdef __DML_VBA_DEBUG__
3220 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3221 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3226 // Function: get_return_bw_mbps
3227 // Megabyte per second
3228 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3229 const int VoltageLevel,
3230 const bool HostVMEnable,
3231 const double DCFCLK,
3232 const double FabricClock,
3233 const double DRAMSpeed)
3235 double ReturnBW = 0.;
3236 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3237 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3238 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3239 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3240 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3241 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3242 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3243 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3244 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3245 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3246 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3248 if (HostVMEnable != true)
3249 ReturnBW = PixelDataOnlyReturnBW;
3251 ReturnBW = PixelMixedWithVMDataReturnBW;
3253 #ifdef __DML_VBA_DEBUG__
3254 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3255 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3256 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3257 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3258 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3259 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
3260 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
3261 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
3262 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
3263 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3264 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
3269 // Function: get_return_bw_mbps_vm_only
3270 // Megabyte per second
3271 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3272 const int VoltageLevel,
3273 const double DCFCLK,
3274 const double FabricClock,
3275 const double DRAMSpeed)
3277 double VMDataOnlyReturnBW = dml_min3(
3278 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3279 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3280 * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3281 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3282 * (VoltageLevel < 2 ?
3283 soc->pct_ideal_dram_bw_after_urgent_strobe :
3284 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3285 #ifdef __DML_VBA_DEBUG__
3286 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3287 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3288 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3289 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3290 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3292 return VMDataOnlyReturnBW;
3295 double dml32_CalculateExtraLatency(
3296 unsigned int RoundTripPingLatencyCycles,
3297 unsigned int ReorderingBytes,
3299 unsigned int TotalNumberOfActiveDPP,
3300 unsigned int PixelChunkSizeInKByte,
3301 unsigned int TotalNumberOfDCCActiveDPP,
3302 unsigned int MetaChunkSize,
3306 unsigned int NumberOfActiveSurfaces,
3307 unsigned int NumberOfDPP[],
3308 unsigned int dpte_group_bytes[],
3309 double HostVMInefficiencyFactor,
3310 double HostVMMinPageSize,
3311 unsigned int HostVMMaxNonCachedPageTableLevels)
3313 double ExtraLatencyBytes;
3314 double ExtraLatency;
3316 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3318 TotalNumberOfActiveDPP,
3319 PixelChunkSizeInKByte,
3320 TotalNumberOfDCCActiveDPP,
3324 NumberOfActiveSurfaces,
3327 HostVMInefficiencyFactor,
3329 HostVMMaxNonCachedPageTableLevels);
3331 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3333 #ifdef __DML_VBA_DEBUG__
3334 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3335 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3336 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3337 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3338 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3341 return ExtraLatency;
3342 } // CalculateExtraLatency
3344 bool dml32_CalculatePrefetchSchedule(
3345 struct dml32_CalculatePrefetchSchedule *st_vars,
3346 double HostVMInefficiencyFactor,
3348 unsigned int DSCDelay,
3349 double DPPCLKDelaySubtotalPlusCNVCFormater,
3350 double DPPCLKDelaySCL,
3351 double DPPCLKDelaySCLLBOnly,
3352 double DPPCLKDelayCNVCCursor,
3353 double DISPCLKDelaySubtotal,
3354 unsigned int DPP_RECOUT_WIDTH,
3355 enum output_format_class OutputFormat,
3356 unsigned int MaxInterDCNTileRepeaters,
3357 unsigned int VStartup,
3358 unsigned int MaxVStartup,
3359 unsigned int GPUVMPageTableLevels,
3362 unsigned int HostVMMaxNonCachedPageTableLevels,
3363 double HostVMMinPageSize,
3364 bool DynamicMetadataEnable,
3365 bool DynamicMetadataVMEnabled,
3366 int DynamicMetadataLinesBeforeActiveRequired,
3367 unsigned int DynamicMetadataTransmittedBytes,
3368 double UrgentLatency,
3369 double UrgentExtraLatency,
3371 unsigned int PDEAndMetaPTEBytesFrame,
3372 unsigned int MetaRowByte,
3373 unsigned int PixelPTEBytesPerRow,
3374 double PrefetchSourceLinesY,
3375 unsigned int SwathWidthY,
3376 unsigned int VInitPreFillY,
3377 unsigned int MaxNumSwathY,
3378 double PrefetchSourceLinesC,
3379 unsigned int SwathWidthC,
3380 unsigned int VInitPreFillC,
3381 unsigned int MaxNumSwathC,
3382 unsigned int swath_width_luma_ub,
3383 unsigned int swath_width_chroma_ub,
3384 unsigned int SwathHeightY,
3385 unsigned int SwathHeightC,
3388 double *DSTXAfterScaler,
3389 double *DSTYAfterScaler,
3390 double *DestinationLinesForPrefetch,
3391 double *PrefetchBandwidth,
3392 double *DestinationLinesToRequestVMInVBlank,
3393 double *DestinationLinesToRequestRowInVBlank,
3394 double *VRatioPrefetchY,
3395 double *VRatioPrefetchC,
3396 double *RequiredPrefetchPixDataBWLuma,
3397 double *RequiredPrefetchPixDataBWChroma,
3398 bool *NotEnoughTimeForDynamicMetadata,
3400 double *prefetch_vmrow_bw,
3404 unsigned int *VUpdateOffsetPix,
3405 double *VUpdateWidthPix,
3406 double *VReadyOffsetPix)
3408 bool MyError = false;
3410 st_vars->TimeForFetchingMetaPTE = 0;
3411 st_vars->TimeForFetchingRowInVBlank = 0;
3412 st_vars->LinesToRequestPrefetchPixelData = 0;
3413 st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3414 st_vars->Tsw_est1 = 0;
3415 st_vars->Tsw_est3 = 0;
3417 if (GPUVMEnable == true && HostVMEnable == true)
3418 st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3420 st_vars->HostVMDynamicLevelsTrips = 0;
3421 #ifdef __DML_VBA_DEBUG__
3422 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
3423 dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
3424 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3425 dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3426 __func__, HostVMEnable, HostVMInefficiencyFactor);
3428 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3429 MaxInterDCNTileRepeaters,
3432 myPipe->DCFClkDeepSleep,
3436 DynamicMetadataTransmittedBytes,
3437 DynamicMetadataLinesBeforeActiveRequired,
3438 myPipe->InterlaceEnable,
3439 myPipe->ProgressiveToInterlaceUnitInOPP,
3450 st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock;
3451 st_vars->trip_to_mem = UrgentLatency;
3452 st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
3454 if (DynamicMetadataVMEnabled == true)
3455 *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem;
3457 *Tdmdl = TWait + UrgentExtraLatency;
3459 #ifdef __DML_VBA_ALLOW_DELTA__
3460 if (DynamicMetadataEnable == false)
3464 if (DynamicMetadataEnable == true) {
3465 if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) {
3466 *NotEnoughTimeForDynamicMetadata = true;
3467 #ifdef __DML_VBA_DEBUG__
3468 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3469 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3470 __func__, st_vars->Tdmbf);
3471 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
3472 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3473 __func__, st_vars->Tdmsks);
3474 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3478 *NotEnoughTimeForDynamicMetadata = false;
3481 *NotEnoughTimeForDynamicMetadata = false;
3484 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
3485 GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0);
3487 if (myPipe->ScalerEnabled)
3488 st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
3490 st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
3492 st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
3494 st_vars->DISPCLKCycles = DISPCLKDelaySubtotal;
3496 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3499 *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles *
3500 myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3502 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3503 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3504 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3505 myPipe->HActive / 2 : 0)
3506 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3508 #ifdef __DML_VBA_DEBUG__
3509 dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles);
3510 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3511 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3512 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles);
3513 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
3514 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
3515 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
3516 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3517 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
3520 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3521 *DSTYAfterScaler = 1;
3523 *DSTYAfterScaler = 0;
3525 st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3526 *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3527 *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3528 #ifdef __DML_VBA_DEBUG__
3529 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
3530 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3535 st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1);
3537 if (GPUVMEnable == true) {
3538 st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
3539 st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
3540 if (GPUVMPageTableLevels >= 3) {
3541 *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem *
3542 (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
3543 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
3544 st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) /
3545 4.0 * st_vars->LineTime; // VBA_ERROR
3546 *Tno_bw = UrgentExtraLatency;
3550 } else if (myPipe->DCCEnable == true) {
3551 st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
3552 st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
3555 st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
3556 st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0;
3559 st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0);
3560 st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0);
3562 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3563 || myPipe->SourcePixelFormat == dm_420_12) {
3564 st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3566 st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3569 st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3570 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3571 st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3572 st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime));
3574 st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre;
3575 st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0);
3576 st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0;
3578 if (GPUVMEnable == true) {
3579 st_vars->Tvm_oto = dml_max3(
3581 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto,
3582 st_vars->LineTime / 4.0);
3584 st_vars->Tvm_oto = st_vars->LineTime / 4.0;
3586 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3587 st_vars->Tr0_oto = dml_max4(
3589 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto,
3590 (st_vars->LineTime - st_vars->Tvm_oto)/2.0,
3591 st_vars->LineTime / 4.0);
3592 #ifdef __DML_VBA_DEBUG__
3593 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3594 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto);
3595 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips);
3596 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto);
3597 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4);
3600 st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0;
3602 st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0;
3603 st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0;
3604 st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto;
3606 st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime -
3607 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3609 #ifdef __DML_VBA_DEBUG__
3610 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3611 dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw);
3612 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3613 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3614 dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem);
3615 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3616 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3617 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3618 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3619 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3620 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3621 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes);
3622 dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp);
3623 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3624 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3625 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3626 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3627 dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips);
3628 dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips);
3629 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto);
3630 dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto);
3631 dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto);
3632 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines);
3633 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines);
3634 dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto);
3635 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto);
3636 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ);
3639 st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0;
3640 st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime;
3641 #ifdef __DML_VBA_DEBUG__
3642 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ);
3643 dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime);
3644 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3645 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3646 __func__, VStartup * st_vars->LineTime);
3647 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3648 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3649 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf);
3650 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
3651 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3652 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3653 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3654 __func__, *DSTYAfterScaler);
3656 st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3657 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3659 if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes)
3660 st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes;
3662 *PrefetchBandwidth = 0;
3663 *DestinationLinesToRequestVMInVBlank = 0;
3664 *DestinationLinesToRequestRowInVBlank = 0;
3665 *VRatioPrefetchY = 0;
3666 *VRatioPrefetchC = 0;
3667 *RequiredPrefetchPixDataBWLuma = 0;
3668 if (st_vars->dst_y_prefetch_equ > 1) {
3669 double PrefetchBandwidth1;
3670 double PrefetchBandwidth2;
3671 double PrefetchBandwidth3;
3672 double PrefetchBandwidth4;
3674 if (st_vars->Tpre_rounded - *Tno_bw > 0) {
3675 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3676 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3677 + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw);
3678 st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1;
3680 PrefetchBandwidth1 = 0;
3682 if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw)
3683 && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) {
3684 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3685 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3686 / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw);
3689 if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0)
3690 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) /
3691 (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded);
3693 PrefetchBandwidth2 = 0;
3695 if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) {
3696 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3697 + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded);
3698 st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3;
3700 PrefetchBandwidth3 = 0;
3703 if (VStartup == MaxVStartup &&
3704 (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 *
3705 st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) {
3706 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3707 / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded);
3710 if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) {
3711 PrefetchBandwidth4 = st_vars->prefetch_sw_bytes /
3712 (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded);
3714 PrefetchBandwidth4 = 0;
3717 #ifdef __DML_VBA_DEBUG__
3718 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded);
3719 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3720 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded);
3721 dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1);
3722 dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3);
3723 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3724 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3725 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3726 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3733 if (PrefetchBandwidth1 > 0) {
3734 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3735 >= st_vars->Tvm_trips_rounded
3736 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3737 / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) {
3746 if (PrefetchBandwidth2 > 0) {
3747 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3748 >= st_vars->Tvm_trips_rounded
3749 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3750 / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) {
3759 if (PrefetchBandwidth3 > 0) {
3760 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3761 st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3762 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3763 st_vars->Tr0_trips_rounded) {
3773 st_vars->prefetch_bw_equ = PrefetchBandwidth1;
3775 st_vars->prefetch_bw_equ = PrefetchBandwidth2;
3777 st_vars->prefetch_bw_equ = PrefetchBandwidth3;
3779 st_vars->prefetch_bw_equ = PrefetchBandwidth4;
3781 #ifdef __DML_VBA_DEBUG__
3782 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3783 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3784 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3785 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ);
3788 if (st_vars->prefetch_bw_equ > 0) {
3789 if (GPUVMEnable == true) {
3790 st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3791 HostVMInefficiencyFactor / st_vars->prefetch_bw_equ,
3792 st_vars->Tvm_trips, st_vars->LineTime / 4);
3794 st_vars->Tvm_equ = st_vars->LineTime / 4;
3797 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3798 st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3799 HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips,
3800 (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4);
3802 st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2;
3805 st_vars->Tvm_equ = 0;
3806 st_vars->Tr0_equ = 0;
3807 #ifdef __DML_VBA_DEBUG__
3808 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3813 if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) {
3814 *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto;
3815 st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto;
3816 st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto;
3817 *PrefetchBandwidth = st_vars->prefetch_bw_oto;
3819 *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ;
3820 st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ;
3821 st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ;
3822 *PrefetchBandwidth = st_vars->prefetch_bw_equ;
3825 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0;
3827 *DestinationLinesToRequestRowInVBlank =
3828 dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0;
3830 st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3831 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3833 #ifdef __DML_VBA_DEBUG__
3834 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3835 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3836 __func__, *DestinationLinesToRequestVMInVBlank);
3837 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank);
3838 dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
3839 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3840 __func__, *DestinationLinesToRequestRowInVBlank);
3841 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3842 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData);
3845 if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) {
3846 *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData;
3847 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3848 #ifdef __DML_VBA_DEBUG__
3849 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3850 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3851 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3853 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3854 if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3856 dml_max((double) PrefetchSourceLinesY /
3857 st_vars->LinesToRequestPrefetchPixelData,
3858 (double) MaxNumSwathY * SwathHeightY /
3859 (st_vars->LinesToRequestPrefetchPixelData -
3860 (VInitPreFillY - 3.0) / 2.0));
3861 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3864 *VRatioPrefetchY = 0;
3866 #ifdef __DML_VBA_DEBUG__
3867 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3868 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3869 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3873 *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData;
3874 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3876 #ifdef __DML_VBA_DEBUG__
3877 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3878 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3879 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3881 if ((SwathHeightC > 4)) {
3882 if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3884 dml_max(*VRatioPrefetchC,
3885 (double) MaxNumSwathC * SwathHeightC /
3886 (st_vars->LinesToRequestPrefetchPixelData -
3887 (VInitPreFillC - 3.0) / 2.0));
3888 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3891 *VRatioPrefetchC = 0;
3893 #ifdef __DML_VBA_DEBUG__
3894 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3895 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3896 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3900 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3901 / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3902 / st_vars->LineTime;
3904 #ifdef __DML_VBA_DEBUG__
3905 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3906 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3907 dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
3908 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3909 __func__, *RequiredPrefetchPixDataBWLuma);
3911 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3912 st_vars->LinesToRequestPrefetchPixelData
3913 * myPipe->BytePerPixelC
3914 * swath_width_chroma_ub / st_vars->LineTime;
3917 #ifdef __DML_VBA_DEBUG__
3918 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3919 __func__, st_vars->LinesToRequestPrefetchPixelData);
3921 *VRatioPrefetchY = 0;
3922 *VRatioPrefetchC = 0;
3923 *RequiredPrefetchPixDataBWLuma = 0;
3924 *RequiredPrefetchPixDataBWChroma = 0;
3926 #ifdef __DML_VBA_DEBUG__
3927 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3928 (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime +
3929 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE);
3930 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE);
3931 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
3932 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime);
3933 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
3934 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime -
3935 st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
3936 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup);
3937 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
3938 PixelPTEBytesPerRow);
3942 #ifdef __DML_VBA_DEBUG__
3943 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
3944 __func__, st_vars->dst_y_prefetch_equ);
3949 double prefetch_vm_bw;
3950 double prefetch_row_bw;
3952 if (PDEAndMetaPTEBytesFrame == 0) {
3954 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
3955 #ifdef __DML_VBA_DEBUG__
3956 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3957 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3958 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3959 __func__, *DestinationLinesToRequestVMInVBlank);
3960 dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
3962 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
3963 (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime);
3964 #ifdef __DML_VBA_DEBUG__
3965 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
3970 #ifdef __DML_VBA_DEBUG__
3971 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
3972 __func__, *DestinationLinesToRequestVMInVBlank);
3976 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
3977 prefetch_row_bw = 0;
3978 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
3979 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
3980 (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime);
3982 #ifdef __DML_VBA_DEBUG__
3983 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3984 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3985 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3986 __func__, *DestinationLinesToRequestRowInVBlank);
3987 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
3990 prefetch_row_bw = 0;
3992 #ifdef __DML_VBA_DEBUG__
3993 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
3994 __func__, *DestinationLinesToRequestRowInVBlank);
3998 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4002 *PrefetchBandwidth = 0;
4003 st_vars->TimeForFetchingMetaPTE = 0;
4004 st_vars->TimeForFetchingRowInVBlank = 0;
4005 *DestinationLinesToRequestVMInVBlank = 0;
4006 *DestinationLinesToRequestRowInVBlank = 0;
4007 *DestinationLinesForPrefetch = 0;
4008 st_vars->LinesToRequestPrefetchPixelData = 0;
4009 *VRatioPrefetchY = 0;
4010 *VRatioPrefetchC = 0;
4011 *RequiredPrefetchPixDataBWLuma = 0;
4012 *RequiredPrefetchPixDataBWChroma = 0;
4016 } // CalculatePrefetchSchedule
/*
 * dml32_CalculateFlipSchedule - per-pipe immediate-flip timing and support check.
 *
 * Scales BandwidthAvailableForImmediateFlip by this pipe's share of
 * TotImmediateFlipBytes to get ImmediateFlipBW, derives the time needed to
 * fetch the VM (PDE / meta PTE) data and one row of meta/DPTE data at that
 * bandwidth, converts both times to destination lines, and reports whether
 * the pipe can support an immediate flip.
 *
 * Visible inputs:
 *   HostVMInefficiencyFactor           multiplier applied to PTE byte counts
 *   UrgentExtraLatency, UrgentLatency  latency terms used as lower bounds
 *   GPUVMMaxPageTableLevels            used in the VM-fetch latency bound
 *   HostVMMaxNonCachedPageTableLevels  becomes HostVMDynamicLevelsTrips when
 *                                      GPUVMEnable and HostVMEnable are true
 *   HostVMMinPageSize                  not referenced in the visible lines
 *   PDEAndMetaPTEBytesPerFrame, MetaRowBytes, DPTEBytesPerRow
 *                                      byte counts for this pipe
 *   BandwidthAvailableForImmediateFlip, TotImmediateFlipBytes
 *                                      used to compute ImmediateFlipBW
 *   SourcePixelFormat                  selects whether the chroma row heights
 *                                      take part in min_row_time
 *   VRatioChroma, dpte_row_height, meta_row_height,
 *   dpte_row_height_chroma, meta_row_height_chroma
 *                                      used to compute min_row_time
 *   use_one_row_for_frame_flip         when set, DPTEBytesPerRow is counted
 *                                      twice in the ImmediateFlipBW share
 *
 * Outputs (written through pointers):
 *   DestinationLinesToRequestVMInImmediateFlip
 *   DestinationLinesToRequestRowInImmediateFlip
 *   final_flip_bw
 *   ImmediateFlipSupportedForPipe
 *
 * NOTE(review): the embedded line numbering in this listing skips (e.g.
 * 4022 -> 4024, 4032 -> 4035, 4035 -> 4038), and the body uses GPUVMEnable,
 * HostVMEnable, LineTime, VRatio, Tno_bw and DCCEnable although their
 * declarations are not visible here. Braces, `else` and `#endif` lines are
 * likewise not visible. Presumably those lines exist in the full file --
 * confirm before relying on the control flow described below.
 */
4018 void dml32_CalculateFlipSchedule(
4019 double HostVMInefficiencyFactor,
4020 double UrgentExtraLatency,
4021 double UrgentLatency,
4022 unsigned int GPUVMMaxPageTableLevels,
4024 unsigned int HostVMMaxNonCachedPageTableLevels,
4026 double HostVMMinPageSize,
4027 double PDEAndMetaPTEBytesPerFrame,
4028 double MetaRowBytes,
4029 double DPTEBytesPerRow,
4030 double BandwidthAvailableForImmediateFlip,
4031 unsigned int TotImmediateFlipBytes,
4032 enum source_format_class SourcePixelFormat,
4035 double VRatioChroma,
4038 unsigned int dpte_row_height,
4039 unsigned int meta_row_height,
4040 unsigned int dpte_row_height_chroma,
4041 unsigned int meta_row_height_chroma,
4042 bool use_one_row_for_frame_flip,
4045 double *DestinationLinesToRequestVMInImmediateFlip,
4046 double *DestinationLinesToRequestRowInImmediateFlip,
4047 double *final_flip_bw,
4048 bool *ImmediateFlipSupportedForPipe)
4050 double min_row_time = 0.0;
4051 unsigned int HostVMDynamicLevelsTrips;
4052 double TimeForFetchingMetaPTEImmediateFlip;
4053 double TimeForFetchingRowInVBlankImmediateFlip;
4054 double ImmediateFlipBW;
/*
 * Host VM page-table levels only add trips when both GPUVM and host VM are
 * enabled. NOTE(review): an `else` presumably separates the two assignments
 * below (numbering skips 4058) -- confirm.
 */
4056 if (GPUVMEnable == true && HostVMEnable == true)
4057 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4059 HostVMDynamicLevelsTrips = 0;
4061 #ifdef __DML_VBA_DEBUG__
4062 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4063 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
/*
 * With a non-zero total, this pipe's flip bandwidth is the available
 * bandwidth scaled by (this pipe's bytes / TotImmediateFlipBytes).
 * ImmediateFlipBW is only read inside this branch, after it is assigned.
 */
4066 if (TotImmediateFlipBytes > 0) {
4067 if (use_one_row_for_frame_flip) {
4068 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4069 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4071 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4072 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
/*
 * VM fetch time: the larger of the bandwidth-limited transfer time (plus
 * Tno_bw) and the latency of the page-table walk.
 * NOTE(review): the third dml_max3 argument is not visible in this listing
 * (numbering skips 4079) -- confirm.
 */
4074 if (GPUVMEnable == true) {
4075 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4076 HostVMInefficiencyFactor / ImmediateFlipBW,
4077 UrgentExtraLatency + UrgentLatency *
4078 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4081 TimeForFetchingMetaPTEImmediateFlip = 0;
/*
 * Row fetch time: the largest of the bandwidth-limited transfer time, the
 * urgent-latency trips, and a quarter of a line time.
 */
4083 if ((GPUVMEnable == true || DCCEnable == true)) {
4084 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4085 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4086 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4088 TimeForFetchingRowInVBlankImmediateFlip = 0;
/*
 * Convert both times to lines. Presumably dml_ceil(x, 1.0) rounds up to a
 * whole number, making each result a multiple of 0.25 lines -- confirm
 * against the dml_ceil definition.
 */
4091 *DestinationLinesToRequestVMInImmediateFlip =
4092 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4093 *DestinationLinesToRequestRowInImmediateFlip =
4094 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
/*
 * Flip bandwidth: with GPUVM, the larger of the VM and row byte rates over
 * their rounded line counts; in the second branch only the row rate is used.
 * That branch is reached only when GPUVMEnable is not true, so its condition
 * effectively depends on DCCEnable.
 * NOTE(review): the value written when neither branch applies is not visible
 * here (numbering skips 4104-4107) -- confirm.
 */
4096 if (GPUVMEnable == true) {
4097 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4098 (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4099 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4100 (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4101 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4102 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4103 (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
/*
 * NOTE(review): the four zero assignments below are presumably the `else`
 * side of the TotImmediateFlipBytes > 0 test -- confirm.
 */
4108 TimeForFetchingMetaPTEImmediateFlip = 0;
4109 TimeForFetchingRowInVBlankImmediateFlip = 0;
4110 *DestinationLinesToRequestVMInImmediateFlip = 0;
4111 *DestinationLinesToRequestRowInImmediateFlip = 0;
/*
 * min_row_time: for the three listed pixel formats the chroma row heights
 * (scaled by VRatioChroma) are considered alongside the luma ones; for other
 * formats only the luma row heights are used. GPUVM without DCC uses the
 * DPTE rows, DCC without GPUVM uses the meta rows, and the remaining case
 * takes the minimum over both kinds.
 */
4115 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4116 if (GPUVMEnable == true && DCCEnable != true) {
4117 min_row_time = dml_min(dpte_row_height *
4118 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4119 } else if (GPUVMEnable != true && DCCEnable == true) {
4120 min_row_time = dml_min(meta_row_height *
4121 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4123 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4124 LineTime / VRatio, dpte_row_height_chroma * LineTime /
4125 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4128 if (GPUVMEnable == true && DCCEnable != true) {
4129 min_row_time = dpte_row_height * LineTime / VRatio;
4130 } else if (GPUVMEnable != true && DCCEnable == true) {
4131 min_row_time = meta_row_height * LineTime / VRatio;
4134 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
/*
 * The flip is reported as unsupported when the VM request needs 32 or more
 * lines, the row request needs 16 or more lines, or the combined fetch time
 * (VM + 2 x row) fails the final comparison.
 * NOTE(review): the right-hand side of that comparison is not visible here
 * (numbering skips 4140); presumably it is min_row_time -- confirm.
 */
4138 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4139 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4141 *ImmediateFlipSupportedForPipe = false;
4143 *ImmediateFlipSupportedForPipe = true;
4146 #ifdef __DML_VBA_DEBUG__
4147 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4148 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4149 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4150 __func__, *DestinationLinesToRequestVMInImmediateFlip);
4151 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4152 __func__, *DestinationLinesToRequestRowInImmediateFlip);
4153 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4154 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4155 __func__, TimeForFetchingRowInVBlankImmediateFlip);
4156 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4157 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4159 } // CalculateFlipSchedule
4161 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4162 struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
4163 bool USRRetrainingRequiredFinal,
4164 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4165 unsigned int PrefetchMode,
4166 unsigned int NumberOfActiveSurfaces,
4167 unsigned int MaxLineBufferLines,
4168 unsigned int LineBufferSize,
4169 unsigned int WritebackInterfaceBufferSize,
4172 bool SynchronizeTimingsFinal,
4173 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4175 unsigned int dpte_group_bytes[],
4176 unsigned int meta_row_height[],
4177 unsigned int meta_row_height_chroma[],
4178 SOCParametersList mmSOCParameters,
4179 unsigned int WritebackChunkSize,
4181 double DCFClkDeepSleep,
4182 unsigned int DETBufferSizeY[],
4183 unsigned int DETBufferSizeC[],
4184 unsigned int SwathHeightY[],
4185 unsigned int SwathHeightC[],
4186 unsigned int LBBitPerPixel[],
4187 double SwathWidthY[],
4188 double SwathWidthC[],
4190 double HRatioChroma[],
4191 unsigned int VTaps[],
4192 unsigned int VTapsChroma[],
4194 double VRatioChroma[],
4195 unsigned int HTotal[],
4196 unsigned int VTotal[],
4197 unsigned int VActive[],
4198 double PixelClock[],
4199 unsigned int BlendingAndTiming[],
4200 unsigned int DPPPerSurface[],
4201 double BytePerPixelDETY[],
4202 double BytePerPixelDETC[],
4203 double DSTXAfterScaler[],
4204 double DSTYAfterScaler[],
4205 bool WritebackEnable[],
4206 enum source_format_class WritebackPixelFormat[],
4207 double WritebackDestinationWidth[],
4208 double WritebackDestinationHeight[],
4209 double WritebackSourceHeight[],
4210 bool UnboundedRequestEnabled,
4211 unsigned int CompressedBufferSizeInkByte,
4214 Watermarks *Watermark,
4215 enum clock_change_support *DRAMClockChangeSupport,
4216 double MaxActiveDRAMClockChangeLatencySupported[],
4217 unsigned int SubViewportLinesNeededInMALL[],
4218 enum dm_fclock_change_support *FCLKChangeSupport,
4219 double *MinActiveFCLKChangeLatencySupported,
4220 bool *USRRetrainingSupport,
4221 double ActiveDRAMClockChangeLatencyMargin[])
4223 unsigned int i, j, k;
4225 st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0;
4226 st_vars->DRAMClockChangeSupportNumber = 0;
4227 st_vars->DRAMClockChangeMethod = 0;
4228 st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4229 st_vars->MinActiveFCLKChangeMargin = 0.;
4230 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4231 st_vars->TotalPixelBW = 0.0;
4232 st_vars->TotalActiveWriteback = 0;
4234 Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4235 Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4236 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4237 Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
4238 Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
4239 Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4240 + 10 / DCFClkDeepSleep;
4241 Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4242 + 10 / DCFClkDeepSleep;
4243 Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4244 + 10 / DCFClkDeepSleep;
4245 Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4246 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4248 #ifdef __DML_VBA_DEBUG__
4249 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4250 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4251 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4252 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
4253 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
4254 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
4255 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
4256 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
4257 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
4258 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
4259 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4260 __func__, Watermark->Z8StutterEnterPlusExitWatermark);
4264 st_vars->TotalActiveWriteback = 0;
4265 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4266 if (WritebackEnable[k] == true)
4267 st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1;
4270 if (st_vars->TotalActiveWriteback <= 1) {
4271 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4273 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4274 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4276 if (USRRetrainingRequiredFinal)
4277 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
4278 + mmSOCParameters.USRRetrainingLatency;
4280 if (st_vars->TotalActiveWriteback <= 1) {
4281 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4282 + mmSOCParameters.WritebackLatency;
4283 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4284 + mmSOCParameters.WritebackLatency;
4286 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4287 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4288 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4289 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
4292 if (USRRetrainingRequiredFinal)
4293 Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
4294 + mmSOCParameters.USRRetrainingLatency;
4296 if (USRRetrainingRequiredFinal)
4297 Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
4298 + mmSOCParameters.USRRetrainingLatency;
4300 #ifdef __DML_VBA_DEBUG__
4301 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4302 __func__, Watermark->WritebackDRAMClockChangeWatermark);
4303 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
4304 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
4305 dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
4306 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4309 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4310 st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
4311 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
4314 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4316 st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
4317 st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
4320 #ifdef __DML_VBA_DEBUG__
4321 dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
4322 dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize);
4323 dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]);
4324 dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]);
4325 dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]);
4328 st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
4329 st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
4330 st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k];
4332 if (UnboundedRequestEnabled) {
4333 st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY
4334 + CompressedBufferSizeInkByte * 1024
4335 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
4336 / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW;
4339 st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4340 st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]);
4341 st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
4343 st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY
4344 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
4346 if (NumberOfActiveSurfaces > 1) {
4347 st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY
4348 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
4349 / PixelClock[k] / VRatio[k];
4352 if (BytePerPixelDETC[k] > 0) {
4353 st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4354 st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]);
4355 st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
4357 st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC
4358 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
4360 if (NumberOfActiveSurfaces > 1) {
4361 st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC
4362 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
4363 / PixelClock[k] / VRatioChroma[k];
4365 st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY,
4366 st_vars->ActiveClockChangeLatencyHidingC);
4368 st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY;
4371 ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4372 - Watermark->DRAMClockChangeWatermark;
4373 st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4374 - Watermark->FCLKChangeWatermark;
4375 st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
4377 if (WritebackEnable[k]) {
4378 st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
4379 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
4380 / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
4381 if (WritebackPixelFormat[k] == dm_444_64)
4382 st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2;
4384 st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding
4385 - Watermark->WritebackDRAMClockChangeWatermark;
4387 st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding
4388 - Watermark->WritebackFCLKChangeWatermark;
4390 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4391 st_vars->WritebackFCLKChangeLatencyMargin);
4392 st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k],
4393 st_vars->WritebackDRAMClockChangeLatencyMargin);
4395 MaxActiveDRAMClockChangeLatencySupported[k] =
4396 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4398 (ActiveDRAMClockChangeLatencyMargin[k]
4399 + mmSOCParameters.DRAMClockChangeLatency);
4402 for (i = 0; i < NumberOfActiveSurfaces; ++i) {
4403 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
4405 (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
4406 (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
4407 (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
4408 (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
4409 HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
4410 VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4411 (DRRDisplay[i] || DRRDisplay[j]))) {
4412 st_vars->SynchronizedSurfaces[i][j] = true;
4414 st_vars->SynchronizedSurfaces[i][j] = false;
4419 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4420 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4421 (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4422 st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) {
4423 st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4424 st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k];
4425 st_vars->SurfaceWithMinActiveFCLKChangeMargin = k;
4429 *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4431 st_vars->SameTimingForFCLKChange = true;
4432 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4433 if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) {
4434 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4435 (st_vars->SameTimingForFCLKChange ||
4436 st_vars->ActiveFCLKChangeLatencyMargin[k] <
4437 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4438 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k];
4440 st_vars->SameTimingForFCLKChange = false;
4444 if (st_vars->MinActiveFCLKChangeMargin > 0) {
4445 *FCLKChangeSupport = dm_fclock_change_vactive;
4446 } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4447 (PrefetchMode <= 1)) {
4448 *FCLKChangeSupport = dm_fclock_change_vblank;
4450 *FCLKChangeSupport = dm_fclock_change_unsupported;
4453 *USRRetrainingSupport = true;
4454 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4455 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4456 (st_vars->USRRetrainingLatencyMargin[k] < 0)) {
4457 *USRRetrainingSupport = false;
4461 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4462 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4463 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4464 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4465 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4466 if (PrefetchMode > 0) {
4467 st_vars->DRAMClockChangeSupportNumber = 2;
4468 } else if (st_vars->DRAMClockChangeSupportNumber == 0) {
4469 st_vars->DRAMClockChangeSupportNumber = 1;
4470 st_vars->LastSurfaceWithoutMargin = k;
4471 } else if (st_vars->DRAMClockChangeSupportNumber == 1 &&
4472 !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) {
4473 st_vars->DRAMClockChangeSupportNumber = 2;
4478 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4479 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4480 st_vars->DRAMClockChangeMethod = 1;
4481 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4482 st_vars->DRAMClockChangeMethod = 2;
4485 if (st_vars->DRAMClockChangeMethod == 0) {
4486 if (st_vars->DRAMClockChangeSupportNumber == 0)
4487 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4488 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4489 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4491 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4492 } else if (st_vars->DRAMClockChangeMethod == 1) {
4493 if (st_vars->DRAMClockChangeSupportNumber == 0)
4494 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4495 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4496 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4498 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4500 if (st_vars->DRAMClockChangeSupportNumber == 0)
4501 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4502 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4503 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4505 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4508 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4509 unsigned int dst_y_pstate;
4510 unsigned int src_y_pstate_l;
4511 unsigned int src_y_pstate_c;
4512 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4514 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
4515 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
4516 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k];
4517 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
4519 #ifdef __DML_VBA_DEBUG__
4520 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
4521 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
4522 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
4523 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
4524 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]);
4525 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
4526 dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
4527 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
4528 dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]);
4529 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
4531 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4533 if (BytePerPixelDETC[k] > 0) {
4534 src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
4535 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k];
4536 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
4537 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4539 #ifdef __DML_VBA_DEBUG__
4540 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
4541 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
4542 dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]);
4543 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
4547 #ifdef __DML_VBA_DEBUG__
4548 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4549 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4550 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4551 __func__, *MinActiveFCLKChangeLatencySupported);
4552 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4554 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4556 double dml32_CalculateWriteBackDISPCLK(
4557 enum source_format_class WritebackPixelFormat,
4559 double WritebackHRatio,
4560 double WritebackVRatio,
4561 unsigned int WritebackHTaps,
4562 unsigned int WritebackVTaps,
4563 unsigned int WritebackSourceWidth,
4564 unsigned int WritebackDestinationWidth,
4565 unsigned int HTotal,
4566 unsigned int WritebackLineBufferSize,
4567 double DISPCLKDPPCLKVCOSpeed)
4569 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4571 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4572 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4573 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4574 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4575 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4578 void dml32_CalculateMinAndMaxPrefetchMode(
4579 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
4580 unsigned int *MinPrefetchMode,
4581 unsigned int *MaxPrefetchMode)
4583 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4584 *MinPrefetchMode = 3;
4585 *MaxPrefetchMode = 3;
4586 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4587 *MinPrefetchMode = 2;
4588 *MaxPrefetchMode = 2;
4589 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4590 *MinPrefetchMode = 1;
4591 *MaxPrefetchMode = 1;
4592 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4593 *MinPrefetchMode = 0;
4594 *MaxPrefetchMode = 0;
4595 } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4596 dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4597 *MinPrefetchMode = 0;
4598 *MaxPrefetchMode = 3;
4600 *MinPrefetchMode = 0;
4601 *MaxPrefetchMode = 3;
4603 } // CalculateMinAndMaxPrefetchMode
4605 void dml32_CalculatePixelDeliveryTimes(
4606 unsigned int NumberOfActiveSurfaces,
4608 double VRatioChroma[],
4609 double VRatioPrefetchY[],
4610 double VRatioPrefetchC[],
4611 unsigned int swath_width_luma_ub[],
4612 unsigned int swath_width_chroma_ub[],
4613 unsigned int DPPPerSurface[],
4615 double HRatioChroma[],
4616 double PixelClock[],
4617 double PSCL_THROUGHPUT[],
4618 double PSCL_THROUGHPUT_CHROMA[],
4620 unsigned int BytePerPixelC[],
4621 enum dm_rotation_angle SourceRotation[],
4622 unsigned int NumberOfCursors[],
4623 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
4624 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
4625 unsigned int BlockWidth256BytesY[],
4626 unsigned int BlockHeight256BytesY[],
4627 unsigned int BlockWidth256BytesC[],
4628 unsigned int BlockHeight256BytesC[],
4631 double DisplayPipeLineDeliveryTimeLuma[],
4632 double DisplayPipeLineDeliveryTimeChroma[],
4633 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
4634 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
4635 double DisplayPipeRequestDeliveryTimeLuma[],
4636 double DisplayPipeRequestDeliveryTimeChroma[],
4637 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4638 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4639 double CursorRequestDeliveryTime[],
4640 double CursorRequestDeliveryTimePrefetch[])
4642 double req_per_swath_ub;
4645 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4647 #ifdef __DML_VBA_DEBUG__
4648 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4649 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4650 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4651 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4652 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4653 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4654 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4655 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4656 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4657 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4658 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4661 if (VRatio[k] <= 1) {
4662 DisplayPipeLineDeliveryTimeLuma[k] =
4663 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4665 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4668 if (BytePerPixelC[k] == 0) {
4669 DisplayPipeLineDeliveryTimeChroma[k] = 0;
4671 if (VRatioChroma[k] <= 1) {
4672 DisplayPipeLineDeliveryTimeChroma[k] =
4673 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4675 DisplayPipeLineDeliveryTimeChroma[k] =
4676 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4680 if (VRatioPrefetchY[k] <= 1) {
4681 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4682 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4684 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4685 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4688 if (BytePerPixelC[k] == 0) {
4689 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4691 if (VRatioPrefetchC[k] <= 1) {
4692 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4693 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4695 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4696 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4699 #ifdef __DML_VBA_DEBUG__
4700 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4701 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4702 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4703 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4704 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4705 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4706 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4707 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4711 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4712 if (!IsVertical(SourceRotation[k]))
4713 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4715 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4716 #ifdef __DML_VBA_DEBUG__
4717 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4720 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4721 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4722 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4723 if (BytePerPixelC[k] == 0) {
4724 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4725 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4727 if (!IsVertical(SourceRotation[k]))
4728 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4730 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4731 #ifdef __DML_VBA_DEBUG__
4732 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4734 DisplayPipeRequestDeliveryTimeChroma[k] =
4735 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4736 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4737 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4739 #ifdef __DML_VBA_DEBUG__
4740 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4741 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4742 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4743 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4744 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4745 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4746 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4747 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4751 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4752 unsigned int cursor_req_per_width;
4754 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4756 if (NumberOfCursors[k] > 0) {
4757 if (VRatio[k] <= 1) {
4758 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4759 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4761 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4762 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4764 if (VRatioPrefetchY[k] <= 1) {
4765 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4766 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4768 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4769 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4772 CursorRequestDeliveryTime[k] = 0;
4773 CursorRequestDeliveryTimePrefetch[k] = 0;
4775 #ifdef __DML_VBA_DEBUG__
4776 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4777 __func__, k, NumberOfCursors[k]);
4778 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4779 __func__, k, CursorRequestDeliveryTime[k]);
4780 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4781 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4784 } // CalculatePixelDeliveryTimes
4786 void dml32_CalculateMetaAndPTETimes(
4787 bool use_one_row_for_frame[],
4788 unsigned int NumberOfActiveSurfaces,
4790 unsigned int MetaChunkSize,
4791 unsigned int MinMetaChunkSizeBytes,
4792 unsigned int HTotal[],
4794 double VRatioChroma[],
4795 double DestinationLinesToRequestRowInVBlank[],
4796 double DestinationLinesToRequestRowInImmediateFlip[],
4798 double PixelClock[],
4799 unsigned int BytePerPixelY[],
4800 unsigned int BytePerPixelC[],
4801 enum dm_rotation_angle SourceRotation[],
4802 unsigned int dpte_row_height[],
4803 unsigned int dpte_row_height_chroma[],
4804 unsigned int meta_row_width[],
4805 unsigned int meta_row_width_chroma[],
4806 unsigned int meta_row_height[],
4807 unsigned int meta_row_height_chroma[],
4808 unsigned int meta_req_width[],
4809 unsigned int meta_req_width_chroma[],
4810 unsigned int meta_req_height[],
4811 unsigned int meta_req_height_chroma[],
4812 unsigned int dpte_group_bytes[],
4813 unsigned int PTERequestSizeY[],
4814 unsigned int PTERequestSizeC[],
4815 unsigned int PixelPTEReqWidthY[],
4816 unsigned int PixelPTEReqHeightY[],
4817 unsigned int PixelPTEReqWidthC[],
4818 unsigned int PixelPTEReqHeightC[],
4819 unsigned int dpte_row_width_luma_ub[],
4820 unsigned int dpte_row_width_chroma_ub[],
4823 double DST_Y_PER_PTE_ROW_NOM_L[],
4824 double DST_Y_PER_PTE_ROW_NOM_C[],
4825 double DST_Y_PER_META_ROW_NOM_L[],
4826 double DST_Y_PER_META_ROW_NOM_C[],
4827 double TimePerMetaChunkNominal[],
4828 double TimePerChromaMetaChunkNominal[],
4829 double TimePerMetaChunkVBlank[],
4830 double TimePerChromaMetaChunkVBlank[],
4831 double TimePerMetaChunkFlip[],
4832 double TimePerChromaMetaChunkFlip[],
4833 double time_per_pte_group_nom_luma[],
4834 double time_per_pte_group_vblank_luma[],
4835 double time_per_pte_group_flip_luma[],
4836 double time_per_pte_group_nom_chroma[],
4837 double time_per_pte_group_vblank_chroma[],
4838 double time_per_pte_group_flip_chroma[])
4840 unsigned int meta_chunk_width;
4841 unsigned int min_meta_chunk_width;
4842 unsigned int meta_chunk_per_row_int;
4843 unsigned int meta_row_remainder;
4844 unsigned int meta_chunk_threshold;
4845 unsigned int meta_chunks_per_row_ub;
4846 unsigned int meta_chunk_width_chroma;
4847 unsigned int min_meta_chunk_width_chroma;
4848 unsigned int meta_chunk_per_row_int_chroma;
4849 unsigned int meta_row_remainder_chroma;
4850 unsigned int meta_chunk_threshold_chroma;
4851 unsigned int meta_chunks_per_row_ub_chroma;
4852 unsigned int dpte_group_width_luma;
4853 unsigned int dpte_groups_per_row_luma_ub;
4854 unsigned int dpte_group_width_chroma;
4855 unsigned int dpte_groups_per_row_chroma_ub;
4858 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4859 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4860 if (BytePerPixelC[k] == 0)
4861 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4863 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4864 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4865 if (BytePerPixelC[k] == 0)
4866 DST_Y_PER_META_ROW_NOM_C[k] = 0;
4868 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4871 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4872 if (DCCEnable[k] == true) {
4873 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4874 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4875 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4876 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4877 if (!IsVertical(SourceRotation[k]))
4878 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4880 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4882 if (meta_row_remainder <= meta_chunk_threshold)
4883 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4885 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4887 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4888 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4889 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4890 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4891 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4892 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4893 if (BytePerPixelC[k] == 0) {
4894 TimePerChromaMetaChunkNominal[k] = 0;
4895 TimePerChromaMetaChunkVBlank[k] = 0;
4896 TimePerChromaMetaChunkFlip[k] = 0;
4898 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4899 meta_row_height_chroma[k];
4900 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4901 meta_row_height_chroma[k];
4902 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4903 meta_chunk_width_chroma;
4904 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4905 if (!IsVertical(SourceRotation[k])) {
4906 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4907 meta_req_width_chroma[k];
4909 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4910 meta_req_height_chroma[k];
4912 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4913 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4915 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4917 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4918 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4919 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4920 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4921 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4922 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4925 TimePerMetaChunkNominal[k] = 0;
4926 TimePerMetaChunkVBlank[k] = 0;
4927 TimePerMetaChunkFlip[k] = 0;
4928 TimePerChromaMetaChunkNominal[k] = 0;
4929 TimePerChromaMetaChunkVBlank[k] = 0;
4930 TimePerChromaMetaChunkFlip[k] = 0;
4934 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4935 if (GPUVMEnable == true) {
4936 if (!IsVertical(SourceRotation[k])) {
4937 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4938 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4940 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4941 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
4944 if (use_one_row_for_frame[k]) {
4945 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4946 (double) dpte_group_width_luma / 2.0, 1.0);
4948 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4949 (double) dpte_group_width_luma, 1.0);
4951 #ifdef __DML_VBA_DEBUG__
4952 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
4953 __func__, k, use_one_row_for_frame[k]);
4954 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
4955 __func__, k, dpte_group_bytes[k]);
4956 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
4957 __func__, k, PTERequestSizeY[k]);
4958 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
4959 __func__, k, PixelPTEReqWidthY[k]);
4960 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
4961 __func__, k, PixelPTEReqHeightY[k]);
4962 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
4963 __func__, k, dpte_row_width_luma_ub[k]);
4964 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
4965 __func__, k, dpte_group_width_luma);
4966 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
4967 __func__, k, dpte_groups_per_row_luma_ub);
4970 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
4971 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
4972 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
4973 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
4974 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4975 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
4976 if (BytePerPixelC[k] == 0) {
4977 time_per_pte_group_nom_chroma[k] = 0;
4978 time_per_pte_group_vblank_chroma[k] = 0;
4979 time_per_pte_group_flip_chroma[k] = 0;
4981 if (!IsVertical(SourceRotation[k])) {
4982 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
4983 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
4985 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
4986 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
4989 if (use_one_row_for_frame[k]) {
4990 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
4991 (double) dpte_group_width_chroma / 2.0, 1.0);
4993 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
4994 (double) dpte_group_width_chroma, 1.0);
4996 #ifdef __DML_VBA_DEBUG__
4997 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
4998 __func__, k, dpte_row_width_chroma_ub[k]);
4999 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
5000 __func__, k, dpte_group_width_chroma);
5001 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
5002 __func__, k, dpte_groups_per_row_chroma_ub);
5004 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5005 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5006 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5007 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5008 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5009 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5012 time_per_pte_group_nom_luma[k] = 0;
5013 time_per_pte_group_vblank_luma[k] = 0;
5014 time_per_pte_group_flip_luma[k] = 0;
5015 time_per_pte_group_nom_chroma[k] = 0;
5016 time_per_pte_group_vblank_chroma[k] = 0;
5017 time_per_pte_group_flip_chroma[k] = 0;
5019 #ifdef __DML_VBA_DEBUG__
5020 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
5021 __func__, k, DestinationLinesToRequestRowInVBlank[k]);
5022 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
5023 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5024 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
5025 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5026 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
5027 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5028 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
5029 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5030 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
5031 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5032 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
5033 __func__, k, TimePerMetaChunkNominal[k]);
5034 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
5035 __func__, k, TimePerMetaChunkVBlank[k]);
5036 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
5037 __func__, k, TimePerMetaChunkFlip[k]);
5038 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
5039 __func__, k, TimePerChromaMetaChunkNominal[k]);
5040 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
5041 __func__, k, TimePerChromaMetaChunkVBlank[k]);
5042 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
5043 __func__, k, TimePerChromaMetaChunkFlip[k]);
5044 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
5045 __func__, k, time_per_pte_group_nom_luma[k]);
5046 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
5047 __func__, k, time_per_pte_group_vblank_luma[k]);
5048 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
5049 __func__, k, time_per_pte_group_flip_luma[k]);
5050 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
5051 __func__, k, time_per_pte_group_nom_chroma[k]);
5052 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5053 __func__, k, time_per_pte_group_vblank_chroma[k]);
5054 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
5055 __func__, k, time_per_pte_group_flip_chroma[k]);
5058 } // CalculateMetaAndPTETimes
/*
 * dml32_CalculateVMGroupAndRequestTimes - per-surface VM group/request times.
 *
 * For every surface index k below NumberOfActiveSurfaces this fills the four
 * output arrays TimePerVMGroupVBlank, TimePerVMGroupFlip,
 * TimePerVMRequestVBlank and TimePerVMRequestFlip.  Each non-zero result is
 *
 *   DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k]
 *       * HTotal[k] / PixelClock[k] / count
 *
 * where count is either a number of VM groups (byte totals divided by
 * vm_group_bytes[k] and passed through dml_ceil(x, 1)) or a number of
 * requests (byte totals divided by 64 with unsigned integer division).
 *
 * The non-zero results are computed under the condition
 *   GPUVMEnable == true &&
 *   (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)
 * and are halved when GPUVMMaxPageTableLevels > 2.  The function also
 * contains assignments that set all four results to 0.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but are
 * not referenced by any statement shown here.
 *
 * NOTE(review): GPUVMEnable, DCCEnable and the loop index k are used below,
 * but their declarations, and the lines that separate the alternative
 * branches, do not appear in this listing.  The branch pairing described in
 * the comments is inferred from the assignments - confirm against the
 * complete file.
 */
5060 void dml32_CalculateVMGroupAndRequestTimes(
5061 unsigned int NumberOfActiveSurfaces,
5063 unsigned int GPUVMMaxPageTableLevels,
5064 unsigned int HTotal[],
5065 unsigned int BytePerPixelC[],
5066 double DestinationLinesToRequestVMInVBlank[],
5067 double DestinationLinesToRequestVMInImmediateFlip[],
5069 double PixelClock[],
5070 unsigned int dpte_row_width_luma_ub[],
5071 unsigned int dpte_row_width_chroma_ub[],
5072 unsigned int vm_group_bytes[],
5073 unsigned int dpde0_bytes_per_frame_ub_l[],
5074 unsigned int dpde0_bytes_per_frame_ub_c[],
5075 unsigned int meta_pte_bytes_per_frame_ub_l[],
5076 unsigned int meta_pte_bytes_per_frame_ub_c[],
/* The four arrays below are the outputs; element k is written for each surface. */
5079 double TimePerVMGroupVBlank[],
5080 double TimePerVMGroupFlip[],
5081 double TimePerVMRequestVBlank[],
5082 double TimePerVMRequestFlip[])
/* Divisors for the group times and the request times respectively. */
5085 unsigned int num_group_per_lower_vm_stage;
5086 unsigned int num_req_per_lower_vm_stage;
5088 #ifdef __DML_VBA_DEBUG__
5089 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
5090 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
5092 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5094 #ifdef __DML_VBA_DEBUG__
5095 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
5096 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
5097 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
5098 __func__, k, dpde0_bytes_per_frame_ub_l[k]);
5099 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
5100 __func__, k, dpde0_bytes_per_frame_ub_c[k]);
5101 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
5102 __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
5103 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
5104 __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
/*
 * Non-zero times are only derived when GPUVMEnable is set and either DCC is
 * enabled for this surface or more than one page table level is used.
 */
5107 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/*
 * Group count.  Every term is dml_ceil(bytes / vm_group_bytes[k], 1)
 * (presumably rounding up to a whole group - confirm against dml_ceil).
 * Chroma (_c) terms appear only in the assignments that follow a
 * BytePerPixelC[k] > 0 test.
 *  - DCCEnable[k] == false: dpde0 byte totals only.
 *  - GPUVMMaxPageTableLevels == 1: meta PTE byte totals only.
 *  - remaining assignments: dpde0 and meta PTE totals plus a constant
 *    (2 in the variant with chroma terms, 1 in the luma-only variant).
 */
5108 if (DCCEnable[k] == false) {
5109 if (BytePerPixelC[k] > 0) {
5110 num_group_per_lower_vm_stage = dml_ceil(
5111 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5112 (double) (vm_group_bytes[k]), 1.0) +
5113 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5114 (double) (vm_group_bytes[k]), 1.0);
5116 num_group_per_lower_vm_stage = dml_ceil(
5117 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5118 (double) (vm_group_bytes[k]), 1.0);
5121 if (GPUVMMaxPageTableLevels == 1) {
5122 if (BytePerPixelC[k] > 0) {
5123 num_group_per_lower_vm_stage = dml_ceil(
5124 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5125 (double) (vm_group_bytes[k]), 1.0) +
5126 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5127 (double) (vm_group_bytes[k]), 1.0);
5129 num_group_per_lower_vm_stage = dml_ceil(
5130 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5131 (double) (vm_group_bytes[k]), 1.0);
5134 if (BytePerPixelC[k] > 0) {
5135 num_group_per_lower_vm_stage = 2 + dml_ceil(
5136 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5137 (double) (vm_group_bytes[k]), 1) +
5138 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5139 (double) (vm_group_bytes[k]), 1) +
5140 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
5141 (double) (vm_group_bytes[k]), 1) +
5142 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5143 (double) (vm_group_bytes[k]), 1);
5145 num_group_per_lower_vm_stage = 1 + dml_ceil(
5146 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5147 (double) (vm_group_bytes[k]), 1) + dml_ceil(
5148 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5149 (double) (vm_group_bytes[k]), 1);
/*
 * Request count.  Same case selection as for the group count, but each byte
 * total is divided by 64 using unsigned integer division (remainders are
 * dropped) and no constant is added.
 * NOTE(review): if every contributing total is below 64 this count is 0 and
 * the TimePerVMRequest* divisions below divide by zero - confirm callers
 * cannot reach that case.
 */
5154 if (DCCEnable[k] == false) {
5155 if (BytePerPixelC[k] > 0) {
5156 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
5157 dpde0_bytes_per_frame_ub_c[k] / 64;
5159 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
5162 if (GPUVMMaxPageTableLevels == 1) {
5163 if (BytePerPixelC[k] > 0) {
5164 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
5165 meta_pte_bytes_per_frame_ub_c[k] / 64;
5167 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
5170 if (BytePerPixelC[k] > 0) {
5171 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5172 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
5173 meta_pte_bytes_per_frame_ub_l[k] / 64 +
5174 meta_pte_bytes_per_frame_ub_c[k] / 64;
5176 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5177 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/*
 * Each requested line count is multiplied by HTotal[k], divided by
 * PixelClock[k], and then divided by the group or request count.
 */
5182 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5183 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5184 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5185 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5186 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5187 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5188 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5189 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
/* With more than two page table levels all four times are halved. */
5191 if (GPUVMMaxPageTableLevels > 2) {
5192 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
5193 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
5194 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
5195 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/*
 * All four results are zeroed here; presumably this is the alternative taken
 * when the GPUVMEnable/DCCEnable condition above is false - confirm.
 */
5199 TimePerVMGroupVBlank[k] = 0;
5200 TimePerVMGroupFlip[k] = 0;
5201 TimePerVMRequestVBlank[k] = 0;
5202 TimePerVMRequestFlip[k] = 0;
5205 #ifdef __DML_VBA_DEBUG__
5206 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
5207 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
5208 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
5209 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
5212 } // CalculateVMGroupAndRequestTimes
/*
 * dml32_CalculateDCCConfiguration - derive DCC block sizes from scalar
 * surface parameters.
 *
 * Writes six values through the output pointers: the maximum uncompressed
 * block, maximum compressed block and independent block, for luma and for
 * chroma.
 *
 * Steps visible below:
 *  - derive viewport limits from nomDETInKByte * 1024 and the 256-byte
 *    request heights, and cap them at fixed values (3840, 3072 or 6144);
 *  - clamp the luma surface size to those limits and derive the chroma size
 *    by dividing by (1 + yuv420);
 *  - compute full-swath byte counts for the "horz" and "vert" variants;
 *  - compare those byte counts with DETBufferSizeForDCC to set the req128
 *    flags per plane and per variant;
 *  - choose a request type per plane from the flags, the segment order and
 *    either DCCProgrammingAssumesScanDirectionUnknown or
 *    IsVertical(SourceRotation);
 *  - map the request type to the output values, then zero the outputs when
 *    DCCEnabled is not true (and the chroma outputs when BytePerPixelC == 0).
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, TilingFormat, BytePerPixelDETY and
 * BytePerPixelDETC are accepted but not referenced by any statement shown
 * here.
 *
 * NOTE(review): DCCEnabled, REQ_256Bytes and the RequestType type are used
 * below, but their declarations, and the lines that separate alternative
 * branches, do not appear in this listing.  Branch pairing described in the
 * comments is inferred from the assignments - confirm against the complete
 * file.
 */
5214 void dml32_CalculateDCCConfiguration(
5216 bool DCCProgrammingAssumesScanDirectionUnknown,
5217 enum source_format_class SourcePixelFormat,
5218 unsigned int SurfaceWidthLuma,
5219 unsigned int SurfaceWidthChroma,
5220 unsigned int SurfaceHeightLuma,
5221 unsigned int SurfaceHeightChroma,
5222 unsigned int nomDETInKByte,
5223 unsigned int RequestHeight256ByteLuma,
5224 unsigned int RequestHeight256ByteChroma,
5225 enum dm_swizzle_mode TilingFormat,
5226 unsigned int BytePerPixelY,
5227 unsigned int BytePerPixelC,
5228 double BytePerPixelDETY,
5229 double BytePerPixelDETC,
5230 enum dm_rotation_angle SourceRotation,
/* The six pointers below are the outputs; each one is written before return. */
5232 unsigned int *MaxUncompressedBlockLuma,
5233 unsigned int *MaxUncompressedBlockChroma,
5234 unsigned int *MaxCompressedBlockLuma,
5235 unsigned int *MaxCompressedBlockChroma,
5236 unsigned int *IndependentBlockLuma,
5237 unsigned int *IndependentBlockChroma)
/* Two of the request-type enumerators compared against further down. */
5241 REQ_128BytesNonContiguous,
5242 REQ_128BytesContiguous,
5246 RequestType RequestLuma;
5247 RequestType RequestChroma;
5249 unsigned int segment_order_horz_contiguous_luma;
5250 unsigned int segment_order_horz_contiguous_chroma;
5251 unsigned int segment_order_vert_contiguous_luma;
5252 unsigned int segment_order_vert_contiguous_chroma;
5253 unsigned int req128_horz_wc_l;
5254 unsigned int req128_horz_wc_c;
5255 unsigned int req128_vert_wc_l;
5256 unsigned int req128_vert_wc_c;
5257 unsigned int MAS_vp_horz_limit;
5258 unsigned int MAS_vp_vert_limit;
5259 unsigned int max_vp_horz_width;
5260 unsigned int max_vp_vert_height;
5261 unsigned int eff_surf_width_l;
5262 unsigned int eff_surf_width_c;
5263 unsigned int eff_surf_height_l;
5264 unsigned int eff_surf_height_c;
5265 unsigned int full_swath_bytes_horz_wc_l;
5266 unsigned int full_swath_bytes_horz_wc_c;
5267 unsigned int full_swath_bytes_vert_wc_l;
5268 unsigned int full_swath_bytes_vert_wc_c;
/* nomDETInKByte scaled by 1024 (presumably KBytes to bytes). */
5269 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5271 unsigned int yuv420;
5272 unsigned int horz_div_l;
5273 unsigned int horz_div_c;
5274 unsigned int vert_div_l;
5275 unsigned int vert_div_c;
5277 unsigned int swath_buf_size;
5278 double detile_buf_vp_horz_limit;
5279 double detile_buf_vp_vert_limit;
/* 1 when SourcePixelFormat is dm_420_8, dm_420_10 or dm_420_12, otherwise 0. */
5281 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5282 SourcePixelFormat == dm_420_12) ? 1 : 0);
/*
 * NOTE(review): horz_div_* and vert_div_* are read below, but the statements
 * that assign them (including the bodies of the next two ifs) are not shown
 * in this listing - confirm their values against the complete file.
 */
5288 if (BytePerPixelY == 1)
5290 if (BytePerPixelC == 1)
/*
 * Buffer-derived viewport limits.  The budget is half of DETBufferSizeForDCC
 * minus 2 * 256 bytes when BytePerPixelC == 0, or minus 2 * 2 * 256 bytes in
 * the variant that also has chroma terms.  It is divided by an expression
 * built from the 256-byte request heights, the bytes per pixel, the *_div_*
 * values and, for the chroma terms, (1 + yuv420).
 * NOTE(review): the continuation of the first detile_buf_vp_vert_limit
 * statement is not shown in this listing.
 */
5293 if (BytePerPixelC == 0) {
5294 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5295 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5296 BytePerPixelY / (1 + horz_div_l));
5297 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5300 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5301 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5302 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5303 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5304 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5305 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5306 (1 + vert_div_c) / (1 + yuv420));
/* For dm_420_10 both limits are multiplied by 1.5. */
5309 if (SourcePixelFormat == dm_420_10) {
5310 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5311 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/* dml_floor(x - 1, 16): presumably rounds down to a multiple of 16 - confirm against dml_floor. */
5314 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5315 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/*
 * Fixed caps: 3840 for dm_rgbe_alpha; otherwise 6144, except that the
 * vertical cap is 3072 when BytePerPixelY == 8.
 */
5317 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5318 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
/*
 * The effective limit is the smaller of the fixed cap and the buffer-derived
 * limit (the double result is stored in an unsigned int).  The luma surface
 * size is clamped to it and the chroma size is the luma size divided by
 * (1 + yuv420).
 */
5319 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5320 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5321 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5322 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
5323 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5324 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/*
 * Full-swath byte counts: width * request height * bytes per pixel for the
 * horz variant, height * 256 / request height for the vert variant.  The
 * chroma counts are computed after the BytePerPixelC > 0 test; the two
 * assignments of 0 are presumably the alternative branch.
 */
5326 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5327 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5328 if (BytePerPixelC > 0) {
5329 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5330 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5332 full_swath_bytes_horz_wc_c = 0;
5333 full_swath_bytes_vert_wc_c = 0;
/*
 * For dm_420_10 every count is scaled by 2/3 and passed through
 * dml_ceil(x, 256.0) (presumably rounding up to a multiple of 256 - confirm
 * against dml_ceil).
 */
5336 if (SourcePixelFormat == dm_420_10) {
5337 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5338 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5339 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5340 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
/*
 * req128_horz_wc_{l,c}: both 0 when twice the luma count plus twice the
 * chroma count fits in DETBufferSizeForDCC.  Otherwise a single flag is set
 * if the reduced total fits: chroma when luma < 1.5 * chroma, luma when
 * luma >= 1.5 * chroma.  The remaining assignments set both flags.
 */
5343 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5344 req128_horz_wc_l = 0;
5345 req128_horz_wc_c = 0;
5346 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5347 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5348 req128_horz_wc_l = 0;
5349 req128_horz_wc_c = 1;
5350 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5351 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5352 req128_horz_wc_l = 1;
5353 req128_horz_wc_c = 0;
5355 req128_horz_wc_l = 1;
5356 req128_horz_wc_c = 1;
/* Same decision for the vert variant, using the vert byte counts. */
5359 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5360 req128_vert_wc_l = 0;
5361 req128_vert_wc_c = 0;
5362 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5363 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5364 req128_vert_wc_l = 0;
5365 req128_vert_wc_c = 1;
5366 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5367 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5368 req128_vert_wc_l = 1;
5369 req128_vert_wc_c = 0;
5371 req128_vert_wc_l = 1;
5372 req128_vert_wc_c = 1;
/*
 * Segment order per plane: after the BytePerPixel == 2 test the horz value
 * is 0 and the vert value is 1; the other pair of assignments gives horz 1
 * and vert 0.
 */
5375 if (BytePerPixelY == 2) {
5376 segment_order_horz_contiguous_luma = 0;
5377 segment_order_vert_contiguous_luma = 1;
5379 segment_order_horz_contiguous_luma = 1;
5380 segment_order_vert_contiguous_luma = 0;
5383 if (BytePerPixelC == 2) {
5384 segment_order_horz_contiguous_chroma = 0;
5385 segment_order_vert_contiguous_chroma = 1;
5387 segment_order_horz_contiguous_chroma = 1;
5388 segment_order_vert_contiguous_chroma = 0;
5390 #ifdef __DML_VBA_DEBUG__
5391 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5392 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5393 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5394 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5395 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5396 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5397 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5398 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5399 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5400 __func__, segment_order_horz_contiguous_chroma);
/*
 * Request type per plane.  When the scan direction is assumed unknown both
 * the horz and the vert flags are considered: REQ_256Bytes only if neither
 * req128 flag is set, REQ_128BytesNonContiguous if a set flag comes with a
 * segment order of 0, and REQ_128BytesContiguous from the remaining
 * assignment.
 */
5403 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5404 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5405 RequestLuma = REQ_256Bytes;
5406 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5407 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5408 RequestLuma = REQ_128BytesNonContiguous;
5410 RequestLuma = REQ_128BytesContiguous;
5412 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5413 RequestChroma = REQ_256Bytes;
5414 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5415 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5416 RequestChroma = REQ_128BytesNonContiguous;
5418 RequestChroma = REQ_128BytesContiguous;
/* Direction known and !IsVertical(SourceRotation): only the horz values are consulted. */
5420 } else if (!IsVertical(SourceRotation)) {
5421 if (req128_horz_wc_l == 0)
5422 RequestLuma = REQ_256Bytes;
5423 else if (segment_order_horz_contiguous_luma == 0)
5424 RequestLuma = REQ_128BytesNonContiguous;
5426 RequestLuma = REQ_128BytesContiguous;
5428 if (req128_horz_wc_c == 0)
5429 RequestChroma = REQ_256Bytes;
5430 else if (segment_order_horz_contiguous_chroma == 0)
5431 RequestChroma = REQ_128BytesNonContiguous;
5433 RequestChroma = REQ_128BytesContiguous;
/* The remaining assignments consult only the vert values. */
5436 if (req128_vert_wc_l == 0)
5437 RequestLuma = REQ_256Bytes;
5438 else if (segment_order_vert_contiguous_luma == 0)
5439 RequestLuma = REQ_128BytesNonContiguous;
5441 RequestLuma = REQ_128BytesContiguous;
5443 if (req128_vert_wc_c == 0)
5444 RequestChroma = REQ_256Bytes;
5445 else if (segment_order_vert_contiguous_chroma == 0)
5446 RequestChroma = REQ_128BytesNonContiguous;
5448 RequestChroma = REQ_128BytesContiguous;
/*
 * Map the request type to (max uncompressed, max compressed, independent):
 * REQ_256Bytes -> 256/256/0, REQ_128BytesContiguous -> 256/128/128, and the
 * remaining assignments -> 256/64/64.
 */
5451 if (RequestLuma == REQ_256Bytes) {
5452 *MaxUncompressedBlockLuma = 256;
5453 *MaxCompressedBlockLuma = 256;
5454 *IndependentBlockLuma = 0;
5455 } else if (RequestLuma == REQ_128BytesContiguous) {
5456 *MaxUncompressedBlockLuma = 256;
5457 *MaxCompressedBlockLuma = 128;
5458 *IndependentBlockLuma = 128;
5460 *MaxUncompressedBlockLuma = 256;
5461 *MaxCompressedBlockLuma = 64;
5462 *IndependentBlockLuma = 64;
5465 if (RequestChroma == REQ_256Bytes) {
5466 *MaxUncompressedBlockChroma = 256;
5467 *MaxCompressedBlockChroma = 256;
5468 *IndependentBlockChroma = 0;
5469 } else if (RequestChroma == REQ_128BytesContiguous) {
5470 *MaxUncompressedBlockChroma = 256;
5471 *MaxCompressedBlockChroma = 128;
5472 *IndependentBlockChroma = 128;
5474 *MaxUncompressedBlockChroma = 256;
5475 *MaxCompressedBlockChroma = 64;
5476 *IndependentBlockChroma = 64;
/*
 * Final overrides: the chroma outputs are zeroed when DCCEnabled is not true
 * or BytePerPixelC == 0, and the luma outputs are zeroed when DCCEnabled is
 * not true.
 */
5479 if (DCCEnabled != true || BytePerPixelC == 0) {
5480 *MaxUncompressedBlockChroma = 0;
5481 *MaxCompressedBlockChroma = 0;
5482 *IndependentBlockChroma = 0;
5485 if (DCCEnabled != true) {
5486 *MaxUncompressedBlockLuma = 0;
5487 *MaxCompressedBlockLuma = 0;
5488 *IndependentBlockLuma = 0;
5491 #ifdef __DML_VBA_DEBUG__
5492 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5493 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5494 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5495 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5496 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5497 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5500 } // CalculateDCCConfiguration
5502 void dml32_CalculateStutterEfficiency(
5503 unsigned int CompressedBufferSizeInkByte,
5504 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5505 bool UnboundedRequestEnabled,
5506 unsigned int MetaFIFOSizeInKEntries,
5507 unsigned int ZeroSizeBufferEntries,
5508 unsigned int PixelChunkSizeInKByte,
5509 unsigned int NumberOfActiveSurfaces,
5510 unsigned int ROBBufferSizeInKByte,
5511 double TotalDataReadBandwidth,
5514 unsigned int CompbufReservedSpace64B,
5515 unsigned int CompbufReservedSpaceZs,
5517 double SRExitZ8Time,
5518 bool SynchronizeTimingsFinal,
5519 unsigned int BlendingAndTiming[],
5520 double StutterEnterPlusExitWatermark,
5521 double Z8StutterEnterPlusExitWatermark,
5522 bool ProgressiveToInterlaceUnitInOPP,
5524 double MinTTUVBlank[],
5525 unsigned int DPPPerSurface[],
5526 unsigned int DETBufferSizeY[],
5527 unsigned int BytePerPixelY[],
5528 double BytePerPixelDETY[],
5529 double SwathWidthY[],
5530 unsigned int SwathHeightY[],
5531 unsigned int SwathHeightC[],
5532 double NetDCCRateLuma[],
5533 double NetDCCRateChroma[],
5534 double DCCFractionOfZeroSizeRequestsLuma[],
5535 double DCCFractionOfZeroSizeRequestsChroma[],
5536 unsigned int HTotal[],
5537 unsigned int VTotal[],
5538 double PixelClock[],
5540 enum dm_rotation_angle SourceRotation[],
5541 unsigned int BlockHeight256BytesY[],
5542 unsigned int BlockWidth256BytesY[],
5543 unsigned int BlockHeight256BytesC[],
5544 unsigned int BlockWidth256BytesC[],
5545 unsigned int DCCYMaxUncompressedBlock[],
5546 unsigned int DCCCMaxUncompressedBlock[],
5547 unsigned int VActive[],
5549 bool WritebackEnable[],
5550 double ReadBandwidthSurfaceLuma[],
5551 double ReadBandwidthSurfaceChroma[],
5552 double meta_row_bw[],
5553 double dpte_row_bw[],
5556 double *StutterEfficiencyNotIncludingVBlank,
5557 double *StutterEfficiency,
5558 unsigned int *NumberOfStutterBurstsPerFrame,
5559 double *Z8StutterEfficiencyNotIncludingVBlank,
5560 double *Z8StutterEfficiency,
5561 unsigned int *Z8NumberOfStutterBurstsPerFrame,
5562 double *StutterPeriod,
5563 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5566 bool FoundCriticalSurface = false;
5567 unsigned int SwathSizeCriticalSurface = 0;
5568 unsigned int LastChunkOfSwathSize;
5569 unsigned int MissingPartOfLastSwathOfDETSize;
5570 double LastZ8StutterPeriod = 0.0;
5571 double LastStutterPeriod = 0.0;
5572 unsigned int TotalNumberOfActiveOTG = 0;
5573 double doublePixelClock;
5574 unsigned int doubleHTotal;
5575 unsigned int doubleVTotal;
5576 bool SameTiming = true;
5577 double DETBufferingTimeY;
5578 double SwathWidthYCriticalSurface = 0.0;
5579 double SwathHeightYCriticalSurface = 0.0;
5580 double VActiveTimeCriticalSurface = 0.0;
5581 double FrameTimeCriticalSurface = 0.0;
5582 unsigned int BytePerPixelYCriticalSurface = 0;
5583 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5584 unsigned int DETBufferSizeYCriticalSurface = 0;
5585 double MinTTUVBlankCriticalSurface = 0.0;
5586 unsigned int BlockWidth256BytesYCriticalSurface = 0;
5587 bool doublePlaneCriticalSurface = 0;
5588 bool doublePipeCriticalSurface = 0;
5589 double TotalCompressedReadBandwidth;
5590 double TotalRowReadBandwidth;
5591 double AverageDCCCompressionRate;
5592 double EffectiveCompressedBufferSize;
5593 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5594 double StutterBurstTime;
5595 unsigned int TotalActiveWriteback;
5597 double LinesInDETYRoundedDownToSwath;
5598 double MaximumEffectiveCompressionLuma;
5599 double MaximumEffectiveCompressionChroma;
5600 double TotalZeroSizeRequestReadBandwidth;
5601 double TotalZeroSizeCompressedReadBandwidth;
5602 double AverageDCCZeroSizeFraction;
5603 double AverageZeroSizeCompressionRate;
5606 TotalZeroSizeRequestReadBandwidth = 0;
5607 TotalZeroSizeCompressedReadBandwidth = 0;
5608 TotalRowReadBandwidth = 0;
5609 TotalCompressedReadBandwidth = 0;
5611 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5612 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5613 if (DCCEnable[k] == true) {
5614 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5615 || (!IsVertical(SourceRotation[k])
5616 && BlockHeight256BytesY[k] > SwathHeightY[k])
5617 || DCCYMaxUncompressedBlock[k] < 256) {
5618 MaximumEffectiveCompressionLuma = 2;
5620 MaximumEffectiveCompressionLuma = 4;
5622 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5623 + ReadBandwidthSurfaceLuma[k]
5624 / dml_min(NetDCCRateLuma[k],
5625 MaximumEffectiveCompressionLuma);
5626 #ifdef __DML_VBA_DEBUG__
5627 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5628 __func__, k, ReadBandwidthSurfaceLuma[k]);
5629 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5630 __func__, k, NetDCCRateLuma[k]);
5631 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5632 __func__, k, MaximumEffectiveCompressionLuma);
5634 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5635 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5636 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5637 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5638 / MaximumEffectiveCompressionLuma;
5640 if (ReadBandwidthSurfaceChroma[k] > 0) {
5641 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5642 || (!IsVertical(SourceRotation[k])
5643 && BlockHeight256BytesC[k] > SwathHeightC[k])
5644 || DCCCMaxUncompressedBlock[k] < 256) {
5645 MaximumEffectiveCompressionChroma = 2;
5647 MaximumEffectiveCompressionChroma = 4;
5649 TotalCompressedReadBandwidth =
5650 TotalCompressedReadBandwidth
5651 + ReadBandwidthSurfaceChroma[k]
5652 / dml_min(NetDCCRateChroma[k],
5653 MaximumEffectiveCompressionChroma);
5654 #ifdef __DML_VBA_DEBUG__
5655 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5656 __func__, k, ReadBandwidthSurfaceChroma[k]);
5657 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5658 __func__, k, NetDCCRateChroma[k]);
5659 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5660 __func__, k, MaximumEffectiveCompressionChroma);
5662 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5663 + ReadBandwidthSurfaceChroma[k]
5664 * DCCFractionOfZeroSizeRequestsChroma[k];
5665 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5666 + ReadBandwidthSurfaceChroma[k]
5667 * DCCFractionOfZeroSizeRequestsChroma[k]
5668 / MaximumEffectiveCompressionChroma;
5671 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5672 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5674 TotalRowReadBandwidth = TotalRowReadBandwidth
5675 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5679 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5680 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5682 #ifdef __DML_VBA_DEBUG__
5683 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5684 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5685 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5686 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5687 __func__, TotalZeroSizeCompressedReadBandwidth);
5688 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5689 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5690 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5691 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5692 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5693 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5694 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5696 if (AverageDCCZeroSizeFraction == 1) {
5697 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5698 / TotalZeroSizeCompressedReadBandwidth;
5699 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5700 * AverageZeroSizeCompressionRate
5701 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5702 * AverageZeroSizeCompressionRate;
5703 } else if (AverageDCCZeroSizeFraction > 0) {
5704 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5705 / TotalZeroSizeCompressedReadBandwidth;
5706 EffectiveCompressedBufferSize = dml_min(
5707 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5708 (double) MetaFIFOSizeInKEntries * 1024 * 64
5709 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5710 + 1 / AverageDCCCompressionRate))
5711 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5712 * AverageDCCCompressionRate,
5713 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5714 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5716 #ifdef __DML_VBA_DEBUG__
5717 dml_print("DML::%s: min 1 = %f\n", __func__,
5718 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5719 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5720 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5721 AverageDCCCompressionRate));
5722 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5723 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5724 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5725 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5728 EffectiveCompressedBufferSize = dml_min(
5729 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5730 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5731 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5732 * AverageDCCCompressionRate;
5734 #ifdef __DML_VBA_DEBUG__
5735 dml_print("DML::%s: min 1 = %f\n", __func__,
5736 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5737 dml_print("DML::%s: min 2 = %f\n", __func__,
5738 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5742 #ifdef __DML_VBA_DEBUG__
5743 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5744 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5745 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5750 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5751 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5752 LinesInDETY = ((double) DETBufferSizeY[k]
5753 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5754 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5755 / BytePerPixelDETY[k] / SwathWidthY[k];
5756 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5757 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5759 #ifdef __DML_VBA_DEBUG__
5760 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5761 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5762 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5763 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5764 __func__, k, ReadBandwidthSurfaceLuma[k]);
5765 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5766 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5767 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5768 __func__, k, LinesInDETYRoundedDownToSwath);
5769 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5770 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5771 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5772 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5773 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5776 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5777 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5779 FoundCriticalSurface = true;
5780 *StutterPeriod = DETBufferingTimeY;
5781 FrameTimeCriticalSurface = (
5783 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5784 * (double) HTotal[k] / PixelClock[k];
5785 VActiveTimeCriticalSurface = (
5787 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5788 * (double) HTotal[k] / PixelClock[k];
5789 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5790 SwathWidthYCriticalSurface = SwathWidthY[k];
5791 SwathHeightYCriticalSurface = SwathHeightY[k];
5792 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5793 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5794 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5795 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5796 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5797 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5798 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5800 #ifdef __DML_VBA_DEBUG__
5801 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
5802 __func__, k, FoundCriticalSurface);
5803 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
5804 __func__, k, *StutterPeriod);
5805 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
5806 __func__, k, MinTTUVBlankCriticalSurface);
5807 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
5808 __func__, k, FrameTimeCriticalSurface);
5809 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
5810 __func__, k, VActiveTimeCriticalSurface);
5811 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
5812 __func__, k, BytePerPixelYCriticalSurface);
5813 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
5814 __func__, k, SwathWidthYCriticalSurface);
5815 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
5816 __func__, k, SwathHeightYCriticalSurface);
5817 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
5818 __func__, k, BlockWidth256BytesYCriticalSurface);
5819 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
5820 __func__, k, doublePlaneCriticalSurface);
5821 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
5822 __func__, k, doublePipeCriticalSurface);
5823 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5824 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5830 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5831 EffectiveCompressedBufferSize);
5832 #ifdef __DML_VBA_DEBUG__
5833 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5834 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5835 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5836 __func__, *StutterPeriod * TotalDataReadBandwidth);
5837 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5838 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5839 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5840 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5841 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5842 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5843 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5846 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5848 + (*StutterPeriod * TotalDataReadBandwidth
5849 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5850 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5851 #ifdef __DML_VBA_DEBUG__
5852 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5853 AverageDCCCompressionRate / ReturnBW);
5854 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5855 __func__, (*StutterPeriod * TotalDataReadBandwidth));
5856 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5857 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5858 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5859 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5861 StutterBurstTime = dml_max(StutterBurstTime,
5862 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5863 * SwathWidthYCriticalSurface / ReturnBW);
5865 #ifdef __DML_VBA_DEBUG__
5866 dml_print("DML::%s: Time to finish residue swath=%f\n",
5868 LinesToFinishSwathTransferStutterCriticalSurface *
5869 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5872 TotalActiveWriteback = 0;
5873 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5874 if (WritebackEnable[k])
5875 TotalActiveWriteback = TotalActiveWriteback + 1;
5878 if (TotalActiveWriteback == 0) {
5879 #ifdef __DML_VBA_DEBUG__
5880 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5881 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5882 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5883 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5885 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5886 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5887 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5888 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5889 *NumberOfStutterBurstsPerFrame = (
5890 *StutterEfficiencyNotIncludingVBlank > 0 ?
5891 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5892 *Z8NumberOfStutterBurstsPerFrame = (
5893 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5894 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5896 *StutterEfficiencyNotIncludingVBlank = 0.;
5897 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5898 *NumberOfStutterBurstsPerFrame = 0;
5899 *Z8NumberOfStutterBurstsPerFrame = 0;
5901 #ifdef __DML_VBA_DEBUG__
5902 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5903 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5904 __func__, *StutterEfficiencyNotIncludingVBlank);
5905 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5906 __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5907 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5908 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5911 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5912 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5913 if (BlendingAndTiming[k] == k) {
5914 if (TotalNumberOfActiveOTG == 0) {
5915 doublePixelClock = PixelClock[k];
5916 doubleHTotal = HTotal[k];
5917 doubleVTotal = VTotal[k];
5918 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5919 || doubleVTotal != VTotal[k]) {
5922 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5927 if (*StutterEfficiencyNotIncludingVBlank > 0) {
5928 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5930 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5931 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5932 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5933 + StutterBurstTime * VActiveTimeCriticalSurface
5934 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5936 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
5939 *StutterEfficiency = 0;
5942 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
5943 LastZ8StutterPeriod = VActiveTimeCriticalSurface
5944 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5945 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
5946 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
5947 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
5948 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5950 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
5953 *Z8StutterEfficiency = 0.;
5956 #ifdef __DML_VBA_DEBUG__
5957 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
5958 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
5959 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5960 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5961 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
5962 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
5963 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5964 __func__, *StutterEfficiencyNotIncludingVBlank);
5965 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5968 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
5969 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
5970 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
5971 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
5972 - DETBufferSizeYCriticalSurface;
5974 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
5975 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
5976 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
5977 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
5979 #ifdef __DML_VBA_DEBUG__
5980 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
5981 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
5982 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
5983 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
5985 } // CalculateStutterEfficiency
/*
 * Derive the total DET budget, the nominal per-DPP DET size and the minimum
 * compressed-buffer size from the return-buffer configuration.
 *
 * Inputs:
 *   ConfigReturnBufferSizeInKByte, ROBBufferSizeInKByte
 *       Their sum, scaled by 4/5, is the total DET budget.
 *   MaxNumDPP
 *       Divisor used to split the total budget into the nominal size.
 *       NOTE(review): a value of 0 is not guarded against here.
 *   nomDETInKByteOverrideEnable, nomDETInKByteOverrideValue
 *       When the enable flag is set, *nomDETInKByte is replaced by the
 *       override value after the regular computation.
 *
 * Outputs:
 *   *MaxTotalDETInKByte, *nomDETInKByte, *MinCompressedBufferSizeInKByte
 *
 * dml_ceil()/dml_floor() called with 64 are assumed to round to a multiple
 * of 64 -- TODO confirm against their definitions.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue, // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	/* Total budget: 4/5 of (return buffer + ROB), passed through dml_ceil(.., 64). */
	*MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte +
			(double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64);

	/* Nominal size: the budget divided by MaxNumDPP, passed through dml_floor(.., 64). */
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);

	/*
	 * Unsigned subtraction: this wraps if the total DET budget exceeds the
	 * configured return buffer size.
	 */
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* The override only affects the nominal size; the other outputs are kept. */
	if (nomDETInKByteOverrideEnable) {
		*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
	}
} // CalculateMaxDETAndMinCompressedBufferSize
/*
 * Check whether the summed per-surface bandwidth fits within ReturnBW.
 *
 * For each of the NumberOfActiveSurfaces surfaces the following is added to
 * the total:
 *   ReadBandwidthLuma[k]   * UrgentBurstFactorLuma[k]
 * + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 * + cursor_bw[k]           * UrgentBurstFactorCursor[k]
 * + NumberOfDPP[k] * meta_row_bandwidth[k]
 * + NumberOfDPP[k] * dpte_row_bandwidth[k]
 *
 * Returns true only when that total is <= ReturnBW and no entry of
 * NotUrgentLatencyHiding[] is set. With zero surfaces the total is 0.
 *
 * All array arguments are indexed 0..NumberOfActiveSurfaces-1 and are only
 * read.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int k;
	bool NotEnoughUrgentLatencyHiding = false;
	bool CalculateVActiveBandwithSupport_val;
	double VActiveBandwith = 0;

	/* A single flagged surface is enough to fail; stop at the first one. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (NotUrgentLatencyHiding[k]) {
			NotEnoughUrgentLatencyHiding = true;
			break;
		}
	}

	/* Terms are accumulated left to right in the same order as before. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		VActiveBandwith = VActiveBandwith
				+ ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
				+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
				+ cursor_bw[k] * UrgentBurstFactorCursor[k]
				+ NumberOfDPP[k] * meta_row_bandwidth[k]
				+ NumberOfDPP[k] * dpte_row_bandwidth[k];
	}

	CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding);
	dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith);
	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val);
#endif
	return CalculateVActiveBandwithSupport_val;
}
/*
 * Sum, over all active surfaces, the largest of three per-surface bandwidth
 * terms and compare the total against ReturnBW.
 *
 * Per surface k the three candidates passed to dml_max3() are:
 *   1. NumberOfDPP[k] * prefetch_vmrow_bw[k]
 *   2. ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
 *      + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 *      + cursor_bw[k] * UrgentBurstFactorCursor[k]
 *      + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k])
 *   3. NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
 *      + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
 *      + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]
 *
 * Outputs:
 *   *PrefetchBandwidth         - the accumulated total.
 *   *PrefetchBandwidthSupport  - total <= ReturnBW and no entry of
 *                                NotUrgentLatencyHiding[] is set.
 *   *FractionOfUrgentBandwidth - total / ReturnBW (ReturnBW is not checked
 *                                for zero here).
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double *PrefetchBandwidth,
		double *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	unsigned int k;
	bool NotEnoughUrgentLatencyHiding = false;

	/* Every entry is visited; any set flag fails the support check below. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (NotUrgentLatencyHiding[k]) {
			NotEnoughUrgentLatencyHiding = true;
		}
	}

	*PrefetchBandwidth = 0;
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Each term keeps the original operand order. */
		double vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
		double active_bw = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
				+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
				+ cursor_bw[k] * UrgentBurstFactorCursor[k]
				+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
		double prefetch_bw = NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
				+ PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
				+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];

		*PrefetchBandwidth = *PrefetchBandwidth + dml_max3(vm_row_bw, active_bw, prefetch_bw);
	}

	*PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
	*FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
}
/*
 * Return what is left of ReturnBW after subtracting, for every active
 * surface, the larger of two bandwidth terms:
 *   - ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
 *     + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 *     + cursor_bw[k] * UrgentBurstFactorCursor[k]
 *   - NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
 *     + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
 *     + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]
 *
 * The result is not clamped, so it is negative when the surfaces together
 * need more than ReturnBW.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int k;
	double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Each term keeps the original operand order. */
		double active_bw = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
				+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
				+ cursor_bw[k] * UrgentBurstFactorCursor[k];
		double prefetch_bw = NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
				+ PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
				+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];

		CalculateBandwidthAvailableForImmediateFlip_val =
				CalculateBandwidthAvailableForImmediateFlip_val
				- dml_max(active_bw, prefetch_bw);
	}

	return CalculateBandwidthAvailableForImmediateFlip_val;
}
6134 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6136 enum immediate_flip_requirement ImmediateFlipRequirement[],
6137 double final_flip_bw[],
6138 double ReadBandwidthLuma[],
6139 double ReadBandwidthChroma[],
6140 double PrefetchBandwidthLuma[],
6141 double PrefetchBandwidthChroma[],
6143 double meta_row_bandwidth[],
6144 double dpte_row_bandwidth[],
6145 double cursor_bw_pre[],
6146 double prefetch_vmrow_bw[],
6147 unsigned int NumberOfDPP[],
6148 double UrgentBurstFactorLuma[],
6149 double UrgentBurstFactorChroma[],
6150 double UrgentBurstFactorCursor[],
6151 double UrgentBurstFactorLumaPre[],
6152 double UrgentBurstFactorChromaPre[],
6153 double UrgentBurstFactorCursorPre[],
6156 double *TotalBandwidth,
6157 double *FractionOfUrgentBandwidth,
6158 bool *ImmediateFlipBandwidthSupport)
6161 *TotalBandwidth = 0;
6162 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6163 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6164 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6165 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6166 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6168 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6169 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6170 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6173 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6174 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;