Merge tag 'sched_urgent_for_v6.1_rc6' of git://git.kernel.org/pub/scm/linux/kernel...
[platform/kernel/linux-starfive.git] / drivers / gpu / drm / amd / display / dc / dml / dcn32 / display_mode_vba_util_32.c
1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31
32 unsigned int dml32_dscceComputeDelay(
33                 unsigned int bpc,
34                 double BPP,
35                 unsigned int sliceWidth,
36                 unsigned int numSlices,
37                 enum output_format_class pixelFormat,
38                 enum output_encoder_class Output)
39 {
40         // valid bpc         = source bits per component in the set of {8, 10, 12}
41         // valid bpp         = increments of 1/16 of a bit
42         //                    min = 6/7/8 in N420/N422/444, respectively
43         //                    max = such that compression is 1:1
44         //valid sliceWidth  = number of pixels per slice line,
45         //      must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46         //valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47         //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48
49         // fixed value
50         unsigned int rcModelSize = 8192;
51
52         // N422/N420 operate at 2 pixels per clock
53         unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54         Delay, pixels;
55
56         if (pixelFormat == dm_420)
57                 pixelsPerClock = 2;
58         else if (pixelFormat == dm_n422)
59                 pixelsPerClock = 2;
60         // #all other modes operate at 1 pixel per clock
61         else
62                 pixelsPerClock = 1;
63
64         //initial transmit delay as per PPS
65         initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66
67         //compute ssm delay
68         if (bpc == 8)
69                 D = 81;
70         else if (bpc == 10)
71                 D = 89;
72         else
73                 D = 113;
74
75         //divide by pixel per cycle to compute slice width as seen by DSC
76         w = sliceWidth / pixelsPerClock;
77
78         //422 mode has an additional cycle of delay
79         if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80                 s = 0;
81         else
82                 s = 1;
83
84         //main calculation for the dscce
85         ix = initalXmitDelay + 45;
86         wx = (w + 2) / 3;
87         p = 3 * wx - w;
88         l0 = ix / w;
89         a = ix + p * l0;
90         ax = (a + 2) / 3 + D + 6 + 1;
91         L = (ax + wx - 1) / wx;
92         if ((ix % w) == 0 && p != 0)
93                 lstall = 1;
94         else
95                 lstall = 0;
96         Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97
98         //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99         pixels = Delay * 3 * pixelsPerClock;
100
101 #ifdef __DML_VBA_DEBUG__
102         dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103         dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104         dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105         dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106         dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107         dml_print("DML::%s: Output: %d\n", __func__, Output);
108         dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110
111         return pixels;
112 }
113
114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116         unsigned int Delay = 0;
117
118         if (pixelFormat == dm_420) {
119                 //   sfr
120                 Delay = Delay + 2;
121                 //   dsccif
122                 Delay = Delay + 0;
123                 //   dscc - input deserializer
124                 Delay = Delay + 3;
125                 //   dscc gets pixels every other cycle
126                 Delay = Delay + 2;
127                 //   dscc - input cdc fifo
128                 Delay = Delay + 12;
129                 //   dscc gets pixels every other cycle
130                 Delay = Delay + 13;
131                 //   dscc - cdc uncertainty
132                 Delay = Delay + 2;
133                 //   dscc - output cdc fifo
134                 Delay = Delay + 7;
135                 //   dscc gets pixels every other cycle
136                 Delay = Delay + 3;
137                 //   dscc - cdc uncertainty
138                 Delay = Delay + 2;
139                 //   dscc - output serializer
140                 Delay = Delay + 1;
141                 //   sft
142                 Delay = Delay + 1;
143         } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144                 //   sfr
145                 Delay = Delay + 2;
146                 //   dsccif
147                 Delay = Delay + 1;
148                 //   dscc - input deserializer
149                 Delay = Delay + 5;
150                 //  dscc - input cdc fifo
151                 Delay = Delay + 25;
152                 //   dscc - cdc uncertainty
153                 Delay = Delay + 2;
154                 //   dscc - output cdc fifo
155                 Delay = Delay + 10;
156                 //   dscc - cdc uncertainty
157                 Delay = Delay + 2;
158                 //   dscc - output serializer
159                 Delay = Delay + 1;
160                 //   sft
161                 Delay = Delay + 1;
162         } else {
163                 //   sfr
164                 Delay = Delay + 2;
165                 //   dsccif
166                 Delay = Delay + 0;
167                 //   dscc - input deserializer
168                 Delay = Delay + 3;
169                 //   dscc - input cdc fifo
170                 Delay = Delay + 12;
171                 //   dscc - cdc uncertainty
172                 Delay = Delay + 2;
173                 //   dscc - output cdc fifo
174                 Delay = Delay + 7;
175                 //   dscc - output serializer
176                 Delay = Delay + 1;
177                 //   dscc - cdc uncertainty
178                 Delay = Delay + 2;
179                 //   sft
180                 Delay = Delay + 1;
181         }
182
183         return Delay;
184 }
185
186
187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189         bool is_vert = false;
190
191         if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192                 is_vert = true;
193         else
194                 is_vert = false;
195         return is_vert;
196 }
197
198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199                 double HRatio,
200                 double HRatioChroma,
201                 double VRatio,
202                 double VRatioChroma,
203                 double MaxDCHUBToPSCLThroughput,
204                 double MaxPSCLToLBThroughput,
205                 double PixelClock,
206                 enum source_format_class SourcePixelFormat,
207                 unsigned int HTaps,
208                 unsigned int HTapsChroma,
209                 unsigned int VTaps,
210                 unsigned int VTapsChroma,
211
212                 /* output */
213                 double *PSCL_THROUGHPUT,
214                 double *PSCL_THROUGHPUT_CHROMA,
215                 double *DPPCLKUsingSingleDPP)
216 {
217         double DPPCLKUsingSingleDPPLuma;
218         double DPPCLKUsingSingleDPPChroma;
219
220         if (HRatio > 1) {
221                 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222                                 dml_ceil((double) HTaps / 6.0, 1.0));
223         } else {
224                 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225         }
226
227         DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228                         *PSCL_THROUGHPUT, 1);
229
230         if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231                 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232
233         if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234                         SourcePixelFormat != dm_rgbe_alpha)) {
235                 *PSCL_THROUGHPUT_CHROMA = 0;
236                 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237         } else {
238                 if (HRatioChroma > 1) {
239                         *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240                                         HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241                 } else {
242                         *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243                 }
244                 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245                                 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246                 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247                         DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248                 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249         }
250 }
251
252 void dml32_CalculateBytePerPixelAndBlockSizes(
253                 enum source_format_class SourcePixelFormat,
254                 enum dm_swizzle_mode SurfaceTiling,
255
256                 /* Output */
257                 unsigned int *BytePerPixelY,
258                 unsigned int *BytePerPixelC,
259                 double  *BytePerPixelDETY,
260                 double  *BytePerPixelDETC,
261                 unsigned int *BlockHeight256BytesY,
262                 unsigned int *BlockHeight256BytesC,
263                 unsigned int *BlockWidth256BytesY,
264                 unsigned int *BlockWidth256BytesC,
265                 unsigned int *MacroTileHeightY,
266                 unsigned int *MacroTileHeightC,
267                 unsigned int *MacroTileWidthY,
268                 unsigned int *MacroTileWidthC)
269 {
270         if (SourcePixelFormat == dm_444_64) {
271                 *BytePerPixelDETY = 8;
272                 *BytePerPixelDETC = 0;
273                 *BytePerPixelY = 8;
274                 *BytePerPixelC = 0;
275         } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276                 *BytePerPixelDETY = 4;
277                 *BytePerPixelDETC = 0;
278                 *BytePerPixelY = 4;
279                 *BytePerPixelC = 0;
280         } else if (SourcePixelFormat == dm_444_16) {
281                 *BytePerPixelDETY = 2;
282                 *BytePerPixelDETC = 0;
283                 *BytePerPixelY = 2;
284                 *BytePerPixelC = 0;
285         } else if (SourcePixelFormat == dm_444_8) {
286                 *BytePerPixelDETY = 1;
287                 *BytePerPixelDETC = 0;
288                 *BytePerPixelY = 1;
289                 *BytePerPixelC = 0;
290         } else if (SourcePixelFormat == dm_rgbe_alpha) {
291                 *BytePerPixelDETY = 4;
292                 *BytePerPixelDETC = 1;
293                 *BytePerPixelY = 4;
294                 *BytePerPixelC = 1;
295         } else if (SourcePixelFormat == dm_420_8) {
296                 *BytePerPixelDETY = 1;
297                 *BytePerPixelDETC = 2;
298                 *BytePerPixelY = 1;
299                 *BytePerPixelC = 2;
300         } else if (SourcePixelFormat == dm_420_12) {
301                 *BytePerPixelDETY = 2;
302                 *BytePerPixelDETC = 4;
303                 *BytePerPixelY = 2;
304                 *BytePerPixelC = 4;
305         } else {
306                 *BytePerPixelDETY = 4.0 / 3;
307                 *BytePerPixelDETC = 8.0 / 3;
308                 *BytePerPixelY = 2;
309                 *BytePerPixelC = 4;
310         }
311 #ifdef __DML_VBA_DEBUG__
312         dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313         dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314         dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315         dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
316         dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
317 #endif
318         if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319                         || SourcePixelFormat == dm_444_16
320                         || SourcePixelFormat == dm_444_8
321                         || SourcePixelFormat == dm_mono_16
322                         || SourcePixelFormat == dm_mono_8
323                         || SourcePixelFormat == dm_rgbe)) {
324                 if (SurfaceTiling == dm_sw_linear)
325                         *BlockHeight256BytesY = 1;
326                 else if (SourcePixelFormat == dm_444_64)
327                         *BlockHeight256BytesY = 4;
328                 else if (SourcePixelFormat == dm_444_8)
329                         *BlockHeight256BytesY = 16;
330                 else
331                         *BlockHeight256BytesY = 8;
332
333                 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334                 *BlockHeight256BytesC = 0;
335                 *BlockWidth256BytesC = 0;
336         } else {
337                 if (SurfaceTiling == dm_sw_linear) {
338                         *BlockHeight256BytesY = 1;
339                         *BlockHeight256BytesC = 1;
340                 } else if (SourcePixelFormat == dm_rgbe_alpha) {
341                         *BlockHeight256BytesY = 8;
342                         *BlockHeight256BytesC = 16;
343                 } else if (SourcePixelFormat == dm_420_8) {
344                         *BlockHeight256BytesY = 16;
345                         *BlockHeight256BytesC = 8;
346                 } else {
347                         *BlockHeight256BytesY = 8;
348                         *BlockHeight256BytesC = 8;
349                 }
350                 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351                 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352         }
353 #ifdef __DML_VBA_DEBUG__
354         dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
355         dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356         dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
357         dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359
360         if (SurfaceTiling == dm_sw_linear) {
361                 *MacroTileHeightY = *BlockHeight256BytesY;
362                 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363                 *MacroTileHeightC = *BlockHeight256BytesC;
364                 if (*MacroTileHeightC == 0)
365                         *MacroTileWidthC = 0;
366                 else
367                         *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368         } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369                         SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370                 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
371                 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372                 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
373                 if (*MacroTileHeightC == 0)
374                         *MacroTileWidthC = 0;
375                 else
376                         *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377         } else {
378                 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
379                 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380                 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
381                 if (*MacroTileHeightC == 0)
382                         *MacroTileWidthC = 0;
383                 else
384                         *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385         }
386
387 #ifdef __DML_VBA_DEBUG__
388         dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
389         dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390         dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
391         dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394
395 void dml32_CalculateSwathAndDETConfiguration(
396                 unsigned int DETSizeOverride[],
397                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398                 unsigned int ConfigReturnBufferSizeInKByte,
399                 unsigned int MaxTotalDETInKByte,
400                 unsigned int MinCompressedBufferSizeInKByte,
401                 double ForceSingleDPP,
402                 unsigned int NumberOfActiveSurfaces,
403                 unsigned int nomDETInKByte,
404                 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405                 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406                 unsigned int PixelChunkSizeKBytes,
407                 unsigned int ROBSizeKBytes,
408                 unsigned int CompressedBufferSegmentSizeInkByteFinal,
409                 enum output_encoder_class Output[],
410                 double ReadBandwidthLuma[],
411                 double ReadBandwidthChroma[],
412                 double MaximumSwathWidthLuma[],
413                 double MaximumSwathWidthChroma[],
414                 enum dm_rotation_angle SourceRotation[],
415                 bool ViewportStationary[],
416                 enum source_format_class SourcePixelFormat[],
417                 enum dm_swizzle_mode SurfaceTiling[],
418                 unsigned int ViewportWidth[],
419                 unsigned int ViewportHeight[],
420                 unsigned int ViewportXStart[],
421                 unsigned int ViewportYStart[],
422                 unsigned int ViewportXStartC[],
423                 unsigned int ViewportYStartC[],
424                 unsigned int SurfaceWidthY[],
425                 unsigned int SurfaceWidthC[],
426                 unsigned int SurfaceHeightY[],
427                 unsigned int SurfaceHeightC[],
428                 unsigned int Read256BytesBlockHeightY[],
429                 unsigned int Read256BytesBlockHeightC[],
430                 unsigned int Read256BytesBlockWidthY[],
431                 unsigned int Read256BytesBlockWidthC[],
432                 enum odm_combine_mode ODMMode[],
433                 unsigned int BlendingAndTiming[],
434                 unsigned int BytePerPixY[],
435                 unsigned int BytePerPixC[],
436                 double BytePerPixDETY[],
437                 double BytePerPixDETC[],
438                 unsigned int HActive[],
439                 double HRatio[],
440                 double HRatioChroma[],
441                 unsigned int DPPPerSurface[],
442
443                 /* Output */
444                 unsigned int swath_width_luma_ub[],
445                 unsigned int swath_width_chroma_ub[],
446                 double SwathWidth[],
447                 double SwathWidthChroma[],
448                 unsigned int SwathHeightY[],
449                 unsigned int SwathHeightC[],
450                 unsigned int DETBufferSizeInKByte[],
451                 unsigned int DETBufferSizeY[],
452                 unsigned int DETBufferSizeC[],
453                 bool *UnboundedRequestEnabled,
454                 unsigned int *CompressedBufferSizeInkByte,
455                 unsigned int *CompBufReservedSpaceKBytes,
456                 bool *CompBufReservedSpaceNeedAdjustment,
457                 bool ViewportSizeSupportPerSurface[],
458                 bool *ViewportSizeSupport)
459 {
460         unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461         unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462         unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463         unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464         unsigned int RoundedUpSwathSizeBytesY;
465         unsigned int RoundedUpSwathSizeBytesC;
466         double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467         double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468         unsigned int k;
469         unsigned int TotalActiveDPP = 0;
470         bool NoChromaSurfaces = true;
471         unsigned int DETBufferSizeInKByteForSwathCalculation;
472
473 #ifdef __DML_VBA_DEBUG__
474         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475         dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476         dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478         dml32_CalculateSwathWidth(ForceSingleDPP,
479                         NumberOfActiveSurfaces,
480                         SourcePixelFormat,
481                         SourceRotation,
482                         ViewportStationary,
483                         ViewportWidth,
484                         ViewportHeight,
485                         ViewportXStart,
486                         ViewportYStart,
487                         ViewportXStartC,
488                         ViewportYStartC,
489                         SurfaceWidthY,
490                         SurfaceWidthC,
491                         SurfaceHeightY,
492                         SurfaceHeightC,
493                         ODMMode,
494                         BytePerPixY,
495                         BytePerPixC,
496                         Read256BytesBlockHeightY,
497                         Read256BytesBlockHeightC,
498                         Read256BytesBlockWidthY,
499                         Read256BytesBlockWidthC,
500                         BlendingAndTiming,
501                         HActive,
502                         HRatio,
503                         DPPPerSurface,
504
505                         /* Output */
506                         SwathWidthdoubleDPP,
507                         SwathWidthdoubleDPPChroma,
508                         SwathWidth,
509                         SwathWidthChroma,
510                         MaximumSwathHeightY,
511                         MaximumSwathHeightC,
512                         swath_width_luma_ub,
513                         swath_width_chroma_ub);
514
515         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516                 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517                 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519                 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520                 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521                 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522                 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524                                 RoundedUpMaxSwathSizeBytesY[k]);
525                 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526                 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527                 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529                                 RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531
532                 if (SourcePixelFormat[k] == dm_420_10) {
533                         RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534                         RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535                 }
536         }
537
538         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539                 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540                 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541                                 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542                         NoChromaSurfaces = false;
543                 }
544         }
545
546         // By default, just set the reserved space to 2 pixel chunks size
547         *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548
549         // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550         // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551         // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552         *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553
554         if (*CompBufReservedSpaceNeedAdjustment == 1) {
555                 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556         }
557
558         #ifdef __DML_VBA_DEBUG__
559                 dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
560                 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
561         #endif
562
563         *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564
565         dml32_CalculateDETBufferSize(DETSizeOverride,
566                         UseMALLForPStateChange,
567                         ForceSingleDPP,
568                         NumberOfActiveSurfaces,
569                         *UnboundedRequestEnabled,
570                         nomDETInKByte,
571                         MaxTotalDETInKByte,
572                         ConfigReturnBufferSizeInKByte,
573                         MinCompressedBufferSizeInKByte,
574                         CompressedBufferSegmentSizeInkByteFinal,
575                         SourcePixelFormat,
576                         ReadBandwidthLuma,
577                         ReadBandwidthChroma,
578                         RoundedUpMaxSwathSizeBytesY,
579                         RoundedUpMaxSwathSizeBytesC,
580                         DPPPerSurface,
581
582                         /* Output */
583                         DETBufferSizeInKByte,    // per hubp pipe
584                         CompressedBufferSizeInkByte);
585
586 #ifdef __DML_VBA_DEBUG__
587         dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589         dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590         dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594
595         *ViewportSizeSupport = true;
596         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597
598                 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599                                 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601                 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602                                 DETBufferSizeInKByteForSwathCalculation);
603 #endif
604
605                 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607                         SwathHeightY[k] = MaximumSwathHeightY[k];
608                         SwathHeightC[k] = MaximumSwathHeightC[k];
609                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611                 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612                                 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614                         SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615                         SwathHeightC[k] = MaximumSwathHeightC[k];
616                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618                 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619                                 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621                         SwathHeightY[k] = MaximumSwathHeightY[k];
622                         SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625                 } else {
626                         SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627                         SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630                 }
631
632                 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634                                 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635                                                 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636                         *ViewportSizeSupport = false;
637                         ViewportSizeSupportPerSurface[k] = false;
638                 } else {
639                         ViewportSizeSupportPerSurface[k] = true;
640                 }
641
642                 if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644                         dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646                         DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647                         DETBufferSizeC[k] = 0;
648                 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650                         dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652                         DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653                         DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654                 } else {
655 #ifdef __DML_VBA_DEBUG__
656                         dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658                         DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659                         DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660                 }
661
662 #ifdef __DML_VBA_DEBUG__
663                 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664                 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666                                 k, RoundedUpMaxSwathSizeBytesY[k]);
667                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668                                 k, RoundedUpMaxSwathSizeBytesC[k]);
669                 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670                 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671                 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672                 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673                 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674                 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675                                 ViewportSizeSupportPerSurface[k]);
676 #endif
677
678         }
679 } // CalculateSwathAndDETConfiguration
680
681 void dml32_CalculateSwathWidth(
682                 bool                            ForceSingleDPP,
683                 unsigned int                    NumberOfActiveSurfaces,
684                 enum source_format_class        SourcePixelFormat[],
685                 enum dm_rotation_angle          SourceRotation[],
686                 bool                            ViewportStationary[],
687                 unsigned int                    ViewportWidth[],
688                 unsigned int                    ViewportHeight[],
689                 unsigned int                    ViewportXStart[],
690                 unsigned int                    ViewportYStart[],
691                 unsigned int                    ViewportXStartC[],
692                 unsigned int                    ViewportYStartC[],
693                 unsigned int                    SurfaceWidthY[],
694                 unsigned int                    SurfaceWidthC[],
695                 unsigned int                    SurfaceHeightY[],
696                 unsigned int                    SurfaceHeightC[],
697                 enum odm_combine_mode           ODMMode[],
698                 unsigned int                    BytePerPixY[],
699                 unsigned int                    BytePerPixC[],
700                 unsigned int                    Read256BytesBlockHeightY[],
701                 unsigned int                    Read256BytesBlockHeightC[],
702                 unsigned int                    Read256BytesBlockWidthY[],
703                 unsigned int                    Read256BytesBlockWidthC[],
704                 unsigned int                    BlendingAndTiming[],
705                 unsigned int                    HActive[],
706                 double                          HRatio[],
707                 unsigned int                    DPPPerSurface[],
708
709                 /* Output */
710                 double                          SwathWidthdoubleDPPY[],
711                 double                          SwathWidthdoubleDPPC[],
712                 double                          SwathWidthY[], // per-pipe
713                 double                          SwathWidthC[], // per-pipe
714                 unsigned int                    MaximumSwathHeightY[],
715                 unsigned int                    MaximumSwathHeightC[],
716                 unsigned int                    swath_width_luma_ub[], // per-pipe
717                 unsigned int                    swath_width_chroma_ub[]) // per-pipe
718 {
719         unsigned int k, j;
720         enum odm_combine_mode MainSurfaceODMMode;
721
722         unsigned int surface_width_ub_l;
723         unsigned int surface_height_ub_l;
724         unsigned int surface_width_ub_c = 0;
725         unsigned int surface_height_ub_c = 0;
726
727 #ifdef __DML_VBA_DEBUG__
728         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731
732         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733                 if (!IsVertical(SourceRotation[k]))
734                         SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735                 else
736                         SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737
738 #ifdef __DML_VBA_DEBUG__
739                 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740                 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742
743                 MainSurfaceODMMode = ODMMode[k];
744                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745                         if (BlendingAndTiming[k] == j)
746                                 MainSurfaceODMMode = ODMMode[j];
747                 }
748
749                 if (ForceSingleDPP) {
750                         SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751                 } else {
752                         if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753                                 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754                                                 dml_round(HActive[k] / 4.0 * HRatio[k]));
755                         } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756                                 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757                                                 dml_round(HActive[k] / 2.0 * HRatio[k]));
758                         } else if (DPPPerSurface[k] == 2) {
759                                 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760                         } else {
761                                 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762                         }
763                 }
764
765 #ifdef __DML_VBA_DEBUG__
766                 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767                 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768                 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769                 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770                 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772
773                 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774                                 SourcePixelFormat[k] == dm_420_12) {
775                         SwathWidthC[k] = SwathWidthY[k] / 2;
776                         SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777                 } else {
778                         SwathWidthC[k] = SwathWidthY[k];
779                         SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780                 }
781
782                 if (ForceSingleDPP == true) {
783                         SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784                         SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785                 }
786
787                 surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788                 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789
790                 if (!IsVertical(SourceRotation[k])) {
791                         MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792                         MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793                         if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794                                 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795                                                 dml_floor(ViewportXStart[k] +
796                                                                 SwathWidthY[k] +
797                                                                 Read256BytesBlockWidthY[k] - 1,
798                                                                 Read256BytesBlockWidthY[k]) -
799                                                                 dml_floor(ViewportXStart[k],
800                                                                 Read256BytesBlockWidthY[k]));
801                         } else {
802                                 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803                                                 dml_ceil(SwathWidthY[k] - 1,
804                                                                 Read256BytesBlockWidthY[k]) +
805                                                                 Read256BytesBlockWidthY[k]);
806                         }
807                         if (BytePerPixC[k] > 0) {
808                                 surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809                                 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810                                         swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811                                                         dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812                                                                         Read256BytesBlockWidthC[k] - 1,
813                                                                         Read256BytesBlockWidthC[k]) -
814                                                                         dml_floor(ViewportXStartC[k],
815                                                                         Read256BytesBlockWidthC[k]));
816                                 } else {
817                                         swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818                                                         dml_ceil(SwathWidthC[k] - 1,
819                                                                 Read256BytesBlockWidthC[k]) +
820                                                                 Read256BytesBlockWidthC[k]);
821                                 }
822                         } else {
823                                 swath_width_chroma_ub[k] = 0;
824                         }
825                 } else {
826                         MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827                         MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828
829                         if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830                                 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831                                                 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832                                                 Read256BytesBlockHeightY[k]) -
833                                                 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834                         } else {
835                                 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836                                                 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837                         }
838                         if (BytePerPixC[k] > 0) {
839                                 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840                                 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841                                         swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842                                                         dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843                                                                         Read256BytesBlockHeightC[k] - 1,
844                                                                         Read256BytesBlockHeightC[k]) -
845                                                                         dml_floor(ViewportYStartC[k],
846                                                                                         Read256BytesBlockHeightC[k]));
847                                 } else {
848                                         swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849                                                         dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850                                                         Read256BytesBlockHeightC[k]);
851                                 }
852                         } else {
853                                 swath_width_chroma_ub[k] = 0;
854                         }
855                 }
856
857 #ifdef __DML_VBA_DEBUG__
858                 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859                 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860                 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861                 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862                 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863                 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864                 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865                 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866                 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867                 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868                 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869                 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870                 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871                 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873
874         }
875 } // CalculateSwathWidth
876
877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878                         unsigned int TotalNumberOfActiveDPP,
879                         bool NoChroma,
880                         enum output_encoder_class Output,
881                         enum dm_swizzle_mode SurfaceTiling,
882                         bool CompBufReservedSpaceNeedAdjustment,
883                         bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885         bool ret_val = false;
886
887         ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888                         TotalNumberOfActiveDPP == 1 && NoChroma);
889         if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890                 ret_val = false;
891
892         if (SurfaceTiling == dm_sw_linear)
893                 ret_val = false;
894
895         if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896                 ret_val = false;
897
898 #ifdef __DML_VBA_DEBUG__
899         dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
900         dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901         dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
902 #endif
903
904         return (ret_val);
905 }
906
907 void dml32_CalculateDETBufferSize(
908                 unsigned int DETSizeOverride[],
909                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910                 bool ForceSingleDPP,
911                 unsigned int NumberOfActiveSurfaces,
912                 bool UnboundedRequestEnabled,
913                 unsigned int nomDETInKByte,
914                 unsigned int MaxTotalDETInKByte,
915                 unsigned int ConfigReturnBufferSizeInKByte,
916                 unsigned int MinCompressedBufferSizeInKByte,
917                 unsigned int CompressedBufferSegmentSizeInkByteFinal,
918                 enum source_format_class SourcePixelFormat[],
919                 double ReadBandwidthLuma[],
920                 double ReadBandwidthChroma[],
921                 unsigned int RoundedUpMaxSwathSizeBytesY[],
922                 unsigned int RoundedUpMaxSwathSizeBytesC[],
923                 unsigned int DPPPerSurface[],
924                 /* Output */
925                 unsigned int DETBufferSizeInKByte[],
926                 unsigned int *CompressedBufferSizeInkByte)
927 {
928         unsigned int DETBufferSizePoolInKByte;
929         unsigned int NextDETBufferPieceInKByte;
930         bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931         bool NextPotentialSurfaceToAssignDETPieceFound;
932         unsigned int NextSurfaceToAssignDETPiece;
933         double TotalBandwidth;
934         double BandwidthOfSurfacesNotAssignedDETPiece;
935         unsigned int max_minDET;
936         unsigned int minDET;
937         unsigned int minDET_pipe;
938         unsigned int j, k;
939
940 #ifdef __DML_VBA_DEBUG__
941         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945         dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946         dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947         dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948         dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949                         CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951
952         // Note: Will use default det size if that fits 2 swaths
953         if (UnboundedRequestEnabled) {
954                 if (DETSizeOverride[0] > 0) {
955                         DETBufferSizeInKByte[0] = DETSizeOverride[0];
956                 } else {
957                         DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958                                         ((double) RoundedUpMaxSwathSizeBytesY[0] +
959                                                         (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960                 }
961                 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962         } else {
963                 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965                         DETBufferSizeInKByte[k] = nomDETInKByte;
966                         if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967                                         SourcePixelFormat[k] == dm_420_12) {
968                                 max_minDET = nomDETInKByte - 64;
969                         } else {
970                                 max_minDET = nomDETInKByte;
971                         }
972                         minDET = 128;
973                         minDET_pipe = 0;
974
975                         // add DET resource until can hold 2 full swaths
976                         while (minDET <= max_minDET && minDET_pipe == 0) {
977                                 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978                                                 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979                                         minDET_pipe = minDET;
980                                 minDET = minDET + 64;
981                         }
982
983 #ifdef __DML_VBA_DEBUG__
984                         dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
985                         dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
986                         dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
987                         dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988                                         RoundedUpMaxSwathSizeBytesY[k]);
989                         dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990                                         RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992
993                         if (minDET_pipe == 0) {
994                                 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995                                                 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997                                 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998                                                 __func__, k, minDET_pipe);
999 #endif
1000                         }
1001
1002                         if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003                                 DETBufferSizeInKByte[k] = 0;
1004                         } else if (DETSizeOverride[k] > 0) {
1005                                 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007                                                 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008                         } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009                                 DETBufferSizeInKByte[k] = minDET_pipe;
1010                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011                                                 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012                         }
1013
1014 #ifdef __DML_VBA_DEBUG__
1015                         dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016                         dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017                         dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018                         dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020                 }
1021
1022                 TotalBandwidth = 0;
1023                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024                         if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025                                 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026                 }
1027 #ifdef __DML_VBA_DEBUG__
1028                 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029                 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030                         dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031                 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032                 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034                 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036
1037                         if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038                                 DETPieceAssignedToThisSurfaceAlready[k] = true;
1039                         } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040                                         (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041                                         ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042                                 DETPieceAssignedToThisSurfaceAlready[k] = true;
1043                                 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044                                                 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045                         } else {
1046                                 DETPieceAssignedToThisSurfaceAlready[k] = false;
1047                         }
1048 #ifdef __DML_VBA_DEBUG__
1049                         dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050                                         DETPieceAssignedToThisSurfaceAlready[k]);
1051                         dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052                                         BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054                 }
1055
1056                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057                         NextPotentialSurfaceToAssignDETPieceFound = false;
1058                         NextSurfaceToAssignDETPiece = 0;
1059
1060                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063                                                 ReadBandwidthLuma[k]);
1064                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065                                                 ReadBandwidthChroma[k]);
1066                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067                                                 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070                                 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071                                                 NextSurfaceToAssignDETPiece);
1072 #endif
1073                                 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074                                                 (!NextPotentialSurfaceToAssignDETPieceFound ||
1075                                                 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076                                                 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078                                         NextSurfaceToAssignDETPiece = k;
1079                                         NextPotentialSurfaceToAssignDETPieceFound = true;
1080                                 }
1081 #ifdef __DML_VBA_DEBUG__
1082                                 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083                                                 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084                                 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085                                                 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087                         }
1088
1089                         if (NextPotentialSurfaceToAssignDETPieceFound) {
1090                                 // Note: To show the banker's rounding behavior in VBA and also the fact
1091                                 // that the DET buffer size varies due to precision issue
1092                                 //
1093                                 //double tmp1 =  ((double) DETBufferSizePoolInKByte *
1094                                 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095                                 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096                                 // BandwidthOfSurfacesNotAssignedDETPiece /
1097                                 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098                                 //double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1099                                 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100                                 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101                                  //BandwidthOfSurfacesNotAssignedDETPiece /
1102                                 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103                                 //
1104                                 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105                                 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106
1107                                 NextDETBufferPieceInKByte = dml_min(
1108                                         dml_round((double) DETBufferSizePoolInKByte *
1109                                                 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111                                                 BandwidthOfSurfacesNotAssignedDETPiece /
1112                                                 ((ForceSingleDPP ? 1 :
1113                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114                                                 (ForceSingleDPP ? 1 :
1115                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116                                                 dml_floor((double) DETBufferSizePoolInKByte,
1117                                                 (ForceSingleDPP ? 1 :
1118                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119
1120                                 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1121                                 // We should limit the per-pipe DET size to the nominal / max per pipe.
1122                                 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123                                         if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124                                                         nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125                                                 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126                                                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127                                         } else {
1128                                                 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129                                                 // already has the max per-pipe value
1130                                                 NextDETBufferPieceInKByte = 0;
1131                                         }
1132                                 }
1133
1134 #ifdef __DML_VBA_DEBUG__
1135                                 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136                                         DETBufferSizePoolInKByte);
1137                                 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138                                         NextSurfaceToAssignDETPiece);
1139                                 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140                                         NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141                                 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142                                         NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143                                 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144                                         __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145                                 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146                                         NextDETBufferPieceInKByte);
1147                                 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148                                         __func__, j, NextSurfaceToAssignDETPiece,
1149                                         DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151
1152                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153                                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154                                                 + NextDETBufferPieceInKByte
1155                                                 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157                                 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159
1160                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161                                 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162                                 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163                                                 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164                                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165                         }
1166                 }
1167                 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168         }
1169         *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170
1171 #ifdef __DML_VBA_DEBUG__
1172         dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174         for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175                 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176                                 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177         }
1178 #endif
1179 } // CalculateDETBufferSize
1180
1181 void dml32_CalculateODMMode(
1182                 unsigned int MaximumPixelsPerLinePerDSCUnit,
1183                 unsigned int HActive,
1184                 enum output_format_class OutFormat,
1185                 enum output_encoder_class Output,
1186                 enum odm_combine_policy ODMUse,
1187                 double StateDispclk,
1188                 double MaxDispclk,
1189                 bool DSCEnable,
1190                 unsigned int TotalNumberOfActiveDPP,
1191                 unsigned int MaxNumDPP,
1192                 double PixelClock,
1193                 double DISPCLKDPPCLKDSCCLKDownSpreading,
1194                 double DISPCLKRampingMargin,
1195                 double DISPCLKDPPCLKVCOSpeed,
1196                 unsigned int NumberOfDSCSlices,
1197
1198                 /* Output */
1199                 bool *TotalAvailablePipesSupport,
1200                 unsigned int *NumberOfDPP,
1201                 enum odm_combine_mode *ODMMode,
1202                 double *RequiredDISPCLKPerSurface)
1203 {
1204
1205         double SurfaceRequiredDISPCLKWithoutODMCombine;
1206         double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207         double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208
1209         SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211                         MaxDispclk);
1212         SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214                         MaxDispclk);
1215         SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217                         MaxDispclk);
1218         *TotalAvailablePipesSupport = true;
1219         *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220
1221         if (ODMUse == dm_odm_combine_policy_none)
1222                 *ODMMode = dm_odm_combine_mode_disabled;
1223
1224         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225         *NumberOfDPP = 0;
1226
1227         // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228         // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229
1230         if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231                         ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232                                         (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233                                         || NumberOfDSCSlices > 8)))) {
1234                 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235                         *ODMMode = dm_odm_combine_mode_4to1;
1236                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237                         *NumberOfDPP = 4;
1238                 } else {
1239                         *TotalAvailablePipesSupport = false;
1240                 }
1241         } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242                         (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243                                         SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244                                         (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245                                         || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246                 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247                         *ODMMode = dm_odm_combine_mode_2to1;
1248                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249                         *NumberOfDPP = 2;
1250                 } else {
1251                         *TotalAvailablePipesSupport = false;
1252                 }
1253         } else {
1254                 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255                         *NumberOfDPP = 1;
1256                 else
1257                         *TotalAvailablePipesSupport = false;
1258         }
1259         if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260                         ODMUse != dm_odm_combine_policy_4to1) {
1261                 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262                         *ODMMode = dm_odm_combine_mode_disabled;
1263                         *NumberOfDPP = 0;
1264                         *TotalAvailablePipesSupport = false;
1265                 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266                                 *ODMMode == dm_odm_combine_mode_4to1) {
1267                         *ODMMode = dm_odm_combine_mode_4to1;
1268                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269                         *NumberOfDPP = 4;
1270                 } else {
1271                         *ODMMode = dm_odm_combine_mode_2to1;
1272                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273                         *NumberOfDPP = 2;
1274                 }
1275         }
1276         if (Output == dm_hdmi && OutFormat == dm_420 &&
1277                         HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278                 *ODMMode = dm_odm_combine_mode_disabled;
1279                 *NumberOfDPP = 0;
1280                 *TotalAvailablePipesSupport = false;
1281         }
1282 }
1283
1284 double dml32_CalculateRequiredDispclk(
1285                 enum odm_combine_mode ODMMode,
1286                 double PixelClock,
1287                 double DISPCLKDPPCLKDSCCLKDownSpreading,
1288                 double DISPCLKRampingMargin,
1289                 double DISPCLKDPPCLKVCOSpeed,
1290                 double MaxDispclk)
1291 {
1292         double RequiredDispclk = 0.;
1293         double PixelClockAfterODM;
1294         double DISPCLKWithRampingRoundedToDFSGranularity;
1295         double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296         double MaxDispclkRoundedDownToDFSGranularity;
1297
1298         if (ODMMode == dm_odm_combine_mode_4to1)
1299                 PixelClockAfterODM = PixelClock / 4;
1300         else if (ODMMode == dm_odm_combine_mode_2to1)
1301                 PixelClockAfterODM = PixelClock / 2;
1302         else
1303                 PixelClockAfterODM = PixelClock;
1304
1305
1306         DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307                         PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308                                         * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309
1310         DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311                         PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312
1313         MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314
1315         if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316                 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317         else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318                 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319         else
1320                 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321
1322         return RequiredDispclk;
1323 }
1324
/*
 * Snap Clock to a value of the form (VCOSpeed * 4) / Divider.
 *
 * The divider is VCOSpeed * 4 / Clock passed through dml_floor() when
 * round_up is set (giving a result >= Clock) or dml_ceil() otherwise.
 * Both helpers are called with a granularity of 1.0; presumably they round
 * to a whole number - see their definitions.
 *
 * Returns 0.0 for Clock <= 0.0.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double RawDivider;
	double Divider;

	if (Clock <= 0.0)
		return 0.0;

	RawDivider = VCOSpeed * 4.0 / Clock;
	Divider = round_up ? dml_floor(RawDivider, 1.0) : dml_ceil(RawDivider, 1.0);

	return VCOSpeed * 4.0 / Divider;
}
1335
1336 void dml32_CalculateOutputLink(
1337                 double PHYCLKPerState,
1338                 double PHYCLKD18PerState,
1339                 double PHYCLKD32PerState,
1340                 double Downspreading,
1341                 bool IsMainSurfaceUsingTheIndicatedTiming,
1342                 enum output_encoder_class Output,
1343                 enum output_format_class OutputFormat,
1344                 unsigned int HTotal,
1345                 unsigned int HActive,
1346                 double PixelClockBackEnd,
1347                 double ForcedOutputLinkBPP,
1348                 unsigned int DSCInputBitPerComponent,
1349                 unsigned int NumberOfDSCSlices,
1350                 double AudioSampleRate,
1351                 unsigned int AudioSampleLayout,
1352                 enum odm_combine_mode ODMModeNoDSC,
1353                 enum odm_combine_mode ODMModeDSC,
1354                 bool DSCEnable,
1355                 unsigned int OutputLinkDPLanes,
1356                 enum dm_output_link_dp_rate OutputLinkDPRate,
1357
1358                 /* Output */
1359                 bool *RequiresDSC,
1360                 double *RequiresFEC,
1361                 double  *OutBpp,
1362                 enum dm_output_type *OutputType,
1363                 enum dm_output_rate *OutputRate,
1364                 unsigned int *RequiredSlots)
1365 {
1366         bool LinkDSCEnable;
1367         unsigned int dummy;
1368         *RequiresDSC = false;
1369         *RequiresFEC = false;
1370         *OutBpp = 0;
1371         *OutputType = dm_output_type_unknown;
1372         *OutputRate = dm_output_rate_unknown;
1373
1374         if (IsMainSurfaceUsingTheIndicatedTiming) {
1375                 if (Output == dm_hdmi) {
1376                         *RequiresDSC = false;
1377                         *RequiresFEC = false;
1378                         *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379                                         PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381                                         ODMModeNoDSC, ODMModeDSC, &dummy);
1382                         //OutputTypeAndRate = "HDMI";
1383                         *OutputType = dm_output_type_hdmi;
1384
1385                 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386                         if (DSCEnable == true) {
1387                                 *RequiresDSC = true;
1388                                 LinkDSCEnable = true;
1389                                 if (Output == dm_dp || Output == dm_dp2p0)
1390                                         *RequiresFEC = true;
1391                                 else
1392                                         *RequiresFEC = false;
1393                         } else {
1394                                 *RequiresDSC = false;
1395                                 LinkDSCEnable = false;
1396                                 if (Output == dm_dp2p0)
1397                                         *RequiresFEC = true;
1398                                 else
1399                                         *RequiresFEC = false;
1400                         }
1401                         if (Output == dm_dp2p0) {
1402                                 *OutBpp = 0;
1403                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404                                                 PHYCLKD32PerState >= 10000 / 32) {
1405                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410                                         if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1411                                                         ForcedOutputLinkBPP == 0) {
1412                                                 *RequiresDSC = true;
1413                                                 LinkDSCEnable = true;
1414                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417                                                                 OutputFormat, DSCInputBitPerComponent,
1418                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420                                         }
1421                                         //OutputTypeAndRate = Output & " UHBR10";
1422                                         *OutputType = dm_output_type_dp2p0;
1423                                         *OutputRate = dm_output_rate_dp_rate_uhbr10;
1424                                 }
1425                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426                                                 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1427                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432
1433                                         if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434                                                         ForcedOutputLinkBPP == 0) {
1435                                                 *RequiresDSC = true;
1436                                                 LinkDSCEnable = true;
1437                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440                                                                 OutputFormat, DSCInputBitPerComponent,
1441                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443                                         }
1444                                         //OutputTypeAndRate = Output & " UHBR13p5";
1445                                         *OutputType = dm_output_type_dp2p0;
1446                                         *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447                                 }
1448                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449                                                 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455                                         if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456                                                 *RequiresDSC = true;
1457                                                 LinkDSCEnable = true;
1458                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461                                                                 OutputFormat, DSCInputBitPerComponent,
1462                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464                                         }
1465                                         //OutputTypeAndRate = Output & " UHBR20";
1466                                         *OutputType = dm_output_type_dp2p0;
1467                                         *OutputRate = dm_output_rate_dp_rate_uhbr20;
1468                                 }
1469                         } else {
1470                                 *OutBpp = 0;
1471                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472                                                 PHYCLKPerState >= 270) {
1473                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478                                         if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479                                                         ForcedOutputLinkBPP == 0) {
1480                                                 *RequiresDSC = true;
1481                                                 LinkDSCEnable = true;
1482                                                 if (Output == dm_dp)
1483                                                         *RequiresFEC = true;
1484                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487                                                                 OutputFormat, DSCInputBitPerComponent,
1488                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490                                         }
1491                                         //OutputTypeAndRate = Output & " HBR";
1492                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493                                         *OutputRate = dm_output_rate_dp_rate_hbr;
1494                                 }
1495                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496                                                 *OutBpp == 0 && PHYCLKPerState >= 540) {
1497                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502
1503                                         if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504                                                         ForcedOutputLinkBPP == 0) {
1505                                                 *RequiresDSC = true;
1506                                                 LinkDSCEnable = true;
1507                                                 if (Output == dm_dp)
1508                                                         *RequiresFEC = true;
1509
1510                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513                                                                 OutputFormat, DSCInputBitPerComponent,
1514                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516                                         }
1517                                         //OutputTypeAndRate = Output & " HBR2";
1518                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519                                         *OutputRate = dm_output_rate_dp_rate_hbr2;
1520                                 }
1521                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525                                                         OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526                                                         AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527                                                         RequiredSlots);
1528
1529                                         if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530                                                 *RequiresDSC = true;
1531                                                 LinkDSCEnable = true;
1532                                                 if (Output == dm_dp)
1533                                                         *RequiresFEC = true;
1534
1535                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538                                                                 OutputFormat, DSCInputBitPerComponent,
1539                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541                                         }
1542                                         //OutputTypeAndRate = Output & " HBR3";
1543                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544                                         *OutputRate = dm_output_rate_dp_rate_hbr3;
1545                                 }
1546                         }
1547                 }
1548         }
1549 }
1550
/*
 * Derive the per-surface DPPCLK values and the global DPPCLK.
 *
 * Pass 1: each surface's clock is its single-DPP clock divided by the number
 *         of DPPs on that surface, scaled by the down-spread percentage; the
 *         global clock tracks the maximum of these via dml_max().
 * Then the global clock is rounded up to DFS granularity.
 * Pass 2: each surface's clock is re-expressed as a multiple of
 *         GlobalDPPCLK / 255, with the multiplier taken from dml_ceil().
 *
 * Outputs: *GlobalDPPCLK and Dppclk[0 .. NumberOfActiveSurfaces - 1].
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double DownSpreadFactor = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	unsigned int i;

	*GlobalDPPCLK = 0;
	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		Dppclk[i] = DPPCLKUsingSingleDPP[i] / DPPPerSurface[i] * DownSpreadFactor;
		*GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[i]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);

	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		/* Number of 1/255 steps of the global clock this surface needs. */
		const double Steps = dml_ceil(Dppclk[i] * 255.0 / *GlobalDPPCLK, 1.0);

		Dppclk[i] = *GlobalDPPCLK / 255 * Steps;
	}
}
1572
1573 double dml32_TruncToValidBPP(
1574                 double LinkBitRate,
1575                 unsigned int Lanes,
1576                 unsigned int HTotal,
1577                 unsigned int HActive,
1578                 double PixelClock,
1579                 double DesiredBPP,
1580                 bool DSCEnable,
1581                 enum output_encoder_class Output,
1582                 enum output_format_class Format,
1583                 unsigned int DSCInputBitPerComponent,
1584                 unsigned int DSCSlices,
1585                 unsigned int AudioRate,
1586                 unsigned int AudioLayout,
1587                 enum odm_combine_mode ODMModeNoDSC,
1588                 enum odm_combine_mode ODMModeDSC,
1589                 /* Output */
1590                 unsigned int *RequiredSlots)
1591 {
1592         double    MaxLinkBPP;
1593         unsigned int   MinDSCBPP;
1594         double    MaxDSCBPP;
1595         unsigned int   NonDSCBPP0;
1596         unsigned int   NonDSCBPP1;
1597         unsigned int   NonDSCBPP2;
1598         unsigned int   NonDSCBPP3;
1599
1600         if (Format == dm_420) {
1601                 NonDSCBPP0 = 12;
1602                 NonDSCBPP1 = 15;
1603                 NonDSCBPP2 = 18;
1604                 MinDSCBPP = 6;
1605                 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1606         } else if (Format == dm_444) {
1607                 NonDSCBPP0 = 18;
1608                 NonDSCBPP1 = 24;
1609                 NonDSCBPP2 = 30;
1610                 NonDSCBPP3 = 36;
1611                 MinDSCBPP = 8;
1612                 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1613         } else {
1614                 if (Output == dm_hdmi) {
1615                         NonDSCBPP0 = 24;
1616                         NonDSCBPP1 = 24;
1617                         NonDSCBPP2 = 24;
1618                 } else {
1619                         NonDSCBPP0 = 16;
1620                         NonDSCBPP1 = 20;
1621                         NonDSCBPP2 = 24;
1622                 }
1623                 if (Format == dm_n422) {
1624                         MinDSCBPP = 7;
1625                         MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1626                 } else {
1627                         MinDSCBPP = 8;
1628                         MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1629                 }
1630         }
1631         if (Output == dm_dp2p0) {
1632                 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1633         } else if (DSCEnable && Output == dm_dp) {
1634                 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1635         } else {
1636                 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1637         }
1638
1639         if (DSCEnable) {
1640                 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1641                         MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1642                 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1643                         MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1644                 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1645                         MaxLinkBPP = 2 * MaxLinkBPP;
1646         } else {
1647                 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1648                         MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1649                 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1650                         MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1651                 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1652                         MaxLinkBPP = 2 * MaxLinkBPP;
1653         }
1654
1655         if (DesiredBPP == 0) {
1656                 if (DSCEnable) {
1657                         if (MaxLinkBPP < MinDSCBPP)
1658                                 return BPP_INVALID;
1659                         else if (MaxLinkBPP >= MaxDSCBPP)
1660                                 return MaxDSCBPP;
1661                         else
1662                                 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1663                 } else {
1664                         if (MaxLinkBPP >= NonDSCBPP3)
1665                                 return NonDSCBPP3;
1666                         else if (MaxLinkBPP >= NonDSCBPP2)
1667                                 return NonDSCBPP2;
1668                         else if (MaxLinkBPP >= NonDSCBPP1)
1669                                 return NonDSCBPP1;
1670                         else if (MaxLinkBPP >= NonDSCBPP0)
1671                                 return 16.0;
1672                         else
1673                                 return BPP_INVALID;
1674                 }
1675         } else {
1676                 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1677                                 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1678                                 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1679                         return BPP_INVALID;
1680                 else
1681                         return DesiredBPP;
1682         }
1683
1684         *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1685
1686         return BPP_INVALID;
1687 } // TruncToValidBPP
1688
1689 double dml32_RequiredDTBCLK(
1690                 bool              DSCEnable,
1691                 double               PixelClock,
1692                 enum output_format_class  OutputFormat,
1693                 double               OutputBpp,
1694                 unsigned int              DSCSlices,
1695                 unsigned int                 HTotal,
1696                 unsigned int                 HActive,
1697                 unsigned int              AudioRate,
1698                 unsigned int              AudioLayout)
1699 {
1700         double PixelWordRate;
1701         double HCActive;
1702         double HCBlank;
1703         double AverageTribyteRate;
1704         double HActiveTribyteRate;
1705
1706         if (DSCEnable != true)
1707                 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1708
1709         PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1710         HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1711                         dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1712         HCBlank = 64 + 32 *
1713                         dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1714         AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1715         HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1716         return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1717 }
1718
1719 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1720                 enum odm_combine_mode ODMMode,
1721                 unsigned int DSCInputBitPerComponent,
1722                 double OutputBpp,
1723                 unsigned int HActive,
1724                 unsigned int HTotal,
1725                 unsigned int NumberOfDSCSlices,
1726                 enum output_format_class  OutputFormat,
1727                 enum output_encoder_class Output,
1728                 double PixelClock,
1729                 double PixelClockBackEnd,
1730                 double dsc_delay_factor_wa)
1731 {
1732         unsigned int DSCDelayRequirement_val;
1733
1734         if (DSCEnabled == true && OutputBpp != 0) {
1735                 if (ODMMode == dm_odm_combine_mode_4to1) {
1736                         DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1737                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1738                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1739                 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1740                         DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1741                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1742                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1743                 } else {
1744                         DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1745                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1746                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1747                 }
1748
1749                 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1750                                 dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1751
1752                 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1753
1754         } else {
1755                 DSCDelayRequirement_val = 0;
1756         }
1757
1758 #ifdef __DML_VBA_DEBUG__
1759         dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1760         dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1761         dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1762         dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1763         dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1764         dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1765         dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1766 #endif
1767
1768         return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1769 }
1770
1771 void dml32_CalculateSurfaceSizeInMall(
1772                 unsigned int NumberOfActiveSurfaces,
1773                 unsigned int MALLAllocatedForDCN,
1774                 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1775                 bool DCCEnable[],
1776                 bool ViewportStationary[],
1777                 unsigned int ViewportXStartY[],
1778                 unsigned int ViewportYStartY[],
1779                 unsigned int ViewportXStartC[],
1780                 unsigned int ViewportYStartC[],
1781                 unsigned int ViewportWidthY[],
1782                 unsigned int ViewportHeightY[],
1783                 unsigned int BytesPerPixelY[],
1784                 unsigned int ViewportWidthC[],
1785                 unsigned int ViewportHeightC[],
1786                 unsigned int BytesPerPixelC[],
1787                 unsigned int SurfaceWidthY[],
1788                 unsigned int SurfaceWidthC[],
1789                 unsigned int SurfaceHeightY[],
1790                 unsigned int SurfaceHeightC[],
1791                 unsigned int Read256BytesBlockWidthY[],
1792                 unsigned int Read256BytesBlockWidthC[],
1793                 unsigned int Read256BytesBlockHeightY[],
1794                 unsigned int Read256BytesBlockHeightC[],
1795                 unsigned int ReadBlockWidthY[],
1796                 unsigned int ReadBlockWidthC[],
1797                 unsigned int ReadBlockHeightY[],
1798                 unsigned int ReadBlockHeightC[],
1799
1800                 /* Output */
1801                 unsigned int    SurfaceSizeInMALL[],
1802                 bool *ExceededMALLSize)
1803 {
1804         unsigned int TotalSurfaceSizeInMALL  = 0;
1805         unsigned int k;
1806
1807         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1808                 if (ViewportStationary[k]) {
1809                         SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1810                                         dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1811                                                 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1812                                                 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1813                                                 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1814                                                 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1815                                                 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1816
1817                         if (ReadBlockWidthC[k] > 0) {
1818                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1819                                                 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1820                                                         dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1821                                                         ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1822                                                         dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1823                                                         dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1824                                                         dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1825                                                         ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1826                                                         dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1827                                                         BytesPerPixelC[k];
1828                         }
1829                         if (DCCEnable[k] == true) {
1830                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1831                                                 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1832                                                         dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1833                                                         Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1834                                                         - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1835                                                         * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1836                                                         Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1837                                                         ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1838                                                         Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1839                                                         * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1840                                 if (Read256BytesBlockWidthC[k] > 0) {
1841                                         SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1842                                                         dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1843                                                                 Read256BytesBlockWidthC[k]),
1844                                                                 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1845                                                                 * Read256BytesBlockWidthC[k] - 1, 8 *
1846                                                                 Read256BytesBlockWidthC[k]) -
1847                                                                 dml_floor(ViewportXStartC[k], 8 *
1848                                                                 Read256BytesBlockWidthC[k])) *
1849                                                                 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1850                                                                 Read256BytesBlockHeightC[k]),
1851                                                                 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1852                                                                 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1853                                                                 Read256BytesBlockHeightC[k]) -
1854                                                                 dml_floor(ViewportYStartC[k], 8 *
1855                                                                 Read256BytesBlockHeightC[k])) *
1856                                                                 BytesPerPixelC[k] / 256;
1857                                 }
1858                         }
1859                 } else {
1860                         SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1861                                         ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1862                                         dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1863                                                         ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1864                                                         BytesPerPixelY[k];
1865                         if (ReadBlockWidthC[k] > 0) {
1866                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1867                                                 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1868                                                                 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1869                                                 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1870                                                                 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1871                                                                 BytesPerPixelC[k];
1872                         }
1873                         if (DCCEnable[k] == true) {
1874                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1875                                                 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1876                                                                 Read256BytesBlockWidthY[k] - 1), 8 *
1877                                                                 Read256BytesBlockWidthY[k]) *
1878                                                 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1879                                                                 Read256BytesBlockHeightY[k] - 1), 8 *
1880                                                                 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1881
1882                                 if (Read256BytesBlockWidthC[k] > 0) {
1883                                         SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1884                                                         dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1885                                                                         Read256BytesBlockWidthC[k] - 1), 8 *
1886                                                                         Read256BytesBlockWidthC[k]) *
1887                                                         dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1888                                                                         Read256BytesBlockHeightC[k] - 1), 8 *
1889                                                                         Read256BytesBlockHeightC[k]) *
1890                                                                         BytesPerPixelC[k] / 256;
1891                                 }
1892                         }
1893                 }
1894         }
1895
1896         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1897                 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1898                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1899         }
1900         *ExceededMALLSize =  (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024);
1901 } // CalculateSurfaceSizeInMall
1902
1903 void dml32_CalculateVMRowAndSwath(
1904                 unsigned int NumberOfActiveSurfaces,
1905                 DmlPipe myPipe[],
1906                 unsigned int SurfaceSizeInMALL[],
1907                 unsigned int PTEBufferSizeInRequestsLuma,
1908                 unsigned int PTEBufferSizeInRequestsChroma,
1909                 unsigned int DCCMetaBufferSizeBytes,
1910                 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1911                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1912                 unsigned int MALLAllocatedForDCN,
1913                 double SwathWidthY[],
1914                 double SwathWidthC[],
1915                 bool GPUVMEnable,
1916                 bool HostVMEnable,
1917                 unsigned int HostVMMaxNonCachedPageTableLevels,
1918                 unsigned int GPUVMMaxPageTableLevels,
1919                 unsigned int GPUVMMinPageSizeKBytes[],
1920                 unsigned int HostVMMinPageSize,
1921
1922                 /* Output */
1923                 bool PTEBufferSizeNotExceeded[],
1924                 bool DCCMetaBufferSizeNotExceeded[],
1925                 unsigned int dpte_row_width_luma_ub[],
1926                 unsigned int dpte_row_width_chroma_ub[],
1927                 unsigned int dpte_row_height_luma[],
1928                 unsigned int dpte_row_height_chroma[],
1929                 unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1930                 unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1931                 unsigned int meta_req_width[],
1932                 unsigned int meta_req_width_chroma[],
1933                 unsigned int meta_req_height[],
1934                 unsigned int meta_req_height_chroma[],
1935                 unsigned int meta_row_width[],
1936                 unsigned int meta_row_width_chroma[],
1937                 unsigned int meta_row_height[],
1938                 unsigned int meta_row_height_chroma[],
1939                 unsigned int vm_group_bytes[],
1940                 unsigned int dpte_group_bytes[],
1941                 unsigned int PixelPTEReqWidthY[],
1942                 unsigned int PixelPTEReqHeightY[],
1943                 unsigned int PTERequestSizeY[],
1944                 unsigned int PixelPTEReqWidthC[],
1945                 unsigned int PixelPTEReqHeightC[],
1946                 unsigned int PTERequestSizeC[],
1947                 unsigned int dpde0_bytes_per_frame_ub_l[],
1948                 unsigned int meta_pte_bytes_per_frame_ub_l[],
1949                 unsigned int dpde0_bytes_per_frame_ub_c[],
1950                 unsigned int meta_pte_bytes_per_frame_ub_c[],
1951                 double PrefetchSourceLinesY[],
1952                 double PrefetchSourceLinesC[],
1953                 double VInitPreFillY[],
1954                 double VInitPreFillC[],
1955                 unsigned int MaxNumSwathY[],
1956                 unsigned int MaxNumSwathC[],
1957                 double meta_row_bw[],
1958                 double dpte_row_bw[],
1959                 double PixelPTEBytesPerRow[],
1960                 double PDEAndMetaPTEBytesFrame[],
1961                 double MetaRowByte[],
1962                 bool use_one_row_for_frame[],
1963                 bool use_one_row_for_frame_flip[],
1964                 bool UsesMALLForStaticScreen[],
1965                 bool PTE_BUFFER_MODE[],
1966                 unsigned int BIGK_FRAGMENT_SIZE[])
1967 {
1968         unsigned int k;
1969         unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1970         unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1971         unsigned int PDEAndMetaPTEBytesFrameY;
1972         unsigned int PDEAndMetaPTEBytesFrameC;
1973         unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1974         unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1975         unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1976         unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1977         unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1978         unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1979         unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1980         unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1981         unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1982         unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1983         bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1984
1985         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1986                 if (HostVMEnable == true) {
1987                         vm_group_bytes[k] = 512;
1988                         dpte_group_bytes[k] = 512;
1989                 } else if (GPUVMEnable == true) {
1990                         vm_group_bytes[k] = 2048;
1991                         if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1992                                 dpte_group_bytes[k] = 512;
1993                         else
1994                                 dpte_group_bytes[k] = 2048;
1995                 } else {
1996                         vm_group_bytes[k] = 0;
1997                         dpte_group_bytes[k] = 0;
1998                 }
1999
2000                 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2001                                 myPipe[k].SourcePixelFormat == dm_420_12 ||
2002                                 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2003                         if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2004                                         !IsVertical(myPipe[k].SourceRotation)) {
2005                                 PTEBufferSizeInRequestsForLuma[k] =
2006                                                 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2007                                 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2008                         } else {
2009                                 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2010                                 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2011                         }
2012
2013                         PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2014                                         myPipe[k].ViewportStationary,
2015                                         myPipe[k].DCCEnable,
2016                                         myPipe[k].DPPPerSurface,
2017                                         myPipe[k].BlockHeight256BytesC,
2018                                         myPipe[k].BlockWidth256BytesC,
2019                                         myPipe[k].SourcePixelFormat,
2020                                         myPipe[k].SurfaceTiling,
2021                                         myPipe[k].BytePerPixelC,
2022                                         myPipe[k].SourceRotation,
2023                                         SwathWidthC[k],
2024                                         myPipe[k].ViewportHeightChroma,
2025                                         myPipe[k].ViewportXStartC,
2026                                         myPipe[k].ViewportYStartC,
2027                                         GPUVMEnable,
2028                                         HostVMEnable,
2029                                         HostVMMaxNonCachedPageTableLevels,
2030                                         GPUVMMaxPageTableLevels,
2031                                         GPUVMMinPageSizeKBytes[k],
2032                                         HostVMMinPageSize,
2033                                         PTEBufferSizeInRequestsForChroma[k],
2034                                         myPipe[k].PitchC,
2035                                         myPipe[k].DCCMetaPitchC,
2036                                         myPipe[k].BlockWidthC,
2037                                         myPipe[k].BlockHeightC,
2038
2039                                         /* Output */
2040                                         &MetaRowByteC[k],
2041                                         &PixelPTEBytesPerRowC[k],
2042                                         &dpte_row_width_chroma_ub[k],
2043                                         &dpte_row_height_chroma[k],
2044                                         &dpte_row_height_linear_chroma[k],
2045                                         &PixelPTEBytesPerRowC_one_row_per_frame[k],
2046                                         &dpte_row_width_chroma_ub_one_row_per_frame[k],
2047                                         &dpte_row_height_chroma_one_row_per_frame[k],
2048                                         &meta_req_width_chroma[k],
2049                                         &meta_req_height_chroma[k],
2050                                         &meta_row_width_chroma[k],
2051                                         &meta_row_height_chroma[k],
2052                                         &PixelPTEReqWidthC[k],
2053                                         &PixelPTEReqHeightC[k],
2054                                         &PTERequestSizeC[k],
2055                                         &dpde0_bytes_per_frame_ub_c[k],
2056                                         &meta_pte_bytes_per_frame_ub_c[k]);
2057
2058                         PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2059                                         myPipe[k].VRatioChroma,
2060                                         myPipe[k].VTapsChroma,
2061                                         myPipe[k].InterlaceEnable,
2062                                         myPipe[k].ProgressiveToInterlaceUnitInOPP,
2063                                         myPipe[k].SwathHeightC,
2064                                         myPipe[k].SourceRotation,
2065                                         myPipe[k].ViewportStationary,
2066                                         SwathWidthC[k],
2067                                         myPipe[k].ViewportHeightChroma,
2068                                         myPipe[k].ViewportXStartC,
2069                                         myPipe[k].ViewportYStartC,
2070
2071                                         /* Output */
2072                                         &VInitPreFillC[k],
2073                                         &MaxNumSwathC[k]);
2074                 } else {
2075                         PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2076                         PTEBufferSizeInRequestsForChroma[k] = 0;
2077                         PixelPTEBytesPerRowC[k] = 0;
2078                         PDEAndMetaPTEBytesFrameC = 0;
2079                         MetaRowByteC[k] = 0;
2080                         MaxNumSwathC[k] = 0;
2081                         PrefetchSourceLinesC[k] = 0;
2082                         dpte_row_height_chroma_one_row_per_frame[k] = 0;
2083                         dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2084                         PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2085                 }
2086
2087                 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2088                                 myPipe[k].ViewportStationary,
2089                                 myPipe[k].DCCEnable,
2090                                 myPipe[k].DPPPerSurface,
2091                                 myPipe[k].BlockHeight256BytesY,
2092                                 myPipe[k].BlockWidth256BytesY,
2093                                 myPipe[k].SourcePixelFormat,
2094                                 myPipe[k].SurfaceTiling,
2095                                 myPipe[k].BytePerPixelY,
2096                                 myPipe[k].SourceRotation,
2097                                 SwathWidthY[k],
2098                                 myPipe[k].ViewportHeight,
2099                                 myPipe[k].ViewportXStart,
2100                                 myPipe[k].ViewportYStart,
2101                                 GPUVMEnable,
2102                                 HostVMEnable,
2103                                 HostVMMaxNonCachedPageTableLevels,
2104                                 GPUVMMaxPageTableLevels,
2105                                 GPUVMMinPageSizeKBytes[k],
2106                                 HostVMMinPageSize,
2107                                 PTEBufferSizeInRequestsForLuma[k],
2108                                 myPipe[k].PitchY,
2109                                 myPipe[k].DCCMetaPitchY,
2110                                 myPipe[k].BlockWidthY,
2111                                 myPipe[k].BlockHeightY,
2112
2113                                 /* Output */
2114                                 &MetaRowByteY[k],
2115                                 &PixelPTEBytesPerRowY[k],
2116                                 &dpte_row_width_luma_ub[k],
2117                                 &dpte_row_height_luma[k],
2118                                 &dpte_row_height_linear_luma[k],
2119                                 &PixelPTEBytesPerRowY_one_row_per_frame[k],
2120                                 &dpte_row_width_luma_ub_one_row_per_frame[k],
2121                                 &dpte_row_height_luma_one_row_per_frame[k],
2122                                 &meta_req_width[k],
2123                                 &meta_req_height[k],
2124                                 &meta_row_width[k],
2125                                 &meta_row_height[k],
2126                                 &PixelPTEReqWidthY[k],
2127                                 &PixelPTEReqHeightY[k],
2128                                 &PTERequestSizeY[k],
2129                                 &dpde0_bytes_per_frame_ub_l[k],
2130                                 &meta_pte_bytes_per_frame_ub_l[k]);
2131
2132                 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2133                                 myPipe[k].VRatio,
2134                                 myPipe[k].VTaps,
2135                                 myPipe[k].InterlaceEnable,
2136                                 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2137                                 myPipe[k].SwathHeightY,
2138                                 myPipe[k].SourceRotation,
2139                                 myPipe[k].ViewportStationary,
2140                                 SwathWidthY[k],
2141                                 myPipe[k].ViewportHeight,
2142                                 myPipe[k].ViewportXStart,
2143                                 myPipe[k].ViewportYStart,
2144
2145                                 /* Output */
2146                                 &VInitPreFillY[k],
2147                                 &MaxNumSwathY[k]);
2148
2149                 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2150                 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2151
2152                 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2153                                 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2154                         PTEBufferSizeNotExceeded[k] = true;
2155                 } else {
2156                         PTEBufferSizeNotExceeded[k] = false;
2157                 }
2158
2159                 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2160                         PTEBufferSizeInRequestsForLuma[k] &&
2161                         PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2162         }
2163
2164         dml32_CalculateMALLUseForStaticScreen(
2165                         NumberOfActiveSurfaces,
2166                         MALLAllocatedForDCN,
2167                         UseMALLForStaticScreen,   // mode
2168                         SurfaceSizeInMALL,
2169                         one_row_per_frame_fits_in_buffer,
2170                         /* Output */
2171                         UsesMALLForStaticScreen); // boolen
2172
2173         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2174                 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2175                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2176                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2177                                 (GPUVMMinPageSizeKBytes[k] > 64);
2178                 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2179         }
2180
2181         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2182 #ifdef __DML_VBA_DEBUG__
2183                 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2184                 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2185 #endif
2186                 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2187                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2188                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2189                                 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2190
2191                 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2192                                 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2193
2194                 if (use_one_row_for_frame[k]) {
2195                         dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2196                         dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2197                         PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2198                         dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2199                         dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2200                         PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2201                         PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2202                 }
2203
2204                 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2205                         DCCMetaBufferSizeNotExceeded[k] = true;
2206                 else
2207                         DCCMetaBufferSizeNotExceeded[k] = false;
2208
2209                 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2210                 if (use_one_row_for_frame[k])
2211                         PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2212
2213                 dml32_CalculateRowBandwidth(
2214                                 GPUVMEnable,
2215                                 myPipe[k].SourcePixelFormat,
2216                                 myPipe[k].VRatio,
2217                                 myPipe[k].VRatioChroma,
2218                                 myPipe[k].DCCEnable,
2219                                 myPipe[k].HTotal / myPipe[k].PixelClock,
2220                                 MetaRowByteY[k], MetaRowByteC[k],
2221                                 meta_row_height[k],
2222                                 meta_row_height_chroma[k],
2223                                 PixelPTEBytesPerRowY[k],
2224                                 PixelPTEBytesPerRowC[k],
2225                                 dpte_row_height_luma[k],
2226                                 dpte_row_height_chroma[k],
2227
2228                                 /* Output */
2229                                 &meta_row_bw[k],
2230                                 &dpte_row_bw[k]);
2231 #ifdef __DML_VBA_DEBUG__
2232                 dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2233                 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2234                                 __func__, k, use_one_row_for_frame_flip[k]);
2235                 dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2236                                 __func__, k, UseMALLForPStateChange[k]);
2237                 dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2238                 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2239                                 __func__, k, dpte_row_width_luma_ub[k]);
2240                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
2241                 dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2242                                 __func__, k, dpte_row_height_chroma[k]);
2243                 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2244                                 __func__, k, dpte_row_width_chroma_ub[k]);
2245                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
2246                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2247                 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2248                                 __func__, k, PTEBufferSizeNotExceeded[k]);
2249                 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2250                 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2251 #endif
2252         }
2253 } // CalculateVMRowAndSwath
2254
2255 unsigned int dml32_CalculateVMAndRowBytes(
2256                 bool ViewportStationary,
2257                 bool DCCEnable,
2258                 unsigned int NumberOfDPPs,
2259                 unsigned int BlockHeight256Bytes,
2260                 unsigned int BlockWidth256Bytes,
2261                 enum source_format_class SourcePixelFormat,
2262                 unsigned int SurfaceTiling,
2263                 unsigned int BytePerPixel,
2264                 enum dm_rotation_angle SourceRotation,
2265                 double SwathWidth,
2266                 unsigned int ViewportHeight,
2267                 unsigned int    ViewportXStart,
2268                 unsigned int    ViewportYStart,
2269                 bool GPUVMEnable,
2270                 bool HostVMEnable,
2271                 unsigned int HostVMMaxNonCachedPageTableLevels,
2272                 unsigned int GPUVMMaxPageTableLevels,
2273                 unsigned int GPUVMMinPageSizeKBytes,
2274                 unsigned int HostVMMinPageSize,
2275                 unsigned int PTEBufferSizeInRequests,
2276                 unsigned int Pitch,
2277                 unsigned int DCCMetaPitch,
2278                 unsigned int MacroTileWidth,
2279                 unsigned int MacroTileHeight,
2280
2281                 /* Output */
2282                 unsigned int *MetaRowByte,
2283                 unsigned int *PixelPTEBytesPerRow,
2284                 unsigned int    *dpte_row_width_ub,
2285                 unsigned int *dpte_row_height,
2286                 unsigned int *dpte_row_height_linear,
2287                 unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2288                 unsigned int    *dpte_row_width_ub_one_row_per_frame,
2289                 unsigned int    *dpte_row_height_one_row_per_frame,
2290                 unsigned int *MetaRequestWidth,
2291                 unsigned int *MetaRequestHeight,
2292                 unsigned int *meta_row_width,
2293                 unsigned int *meta_row_height,
2294                 unsigned int *PixelPTEReqWidth,
2295                 unsigned int *PixelPTEReqHeight,
2296                 unsigned int *PTERequestSize,
2297                 unsigned int    *DPDE0BytesFrame,
2298                 unsigned int    *MetaPTEBytesFrame)
2299 {
2300         unsigned int MPDEBytesFrame;
2301         unsigned int DCCMetaSurfaceBytes;
2302         unsigned int ExtraDPDEBytesFrame;
2303         unsigned int PDEAndMetaPTEBytesFrame;
2304         unsigned int HostVMDynamicLevels = 0;
2305         unsigned int    MacroTileSizeBytes;
2306         unsigned int    vp_height_meta_ub;
2307         unsigned int    vp_height_dpte_ub;
2308         unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2309
2310         if (GPUVMEnable == true && HostVMEnable == true) {
2311                 if (HostVMMinPageSize < 2048)
2312                         HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2313                 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2314                         HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2315                 else
2316                         HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2317         }
2318
2319         *MetaRequestHeight = 8 * BlockHeight256Bytes;
2320         *MetaRequestWidth = 8 * BlockWidth256Bytes;
2321         if (SurfaceTiling == dm_sw_linear) {
2322                 *meta_row_height = 32;
2323                 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2324                                 - dml_floor(ViewportXStart, *MetaRequestWidth);
2325         } else if (!IsVertical(SourceRotation)) {
2326                 *meta_row_height = *MetaRequestHeight;
2327                 if (ViewportStationary && NumberOfDPPs == 1) {
2328                         *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2329                                         *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2330                 } else {
2331                         *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2332                 }
2333                 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2334         } else {
2335                 *meta_row_height = *MetaRequestWidth;
2336                 if (ViewportStationary && NumberOfDPPs == 1) {
2337                         *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2338                                         *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2339                 } else {
2340                         *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2341                 }
2342                 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2343         }
2344
2345         if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2346                 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2347                                 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2348         } else if (!IsVertical(SourceRotation)) {
2349                 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2350         } else {
2351                 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2352         }
2353
2354         DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2355
2356         if (GPUVMEnable == true) {
2357                 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2358                                 (8 * 4.0 * 1024), 1) + 1) * 64;
2359                 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2360         } else {
2361                 *MetaPTEBytesFrame = 0;
2362                 MPDEBytesFrame = 0;
2363         }
2364
2365         if (DCCEnable != true) {
2366                 *MetaPTEBytesFrame = 0;
2367                 MPDEBytesFrame = 0;
2368                 *MetaRowByte = 0;
2369         }
2370
2371         MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2372
2373         if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2374                 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2375                         vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2376                                         MacroTileHeight - 1, MacroTileHeight) -
2377                                         dml_floor(ViewportYStart, MacroTileHeight);
2378                 } else if (!IsVertical(SourceRotation)) {
2379                         vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2380                 } else {
2381                         vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2382                 }
2383                 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2384                                 (8 * 2097152), 1) + 1);
2385                 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2386         } else {
2387                 *DPDE0BytesFrame = 0;
2388                 ExtraDPDEBytesFrame = 0;
2389                 vp_height_dpte_ub = 0;
2390         }
2391
2392         PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2393
2394 #ifdef __DML_VBA_DEBUG__
2395         dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2396         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2397         dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2398         dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2399         dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2400         dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2401         dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2402         dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2403         dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2404         dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2405         dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2406         dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2407         dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2408         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2409         dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2410         dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2411         dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2412 #endif
2413
2414         if (HostVMEnable == true)
2415                 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2416
2417         if (SurfaceTiling == dm_sw_linear) {
2418                 *PixelPTEReqHeight = 1;
2419                 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2420                 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2421                 *PTERequestSize = 64;
2422         } else if (GPUVMMinPageSizeKBytes == 4) {
2423                 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2424                 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2425                 *PTERequestSize = 128;
2426         } else {
2427                 *PixelPTEReqHeight = MacroTileHeight;
2428                 *PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2429                 *PTERequestSize = 64;
2430         }
2431 #ifdef __DML_VBA_DEBUG__
2432         dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2433         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2434         dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2435         dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2436         dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2437         dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2438         dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2439 #endif
2440
2441         *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2442         *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2443                         (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2444                                         (double) *PixelPTEReqWidth;
2445         *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2446                         *PTERequestSize;
2447
2448         if (SurfaceTiling == dm_sw_linear) {
2449                 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2450                                 *PixelPTEReqWidth / Pitch), 1));
2451 #ifdef __DML_VBA_DEBUG__
2452                 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2453                                 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2454                 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2455                                 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2456                 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2457                                 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2458                 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2459                                 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2460                                                 *PixelPTEReqWidth / Pitch), 1));
2461                 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2462 #endif
2463                 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2464                                 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2465                 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2466
2467                 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2468                 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2469                                 PixelPTEReqWidth_linear / Pitch), 1);
2470                 if (*dpte_row_height_linear > 128)
2471                         *dpte_row_height_linear = 128;
2472
2473         } else if (!IsVertical(SourceRotation)) {
2474                 *dpte_row_height = *PixelPTEReqHeight;
2475
2476                 if (GPUVMMinPageSizeKBytes > 64) {
2477                         *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2478                                         *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2479                 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2480                         *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2481                                         *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2482                                         dml_floor(ViewportXStart, *PixelPTEReqWidth);
2483                 } else {
2484                         *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2485                                         *PixelPTEReqWidth;
2486                 }
2487
2488                 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2489         } else {
2490                 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2491
2492                 if (ViewportStationary && (NumberOfDPPs == 1)) {
2493                         *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2494                                         *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2495                 } else {
2496                         *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2497                                         * *PixelPTEReqHeight;
2498                 }
2499
2500                 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2501         }
2502
2503         if (GPUVMEnable != true)
2504                 *PixelPTEBytesPerRow = 0;
2505         if (HostVMEnable == true)
2506                 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2507
2508 #ifdef __DML_VBA_DEBUG__
2509         dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2510         dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2511         dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2512         dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2513         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2514         dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2515         dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2516         dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2517                         __func__, *dpte_row_width_ub_one_row_per_frame);
2518         dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2519                         __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2520         dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2521                         *MetaPTEBytesFrame);
2522 #endif
2523
2524         return PDEAndMetaPTEBytesFrame;
2525 } // CalculateVMAndRowBytes
2526
2527 double dml32_CalculatePrefetchSourceLines(
2528                 double VRatio,
2529                 unsigned int VTaps,
2530                 bool Interlace,
2531                 bool ProgressiveToInterlaceUnitInOPP,
2532                 unsigned int SwathHeight,
2533                 enum dm_rotation_angle SourceRotation,
2534                 bool ViewportStationary,
2535                 double SwathWidth,
2536                 unsigned int ViewportHeight,
2537                 unsigned int ViewportXStart,
2538                 unsigned int ViewportYStart,
2539
2540                 /* Output */
2541                 double *VInitPreFill,
2542                 unsigned int *MaxNumSwath)
2543 {
2544
2545         unsigned int vp_start_rot;
2546         unsigned int sw0_tmp;
2547         unsigned int MaxPartialSwath;
2548         double numLines;
2549
2550 #ifdef __DML_VBA_DEBUG__
2551         dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2552         dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2553         dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2554         dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2555         dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2556         dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2557 #endif
2558         if (ProgressiveToInterlaceUnitInOPP)
2559                 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2560         else
2561                 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2562
2563         if (ViewportStationary) {
2564                 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2565                         vp_start_rot = SwathHeight -
2566                                         (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2567                 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2568                         vp_start_rot = ViewportXStart;
2569                 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2570                         vp_start_rot = SwathHeight -
2571                                         (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2572                 } else {
2573                         vp_start_rot = ViewportYStart;
2574                 }
2575                 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2576                 if (sw0_tmp < *VInitPreFill)
2577                         *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2578                 else
2579                         *MaxNumSwath = 1;
2580                 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2581         } else {
2582                 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2583                 if (*VInitPreFill > 1)
2584                         MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2585                 else
2586                         MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2587         }
2588         numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2589
2590 #ifdef __DML_VBA_DEBUG__
2591         dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2592         dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2593         dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2594         dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2595         dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2596 #endif
2597         return numLines;
2598
2599 } // CalculatePrefetchSourceLines
2600
2601 void dml32_CalculateMALLUseForStaticScreen(
2602                 unsigned int NumberOfActiveSurfaces,
2603                 unsigned int MALLAllocatedForDCNFinal,
2604                 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2605                 unsigned int SurfaceSizeInMALL[],
2606                 bool one_row_per_frame_fits_in_buffer[],
2607
2608                 /* output */
2609                 bool UsesMALLForStaticScreen[])
2610 {
2611         unsigned int k;
2612         unsigned int SurfaceToAddToMALL;
2613         bool CanAddAnotherSurfaceToMALL;
2614         unsigned int TotalSurfaceSizeInMALL;
2615
2616         TotalSurfaceSizeInMALL = 0;
2617         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2618                 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2619                 if (UsesMALLForStaticScreen[k])
2620                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2621 #ifdef __DML_VBA_DEBUG__
2622                 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2623                 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2624 #endif
2625         }
2626
2627         SurfaceToAddToMALL = 0;
2628         CanAddAnotherSurfaceToMALL = true;
2629         while (CanAddAnotherSurfaceToMALL) {
2630                 CanAddAnotherSurfaceToMALL = false;
2631                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2632                         if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2633                                         !UsesMALLForStaticScreen[k] &&
2634                                         UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2635                                         one_row_per_frame_fits_in_buffer[k] &&
2636                                         (!CanAddAnotherSurfaceToMALL ||
2637                                         SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2638                                 CanAddAnotherSurfaceToMALL = true;
2639                                 SurfaceToAddToMALL = k;
2640 #ifdef __DML_VBA_DEBUG__
2641                                 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2642                                                 __func__, k, UseMALLForStaticScreen[k]);
2643 #endif
2644                         }
2645                 }
2646                 if (CanAddAnotherSurfaceToMALL) {
2647                         UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2648                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2649
2650 #ifdef __DML_VBA_DEBUG__
2651                         dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2652                         dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2653 #endif
2654
2655                 }
2656         }
2657 }
2658
2659 void dml32_CalculateRowBandwidth(
2660                 bool GPUVMEnable,
2661                 enum source_format_class SourcePixelFormat,
2662                 double VRatio,
2663                 double VRatioChroma,
2664                 bool DCCEnable,
2665                 double LineTime,
2666                 unsigned int MetaRowByteLuma,
2667                 unsigned int MetaRowByteChroma,
2668                 unsigned int meta_row_height_luma,
2669                 unsigned int meta_row_height_chroma,
2670                 unsigned int PixelPTEBytesPerRowLuma,
2671                 unsigned int PixelPTEBytesPerRowChroma,
2672                 unsigned int dpte_row_height_luma,
2673                 unsigned int dpte_row_height_chroma,
2674                 /* Output */
2675                 double *meta_row_bw,
2676                 double *dpte_row_bw)
2677 {
2678         if (DCCEnable != true) {
2679                 *meta_row_bw = 0;
2680         } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2681                         SourcePixelFormat == dm_rgbe_alpha) {
2682                 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2683                                 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2684         } else {
2685                 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2686         }
2687
2688         if (GPUVMEnable != true) {
2689                 *dpte_row_bw = 0;
2690         } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2691                         SourcePixelFormat == dm_rgbe_alpha) {
2692                 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2693                                 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2694         } else {
2695                 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2696         }
2697 }
2698
/*
 * Return the urgent latency: the largest of the three supplied latencies,
 * plus, when DoUrgentLatencyAdjustment is set, the term
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment != true)
		return worst_latency;

	return worst_latency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2717
2718 void dml32_CalculateUrgentBurstFactor(
2719                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2720                 unsigned int    swath_width_luma_ub,
2721                 unsigned int    swath_width_chroma_ub,
2722                 unsigned int SwathHeightY,
2723                 unsigned int SwathHeightC,
2724                 double  LineTime,
2725                 double  UrgentLatency,
2726                 double  CursorBufferSize,
2727                 unsigned int CursorWidth,
2728                 unsigned int CursorBPP,
2729                 double  VRatio,
2730                 double  VRatioC,
2731                 double  BytePerPixelInDETY,
2732                 double  BytePerPixelInDETC,
2733                 unsigned int    DETBufferSizeY,
2734                 unsigned int    DETBufferSizeC,
2735                 /* Output */
2736                 double *UrgentBurstFactorCursor,
2737                 double *UrgentBurstFactorLuma,
2738                 double *UrgentBurstFactorChroma,
2739                 bool   *NotEnoughUrgentLatencyHiding)
2740 {
2741         double       LinesInDETLuma;
2742         double       LinesInDETChroma;
2743         unsigned int LinesInCursorBuffer;
2744         double       CursorBufferSizeInTime;
2745         double       DETBufferSizeInTimeLuma;
2746         double       DETBufferSizeInTimeChroma;
2747
2748         *NotEnoughUrgentLatencyHiding = 0;
2749
2750         if (CursorWidth > 0) {
2751                 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2752                                 (CursorWidth * CursorBPP / 8.0)), 1.0);
2753                 if (VRatio > 0) {
2754                         CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2755                         if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2756                                 *NotEnoughUrgentLatencyHiding = 1;
2757                                 *UrgentBurstFactorCursor = 0;
2758                         } else {
2759                                 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2760                                                 (CursorBufferSizeInTime - UrgentLatency);
2761                         }
2762                 } else {
2763                         *UrgentBurstFactorCursor = 1;
2764                 }
2765         }
2766
2767         LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2768                         DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2769
2770         if (VRatio > 0) {
2771                 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2772                 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2773                         *NotEnoughUrgentLatencyHiding = 1;
2774                         *UrgentBurstFactorLuma = 0;
2775                 } else {
2776                         *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2777                 }
2778         } else {
2779                 *UrgentBurstFactorLuma = 1;
2780         }
2781
2782         if (BytePerPixelInDETC > 0) {
2783                 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2784                                         1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2785                                         / swath_width_chroma_ub;
2786
2787                 if (VRatio > 0) {
2788                         DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2789                         if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2790                                 *NotEnoughUrgentLatencyHiding = 1;
2791                                 *UrgentBurstFactorChroma = 0;
2792                         } else {
2793                                 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2794                                                 / (DETBufferSizeInTimeChroma - UrgentLatency);
2795                         }
2796                 } else {
2797                         *UrgentBurstFactorChroma = 1;
2798                 }
2799         }
2800 } // CalculateUrgentBurstFactor
2801
2802 void dml32_CalculateDCFCLKDeepSleep(
2803                 unsigned int NumberOfActiveSurfaces,
2804                 unsigned int BytePerPixelY[],
2805                 unsigned int BytePerPixelC[],
2806                 double VRatio[],
2807                 double VRatioChroma[],
2808                 double SwathWidthY[],
2809                 double SwathWidthC[],
2810                 unsigned int DPPPerSurface[],
2811                 double HRatio[],
2812                 double HRatioChroma[],
2813                 double PixelClock[],
2814                 double PSCL_THROUGHPUT[],
2815                 double PSCL_THROUGHPUT_CHROMA[],
2816                 double Dppclk[],
2817                 double ReadBandwidthLuma[],
2818                 double ReadBandwidthChroma[],
2819                 unsigned int ReturnBusWidth,
2820
2821                 /* Output */
2822                 double *DCFClkDeepSleep)
2823 {
2824         unsigned int k;
2825         double   DisplayPipeLineDeliveryTimeLuma;
2826         double   DisplayPipeLineDeliveryTimeChroma;
2827         double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2828         double ReadBandwidth = 0.0;
2829
2830         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2831
2832                 if (VRatio[k] <= 1) {
2833                         DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2834                                         / PixelClock[k];
2835                 } else {
2836                         DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2837                 }
2838                 if (BytePerPixelC[k] == 0) {
2839                         DisplayPipeLineDeliveryTimeChroma = 0;
2840                 } else {
2841                         if (VRatioChroma[k] <= 1) {
2842                                 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2843                                                 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2844                         } else {
2845                                 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2846                                                 / Dppclk[k];
2847                         }
2848                 }
2849
2850                 if (BytePerPixelC[k] > 0) {
2851                         DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2852                                         BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2853                                         __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2854                                         32.0 / DisplayPipeLineDeliveryTimeChroma);
2855                 } else {
2856                         DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2857                                         64.0 / DisplayPipeLineDeliveryTimeLuma;
2858                 }
2859                 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2860
2861 #ifdef __DML_VBA_DEBUG__
2862                 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2863                 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2864 #endif
2865         }
2866
2867         for (k = 0; k < NumberOfActiveSurfaces; ++k)
2868                 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2869
2870         *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2871
2872 #ifdef __DML_VBA_DEBUG__
2873         dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2874         dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2875         dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2876         dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2877 #endif
2878
2879         for (k = 0; k < NumberOfActiveSurfaces; ++k)
2880                 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2881 #ifdef __DML_VBA_DEBUG__
2882         dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2883 #endif
2884 } // CalculateDCFCLKDeepSleep
2885
2886 double dml32_CalculateWriteBackDelay(
2887                 enum source_format_class WritebackPixelFormat,
2888                 double WritebackHRatio,
2889                 double WritebackVRatio,
2890                 unsigned int WritebackVTaps,
2891                 unsigned int         WritebackDestinationWidth,
2892                 unsigned int         WritebackDestinationHeight,
2893                 unsigned int         WritebackSourceHeight,
2894                 unsigned int HTotal)
2895 {
2896         double CalculateWriteBackDelay;
2897         double Line_length;
2898         double Output_lines_last_notclamped;
2899         double WritebackVInit;
2900
2901         WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2902         Line_length = dml_max((double) WritebackDestinationWidth,
2903                         dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2904         Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2905                         dml_ceil(((double)WritebackSourceHeight -
2906                                         (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2907         if (Output_lines_last_notclamped < 0) {
2908                 CalculateWriteBackDelay = 0;
2909         } else {
2910                 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2911                                 (HTotal - WritebackDestinationWidth) + 80;
2912         }
2913         return CalculateWriteBackDelay;
2914 }
2915
2916 void dml32_UseMinimumDCFCLK(
2917                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2918                 bool DRRDisplay[],
2919                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2920                 unsigned int MaxInterDCNTileRepeaters,
2921                 unsigned int MaxPrefetchMode,
2922                 double DRAMClockChangeLatencyFinal,
2923                 double FCLKChangeLatency,
2924                 double SREnterPlusExitTime,
2925                 unsigned int ReturnBusWidth,
2926                 unsigned int RoundTripPingLatencyCycles,
2927                 unsigned int ReorderingBytes,
2928                 unsigned int PixelChunkSizeInKByte,
2929                 unsigned int MetaChunkSize,
2930                 bool GPUVMEnable,
2931                 unsigned int GPUVMMaxPageTableLevels,
2932                 bool HostVMEnable,
2933                 unsigned int NumberOfActiveSurfaces,
2934                 double HostVMMinPageSize,
2935                 unsigned int HostVMMaxNonCachedPageTableLevels,
2936                 bool DynamicMetadataVMEnabled,
2937                 bool ImmediateFlipRequirement,
2938                 bool ProgressiveToInterlaceUnitInOPP,
2939                 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2940                 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2941                 unsigned int VTotal[],
2942                 unsigned int VActive[],
2943                 unsigned int DynamicMetadataTransmittedBytes[],
2944                 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2945                 bool Interlace[],
2946                 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2947                 double RequiredDISPCLK[][2],
2948                 double UrgLatency[],
2949                 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2950                 double ProjectedDCFClkDeepSleep[][2],
2951                 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2952                 unsigned int TotalNumberOfActiveDPP[][2],
2953                 unsigned int TotalNumberOfDCCActiveDPP[][2],
2954                 unsigned int dpte_group_bytes[],
2955                 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2956                 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2957                 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2958                 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2959                 unsigned int BytePerPixelY[],
2960                 unsigned int BytePerPixelC[],
2961                 unsigned int HTotal[],
2962                 double PixelClock[],
2963                 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2964                 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2965                 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2966                 bool DynamicMetadataEnable[],
2967                 double ReadBandwidthLuma[],
2968                 double ReadBandwidthChroma[],
2969                 double DCFCLKPerState[],
2970                 /* Output */
2971                 double DCFCLKState[][2])
2972 {
2973         unsigned int i, j, k;
2974         unsigned int     dummy1;
2975         double dummy2, dummy3;
2976         double   NormalEfficiency;
2977         double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2978
2979         NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2980         for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2981                 for  (j = 0; j <= 1; ++j) {
2982                         double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2983                         double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2984                         double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2985                         double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2986                         double MinimumTWait = 0.0;
2987                         double DPTEBandwidth;
2988                         double DCFCLKRequiredForAverageBandwidth;
2989                         unsigned int ExtraLatencyBytes;
2990                         double ExtraLatencyCycles;
2991                         double DCFCLKRequiredForPeakBandwidth;
2992                         unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2993                         double MinimumTvmPlus2Tr0;
2994
2995                         TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2996                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2997                                 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2998                                                 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2999                                                                 / (15.75 * HTotal[k] / PixelClock[k]);
3000                         }
3001
3002                         for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3003                                 NoOfDPPState[k] = NoOfDPP[i][j][k];
3004
3005                         DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3006                         DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3007
3008                         ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3009                                         TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3010                                         TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3011                                         NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3012                                         HostVMMaxNonCachedPageTableLevels);
3013                         ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3014                                         + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3015                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3016                                 double DCFCLKCyclesRequiredInPrefetch;
3017                                 double PrefetchTime;
3018
3019                                 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3020                                                 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3021                                                 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3022                                                                 * BytePerPixelC[k]) / NormalEfficiency
3023                                                 / ReturnBusWidth;
3024                                 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3025                                                 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3026                                                                 / NormalEfficiency / ReturnBusWidth
3027                                                                 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3028                                                 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3029                                                                 / ReturnBusWidth
3030                                                 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3031                                                 + PixelDCFCLKCyclesRequiredInPrefetch[k];
3032                                 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3033                                                 * HTotal[k] / PixelClock[k];
3034                                 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3035                                                 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3036                                                 UrgLatency[i] * GPUVMMaxPageTableLevels *
3037                                                 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3038
3039                                 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3040                                                 UseMALLForPStateChange[k],
3041                                                 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3042                                                 DRRDisplay[k],
3043                                                 DRAMClockChangeLatencyFinal,
3044                                                 FCLKChangeLatency,
3045                                                 UrgLatency[i],
3046                                                 SREnterPlusExitTime);
3047
3048                                 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3049                                                 MinimumTWait - UrgLatency[i] *
3050                                                 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3051                                                 GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3052                                                 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3053                                                 DynamicMetadataVMExtraLatency[k];
3054
3055                                 if (PrefetchTime > 0) {
3056                                         double ExpectedVRatioPrefetch;
3057
3058                                         ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3059                                                         PixelDCFCLKCyclesRequiredInPrefetch[k] /
3060                                                         DCFCLKCyclesRequiredInPrefetch);
3061                                         DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3062                                                         PixelDCFCLKCyclesRequiredInPrefetch[k] /
3063                                                         PrefetchPixelLinesTime[k] *
3064                                                         dml_max(1.0, ExpectedVRatioPrefetch) *
3065                                                         dml_max(1.0, ExpectedVRatioPrefetch / 4);
3066                                         if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3067                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3068                                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3069                                                                 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3070                                                                 NormalEfficiency / ReturnBusWidth;
3071                                         }
3072                                 } else {
3073                                         DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3074                                 }
3075                                 if (DynamicMetadataEnable[k] == true) {
3076                                         double TSetupPipe;
3077                                         double TdmbfPipe;
3078                                         double TdmsksPipe;
3079                                         double TdmecPipe;
3080                                         double AllowedTimeForUrgentExtraLatency;
3081
3082                                         dml32_CalculateVUpdateAndDynamicMetadataParameters(
3083                                                         MaxInterDCNTileRepeaters,
3084                                                         RequiredDPPCLKPerSurface[i][j][k],
3085                                                         RequiredDISPCLK[i][j],
3086                                                         ProjectedDCFClkDeepSleep[i][j],
3087                                                         PixelClock[k],
3088                                                         HTotal[k],
3089                                                         VTotal[k] - VActive[k],
3090                                                         DynamicMetadataTransmittedBytes[k],
3091                                                         DynamicMetadataLinesBeforeActiveRequired[k],
3092                                                         Interlace[k],
3093                                                         ProgressiveToInterlaceUnitInOPP,
3094
3095                                                         /* output */
3096                                                         &TSetupPipe,
3097                                                         &TdmbfPipe,
3098                                                         &TdmecPipe,
3099                                                         &TdmsksPipe,
3100                                                         &dummy1,
3101                                                         &dummy2,
3102                                                         &dummy3);
3103                                         AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3104                                                         PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3105                                                         TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3106                                         if (AllowedTimeForUrgentExtraLatency > 0)
3107                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3108                                                                 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3109                                                                 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3110                                         else
3111                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3112                                 }
3113                         }
3114                         DCFCLKRequiredForPeakBandwidth = 0;
3115                         for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3116                                 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3117                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3118                         }
3119                         MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3120                                         (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3121                                         (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3122                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3123                                 double MaximumTvmPlus2Tr0PlusTsw;
3124
3125                                 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3126                                                 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3127                                 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3128                                         DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3129                                 } else {
3130                                         DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3131                                                         2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3132                                                                 MinimumTvmPlus2Tr0 -
3133                                                                 PrefetchPixelLinesTime[k] / 4),
3134                                                         (2 * ExtraLatencyCycles +
3135                                                                 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3136                                                                 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3137                                 }
3138                         }
3139                         DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3140                                         dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3141                 }
3142         }
3143 }
3144
/*
 * Sum the bytes that contribute to the extra latency term:
 *   - ReorderingBytes,
 *   - pixel and meta chunk sizes (given in KByte, scaled by 1024) for the
 *     active DPPs,
 *   - when GPUVMEnable is set, NumberOfDPP[k] * dpte_group_bytes[k] for each
 *     surface, scaled by (1 + 8 * HostVMDynamicLevels) and
 *     HostVMInefficiencyFactor.
 *
 * The sum is accumulated in a double and implicitly converted to unsigned int
 * when returned.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
                unsigned int TotalNumberOfActiveDPP,
                unsigned int PixelChunkSizeInKByte,
                unsigned int TotalNumberOfDCCActiveDPP,
                unsigned int MetaChunkSize,
                bool GPUVMEnable,
                bool HostVMEnable,
                unsigned int NumberOfActiveSurfaces,
                unsigned int NumberOfDPP[],
                unsigned int dpte_group_bytes[],
                double HostVMInefficiencyFactor,
                double HostVMMinPageSize,
                unsigned int HostVMMaxNonCachedPageTableLevels)
{
        unsigned int HostVMDynamicLevels = 0;
        unsigned int surface;
        double total_bytes;

        /*
         * Host VM levels only count when both GPUVM and HostVM are on; the
         * level count is reduced by one or two (clamped at zero) once the
         * minimum host page size reaches 2048 or 1048576 respectively.
         */
        if (GPUVMEnable && HostVMEnable) {
                if (HostVMMinPageSize < 2048)
                        HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
                else if (HostVMMinPageSize < 1048576)
                        HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
                else
                        HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
        }

        total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
                        TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

        /* Without GPUVM there is no per-surface dpte contribution. */
        if (!GPUVMEnable)
                return total_bytes;

        for (surface = 0; surface != NumberOfActiveSurfaces; surface++)
                total_bytes += NumberOfDPP[surface] * dpte_group_bytes[surface] *
                                (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;

        return total_bytes;
}
3185
/*
 * Compute the VUpdate/VReady pixel quantities and the dynamic metadata timing
 * terms for one pipe.
 *
 * Inputs:
 *   MaxInterDCNTileRepeaters - multiplier for the per-repeater delay
 *                              (2 / Dppclk + 3 / Dispclk)
 *   Dppclk, Dispclk, DCFClkDeepSleep, PixelClock - clock values used as
 *                              divisors/multipliers below
 *   HTotal, VBlank           - timing values of the pipe
 *   DynamicMetadataTransmittedBytes - byte count used for *Tdmbf
 *   DynamicMetadataLinesBeforeActiveRequired - line count used for *Tdmsks;
 *                              0 selects half of VBlank instead
 *   InterlaceEnable, ProgressiveToInterlaceUnitInOPP - when interlace is
 *                              enabled and the OPP unit is not used, *Tdmsks
 *                              is halved
 *
 * Outputs:
 *   *VUpdateWidthPix, *VReadyOffsetPix - clock-derived delays multiplied by
 *                              PixelClock and rounded up to a whole number
 *   *VUpdateOffsetPix        - HTotal / 4 rounded up
 *   *TSetup                  - sum of the three pixel quantities above divided
 *                              by PixelClock
 *   *Tdmbf                   - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec                   - HTotal / PixelClock
 *   *Tdmsks                  - see DynamicMetadataLinesBeforeActiveRequired
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
                unsigned int MaxInterDCNTileRepeaters,
                double Dppclk,
                double Dispclk,
                double DCFClkDeepSleep,
                double PixelClock,
                unsigned int HTotal,
                unsigned int VBlank,
                unsigned int DynamicMetadataTransmittedBytes,
                unsigned int DynamicMetadataLinesBeforeActiveRequired,
                unsigned int InterlaceEnable,
                bool ProgressiveToInterlaceUnitInOPP,

                /* output */
                double *TSetup,
                double *Tdmbf,
                double *Tdmec,
                double *Tdmsks,
                unsigned int *VUpdateOffsetPix,
                double *VUpdateWidthPix,
                double *VReadyOffsetPix)
{
        double TotalRepeaterDelayTime;

        TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
        *VUpdateWidthPix  =
                        dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
        *VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
                        TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
        *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
        *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
        *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
        *Tdmec = HTotal / PixelClock;

        if (DynamicMetadataLinesBeforeActiveRequired == 0)
                *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
        else
                *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

        if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
                *Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
        /*
         * VUpdateWidthPix and VReadyOffsetPix point to doubles, so they must
         * be printed with %f; the remaining integer values are unsigned.
         */
        dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
        dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
        dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);

        dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n",
                        __func__, DynamicMetadataLinesBeforeActiveRequired);
        dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
        dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
        dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
        dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3240
3241 double dml32_CalculateTWait(
3242                 unsigned int PrefetchMode,
3243                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3244                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3245                 bool DRRDisplay,
3246                 double DRAMClockChangeLatency,
3247                 double FCLKChangeLatency,
3248                 double UrgentLatency,
3249                 double SREnterPlusExitTime)
3250 {
3251         double TWait = 0.0;
3252
3253         if (PrefetchMode == 0 &&
3254                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3255                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3256                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3257                         !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3258                 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3259         } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3260                 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3261         } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3262                 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3263         } else {
3264                 TWait = UrgentLatency;
3265         }
3266
3267 #ifdef __DML_VBA_DEBUG__
3268         dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3269         dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3270 #endif
3271         return TWait;
3272 } // CalculateTWait
3273
3274 // Function: get_return_bw_mbps
3275 // Megabyte per second
3276 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3277                 const int VoltageLevel,
3278                 const bool HostVMEnable,
3279                 const double DCFCLK,
3280                 const double FabricClock,
3281                 const double DRAMSpeed)
3282 {
3283         double ReturnBW = 0.;
3284         double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3285         double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3286         double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3287         double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3288                         IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3289                         IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3290                                         soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3291         double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3292                         IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3293                         IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3294                                         soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3295
3296         if (HostVMEnable != true)
3297                 ReturnBW = PixelDataOnlyReturnBW;
3298         else
3299                 ReturnBW = PixelMixedWithVMDataReturnBW;
3300
3301 #ifdef __DML_VBA_DEBUG__
3302         dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3303         dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3304         dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3305         dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3306         dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3307         dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3308         dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3309         dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3310         dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3311         dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3312         dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3313 #endif
3314         return ReturnBW;
3315 }
3316
3317 // Function: get_return_bw_mbps_vm_only
3318 // Megabyte per second
3319 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3320                 const int VoltageLevel,
3321                 const double DCFCLK,
3322                 const double FabricClock,
3323                 const double DRAMSpeed)
3324 {
3325         double VMDataOnlyReturnBW = dml_min3(
3326                         soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3327                         FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3328                                         * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3329                         DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3330                                         * (VoltageLevel < 2 ?
3331                                                         soc->pct_ideal_dram_bw_after_urgent_strobe :
3332                                                         soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3333 #ifdef __DML_VBA_DEBUG__
3334         dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3335         dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3336         dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3337         dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3338         dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3339 #endif
3340         return VMDataOnlyReturnBW;
3341 }
3342
3343 double dml32_CalculateExtraLatency(
3344                 unsigned int RoundTripPingLatencyCycles,
3345                 unsigned int ReorderingBytes,
3346                 double DCFCLK,
3347                 unsigned int TotalNumberOfActiveDPP,
3348                 unsigned int PixelChunkSizeInKByte,
3349                 unsigned int TotalNumberOfDCCActiveDPP,
3350                 unsigned int MetaChunkSize,
3351                 double ReturnBW,
3352                 bool GPUVMEnable,
3353                 bool HostVMEnable,
3354                 unsigned int NumberOfActiveSurfaces,
3355                 unsigned int NumberOfDPP[],
3356                 unsigned int dpte_group_bytes[],
3357                 double HostVMInefficiencyFactor,
3358                 double HostVMMinPageSize,
3359                 unsigned int HostVMMaxNonCachedPageTableLevels)
3360 {
3361         double ExtraLatencyBytes;
3362         double ExtraLatency;
3363
3364         ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3365                         ReorderingBytes,
3366                         TotalNumberOfActiveDPP,
3367                         PixelChunkSizeInKByte,
3368                         TotalNumberOfDCCActiveDPP,
3369                         MetaChunkSize,
3370                         GPUVMEnable,
3371                         HostVMEnable,
3372                         NumberOfActiveSurfaces,
3373                         NumberOfDPP,
3374                         dpte_group_bytes,
3375                         HostVMInefficiencyFactor,
3376                         HostVMMinPageSize,
3377                         HostVMMaxNonCachedPageTableLevels);
3378
3379         ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3380
3381 #ifdef __DML_VBA_DEBUG__
3382         dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3383         dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3384         dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3385         dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3386         dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3387 #endif
3388
3389         return ExtraLatency;
3390 } // CalculateExtraLatency
3391
3392 bool dml32_CalculatePrefetchSchedule(
3393                 struct vba_vars_st *v,
3394                 unsigned int k,
3395                 double HostVMInefficiencyFactor,
3396                 DmlPipe *myPipe,
3397                 unsigned int DSCDelay,
3398                 unsigned int DPP_RECOUT_WIDTH,
3399                 unsigned int VStartup,
3400                 unsigned int MaxVStartup,
3401                 double UrgentLatency,
3402                 double UrgentExtraLatency,
3403                 double TCalc,
3404                 unsigned int PDEAndMetaPTEBytesFrame,
3405                 unsigned int MetaRowByte,
3406                 unsigned int PixelPTEBytesPerRow,
3407                 double PrefetchSourceLinesY,
3408                 unsigned int SwathWidthY,
3409                 unsigned int VInitPreFillY,
3410                 unsigned int MaxNumSwathY,
3411                 double PrefetchSourceLinesC,
3412                 unsigned int SwathWidthC,
3413                 unsigned int VInitPreFillC,
3414                 unsigned int MaxNumSwathC,
3415                 unsigned int swath_width_luma_ub,
3416                 unsigned int swath_width_chroma_ub,
3417                 unsigned int SwathHeightY,
3418                 unsigned int SwathHeightC,
3419                 double TWait,
3420                 double TPreReq,
3421                 /* Output */
3422                 double   *DSTXAfterScaler,
3423                 double   *DSTYAfterScaler,
3424                 double *DestinationLinesForPrefetch,
3425                 double *PrefetchBandwidth,
3426                 double *DestinationLinesToRequestVMInVBlank,
3427                 double *DestinationLinesToRequestRowInVBlank,
3428                 double *VRatioPrefetchY,
3429                 double *VRatioPrefetchC,
3430                 double *RequiredPrefetchPixDataBWLuma,
3431                 double *RequiredPrefetchPixDataBWChroma,
3432                 bool   *NotEnoughTimeForDynamicMetadata,
3433                 double *Tno_bw,
3434                 double *prefetch_vmrow_bw,
3435                 double *Tdmdl_vm,
3436                 double *Tdmdl,
3437                 double *TSetup,
3438                 unsigned int   *VUpdateOffsetPix,
3439                 double   *VUpdateWidthPix,
3440                 double   *VReadyOffsetPix)
3441 {
3442         double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3443         bool MyError = false;
3444         unsigned int DPPCycles, DISPCLKCycles;
3445         double DSTTotalPixelsAfterScaler;
3446         double LineTime;
3447         double dst_y_prefetch_equ;
3448         double prefetch_bw_oto;
3449         double Tvm_oto;
3450         double Tr0_oto;
3451         double Tvm_oto_lines;
3452         double Tr0_oto_lines;
3453         double dst_y_prefetch_oto;
3454         double TimeForFetchingMetaPTE = 0;
3455         double TimeForFetchingRowInVBlank = 0;
3456         double LinesToRequestPrefetchPixelData = 0;
3457         unsigned int HostVMDynamicLevelsTrips;
3458         double  trip_to_mem;
3459         double  Tvm_trips;
3460         double  Tr0_trips;
3461         double  Tvm_trips_rounded;
3462         double  Tr0_trips_rounded;
3463         double  Lsw_oto;
3464         double  Tpre_rounded;
3465         double  prefetch_bw_equ;
3466         double  Tvm_equ;
3467         double  Tr0_equ;
3468         double  Tdmbf;
3469         double  Tdmec;
3470         double  Tdmsks;
3471         double  prefetch_sw_bytes;
3472         double  bytes_pp;
3473         double  dep_bytes;
3474         unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3475         double  min_Lsw;
3476         double  Tsw_est1 = 0;
3477         double  Tsw_est3 = 0;
3478
3479         if (v->GPUVMEnable == true && v->HostVMEnable == true)
3480                 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3481         else
3482                 HostVMDynamicLevelsTrips = 0;
3483 #ifdef __DML_VBA_DEBUG__
3484         dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
3485         dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
3486         dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3487         dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3488                         __func__, v->HostVMEnable, HostVMInefficiencyFactor);
3489 #endif
3490         dml32_CalculateVUpdateAndDynamicMetadataParameters(
3491                         v->MaxInterDCNTileRepeaters,
3492                         myPipe->Dppclk,
3493                         myPipe->Dispclk,
3494                         myPipe->DCFClkDeepSleep,
3495                         myPipe->PixelClock,
3496                         myPipe->HTotal,
3497                         myPipe->VBlank,
3498                         v->DynamicMetadataTransmittedBytes[k],
3499                         v->DynamicMetadataLinesBeforeActiveRequired[k],
3500                         myPipe->InterlaceEnable,
3501                         myPipe->ProgressiveToInterlaceUnitInOPP,
3502                         TSetup,
3503
3504                         /* output */
3505                         &Tdmbf,
3506                         &Tdmec,
3507                         &Tdmsks,
3508                         VUpdateOffsetPix,
3509                         VUpdateWidthPix,
3510                         VReadyOffsetPix);
3511
3512         LineTime = myPipe->HTotal / myPipe->PixelClock;
3513         trip_to_mem = UrgentLatency;
3514         Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3515
3516         if (v->DynamicMetadataVMEnabled == true)
3517                 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
3518         else
3519                 *Tdmdl = TWait + UrgentExtraLatency;
3520
3521 #ifdef __DML_VBA_ALLOW_DELTA__
3522         if (v->DynamicMetadataEnable[k] == false)
3523                 *Tdmdl = 0.0;
3524 #endif
3525
3526         if (v->DynamicMetadataEnable[k] == true) {
3527                 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3528                         *NotEnoughTimeForDynamicMetadata = true;
3529 #ifdef __DML_VBA_DEBUG__
3530                         dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3531                         dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3532                                         __func__, Tdmbf);
3533                         dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3534                         dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3535                                         __func__, Tdmsks);
3536                         dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3537                                         __func__, *Tdmdl);
3538 #endif
3539                 } else {
3540                         *NotEnoughTimeForDynamicMetadata = false;
3541                 }
3542         } else {
3543                 *NotEnoughTimeForDynamicMetadata = false;
3544         }
3545
3546         *Tdmdl_vm =  (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
3547                         v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
3548
3549         if (myPipe->ScalerEnabled)
3550                 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3551         else
3552                 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3553
3554         DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3555
3556         DISPCLKCycles = v->DISPCLKDelaySubtotal;
3557
3558         if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3559                 return true;
3560
3561         *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3562                         myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3563
3564         *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3565                         + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3566                         + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3567                                         myPipe->HActive / 2 : 0)
3568                         + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3569
3570 #ifdef __DML_VBA_DEBUG__
3571         dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3572         dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3573         dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3574         dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3575         dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3576         dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3577         dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3578         dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3579         dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3580 #endif
3581
3582         if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3583                 *DSTYAfterScaler = 1;
3584         else
3585                 *DSTYAfterScaler = 0;
3586
3587         DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3588         *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3589         *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3590 #ifdef __DML_VBA_DEBUG__
3591         dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3592         dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3593 #endif
3594
3595         MyError = false;
3596
3597         Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3598
3599         if (v->GPUVMEnable == true) {
3600                 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3601                 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3602                 if (v->GPUVMMaxPageTableLevels >= 3) {
3603                         *Tno_bw = UrgentExtraLatency + trip_to_mem *
3604                                         (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3605                 } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
3606                         Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3607                                         4.0 * LineTime; // VBA_ERROR
3608                         *Tno_bw = UrgentExtraLatency;
3609                 } else {
3610                         *Tno_bw = 0;
3611                 }
3612         } else if (myPipe->DCCEnable == true) {
3613                 Tvm_trips_rounded = LineTime / 4.0;
3614                 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3615                 *Tno_bw = 0;
3616         } else {
3617                 Tvm_trips_rounded = LineTime / 4.0;
3618                 Tr0_trips_rounded = LineTime / 2.0;
3619                 *Tno_bw = 0;
3620         }
3621         Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3622         Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3623
3624         if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3625                         || myPipe->SourcePixelFormat == dm_420_12) {
3626                 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3627         } else {
3628                 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3629         }
3630
3631         prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3632                         + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3633         prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3634                         prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3635
3636         min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3637         min_Lsw = dml_max(min_Lsw, 1.0);
3638         Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3639
3640         if (v->GPUVMEnable == true) {
3641                 Tvm_oto = dml_max3(
3642                                 Tvm_trips,
3643                                 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3644                                 LineTime / 4.0);
3645         } else
3646                 Tvm_oto = LineTime / 4.0;
3647
3648         if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3649                 Tr0_oto = dml_max4(
3650                                 Tr0_trips,
3651                                 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3652                                 (LineTime - Tvm_oto)/2.0,
3653                                 LineTime / 4.0);
3654 #ifdef __DML_VBA_DEBUG__
3655                 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3656                                 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3657                 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3658                 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3659                 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3660 #endif
3661         } else
3662                 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3663
3664         Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3665         Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3666         dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3667
3668         dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3669                         (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3670
3671         dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
3672 #ifdef __DML_VBA_DEBUG__
3673         dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3674         dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3675         dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3676         dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3677         dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3678         dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3679         dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3680         dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3681         dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3682         dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3683         dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3684         dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3685         dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3686         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3687         dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3688         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3689         dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3690         dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3691         dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3692         dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3693         dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3694         dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3695         dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3696         dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3697         dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3698         dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3699         dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3700 #endif
3701
3702         dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3703         Tpre_rounded = dst_y_prefetch_equ * LineTime;
3704 #ifdef __DML_VBA_DEBUG__
3705         dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3706         dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3707         dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3708         dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3709                         __func__, VStartup * LineTime);
3710         dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3711         dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3712         dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3713         dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3714         dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3715         dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3716         dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3717                         __func__, *DSTYAfterScaler);
3718 #endif
3719         dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3720                         MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3721
3722         if (prefetch_sw_bytes < dep_bytes)
3723                 prefetch_sw_bytes = 2 * dep_bytes;
3724
3725         *PrefetchBandwidth = 0;
3726         *DestinationLinesToRequestVMInVBlank = 0;
3727         *DestinationLinesToRequestRowInVBlank = 0;
3728         *VRatioPrefetchY = 0;
3729         *VRatioPrefetchC = 0;
3730         *RequiredPrefetchPixDataBWLuma = 0;
3731         if (dst_y_prefetch_equ > 1 &&
3732                         (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
3733                 double PrefetchBandwidth1;
3734                 double PrefetchBandwidth2;
3735                 double PrefetchBandwidth3;
3736                 double PrefetchBandwidth4;
3737
3738                 if (Tpre_rounded - *Tno_bw > 0) {
3739                         PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3740                                         + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3741                                         + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3742                         Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3743                 } else
3744                         PrefetchBandwidth1 = 0;
3745
3746                 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3747                                 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3748                         PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3749                                         + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3750                                         / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3751                 }
3752
3753                 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3754                         PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3755                         (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3756                 else
3757                         PrefetchBandwidth2 = 0;
3758
3759                 if (Tpre_rounded - Tvm_trips_rounded > 0) {
3760                         PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3761                                         + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3762                         Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3763                 } else
3764                         PrefetchBandwidth3 = 0;
3765
3766
3767                 if (VStartup == MaxVStartup &&
3768                                 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3769                                 LineTime - Tvm_trips_rounded > 0) {
3770                         PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3771                                         / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3772                 }
3773
3774                 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3775                         PrefetchBandwidth4 = prefetch_sw_bytes /
3776                                         (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3777                 } else {
3778                         PrefetchBandwidth4 = 0;
3779                 }
3780
3781 #ifdef __DML_VBA_DEBUG__
3782                 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3783                 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3784                 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3785                 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3786                 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3787                 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3788                 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3789                 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3790                 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3791 #endif
3792                 {
3793                         bool Case1OK;
3794                         bool Case2OK;
3795                         bool Case3OK;
3796
3797                         if (PrefetchBandwidth1 > 0) {
3798                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3799                                                 >= Tvm_trips_rounded
3800                                                 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3801                                                                 / PrefetchBandwidth1 >= Tr0_trips_rounded) {
3802                                         Case1OK = true;
3803                                 } else {
3804                                         Case1OK = false;
3805                                 }
3806                         } else {
3807                                 Case1OK = false;
3808                         }
3809
3810                         if (PrefetchBandwidth2 > 0) {
3811                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3812                                                 >= Tvm_trips_rounded
3813                                                 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3814                                                 / PrefetchBandwidth2 < Tr0_trips_rounded) {
3815                                         Case2OK = true;
3816                                 } else {
3817                                         Case2OK = false;
3818                                 }
3819                         } else {
3820                                 Case2OK = false;
3821                         }
3822
3823                         if (PrefetchBandwidth3 > 0) {
3824                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3825                                                 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3826                                                                 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3827                                                                 Tr0_trips_rounded) {
3828                                         Case3OK = true;
3829                                 } else {
3830                                         Case3OK = false;
3831                                 }
3832                         } else {
3833                                 Case3OK = false;
3834                         }
3835
3836                         if (Case1OK)
3837                                 prefetch_bw_equ = PrefetchBandwidth1;
3838                         else if (Case2OK)
3839                                 prefetch_bw_equ = PrefetchBandwidth2;
3840                         else if (Case3OK)
3841                                 prefetch_bw_equ = PrefetchBandwidth3;
3842                         else
3843                                 prefetch_bw_equ = PrefetchBandwidth4;
3844
3845 #ifdef __DML_VBA_DEBUG__
3846                         dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3847                         dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3848                         dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3849                         dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3850 #endif
3851
3852                         if (prefetch_bw_equ > 0) {
3853                                 if (v->GPUVMEnable == true) {
3854                                         Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3855                                                         HostVMInefficiencyFactor / prefetch_bw_equ,
3856                                                         Tvm_trips, LineTime / 4);
3857                                 } else {
3858                                         Tvm_equ = LineTime / 4;
3859                                 }
3860
3861                                 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3862                                         Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3863                                                         HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3864                                                         (LineTime - Tvm_equ) / 2, LineTime / 4);
3865                                 } else {
3866                                         Tr0_equ = (LineTime - Tvm_equ) / 2;
3867                                 }
3868                         } else {
3869                                 Tvm_equ = 0;
3870                                 Tr0_equ = 0;
3871 #ifdef __DML_VBA_DEBUG__
3872                                 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3873 #endif
3874                         }
3875                 }
3876
3877                 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3878                         if (dst_y_prefetch_oto * LineTime < TPreReq) {
3879                                 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3880                         } else {
3881                                 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
3882                         }
3883                         TimeForFetchingMetaPTE = Tvm_oto;
3884                         TimeForFetchingRowInVBlank = Tr0_oto;
3885                         *PrefetchBandwidth = prefetch_bw_oto;
3886                 } else {
3887                         *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3888                         TimeForFetchingMetaPTE = Tvm_equ;
3889                         TimeForFetchingRowInVBlank = Tr0_equ;
3890                         *PrefetchBandwidth = prefetch_bw_equ;
3891                 }
3892
3893                 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3894
3895                 *DestinationLinesToRequestRowInVBlank =
3896                                 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3897
3898                 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3899                                 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3900
3901 #ifdef __DML_VBA_DEBUG__
3902                 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3903                 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3904                                 __func__, *DestinationLinesToRequestVMInVBlank);
3905                 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3906                 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3907                 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3908                                 __func__, *DestinationLinesToRequestRowInVBlank);
3909                 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3910                 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3911 #endif
3912
3913                 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3914                         *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3915                         *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3916 #ifdef __DML_VBA_DEBUG__
3917                         dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3918                         dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3919                         dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3920 #endif
3921                         if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3922                                 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3923                                         *VRatioPrefetchY =
3924                                                         dml_max((double) PrefetchSourceLinesY /
3925                                                                         LinesToRequestPrefetchPixelData,
3926                                                                         (double) MaxNumSwathY * SwathHeightY /
3927                                                                         (LinesToRequestPrefetchPixelData -
3928                                                                         (VInitPreFillY - 3.0) / 2.0));
3929                                         *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3930                                 } else {
3931                                         MyError = true;
3932                                         *VRatioPrefetchY = 0;
3933                                 }
3934 #ifdef __DML_VBA_DEBUG__
3935                                 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3936                                 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3937                                 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3938 #endif
3939                         }
3940
3941                         *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3942                         *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3943
3944 #ifdef __DML_VBA_DEBUG__
3945                         dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3946                         dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3947                         dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3948 #endif
3949                         if ((SwathHeightC > 4)) {
3950                                 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3951                                         *VRatioPrefetchC =
3952                                                 dml_max(*VRatioPrefetchC,
3953                                                         (double) MaxNumSwathC * SwathHeightC /
3954                                                         (LinesToRequestPrefetchPixelData -
3955                                                         (VInitPreFillC - 3.0) / 2.0));
3956                                         *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3957                                 } else {
3958                                         MyError = true;
3959                                         *VRatioPrefetchC = 0;
3960                                 }
3961 #ifdef __DML_VBA_DEBUG__
3962                                 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3963                                 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3964                                 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3965 #endif
3966                         }
3967
3968                         *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3969                                         / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3970                                         / LineTime;
3971
3972 #ifdef __DML_VBA_DEBUG__
3973                         dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3974                         dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3975                         dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3976                         dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3977                                         __func__, *RequiredPrefetchPixDataBWLuma);
3978 #endif
3979                         *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3980                                         LinesToRequestPrefetchPixelData
3981                                         * myPipe->BytePerPixelC
3982                                         * swath_width_chroma_ub / LineTime;
3983                 } else {
3984                         MyError = true;
3985 #ifdef __DML_VBA_DEBUG__
3986                         dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3987                                         __func__, LinesToRequestPrefetchPixelData);
3988 #endif
3989                         *VRatioPrefetchY = 0;
3990                         *VRatioPrefetchC = 0;
3991                         *RequiredPrefetchPixDataBWLuma = 0;
3992                         *RequiredPrefetchPixDataBWChroma = 0;
3993                 }
3994 #ifdef __DML_VBA_DEBUG__
3995                 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3996                         (double)LinesToRequestPrefetchPixelData * LineTime +
3997                         2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3998                 dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
3999                 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
4000                         (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
4001                 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
4002                 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
4003                         TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
4004                         ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
4005                 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
4006                                 PixelPTEBytesPerRow);
4007 #endif
4008         } else {
4009                 MyError = true;
4010 #ifdef __DML_VBA_DEBUG__
4011                 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
4012                                 __func__, dst_y_prefetch_equ);
4013 #endif
4014         }
4015
4016         {
4017                 double prefetch_vm_bw;
4018                 double prefetch_row_bw;
4019
4020                 if (PDEAndMetaPTEBytesFrame == 0) {
4021                         prefetch_vm_bw = 0;
4022                 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
4023 #ifdef __DML_VBA_DEBUG__
4024                         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4025                         dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4026                         dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4027                                         __func__, *DestinationLinesToRequestVMInVBlank);
4028                         dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4029 #endif
4030                         prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4031                                         (*DestinationLinesToRequestVMInVBlank * LineTime);
4032 #ifdef __DML_VBA_DEBUG__
4033                         dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4034 #endif
4035                 } else {
4036                         prefetch_vm_bw = 0;
4037                         MyError = true;
4038 #ifdef __DML_VBA_DEBUG__
4039                         dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4040                                         __func__, *DestinationLinesToRequestVMInVBlank);
4041 #endif
4042                 }
4043
4044                 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4045                         prefetch_row_bw = 0;
4046                 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
4047                         prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4048                                         (*DestinationLinesToRequestRowInVBlank * LineTime);
4049
4050 #ifdef __DML_VBA_DEBUG__
4051                         dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4052                         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4053                         dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4054                                         __func__, *DestinationLinesToRequestRowInVBlank);
4055                         dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4056 #endif
4057                 } else {
4058                         prefetch_row_bw = 0;
4059                         MyError = true;
4060 #ifdef __DML_VBA_DEBUG__
4061                         dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4062                                         __func__, *DestinationLinesToRequestRowInVBlank);
4063 #endif
4064                 }
4065
4066                 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4067         }
4068
4069         if (MyError) {
4070                 *PrefetchBandwidth = 0;
4071                 TimeForFetchingMetaPTE = 0;
4072                 TimeForFetchingRowInVBlank = 0;
4073                 *DestinationLinesToRequestVMInVBlank = 0;
4074                 *DestinationLinesToRequestRowInVBlank = 0;
4075                 *DestinationLinesForPrefetch = 0;
4076                 LinesToRequestPrefetchPixelData = 0;
4077                 *VRatioPrefetchY = 0;
4078                 *VRatioPrefetchC = 0;
4079                 *RequiredPrefetchPixDataBWLuma = 0;
4080                 *RequiredPrefetchPixDataBWChroma = 0;
4081         }
4082
4083         return MyError;
4084 } // CalculatePrefetchSchedule
4085
4086 void dml32_CalculateFlipSchedule(
4087                 double HostVMInefficiencyFactor,
4088                 double UrgentExtraLatency,
4089                 double UrgentLatency,
4090                 unsigned int GPUVMMaxPageTableLevels,
4091                 bool HostVMEnable,
4092                 unsigned int HostVMMaxNonCachedPageTableLevels,
4093                 bool GPUVMEnable,
4094                 double HostVMMinPageSize,
4095                 double PDEAndMetaPTEBytesPerFrame,
4096                 double MetaRowBytes,
4097                 double DPTEBytesPerRow,
4098                 double BandwidthAvailableForImmediateFlip,
4099                 unsigned int TotImmediateFlipBytes,
4100                 enum source_format_class SourcePixelFormat,
4101                 double LineTime,
4102                 double VRatio,
4103                 double VRatioChroma,
4104                 double Tno_bw,
4105                 bool DCCEnable,
4106                 unsigned int dpte_row_height,
4107                 unsigned int meta_row_height,
4108                 unsigned int dpte_row_height_chroma,
4109                 unsigned int meta_row_height_chroma,
4110                 bool    use_one_row_for_frame_flip,
4111
4112                 /* Output */
4113                 double *DestinationLinesToRequestVMInImmediateFlip,
4114                 double *DestinationLinesToRequestRowInImmediateFlip,
4115                 double *final_flip_bw,
4116                 bool *ImmediateFlipSupportedForPipe)
4117 {
4118         double min_row_time = 0.0;
4119         unsigned int HostVMDynamicLevelsTrips;
4120         double TimeForFetchingMetaPTEImmediateFlip;
4121         double TimeForFetchingRowInVBlankImmediateFlip;
4122         double ImmediateFlipBW;
4123
4124         if (GPUVMEnable == true && HostVMEnable == true)
4125                 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4126         else
4127                 HostVMDynamicLevelsTrips = 0;
4128
4129 #ifdef __DML_VBA_DEBUG__
4130         dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4131         dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4132 #endif
4133
4134         if (TotImmediateFlipBytes > 0) {
4135                 if (use_one_row_for_frame_flip) {
4136                         ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4137                                         BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4138                 } else {
4139                         ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4140                                         BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4141                 }
4142                 if (GPUVMEnable == true) {
4143                         TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4144                                         HostVMInefficiencyFactor / ImmediateFlipBW,
4145                                         UrgentExtraLatency + UrgentLatency *
4146                                         (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4147                                         LineTime / 4.0);
4148                 } else {
4149                         TimeForFetchingMetaPTEImmediateFlip = 0;
4150                 }
4151                 if ((GPUVMEnable == true || DCCEnable == true)) {
4152                         TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4153                                         (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4154                                         UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4155                 } else {
4156                         TimeForFetchingRowInVBlankImmediateFlip = 0;
4157                 }
4158
4159                 *DestinationLinesToRequestVMInImmediateFlip =
4160                                 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4161                 *DestinationLinesToRequestRowInImmediateFlip =
4162                                 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4163
4164                 if (GPUVMEnable == true) {
4165                         *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4166                                         (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4167                                         (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4168                                         (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4169                 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4170                         *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4171                                         (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4172                 } else {
4173                         *final_flip_bw = 0;
4174                 }
4175         } else {
4176                 TimeForFetchingMetaPTEImmediateFlip = 0;
4177                 TimeForFetchingRowInVBlankImmediateFlip = 0;
4178                 *DestinationLinesToRequestVMInImmediateFlip = 0;
4179                 *DestinationLinesToRequestRowInImmediateFlip = 0;
4180                 *final_flip_bw = 0;
4181         }
4182
4183         if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4184                 if (GPUVMEnable == true && DCCEnable != true) {
4185                         min_row_time = dml_min(dpte_row_height *
4186                                         LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4187                 } else if (GPUVMEnable != true && DCCEnable == true) {
4188                         min_row_time = dml_min(meta_row_height *
4189                                         LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4190                 } else {
4191                         min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4192                                         LineTime / VRatio, dpte_row_height_chroma * LineTime /
4193                                         VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4194                 }
4195         } else {
4196                 if (GPUVMEnable == true && DCCEnable != true) {
4197                         min_row_time = dpte_row_height * LineTime / VRatio;
4198                 } else if (GPUVMEnable != true && DCCEnable == true) {
4199                         min_row_time = meta_row_height * LineTime / VRatio;
4200                 } else {
4201                         min_row_time =
4202                                 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4203                 }
4204         }
4205
4206         if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4207                         || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4208                                         > min_row_time) {
4209                 *ImmediateFlipSupportedForPipe = false;
4210         } else {
4211                 *ImmediateFlipSupportedForPipe = true;
4212         }
4213
4214 #ifdef __DML_VBA_DEBUG__
4215         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4216         dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4217         dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4218                         __func__, *DestinationLinesToRequestVMInImmediateFlip);
4219         dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4220                         __func__, *DestinationLinesToRequestRowInImmediateFlip);
4221         dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4222         dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4223                         __func__, TimeForFetchingRowInVBlankImmediateFlip);
4224         dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4225         dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4226 #endif
4227 } // CalculateFlipSchedule
4228
4229 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4230                 struct vba_vars_st *v,
4231                 unsigned int PrefetchMode,
4232                 double DCFCLK,
4233                 double ReturnBW,
4234                 SOCParametersList mmSOCParameters,
4235                 double SOCCLK,
4236                 double DCFClkDeepSleep,
4237                 unsigned int DETBufferSizeY[],
4238                 unsigned int DETBufferSizeC[],
4239                 unsigned int SwathHeightY[],
4240                 unsigned int SwathHeightC[],
4241                 double SwathWidthY[],
4242                 double SwathWidthC[],
4243                 unsigned int DPPPerSurface[],
4244                 double BytePerPixelDETY[],
4245                 double BytePerPixelDETC[],
4246                 double DSTXAfterScaler[],
4247                 double DSTYAfterScaler[],
4248                 bool UnboundedRequestEnabled,
4249                 unsigned int CompressedBufferSizeInkByte,
4250
4251                 /* Output */
4252                 enum clock_change_support *DRAMClockChangeSupport,
4253                 double MaxActiveDRAMClockChangeLatencySupported[],
4254                 unsigned int SubViewportLinesNeededInMALL[],
4255                 enum dm_fclock_change_support *FCLKChangeSupport,
4256                 double *MinActiveFCLKChangeLatencySupported,
4257                 bool *USRRetrainingSupport,
4258                 double ActiveDRAMClockChangeLatencyMargin[])
4259 {
4260         unsigned int i, j, k;
4261         unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4262         unsigned int DRAMClockChangeSupportNumber = 0;
4263         unsigned int LastSurfaceWithoutMargin;
4264         unsigned int DRAMClockChangeMethod = 0;
4265         bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4266         double MinActiveFCLKChangeMargin = 0.;
4267         double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4268         double ActiveClockChangeLatencyHidingY;
4269         double ActiveClockChangeLatencyHidingC;
4270         double ActiveClockChangeLatencyHiding;
4271         double EffectiveDETBufferSizeY;
4272         double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4273         double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4274         double TotalPixelBW = 0.0;
4275         bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4276         double     EffectiveLBLatencyHidingY;
4277         double     EffectiveLBLatencyHidingC;
4278         double     LinesInDETY[DC__NUM_DPP__MAX];
4279         double     LinesInDETC[DC__NUM_DPP__MAX];
4280         unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4281         unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4282         double     FullDETBufferingTimeY;
4283         double     FullDETBufferingTimeC;
4284         double     WritebackDRAMClockChangeLatencyMargin;
4285         double     WritebackFCLKChangeLatencyMargin;
4286         double     WritebackLatencyHiding;
4287         bool    SameTimingForFCLKChange;
4288
4289         unsigned int    TotalActiveWriteback = 0;
4290         unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4291         unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4292
4293         v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4294         v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4295                         + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4296         v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4297         v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4298         v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4299                         + 10 / DCFClkDeepSleep;
4300         v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4301                         + 10 / DCFClkDeepSleep;
4302         v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4303                         + 10 / DCFClkDeepSleep;
4304         v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4305                         + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4306
4307 #ifdef __DML_VBA_DEBUG__
4308         dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4309         dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4310         dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4311         dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
4312         dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
4313         dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
4314         dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
4315         dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
4316         dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
4317         dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
4318         dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4319                         __func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
4320 #endif
4321
4322
4323         TotalActiveWriteback = 0;
4324         for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4325                 if (v->WritebackEnable[k] == true)
4326                         TotalActiveWriteback = TotalActiveWriteback + 1;
4327         }
4328
4329         if (TotalActiveWriteback <= 1) {
4330                 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4331         } else {
4332                 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4333                                 + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4334         }
4335         if (v->USRRetrainingRequiredFinal)
4336                 v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
4337                                 + mmSOCParameters.USRRetrainingLatency;
4338
4339         if (TotalActiveWriteback <= 1) {
4340                 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4341                                 + mmSOCParameters.WritebackLatency;
4342                 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4343                                 + mmSOCParameters.WritebackLatency;
4344         } else {
4345                 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4346                                 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4347                 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4348                                 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4349         }
4350
4351         if (v->USRRetrainingRequiredFinal)
4352                 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4353                                 + mmSOCParameters.USRRetrainingLatency;
4354
4355         if (v->USRRetrainingRequiredFinal)
4356                 v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4357                                 + mmSOCParameters.USRRetrainingLatency;
4358
4359 #ifdef __DML_VBA_DEBUG__
4360         dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4361                         __func__, v->Watermark.WritebackDRAMClockChangeWatermark);
4362         dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
4363         dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
4364         dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
4365         dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4366 #endif
4367
4368         for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4369                 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4370                                 SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4371         }
4372
4373         for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4374
4375                 LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
4376                 LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
4377
4378
4379 #ifdef __DML_VBA_DEBUG__
4380                 dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
4381                 dml_print("DML::%s: k=%d, v->LineBufferSizeFinal     = %d\n", __func__, k, v->LineBufferSizeFinal);
4382                 dml_print("DML::%s: k=%d, v->LBBitPerPixel      = %d\n", __func__, k, v->LBBitPerPixel[k]);
4383                 dml_print("DML::%s: k=%d, v->HRatio             = %f\n", __func__, k, v->HRatio[k]);
4384                 dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
4385 #endif
4386
4387                 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4388                 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4389                 EffectiveDETBufferSizeY = DETBufferSizeY[k];
4390
4391                 if (UnboundedRequestEnabled) {
4392                         EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4393                                         + CompressedBufferSizeInkByte * 1024
4394                                                         * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4395                                                         / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4396                 }
4397
4398                 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4399                 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4400                 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4401
4402                 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4403                                 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4404
4405                 if (v->NumberOfActiveSurfaces > 1) {
4406                         ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4407                                         - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4408                                                         / v->PixelClock[k] / v->VRatio[k];
4409                 }
4410
4411                 if (BytePerPixelDETC[k] > 0) {
4412                         LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4413                         LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4414                         FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4415                                         / v->VRatioChroma[k];
4416                         ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4417                                         - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4418                                                         / v->PixelClock[k];
4419                         if (v->NumberOfActiveSurfaces > 1) {
4420                                 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4421                                                 - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4422                                                                 / v->PixelClock[k] / v->VRatioChroma[k];
4423                         }
4424                         ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4425                                         ActiveClockChangeLatencyHidingC);
4426                 } else {
4427                         ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4428                 }
4429
4430                 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4431                                 - v->Watermark.DRAMClockChangeWatermark;
4432                 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4433                                 - v->Watermark.FCLKChangeWatermark;
4434                 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4435
4436                 if (v->WritebackEnable[k]) {
4437                         WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4438                                         / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4439                                                         / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4440                         if (v->WritebackPixelFormat[k] == dm_444_64)
4441                                 WritebackLatencyHiding = WritebackLatencyHiding / 2;
4442
4443                         WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4444                                         - v->Watermark.WritebackDRAMClockChangeWatermark;
4445
4446                         WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4447                                         - v->Watermark.WritebackFCLKChangeWatermark;
4448
4449                         ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4450                                         WritebackFCLKChangeLatencyMargin);
4451                         ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4452                                         WritebackDRAMClockChangeLatencyMargin);
4453                 }
4454                 MaxActiveDRAMClockChangeLatencySupported[k] =
4455                                 (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4456                                                 0 :
4457                                                 (ActiveDRAMClockChangeLatencyMargin[k]
4458                                                                 + mmSOCParameters.DRAMClockChangeLatency);
4459         }
4460
4461         for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4462                 for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4463                         if (i == j ||
4464                                         (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4465                                         (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4466                                         (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4467                                         (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4468                                         v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4469                                         v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4470                                         (v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4471                                 SynchronizedSurfaces[i][j] = true;
4472                         } else {
4473                                 SynchronizedSurfaces[i][j] = false;
4474                         }
4475                 }
4476         }
4477
4478         for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4479                 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4480                                 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4481                                 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4482                         FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4483                         MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4484                         SurfaceWithMinActiveFCLKChangeMargin = k;
4485                 }
4486         }
4487
4488         *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4489
4490         SameTimingForFCLKChange = true;
4491         for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4492                 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4493                         if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4494                                         (SameTimingForFCLKChange ||
4495                                         ActiveFCLKChangeLatencyMargin[k] <
4496                                         SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4497                                 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4498                         }
4499                         SameTimingForFCLKChange = false;
4500                 }
4501         }
4502
4503         if (MinActiveFCLKChangeMargin > 0) {
4504                 *FCLKChangeSupport = dm_fclock_change_vactive;
4505         } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4506                         (PrefetchMode <= 1)) {
4507                 *FCLKChangeSupport = dm_fclock_change_vblank;
4508         } else {
4509                 *FCLKChangeSupport = dm_fclock_change_unsupported;
4510         }
4511
4512         *USRRetrainingSupport = true;
4513         for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4514                 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4515                                 (USRRetrainingLatencyMargin[k] < 0)) {
4516                         *USRRetrainingSupport = false;
4517                 }
4518         }
4519
4520         for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4521                 if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4522                                 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4523                                 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4524                                 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4525                         if (PrefetchMode > 0) {
4526                                 DRAMClockChangeSupportNumber = 2;
4527                         } else if (DRAMClockChangeSupportNumber == 0) {
4528                                 DRAMClockChangeSupportNumber = 1;
4529                                 LastSurfaceWithoutMargin = k;
4530                         } else if (DRAMClockChangeSupportNumber == 1 &&
4531                                         !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4532                                 DRAMClockChangeSupportNumber = 2;
4533                         }
4534                 }
4535         }
4536
4537         for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4538                 if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4539                         DRAMClockChangeMethod = 1;
4540                 else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4541                         DRAMClockChangeMethod = 2;
4542         }
4543
4544         if (DRAMClockChangeMethod == 0) {
4545                 if (DRAMClockChangeSupportNumber == 0)
4546                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4547                 else if (DRAMClockChangeSupportNumber == 1)
4548                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4549                 else
4550                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4551         } else if (DRAMClockChangeMethod == 1) {
4552                 if (DRAMClockChangeSupportNumber == 0)
4553                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4554                 else if (DRAMClockChangeSupportNumber == 1)
4555                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4556                 else
4557                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4558         } else {
4559                 if (DRAMClockChangeSupportNumber == 0)
4560                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4561                 else if (DRAMClockChangeSupportNumber == 1)
4562                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4563                 else
4564                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4565         }
4566
4567         for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4568                 unsigned int dst_y_pstate;
4569                 unsigned int src_y_pstate_l;
4570                 unsigned int src_y_pstate_c;
4571                 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4572
4573                 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
4574                 src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
4575                 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4576                 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4577
4578 #ifdef __DML_VBA_DEBUG__
4579 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4580 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4581 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4582 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4583 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4584 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4585 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4586 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4587 dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
4588 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4589 #endif
4590                 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4591
4592                 if (BytePerPixelDETC[k] > 0) {
4593                         src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
4594                         src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4595                         sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4596                         SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4597
4598 #ifdef __DML_VBA_DEBUG__
4599 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4600 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4601 dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
4602 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4603 #endif
4604                 }
4605         }
4606 #ifdef __DML_VBA_DEBUG__
4607         dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4608         dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4609         dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4610                         __func__, *MinActiveFCLKChangeLatencySupported);
4611         dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4612 #endif
4613 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4614
4615 double dml32_CalculateWriteBackDISPCLK(
4616                 enum source_format_class WritebackPixelFormat,
4617                 double PixelClock,
4618                 double WritebackHRatio,
4619                 double WritebackVRatio,
4620                 unsigned int WritebackHTaps,
4621                 unsigned int WritebackVTaps,
4622                 unsigned int   WritebackSourceWidth,
4623                 unsigned int   WritebackDestinationWidth,
4624                 unsigned int HTotal,
4625                 unsigned int WritebackLineBufferSize,
4626                 double DISPCLKDPPCLKVCOSpeed)
4627 {
4628         double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4629
4630         DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4631         DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4632         DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4633                         WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4634         return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4635 }
4636
4637 void dml32_CalculateMinAndMaxPrefetchMode(
4638                 enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4639                 unsigned int             *MinPrefetchMode,
4640                 unsigned int             *MaxPrefetchMode)
4641 {
4642         if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4643                 *MinPrefetchMode = 3;
4644                 *MaxPrefetchMode = 3;
4645         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4646                 *MinPrefetchMode = 2;
4647                 *MaxPrefetchMode = 2;
4648         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4649                 *MinPrefetchMode = 1;
4650                 *MaxPrefetchMode = 1;
4651         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4652                 *MinPrefetchMode = 0;
4653                 *MaxPrefetchMode = 0;
4654         } else {
4655                 *MinPrefetchMode = 0;
4656                 *MaxPrefetchMode = 3;
4657         }
4658 } // CalculateMinAndMaxPrefetchMode
4659
4660 void dml32_CalculatePixelDeliveryTimes(
4661                 unsigned int             NumberOfActiveSurfaces,
4662                 double              VRatio[],
4663                 double              VRatioChroma[],
4664                 double              VRatioPrefetchY[],
4665                 double              VRatioPrefetchC[],
4666                 unsigned int             swath_width_luma_ub[],
4667                 unsigned int             swath_width_chroma_ub[],
4668                 unsigned int             DPPPerSurface[],
4669                 double              HRatio[],
4670                 double              HRatioChroma[],
4671                 double              PixelClock[],
4672                 double              PSCL_THROUGHPUT[],
4673                 double              PSCL_THROUGHPUT_CHROMA[],
4674                 double              Dppclk[],
4675                 unsigned int             BytePerPixelC[],
4676                 enum dm_rotation_angle   SourceRotation[],
4677                 unsigned int             NumberOfCursors[],
4678                 unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4679                 unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4680                 unsigned int             BlockWidth256BytesY[],
4681                 unsigned int             BlockHeight256BytesY[],
4682                 unsigned int             BlockWidth256BytesC[],
4683                 unsigned int             BlockHeight256BytesC[],
4684
4685                 /* Output */
4686                 double              DisplayPipeLineDeliveryTimeLuma[],
4687                 double              DisplayPipeLineDeliveryTimeChroma[],
4688                 double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4689                 double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4690                 double              DisplayPipeRequestDeliveryTimeLuma[],
4691                 double              DisplayPipeRequestDeliveryTimeChroma[],
4692                 double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4693                 double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4694                 double              CursorRequestDeliveryTime[],
4695                 double              CursorRequestDeliveryTimePrefetch[])
4696 {
4697         double   req_per_swath_ub;
4698         unsigned int k;
4699
4700         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4701
4702 #ifdef __DML_VBA_DEBUG__
4703                 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4704                 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4705                 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4706                 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4707                 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4708                 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4709                 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4710                 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4711                 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4712                 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4713                 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4714 #endif
4715
4716                 if (VRatio[k] <= 1) {
4717                         DisplayPipeLineDeliveryTimeLuma[k] =
4718                                         swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4719                 } else {
4720                         DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4721                 }
4722
4723                 if (BytePerPixelC[k] == 0) {
4724                         DisplayPipeLineDeliveryTimeChroma[k] = 0;
4725                 } else {
4726                         if (VRatioChroma[k] <= 1) {
4727                                 DisplayPipeLineDeliveryTimeChroma[k] =
4728                                         swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4729                         } else {
4730                                 DisplayPipeLineDeliveryTimeChroma[k] =
4731                                         swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4732                         }
4733                 }
4734
4735                 if (VRatioPrefetchY[k] <= 1) {
4736                         DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4737                                         swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4738                 } else {
4739                         DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4740                                         swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4741                 }
4742
4743                 if (BytePerPixelC[k] == 0) {
4744                         DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4745                 } else {
4746                         if (VRatioPrefetchC[k] <= 1) {
4747                                 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4748                                                 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4749                         } else {
4750                                 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4751                                                 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4752                         }
4753                 }
4754 #ifdef __DML_VBA_DEBUG__
4755                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4756                                 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4757                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4758                                 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4759                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4760                                 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4761                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4762                                 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4763 #endif
4764         }
4765
4766         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4767                 if (!IsVertical(SourceRotation[k]))
4768                         req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4769                 else
4770                         req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4771 #ifdef __DML_VBA_DEBUG__
4772                 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4773 #endif
4774
4775                 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4776                 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4777                                 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4778                 if (BytePerPixelC[k] == 0) {
4779                         DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4780                         DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4781                 } else {
4782                         if (!IsVertical(SourceRotation[k]))
4783                                 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4784                         else
4785                                 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4786 #ifdef __DML_VBA_DEBUG__
4787                         dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4788 #endif
4789                         DisplayPipeRequestDeliveryTimeChroma[k] =
4790                                         DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4791                         DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4792                                         DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4793                 }
4794 #ifdef __DML_VBA_DEBUG__
4795                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4796                                 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4797                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4798                                 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4799                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4800                                 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4801                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4802                                 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4803 #endif
4804         }
4805
4806         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4807                 unsigned int cursor_req_per_width;
4808
4809                 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4810                                 256.0 / 8.0, 1.0);
4811                 if (NumberOfCursors[k] > 0) {
4812                         if (VRatio[k] <= 1) {
4813                                 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4814                                                 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4815                         } else {
4816                                 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4817                                                 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4818                         }
4819                         if (VRatioPrefetchY[k] <= 1) {
4820                                 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4821                                                 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4822                         } else {
4823                                 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4824                                                 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4825                         }
4826                 } else {
4827                         CursorRequestDeliveryTime[k] = 0;
4828                         CursorRequestDeliveryTimePrefetch[k] = 0;
4829                 }
4830 #ifdef __DML_VBA_DEBUG__
4831                 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4832                                 __func__, k, NumberOfCursors[k]);
4833                 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4834                                 __func__, k, CursorRequestDeliveryTime[k]);
4835                 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4836                                 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4837 #endif
4838         }
4839 } // CalculatePixelDeliveryTimes
4840
4841 void dml32_CalculateMetaAndPTETimes(
4842                 bool use_one_row_for_frame[],
4843                 unsigned int NumberOfActiveSurfaces,
4844                 bool GPUVMEnable,
4845                 unsigned int MetaChunkSize,
4846                 unsigned int MinMetaChunkSizeBytes,
4847                 unsigned int    HTotal[],
4848                 double  VRatio[],
4849                 double  VRatioChroma[],
4850                 double  DestinationLinesToRequestRowInVBlank[],
4851                 double  DestinationLinesToRequestRowInImmediateFlip[],
4852                 bool DCCEnable[],
4853                 double  PixelClock[],
4854                 unsigned int BytePerPixelY[],
4855                 unsigned int BytePerPixelC[],
4856                 enum dm_rotation_angle SourceRotation[],
4857                 unsigned int dpte_row_height[],
4858                 unsigned int dpte_row_height_chroma[],
4859                 unsigned int meta_row_width[],
4860                 unsigned int meta_row_width_chroma[],
4861                 unsigned int meta_row_height[],
4862                 unsigned int meta_row_height_chroma[],
4863                 unsigned int meta_req_width[],
4864                 unsigned int meta_req_width_chroma[],
4865                 unsigned int meta_req_height[],
4866                 unsigned int meta_req_height_chroma[],
4867                 unsigned int dpte_group_bytes[],
4868                 unsigned int    PTERequestSizeY[],
4869                 unsigned int    PTERequestSizeC[],
4870                 unsigned int    PixelPTEReqWidthY[],
4871                 unsigned int    PixelPTEReqHeightY[],
4872                 unsigned int    PixelPTEReqWidthC[],
4873                 unsigned int    PixelPTEReqHeightC[],
4874                 unsigned int    dpte_row_width_luma_ub[],
4875                 unsigned int    dpte_row_width_chroma_ub[],
4876
4877                 /* Output */
4878                 double DST_Y_PER_PTE_ROW_NOM_L[],
4879                 double DST_Y_PER_PTE_ROW_NOM_C[],
4880                 double DST_Y_PER_META_ROW_NOM_L[],
4881                 double DST_Y_PER_META_ROW_NOM_C[],
4882                 double TimePerMetaChunkNominal[],
4883                 double TimePerChromaMetaChunkNominal[],
4884                 double TimePerMetaChunkVBlank[],
4885                 double TimePerChromaMetaChunkVBlank[],
4886                 double TimePerMetaChunkFlip[],
4887                 double TimePerChromaMetaChunkFlip[],
4888                 double time_per_pte_group_nom_luma[],
4889                 double time_per_pte_group_vblank_luma[],
4890                 double time_per_pte_group_flip_luma[],
4891                 double time_per_pte_group_nom_chroma[],
4892                 double time_per_pte_group_vblank_chroma[],
4893                 double time_per_pte_group_flip_chroma[])
4894 {
4895         unsigned int   meta_chunk_width;
4896         unsigned int   min_meta_chunk_width;
4897         unsigned int   meta_chunk_per_row_int;
4898         unsigned int   meta_row_remainder;
4899         unsigned int   meta_chunk_threshold;
4900         unsigned int   meta_chunks_per_row_ub;
4901         unsigned int   meta_chunk_width_chroma;
4902         unsigned int   min_meta_chunk_width_chroma;
4903         unsigned int   meta_chunk_per_row_int_chroma;
4904         unsigned int   meta_row_remainder_chroma;
4905         unsigned int   meta_chunk_threshold_chroma;
4906         unsigned int   meta_chunks_per_row_ub_chroma;
4907         unsigned int   dpte_group_width_luma;
4908         unsigned int   dpte_groups_per_row_luma_ub;
4909         unsigned int   dpte_group_width_chroma;
4910         unsigned int   dpte_groups_per_row_chroma_ub;
4911         unsigned int k;
4912
4913         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4914                 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4915                 if (BytePerPixelC[k] == 0)
4916                         DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4917                 else
4918                         DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4919                 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4920                 if (BytePerPixelC[k] == 0)
4921                         DST_Y_PER_META_ROW_NOM_C[k] = 0;
4922                 else
4923                         DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4924         }
4925
4926         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4927                 if (DCCEnable[k] == true) {
4928                         meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4929                         min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4930                         meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4931                         meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4932                         if (!IsVertical(SourceRotation[k]))
4933                                 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4934                         else
4935                                 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4936
4937                         if (meta_row_remainder <= meta_chunk_threshold)
4938                                 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4939                         else
4940                                 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4941
4942                         TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4943                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4944                         TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4945                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4946                         TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4947                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4948                         if (BytePerPixelC[k] == 0) {
4949                                 TimePerChromaMetaChunkNominal[k] = 0;
4950                                 TimePerChromaMetaChunkVBlank[k] = 0;
4951                                 TimePerChromaMetaChunkFlip[k] = 0;
4952                         } else {
4953                                 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4954                                                 meta_row_height_chroma[k];
4955                                 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4956                                                 meta_row_height_chroma[k];
4957                                 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4958                                                 meta_chunk_width_chroma;
4959                                 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4960                                 if (!IsVertical(SourceRotation[k])) {
4961                                         meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4962                                                         meta_req_width_chroma[k];
4963                                 } else {
4964                                         meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4965                                                         meta_req_height_chroma[k];
4966                                 }
4967                                 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4968                                         meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4969                                 else
4970                                         meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4971
4972                                 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4973                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4974                                 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4975                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4976                                 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4977                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4978                         }
4979                 } else {
4980                         TimePerMetaChunkNominal[k] = 0;
4981                         TimePerMetaChunkVBlank[k] = 0;
4982                         TimePerMetaChunkFlip[k] = 0;
4983                         TimePerChromaMetaChunkNominal[k] = 0;
4984                         TimePerChromaMetaChunkVBlank[k] = 0;
4985                         TimePerChromaMetaChunkFlip[k] = 0;
4986                 }
4987         }
4988
4989         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4990                 if (GPUVMEnable == true) {
4991                         if (!IsVertical(SourceRotation[k])) {
4992                                 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4993                                                 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4994                         } else {
4995                                 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4996                                                 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
4997                         }
4998
4999                         if (use_one_row_for_frame[k]) {
5000                                 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5001                                                 (double) dpte_group_width_luma / 2.0, 1.0);
5002                         } else {
5003                                 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5004                                                 (double) dpte_group_width_luma, 1.0);
5005                         }
5006 #ifdef __DML_VBA_DEBUG__
5007                         dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5008                                         __func__, k, use_one_row_for_frame[k]);
5009                         dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5010                                         __func__, k, dpte_group_bytes[k]);
5011                         dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5012                                         __func__, k, PTERequestSizeY[k]);
5013                         dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5014                                         __func__, k, PixelPTEReqWidthY[k]);
5015                         dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5016                                         __func__, k, PixelPTEReqHeightY[k]);
5017                         dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5018                                         __func__, k, dpte_row_width_luma_ub[k]);
5019                         dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5020                                         __func__, k, dpte_group_width_luma);
5021                         dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5022                                         __func__, k, dpte_groups_per_row_luma_ub);
5023 #endif
5024
5025                         time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5026                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5027                         time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5028                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5029                         time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5030                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5031                         if (BytePerPixelC[k] == 0) {
5032                                 time_per_pte_group_nom_chroma[k] = 0;
5033                                 time_per_pte_group_vblank_chroma[k] = 0;
5034                                 time_per_pte_group_flip_chroma[k] = 0;
5035                         } else {
5036                                 if (!IsVertical(SourceRotation[k])) {
5037                                         dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5038                                                         (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5039                                 } else {
5040                                         dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5041                                                         (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5042                                 }
5043
5044                                 if (use_one_row_for_frame[k]) {
5045                                         dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5046                                                         (double) dpte_group_width_chroma / 2.0, 1.0);
5047                                 } else {
5048                                         dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5049                                                         (double) dpte_group_width_chroma, 1.0);
5050                                 }
5051 #ifdef __DML_VBA_DEBUG__
5052                                 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5053                                                 __func__, k, dpte_row_width_chroma_ub[k]);
5054                                 dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5055                                                 __func__, k, dpte_group_width_chroma);
5056                                 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5057                                                 __func__, k, dpte_groups_per_row_chroma_ub);
5058 #endif
5059                                 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5060                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5061                                 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5062                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5063                                 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5064                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5065                         }
5066                 } else {
5067                         time_per_pte_group_nom_luma[k] = 0;
5068                         time_per_pte_group_vblank_luma[k] = 0;
5069                         time_per_pte_group_flip_luma[k] = 0;
5070                         time_per_pte_group_nom_chroma[k] = 0;
5071                         time_per_pte_group_vblank_chroma[k] = 0;
5072                         time_per_pte_group_flip_chroma[k] = 0;
5073                 }
5074 #ifdef __DML_VBA_DEBUG__
5075                 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5076                                 __func__, k, DestinationLinesToRequestRowInVBlank[k]);
5077                 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5078                                 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5079                 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5080                                 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5081                 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5082                                 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5083                 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5084                                 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5085                 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5086                                 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5087                 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5088                                 __func__, k, TimePerMetaChunkNominal[k]);
5089                 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5090                                 __func__, k, TimePerMetaChunkVBlank[k]);
5091                 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5092                                 __func__, k, TimePerMetaChunkFlip[k]);
5093                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5094                                 __func__, k, TimePerChromaMetaChunkNominal[k]);
5095                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5096                                 __func__, k, TimePerChromaMetaChunkVBlank[k]);
5097                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5098                                 __func__, k, TimePerChromaMetaChunkFlip[k]);
5099                 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5100                                 __func__, k, time_per_pte_group_nom_luma[k]);
5101                 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5102                                 __func__, k, time_per_pte_group_vblank_luma[k]);
5103                 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5104                                 __func__, k, time_per_pte_group_flip_luma[k]);
5105                 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5106                                 __func__, k, time_per_pte_group_nom_chroma[k]);
5107                 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5108                                 __func__, k, time_per_pte_group_vblank_chroma[k]);
5109                 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5110                                 __func__, k, time_per_pte_group_flip_chroma[k]);
5111 #endif
5112         }
5113 } // CalculateMetaAndPTETimes
5114
5115 void dml32_CalculateVMGroupAndRequestTimes(
5116                 unsigned int     NumberOfActiveSurfaces,
5117                 bool     GPUVMEnable,
5118                 unsigned int     GPUVMMaxPageTableLevels,
5119                 unsigned int     HTotal[],
5120                 unsigned int     BytePerPixelC[],
5121                 double      DestinationLinesToRequestVMInVBlank[],
5122                 double      DestinationLinesToRequestVMInImmediateFlip[],
5123                 bool     DCCEnable[],
5124                 double      PixelClock[],
5125                 unsigned int        dpte_row_width_luma_ub[],
5126                 unsigned int        dpte_row_width_chroma_ub[],
5127                 unsigned int     vm_group_bytes[],
5128                 unsigned int     dpde0_bytes_per_frame_ub_l[],
5129                 unsigned int     dpde0_bytes_per_frame_ub_c[],
5130                 unsigned int        meta_pte_bytes_per_frame_ub_l[],
5131                 unsigned int        meta_pte_bytes_per_frame_ub_c[],
5132
5133                 /* Output */
5134                 double      TimePerVMGroupVBlank[],
5135                 double      TimePerVMGroupFlip[],
5136                 double      TimePerVMRequestVBlank[],
5137                 double      TimePerVMRequestFlip[])
5138 {
5139         unsigned int k;
5140         unsigned int   num_group_per_lower_vm_stage;
5141         unsigned int   num_req_per_lower_vm_stage;
5142
5143 #ifdef __DML_VBA_DEBUG__
5144         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
5145         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
5146 #endif
5147         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5148
5149 #ifdef __DML_VBA_DEBUG__
5150                 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
5151                 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
5152                 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
5153                                 __func__, k, dpde0_bytes_per_frame_ub_l[k]);
5154                 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
5155                                 __func__, k, dpde0_bytes_per_frame_ub_c[k]);
5156                 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
5157                                 __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
5158                 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
5159                                 __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
5160 #endif
5161
5162                 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
5163                         if (DCCEnable[k] == false) {
5164                                 if (BytePerPixelC[k] > 0) {
5165                                         num_group_per_lower_vm_stage = dml_ceil(
5166                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5167                                                         (double) (vm_group_bytes[k]), 1.0) +
5168                                                         dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5169                                                         (double) (vm_group_bytes[k]), 1.0);
5170                                 } else {
5171                                         num_group_per_lower_vm_stage = dml_ceil(
5172                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5173                                                         (double) (vm_group_bytes[k]), 1.0);
5174                                 }
5175                         } else {
5176                                 if (GPUVMMaxPageTableLevels == 1) {
5177                                         if (BytePerPixelC[k] > 0) {
5178                                                 num_group_per_lower_vm_stage = dml_ceil(
5179                                                         (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5180                                                         (double) (vm_group_bytes[k]), 1.0) +
5181                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5182                                                         (double) (vm_group_bytes[k]), 1.0);
5183                                         } else {
5184                                                 num_group_per_lower_vm_stage = dml_ceil(
5185                                                                 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5186                                                                 (double) (vm_group_bytes[k]), 1.0);
5187                                         }
5188                                 } else {
5189                                         if (BytePerPixelC[k] > 0) {
5190                                                 num_group_per_lower_vm_stage = 2 + dml_ceil(
5191                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5192                                                         (double) (vm_group_bytes[k]), 1) +
5193                                                         dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5194                                                         (double) (vm_group_bytes[k]), 1) +
5195                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
5196                                                         (double) (vm_group_bytes[k]), 1) +
5197                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5198                                                         (double) (vm_group_bytes[k]), 1);
5199                                         } else {
5200                                                 num_group_per_lower_vm_stage = 1 + dml_ceil(
5201                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5202                                                         (double) (vm_group_bytes[k]), 1) + dml_ceil(
5203                                                         (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5204                                                         (double) (vm_group_bytes[k]), 1);
5205                                         }
5206                                 }
5207                         }
5208
5209                         if (DCCEnable[k] == false) {
5210                                 if (BytePerPixelC[k] > 0) {
5211                                         num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
5212                                                         dpde0_bytes_per_frame_ub_c[k] / 64;
5213                                 } else {
5214                                         num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
5215                                 }
5216                         } else {
5217                                 if (GPUVMMaxPageTableLevels == 1) {
5218                                         if (BytePerPixelC[k] > 0) {
5219                                                 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
5220                                                                 meta_pte_bytes_per_frame_ub_c[k] / 64;
5221                                         } else {
5222                                                 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
5223                                         }
5224                                 } else {
5225                                         if (BytePerPixelC[k] > 0) {
5226                                                 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5227                                                                 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
5228                                                                 meta_pte_bytes_per_frame_ub_l[k] / 64 +
5229                                                                 meta_pte_bytes_per_frame_ub_c[k] / 64;
5230                                         } else {
5231                                                 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5232                                                                 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
5233                                         }
5234                                 }
5235                         }
5236
5237                         TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5238                                         HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5239                         TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5240                                         HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5241                         TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5242                                         HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5243                         TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5244                                         HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5245
5246                         if (GPUVMMaxPageTableLevels > 2) {
5247                                 TimePerVMGroupVBlank[k]    = TimePerVMGroupVBlank[k] / 2;
5248                                 TimePerVMGroupFlip[k]      = TimePerVMGroupFlip[k] / 2;
5249                                 TimePerVMRequestVBlank[k]  = TimePerVMRequestVBlank[k] / 2;
5250                                 TimePerVMRequestFlip[k]    = TimePerVMRequestFlip[k] / 2;
5251                         }
5252
5253                 } else {
5254                         TimePerVMGroupVBlank[k] = 0;
5255                         TimePerVMGroupFlip[k] = 0;
5256                         TimePerVMRequestVBlank[k] = 0;
5257                         TimePerVMRequestFlip[k] = 0;
5258                 }
5259
5260 #ifdef __DML_VBA_DEBUG__
5261                 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
5262                 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
5263                 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
5264                 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
5265 #endif
5266         }
5267 } // CalculateVMGroupAndRequestTimes
5268
5269 void dml32_CalculateDCCConfiguration(
5270                 bool             DCCEnabled,
5271                 bool             DCCProgrammingAssumesScanDirectionUnknown,
5272                 enum source_format_class SourcePixelFormat,
5273                 unsigned int             SurfaceWidthLuma,
5274                 unsigned int             SurfaceWidthChroma,
5275                 unsigned int             SurfaceHeightLuma,
5276                 unsigned int             SurfaceHeightChroma,
5277                 unsigned int                nomDETInKByte,
5278                 unsigned int             RequestHeight256ByteLuma,
5279                 unsigned int             RequestHeight256ByteChroma,
5280                 enum dm_swizzle_mode     TilingFormat,
5281                 unsigned int             BytePerPixelY,
5282                 unsigned int             BytePerPixelC,
5283                 double              BytePerPixelDETY,
5284                 double              BytePerPixelDETC,
5285                 enum dm_rotation_angle   SourceRotation,
5286                 /* Output */
5287                 unsigned int        *MaxUncompressedBlockLuma,
5288                 unsigned int        *MaxUncompressedBlockChroma,
5289                 unsigned int        *MaxCompressedBlockLuma,
5290                 unsigned int        *MaxCompressedBlockChroma,
5291                 unsigned int        *IndependentBlockLuma,
5292                 unsigned int        *IndependentBlockChroma)
5293 {
5294         typedef enum {
5295                 REQ_256Bytes,
5296                 REQ_128BytesNonContiguous,
5297                 REQ_128BytesContiguous,
5298                 REQ_NA
5299         } RequestType;
5300
5301         RequestType   RequestLuma;
5302         RequestType   RequestChroma;
5303
5304         unsigned int   segment_order_horz_contiguous_luma;
5305         unsigned int   segment_order_horz_contiguous_chroma;
5306         unsigned int   segment_order_vert_contiguous_luma;
5307         unsigned int   segment_order_vert_contiguous_chroma;
5308         unsigned int req128_horz_wc_l;
5309         unsigned int req128_horz_wc_c;
5310         unsigned int req128_vert_wc_l;
5311         unsigned int req128_vert_wc_c;
5312         unsigned int MAS_vp_horz_limit;
5313         unsigned int MAS_vp_vert_limit;
5314         unsigned int max_vp_horz_width;
5315         unsigned int max_vp_vert_height;
5316         unsigned int eff_surf_width_l;
5317         unsigned int eff_surf_width_c;
5318         unsigned int eff_surf_height_l;
5319         unsigned int eff_surf_height_c;
5320         unsigned int full_swath_bytes_horz_wc_l;
5321         unsigned int full_swath_bytes_horz_wc_c;
5322         unsigned int full_swath_bytes_vert_wc_l;
5323         unsigned int full_swath_bytes_vert_wc_c;
5324         unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5325
5326         unsigned int   yuv420;
5327         unsigned int   horz_div_l;
5328         unsigned int   horz_div_c;
5329         unsigned int   vert_div_l;
5330         unsigned int   vert_div_c;
5331
5332         unsigned int     swath_buf_size;
5333         double   detile_buf_vp_horz_limit;
5334         double   detile_buf_vp_vert_limit;
5335
5336         yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5337                         SourcePixelFormat == dm_420_12) ? 1 : 0);
5338         horz_div_l = 1;
5339         horz_div_c = 1;
5340         vert_div_l = 1;
5341         vert_div_c = 1;
5342
5343         if (BytePerPixelY == 1)
5344                 vert_div_l = 0;
5345         if (BytePerPixelC == 1)
5346                 vert_div_c = 0;
5347
5348         if (BytePerPixelC == 0) {
5349                 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5350                 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5351                                 BytePerPixelY / (1 + horz_div_l));
5352                 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5353                                 (1 + vert_div_l));
5354         } else {
5355                 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5356                 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5357                                 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5358                                 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5359                 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5360                                 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5361                                 (1 + vert_div_c) / (1 + yuv420));
5362         }
5363
5364         if (SourcePixelFormat == dm_420_10) {
5365                 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5366                 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5367         }
5368
5369         detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5370         detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5371
5372         MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5373         MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5374         max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5375         max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5376         eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5377         eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5378         eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5379         eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5380
5381         full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5382         full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5383         if (BytePerPixelC > 0) {
5384                 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5385                 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5386         } else {
5387                 full_swath_bytes_horz_wc_c = 0;
5388                 full_swath_bytes_vert_wc_c = 0;
5389         }
5390
5391         if (SourcePixelFormat == dm_420_10) {
5392                 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5393                 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5394                 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5395                 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5396         }
5397
5398         if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5399                 req128_horz_wc_l = 0;
5400                 req128_horz_wc_c = 0;
5401         } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5402                         full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5403                 req128_horz_wc_l = 0;
5404                 req128_horz_wc_c = 1;
5405         } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5406                         full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5407                 req128_horz_wc_l = 1;
5408                 req128_horz_wc_c = 0;
5409         } else {
5410                 req128_horz_wc_l = 1;
5411                 req128_horz_wc_c = 1;
5412         }
5413
5414         if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5415                 req128_vert_wc_l = 0;
5416                 req128_vert_wc_c = 0;
5417         } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5418                         full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5419                 req128_vert_wc_l = 0;
5420                 req128_vert_wc_c = 1;
5421         } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5422                         full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5423                 req128_vert_wc_l = 1;
5424                 req128_vert_wc_c = 0;
5425         } else {
5426                 req128_vert_wc_l = 1;
5427                 req128_vert_wc_c = 1;
5428         }
5429
5430         if (BytePerPixelY == 2) {
5431                 segment_order_horz_contiguous_luma = 0;
5432                 segment_order_vert_contiguous_luma = 1;
5433         } else {
5434                 segment_order_horz_contiguous_luma = 1;
5435                 segment_order_vert_contiguous_luma = 0;
5436         }
5437
5438         if (BytePerPixelC == 2) {
5439                 segment_order_horz_contiguous_chroma = 0;
5440                 segment_order_vert_contiguous_chroma = 1;
5441         } else {
5442                 segment_order_horz_contiguous_chroma = 1;
5443                 segment_order_vert_contiguous_chroma = 0;
5444         }
5445 #ifdef __DML_VBA_DEBUG__
5446         dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5447         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5448         dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5449         dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5450         dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5451         dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5452         dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5453         dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5454         dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5455                         __func__, segment_order_horz_contiguous_chroma);
5456 #endif
5457
5458         if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5459                 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5460                         RequestLuma = REQ_256Bytes;
5461                 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5462                                 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5463                         RequestLuma = REQ_128BytesNonContiguous;
5464                 else
5465                         RequestLuma = REQ_128BytesContiguous;
5466
5467                 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5468                         RequestChroma = REQ_256Bytes;
5469                 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5470                                 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5471                         RequestChroma = REQ_128BytesNonContiguous;
5472                 else
5473                         RequestChroma = REQ_128BytesContiguous;
5474
5475         } else if (!IsVertical(SourceRotation)) {
5476                 if (req128_horz_wc_l == 0)
5477                         RequestLuma = REQ_256Bytes;
5478                 else if (segment_order_horz_contiguous_luma == 0)
5479                         RequestLuma = REQ_128BytesNonContiguous;
5480                 else
5481                         RequestLuma = REQ_128BytesContiguous;
5482
5483                 if (req128_horz_wc_c == 0)
5484                         RequestChroma = REQ_256Bytes;
5485                 else if (segment_order_horz_contiguous_chroma == 0)
5486                         RequestChroma = REQ_128BytesNonContiguous;
5487                 else
5488                         RequestChroma = REQ_128BytesContiguous;
5489
5490         } else {
5491                 if (req128_vert_wc_l == 0)
5492                         RequestLuma = REQ_256Bytes;
5493                 else if (segment_order_vert_contiguous_luma == 0)
5494                         RequestLuma = REQ_128BytesNonContiguous;
5495                 else
5496                         RequestLuma = REQ_128BytesContiguous;
5497
5498                 if (req128_vert_wc_c == 0)
5499                         RequestChroma = REQ_256Bytes;
5500                 else if (segment_order_vert_contiguous_chroma == 0)
5501                         RequestChroma = REQ_128BytesNonContiguous;
5502                 else
5503                         RequestChroma = REQ_128BytesContiguous;
5504         }
5505
5506         if (RequestLuma == REQ_256Bytes) {
5507                 *MaxUncompressedBlockLuma = 256;
5508                 *MaxCompressedBlockLuma = 256;
5509                 *IndependentBlockLuma = 0;
5510         } else if (RequestLuma == REQ_128BytesContiguous) {
5511                 *MaxUncompressedBlockLuma = 256;
5512                 *MaxCompressedBlockLuma = 128;
5513                 *IndependentBlockLuma = 128;
5514         } else {
5515                 *MaxUncompressedBlockLuma = 256;
5516                 *MaxCompressedBlockLuma = 64;
5517                 *IndependentBlockLuma = 64;
5518         }
5519
5520         if (RequestChroma == REQ_256Bytes) {
5521                 *MaxUncompressedBlockChroma = 256;
5522                 *MaxCompressedBlockChroma = 256;
5523                 *IndependentBlockChroma = 0;
5524         } else if (RequestChroma == REQ_128BytesContiguous) {
5525                 *MaxUncompressedBlockChroma = 256;
5526                 *MaxCompressedBlockChroma = 128;
5527                 *IndependentBlockChroma = 128;
5528         } else {
5529                 *MaxUncompressedBlockChroma = 256;
5530                 *MaxCompressedBlockChroma = 64;
5531                 *IndependentBlockChroma = 64;
5532         }
5533
5534         if (DCCEnabled != true || BytePerPixelC == 0) {
5535                 *MaxUncompressedBlockChroma = 0;
5536                 *MaxCompressedBlockChroma = 0;
5537                 *IndependentBlockChroma = 0;
5538         }
5539
5540         if (DCCEnabled != true) {
5541                 *MaxUncompressedBlockLuma = 0;
5542                 *MaxCompressedBlockLuma = 0;
5543                 *IndependentBlockLuma = 0;
5544         }
5545
5546 #ifdef __DML_VBA_DEBUG__
5547         dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5548         dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5549         dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5550         dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5551         dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5552         dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5553 #endif
5554
5555 } // CalculateDCCConfiguration
5556
5557 void dml32_CalculateStutterEfficiency(
5558                 unsigned int      CompressedBufferSizeInkByte,
5559                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5560                 bool   UnboundedRequestEnabled,
5561                 unsigned int      MetaFIFOSizeInKEntries,
5562                 unsigned int      ZeroSizeBufferEntries,
5563                 unsigned int      PixelChunkSizeInKByte,
5564                 unsigned int   NumberOfActiveSurfaces,
5565                 unsigned int      ROBBufferSizeInKByte,
5566                 double    TotalDataReadBandwidth,
5567                 double    DCFCLK,
5568                 double    ReturnBW,
5569                 unsigned int      CompbufReservedSpace64B,
5570                 unsigned int      CompbufReservedSpaceZs,
5571                 double    SRExitTime,
5572                 double    SRExitZ8Time,
5573                 bool   SynchronizeTimingsFinal,
5574                 unsigned int   BlendingAndTiming[],
5575                 double    StutterEnterPlusExitWatermark,
5576                 double    Z8StutterEnterPlusExitWatermark,
5577                 bool   ProgressiveToInterlaceUnitInOPP,
5578                 bool   Interlace[],
5579                 double    MinTTUVBlank[],
5580                 unsigned int   DPPPerSurface[],
5581                 unsigned int      DETBufferSizeY[],
5582                 unsigned int   BytePerPixelY[],
5583                 double    BytePerPixelDETY[],
5584                 double      SwathWidthY[],
5585                 unsigned int   SwathHeightY[],
5586                 unsigned int   SwathHeightC[],
5587                 double    NetDCCRateLuma[],
5588                 double    NetDCCRateChroma[],
5589                 double    DCCFractionOfZeroSizeRequestsLuma[],
5590                 double    DCCFractionOfZeroSizeRequestsChroma[],
5591                 unsigned int      HTotal[],
5592                 unsigned int      VTotal[],
5593                 double    PixelClock[],
5594                 double    VRatio[],
5595                 enum dm_rotation_angle SourceRotation[],
5596                 unsigned int   BlockHeight256BytesY[],
5597                 unsigned int   BlockWidth256BytesY[],
5598                 unsigned int   BlockHeight256BytesC[],
5599                 unsigned int   BlockWidth256BytesC[],
5600                 unsigned int   DCCYMaxUncompressedBlock[],
5601                 unsigned int   DCCCMaxUncompressedBlock[],
5602                 unsigned int      VActive[],
5603                 bool   DCCEnable[],
5604                 bool   WritebackEnable[],
5605                 double    ReadBandwidthSurfaceLuma[],
5606                 double    ReadBandwidthSurfaceChroma[],
5607                 double    meta_row_bw[],
5608                 double    dpte_row_bw[],
5609
5610                 /* Output */
5611                 double   *StutterEfficiencyNotIncludingVBlank,
5612                 double   *StutterEfficiency,
5613                 unsigned int     *NumberOfStutterBurstsPerFrame,
5614                 double   *Z8StutterEfficiencyNotIncludingVBlank,
5615                 double   *Z8StutterEfficiency,
5616                 unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5617                 double   *StutterPeriod,
5618                 bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5619 {
5620
5621         bool FoundCriticalSurface = false;
5622         unsigned int SwathSizeCriticalSurface = 0;
5623         unsigned int LastChunkOfSwathSize;
5624         unsigned int MissingPartOfLastSwathOfDETSize;
5625         double LastZ8StutterPeriod = 0.0;
5626         double LastStutterPeriod = 0.0;
5627         unsigned int TotalNumberOfActiveOTG = 0;
5628         double doublePixelClock;
5629         unsigned int doubleHTotal;
5630         unsigned int doubleVTotal;
5631         bool SameTiming = true;
5632         double DETBufferingTimeY;
5633         double SwathWidthYCriticalSurface = 0.0;
5634         double SwathHeightYCriticalSurface = 0.0;
5635         double VActiveTimeCriticalSurface = 0.0;
5636         double FrameTimeCriticalSurface = 0.0;
5637         unsigned int BytePerPixelYCriticalSurface = 0;
5638         double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5639         unsigned int DETBufferSizeYCriticalSurface = 0;
5640         double MinTTUVBlankCriticalSurface = 0.0;
5641         unsigned int BlockWidth256BytesYCriticalSurface = 0;
5642         bool doublePlaneCriticalSurface = 0;
5643         bool doublePipeCriticalSurface = 0;
5644         double TotalCompressedReadBandwidth;
5645         double TotalRowReadBandwidth;
5646         double AverageDCCCompressionRate;
5647         double EffectiveCompressedBufferSize;
5648         double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5649         double StutterBurstTime;
5650         unsigned int TotalActiveWriteback;
5651         double LinesInDETY;
5652         double LinesInDETYRoundedDownToSwath;
5653         double MaximumEffectiveCompressionLuma;
5654         double MaximumEffectiveCompressionChroma;
5655         double TotalZeroSizeRequestReadBandwidth;
5656         double TotalZeroSizeCompressedReadBandwidth;
5657         double AverageDCCZeroSizeFraction;
5658         double AverageZeroSizeCompressionRate;
5659         unsigned int k;
5660
5661         TotalZeroSizeRequestReadBandwidth = 0;
5662         TotalZeroSizeCompressedReadBandwidth = 0;
5663         TotalRowReadBandwidth = 0;
5664         TotalCompressedReadBandwidth = 0;
5665
5666         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5667                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5668                         if (DCCEnable[k] == true) {
5669                                 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5670                                                 || (!IsVertical(SourceRotation[k])
5671                                                                 && BlockHeight256BytesY[k] > SwathHeightY[k])
5672                                                 || DCCYMaxUncompressedBlock[k] < 256) {
5673                                         MaximumEffectiveCompressionLuma = 2;
5674                                 } else {
5675                                         MaximumEffectiveCompressionLuma = 4;
5676                                 }
5677                                 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5678                                                 + ReadBandwidthSurfaceLuma[k]
5679                                                                 / dml_min(NetDCCRateLuma[k],
5680                                                                                 MaximumEffectiveCompressionLuma);
5681 #ifdef __DML_VBA_DEBUG__
5682                                 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5683                                                 __func__, k, ReadBandwidthSurfaceLuma[k]);
5684                                 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5685                                                 __func__, k, NetDCCRateLuma[k]);
5686                                 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5687                                                 __func__, k, MaximumEffectiveCompressionLuma);
5688 #endif
5689                                 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5690                                                 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5691                                 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5692                                                 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5693                                                                 / MaximumEffectiveCompressionLuma;
5694
5695                                 if (ReadBandwidthSurfaceChroma[k] > 0) {
5696                                         if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5697                                                         || (!IsVertical(SourceRotation[k])
5698                                                                         && BlockHeight256BytesC[k] > SwathHeightC[k])
5699                                                         || DCCCMaxUncompressedBlock[k] < 256) {
5700                                                 MaximumEffectiveCompressionChroma = 2;
5701                                         } else {
5702                                                 MaximumEffectiveCompressionChroma = 4;
5703                                         }
5704                                         TotalCompressedReadBandwidth =
5705                                                         TotalCompressedReadBandwidth
5706                                                         + ReadBandwidthSurfaceChroma[k]
5707                                                         / dml_min(NetDCCRateChroma[k],
5708                                                         MaximumEffectiveCompressionChroma);
5709 #ifdef __DML_VBA_DEBUG__
5710                                         dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5711                                                         __func__, k, ReadBandwidthSurfaceChroma[k]);
5712                                         dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5713                                                         __func__, k, NetDCCRateChroma[k]);
5714                                         dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5715                                                         __func__, k, MaximumEffectiveCompressionChroma);
5716 #endif
5717                                         TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5718                                                         + ReadBandwidthSurfaceChroma[k]
5719                                                                         * DCCFractionOfZeroSizeRequestsChroma[k];
5720                                         TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5721                                                         + ReadBandwidthSurfaceChroma[k]
5722                                                                         * DCCFractionOfZeroSizeRequestsChroma[k]
5723                                                                         / MaximumEffectiveCompressionChroma;
5724                                 }
5725                         } else {
5726                                 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5727                                                 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5728                         }
5729                         TotalRowReadBandwidth = TotalRowReadBandwidth
5730                                         + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5731                 }
5732         }
5733
5734         AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5735         AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5736
5737 #ifdef __DML_VBA_DEBUG__
5738         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5739         dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5740         dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5741         dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5742                         __func__, TotalZeroSizeCompressedReadBandwidth);
5743         dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5744         dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5745         dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5746         dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5747         dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5748         dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5749         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5750 #endif
5751         if (AverageDCCZeroSizeFraction == 1) {
5752                 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5753                                 / TotalZeroSizeCompressedReadBandwidth;
5754                 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5755                                 * AverageZeroSizeCompressionRate
5756                                 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5757                                                 * AverageZeroSizeCompressionRate;
5758         } else if (AverageDCCZeroSizeFraction > 0) {
5759                 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5760                                 / TotalZeroSizeCompressedReadBandwidth;
5761                 EffectiveCompressedBufferSize = dml_min(
5762                                 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5763                                 (double) MetaFIFOSizeInKEntries * 1024 * 64
5764                                         / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5765                                         + 1 / AverageDCCCompressionRate))
5766                                         + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5767                                         * AverageDCCCompressionRate,
5768                                         ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5769                                         / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5770
5771 #ifdef __DML_VBA_DEBUG__
5772                 dml_print("DML::%s: min 1 = %f\n", __func__,
5773                                 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5774                 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5775                                 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5776                                                 AverageDCCCompressionRate));
5777                 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5778                                 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5779                 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5780                                 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5781 #endif
5782         } else {
5783                 EffectiveCompressedBufferSize = dml_min(
5784                                 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5785                                 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5786                                 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5787                                                 * AverageDCCCompressionRate;
5788
5789 #ifdef __DML_VBA_DEBUG__
5790                 dml_print("DML::%s: min 1 = %f\n", __func__,
5791                                 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5792                 dml_print("DML::%s: min 2 = %f\n", __func__,
5793                                 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5794 #endif
5795         }
5796
5797 #ifdef __DML_VBA_DEBUG__
5798         dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5799         dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5800         dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5801 #endif
5802
5803         *StutterPeriod = 0;
5804
5805         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5806                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5807                         LinesInDETY = ((double) DETBufferSizeY[k]
5808                                         + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5809                                                         * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5810                                         / BytePerPixelDETY[k] / SwathWidthY[k];
5811                         LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5812                         DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5813                                         / VRatio[k];
5814 #ifdef __DML_VBA_DEBUG__
5815                         dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5816                         dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5817                         dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5818                         dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5819                                         __func__, k, ReadBandwidthSurfaceLuma[k]);
5820                         dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5821                         dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5822                         dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5823                                         __func__, k, LinesInDETYRoundedDownToSwath);
5824                         dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5825                         dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5826                         dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5827                         dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5828                         dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5829 #endif
5830
5831                         if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5832                                 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5833
5834                                 FoundCriticalSurface = true;
5835                                 *StutterPeriod = DETBufferingTimeY;
5836                                 FrameTimeCriticalSurface = (
5837                                                 isInterlaceTiming ?
5838                                                                 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5839                                                 * (double) HTotal[k] / PixelClock[k];
5840                                 VActiveTimeCriticalSurface = (
5841                                                 isInterlaceTiming ?
5842                                                                 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5843                                                 * (double) HTotal[k] / PixelClock[k];
5844                                 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5845                                 SwathWidthYCriticalSurface = SwathWidthY[k];
5846                                 SwathHeightYCriticalSurface = SwathHeightY[k];
5847                                 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5848                                 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5849                                                 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5850                                 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5851                                 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5852                                 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5853                                 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5854
5855 #ifdef __DML_VBA_DEBUG__
5856                                 dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5857                                                 __func__, k, FoundCriticalSurface);
5858                                 dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5859                                                 __func__, k, *StutterPeriod);
5860                                 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5861                                                 __func__, k, MinTTUVBlankCriticalSurface);
5862                                 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5863                                                 __func__, k, FrameTimeCriticalSurface);
5864                                 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5865                                                 __func__, k, VActiveTimeCriticalSurface);
5866                                 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5867                                                 __func__, k, BytePerPixelYCriticalSurface);
5868                                 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5869                                                 __func__, k, SwathWidthYCriticalSurface);
5870                                 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5871                                                 __func__, k, SwathHeightYCriticalSurface);
5872                                 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5873                                                 __func__, k, BlockWidth256BytesYCriticalSurface);
5874                                 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5875                                                 __func__, k, doublePlaneCriticalSurface);
5876                                 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5877                                                 __func__, k, doublePipeCriticalSurface);
5878                                 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5879                                                 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5880 #endif
5881                         }
5882                 }
5883         }
5884
5885         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5886                         EffectiveCompressedBufferSize);
5887 #ifdef __DML_VBA_DEBUG__
5888         dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5889         dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5890         dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5891                         __func__, *StutterPeriod * TotalDataReadBandwidth);
5892         dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5893         dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5894                         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5895         dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5896         dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5897         dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5898         dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5899 #endif
5900
5901         StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5902                         / ReturnBW
5903                         + (*StutterPeriod * TotalDataReadBandwidth
5904                                         - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5905                         + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5906 #ifdef __DML_VBA_DEBUG__
5907         dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5908                         AverageDCCCompressionRate / ReturnBW);
5909         dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5910                         __func__, (*StutterPeriod * TotalDataReadBandwidth));
5911         dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5912                         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5913         dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5914         dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5915 #endif
5916         StutterBurstTime = dml_max(StutterBurstTime,
5917                         LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5918                                         * SwathWidthYCriticalSurface / ReturnBW);
5919
5920 #ifdef __DML_VBA_DEBUG__
5921         dml_print("DML::%s: Time to finish residue swath=%f\n",
5922                         __func__,
5923                         LinesToFinishSwathTransferStutterCriticalSurface *
5924                         BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5925 #endif
5926
5927         TotalActiveWriteback = 0;
5928         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5929                 if (WritebackEnable[k])
5930                         TotalActiveWriteback = TotalActiveWriteback + 1;
5931         }
5932
5933         if (TotalActiveWriteback == 0) {
5934 #ifdef __DML_VBA_DEBUG__
5935                 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5936                 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5937                 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5938                 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5939 #endif
5940                 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5941                                 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5942                 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5943                                 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5944                 *NumberOfStutterBurstsPerFrame = (
5945                                 *StutterEfficiencyNotIncludingVBlank > 0 ?
5946                                                 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5947                 *Z8NumberOfStutterBurstsPerFrame = (
5948                                 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5949                                                 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5950         } else {
5951                 *StutterEfficiencyNotIncludingVBlank = 0.;
5952                 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5953                 *NumberOfStutterBurstsPerFrame = 0;
5954                 *Z8NumberOfStutterBurstsPerFrame = 0;
5955         }
5956 #ifdef __DML_VBA_DEBUG__
5957         dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5958         dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5959                         __func__, *StutterEfficiencyNotIncludingVBlank);
5960         dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5961                         __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5962         dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5963         dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5964 #endif
5965
5966         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5967                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5968                         if (BlendingAndTiming[k] == k) {
5969                                 if (TotalNumberOfActiveOTG == 0) {
5970                                         doublePixelClock = PixelClock[k];
5971                                         doubleHTotal = HTotal[k];
5972                                         doubleVTotal = VTotal[k];
5973                                 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5974                                                 || doubleVTotal != VTotal[k]) {
5975                                         SameTiming = false;
5976                                 }
5977                                 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5978                         }
5979                 }
5980         }
5981
5982         if (*StutterEfficiencyNotIncludingVBlank > 0) {
5983                 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5984
5985                 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5986                                 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5987                         *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5988                                                 + StutterBurstTime * VActiveTimeCriticalSurface
5989                                                 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5990                 } else {
5991                         *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
5992                 }
5993         } else {
5994                 *StutterEfficiency = 0;
5995         }
5996
5997         if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
5998                 LastZ8StutterPeriod = VActiveTimeCriticalSurface
5999                                 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6000                 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6001                                 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6002                         *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6003                                 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6004                 } else {
6005                         *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6006                 }
6007         } else {
6008                 *Z8StutterEfficiency = 0.;
6009         }
6010
6011 #ifdef __DML_VBA_DEBUG__
6012         dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6013         dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6014         dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6015         dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6016         dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6017         dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6018         dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6019                         __func__, *StutterEfficiencyNotIncludingVBlank);
6020         dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6021 #endif
6022
6023         SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6024                         * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6025         LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6026         MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6027                         - DETBufferSizeYCriticalSurface;
6028
6029         *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6030                         && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6031                         && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6032                         && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6033
6034 #ifdef __DML_VBA_DEBUG__
6035         dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6036         dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6037         dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6038         dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6039 #endif
6040 } // CalculateStutterEfficiency
6041
/*
 * Split the return buffer into a maximum total DET size, a nominal DET size
 * and a minimum compressed buffer size.
 *
 * Inputs:
 *   ConfigReturnBufferSizeInKByte, ROBBufferSizeInKByte
 *       Buffer sizes; 4/5 of their sum is the basis for the total DET size.
 *   MaxNumDPP
 *       Divisor applied to the total DET size to get the nominal DET size.
 *   nomDETInKByteOverrideEnable, nomDETInKByteOverrideValue
 *       VBA_DELTA: when the enable flag is set, the override value replaces
 *       the computed nominal DET size (lets DV override the default).
 *
 * Outputs:
 *   *MaxTotalDETInKByte
 *       dml_ceil() of 4/5 of the summed buffer sizes, called with 64
 *       (presumably the rounding granularity -- confirm against dml_ceil).
 *   *nomDETInKByte
 *       dml_floor() of *MaxTotalDETInKByte / MaxNumDPP, called with 64,
 *       or the override value when the override is enabled.
 *   *MinCompressedBufferSizeInKByte
 *       ConfigReturnBufferSizeInKByte minus *MaxTotalDETInKByte.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
                unsigned int ConfigReturnBufferSizeInKByte,
                unsigned int ROBBufferSizeInKByte,
                unsigned int MaxNumDPP,
                bool nomDETInKByteOverrideEnable, /* VBA_DELTA: DV may override the default DET size */
                unsigned int nomDETInKByteOverrideValue, /* VBA_DELTA */

                /* Output */
                unsigned int *MaxTotalDETInKByte,
                unsigned int *nomDETInKByte,
                unsigned int *MinCompressedBufferSizeInKByte)
{
        const double return_plus_rob_kbytes = (double) ConfigReturnBufferSizeInKByte
                        + (double) ROBBufferSizeInKByte;

        *MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbytes * 4.0 / 5.0, 64);
        /* The nominal size is derived from the already stored (integer) total. */
        *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
        *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
        dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
        dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
        dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
        dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
        dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
        dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

        /* Nothing more to do unless the caller asked for an explicit DET size. */
        if (!nomDETInKByteOverrideEnable)
                return;

        *nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
        dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6078
/*
 * Decide whether the summed per-surface bandwidth fits within ReturnBW.
 *
 * For every surface k in [0, NumberOfActiveSurfaces) the following terms are
 * added, in this order, to a running total:
 *   ReadBandwidthLuma[k]   * UrgentBurstFactorLuma[k]
 *   ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 *   cursor_bw[k]           * UrgentBurstFactorCursor[k]
 *   NumberOfDPP[k]         * meta_row_bandwidth[k]
 *   NumberOfDPP[k]         * dpte_row_bandwidth[k]
 *
 * Returns true only when that total is <= ReturnBW and no surface has its
 * NotUrgentLatencyHiding[] entry set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
                double ReturnBW,
                bool NotUrgentLatencyHiding[],
                double ReadBandwidthLuma[],
                double ReadBandwidthChroma[],
                double cursor_bw[],
                double meta_row_bandwidth[],
                double dpte_row_bandwidth[],
                unsigned int NumberOfDPP[],
                double UrgentBurstFactorLuma[],
                double UrgentBurstFactorChroma[],
                double UrgentBurstFactorCursor[])
{
        unsigned int surf;
        bool hiding_shortfall = false;
        bool supported;
        double required_bw = 0;

        for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
                if (NotUrgentLatencyHiding[surf])
                        hiding_shortfall = true;

                /* One term per statement, accumulated strictly left to right. */
                required_bw = required_bw + ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf];
                required_bw = required_bw + ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf];
                required_bw = required_bw + cursor_bw[surf] * UrgentBurstFactorCursor[surf];
                required_bw = required_bw + NumberOfDPP[surf] * meta_row_bandwidth[surf];
                required_bw = required_bw + NumberOfDPP[surf] * dpte_row_bandwidth[surf];
        }

        supported = !hiding_shortfall && (required_bw <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
        dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, hiding_shortfall);
        dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, required_bw);
        dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
        dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
        return supported;
}
6117
/*
 * Accumulate the bandwidth figure used for the prefetch support check and
 * compare it against ReturnBW.
 *
 * For each surface k in [0, NumberOfActiveSurfaces), dml_max3() is applied to
 * three candidates and the result is added to *PrefetchBandwidth:
 *   1. NumberOfDPP[k] * prefetch_vmrow_bw[k]
 *   2. ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
 *      + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 *      + cursor_bw[k] * UrgentBurstFactorCursor[k]
 *      + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k])
 *   3. NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
 *      + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
 *      + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]
 *
 * Outputs:
 *   *PrefetchBandwidth         - sum of the per-surface dml_max3() results.
 *   *PrefetchBandwidthSupport  - true when *PrefetchBandwidth <= ReturnBW and
 *                                no NotUrgentLatencyHiding[] entry is set.
 *   *FractionOfUrgentBandwidth - *PrefetchBandwidth / ReturnBW.
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
                double ReturnBW,
                bool NotUrgentLatencyHiding[],
                double ReadBandwidthLuma[],
                double ReadBandwidthChroma[],
                double PrefetchBandwidthLuma[],
                double PrefetchBandwidthChroma[],
                double cursor_bw[],
                double meta_row_bandwidth[],
                double dpte_row_bandwidth[],
                double cursor_bw_pre[],
                double prefetch_vmrow_bw[],
                unsigned int NumberOfDPP[],
                double UrgentBurstFactorLuma[],
                double UrgentBurstFactorChroma[],
                double UrgentBurstFactorCursor[],
                double UrgentBurstFactorLumaPre[],
                double UrgentBurstFactorChromaPre[],
                double UrgentBurstFactorCursorPre[],

                /* output */
                double  *PrefetchBandwidth,
                double  *FractionOfUrgentBandwidth,
                bool *PrefetchBandwidthSupport)
{
        unsigned int surf;
        bool hiding_shortfall = false;

        /* A single flagged surface is enough; stop scanning once one is seen. */
        for (surf = 0; surf < NumberOfActiveSurfaces && !hiding_shortfall; ++surf)
                hiding_shortfall = NotUrgentLatencyHiding[surf];

        *PrefetchBandwidth = 0;
        for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
                const double vm_row_bw = NumberOfDPP[surf] * prefetch_vmrow_bw[surf];
                const double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
                                + ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
                                + cursor_bw[surf] * UrgentBurstFactorCursor[surf]
                                + NumberOfDPP[surf] * (meta_row_bandwidth[surf] + dpte_row_bandwidth[surf]);
                const double prefetch_bw = NumberOfDPP[surf]
                                * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
                                        + PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
                                + cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

                *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(vm_row_bw, active_bw, prefetch_bw);
        }

        *PrefetchBandwidthSupport = !hiding_shortfall && (*PrefetchBandwidth <= ReturnBW);
        *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
}
6161
/*
 * Compute the bandwidth left over from ReturnBW for immediate flip.
 *
 * Starting from ReturnBW, each surface k in [0, NumberOfActiveSurfaces)
 * subtracts dml_max() of:
 *   - ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
 *     + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 *     + cursor_bw[k] * UrgentBurstFactorCursor[k]
 *   - NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
 *     + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
 *     + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]
 *
 * Returns the remaining value; nothing in this function clamps it, so it can
 * be negative when the subtracted demand exceeds ReturnBW.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
                double ReturnBW,
                double ReadBandwidthLuma[],
                double ReadBandwidthChroma[],
                double PrefetchBandwidthLuma[],
                double PrefetchBandwidthChroma[],
                double cursor_bw[],
                double cursor_bw_pre[],
                unsigned int NumberOfDPP[],
                double UrgentBurstFactorLuma[],
                double UrgentBurstFactorChroma[],
                double UrgentBurstFactorCursor[],
                double UrgentBurstFactorLumaPre[],
                double UrgentBurstFactorChromaPre[],
                double UrgentBurstFactorCursorPre[])
{
        unsigned int surf;
        double remaining_bw = ReturnBW;

        for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
                const double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
                                + ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
                                + cursor_bw[surf] * UrgentBurstFactorCursor[surf];
                const double prefetch_bw = NumberOfDPP[surf]
                                * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
                                        + PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
                                + cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

                remaining_bw = remaining_bw - dml_max(active_bw, prefetch_bw);
        }

        return remaining_bw;
}
6188
6189 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6190                 double ReturnBW,
6191                 enum immediate_flip_requirement ImmediateFlipRequirement[],
6192                 double final_flip_bw[],
6193                 double ReadBandwidthLuma[],
6194                 double ReadBandwidthChroma[],
6195                 double PrefetchBandwidthLuma[],
6196                 double PrefetchBandwidthChroma[],
6197                 double cursor_bw[],
6198                 double meta_row_bandwidth[],
6199                 double dpte_row_bandwidth[],
6200                 double cursor_bw_pre[],
6201                 double prefetch_vmrow_bw[],
6202                 unsigned int NumberOfDPP[],
6203                 double UrgentBurstFactorLuma[],
6204                 double UrgentBurstFactorChroma[],
6205                 double UrgentBurstFactorCursor[],
6206                 double UrgentBurstFactorLumaPre[],
6207                 double UrgentBurstFactorChromaPre[],
6208                 double UrgentBurstFactorCursorPre[],
6209
6210                 /* output */
6211                 double  *TotalBandwidth,
6212                 double  *FractionOfUrgentBandwidth,
6213                 bool *ImmediateFlipBandwidthSupport)
6214 {
6215         unsigned int k;
6216         *TotalBandwidth = 0;
6217         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6218                 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6219                         *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6220                                         NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6221                                         NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6222                 } else {
6223                         *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6224                                         NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6225                                         NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6226                 }
6227         }
6228         *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6229         *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6230 }