drm/amd/display: Remove unused dml32_CalculatedoublePipeDPPCLKAndSCLThroughput function
[platform/kernel/linux-rpi.git] / drivers / gpu / drm / amd / display / dc / dml / dcn32 / display_mode_vba_util_32.c
1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29
30 unsigned int dml32_dscceComputeDelay(
31                 unsigned int bpc,
32                 double BPP,
33                 unsigned int sliceWidth,
34                 unsigned int numSlices,
35                 enum output_format_class pixelFormat,
36                 enum output_encoder_class Output)
37 {
38         // valid bpc         = source bits per component in the set of {8, 10, 12}
39         // valid bpp         = increments of 1/16 of a bit
40         //                    min = 6/7/8 in N420/N422/444, respectively
41         //                    max = such that compression is 1:1
42         //valid sliceWidth  = number of pixels per slice line,
43         //      must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
44         //valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
45         //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
46
47         // fixed value
48         unsigned int rcModelSize = 8192;
49
50         // N422/N420 operate at 2 pixels per clock
51         unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
52         Delay, pixels;
53
54         if (pixelFormat == dm_420)
55                 pixelsPerClock = 2;
56         else if (pixelFormat == dm_n422)
57                 pixelsPerClock = 2;
58         // #all other modes operate at 1 pixel per clock
59         else
60                 pixelsPerClock = 1;
61
62         //initial transmit delay as per PPS
63         initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
64
65         //compute ssm delay
66         if (bpc == 8)
67                 D = 81;
68         else if (bpc == 10)
69                 D = 89;
70         else
71                 D = 113;
72
73         //divide by pixel per cycle to compute slice width as seen by DSC
74         w = sliceWidth / pixelsPerClock;
75
76         //422 mode has an additional cycle of delay
77         if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
78                 s = 0;
79         else
80                 s = 1;
81
82         //main calculation for the dscce
83         ix = initalXmitDelay + 45;
84         wx = (w + 2) / 3;
85         p = 3 * wx - w;
86         l0 = ix / w;
87         a = ix + p * l0;
88         ax = (a + 2) / 3 + D + 6 + 1;
89         L = (ax + wx - 1) / wx;
90         if ((ix % w) == 0 && p != 0)
91                 lstall = 1;
92         else
93                 lstall = 0;
94         Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
95
96         //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
97         pixels = Delay * 3 * pixelsPerClock;
98
99 #ifdef __DML_VBA_DEBUG__
100         dml_print("DML::%s: bpc: %d\n", __func__, bpc);
101         dml_print("DML::%s: BPP: %f\n", __func__, BPP);
102         dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
103         dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
104         dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
105         dml_print("DML::%s: Output: %d\n", __func__, Output);
106         dml_print("DML::%s: pixels: %d\n", __func__, pixels);
107 #endif
108
109         return pixels;
110 }
111
112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
113 {
114         unsigned int Delay = 0;
115
116         if (pixelFormat == dm_420) {
117                 //   sfr
118                 Delay = Delay + 2;
119                 //   dsccif
120                 Delay = Delay + 0;
121                 //   dscc - input deserializer
122                 Delay = Delay + 3;
123                 //   dscc gets pixels every other cycle
124                 Delay = Delay + 2;
125                 //   dscc - input cdc fifo
126                 Delay = Delay + 12;
127                 //   dscc gets pixels every other cycle
128                 Delay = Delay + 13;
129                 //   dscc - cdc uncertainty
130                 Delay = Delay + 2;
131                 //   dscc - output cdc fifo
132                 Delay = Delay + 7;
133                 //   dscc gets pixels every other cycle
134                 Delay = Delay + 3;
135                 //   dscc - cdc uncertainty
136                 Delay = Delay + 2;
137                 //   dscc - output serializer
138                 Delay = Delay + 1;
139                 //   sft
140                 Delay = Delay + 1;
141         } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
142                 //   sfr
143                 Delay = Delay + 2;
144                 //   dsccif
145                 Delay = Delay + 1;
146                 //   dscc - input deserializer
147                 Delay = Delay + 5;
148                 //  dscc - input cdc fifo
149                 Delay = Delay + 25;
150                 //   dscc - cdc uncertainty
151                 Delay = Delay + 2;
152                 //   dscc - output cdc fifo
153                 Delay = Delay + 10;
154                 //   dscc - cdc uncertainty
155                 Delay = Delay + 2;
156                 //   dscc - output serializer
157                 Delay = Delay + 1;
158                 //   sft
159                 Delay = Delay + 1;
160         } else {
161                 //   sfr
162                 Delay = Delay + 2;
163                 //   dsccif
164                 Delay = Delay + 0;
165                 //   dscc - input deserializer
166                 Delay = Delay + 3;
167                 //   dscc - input cdc fifo
168                 Delay = Delay + 12;
169                 //   dscc - cdc uncertainty
170                 Delay = Delay + 2;
171                 //   dscc - output cdc fifo
172                 Delay = Delay + 7;
173                 //   dscc - output serializer
174                 Delay = Delay + 1;
175                 //   dscc - cdc uncertainty
176                 Delay = Delay + 2;
177                 //   sft
178                 Delay = Delay + 1;
179         }
180
181         return Delay;
182 }
183
184
185 bool IsVertical(enum dm_rotation_angle Scan)
186 {
187         bool is_vert = false;
188
189         if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
190                 is_vert = true;
191         else
192                 is_vert = false;
193         return is_vert;
194 }
195
196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
197                 double HRatio,
198                 double HRatioChroma,
199                 double VRatio,
200                 double VRatioChroma,
201                 double MaxDCHUBToPSCLThroughput,
202                 double MaxPSCLToLBThroughput,
203                 double PixelClock,
204                 enum source_format_class SourcePixelFormat,
205                 unsigned int HTaps,
206                 unsigned int HTapsChroma,
207                 unsigned int VTaps,
208                 unsigned int VTapsChroma,
209
210                 /* output */
211                 double *PSCL_THROUGHPUT,
212                 double *PSCL_THROUGHPUT_CHROMA,
213                 double *DPPCLKUsingSingleDPP)
214 {
215         double DPPCLKUsingSingleDPPLuma;
216         double DPPCLKUsingSingleDPPChroma;
217
218         if (HRatio > 1) {
219                 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
220                                 dml_ceil((double) HTaps / 6.0, 1.0));
221         } else {
222                 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
223         }
224
225         DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
226                         *PSCL_THROUGHPUT, 1);
227
228         if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
229                 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
230
231         if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
232                         SourcePixelFormat != dm_rgbe_alpha)) {
233                 *PSCL_THROUGHPUT_CHROMA = 0;
234                 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
235         } else {
236                 if (HRatioChroma > 1) {
237                         *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
238                                         HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
239                 } else {
240                         *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
241                 }
242                 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
243                                 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
244                 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
245                         DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
246                 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
247         }
248 }
249
250 void dml32_CalculateBytePerPixelAndBlockSizes(
251                 enum source_format_class SourcePixelFormat,
252                 enum dm_swizzle_mode SurfaceTiling,
253
254                 /* Output */
255                 unsigned int *BytePerPixelY,
256                 unsigned int *BytePerPixelC,
257                 double  *BytePerPixelDETY,
258                 double  *BytePerPixelDETC,
259                 unsigned int *BlockHeight256BytesY,
260                 unsigned int *BlockHeight256BytesC,
261                 unsigned int *BlockWidth256BytesY,
262                 unsigned int *BlockWidth256BytesC,
263                 unsigned int *MacroTileHeightY,
264                 unsigned int *MacroTileHeightC,
265                 unsigned int *MacroTileWidthY,
266                 unsigned int *MacroTileWidthC)
267 {
268         if (SourcePixelFormat == dm_444_64) {
269                 *BytePerPixelDETY = 8;
270                 *BytePerPixelDETC = 0;
271                 *BytePerPixelY = 8;
272                 *BytePerPixelC = 0;
273         } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
274                 *BytePerPixelDETY = 4;
275                 *BytePerPixelDETC = 0;
276                 *BytePerPixelY = 4;
277                 *BytePerPixelC = 0;
278         } else if (SourcePixelFormat == dm_444_16) {
279                 *BytePerPixelDETY = 2;
280                 *BytePerPixelDETC = 0;
281                 *BytePerPixelY = 2;
282                 *BytePerPixelC = 0;
283         } else if (SourcePixelFormat == dm_444_8) {
284                 *BytePerPixelDETY = 1;
285                 *BytePerPixelDETC = 0;
286                 *BytePerPixelY = 1;
287                 *BytePerPixelC = 0;
288         } else if (SourcePixelFormat == dm_rgbe_alpha) {
289                 *BytePerPixelDETY = 4;
290                 *BytePerPixelDETC = 1;
291                 *BytePerPixelY = 4;
292                 *BytePerPixelC = 1;
293         } else if (SourcePixelFormat == dm_420_8) {
294                 *BytePerPixelDETY = 1;
295                 *BytePerPixelDETC = 2;
296                 *BytePerPixelY = 1;
297                 *BytePerPixelC = 2;
298         } else if (SourcePixelFormat == dm_420_12) {
299                 *BytePerPixelDETY = 2;
300                 *BytePerPixelDETC = 4;
301                 *BytePerPixelY = 2;
302                 *BytePerPixelC = 4;
303         } else {
304                 *BytePerPixelDETY = 4.0 / 3;
305                 *BytePerPixelDETC = 8.0 / 3;
306                 *BytePerPixelY = 2;
307                 *BytePerPixelC = 4;
308         }
309 #ifdef __DML_VBA_DEBUG__
310         dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
311         dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
312         dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
313         dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
314         dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
315 #endif
316         if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
317                         || SourcePixelFormat == dm_444_16
318                         || SourcePixelFormat == dm_444_8
319                         || SourcePixelFormat == dm_mono_16
320                         || SourcePixelFormat == dm_mono_8
321                         || SourcePixelFormat == dm_rgbe)) {
322                 if (SurfaceTiling == dm_sw_linear)
323                         *BlockHeight256BytesY = 1;
324                 else if (SourcePixelFormat == dm_444_64)
325                         *BlockHeight256BytesY = 4;
326                 else if (SourcePixelFormat == dm_444_8)
327                         *BlockHeight256BytesY = 16;
328                 else
329                         *BlockHeight256BytesY = 8;
330
331                 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
332                 *BlockHeight256BytesC = 0;
333                 *BlockWidth256BytesC = 0;
334         } else {
335                 if (SurfaceTiling == dm_sw_linear) {
336                         *BlockHeight256BytesY = 1;
337                         *BlockHeight256BytesC = 1;
338                 } else if (SourcePixelFormat == dm_rgbe_alpha) {
339                         *BlockHeight256BytesY = 8;
340                         *BlockHeight256BytesC = 16;
341                 } else if (SourcePixelFormat == dm_420_8) {
342                         *BlockHeight256BytesY = 16;
343                         *BlockHeight256BytesC = 8;
344                 } else {
345                         *BlockHeight256BytesY = 8;
346                         *BlockHeight256BytesC = 8;
347                 }
348                 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
349                 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
350         }
351 #ifdef __DML_VBA_DEBUG__
352         dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
353         dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
354         dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
355         dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
356 #endif
357
358         if (SurfaceTiling == dm_sw_linear) {
359                 *MacroTileHeightY = *BlockHeight256BytesY;
360                 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
361                 *MacroTileHeightC = *BlockHeight256BytesC;
362                 if (*MacroTileHeightC == 0)
363                         *MacroTileWidthC = 0;
364                 else
365                         *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
366         } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
367                         SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
368                 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
369                 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
370                 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
371                 if (*MacroTileHeightC == 0)
372                         *MacroTileWidthC = 0;
373                 else
374                         *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
375         } else {
376                 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
377                 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
378                 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
379                 if (*MacroTileHeightC == 0)
380                         *MacroTileWidthC = 0;
381                 else
382                         *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
383         }
384
385 #ifdef __DML_VBA_DEBUG__
386         dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
387         dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
388         dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
389         dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
390 #endif
391 } // CalculateBytePerPixelAndBlockSizes
392
393 void dml32_CalculateSwathAndDETConfiguration(
394                 unsigned int DETSizeOverride[],
395                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
396                 unsigned int ConfigReturnBufferSizeInKByte,
397                 unsigned int MaxTotalDETInKByte,
398                 unsigned int MinCompressedBufferSizeInKByte,
399                 double ForceSingleDPP,
400                 unsigned int NumberOfActiveSurfaces,
401                 unsigned int nomDETInKByte,
402                 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
403                 unsigned int CompressedBufferSegmentSizeInkByteFinal,
404                 enum output_encoder_class Output[],
405                 double ReadBandwidthLuma[],
406                 double ReadBandwidthChroma[],
407                 double MaximumSwathWidthLuma[],
408                 double MaximumSwathWidthChroma[],
409                 enum dm_rotation_angle SourceRotation[],
410                 bool ViewportStationary[],
411                 enum source_format_class SourcePixelFormat[],
412                 enum dm_swizzle_mode SurfaceTiling[],
413                 unsigned int ViewportWidth[],
414                 unsigned int ViewportHeight[],
415                 unsigned int ViewportXStart[],
416                 unsigned int ViewportYStart[],
417                 unsigned int ViewportXStartC[],
418                 unsigned int ViewportYStartC[],
419                 unsigned int SurfaceWidthY[],
420                 unsigned int SurfaceWidthC[],
421                 unsigned int SurfaceHeightY[],
422                 unsigned int SurfaceHeightC[],
423                 unsigned int Read256BytesBlockHeightY[],
424                 unsigned int Read256BytesBlockHeightC[],
425                 unsigned int Read256BytesBlockWidthY[],
426                 unsigned int Read256BytesBlockWidthC[],
427                 enum odm_combine_mode ODMMode[],
428                 unsigned int BlendingAndTiming[],
429                 unsigned int BytePerPixY[],
430                 unsigned int BytePerPixC[],
431                 double BytePerPixDETY[],
432                 double BytePerPixDETC[],
433                 unsigned int HActive[],
434                 double HRatio[],
435                 double HRatioChroma[],
436                 unsigned int DPPPerSurface[],
437
438                 /* Output */
439                 unsigned int swath_width_luma_ub[],
440                 unsigned int swath_width_chroma_ub[],
441                 double SwathWidth[],
442                 double SwathWidthChroma[],
443                 unsigned int SwathHeightY[],
444                 unsigned int SwathHeightC[],
445                 unsigned int DETBufferSizeInKByte[],
446                 unsigned int DETBufferSizeY[],
447                 unsigned int DETBufferSizeC[],
448                 bool *UnboundedRequestEnabled,
449                 unsigned int *CompressedBufferSizeInkByte,
450                 bool ViewportSizeSupportPerSurface[],
451                 bool *ViewportSizeSupport)
452 {
453         unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
454         unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
455         unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
456         unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
457         unsigned int RoundedUpSwathSizeBytesY;
458         unsigned int RoundedUpSwathSizeBytesC;
459         double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
460         double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
461         unsigned int k;
462         unsigned int TotalActiveDPP = 0;
463         bool NoChromaSurfaces = true;
464         unsigned int DETBufferSizeInKByteForSwathCalculation;
465
466 #ifdef __DML_VBA_DEBUG__
467         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
468 #endif
469         dml32_CalculateSwathWidth(ForceSingleDPP,
470                         NumberOfActiveSurfaces,
471                         SourcePixelFormat,
472                         SourceRotation,
473                         ViewportStationary,
474                         ViewportWidth,
475                         ViewportHeight,
476                         ViewportXStart,
477                         ViewportYStart,
478                         ViewportXStartC,
479                         ViewportYStartC,
480                         SurfaceWidthY,
481                         SurfaceWidthC,
482                         SurfaceHeightY,
483                         SurfaceHeightC,
484                         ODMMode,
485                         BytePerPixY,
486                         BytePerPixC,
487                         Read256BytesBlockHeightY,
488                         Read256BytesBlockHeightC,
489                         Read256BytesBlockWidthY,
490                         Read256BytesBlockWidthC,
491                         BlendingAndTiming,
492                         HActive,
493                         HRatio,
494                         DPPPerSurface,
495
496                         /* Output */
497                         SwathWidthdoubleDPP,
498                         SwathWidthdoubleDPPChroma,
499                         SwathWidth,
500                         SwathWidthChroma,
501                         MaximumSwathHeightY,
502                         MaximumSwathHeightC,
503                         swath_width_luma_ub,
504                         swath_width_chroma_ub);
505
506         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
507                 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
508                 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
509 #ifdef __DML_VBA_DEBUG__
510                 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
511                 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
512                 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
513                 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
514                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
515                                 RoundedUpMaxSwathSizeBytesY[k]);
516                 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
517                 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
518                 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
519                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
520                                 RoundedUpMaxSwathSizeBytesC[k]);
521 #endif
522
523                 if (SourcePixelFormat[k] == dm_420_10) {
524                         RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
525                         RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
526                 }
527         }
528
529         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
530                 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
531                 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
532                                 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
533                         NoChromaSurfaces = false;
534                 }
535         }
536
537         *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP,
538                         NoChromaSurfaces, Output[0]);
539
540         dml32_CalculateDETBufferSize(DETSizeOverride,
541                         UseMALLForPStateChange,
542                         ForceSingleDPP,
543                         NumberOfActiveSurfaces,
544                         *UnboundedRequestEnabled,
545                         nomDETInKByte,
546                         MaxTotalDETInKByte,
547                         ConfigReturnBufferSizeInKByte,
548                         MinCompressedBufferSizeInKByte,
549                         CompressedBufferSegmentSizeInkByteFinal,
550                         SourcePixelFormat,
551                         ReadBandwidthLuma,
552                         ReadBandwidthChroma,
553                         RoundedUpMaxSwathSizeBytesY,
554                         RoundedUpMaxSwathSizeBytesC,
555                         DPPPerSurface,
556
557                         /* Output */
558                         DETBufferSizeInKByte,    // per hubp pipe
559                         CompressedBufferSizeInkByte);
560
561 #ifdef __DML_VBA_DEBUG__
562         dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
563         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
564         dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
565         dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
566         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
567         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
568 #endif
569
570         *ViewportSizeSupport = true;
571         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
572
573                 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
574                                 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
575 #ifdef __DML_VBA_DEBUG__
576                 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
577                                 DETBufferSizeInKByteForSwathCalculation);
578 #endif
579
580                 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
581                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
582                         SwathHeightY[k] = MaximumSwathHeightY[k];
583                         SwathHeightC[k] = MaximumSwathHeightC[k];
584                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
585                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
586                 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
587                                 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
588                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
589                         SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
590                         SwathHeightC[k] = MaximumSwathHeightC[k];
591                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
592                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
593                 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
594                                 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
595                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
596                         SwathHeightY[k] = MaximumSwathHeightY[k];
597                         SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
598                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
599                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
600                 } else {
601                         SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
602                         SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
603                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
604                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
605                 }
606
607                 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
608                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
609                                 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
610                                                 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
611                         *ViewportSizeSupport = false;
612                         ViewportSizeSupportPerSurface[k] = false;
613                 } else {
614                         ViewportSizeSupportPerSurface[k] = true;
615                 }
616
617                 if (SwathHeightC[k] == 0) {
618 #ifdef __DML_VBA_DEBUG__
619                         dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
620 #endif
621                         DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
622                         DETBufferSizeC[k] = 0;
623                 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
624 #ifdef __DML_VBA_DEBUG__
625                         dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
626 #endif
627                         DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
628                         DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
629                 } else {
630 #ifdef __DML_VBA_DEBUG__
631                         dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
632 #endif
633                         DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
634                         DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
635                 }
636
637 #ifdef __DML_VBA_DEBUG__
638                 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
639                 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
640                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
641                                 k, RoundedUpMaxSwathSizeBytesY[k]);
642                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
643                                 k, RoundedUpMaxSwathSizeBytesC[k]);
644                 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
645                 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
646                 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
647                 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
648                 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
649                 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
650                                 ViewportSizeSupportPerSurface[k]);
651 #endif
652
653         }
654 } // CalculateSwathAndDETConfiguration
655
656 void dml32_CalculateSwathWidth(
657                 bool                            ForceSingleDPP,
658                 unsigned int                    NumberOfActiveSurfaces,
659                 enum source_format_class        SourcePixelFormat[],
660                 enum dm_rotation_angle          SourceRotation[],
661                 bool                            ViewportStationary[],
662                 unsigned int                    ViewportWidth[],
663                 unsigned int                    ViewportHeight[],
664                 unsigned int                    ViewportXStart[],
665                 unsigned int                    ViewportYStart[],
666                 unsigned int                    ViewportXStartC[],
667                 unsigned int                    ViewportYStartC[],
668                 unsigned int                    SurfaceWidthY[],
669                 unsigned int                    SurfaceWidthC[],
670                 unsigned int                    SurfaceHeightY[],
671                 unsigned int                    SurfaceHeightC[],
672                 enum odm_combine_mode           ODMMode[],
673                 unsigned int                    BytePerPixY[],
674                 unsigned int                    BytePerPixC[],
675                 unsigned int                    Read256BytesBlockHeightY[],
676                 unsigned int                    Read256BytesBlockHeightC[],
677                 unsigned int                    Read256BytesBlockWidthY[],
678                 unsigned int                    Read256BytesBlockWidthC[],
679                 unsigned int                    BlendingAndTiming[],
680                 unsigned int                    HActive[],
681                 double                          HRatio[],
682                 unsigned int                    DPPPerSurface[],
683
684                 /* Output */
685                 double                          SwathWidthdoubleDPPY[],
686                 double                          SwathWidthdoubleDPPC[],
687                 double                          SwathWidthY[], // per-pipe
688                 double                          SwathWidthC[], // per-pipe
689                 unsigned int                    MaximumSwathHeightY[],
690                 unsigned int                    MaximumSwathHeightC[],
691                 unsigned int                    swath_width_luma_ub[], // per-pipe
692                 unsigned int                    swath_width_chroma_ub[]) // per-pipe
693 {
694         unsigned int k, j;
695         enum odm_combine_mode MainSurfaceODMMode;
696
697     unsigned int surface_width_ub_l;
698     unsigned int surface_height_ub_l;
699     unsigned int surface_width_ub_c;
700     unsigned int surface_height_ub_c;
701
702 #ifdef __DML_VBA_DEBUG__
703         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
704         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
705 #endif
706
707         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
708                 if (!IsVertical(SourceRotation[k]))
709                         SwathWidthdoubleDPPY[k] = ViewportWidth[k];
710                 else
711                         SwathWidthdoubleDPPY[k] = ViewportHeight[k];
712
713 #ifdef __DML_VBA_DEBUG__
714                 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
715                 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
716 #endif
717
718                 MainSurfaceODMMode = ODMMode[k];
719                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
720                         if (BlendingAndTiming[k] == j)
721                                 MainSurfaceODMMode = ODMMode[j];
722                 }
723
724                 if (ForceSingleDPP) {
725                         SwathWidthY[k] = SwathWidthdoubleDPPY[k];
726                 } else {
727                         if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
728                                 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
729                                                 dml_round(HActive[k] / 4.0 * HRatio[k]));
730                         } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
731                                 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
732                                                 dml_round(HActive[k] / 2.0 * HRatio[k]));
733                         } else if (DPPPerSurface[k] == 2) {
734                                 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
735                         } else {
736                                 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
737                         }
738                 }
739
740 #ifdef __DML_VBA_DEBUG__
741                 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
742                 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
743                 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
744                 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
745                 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
746 #endif
747
748                 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
749                                 SourcePixelFormat[k] == dm_420_12) {
750                         SwathWidthC[k] = SwathWidthY[k] / 2;
751                         SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
752                 } else {
753                         SwathWidthC[k] = SwathWidthY[k];
754                         SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
755                 }
756
757                 if (ForceSingleDPP == true) {
758                         SwathWidthY[k] = SwathWidthdoubleDPPY[k];
759                         SwathWidthC[k] = SwathWidthdoubleDPPC[k];
760                 }
761
762                 surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
763                 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
764                 surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
765                 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
766
767 #ifdef __DML_VBA_DEBUG__
768                 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
769                 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
770                 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
771                 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
772                 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
773                 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
774                 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
775                 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
776                 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
777                 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
778 #endif
779
780                 if (!IsVertical(SourceRotation[k])) {
781                         MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
782                         MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
783                         if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
784                                 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
785                                                 dml_floor(ViewportXStart[k] +
786                                                                 SwathWidthY[k] +
787                                                                 Read256BytesBlockWidthY[k] - 1,
788                                                                 Read256BytesBlockWidthY[k]) -
789                                                                 dml_floor(ViewportXStart[k],
790                                                                 Read256BytesBlockWidthY[k]));
791                         } else {
792                                 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
793                                                 dml_ceil(SwathWidthY[k] - 1,
794                                                                 Read256BytesBlockWidthY[k]) +
795                                                                 Read256BytesBlockWidthY[k]);
796                         }
797                         if (BytePerPixC[k] > 0) {
798                                 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
799                                         swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
800                                                         dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
801                                                                         Read256BytesBlockWidthC[k] - 1,
802                                                                         Read256BytesBlockWidthC[k]) -
803                                                                         dml_floor(ViewportXStartC[k],
804                                                                         Read256BytesBlockWidthC[k]));
805                                 } else {
806                                         swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
807                                                         dml_ceil(SwathWidthC[k] - 1,
808                                                                 Read256BytesBlockWidthC[k]) +
809                                                                 Read256BytesBlockWidthC[k]);
810                                 }
811                         } else {
812                                 swath_width_chroma_ub[k] = 0;
813                         }
814                 } else {
815                         MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
816                         MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
817
818                         if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
819                                 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
820                                                 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
821                                                 Read256BytesBlockHeightY[k]) -
822                                                 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
823                         } else {
824                                 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
825                                                 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
826                         }
827                         if (BytePerPixC[k] > 0) {
828                                 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
829                                         swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
830                                                         dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
831                                                                         Read256BytesBlockHeightC[k] - 1,
832                                                                         Read256BytesBlockHeightC[k]) -
833                                                                         dml_floor(ViewportYStartC[k],
834                                                                                         Read256BytesBlockHeightC[k]));
835                                 } else {
836                                         swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
837                                                         dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
838                                                         Read256BytesBlockHeightC[k]);
839                                 }
840                         } else {
841                                 swath_width_chroma_ub[k] = 0;
842                         }
843                 }
844
845 #ifdef __DML_VBA_DEBUG__
846                 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
847                 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
848                 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
849                 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
850 #endif
851
852         }
853 } // CalculateSwathWidth
854
855 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
856                 unsigned int TotalNumberOfActiveDPP,
857                 bool NoChroma,
858                 enum output_encoder_class Output)
859 {
860         bool ret_val = false;
861
862         ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
863                         TotalNumberOfActiveDPP == 1 && NoChroma);
864         if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
865                 ret_val = false;
866         return ret_val;
867 }
868
869 void dml32_CalculateDETBufferSize(
870                 unsigned int DETSizeOverride[],
871                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
872                 bool ForceSingleDPP,
873                 unsigned int NumberOfActiveSurfaces,
874                 bool UnboundedRequestEnabled,
875                 unsigned int nomDETInKByte,
876                 unsigned int MaxTotalDETInKByte,
877                 unsigned int ConfigReturnBufferSizeInKByte,
878                 unsigned int MinCompressedBufferSizeInKByte,
879                 unsigned int CompressedBufferSegmentSizeInkByteFinal,
880                 enum source_format_class SourcePixelFormat[],
881                 double ReadBandwidthLuma[],
882                 double ReadBandwidthChroma[],
883                 unsigned int RoundedUpMaxSwathSizeBytesY[],
884                 unsigned int RoundedUpMaxSwathSizeBytesC[],
885                 unsigned int DPPPerSurface[],
886                 /* Output */
887                 unsigned int DETBufferSizeInKByte[],
888                 unsigned int *CompressedBufferSizeInkByte)
889 {
890         unsigned int DETBufferSizePoolInKByte;
891         unsigned int NextDETBufferPieceInKByte;
892         bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
893         bool NextPotentialSurfaceToAssignDETPieceFound;
894         unsigned int NextSurfaceToAssignDETPiece;
895         double TotalBandwidth;
896         double BandwidthOfSurfacesNotAssignedDETPiece;
897         unsigned int max_minDET;
898         unsigned int minDET;
899         unsigned int minDET_pipe;
900         unsigned int j, k;
901
902 #ifdef __DML_VBA_DEBUG__
903         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
904         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
905         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
906         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
907         dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
908         dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
909         dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
910         dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
911                         CompressedBufferSegmentSizeInkByteFinal);
912 #endif
913
914         // Note: Will use default det size if that fits 2 swaths
915         if (UnboundedRequestEnabled) {
916                 if (DETSizeOverride[0] > 0) {
917                         DETBufferSizeInKByte[0] = DETSizeOverride[0];
918                 } else {
919                         DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
920                                         ((double) RoundedUpMaxSwathSizeBytesY[0] +
921                                                         (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
922                 }
923                 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
924         } else {
925                 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
926                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
927                         DETBufferSizeInKByte[k] = nomDETInKByte;
928                         if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
929                                         SourcePixelFormat[k] == dm_420_12) {
930                                 max_minDET = nomDETInKByte - 64;
931                         } else {
932                                 max_minDET = nomDETInKByte;
933                         }
934                         minDET = 128;
935                         minDET_pipe = 0;
936
937                         // add DET resource until can hold 2 full swaths
938                         while (minDET <= max_minDET && minDET_pipe == 0) {
939                                 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
940                                                 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
941                                         minDET_pipe = minDET;
942                                 minDET = minDET + 64;
943                         }
944
945 #ifdef __DML_VBA_DEBUG__
946                         dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
947                         dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
948                         dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
949                         dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
950                                         RoundedUpMaxSwathSizeBytesY[k]);
951                         dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
952                                         RoundedUpMaxSwathSizeBytesC[k]);
953 #endif
954
955                         if (minDET_pipe == 0) {
956                                 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
957                                                 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
958 #ifdef __DML_VBA_DEBUG__
959                                 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
960                                                 __func__, k, minDET_pipe);
961 #endif
962                         }
963
964                         if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
965                                 DETBufferSizeInKByte[k] = 0;
966                         } else if (DETSizeOverride[k] > 0) {
967                                 DETBufferSizeInKByte[k] = DETSizeOverride[k];
968                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
969                                                 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
970                         } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
971                                 DETBufferSizeInKByte[k] = minDET_pipe;
972                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
973                                                 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
974                         }
975
976 #ifdef __DML_VBA_DEBUG__
977                         dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
978                         dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
979                         dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
980                         dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
981 #endif
982                 }
983
984                 TotalBandwidth = 0;
985                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
986                         if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
987                                 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
988                 }
989 #ifdef __DML_VBA_DEBUG__
990                 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
991                 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
992                         dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
993                 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
994                 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
995 #endif
996                 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
997                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
998
999                         if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1000                                 DETPieceAssignedToThisSurfaceAlready[k] = true;
1001                         } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1002                                         (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1003                                         ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1004                                 DETPieceAssignedToThisSurfaceAlready[k] = true;
1005                                 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1006                                                 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1007                         } else {
1008                                 DETPieceAssignedToThisSurfaceAlready[k] = false;
1009                         }
1010 #ifdef __DML_VBA_DEBUG__
1011                         dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1012                                         DETPieceAssignedToThisSurfaceAlready[k]);
1013                         dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1014                                         BandwidthOfSurfacesNotAssignedDETPiece);
1015 #endif
1016                 }
1017
1018                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1019                         NextPotentialSurfaceToAssignDETPieceFound = false;
1020                         NextSurfaceToAssignDETPiece = 0;
1021
1022                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1023 #ifdef __DML_VBA_DEBUG__
1024                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1025                                                 ReadBandwidthLuma[k]);
1026                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1027                                                 ReadBandwidthChroma[k]);
1028                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1029                                                 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1030                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1031                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1032                                 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1033                                                 NextSurfaceToAssignDETPiece);
1034 #endif
1035                                 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1036                                                 (!NextPotentialSurfaceToAssignDETPieceFound ||
1037                                                 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1038                                                 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1039                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1040                                         NextSurfaceToAssignDETPiece = k;
1041                                         NextPotentialSurfaceToAssignDETPieceFound = true;
1042                                 }
1043 #ifdef __DML_VBA_DEBUG__
1044                                 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1045                                                 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1046                                 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1047                                                 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1048 #endif
1049                         }
1050
1051                         if (NextPotentialSurfaceToAssignDETPieceFound) {
1052                                 // Note: To show the banker's rounding behavior in VBA and also the fact
1053                                 // that the DET buffer size varies due to precision issue
1054                                 //
1055                                 //double tmp1 =  ((double) DETBufferSizePoolInKByte *
1056                                 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1057                                 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1058                                 // BandwidthOfSurfacesNotAssignedDETPiece /
1059                                 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1060                                 //double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1061                                 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1062                                 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1063                                  //BandwidthOfSurfacesNotAssignedDETPiece /
1064                                 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1065                                 //
1066                                 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1067                                 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1068
1069                                 NextDETBufferPieceInKByte = dml_min(
1070                                         dml_round((double) DETBufferSizePoolInKByte *
1071                                                 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1072                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1073                                                 BandwidthOfSurfacesNotAssignedDETPiece /
1074                                                 ((ForceSingleDPP ? 1 :
1075                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1076                                                 (ForceSingleDPP ? 1 :
1077                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1078                                                 dml_floor((double) DETBufferSizePoolInKByte,
1079                                                 (ForceSingleDPP ? 1 :
1080                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1081
1082                                 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1083                                 // We should limit the per-pipe DET size to the nominal / max per pipe.
1084                                 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1085                                         if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1086                                                         nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1087                                                 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1088                                                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1089                                         } else {
1090                                                 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1091                                                 // already has the max per-pipe value
1092                                                 NextDETBufferPieceInKByte = 0;
1093                                         }
1094                                 }
1095
1096 #ifdef __DML_VBA_DEBUG__
1097                                 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1098                                         DETBufferSizePoolInKByte);
1099                                 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1100                                         NextSurfaceToAssignDETPiece);
1101                                 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1102                                         NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1103                                 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1104                                         NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1105                                 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1106                                         __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1107                                 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1108                                         NextDETBufferPieceInKByte);
1109                                 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1110                                         __func__, j, NextSurfaceToAssignDETPiece,
1111                                         DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1112 #endif
1113
1114                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1115                                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1116                                                 + NextDETBufferPieceInKByte
1117                                                 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1118 #ifdef __DML_VBA_DEBUG__
1119                                 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1120 #endif
1121
1122                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1123                                 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1124                                 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1125                                                 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1126                                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1127                         }
1128                 }
1129                 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1130         }
1131         *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1132
1133 #ifdef __DML_VBA_DEBUG__
1134         dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1135         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1136         for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1137                 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1138                                 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1139         }
1140 #endif
1141 } // CalculateDETBufferSize
1142
1143 void dml32_CalculateODMMode(
1144                 unsigned int MaximumPixelsPerLinePerDSCUnit,
1145                 unsigned int HActive,
1146                 enum output_encoder_class Output,
1147                 enum odm_combine_policy ODMUse,
1148                 double StateDispclk,
1149                 double MaxDispclk,
1150                 bool DSCEnable,
1151                 unsigned int TotalNumberOfActiveDPP,
1152                 unsigned int MaxNumDPP,
1153                 double PixelClock,
1154                 double DISPCLKDPPCLKDSCCLKDownSpreading,
1155                 double DISPCLKRampingMargin,
1156                 double DISPCLKDPPCLKVCOSpeed,
1157
1158                 /* Output */
1159                 bool *TotalAvailablePipesSupport,
1160                 unsigned int *NumberOfDPP,
1161                 enum odm_combine_mode *ODMMode,
1162                 double *RequiredDISPCLKPerSurface)
1163 {
1164
1165         double SurfaceRequiredDISPCLKWithoutODMCombine;
1166         double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1167         double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1168
1169         SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1170                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1171                         MaxDispclk);
1172         SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1173                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1174                         MaxDispclk);
1175         SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1176                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1177                         MaxDispclk);
1178         *TotalAvailablePipesSupport = true;
1179         *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1180
1181         if (ODMUse == dm_odm_combine_policy_none)
1182                 *ODMMode = dm_odm_combine_mode_disabled;
1183
1184         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1185         *NumberOfDPP = 0;
1186
1187         // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1188         // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1189
1190         if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1191                         ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1192                                         (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) {
1193                 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1194                         *ODMMode = dm_odm_combine_mode_4to1;
1195                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1196                         *NumberOfDPP = 4;
1197                 } else {
1198                         *TotalAvailablePipesSupport = false;
1199                 }
1200         } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1201                         (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1202                                         SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1203                                         (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) {
1204                 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1205                         *ODMMode = dm_odm_combine_mode_2to1;
1206                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207                         *NumberOfDPP = 2;
1208                 } else {
1209                         *TotalAvailablePipesSupport = false;
1210                 }
1211         } else {
1212                 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1213                         *NumberOfDPP = 1;
1214                 else
1215                         *TotalAvailablePipesSupport = false;
1216         }
1217 }
1218
1219 double dml32_CalculateRequiredDispclk(
1220                 enum odm_combine_mode ODMMode,
1221                 double PixelClock,
1222                 double DISPCLKDPPCLKDSCCLKDownSpreading,
1223                 double DISPCLKRampingMargin,
1224                 double DISPCLKDPPCLKVCOSpeed,
1225                 double MaxDispclk)
1226 {
1227         double RequiredDispclk = 0.;
1228         double PixelClockAfterODM;
1229         double DISPCLKWithRampingRoundedToDFSGranularity;
1230         double DISPCLKWithoutRampingRoundedToDFSGranularity;
1231         double MaxDispclkRoundedDownToDFSGranularity;
1232
1233         if (ODMMode == dm_odm_combine_mode_4to1)
1234                 PixelClockAfterODM = PixelClock / 4;
1235         else if (ODMMode == dm_odm_combine_mode_2to1)
1236                 PixelClockAfterODM = PixelClock / 2;
1237         else
1238                 PixelClockAfterODM = PixelClock;
1239
1240
1241         DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1242                         PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1243                                         * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1244
1245         DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1246                         PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1247
1248         MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1249
1250         if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1251                 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1252         else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1253                 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1254         else
1255                 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1256
1257         return RequiredDispclk;
1258 }
1259
/*
 * dml32_RoundToDFSGranularity - snap a clock to a value of the form
 * (4 * VCOSpeed) / N with N a whole number.
 *
 * @Clock:    clock to round; values <= 0 yield 0.
 * @round_up: true floors the divider N (result >= Clock), false takes the
 *            ceiling of N (result <= Clock).
 * @VCOSpeed: VCO speed the divider is applied to.
 *
 * NOTE(review): when round_up is set and Clock is larger than 4 * VCOSpeed the
 * floored divider is 0 and the division is by zero; no guard exists here, so
 * callers presumably never request such a clock - TODO confirm.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double Divider;

	if (Clock <= 0.0)
		return 0.0;

	Divider = VCOSpeed * 4.0 / Clock;
	if (round_up)
		Divider = dml_floor(Divider, 1.0);
	else
		Divider = dml_ceil(Divider, 1.0);

	return VCOSpeed * 4.0 / Divider;
}
1270
1271 void dml32_CalculateOutputLink(
1272                 double PHYCLKPerState,
1273                 double PHYCLKD18PerState,
1274                 double PHYCLKD32PerState,
1275                 double Downspreading,
1276                 bool IsMainSurfaceUsingTheIndicatedTiming,
1277                 enum output_encoder_class Output,
1278                 enum output_format_class OutputFormat,
1279                 unsigned int HTotal,
1280                 unsigned int HActive,
1281                 double PixelClockBackEnd,
1282                 double ForcedOutputLinkBPP,
1283                 unsigned int DSCInputBitPerComponent,
1284                 unsigned int NumberOfDSCSlices,
1285                 double AudioSampleRate,
1286                 unsigned int AudioSampleLayout,
1287                 enum odm_combine_mode ODMModeNoDSC,
1288                 enum odm_combine_mode ODMModeDSC,
1289                 bool DSCEnable,
1290                 unsigned int OutputLinkDPLanes,
1291                 enum dm_output_link_dp_rate OutputLinkDPRate,
1292
1293                 /* Output */
1294                 bool *RequiresDSC,
1295                 double *RequiresFEC,
1296                 double  *OutBpp,
1297                 enum dm_output_type *OutputType,
1298                 enum dm_output_rate *OutputRate,
1299                 unsigned int *RequiredSlots)
1300 {
1301         bool LinkDSCEnable;
1302         unsigned int dummy;
1303         *RequiresDSC = false;
1304         *RequiresFEC = false;
1305         *OutBpp = 0;
1306         *OutputType = dm_output_type_unknown;
1307         *OutputRate = dm_output_rate_unknown;
1308
1309         if (IsMainSurfaceUsingTheIndicatedTiming) {
1310                 if (Output == dm_hdmi) {
1311                         *RequiresDSC = false;
1312                         *RequiresFEC = false;
1313                         *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1314                                         PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1315                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1316                                         ODMModeNoDSC, ODMModeDSC, &dummy);
1317                         //OutputTypeAndRate = "HDMI";
1318                         *OutputType = dm_output_type_hdmi;
1319
1320                 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1321                         if (DSCEnable == true) {
1322                                 *RequiresDSC = true;
1323                                 LinkDSCEnable = true;
1324                                 if (Output == dm_dp || Output == dm_dp2p0)
1325                                         *RequiresFEC = true;
1326                                 else
1327                                         *RequiresFEC = false;
1328                         } else {
1329                                 *RequiresDSC = false;
1330                                 LinkDSCEnable = false;
1331                                 if (Output == dm_dp2p0)
1332                                         *RequiresFEC = true;
1333                                 else
1334                                         *RequiresFEC = false;
1335                         }
1336                         if (Output == dm_dp2p0) {
1337                                 *OutBpp = 0;
1338                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1339                                                 PHYCLKD32PerState >= 10000 / 32) {
1340                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1341                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1342                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1343                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1344                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1345                                         if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1346                                                         ForcedOutputLinkBPP == 0) {
1347                                                 *RequiresDSC = true;
1348                                                 LinkDSCEnable = true;
1349                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1350                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1351                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1352                                                                 OutputFormat, DSCInputBitPerComponent,
1353                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1354                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1355                                         }
1356                                         //OutputTypeAndRate = Output & " UHBR10";
1357                                         *OutputType = dm_output_type_dp2p0;
1358                                         *OutputRate = dm_output_rate_dp_rate_uhbr10;
1359                                 }
1360                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1361                                                 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1362                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1363                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1364                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1365                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1366                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1367
1368                                         if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1369                                                         ForcedOutputLinkBPP == 0) {
1370                                                 *RequiresDSC = true;
1371                                                 LinkDSCEnable = true;
1372                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1373                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1374                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1375                                                                 OutputFormat, DSCInputBitPerComponent,
1376                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1377                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1378                                         }
1379                                         //OutputTypeAndRate = Output & " UHBR13p5";
1380                                         *OutputType = dm_output_type_dp2p0;
1381                                         *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1382                                 }
1383                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1384                                                 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1385                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1386                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1387                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1388                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1389                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1390                                         if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1391                                                 *RequiresDSC = true;
1392                                                 LinkDSCEnable = true;
1393                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1394                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1395                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1396                                                                 OutputFormat, DSCInputBitPerComponent,
1397                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1398                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1399                                         }
1400                                         //OutputTypeAndRate = Output & " UHBR20";
1401                                         *OutputType = dm_output_type_dp2p0;
1402                                         *OutputRate = dm_output_rate_dp_rate_uhbr20;
1403                                 }
1404                         } else {
1405                                 *OutBpp = 0;
1406                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1407                                                 PHYCLKPerState >= 270) {
1408                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1409                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1410                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1411                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1412                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1413                                         if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1414                                                         ForcedOutputLinkBPP == 0) {
1415                                                 *RequiresDSC = true;
1416                                                 LinkDSCEnable = true;
1417                                                 if (Output == dm_dp)
1418                                                         *RequiresFEC = true;
1419                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1420                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1421                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1422                                                                 OutputFormat, DSCInputBitPerComponent,
1423                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1424                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1425                                         }
1426                                         //OutputTypeAndRate = Output & " HBR";
1427                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1428                                         *OutputRate = dm_output_rate_dp_rate_hbr;
1429                                 }
1430                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1431                                                 *OutBpp == 0 && PHYCLKPerState >= 540) {
1432                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1433                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1434                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1435                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1436                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1437
1438                                         if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1439                                                         ForcedOutputLinkBPP == 0) {
1440                                                 *RequiresDSC = true;
1441                                                 LinkDSCEnable = true;
1442                                                 if (Output == dm_dp)
1443                                                         *RequiresFEC = true;
1444
1445                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1446                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1447                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1448                                                                 OutputFormat, DSCInputBitPerComponent,
1449                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1450                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1451                                         }
1452                                         //OutputTypeAndRate = Output & " HBR2";
1453                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1454                                         *OutputRate = dm_output_rate_dp_rate_hbr2;
1455                                 }
1456                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1457                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1458                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1459                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output,
1460                                                         OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1461                                                         AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1462                                                         RequiredSlots);
1463
1464                                         if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1465                                                 *RequiresDSC = true;
1466                                                 LinkDSCEnable = true;
1467                                                 if (Output == dm_dp)
1468                                                         *RequiresFEC = true;
1469
1470                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1471                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1472                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1473                                                                 OutputFormat, DSCInputBitPerComponent,
1474                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1475                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1476                                         }
1477                                         //OutputTypeAndRate = Output & " HBR3";
1478                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1479                                         *OutputRate = dm_output_rate_dp_rate_hbr3;
1480                                 }
1481                         }
1482                 }
1483         }
1484 }
1485
/*
 * dml32_CalculateDPPCLK - derive the global DPPCLK and the per-surface DPPCLKs.
 *
 * For each of the NumberOfActiveSurfaces surfaces the single-DPP clock is
 * divided by the surface's DPP count and scaled by the down-spread percentage.
 * *GlobalDPPCLK is the largest of those values, rounded up by
 * dml32_RoundToDFSGranularity(). Each Dppclk[k] is then rounded up to a whole
 * number of 1/255 steps of *GlobalDPPCLK.
 *
 * NOTE(review): if *GlobalDPPCLK ends up 0 the second pass divides by zero;
 * the code has no guard for that - presumably callers always have at least one
 * surface with a non-zero clock, TODO confirm.
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	unsigned int Surface;

	/* Pass 1: raw per-surface clocks and their running maximum. */
	*GlobalDPPCLK = 0;
	Surface = 0;
	while (Surface < NumberOfActiveSurfaces) {
		Dppclk[Surface] = DPPCLKUsingSingleDPP[Surface] / DPPPerSurface[Surface]
				* (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
		*GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[Surface]);
		++Surface;
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);

	/* Pass 2: express every clock as a whole number of GlobalDPPCLK / 255 steps. */
	Surface = 0;
	while (Surface < NumberOfActiveSurfaces) {
		double StepsNeeded = dml_ceil(Dppclk[Surface] * 255.0 / *GlobalDPPCLK, 1.0);

		Dppclk[Surface] = *GlobalDPPCLK / 255 * StepsNeeded;
		++Surface;
	}
}
1507
1508 double dml32_TruncToValidBPP(
1509                 double LinkBitRate,
1510                 unsigned int Lanes,
1511                 unsigned int HTotal,
1512                 unsigned int HActive,
1513                 double PixelClock,
1514                 double DesiredBPP,
1515                 bool DSCEnable,
1516                 enum output_encoder_class Output,
1517                 enum output_format_class Format,
1518                 unsigned int DSCInputBitPerComponent,
1519                 unsigned int DSCSlices,
1520                 unsigned int AudioRate,
1521                 unsigned int AudioLayout,
1522                 enum odm_combine_mode ODMModeNoDSC,
1523                 enum odm_combine_mode ODMModeDSC,
1524                 /* Output */
1525                 unsigned int *RequiredSlots)
1526 {
1527         double    MaxLinkBPP;
1528         unsigned int   MinDSCBPP;
1529         double    MaxDSCBPP;
1530         unsigned int   NonDSCBPP0;
1531         unsigned int   NonDSCBPP1;
1532         unsigned int   NonDSCBPP2;
1533         unsigned int   NonDSCBPP3;
1534
1535         if (Format == dm_420) {
1536                 NonDSCBPP0 = 12;
1537                 NonDSCBPP1 = 15;
1538                 NonDSCBPP2 = 18;
1539                 MinDSCBPP = 6;
1540                 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1541         } else if (Format == dm_444) {
1542                 NonDSCBPP0 = 18;
1543                 NonDSCBPP1 = 24;
1544                 NonDSCBPP2 = 30;
1545                 NonDSCBPP3 = 36;
1546                 MinDSCBPP = 8;
1547                 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1548         } else {
1549                 if (Output == dm_hdmi) {
1550                         NonDSCBPP0 = 24;
1551                         NonDSCBPP1 = 24;
1552                         NonDSCBPP2 = 24;
1553                 } else {
1554                         NonDSCBPP0 = 16;
1555                         NonDSCBPP1 = 20;
1556                         NonDSCBPP2 = 24;
1557                 }
1558                 if (Format == dm_n422) {
1559                         MinDSCBPP = 7;
1560                         MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1561                 } else {
1562                         MinDSCBPP = 8;
1563                         MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1564                 }
1565         }
1566         if (Output == dm_dp2p0) {
1567                 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1568         } else if (DSCEnable && Output == dm_dp) {
1569                 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1570         } else {
1571                 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1572         }
1573
1574         if (DSCEnable) {
1575                 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1576                         MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1577                 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1578                         MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1579                 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1580                         MaxLinkBPP = 2 * MaxLinkBPP;
1581         } else {
1582                 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1583                         MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1584                 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1585                         MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1586                 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1587                         MaxLinkBPP = 2 * MaxLinkBPP;
1588         }
1589
1590         if (DesiredBPP == 0) {
1591                 if (DSCEnable) {
1592                         if (MaxLinkBPP < MinDSCBPP)
1593                                 return BPP_INVALID;
1594                         else if (MaxLinkBPP >= MaxDSCBPP)
1595                                 return MaxDSCBPP;
1596                         else
1597                                 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1598                 } else {
1599                         if (MaxLinkBPP >= NonDSCBPP3)
1600                                 return NonDSCBPP3;
1601                         else if (MaxLinkBPP >= NonDSCBPP2)
1602                                 return NonDSCBPP2;
1603                         else if (MaxLinkBPP >= NonDSCBPP1)
1604                                 return NonDSCBPP1;
1605                         else if (MaxLinkBPP >= NonDSCBPP0)
1606                                 return 16.0;
1607                         else
1608                                 return BPP_INVALID;
1609                 }
1610         } else {
1611                 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1612                                 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1613                                 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1614                         return BPP_INVALID;
1615                 else
1616                         return DesiredBPP;
1617         }
1618
1619         *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1620
1621         return BPP_INVALID;
1622 } // TruncToValidBPP
1623
1624 double dml32_RequiredDTBCLK(
1625                 bool              DSCEnable,
1626                 double               PixelClock,
1627                 enum output_format_class  OutputFormat,
1628                 double               OutputBpp,
1629                 unsigned int              DSCSlices,
1630                 unsigned int                 HTotal,
1631                 unsigned int                 HActive,
1632                 unsigned int              AudioRate,
1633                 unsigned int              AudioLayout)
1634 {
1635         double PixelWordRate;
1636         double HCActive;
1637         double HCBlank;
1638         double AverageTribyteRate;
1639         double HActiveTribyteRate;
1640
1641         if (DSCEnable != true)
1642                 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1643
1644         PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1645         HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1646                         dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1647         HCBlank = 64 + 32 *
1648                         dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1649         AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1650         HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1651         return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1652 }
1653
1654 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1655                 enum odm_combine_mode ODMMode,
1656                 unsigned int DSCInputBitPerComponent,
1657                 double OutputBpp,
1658                 unsigned int HActive,
1659                 unsigned int HTotal,
1660                 unsigned int NumberOfDSCSlices,
1661                 enum output_format_class  OutputFormat,
1662                 enum output_encoder_class Output,
1663                 double PixelClock,
1664                 double PixelClockBackEnd)
1665 {
1666         unsigned int DSCDelayRequirement_val;
1667
1668         if (DSCEnabled == true && OutputBpp != 0) {
1669                 if (ODMMode == dm_odm_combine_mode_4to1) {
1670                         DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1671                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1672                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1673                 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1674                         DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1675                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1676                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1677                 } else {
1678                         DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1679                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1680                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1681                 }
1682
1683                 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1684                                 dml_ceil(DSCDelayRequirement_val / HActive, 1);
1685
1686                 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1687
1688         } else {
1689                 DSCDelayRequirement_val = 0;
1690         }
1691
1692 #ifdef __DML_VBA_DEBUG__
1693         dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1694         dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1695         dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1696         dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1697         dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1698         dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1699         dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1700 #endif
1701
1702         return DSCDelayRequirement_val;
1703 }
1704
1705 void dml32_CalculateSurfaceSizeInMall(
1706                 unsigned int NumberOfActiveSurfaces,
1707                 unsigned int MALLAllocatedForDCN,
1708                 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1709                 bool DCCEnable[],
1710                 bool ViewportStationary[],
1711                 unsigned int ViewportXStartY[],
1712                 unsigned int ViewportYStartY[],
1713                 unsigned int ViewportXStartC[],
1714                 unsigned int ViewportYStartC[],
1715                 unsigned int ViewportWidthY[],
1716                 unsigned int ViewportHeightY[],
1717                 unsigned int BytesPerPixelY[],
1718                 unsigned int ViewportWidthC[],
1719                 unsigned int ViewportHeightC[],
1720                 unsigned int BytesPerPixelC[],
1721                 unsigned int SurfaceWidthY[],
1722                 unsigned int SurfaceWidthC[],
1723                 unsigned int SurfaceHeightY[],
1724                 unsigned int SurfaceHeightC[],
1725                 unsigned int Read256BytesBlockWidthY[],
1726                 unsigned int Read256BytesBlockWidthC[],
1727                 unsigned int Read256BytesBlockHeightY[],
1728                 unsigned int Read256BytesBlockHeightC[],
1729                 unsigned int ReadBlockWidthY[],
1730                 unsigned int ReadBlockWidthC[],
1731                 unsigned int ReadBlockHeightY[],
1732                 unsigned int ReadBlockHeightC[],
1733
1734                 /* Output */
1735                 unsigned int    SurfaceSizeInMALL[],
1736                 bool *ExceededMALLSize)
1737 {
1738         unsigned int TotalSurfaceSizeInMALL  = 0;
1739         unsigned int k;
1740
1741         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1742                 if (ViewportStationary[k]) {
1743                         SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1744                                         dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1745                                                 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1746                                                 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1747                                                 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1748                                                 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1749                                                 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1750
1751                         if (ReadBlockWidthC[k] > 0) {
1752                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1753                                                 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1754                                                         dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1755                                                         ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1756                                                         dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1757                                                         dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1758                                                         dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1759                                                         ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1760                                                         dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1761                                                         BytesPerPixelC[k];
1762                         }
1763                         if (DCCEnable[k] == true) {
1764                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1765                                                 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1766                                                         dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1767                                                         Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1768                                                         - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1769                                                         * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1770                                                         Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1771                                                         ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1772                                                         Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1773                                                         * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1774                                 if (Read256BytesBlockWidthC[k] > 0) {
1775                                         SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1776                                                         dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1777                                                                 Read256BytesBlockWidthC[k]),
1778                                                                 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1779                                                                 * Read256BytesBlockWidthC[k] - 1, 8 *
1780                                                                 Read256BytesBlockWidthC[k]) -
1781                                                                 dml_floor(ViewportXStartC[k], 8 *
1782                                                                 Read256BytesBlockWidthC[k])) *
1783                                                                 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1784                                                                 Read256BytesBlockHeightC[k]),
1785                                                                 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1786                                                                 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1787                                                                 Read256BytesBlockHeightC[k]) -
1788                                                                 dml_floor(ViewportYStartC[k], 8 *
1789                                                                 Read256BytesBlockHeightC[k])) *
1790                                                                 BytesPerPixelC[k] / 256;
1791                                 }
1792                         }
1793                 } else {
1794                         SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1795                                         ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1796                                         dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1797                                                         ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1798                                                         BytesPerPixelY[k];
1799                         if (ReadBlockWidthC[k] > 0) {
1800                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1801                                                 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1802                                                                 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1803                                                 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1804                                                                 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1805                                                                 BytesPerPixelC[k];
1806                         }
1807                         if (DCCEnable[k] == true) {
1808                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1809                                                 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1810                                                                 Read256BytesBlockWidthY[k] - 1), 8 *
1811                                                                 Read256BytesBlockWidthY[k]) *
1812                                                 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1813                                                                 Read256BytesBlockHeightY[k] - 1), 8 *
1814                                                                 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1815
1816                                 if (Read256BytesBlockWidthC[k] > 0) {
1817                                         SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1818                                                         dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1819                                                                         Read256BytesBlockWidthC[k] - 1), 8 *
1820                                                                         Read256BytesBlockWidthC[k]) *
1821                                                         dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1822                                                                         Read256BytesBlockHeightC[k] - 1), 8 *
1823                                                                         Read256BytesBlockHeightC[k]) *
1824                                                                         BytesPerPixelC[k] / 256;
1825                                 }
1826                         }
1827                 }
1828         }
1829
1830         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1831                 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1832                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1833         }
1834         *ExceededMALLSize =  (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
1835 } // CalculateSurfaceSizeInMall
1836
1837 void dml32_CalculateVMRowAndSwath(
1838                 unsigned int NumberOfActiveSurfaces,
1839                 DmlPipe myPipe[],
1840                 unsigned int SurfaceSizeInMALL[],
1841                 unsigned int PTEBufferSizeInRequestsLuma,
1842                 unsigned int PTEBufferSizeInRequestsChroma,
1843                 unsigned int DCCMetaBufferSizeBytes,
1844                 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1845                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1846                 unsigned int MALLAllocatedForDCN,
1847                 double SwathWidthY[],
1848                 double SwathWidthC[],
1849                 bool GPUVMEnable,
1850                 bool HostVMEnable,
1851                 unsigned int HostVMMaxNonCachedPageTableLevels,
1852                 unsigned int GPUVMMaxPageTableLevels,
1853                 unsigned int GPUVMMinPageSizeKBytes[],
1854                 unsigned int HostVMMinPageSize,
1855
1856                 /* Output */
1857                 bool PTEBufferSizeNotExceeded[],
1858                 bool DCCMetaBufferSizeNotExceeded[],
1859                 unsigned int dpte_row_width_luma_ub[],
1860                 unsigned int dpte_row_width_chroma_ub[],
1861                 unsigned int dpte_row_height_luma[],
1862                 unsigned int dpte_row_height_chroma[],
1863                 unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1864                 unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1865                 unsigned int meta_req_width[],
1866                 unsigned int meta_req_width_chroma[],
1867                 unsigned int meta_req_height[],
1868                 unsigned int meta_req_height_chroma[],
1869                 unsigned int meta_row_width[],
1870                 unsigned int meta_row_width_chroma[],
1871                 unsigned int meta_row_height[],
1872                 unsigned int meta_row_height_chroma[],
1873                 unsigned int vm_group_bytes[],
1874                 unsigned int dpte_group_bytes[],
1875                 unsigned int PixelPTEReqWidthY[],
1876                 unsigned int PixelPTEReqHeightY[],
1877                 unsigned int PTERequestSizeY[],
1878                 unsigned int PixelPTEReqWidthC[],
1879                 unsigned int PixelPTEReqHeightC[],
1880                 unsigned int PTERequestSizeC[],
1881                 unsigned int dpde0_bytes_per_frame_ub_l[],
1882                 unsigned int meta_pte_bytes_per_frame_ub_l[],
1883                 unsigned int dpde0_bytes_per_frame_ub_c[],
1884                 unsigned int meta_pte_bytes_per_frame_ub_c[],
1885                 double PrefetchSourceLinesY[],
1886                 double PrefetchSourceLinesC[],
1887                 double VInitPreFillY[],
1888                 double VInitPreFillC[],
1889                 unsigned int MaxNumSwathY[],
1890                 unsigned int MaxNumSwathC[],
1891                 double meta_row_bw[],
1892                 double dpte_row_bw[],
1893                 double PixelPTEBytesPerRow[],
1894                 double PDEAndMetaPTEBytesFrame[],
1895                 double MetaRowByte[],
1896                 bool use_one_row_for_frame[],
1897                 bool use_one_row_for_frame_flip[],
1898                 bool UsesMALLForStaticScreen[],
1899                 bool PTE_BUFFER_MODE[],
1900                 unsigned int BIGK_FRAGMENT_SIZE[])
1901 {
1902         unsigned int k;
1903         unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1904         unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1905         unsigned int PDEAndMetaPTEBytesFrameY;
1906         unsigned int PDEAndMetaPTEBytesFrameC;
1907         unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1908         unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1909         unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1910         unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1911         unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1912         unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1913         unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1914         unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1915         unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1916         unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1917         bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1918
1919         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1920                 if (HostVMEnable == true) {
1921                         vm_group_bytes[k] = 512;
1922                         dpte_group_bytes[k] = 512;
1923                 } else if (GPUVMEnable == true) {
1924                         vm_group_bytes[k] = 2048;
1925                         if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1926                                 dpte_group_bytes[k] = 512;
1927                         else
1928                                 dpte_group_bytes[k] = 2048;
1929                 } else {
1930                         vm_group_bytes[k] = 0;
1931                         dpte_group_bytes[k] = 0;
1932                 }
1933
1934                 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
1935                                 myPipe[k].SourcePixelFormat == dm_420_12 ||
1936                                 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
1937                         if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
1938                                         !IsVertical(myPipe[k].SourceRotation)) {
1939                                 PTEBufferSizeInRequestsForLuma[k] =
1940                                                 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
1941                                 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
1942                         } else {
1943                                 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
1944                                 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
1945                         }
1946
1947                         PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
1948                                         myPipe[k].ViewportStationary,
1949                                         myPipe[k].DCCEnable,
1950                                         myPipe[k].DPPPerSurface,
1951                                         myPipe[k].BlockHeight256BytesC,
1952                                         myPipe[k].BlockWidth256BytesC,
1953                                         myPipe[k].SourcePixelFormat,
1954                                         myPipe[k].SurfaceTiling,
1955                                         myPipe[k].BytePerPixelC,
1956                                         myPipe[k].SourceRotation,
1957                                         SwathWidthC[k],
1958                                         myPipe[k].ViewportHeightChroma,
1959                                         myPipe[k].ViewportXStartC,
1960                                         myPipe[k].ViewportYStartC,
1961                                         GPUVMEnable,
1962                                         HostVMEnable,
1963                                         HostVMMaxNonCachedPageTableLevels,
1964                                         GPUVMMaxPageTableLevels,
1965                                         GPUVMMinPageSizeKBytes[k],
1966                                         HostVMMinPageSize,
1967                                         PTEBufferSizeInRequestsForChroma[k],
1968                                         myPipe[k].PitchC,
1969                                         myPipe[k].DCCMetaPitchC,
1970                                         myPipe[k].BlockWidthC,
1971                                         myPipe[k].BlockHeightC,
1972
1973                                         /* Output */
1974                                         &MetaRowByteC[k],
1975                                         &PixelPTEBytesPerRowC[k],
1976                                         &dpte_row_width_chroma_ub[k],
1977                                         &dpte_row_height_chroma[k],
1978                                         &dpte_row_height_linear_chroma[k],
1979                                         &PixelPTEBytesPerRowC_one_row_per_frame[k],
1980                                         &dpte_row_width_chroma_ub_one_row_per_frame[k],
1981                                         &dpte_row_height_chroma_one_row_per_frame[k],
1982                                         &meta_req_width_chroma[k],
1983                                         &meta_req_height_chroma[k],
1984                                         &meta_row_width_chroma[k],
1985                                         &meta_row_height_chroma[k],
1986                                         &PixelPTEReqWidthC[k],
1987                                         &PixelPTEReqHeightC[k],
1988                                         &PTERequestSizeC[k],
1989                                         &dpde0_bytes_per_frame_ub_c[k],
1990                                         &meta_pte_bytes_per_frame_ub_c[k]);
1991
1992                         PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
1993                                         myPipe[k].VRatioChroma,
1994                                         myPipe[k].VTapsChroma,
1995                                         myPipe[k].InterlaceEnable,
1996                                         myPipe[k].ProgressiveToInterlaceUnitInOPP,
1997                                         myPipe[k].SwathHeightC,
1998                                         myPipe[k].SourceRotation,
1999                                         myPipe[k].ViewportStationary,
2000                                         SwathWidthC[k],
2001                                         myPipe[k].ViewportHeightChroma,
2002                                         myPipe[k].ViewportXStartC,
2003                                         myPipe[k].ViewportYStartC,
2004
2005                                         /* Output */
2006                                         &VInitPreFillC[k],
2007                                         &MaxNumSwathC[k]);
2008                 } else {
2009                         PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2010                         PTEBufferSizeInRequestsForChroma[k] = 0;
2011                         PixelPTEBytesPerRowC[k] = 0;
2012                         PDEAndMetaPTEBytesFrameC = 0;
2013                         MetaRowByteC[k] = 0;
2014                         MaxNumSwathC[k] = 0;
2015                         PrefetchSourceLinesC[k] = 0;
2016                         dpte_row_height_chroma_one_row_per_frame[k] = 0;
2017                         dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2018                         PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2019                 }
2020
2021                 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2022                                 myPipe[k].ViewportStationary,
2023                                 myPipe[k].DCCEnable,
2024                                 myPipe[k].DPPPerSurface,
2025                                 myPipe[k].BlockHeight256BytesY,
2026                                 myPipe[k].BlockWidth256BytesY,
2027                                 myPipe[k].SourcePixelFormat,
2028                                 myPipe[k].SurfaceTiling,
2029                                 myPipe[k].BytePerPixelY,
2030                                 myPipe[k].SourceRotation,
2031                                 SwathWidthY[k],
2032                                 myPipe[k].ViewportHeight,
2033                                 myPipe[k].ViewportXStart,
2034                                 myPipe[k].ViewportYStart,
2035                                 GPUVMEnable,
2036                                 HostVMEnable,
2037                                 HostVMMaxNonCachedPageTableLevels,
2038                                 GPUVMMaxPageTableLevels,
2039                                 GPUVMMinPageSizeKBytes[k],
2040                                 HostVMMinPageSize,
2041                                 PTEBufferSizeInRequestsForLuma[k],
2042                                 myPipe[k].PitchY,
2043                                 myPipe[k].DCCMetaPitchY,
2044                                 myPipe[k].BlockWidthY,
2045                                 myPipe[k].BlockHeightY,
2046
2047                                 /* Output */
2048                                 &MetaRowByteY[k],
2049                                 &PixelPTEBytesPerRowY[k],
2050                                 &dpte_row_width_luma_ub[k],
2051                                 &dpte_row_height_luma[k],
2052                                 &dpte_row_height_linear_luma[k],
2053                                 &PixelPTEBytesPerRowY_one_row_per_frame[k],
2054                                 &dpte_row_width_luma_ub_one_row_per_frame[k],
2055                                 &dpte_row_height_luma_one_row_per_frame[k],
2056                                 &meta_req_width[k],
2057                                 &meta_req_height[k],
2058                                 &meta_row_width[k],
2059                                 &meta_row_height[k],
2060                                 &PixelPTEReqWidthY[k],
2061                                 &PixelPTEReqHeightY[k],
2062                                 &PTERequestSizeY[k],
2063                                 &dpde0_bytes_per_frame_ub_l[k],
2064                                 &meta_pte_bytes_per_frame_ub_l[k]);
2065
2066                 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2067                                 myPipe[k].VRatio,
2068                                 myPipe[k].VTaps,
2069                                 myPipe[k].InterlaceEnable,
2070                                 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2071                                 myPipe[k].SwathHeightY,
2072                                 myPipe[k].SourceRotation,
2073                                 myPipe[k].ViewportStationary,
2074                                 SwathWidthY[k],
2075                                 myPipe[k].ViewportHeight,
2076                                 myPipe[k].ViewportXStart,
2077                                 myPipe[k].ViewportYStart,
2078
2079                                 /* Output */
2080                                 &VInitPreFillY[k],
2081                                 &MaxNumSwathY[k]);
2082
2083                 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2084                 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2085
2086                 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2087                                 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2088                         PTEBufferSizeNotExceeded[k] = true;
2089                 } else {
2090                         PTEBufferSizeNotExceeded[k] = false;
2091                 }
2092
2093                 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2094                         PTEBufferSizeInRequestsForLuma[k] &&
2095                         PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2096         }
2097
2098         dml32_CalculateMALLUseForStaticScreen(
2099                         NumberOfActiveSurfaces,
2100                         MALLAllocatedForDCN,
2101                         UseMALLForStaticScreen,   // mode
2102                         SurfaceSizeInMALL,
2103                         one_row_per_frame_fits_in_buffer,
2104                         /* Output */
2105                         UsesMALLForStaticScreen); // boolen
2106
2107         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2108                 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2109                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2110                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2111                                 (GPUVMMinPageSizeKBytes[k] > 64);
2112                 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2113         }
2114
2115         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2116 #ifdef __DML_VBA_DEBUG__
2117                 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2118                 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2119 #endif
2120                 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2121                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2122                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2123                                 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2124
2125                 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2126                                 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2127
2128                 if (use_one_row_for_frame[k]) {
2129                         dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2130                         dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2131                         PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2132                         dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2133                         dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2134                         PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2135                         PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2136                 }
2137
2138                 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2139                         DCCMetaBufferSizeNotExceeded[k] = true;
2140                 else
2141                         DCCMetaBufferSizeNotExceeded[k] = false;
2142
2143                 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2144                 if (use_one_row_for_frame[k])
2145                         PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2146
2147                 dml32_CalculateRowBandwidth(
2148                                 GPUVMEnable,
2149                                 myPipe[k].SourcePixelFormat,
2150                                 myPipe[k].VRatio,
2151                                 myPipe[k].VRatioChroma,
2152                                 myPipe[k].DCCEnable,
2153                                 myPipe[k].HTotal / myPipe[k].PixelClock,
2154                                 MetaRowByteY[k], MetaRowByteC[k],
2155                                 meta_row_height[k],
2156                                 meta_row_height_chroma[k],
2157                                 PixelPTEBytesPerRowY[k],
2158                                 PixelPTEBytesPerRowC[k],
2159                                 dpte_row_height_luma[k],
2160                                 dpte_row_height_chroma[k],
2161
2162                                 /* Output */
2163                                 &meta_row_bw[k],
2164                                 &dpte_row_bw[k]);
2165 #ifdef __DML_VBA_DEBUG__
2166                 dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2167                 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2168                                 __func__, k, use_one_row_for_frame_flip[k]);
2169                 dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2170                                 __func__, k, UseMALLForPStateChange[k]);
2171                 dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2172                 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2173                                 __func__, k, dpte_row_width_luma_ub[k]);
2174                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
2175                 dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2176                                 __func__, k, dpte_row_height_chroma[k]);
2177                 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2178                                 __func__, k, dpte_row_width_chroma_ub[k]);
2179                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
2180                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2181                 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2182                                 __func__, k, PTEBufferSizeNotExceeded[k]);
2183                 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2184                 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2185 #endif
2186         }
2187 } // CalculateVMRowAndSwath
2188
2189 unsigned int dml32_CalculateVMAndRowBytes(
2190                 bool ViewportStationary,
2191                 bool DCCEnable,
2192                 unsigned int NumberOfDPPs,
2193                 unsigned int BlockHeight256Bytes,
2194                 unsigned int BlockWidth256Bytes,
2195                 enum source_format_class SourcePixelFormat,
2196                 unsigned int SurfaceTiling,
2197                 unsigned int BytePerPixel,
2198                 enum dm_rotation_angle SourceRotation,
2199                 double SwathWidth,
2200                 unsigned int ViewportHeight,
2201                 unsigned int    ViewportXStart,
2202                 unsigned int    ViewportYStart,
2203                 bool GPUVMEnable,
2204                 bool HostVMEnable,
2205                 unsigned int HostVMMaxNonCachedPageTableLevels,
2206                 unsigned int GPUVMMaxPageTableLevels,
2207                 unsigned int GPUVMMinPageSizeKBytes,
2208                 unsigned int HostVMMinPageSize,
2209                 unsigned int PTEBufferSizeInRequests,
2210                 unsigned int Pitch,
2211                 unsigned int DCCMetaPitch,
2212                 unsigned int MacroTileWidth,
2213                 unsigned int MacroTileHeight,
2214
2215                 /* Output */
2216                 unsigned int *MetaRowByte,
2217                 unsigned int *PixelPTEBytesPerRow,
2218                 unsigned int    *dpte_row_width_ub,
2219                 unsigned int *dpte_row_height,
2220                 unsigned int *dpte_row_height_linear,
2221                 unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2222                 unsigned int    *dpte_row_width_ub_one_row_per_frame,
2223                 unsigned int    *dpte_row_height_one_row_per_frame,
2224                 unsigned int *MetaRequestWidth,
2225                 unsigned int *MetaRequestHeight,
2226                 unsigned int *meta_row_width,
2227                 unsigned int *meta_row_height,
2228                 unsigned int *PixelPTEReqWidth,
2229                 unsigned int *PixelPTEReqHeight,
2230                 unsigned int *PTERequestSize,
2231                 unsigned int    *DPDE0BytesFrame,
2232                 unsigned int    *MetaPTEBytesFrame)
2233 {
2234         unsigned int MPDEBytesFrame;
2235         unsigned int DCCMetaSurfaceBytes;
2236         unsigned int ExtraDPDEBytesFrame;
2237         unsigned int PDEAndMetaPTEBytesFrame;
2238         unsigned int HostVMDynamicLevels = 0;
2239         unsigned int    MacroTileSizeBytes;
2240         unsigned int    vp_height_meta_ub;
2241         unsigned int    vp_height_dpte_ub;
2242         unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2243
2244         if (GPUVMEnable == true && HostVMEnable == true) {
2245                 if (HostVMMinPageSize < 2048)
2246                         HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2247                 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2248                         HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2249                 else
2250                         HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2251         }
2252
2253         *MetaRequestHeight = 8 * BlockHeight256Bytes;
2254         *MetaRequestWidth = 8 * BlockWidth256Bytes;
2255         if (SurfaceTiling == dm_sw_linear) {
2256                 *meta_row_height = 32;
2257                 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2258                                 - dml_floor(ViewportXStart, *MetaRequestWidth);
2259         } else if (!IsVertical(SourceRotation)) {
2260                 *meta_row_height = *MetaRequestHeight;
2261                 if (ViewportStationary && NumberOfDPPs == 1) {
2262                         *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2263                                         *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2264                 } else {
2265                         *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2266                 }
2267                 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2268         } else {
2269                 *meta_row_height = *MetaRequestWidth;
2270                 if (ViewportStationary && NumberOfDPPs == 1) {
2271                         *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2272                                         *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2273                 } else {
2274                         *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2275                 }
2276                 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2277         }
2278
2279         if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2280                 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2281                                 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2282         } else if (!IsVertical(SourceRotation)) {
2283                 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2284         } else {
2285                 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2286         }
2287
2288         DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2289
2290         if (GPUVMEnable == true) {
2291                 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2292                                 (8 * 4.0 * 1024), 1) + 1) * 64;
2293                 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2294         } else {
2295                 *MetaPTEBytesFrame = 0;
2296                 MPDEBytesFrame = 0;
2297         }
2298
2299         if (DCCEnable != true) {
2300                 *MetaPTEBytesFrame = 0;
2301                 MPDEBytesFrame = 0;
2302                 *MetaRowByte = 0;
2303         }
2304
2305         MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2306
2307         if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2308                 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2309                         vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2310                                         MacroTileHeight - 1, MacroTileHeight) -
2311                                         dml_floor(ViewportYStart, MacroTileHeight);
2312                 } else if (!IsVertical(SourceRotation)) {
2313                         vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2314                 } else {
2315                         vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2316                 }
2317                 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2318                                 (8 * 2097152), 1) + 1);
2319                 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2320         } else {
2321                 *DPDE0BytesFrame = 0;
2322                 ExtraDPDEBytesFrame = 0;
2323                 vp_height_dpte_ub = 0;
2324         }
2325
2326         PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2327
2328 #ifdef __DML_VBA_DEBUG__
2329         dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2330         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2331         dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2332         dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2333         dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2334         dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2335         dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2336         dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2337         dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2338         dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2339         dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2340         dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2341         dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2342         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2343         dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2344         dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2345         dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2346 #endif
2347
2348         if (HostVMEnable == true)
2349                 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2350
2351         if (SurfaceTiling == dm_sw_linear) {
2352                 *PixelPTEReqHeight = 1;
2353                 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2354                 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2355                 *PTERequestSize = 64;
2356         } else if (GPUVMMinPageSizeKBytes == 4) {
2357                 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2358                 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2359                 *PTERequestSize = 128;
2360         } else {
2361                 *PixelPTEReqHeight = MacroTileHeight;
2362                 *PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2363                 *PTERequestSize = 64;
2364         }
2365 #ifdef __DML_VBA_DEBUG__
2366         dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2367         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2368         dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2369         dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2370         dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2371         dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2372         dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2373 #endif
2374
2375         *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2376         *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2377                         (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2378                                         (double) *PixelPTEReqWidth;
2379         *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2380                         *PTERequestSize;
2381
2382         if (SurfaceTiling == dm_sw_linear) {
2383                 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2384                                 *PixelPTEReqWidth / Pitch), 1));
2385 #ifdef __DML_VBA_DEBUG__
2386                 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2387                                 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2388                 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2389                                 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2390                 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2391                                 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2392                 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2393                                 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2394                                                 *PixelPTEReqWidth / Pitch), 1));
2395                 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2396 #endif
2397                 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2398                                 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2399                 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2400
2401                 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2402                 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2403                                 PixelPTEReqWidth_linear / Pitch), 1);
2404                 if (*dpte_row_height_linear > 128)
2405                         *dpte_row_height_linear = 128;
2406
2407         } else if (!IsVertical(SourceRotation)) {
2408                 *dpte_row_height = *PixelPTEReqHeight;
2409
2410                 if (GPUVMMinPageSizeKBytes > 64) {
2411                         *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2412                                         *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2413                 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2414                         *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2415                                         *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2416                                         dml_floor(ViewportXStart, *PixelPTEReqWidth);
2417                 } else {
2418                         *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2419                                         *PixelPTEReqWidth;
2420                 }
2421
2422                 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2423         } else {
2424                 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2425
2426                 if (ViewportStationary && (NumberOfDPPs == 1)) {
2427                         *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2428                                         *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2429                 } else {
2430                         *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2431                                         * *PixelPTEReqHeight;
2432                 }
2433
2434                 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2435         }
2436
2437         if (GPUVMEnable != true)
2438                 *PixelPTEBytesPerRow = 0;
2439         if (HostVMEnable == true)
2440                 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2441
2442 #ifdef __DML_VBA_DEBUG__
2443         dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2444         dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2445         dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2446         dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2447         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2448         dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2449         dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2450         dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2451                         __func__, *dpte_row_width_ub_one_row_per_frame);
2452         dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2453                         __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2454         dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2455                         *MetaPTEBytesFrame);
2456 #endif
2457
2458         return PDEAndMetaPTEBytesFrame;
2459 } // CalculateVMAndRowBytes
2460
2461 double dml32_CalculatePrefetchSourceLines(
2462                 double VRatio,
2463                 unsigned int VTaps,
2464                 bool Interlace,
2465                 bool ProgressiveToInterlaceUnitInOPP,
2466                 unsigned int SwathHeight,
2467                 enum dm_rotation_angle SourceRotation,
2468                 bool ViewportStationary,
2469                 double SwathWidth,
2470                 unsigned int ViewportHeight,
2471                 unsigned int ViewportXStart,
2472                 unsigned int ViewportYStart,
2473
2474                 /* Output */
2475                 double *VInitPreFill,
2476                 unsigned int *MaxNumSwath)
2477 {
2478
2479         unsigned int vp_start_rot;
2480         unsigned int sw0_tmp;
2481         unsigned int MaxPartialSwath;
2482         double numLines;
2483
2484 #ifdef __DML_VBA_DEBUG__
2485         dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2486         dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2487         dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2488         dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2489         dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2490         dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2491 #endif
2492         if (ProgressiveToInterlaceUnitInOPP)
2493                 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2494         else
2495                 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2496
2497         if (ViewportStationary) {
2498                 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2499                         vp_start_rot = SwathHeight -
2500                                         (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2501                 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2502                         vp_start_rot = ViewportXStart;
2503                 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2504                         vp_start_rot = SwathHeight -
2505                                         (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2506                 } else {
2507                         vp_start_rot = ViewportYStart;
2508                 }
2509                 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2510                 if (sw0_tmp < *VInitPreFill)
2511                         *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2512                 else
2513                         *MaxNumSwath = 1;
2514                 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2515         } else {
2516                 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2517                 if (*VInitPreFill > 1)
2518                         MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2519                 else
2520                         MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2521         }
2522         numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2523
2524 #ifdef __DML_VBA_DEBUG__
2525         dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2526         dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2527         dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2528         dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2529         dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2530 #endif
2531         return numLines;
2532
2533 } // CalculatePrefetchSourceLines
2534
2535 void dml32_CalculateMALLUseForStaticScreen(
2536                 unsigned int NumberOfActiveSurfaces,
2537                 unsigned int MALLAllocatedForDCNFinal,
2538                 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2539                 unsigned int SurfaceSizeInMALL[],
2540                 bool one_row_per_frame_fits_in_buffer[],
2541
2542                 /* output */
2543                 bool UsesMALLForStaticScreen[])
2544 {
2545         unsigned int k;
2546         unsigned int SurfaceToAddToMALL;
2547         bool CanAddAnotherSurfaceToMALL;
2548         unsigned int TotalSurfaceSizeInMALL;
2549
2550         TotalSurfaceSizeInMALL = 0;
2551         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2552                 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2553                 if (UsesMALLForStaticScreen[k])
2554                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2555 #ifdef __DML_VBA_DEBUG__
2556                 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2557                 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2558 #endif
2559         }
2560
2561         SurfaceToAddToMALL = 0;
2562         CanAddAnotherSurfaceToMALL = true;
2563         while (CanAddAnotherSurfaceToMALL) {
2564                 CanAddAnotherSurfaceToMALL = false;
2565                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2566                         if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2567                                         !UsesMALLForStaticScreen[k] &&
2568                                         UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2569                                         one_row_per_frame_fits_in_buffer[k] &&
2570                                         (!CanAddAnotherSurfaceToMALL ||
2571                                         SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2572                                 CanAddAnotherSurfaceToMALL = true;
2573                                 SurfaceToAddToMALL = k;
2574 #ifdef __DML_VBA_DEBUG__
2575                                 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2576                                                 __func__, k, UseMALLForStaticScreen[k]);
2577 #endif
2578                         }
2579                 }
2580                 if (CanAddAnotherSurfaceToMALL) {
2581                         UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2582                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2583
2584 #ifdef __DML_VBA_DEBUG__
2585                         dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2586                         dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2587 #endif
2588
2589                 }
2590         }
2591 }
2592
2593 void dml32_CalculateRowBandwidth(
2594                 bool GPUVMEnable,
2595                 enum source_format_class SourcePixelFormat,
2596                 double VRatio,
2597                 double VRatioChroma,
2598                 bool DCCEnable,
2599                 double LineTime,
2600                 unsigned int MetaRowByteLuma,
2601                 unsigned int MetaRowByteChroma,
2602                 unsigned int meta_row_height_luma,
2603                 unsigned int meta_row_height_chroma,
2604                 unsigned int PixelPTEBytesPerRowLuma,
2605                 unsigned int PixelPTEBytesPerRowChroma,
2606                 unsigned int dpte_row_height_luma,
2607                 unsigned int dpte_row_height_chroma,
2608                 /* Output */
2609                 double *meta_row_bw,
2610                 double *dpte_row_bw)
2611 {
2612         if (DCCEnable != true) {
2613                 *meta_row_bw = 0;
2614         } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2615                         SourcePixelFormat == dm_rgbe_alpha) {
2616                 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2617                                 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2618         } else {
2619                 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2620         }
2621
2622         if (GPUVMEnable != true) {
2623                 *dpte_row_bw = 0;
2624         } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2625                         SourcePixelFormat == dm_rgbe_alpha) {
2626                 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2627                                 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2628         } else {
2629                 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2630         }
2631 }
2632
/*
 * Return the urgent latency: the largest of the three supplied latencies,
 * plus - when DoUrgentLatencyAdjustment is set - the term
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double WorstLatency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	/* Without the adjustment the worst case of the three inputs is the answer. */
	if (!DoUrgentLatencyAdjustment)
		return WorstLatency;

	WorstLatency = WorstLatency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);

	return WorstLatency;
}
2651
2652 void dml32_CalculateUrgentBurstFactor(
2653                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2654                 unsigned int    swath_width_luma_ub,
2655                 unsigned int    swath_width_chroma_ub,
2656                 unsigned int SwathHeightY,
2657                 unsigned int SwathHeightC,
2658                 double  LineTime,
2659                 double  UrgentLatency,
2660                 double  CursorBufferSize,
2661                 unsigned int CursorWidth,
2662                 unsigned int CursorBPP,
2663                 double  VRatio,
2664                 double  VRatioC,
2665                 double  BytePerPixelInDETY,
2666                 double  BytePerPixelInDETC,
2667                 unsigned int    DETBufferSizeY,
2668                 unsigned int    DETBufferSizeC,
2669                 /* Output */
2670                 double *UrgentBurstFactorCursor,
2671                 double *UrgentBurstFactorLuma,
2672                 double *UrgentBurstFactorChroma,
2673                 bool   *NotEnoughUrgentLatencyHiding)
2674 {
2675         double       LinesInDETLuma;
2676         double       LinesInDETChroma;
2677         unsigned int LinesInCursorBuffer;
2678         double       CursorBufferSizeInTime;
2679         double       DETBufferSizeInTimeLuma;
2680         double       DETBufferSizeInTimeChroma;
2681
2682         *NotEnoughUrgentLatencyHiding = 0;
2683
2684         if (CursorWidth > 0) {
2685                 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2686                                 (CursorWidth * CursorBPP / 8.0)), 1.0);
2687                 if (VRatio > 0) {
2688                         CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2689                         if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2690                                 *NotEnoughUrgentLatencyHiding = 1;
2691                                 *UrgentBurstFactorCursor = 0;
2692                         } else {
2693                                 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2694                                                 (CursorBufferSizeInTime - UrgentLatency);
2695                         }
2696                 } else {
2697                         *UrgentBurstFactorCursor = 1;
2698                 }
2699         }
2700
2701         LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2702                         DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2703
2704         if (VRatio > 0) {
2705                 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2706                 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2707                         *NotEnoughUrgentLatencyHiding = 1;
2708                         *UrgentBurstFactorLuma = 0;
2709                 } else {
2710                         *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2711                 }
2712         } else {
2713                 *UrgentBurstFactorLuma = 1;
2714         }
2715
2716         if (BytePerPixelInDETC > 0) {
2717                 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2718                                         1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2719                                         / swath_width_chroma_ub;
2720
2721                 if (VRatio > 0) {
2722                         DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2723                         if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2724                                 *NotEnoughUrgentLatencyHiding = 1;
2725                                 *UrgentBurstFactorChroma = 0;
2726                         } else {
2727                                 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2728                                                 / (DETBufferSizeInTimeChroma - UrgentLatency);
2729                         }
2730                 } else {
2731                         *UrgentBurstFactorChroma = 1;
2732                 }
2733         }
2734 } // CalculateUrgentBurstFactor
2735
2736 void dml32_CalculateDCFCLKDeepSleep(
2737                 unsigned int NumberOfActiveSurfaces,
2738                 unsigned int BytePerPixelY[],
2739                 unsigned int BytePerPixelC[],
2740                 double VRatio[],
2741                 double VRatioChroma[],
2742                 double SwathWidthY[],
2743                 double SwathWidthC[],
2744                 unsigned int DPPPerSurface[],
2745                 double HRatio[],
2746                 double HRatioChroma[],
2747                 double PixelClock[],
2748                 double PSCL_THROUGHPUT[],
2749                 double PSCL_THROUGHPUT_CHROMA[],
2750                 double Dppclk[],
2751                 double ReadBandwidthLuma[],
2752                 double ReadBandwidthChroma[],
2753                 unsigned int ReturnBusWidth,
2754
2755                 /* Output */
2756                 double *DCFClkDeepSleep)
2757 {
2758         unsigned int k;
2759         double   DisplayPipeLineDeliveryTimeLuma;
2760         double   DisplayPipeLineDeliveryTimeChroma;
2761         double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2762         double ReadBandwidth = 0.0;
2763
2764         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2765
2766                 if (VRatio[k] <= 1) {
2767                         DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2768                                         / PixelClock[k];
2769                 } else {
2770                         DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2771                 }
2772                 if (BytePerPixelC[k] == 0) {
2773                         DisplayPipeLineDeliveryTimeChroma = 0;
2774                 } else {
2775                         if (VRatioChroma[k] <= 1) {
2776                                 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2777                                                 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2778                         } else {
2779                                 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2780                                                 / Dppclk[k];
2781                         }
2782                 }
2783
2784                 if (BytePerPixelC[k] > 0) {
2785                         DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2786                                         BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2787                                         __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2788                                         32.0 / DisplayPipeLineDeliveryTimeChroma);
2789                 } else {
2790                         DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2791                                         64.0 / DisplayPipeLineDeliveryTimeLuma;
2792                 }
2793                 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2794
2795 #ifdef __DML_VBA_DEBUG__
2796                 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2797                 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2798 #endif
2799         }
2800
2801         for (k = 0; k < NumberOfActiveSurfaces; ++k)
2802                 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2803
2804         *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2805
2806 #ifdef __DML_VBA_DEBUG__
2807         dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2808         dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2809         dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2810         dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2811 #endif
2812
2813         for (k = 0; k < NumberOfActiveSurfaces; ++k)
2814                 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2815 #ifdef __DML_VBA_DEBUG__
2816         dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2817 #endif
2818 } // CalculateDCFCLKDeepSleep
2819
2820 double dml32_CalculateWriteBackDelay(
2821                 enum source_format_class WritebackPixelFormat,
2822                 double WritebackHRatio,
2823                 double WritebackVRatio,
2824                 unsigned int WritebackVTaps,
2825                 unsigned int         WritebackDestinationWidth,
2826                 unsigned int         WritebackDestinationHeight,
2827                 unsigned int         WritebackSourceHeight,
2828                 unsigned int HTotal)
2829 {
2830         double CalculateWriteBackDelay;
2831         double Line_length;
2832         double Output_lines_last_notclamped;
2833         double WritebackVInit;
2834
2835         WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2836         Line_length = dml_max((double) WritebackDestinationWidth,
2837                         dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2838         Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2839                         dml_ceil(((double)WritebackSourceHeight -
2840                                         (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2841         if (Output_lines_last_notclamped < 0) {
2842                 CalculateWriteBackDelay = 0;
2843         } else {
2844                 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2845                                 (HTotal - WritebackDestinationWidth) + 80;
2846         }
2847         return CalculateWriteBackDelay;
2848 }
2849
2850 void dml32_UseMinimumDCFCLK(
2851                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2852                 bool DRRDisplay[],
2853                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2854                 unsigned int MaxInterDCNTileRepeaters,
2855                 unsigned int MaxPrefetchMode,
2856                 double DRAMClockChangeLatencyFinal,
2857                 double FCLKChangeLatency,
2858                 double SREnterPlusExitTime,
2859                 unsigned int ReturnBusWidth,
2860                 unsigned int RoundTripPingLatencyCycles,
2861                 unsigned int ReorderingBytes,
2862                 unsigned int PixelChunkSizeInKByte,
2863                 unsigned int MetaChunkSize,
2864                 bool GPUVMEnable,
2865                 unsigned int GPUVMMaxPageTableLevels,
2866                 bool HostVMEnable,
2867                 unsigned int NumberOfActiveSurfaces,
2868                 double HostVMMinPageSize,
2869                 unsigned int HostVMMaxNonCachedPageTableLevels,
2870                 bool DynamicMetadataVMEnabled,
2871                 bool ImmediateFlipRequirement,
2872                 bool ProgressiveToInterlaceUnitInOPP,
2873                 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2874                 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2875                 unsigned int VTotal[],
2876                 unsigned int VActive[],
2877                 unsigned int DynamicMetadataTransmittedBytes[],
2878                 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2879                 bool Interlace[],
2880                 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2881                 double RequiredDISPCLK[][2],
2882                 double UrgLatency[],
2883                 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2884                 double ProjectedDCFClkDeepSleep[][2],
2885                 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2886                 unsigned int TotalNumberOfActiveDPP[][2],
2887                 unsigned int TotalNumberOfDCCActiveDPP[][2],
2888                 unsigned int dpte_group_bytes[],
2889                 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2890                 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2891                 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2892                 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2893                 unsigned int BytePerPixelY[],
2894                 unsigned int BytePerPixelC[],
2895                 unsigned int HTotal[],
2896                 double PixelClock[],
2897                 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2898                 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2899                 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2900                 bool DynamicMetadataEnable[],
2901                 double ReadBandwidthLuma[],
2902                 double ReadBandwidthChroma[],
2903                 double DCFCLKPerState[],
2904                 /* Output */
2905                 double DCFCLKState[][2])
2906 {
2907         unsigned int i, j, k;
2908         unsigned int     dummy1;
2909         double dummy2, dummy3;
2910         double   NormalEfficiency;
2911         double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2912
2913         NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2914         for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2915                 for  (j = 0; j <= 1; ++j) {
2916                         double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2917                         double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2918                         double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2919                         double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2920                         double MinimumTWait = 0.0;
2921                         double DPTEBandwidth;
2922                         double DCFCLKRequiredForAverageBandwidth;
2923                         unsigned int ExtraLatencyBytes;
2924                         double ExtraLatencyCycles;
2925                         double DCFCLKRequiredForPeakBandwidth;
2926                         unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2927                         double MinimumTvmPlus2Tr0;
2928
2929                         TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2930                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2931                                 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2932                                                 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2933                                                                 / (15.75 * HTotal[k] / PixelClock[k]);
2934                         }
2935
2936                         for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
2937                                 NoOfDPPState[k] = NoOfDPP[i][j][k];
2938
2939                         DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
2940                         DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
2941
2942                         ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
2943                                         TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
2944                                         TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
2945                                         NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
2946                                         HostVMMaxNonCachedPageTableLevels);
2947                         ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
2948                                         + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
2949                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2950                                 double DCFCLKCyclesRequiredInPrefetch;
2951                                 double PrefetchTime;
2952
2953                                 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
2954                                                 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
2955                                                 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
2956                                                                 * BytePerPixelC[k]) / NormalEfficiency
2957                                                 / ReturnBusWidth;
2958                                 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
2959                                                 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
2960                                                                 / NormalEfficiency / ReturnBusWidth
2961                                                                 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
2962                                                 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
2963                                                                 / ReturnBusWidth
2964                                                 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
2965                                                 + PixelDCFCLKCyclesRequiredInPrefetch[k];
2966                                 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
2967                                                 * HTotal[k] / PixelClock[k];
2968                                 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
2969                                                 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
2970                                                 UrgLatency[i] * GPUVMMaxPageTableLevels *
2971                                                 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
2972
2973                                 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
2974                                                 UseMALLForPStateChange[k],
2975                                                 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2976                                                 DRRDisplay[k],
2977                                                 DRAMClockChangeLatencyFinal,
2978                                                 FCLKChangeLatency,
2979                                                 UrgLatency[i],
2980                                                 SREnterPlusExitTime);
2981
2982                                 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
2983                                                 MinimumTWait - UrgLatency[i] *
2984                                                 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
2985                                                 GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
2986                                                 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
2987                                                 DynamicMetadataVMExtraLatency[k];
2988
2989                                 if (PrefetchTime > 0) {
2990                                         double ExpectedVRatioPrefetch;
2991
2992                                         ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
2993                                                         PixelDCFCLKCyclesRequiredInPrefetch[k] /
2994                                                         DCFCLKCyclesRequiredInPrefetch);
2995                                         DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
2996                                                         PixelDCFCLKCyclesRequiredInPrefetch[k] /
2997                                                         PrefetchPixelLinesTime[k] *
2998                                                         dml_max(1.0, ExpectedVRatioPrefetch) *
2999                                                         dml_max(1.0, ExpectedVRatioPrefetch / 4);
3000                                         if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3001                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3002                                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3003                                                                 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3004                                                                 NormalEfficiency / ReturnBusWidth;
3005                                         }
3006                                 } else {
3007                                         DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3008                                 }
3009                                 if (DynamicMetadataEnable[k] == true) {
3010                                         double TSetupPipe;
3011                                         double TdmbfPipe;
3012                                         double TdmsksPipe;
3013                                         double TdmecPipe;
3014                                         double AllowedTimeForUrgentExtraLatency;
3015
3016                                         dml32_CalculateVUpdateAndDynamicMetadataParameters(
3017                                                         MaxInterDCNTileRepeaters,
3018                                                         RequiredDPPCLKPerSurface[i][j][k],
3019                                                         RequiredDISPCLK[i][j],
3020                                                         ProjectedDCFClkDeepSleep[i][j],
3021                                                         PixelClock[k],
3022                                                         HTotal[k],
3023                                                         VTotal[k] - VActive[k],
3024                                                         DynamicMetadataTransmittedBytes[k],
3025                                                         DynamicMetadataLinesBeforeActiveRequired[k],
3026                                                         Interlace[k],
3027                                                         ProgressiveToInterlaceUnitInOPP,
3028
3029                                                         /* output */
3030                                                         &TSetupPipe,
3031                                                         &TdmbfPipe,
3032                                                         &TdmecPipe,
3033                                                         &TdmsksPipe,
3034                                                         &dummy1,
3035                                                         &dummy2,
3036                                                         &dummy3);
3037                                         AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3038                                                         PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3039                                                         TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3040                                         if (AllowedTimeForUrgentExtraLatency > 0)
3041                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3042                                                                 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3043                                                                 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3044                                         else
3045                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3046                                 }
3047                         }
3048                         DCFCLKRequiredForPeakBandwidth = 0;
3049                         for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3050                                 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3051                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3052                         }
3053                         MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3054                                         (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3055                                         (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3056                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3057                                 double MaximumTvmPlus2Tr0PlusTsw;
3058
3059                                 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3060                                                 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3061                                 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3062                                         DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3063                                 } else {
3064                                         DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3065                                                         2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3066                                                                 MinimumTvmPlus2Tr0 -
3067                                                                 PrefetchPixelLinesTime[k] / 4),
3068                                                         (2 * ExtraLatencyCycles +
3069                                                                 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3070                                                                 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3071                                 }
3072                         }
3073                         DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3074                                         dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3075                 }
3076         }
3077 }
3078
/*
 * Return the number of extra latency bytes.
 *
 * The base amount is ReorderingBytes plus 1024 bytes for every unit of
 * (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 *  TotalNumberOfDCCActiveDPP * MetaChunkSize).
 * When GPUVMEnable is set, each surface additionally contributes
 * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 * HostVMInefficiencyFactor, where "levels" is 0 unless both GPUVM and HostVM
 * are enabled; in that case it is HostVMMaxNonCachedPageTableLevels reduced
 * by 0, 1 or 2 (not below 0) depending on HostVMMinPageSize.
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int DynamicLevels = 0;
	unsigned int Surface;
	double Bytes;

	if (GPUVMEnable == true && HostVMEnable == true) {
		/* Larger minimum page sizes need fewer page-table levels. */
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	Bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable != true)
		return Bytes;

	for (Surface = 0; Surface < NumberOfActiveSurfaces; ++Surface) {
		Bytes = Bytes + NumberOfDPP[Surface] * dpte_group_bytes[Surface] *
				(1 + 8 * DynamicLevels) * HostVMInefficiencyFactor;
	}

	return Bytes;
}
3119
/*
 * Compute the VUpdate/VReady pixel quantities, the setup time and the
 * dynamic metadata timing terms from the supplied clocks and timing.
 *
 * Inputs:
 *   MaxInterDCNTileRepeaters  multiplier applied to the per-repeater delay
 *                             (2 / Dppclk + 3 / Dispclk)
 *   Dppclk, Dispclk, DCFClkDeepSleep, PixelClock
 *                             clocks used as divisors/multipliers below;
 *                             callers are expected to pass non-zero values
 *   HTotal, VBlank            timing dimensions
 *   DynamicMetadataTransmittedBytes
 *                             byte count used for Tdmbf
 *   DynamicMetadataLinesBeforeActiveRequired
 *                             0 selects the "half of VBlank" rule for Tdmsks,
 *                             any other value is used as a line count
 *   InterlaceEnable, ProgressiveToInterlaceUnitInOPP
 *                             when interlace is on and the OPP unit is not
 *                             used, Tdmsks is halved
 *
 * Outputs (all written unconditionally through the pointers):
 *   VUpdateWidthPix   ceil((14 / DCFClkDeepSleep + 12 / Dppclk + repeater delay) * PixelClock)
 *   VReadyOffsetPix   ceil(max(150 / Dppclk,
 *                              repeater delay + 20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   VUpdateOffsetPix  ceil(HTotal / 4)
 *   TSetup            (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock
 *   Tdmbf             DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   Tdmec             HTotal / PixelClock
 *   Tdmsks            see the rules described for the inputs above
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
                unsigned int MaxInterDCNTileRepeaters,
                double Dppclk,
                double Dispclk,
                double DCFClkDeepSleep,
                double PixelClock,
                unsigned int HTotal,
                unsigned int VBlank,
                unsigned int DynamicMetadataTransmittedBytes,
                unsigned int DynamicMetadataLinesBeforeActiveRequired,
                unsigned int InterlaceEnable,
                bool ProgressiveToInterlaceUnitInOPP,

                /* output */
                double *TSetup,
                double *Tdmbf,
                double *Tdmec,
                double *Tdmsks,
                unsigned int *VUpdateOffsetPix,
                double *VUpdateWidthPix,
                double *VReadyOffsetPix)
{
        double TotalRepeaterDelayTime;

        TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
        *VUpdateWidthPix  =
                        dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
        *VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
                        TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
        *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
        /* TSetup depends on the three pixel quantities computed just above. */
        *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
        *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
        *Tdmec = HTotal / PixelClock;

        if (DynamicMetadataLinesBeforeActiveRequired == 0)
                *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
        else
                *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

        if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
                *Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
        /*
         * VUpdateWidthPix and VReadyOffsetPix are doubles, so they must be
         * printed with %f; the remaining integer values are unsigned (%u).
         */
        dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
        dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
        dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);

        dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n",
                        __func__, DynamicMetadataLinesBeforeActiveRequired);
        dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
        dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
        dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
        dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3174
3175 double dml32_CalculateTWait(
3176                 unsigned int PrefetchMode,
3177                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3178                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3179                 bool DRRDisplay,
3180                 double DRAMClockChangeLatency,
3181                 double FCLKChangeLatency,
3182                 double UrgentLatency,
3183                 double SREnterPlusExitTime)
3184 {
3185         double TWait = 0.0;
3186
3187         if (PrefetchMode == 0 &&
3188                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3189                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3190                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3191                         !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3192                 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3193         } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3194                 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3195         } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3196                 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3197         } else {
3198                 TWait = UrgentLatency;
3199         }
3200
3201 #ifdef __DML_VBA_DEBUG__
3202         dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3203         dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3204 #endif
3205         return TWait;
3206 } // CalculateTWait
3207
3208 // Function: get_return_bw_mbps
3209 // Megabyte per second
3210 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3211                 const int VoltageLevel,
3212                 const bool HostVMEnable,
3213                 const double DCFCLK,
3214                 const double FabricClock,
3215                 const double DRAMSpeed)
3216 {
3217         double ReturnBW = 0.;
3218         double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3219         double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3220         double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3221         double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3222                         IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3223                         IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3224                                         soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3225         double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3226                         IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3227                         IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3228                                         soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3229
3230         if (HostVMEnable != true)
3231                 ReturnBW = PixelDataOnlyReturnBW;
3232         else
3233                 ReturnBW = PixelMixedWithVMDataReturnBW;
3234
3235 #ifdef __DML_VBA_DEBUG__
3236         dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3237         dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3238         dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3239         dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3240         dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3241         dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3242         dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3243         dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3244         dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3245         dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3246         dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3247 #endif
3248         return ReturnBW;
3249 }
3250
3251 // Function: get_return_bw_mbps_vm_only
3252 // Megabyte per second
3253 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3254                 const int VoltageLevel,
3255                 const double DCFCLK,
3256                 const double FabricClock,
3257                 const double DRAMSpeed)
3258 {
3259         double VMDataOnlyReturnBW = dml_min3(
3260                         soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3261                         FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3262                                         * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3263                         DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3264                                         * (VoltageLevel < 2 ?
3265                                                         soc->pct_ideal_dram_bw_after_urgent_strobe :
3266                                                         soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3267 #ifdef __DML_VBA_DEBUG__
3268         dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3269         dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3270         dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3271         dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3272         dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3273 #endif
3274         return VMDataOnlyReturnBW;
3275 }
3276
3277 double dml32_CalculateExtraLatency(
3278                 unsigned int RoundTripPingLatencyCycles,
3279                 unsigned int ReorderingBytes,
3280                 double DCFCLK,
3281                 unsigned int TotalNumberOfActiveDPP,
3282                 unsigned int PixelChunkSizeInKByte,
3283                 unsigned int TotalNumberOfDCCActiveDPP,
3284                 unsigned int MetaChunkSize,
3285                 double ReturnBW,
3286                 bool GPUVMEnable,
3287                 bool HostVMEnable,
3288                 unsigned int NumberOfActiveSurfaces,
3289                 unsigned int NumberOfDPP[],
3290                 unsigned int dpte_group_bytes[],
3291                 double HostVMInefficiencyFactor,
3292                 double HostVMMinPageSize,
3293                 unsigned int HostVMMaxNonCachedPageTableLevels)
3294 {
3295         double ExtraLatencyBytes;
3296         double ExtraLatency;
3297
3298         ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3299                         ReorderingBytes,
3300                         TotalNumberOfActiveDPP,
3301                         PixelChunkSizeInKByte,
3302                         TotalNumberOfDCCActiveDPP,
3303                         MetaChunkSize,
3304                         GPUVMEnable,
3305                         HostVMEnable,
3306                         NumberOfActiveSurfaces,
3307                         NumberOfDPP,
3308                         dpte_group_bytes,
3309                         HostVMInefficiencyFactor,
3310                         HostVMMinPageSize,
3311                         HostVMMaxNonCachedPageTableLevels);
3312
3313         ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3314
3315 #ifdef __DML_VBA_DEBUG__
3316         dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3317         dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3318         dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3319         dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3320         dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3321 #endif
3322
3323         return ExtraLatency;
3324 } // CalculateExtraLatency
3325
3326 bool dml32_CalculatePrefetchSchedule(
3327                 double HostVMInefficiencyFactor,
3328                 DmlPipe *myPipe,
3329                 unsigned int DSCDelay,
3330                 double DPPCLKDelaySubtotalPlusCNVCFormater,
3331                 double DPPCLKDelaySCL,
3332                 double DPPCLKDelaySCLLBOnly,
3333                 double DPPCLKDelayCNVCCursor,
3334                 double DISPCLKDelaySubtotal,
3335                 unsigned int DPP_RECOUT_WIDTH,
3336                 enum output_format_class OutputFormat,
3337                 unsigned int MaxInterDCNTileRepeaters,
3338                 unsigned int VStartup,
3339                 unsigned int MaxVStartup,
3340                 unsigned int GPUVMPageTableLevels,
3341                 bool GPUVMEnable,
3342                 bool HostVMEnable,
3343                 unsigned int HostVMMaxNonCachedPageTableLevels,
3344                 double HostVMMinPageSize,
3345                 bool DynamicMetadataEnable,
3346                 bool DynamicMetadataVMEnabled,
3347                 int DynamicMetadataLinesBeforeActiveRequired,
3348                 unsigned int DynamicMetadataTransmittedBytes,
3349                 double UrgentLatency,
3350                 double UrgentExtraLatency,
3351                 double TCalc,
3352                 unsigned int PDEAndMetaPTEBytesFrame,
3353                 unsigned int MetaRowByte,
3354                 unsigned int PixelPTEBytesPerRow,
3355                 double PrefetchSourceLinesY,
3356                 unsigned int SwathWidthY,
3357                 unsigned int VInitPreFillY,
3358                 unsigned int MaxNumSwathY,
3359                 double PrefetchSourceLinesC,
3360                 unsigned int SwathWidthC,
3361                 unsigned int VInitPreFillC,
3362                 unsigned int MaxNumSwathC,
3363                 unsigned int swath_width_luma_ub,
3364                 unsigned int swath_width_chroma_ub,
3365                 unsigned int SwathHeightY,
3366                 unsigned int SwathHeightC,
3367                 double TWait,
3368                 /* Output */
3369                 double   *DSTXAfterScaler,
3370                 double   *DSTYAfterScaler,
3371                 double *DestinationLinesForPrefetch,
3372                 double *PrefetchBandwidth,
3373                 double *DestinationLinesToRequestVMInVBlank,
3374                 double *DestinationLinesToRequestRowInVBlank,
3375                 double *VRatioPrefetchY,
3376                 double *VRatioPrefetchC,
3377                 double *RequiredPrefetchPixDataBWLuma,
3378                 double *RequiredPrefetchPixDataBWChroma,
3379                 bool   *NotEnoughTimeForDynamicMetadata,
3380                 double *Tno_bw,
3381                 double *prefetch_vmrow_bw,
3382                 double *Tdmdl_vm,
3383                 double *Tdmdl,
3384                 double *TSetup,
3385                 unsigned int   *VUpdateOffsetPix,
3386                 double   *VUpdateWidthPix,
3387                 double   *VReadyOffsetPix)
3388 {
3389         bool MyError = false;
3390         unsigned int DPPCycles, DISPCLKCycles;
3391         double DSTTotalPixelsAfterScaler;
3392         double LineTime;
3393         double dst_y_prefetch_equ;
3394         double prefetch_bw_oto;
3395         double Tvm_oto;
3396         double Tr0_oto;
3397         double Tvm_oto_lines;
3398         double Tr0_oto_lines;
3399         double dst_y_prefetch_oto;
3400         double TimeForFetchingMetaPTE = 0;
3401         double TimeForFetchingRowInVBlank = 0;
3402         double LinesToRequestPrefetchPixelData = 0;
3403         unsigned int HostVMDynamicLevelsTrips;
3404         double  trip_to_mem;
3405         double  Tvm_trips;
3406         double  Tr0_trips;
3407         double  Tvm_trips_rounded;
3408         double  Tr0_trips_rounded;
3409         double  Lsw_oto;
3410         double  Tpre_rounded;
3411         double  prefetch_bw_equ;
3412         double  Tvm_equ;
3413         double  Tr0_equ;
3414         double  Tdmbf;
3415         double  Tdmec;
3416         double  Tdmsks;
3417         double  prefetch_sw_bytes;
3418         double  bytes_pp;
3419         double  dep_bytes;
3420         unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3421         double  min_Lsw;
3422         double  Tsw_est1 = 0;
3423         double  Tsw_est3 = 0;
3424
3425         if (GPUVMEnable == true && HostVMEnable == true)
3426                 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3427         else
3428                 HostVMDynamicLevelsTrips = 0;
3429 #ifdef __DML_VBA_DEBUG__
3430         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
3431         dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
3432         dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3433         dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3434                         __func__, HostVMEnable, HostVMInefficiencyFactor);
3435 #endif
3436         dml32_CalculateVUpdateAndDynamicMetadataParameters(
3437                         MaxInterDCNTileRepeaters,
3438                         myPipe->Dppclk,
3439                         myPipe->Dispclk,
3440                         myPipe->DCFClkDeepSleep,
3441                         myPipe->PixelClock,
3442                         myPipe->HTotal,
3443                         myPipe->VBlank,
3444                         DynamicMetadataTransmittedBytes,
3445                         DynamicMetadataLinesBeforeActiveRequired,
3446                         myPipe->InterlaceEnable,
3447                         myPipe->ProgressiveToInterlaceUnitInOPP,
3448                         TSetup,
3449
3450                         /* output */
3451                         &Tdmbf,
3452                         &Tdmec,
3453                         &Tdmsks,
3454                         VUpdateOffsetPix,
3455                         VUpdateWidthPix,
3456                         VReadyOffsetPix);
3457
3458         LineTime = myPipe->HTotal / myPipe->PixelClock;
3459         trip_to_mem = UrgentLatency;
3460         Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3461
3462         if (DynamicMetadataVMEnabled == true)
3463                 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
3464         else
3465                 *Tdmdl = TWait + UrgentExtraLatency;
3466
3467 #ifdef __DML_VBA_ALLOW_DELTA__
3468         if (DynamicMetadataEnable == false)
3469                 *Tdmdl = 0.0;
3470 #endif
3471
3472         if (DynamicMetadataEnable == true) {
3473                 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3474                         *NotEnoughTimeForDynamicMetadata = true;
3475 #ifdef __DML_VBA_DEBUG__
3476                         dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3477                         dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3478                                         __func__, Tdmbf);
3479                         dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3480                         dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3481                                         __func__, Tdmsks);
3482                         dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3483                                         __func__, *Tdmdl);
3484 #endif
3485                 } else {
3486                         *NotEnoughTimeForDynamicMetadata = false;
3487                 }
3488         } else {
3489                 *NotEnoughTimeForDynamicMetadata = false;
3490         }
3491
3492         *Tdmdl_vm =  (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
3493                         GPUVMEnable == true ? TWait + Tvm_trips : 0);
3494
3495         if (myPipe->ScalerEnabled)
3496                 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
3497         else
3498                 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
3499
3500         DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
3501
3502         DISPCLKCycles = DISPCLKDelaySubtotal;
3503
3504         if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3505                 return true;
3506
3507         *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3508                         myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3509
3510         *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3511                         + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3512                         + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3513                                         myPipe->HActive / 2 : 0)
3514                         + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3515
3516 #ifdef __DML_VBA_DEBUG__
3517         dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3518         dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3519         dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3520         dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3521         dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3522         dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3523         dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3524         dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3525         dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3526 #endif
3527
3528         if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3529                 *DSTYAfterScaler = 1;
3530         else
3531                 *DSTYAfterScaler = 0;
3532
3533         DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3534         *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3535         *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3536 #ifdef __DML_VBA_DEBUG__
3537         dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3538         dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3539 #endif
3540
3541         MyError = false;
3542
3543         Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3544
3545         if (GPUVMEnable == true) {
3546                 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3547                 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3548                 if (GPUVMPageTableLevels >= 3) {
3549                         *Tno_bw = UrgentExtraLatency + trip_to_mem *
3550                                         (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3551                 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
3552                         Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3553                                         4.0 * LineTime; // VBA_ERROR
3554                         *Tno_bw = UrgentExtraLatency;
3555                 } else {
3556                         *Tno_bw = 0;
3557                 }
3558         } else if (myPipe->DCCEnable == true) {
3559                 Tvm_trips_rounded = LineTime / 4.0;
3560                 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3561                 *Tno_bw = 0;
3562         } else {
3563                 Tvm_trips_rounded = LineTime / 4.0;
3564                 Tr0_trips_rounded = LineTime / 2.0;
3565                 *Tno_bw = 0;
3566         }
3567         Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3568         Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3569
3570         if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3571                         || myPipe->SourcePixelFormat == dm_420_12) {
3572                 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3573         } else {
3574                 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3575         }
3576
3577         prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3578                         + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3579         prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3580                         prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3581
3582         min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3583         min_Lsw = dml_max(min_Lsw, 1.0);
3584         Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3585
3586         if (GPUVMEnable == true) {
3587                 Tvm_oto = dml_max3(
3588                                 Tvm_trips,
3589                                 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3590                                 LineTime / 4.0);
3591         } else
3592                 Tvm_oto = LineTime / 4.0;
3593
3594         if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3595                 Tr0_oto = dml_max4(
3596                                 Tr0_trips,
3597                                 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3598                                 (LineTime - Tvm_oto)/2.0,
3599                                 LineTime / 4.0);
3600 #ifdef __DML_VBA_DEBUG__
3601                 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3602                                 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3603                 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3604                 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3605                 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3606 #endif
3607         } else
3608                 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3609
3610         Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3611         Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3612         dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3613
3614         dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3615                         (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3616
3617 #ifdef __DML_VBA_DEBUG__
3618         dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3619         dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3620         dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3621         dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3622         dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3623         dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3624         dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3625         dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3626         dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3627         dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3628         dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3629         dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3630         dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3631         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3632         dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3633         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3634         dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3635         dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3636         dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3637         dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3638         dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3639         dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3640         dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3641         dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3642         dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3643         dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3644         dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3645 #endif
3646
3647         dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3648         Tpre_rounded = dst_y_prefetch_equ * LineTime;
3649 #ifdef __DML_VBA_DEBUG__
3650         dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3651         dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3652         dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3653         dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3654                         __func__, VStartup * LineTime);
3655         dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3656         dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3657         dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3658         dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3659         dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3660         dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3661         dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3662                         __func__, *DSTYAfterScaler);
3663 #endif
3664         dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3665                         MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3666
3667         if (prefetch_sw_bytes < dep_bytes)
3668                 prefetch_sw_bytes = 2 * dep_bytes;
3669
3670         *PrefetchBandwidth = 0;
3671         *DestinationLinesToRequestVMInVBlank = 0;
3672         *DestinationLinesToRequestRowInVBlank = 0;
3673         *VRatioPrefetchY = 0;
3674         *VRatioPrefetchC = 0;
3675         *RequiredPrefetchPixDataBWLuma = 0;
3676         if (dst_y_prefetch_equ > 1) {
3677                 double PrefetchBandwidth1;
3678                 double PrefetchBandwidth2;
3679                 double PrefetchBandwidth3;
3680                 double PrefetchBandwidth4;
3681
3682                 if (Tpre_rounded - *Tno_bw > 0) {
3683                         PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3684                                         + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3685                                         + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3686                         Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3687                 } else
3688                         PrefetchBandwidth1 = 0;
3689
3690                 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3691                                 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3692                         PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3693                                         + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3694                                         / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3695                 }
3696
3697                 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3698                         PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3699                         (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3700                 else
3701                         PrefetchBandwidth2 = 0;
3702
3703                 if (Tpre_rounded - Tvm_trips_rounded > 0) {
3704                         PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3705                                         + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3706                         Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3707                 } else
3708                         PrefetchBandwidth3 = 0;
3709
3710
3711                 if (VStartup == MaxVStartup &&
3712                                 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3713                                 LineTime - Tvm_trips_rounded > 0) {
3714                         PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3715                                         / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3716                 }
3717
3718                 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3719                         PrefetchBandwidth4 = prefetch_sw_bytes /
3720                                         (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3721                 } else {
3722                         PrefetchBandwidth4 = 0;
3723                 }
3724
3725 #ifdef __DML_VBA_DEBUG__
3726                 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3727                 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3728                 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3729                 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3730                 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3731                 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3732                 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3733                 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3734                 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3735 #endif
3736                 {
3737                         bool Case1OK;
3738                         bool Case2OK;
3739                         bool Case3OK;
3740
3741                         if (PrefetchBandwidth1 > 0) {
3742                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3743                                                 >= Tvm_trips_rounded
3744                                                 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3745                                                                 / PrefetchBandwidth1 >= Tr0_trips_rounded) {
3746                                         Case1OK = true;
3747                                 } else {
3748                                         Case1OK = false;
3749                                 }
3750                         } else {
3751                                 Case1OK = false;
3752                         }
3753
3754                         if (PrefetchBandwidth2 > 0) {
3755                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3756                                                 >= Tvm_trips_rounded
3757                                                 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3758                                                 / PrefetchBandwidth2 < Tr0_trips_rounded) {
3759                                         Case2OK = true;
3760                                 } else {
3761                                         Case2OK = false;
3762                                 }
3763                         } else {
3764                                 Case2OK = false;
3765                         }
3766
3767                         if (PrefetchBandwidth3 > 0) {
3768                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3769                                                 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3770                                                                 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3771                                                                 Tr0_trips_rounded) {
3772                                         Case3OK = true;
3773                                 } else {
3774                                         Case3OK = false;
3775                                 }
3776                         } else {
3777                                 Case3OK = false;
3778                         }
3779
3780                         if (Case1OK)
3781                                 prefetch_bw_equ = PrefetchBandwidth1;
3782                         else if (Case2OK)
3783                                 prefetch_bw_equ = PrefetchBandwidth2;
3784                         else if (Case3OK)
3785                                 prefetch_bw_equ = PrefetchBandwidth3;
3786                         else
3787                                 prefetch_bw_equ = PrefetchBandwidth4;
3788
3789 #ifdef __DML_VBA_DEBUG__
3790                         dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3791                         dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3792                         dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3793                         dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3794 #endif
3795
3796                         if (prefetch_bw_equ > 0) {
3797                                 if (GPUVMEnable == true) {
3798                                         Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3799                                                         HostVMInefficiencyFactor / prefetch_bw_equ,
3800                                                         Tvm_trips, LineTime / 4);
3801                                 } else {
3802                                         Tvm_equ = LineTime / 4;
3803                                 }
3804
3805                                 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3806                                         Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3807                                                         HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3808                                                         (LineTime - Tvm_equ) / 2, LineTime / 4);
3809                                 } else {
3810                                         Tr0_equ = (LineTime - Tvm_equ) / 2;
3811                                 }
3812                         } else {
3813                                 Tvm_equ = 0;
3814                                 Tr0_equ = 0;
3815 #ifdef __DML_VBA_DEBUG__
3816                                 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3817 #endif
3818                         }
3819                 }
3820
3821                 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3822                         *DestinationLinesForPrefetch = dst_y_prefetch_oto;
3823                         TimeForFetchingMetaPTE = Tvm_oto;
3824                         TimeForFetchingRowInVBlank = Tr0_oto;
3825                         *PrefetchBandwidth = prefetch_bw_oto;
3826                 } else {
3827                         *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3828                         TimeForFetchingMetaPTE = Tvm_equ;
3829                         TimeForFetchingRowInVBlank = Tr0_equ;
3830                         *PrefetchBandwidth = prefetch_bw_equ;
3831                 }
3832
3833                 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3834
3835                 *DestinationLinesToRequestRowInVBlank =
3836                                 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3837
3838                 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3839                                 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3840
3841 #ifdef __DML_VBA_DEBUG__
3842                 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3843                 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3844                                 __func__, *DestinationLinesToRequestVMInVBlank);
3845                 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3846                 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3847                 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3848                                 __func__, *DestinationLinesToRequestRowInVBlank);
3849                 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3850                 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3851 #endif
3852
3853                 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3854                         *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3855                         *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3856 #ifdef __DML_VBA_DEBUG__
3857                         dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3858                         dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3859                         dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3860 #endif
3861                         if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3862                                 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3863                                         *VRatioPrefetchY =
3864                                                         dml_max((double) PrefetchSourceLinesY /
3865                                                                         LinesToRequestPrefetchPixelData,
3866                                                                         (double) MaxNumSwathY * SwathHeightY /
3867                                                                         (LinesToRequestPrefetchPixelData -
3868                                                                         (VInitPreFillY - 3.0) / 2.0));
3869                                         *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3870                                 } else {
3871                                         MyError = true;
3872                                         *VRatioPrefetchY = 0;
3873                                 }
3874 #ifdef __DML_VBA_DEBUG__
3875                                 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3876                                 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3877                                 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3878 #endif
3879                         }
3880
3881                         *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3882                         *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3883
3884 #ifdef __DML_VBA_DEBUG__
3885                         dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3886                         dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3887                         dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3888 #endif
3889                         if ((SwathHeightC > 4)) {
3890                                 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3891                                         *VRatioPrefetchC =
3892                                                 dml_max(*VRatioPrefetchC,
3893                                                         (double) MaxNumSwathC * SwathHeightC /
3894                                                         (LinesToRequestPrefetchPixelData -
3895                                                         (VInitPreFillC - 3.0) / 2.0));
3896                                         *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3897                                 } else {
3898                                         MyError = true;
3899                                         *VRatioPrefetchC = 0;
3900                                 }
3901 #ifdef __DML_VBA_DEBUG__
3902                                 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3903                                 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3904                                 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3905 #endif
3906                         }
3907
3908                         *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3909                                         / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3910                                         / LineTime;
3911
3912 #ifdef __DML_VBA_DEBUG__
3913                         dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3914                         dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3915                         dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3916                         dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3917                                         __func__, *RequiredPrefetchPixDataBWLuma);
3918 #endif
3919                         *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3920                                         LinesToRequestPrefetchPixelData
3921                                         * myPipe->BytePerPixelC
3922                                         * swath_width_chroma_ub / LineTime;
3923                 } else {
3924                         MyError = true;
3925 #ifdef __DML_VBA_DEBUG__
3926                         dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3927                                         __func__, LinesToRequestPrefetchPixelData);
3928 #endif
3929                         *VRatioPrefetchY = 0;
3930                         *VRatioPrefetchC = 0;
3931                         *RequiredPrefetchPixDataBWLuma = 0;
3932                         *RequiredPrefetchPixDataBWChroma = 0;
3933                 }
3934 #ifdef __DML_VBA_DEBUG__
3935                 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3936                         (double)LinesToRequestPrefetchPixelData * LineTime +
3937                         2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3938                 dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
3939                 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
3940                         (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
3941                 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
3942                 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
3943                         TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
3944                         ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
3945                 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
3946                                 PixelPTEBytesPerRow);
3947 #endif
3948         } else {
3949                 MyError = true;
3950 #ifdef __DML_VBA_DEBUG__
3951                 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
3952                                 __func__, dst_y_prefetch_equ);
3953 #endif
3954         }
3955
3956         {
3957                 double prefetch_vm_bw;
3958                 double prefetch_row_bw;
3959
3960                 if (PDEAndMetaPTEBytesFrame == 0) {
3961                         prefetch_vm_bw = 0;
3962                 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
3963 #ifdef __DML_VBA_DEBUG__
3964                         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3965                         dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3966                         dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3967                                         __func__, *DestinationLinesToRequestVMInVBlank);
3968                         dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3969 #endif
3970                         prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
3971                                         (*DestinationLinesToRequestVMInVBlank * LineTime);
3972 #ifdef __DML_VBA_DEBUG__
3973                         dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
3974 #endif
3975                 } else {
3976                         prefetch_vm_bw = 0;
3977                         MyError = true;
3978 #ifdef __DML_VBA_DEBUG__
3979                         dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
3980                                         __func__, *DestinationLinesToRequestVMInVBlank);
3981 #endif
3982                 }
3983
3984                 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
3985                         prefetch_row_bw = 0;
3986                 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
3987                         prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
3988                                         (*DestinationLinesToRequestRowInVBlank * LineTime);
3989
3990 #ifdef __DML_VBA_DEBUG__
3991                         dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3992                         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3993                         dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3994                                         __func__, *DestinationLinesToRequestRowInVBlank);
3995                         dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
3996 #endif
3997                 } else {
3998                         prefetch_row_bw = 0;
3999                         MyError = true;
4000 #ifdef __DML_VBA_DEBUG__
4001                         dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4002                                         __func__, *DestinationLinesToRequestRowInVBlank);
4003 #endif
4004                 }
4005
4006                 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4007         }
4008
4009         if (MyError) {
4010                 *PrefetchBandwidth = 0;
4011                 TimeForFetchingMetaPTE = 0;
4012                 TimeForFetchingRowInVBlank = 0;
4013                 *DestinationLinesToRequestVMInVBlank = 0;
4014                 *DestinationLinesToRequestRowInVBlank = 0;
4015                 *DestinationLinesForPrefetch = 0;
4016                 LinesToRequestPrefetchPixelData = 0;
4017                 *VRatioPrefetchY = 0;
4018                 *VRatioPrefetchC = 0;
4019                 *RequiredPrefetchPixDataBWLuma = 0;
4020                 *RequiredPrefetchPixDataBWChroma = 0;
4021         }
4022
4023         return MyError;
4024 } // CalculatePrefetchSchedule
4025
4026 void dml32_CalculateFlipSchedule(
4027                 double HostVMInefficiencyFactor,
4028                 double UrgentExtraLatency,
4029                 double UrgentLatency,
4030                 unsigned int GPUVMMaxPageTableLevels,
4031                 bool HostVMEnable,
4032                 unsigned int HostVMMaxNonCachedPageTableLevels,
4033                 bool GPUVMEnable,
4034                 double HostVMMinPageSize,
4035                 double PDEAndMetaPTEBytesPerFrame,
4036                 double MetaRowBytes,
4037                 double DPTEBytesPerRow,
4038                 double BandwidthAvailableForImmediateFlip,
4039                 unsigned int TotImmediateFlipBytes,
4040                 enum source_format_class SourcePixelFormat,
4041                 double LineTime,
4042                 double VRatio,
4043                 double VRatioChroma,
4044                 double Tno_bw,
4045                 bool DCCEnable,
4046                 unsigned int dpte_row_height,
4047                 unsigned int meta_row_height,
4048                 unsigned int dpte_row_height_chroma,
4049                 unsigned int meta_row_height_chroma,
4050                 bool    use_one_row_for_frame_flip,
4051
4052                 /* Output */
4053                 double *DestinationLinesToRequestVMInImmediateFlip,
4054                 double *DestinationLinesToRequestRowInImmediateFlip,
4055                 double *final_flip_bw,
4056                 bool *ImmediateFlipSupportedForPipe)
4057 {
4058         double min_row_time = 0.0;
4059         unsigned int HostVMDynamicLevelsTrips;
4060         double TimeForFetchingMetaPTEImmediateFlip;
4061         double TimeForFetchingRowInVBlankImmediateFlip;
4062         double ImmediateFlipBW;
4063
4064         if (GPUVMEnable == true && HostVMEnable == true)
4065                 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4066         else
4067                 HostVMDynamicLevelsTrips = 0;
4068
4069 #ifdef __DML_VBA_DEBUG__
4070         dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4071         dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4072 #endif
4073
4074         if (TotImmediateFlipBytes > 0) {
4075                 if (use_one_row_for_frame_flip) {
4076                         ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4077                                         BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4078                 } else {
4079                         ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4080                                         BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4081                 }
4082                 if (GPUVMEnable == true) {
4083                         TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4084                                         HostVMInefficiencyFactor / ImmediateFlipBW,
4085                                         UrgentExtraLatency + UrgentLatency *
4086                                         (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4087                                         LineTime / 4.0);
4088                 } else {
4089                         TimeForFetchingMetaPTEImmediateFlip = 0;
4090                 }
4091                 if ((GPUVMEnable == true || DCCEnable == true)) {
4092                         TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4093                                         (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4094                                         UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4095                 } else {
4096                         TimeForFetchingRowInVBlankImmediateFlip = 0;
4097                 }
4098
4099                 *DestinationLinesToRequestVMInImmediateFlip =
4100                                 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4101                 *DestinationLinesToRequestRowInImmediateFlip =
4102                                 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4103
4104                 if (GPUVMEnable == true) {
4105                         *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4106                                         (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4107                                         (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4108                                         (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4109                 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4110                         *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4111                                         (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4112                 } else {
4113                         *final_flip_bw = 0;
4114                 }
4115         } else {
4116                 TimeForFetchingMetaPTEImmediateFlip = 0;
4117                 TimeForFetchingRowInVBlankImmediateFlip = 0;
4118                 *DestinationLinesToRequestVMInImmediateFlip = 0;
4119                 *DestinationLinesToRequestRowInImmediateFlip = 0;
4120                 *final_flip_bw = 0;
4121         }
4122
4123         if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4124                 if (GPUVMEnable == true && DCCEnable != true) {
4125                         min_row_time = dml_min(dpte_row_height *
4126                                         LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4127                 } else if (GPUVMEnable != true && DCCEnable == true) {
4128                         min_row_time = dml_min(meta_row_height *
4129                                         LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4130                 } else {
4131                         min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4132                                         LineTime / VRatio, dpte_row_height_chroma * LineTime /
4133                                         VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4134                 }
4135         } else {
4136                 if (GPUVMEnable == true && DCCEnable != true) {
4137                         min_row_time = dpte_row_height * LineTime / VRatio;
4138                 } else if (GPUVMEnable != true && DCCEnable == true) {
4139                         min_row_time = meta_row_height * LineTime / VRatio;
4140                 } else {
4141                         min_row_time =
4142                                 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4143                 }
4144         }
4145
4146         if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4147                         || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4148                                         > min_row_time) {
4149                 *ImmediateFlipSupportedForPipe = false;
4150         } else {
4151                 *ImmediateFlipSupportedForPipe = true;
4152         }
4153
4154 #ifdef __DML_VBA_DEBUG__
4155         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4156         dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4157         dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4158                         __func__, *DestinationLinesToRequestVMInImmediateFlip);
4159         dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4160                         __func__, *DestinationLinesToRequestRowInImmediateFlip);
4161         dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4162         dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4163                         __func__, TimeForFetchingRowInVBlankImmediateFlip);
4164         dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4165         dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4166 #endif
4167 } // CalculateFlipSchedule
4168
4169 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4170                 bool USRRetrainingRequiredFinal,
4171                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4172                 unsigned int PrefetchMode,
4173                 unsigned int NumberOfActiveSurfaces,
4174                 unsigned int MaxLineBufferLines,
4175                 unsigned int LineBufferSize,
4176                 unsigned int WritebackInterfaceBufferSize,
4177                 double DCFCLK,
4178                 double ReturnBW,
4179                 bool SynchronizeTimingsFinal,
4180                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4181                 bool DRRDisplay[],
4182                 unsigned int dpte_group_bytes[],
4183                 unsigned int meta_row_height[],
4184                 unsigned int meta_row_height_chroma[],
4185                 SOCParametersList mmSOCParameters,
4186                 unsigned int WritebackChunkSize,
4187                 double SOCCLK,
4188                 double DCFClkDeepSleep,
4189                 unsigned int DETBufferSizeY[],
4190                 unsigned int DETBufferSizeC[],
4191                 unsigned int SwathHeightY[],
4192                 unsigned int SwathHeightC[],
4193                 unsigned int LBBitPerPixel[],
4194                 double SwathWidthY[],
4195                 double SwathWidthC[],
4196                 double HRatio[],
4197                 double HRatioChroma[],
4198                 unsigned int VTaps[],
4199                 unsigned int VTapsChroma[],
4200                 double VRatio[],
4201                 double VRatioChroma[],
4202                 unsigned int HTotal[],
4203                 unsigned int VTotal[],
4204                 unsigned int VActive[],
4205                 double PixelClock[],
4206                 unsigned int BlendingAndTiming[],
4207                 unsigned int DPPPerSurface[],
4208                 double BytePerPixelDETY[],
4209                 double BytePerPixelDETC[],
4210                 double DSTXAfterScaler[],
4211                 double DSTYAfterScaler[],
4212                 bool WritebackEnable[],
4213                 enum source_format_class WritebackPixelFormat[],
4214                 double WritebackDestinationWidth[],
4215                 double WritebackDestinationHeight[],
4216                 double WritebackSourceHeight[],
4217                 bool UnboundedRequestEnabled,
4218                 unsigned int CompressedBufferSizeInkByte,
4219
4220                 /* Output */
4221                 Watermarks *Watermark,
4222                 enum clock_change_support *DRAMClockChangeSupport,
4223                 double MaxActiveDRAMClockChangeLatencySupported[],
4224                 unsigned int SubViewportLinesNeededInMALL[],
4225                 enum dm_fclock_change_support *FCLKChangeSupport,
4226                 double *MinActiveFCLKChangeLatencySupported,
4227                 bool *USRRetrainingSupport,
4228                 double ActiveDRAMClockChangeLatencyMargin[])
4229 {
4230         unsigned int i, j, k;
4231         unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4232         unsigned int DRAMClockChangeSupportNumber = 0;
4233         unsigned int LastSurfaceWithoutMargin;
4234         unsigned int DRAMClockChangeMethod = 0;
4235         bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4236         double MinActiveFCLKChangeMargin = 0.;
4237         double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4238         double ActiveClockChangeLatencyHidingY;
4239         double ActiveClockChangeLatencyHidingC;
4240         double ActiveClockChangeLatencyHiding;
4241     double EffectiveDETBufferSizeY;
4242         double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4243         double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4244         double TotalPixelBW = 0.0;
4245         bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4246         double     EffectiveLBLatencyHidingY;
4247         double     EffectiveLBLatencyHidingC;
4248         double     LinesInDETY[DC__NUM_DPP__MAX];
4249         double     LinesInDETC[DC__NUM_DPP__MAX];
4250         unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4251         unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4252         double     FullDETBufferingTimeY;
4253         double     FullDETBufferingTimeC;
4254         double     WritebackDRAMClockChangeLatencyMargin;
4255         double     WritebackFCLKChangeLatencyMargin;
4256         double     WritebackLatencyHiding;
4257         bool    SameTimingForFCLKChange;
4258
4259         unsigned int    TotalActiveWriteback = 0;
4260         unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4261         unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4262
4263         Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4264         Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4265                         + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4266         Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
4267         Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
4268         Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4269                         + 10 / DCFClkDeepSleep;
4270         Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4271                         + 10 / DCFClkDeepSleep;
4272         Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4273                         + 10 / DCFClkDeepSleep;
4274         Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4275                         + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4276
4277 #ifdef __DML_VBA_DEBUG__
4278         dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4279         dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4280         dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4281         dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
4282         dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
4283         dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
4284         dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
4285         dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
4286         dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
4287         dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
4288         dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4289                         __func__, Watermark->Z8StutterEnterPlusExitWatermark);
4290 #endif
4291
4292
4293         TotalActiveWriteback = 0;
4294         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4295                 if (WritebackEnable[k] == true)
4296                         TotalActiveWriteback = TotalActiveWriteback + 1;
4297         }
4298
4299         if (TotalActiveWriteback <= 1) {
4300                 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4301         } else {
4302                 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4303                                 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4304         }
4305         if (USRRetrainingRequiredFinal)
4306                 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
4307                                 + mmSOCParameters.USRRetrainingLatency;
4308
4309         if (TotalActiveWriteback <= 1) {
4310                 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4311                                 + mmSOCParameters.WritebackLatency;
4312                 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4313                                 + mmSOCParameters.WritebackLatency;
4314         } else {
4315                 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4316                                 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4317                 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4318                                 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
4319         }
4320
4321         if (USRRetrainingRequiredFinal)
4322                 Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
4323                                 + mmSOCParameters.USRRetrainingLatency;
4324
4325         if (USRRetrainingRequiredFinal)
4326                 Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
4327                                 + mmSOCParameters.USRRetrainingLatency;
4328
4329 #ifdef __DML_VBA_DEBUG__
4330         dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4331                         __func__, Watermark->WritebackDRAMClockChangeWatermark);
4332         dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
4333         dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
4334         dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
4335         dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4336 #endif
4337
4338         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4339                 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
4340                                 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
4341         }
4342
4343         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4344
4345                 LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
4346                 LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
4347
4348
4349 #ifdef __DML_VBA_DEBUG__
4350                 dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
4351                 dml_print("DML::%s: k=%d, LineBufferSize     = %d\n", __func__, k, LineBufferSize);
4352                 dml_print("DML::%s: k=%d, LBBitPerPixel      = %d\n", __func__, k, LBBitPerPixel[k]);
4353                 dml_print("DML::%s: k=%d, HRatio             = %f\n", __func__, k, HRatio[k]);
4354                 dml_print("DML::%s: k=%d, VTaps              = %d\n", __func__, k, VTaps[k]);
4355 #endif
4356
4357                 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
4358                 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
4359                 EffectiveDETBufferSizeY = DETBufferSizeY[k];
4360
4361                 if (UnboundedRequestEnabled) {
4362                         EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4363                                         + CompressedBufferSizeInkByte * 1024
4364                                                         * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
4365                                                         / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
4366                 }
4367
4368                 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4369                 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4370                 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
4371
4372                 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4373                                 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
4374
4375                 if (NumberOfActiveSurfaces > 1) {
4376                         ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4377                                         - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
4378                                                         / PixelClock[k] / VRatio[k];
4379                 }
4380
4381                 if (BytePerPixelDETC[k] > 0) {
4382                         LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4383                         LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4384                         FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
4385                                         / VRatioChroma[k];
4386                         ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4387                                         - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
4388                                                         / PixelClock[k];
4389                         if (NumberOfActiveSurfaces > 1) {
4390                                 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4391                                                 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
4392                                                                 / PixelClock[k] / VRatioChroma[k];
4393                         }
4394                         ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4395                                         ActiveClockChangeLatencyHidingC);
4396                 } else {
4397                         ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4398                 }
4399
4400                 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4401                                 - Watermark->DRAMClockChangeWatermark;
4402                 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4403                                 - Watermark->FCLKChangeWatermark;
4404                 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
4405
4406                 if (WritebackEnable[k]) {
4407                         WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
4408                                         / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
4409                                                         / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
4410                         if (WritebackPixelFormat[k] == dm_444_64)
4411                                 WritebackLatencyHiding = WritebackLatencyHiding / 2;
4412
4413                         WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4414                                         - Watermark->WritebackDRAMClockChangeWatermark;
4415
4416                         WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4417                                         - Watermark->WritebackFCLKChangeWatermark;
4418
4419                         ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4420                                         WritebackFCLKChangeLatencyMargin);
4421                         ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4422                                         WritebackDRAMClockChangeLatencyMargin);
4423                 }
4424                 MaxActiveDRAMClockChangeLatencySupported[k] =
4425                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4426                                                 0 :
4427                                                 (ActiveDRAMClockChangeLatencyMargin[k]
4428                                                                 + mmSOCParameters.DRAMClockChangeLatency);
4429         }
4430
4431         for (i = 0; i < NumberOfActiveSurfaces; ++i) {
4432                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
4433                         if (i == j ||
4434                                         (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
4435                                         (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
4436                                         (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
4437                                         (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
4438                                         HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
4439                                         VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4440                                         (DRRDisplay[i] || DRRDisplay[j]))) {
4441                                 SynchronizedSurfaces[i][j] = true;
4442                         } else {
4443                                 SynchronizedSurfaces[i][j] = false;
4444                         }
4445                 }
4446         }
4447
4448         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4449                 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4450                                 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4451                                 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4452                         FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4453                         MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4454                         SurfaceWithMinActiveFCLKChangeMargin = k;
4455                 }
4456         }
4457
4458         *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4459
4460         SameTimingForFCLKChange = true;
4461         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4462                 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4463                         if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4464                                         (SameTimingForFCLKChange ||
4465                                         ActiveFCLKChangeLatencyMargin[k] <
4466                                         SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4467                                 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4468                         }
4469                         SameTimingForFCLKChange = false;
4470                 }
4471         }
4472
4473         if (MinActiveFCLKChangeMargin > 0) {
4474                 *FCLKChangeSupport = dm_fclock_change_vactive;
4475         } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4476                         (PrefetchMode <= 1)) {
4477                 *FCLKChangeSupport = dm_fclock_change_vblank;
4478         } else {
4479                 *FCLKChangeSupport = dm_fclock_change_unsupported;
4480         }
4481
4482         *USRRetrainingSupport = true;
4483         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4484                 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4485                                 (USRRetrainingLatencyMargin[k] < 0)) {
4486                         *USRRetrainingSupport = false;
4487                 }
4488         }
4489
4490         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4491                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4492                                 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4493                                 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4494                                 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4495                         if (PrefetchMode > 0) {
4496                                 DRAMClockChangeSupportNumber = 2;
4497                         } else if (DRAMClockChangeSupportNumber == 0) {
4498                                 DRAMClockChangeSupportNumber = 1;
4499                                 LastSurfaceWithoutMargin = k;
4500                         } else if (DRAMClockChangeSupportNumber == 1 &&
4501                                         !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4502                                 DRAMClockChangeSupportNumber = 2;
4503                         }
4504                 }
4505         }
4506
4507         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4508                 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4509                         DRAMClockChangeMethod = 1;
4510                 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4511                         DRAMClockChangeMethod = 2;
4512         }
4513
4514         if (DRAMClockChangeMethod == 0) {
4515                 if (DRAMClockChangeSupportNumber == 0)
4516                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4517                 else if (DRAMClockChangeSupportNumber == 1)
4518                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4519                 else
4520                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4521         } else if (DRAMClockChangeMethod == 1) {
4522                 if (DRAMClockChangeSupportNumber == 0)
4523                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4524                 else if (DRAMClockChangeSupportNumber == 1)
4525                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4526                 else
4527                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4528         } else {
4529                 if (DRAMClockChangeSupportNumber == 0)
4530                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4531                 else if (DRAMClockChangeSupportNumber == 1)
4532                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4533                 else
4534                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4535         }
4536
4537         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4538                 unsigned int dst_y_pstate;
4539                 unsigned int src_y_pstate_l;
4540                 unsigned int src_y_pstate_c;
4541                 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4542
4543                 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
4544                 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
4545                 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4546                 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
4547
4548 #ifdef __DML_VBA_DEBUG__
4549 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4550 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4551 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4552 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4553 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4554 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4555 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4556 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4557 dml_print("DML::%s: k=%d, meta_row_height   = %d\n", __func__, k, meta_row_height[k]);
4558 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4559 #endif
4560                 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4561
4562                 if (BytePerPixelDETC[k] > 0) {
4563                         src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
4564                         src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4565                         sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
4566                         SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4567
4568 #ifdef __DML_VBA_DEBUG__
4569 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4570 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4571 dml_print("DML::%s: k=%d, meta_row_height_chroma    = %d\n", __func__, k, meta_row_height_chroma[k]);
4572 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4573 #endif
4574                 }
4575         }
4576 #ifdef __DML_VBA_DEBUG__
4577         dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4578         dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4579         dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4580                         __func__, *MinActiveFCLKChangeLatencySupported);
4581         dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4582 #endif
4583 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4584
4585 double dml32_CalculateWriteBackDISPCLK(
4586                 enum source_format_class WritebackPixelFormat,
4587                 double PixelClock,
4588                 double WritebackHRatio,
4589                 double WritebackVRatio,
4590                 unsigned int WritebackHTaps,
4591                 unsigned int WritebackVTaps,
4592                 unsigned int   WritebackSourceWidth,
4593                 unsigned int   WritebackDestinationWidth,
4594                 unsigned int HTotal,
4595                 unsigned int WritebackLineBufferSize,
4596                 double DISPCLKDPPCLKVCOSpeed)
4597 {
4598         double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4599
4600         DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4601         DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4602         DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4603                         WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4604         return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4605 }
4606
4607 void dml32_CalculateMinAndMaxPrefetchMode(
4608                 enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4609                 unsigned int             *MinPrefetchMode,
4610                 unsigned int             *MaxPrefetchMode)
4611 {
4612         if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4613                 *MinPrefetchMode = 3;
4614                 *MaxPrefetchMode = 3;
4615         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4616                 *MinPrefetchMode = 2;
4617                 *MaxPrefetchMode = 2;
4618         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4619                 *MinPrefetchMode = 1;
4620                 *MaxPrefetchMode = 1;
4621         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4622                 *MinPrefetchMode = 0;
4623                 *MaxPrefetchMode = 0;
4624         } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4625                         dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4626                 *MinPrefetchMode = 0;
4627                 *MaxPrefetchMode = 3;
4628         } else {
4629                 *MinPrefetchMode = 0;
4630                 *MaxPrefetchMode = 3;
4631         }
4632 } // CalculateMinAndMaxPrefetchMode
4633
4634 void dml32_CalculatePixelDeliveryTimes(
4635                 unsigned int             NumberOfActiveSurfaces,
4636                 double              VRatio[],
4637                 double              VRatioChroma[],
4638                 double              VRatioPrefetchY[],
4639                 double              VRatioPrefetchC[],
4640                 unsigned int             swath_width_luma_ub[],
4641                 unsigned int             swath_width_chroma_ub[],
4642                 unsigned int             DPPPerSurface[],
4643                 double              HRatio[],
4644                 double              HRatioChroma[],
4645                 double              PixelClock[],
4646                 double              PSCL_THROUGHPUT[],
4647                 double              PSCL_THROUGHPUT_CHROMA[],
4648                 double              Dppclk[],
4649                 unsigned int             BytePerPixelC[],
4650                 enum dm_rotation_angle   SourceRotation[],
4651                 unsigned int             NumberOfCursors[],
4652                 unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4653                 unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4654                 unsigned int             BlockWidth256BytesY[],
4655                 unsigned int             BlockHeight256BytesY[],
4656                 unsigned int             BlockWidth256BytesC[],
4657                 unsigned int             BlockHeight256BytesC[],
4658
4659                 /* Output */
4660                 double              DisplayPipeLineDeliveryTimeLuma[],
4661                 double              DisplayPipeLineDeliveryTimeChroma[],
4662                 double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4663                 double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4664                 double              DisplayPipeRequestDeliveryTimeLuma[],
4665                 double              DisplayPipeRequestDeliveryTimeChroma[],
4666                 double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4667                 double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4668                 double              CursorRequestDeliveryTime[],
4669                 double              CursorRequestDeliveryTimePrefetch[])
4670 {
4671         double   req_per_swath_ub;
4672         unsigned int k;
4673
4674         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4675
4676 #ifdef __DML_VBA_DEBUG__
4677                 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4678                 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4679                 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4680                 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4681                 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4682                 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4683                 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4684                 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4685                 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4686                 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4687                 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4688 #endif
4689
4690                 if (VRatio[k] <= 1) {
4691                         DisplayPipeLineDeliveryTimeLuma[k] =
4692                                         swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4693                 } else {
4694                         DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4695                 }
4696
4697                 if (BytePerPixelC[k] == 0) {
4698                         DisplayPipeLineDeliveryTimeChroma[k] = 0;
4699                 } else {
4700                         if (VRatioChroma[k] <= 1) {
4701                                 DisplayPipeLineDeliveryTimeChroma[k] =
4702                                         swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4703                         } else {
4704                                 DisplayPipeLineDeliveryTimeChroma[k] =
4705                                         swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4706                         }
4707                 }
4708
4709                 if (VRatioPrefetchY[k] <= 1) {
4710                         DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4711                                         swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4712                 } else {
4713                         DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4714                                         swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4715                 }
4716
4717                 if (BytePerPixelC[k] == 0) {
4718                         DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4719                 } else {
4720                         if (VRatioPrefetchC[k] <= 1) {
4721                                 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4722                                                 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4723                         } else {
4724                                 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4725                                                 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4726                         }
4727                 }
4728 #ifdef __DML_VBA_DEBUG__
4729                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4730                                 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4731                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4732                                 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4733                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4734                                 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4735                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4736                                 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4737 #endif
4738         }
4739
4740         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4741                 if (!IsVertical(SourceRotation[k]))
4742                         req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4743                 else
4744                         req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4745 #ifdef __DML_VBA_DEBUG__
4746                 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4747 #endif
4748
4749                 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4750                 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4751                                 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4752                 if (BytePerPixelC[k] == 0) {
4753                         DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4754                         DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4755                 } else {
4756                         if (!IsVertical(SourceRotation[k]))
4757                                 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4758                         else
4759                                 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4760 #ifdef __DML_VBA_DEBUG__
4761                         dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4762 #endif
4763                         DisplayPipeRequestDeliveryTimeChroma[k] =
4764                                         DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4765                         DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4766                                         DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4767                 }
4768 #ifdef __DML_VBA_DEBUG__
4769                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4770                                 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4771                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4772                                 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4773                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4774                                 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4775                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4776                                 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4777 #endif
4778         }
4779
4780         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4781                 unsigned int cursor_req_per_width;
4782
4783                 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4784                                 256.0 / 8.0, 1.0);
4785                 if (NumberOfCursors[k] > 0) {
4786                         if (VRatio[k] <= 1) {
4787                                 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4788                                                 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4789                         } else {
4790                                 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4791                                                 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4792                         }
4793                         if (VRatioPrefetchY[k] <= 1) {
4794                                 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4795                                                 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4796                         } else {
4797                                 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4798                                                 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4799                         }
4800                 } else {
4801                         CursorRequestDeliveryTime[k] = 0;
4802                         CursorRequestDeliveryTimePrefetch[k] = 0;
4803                 }
4804 #ifdef __DML_VBA_DEBUG__
4805                 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4806                                 __func__, k, NumberOfCursors[k]);
4807                 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4808                                 __func__, k, CursorRequestDeliveryTime[k]);
4809                 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4810                                 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4811 #endif
4812         }
4813 } // CalculatePixelDeliveryTimes
4814
4815 void dml32_CalculateMetaAndPTETimes(
4816                 bool use_one_row_for_frame[],
4817                 unsigned int NumberOfActiveSurfaces,
4818                 bool GPUVMEnable,
4819                 unsigned int MetaChunkSize,
4820                 unsigned int MinMetaChunkSizeBytes,
4821                 unsigned int    HTotal[],
4822                 double  VRatio[],
4823                 double  VRatioChroma[],
4824                 double  DestinationLinesToRequestRowInVBlank[],
4825                 double  DestinationLinesToRequestRowInImmediateFlip[],
4826                 bool DCCEnable[],
4827                 double  PixelClock[],
4828                 unsigned int BytePerPixelY[],
4829                 unsigned int BytePerPixelC[],
4830                 enum dm_rotation_angle SourceRotation[],
4831                 unsigned int dpte_row_height[],
4832                 unsigned int dpte_row_height_chroma[],
4833                 unsigned int meta_row_width[],
4834                 unsigned int meta_row_width_chroma[],
4835                 unsigned int meta_row_height[],
4836                 unsigned int meta_row_height_chroma[],
4837                 unsigned int meta_req_width[],
4838                 unsigned int meta_req_width_chroma[],
4839                 unsigned int meta_req_height[],
4840                 unsigned int meta_req_height_chroma[],
4841                 unsigned int dpte_group_bytes[],
4842                 unsigned int    PTERequestSizeY[],
4843                 unsigned int    PTERequestSizeC[],
4844                 unsigned int    PixelPTEReqWidthY[],
4845                 unsigned int    PixelPTEReqHeightY[],
4846                 unsigned int    PixelPTEReqWidthC[],
4847                 unsigned int    PixelPTEReqHeightC[],
4848                 unsigned int    dpte_row_width_luma_ub[],
4849                 unsigned int    dpte_row_width_chroma_ub[],
4850
4851                 /* Output */
4852                 double DST_Y_PER_PTE_ROW_NOM_L[],
4853                 double DST_Y_PER_PTE_ROW_NOM_C[],
4854                 double DST_Y_PER_META_ROW_NOM_L[],
4855                 double DST_Y_PER_META_ROW_NOM_C[],
4856                 double TimePerMetaChunkNominal[],
4857                 double TimePerChromaMetaChunkNominal[],
4858                 double TimePerMetaChunkVBlank[],
4859                 double TimePerChromaMetaChunkVBlank[],
4860                 double TimePerMetaChunkFlip[],
4861                 double TimePerChromaMetaChunkFlip[],
4862                 double time_per_pte_group_nom_luma[],
4863                 double time_per_pte_group_vblank_luma[],
4864                 double time_per_pte_group_flip_luma[],
4865                 double time_per_pte_group_nom_chroma[],
4866                 double time_per_pte_group_vblank_chroma[],
4867                 double time_per_pte_group_flip_chroma[])
4868 {
4869         unsigned int   meta_chunk_width;
4870         unsigned int   min_meta_chunk_width;
4871         unsigned int   meta_chunk_per_row_int;
4872         unsigned int   meta_row_remainder;
4873         unsigned int   meta_chunk_threshold;
4874         unsigned int   meta_chunks_per_row_ub;
4875         unsigned int   meta_chunk_width_chroma;
4876         unsigned int   min_meta_chunk_width_chroma;
4877         unsigned int   meta_chunk_per_row_int_chroma;
4878         unsigned int   meta_row_remainder_chroma;
4879         unsigned int   meta_chunk_threshold_chroma;
4880         unsigned int   meta_chunks_per_row_ub_chroma;
4881         unsigned int   dpte_group_width_luma;
4882         unsigned int   dpte_groups_per_row_luma_ub;
4883         unsigned int   dpte_group_width_chroma;
4884         unsigned int   dpte_groups_per_row_chroma_ub;
4885         unsigned int k;
4886
4887         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4888                 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4889                 if (BytePerPixelC[k] == 0)
4890                         DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4891                 else
4892                         DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4893                 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4894                 if (BytePerPixelC[k] == 0)
4895                         DST_Y_PER_META_ROW_NOM_C[k] = 0;
4896                 else
4897                         DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4898         }
4899
4900         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4901                 if (DCCEnable[k] == true) {
4902                         meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4903                         min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4904                         meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4905                         meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4906                         if (!IsVertical(SourceRotation[k]))
4907                                 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4908                         else
4909                                 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4910
4911                         if (meta_row_remainder <= meta_chunk_threshold)
4912                                 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4913                         else
4914                                 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4915
4916                         TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4917                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4918                         TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4919                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4920                         TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4921                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4922                         if (BytePerPixelC[k] == 0) {
4923                                 TimePerChromaMetaChunkNominal[k] = 0;
4924                                 TimePerChromaMetaChunkVBlank[k] = 0;
4925                                 TimePerChromaMetaChunkFlip[k] = 0;
4926                         } else {
4927                                 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4928                                                 meta_row_height_chroma[k];
4929                                 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4930                                                 meta_row_height_chroma[k];
4931                                 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4932                                                 meta_chunk_width_chroma;
4933                                 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4934                                 if (!IsVertical(SourceRotation[k])) {
4935                                         meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4936                                                         meta_req_width_chroma[k];
4937                                 } else {
4938                                         meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4939                                                         meta_req_height_chroma[k];
4940                                 }
4941                                 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4942                                         meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4943                                 else
4944                                         meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4945
4946                                 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4947                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4948                                 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4949                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4950                                 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4951                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4952                         }
4953                 } else {
4954                         TimePerMetaChunkNominal[k] = 0;
4955                         TimePerMetaChunkVBlank[k] = 0;
4956                         TimePerMetaChunkFlip[k] = 0;
4957                         TimePerChromaMetaChunkNominal[k] = 0;
4958                         TimePerChromaMetaChunkVBlank[k] = 0;
4959                         TimePerChromaMetaChunkFlip[k] = 0;
4960                 }
4961         }
4962
4963         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4964                 if (GPUVMEnable == true) {
4965                         if (!IsVertical(SourceRotation[k])) {
4966                                 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4967                                                 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4968                         } else {
4969                                 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4970                                                 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
4971                         }
4972
4973                         if (use_one_row_for_frame[k]) {
4974                                 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4975                                                 (double) dpte_group_width_luma / 2.0, 1.0);
4976                         } else {
4977                                 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4978                                                 (double) dpte_group_width_luma, 1.0);
4979                         }
4980 #ifdef __DML_VBA_DEBUG__
4981                         dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
4982                                         __func__, k, use_one_row_for_frame[k]);
4983                         dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
4984                                         __func__, k, dpte_group_bytes[k]);
4985                         dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
4986                                         __func__, k, PTERequestSizeY[k]);
4987                         dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
4988                                         __func__, k, PixelPTEReqWidthY[k]);
4989                         dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
4990                                         __func__, k, PixelPTEReqHeightY[k]);
4991                         dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
4992                                         __func__, k, dpte_row_width_luma_ub[k]);
4993                         dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
4994                                         __func__, k, dpte_group_width_luma);
4995                         dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
4996                                         __func__, k, dpte_groups_per_row_luma_ub);
4997 #endif
4998
4999                         time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5000                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5001                         time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5002                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5003                         time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5004                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5005                         if (BytePerPixelC[k] == 0) {
5006                                 time_per_pte_group_nom_chroma[k] = 0;
5007                                 time_per_pte_group_vblank_chroma[k] = 0;
5008                                 time_per_pte_group_flip_chroma[k] = 0;
5009                         } else {
5010                                 if (!IsVertical(SourceRotation[k])) {
5011                                         dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5012                                                         (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5013                                 } else {
5014                                         dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5015                                                         (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5016                                 }
5017
5018                                 if (use_one_row_for_frame[k]) {
5019                                         dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5020                                                         (double) dpte_group_width_chroma / 2.0, 1.0);
5021                                 } else {
5022                                         dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5023                                                         (double) dpte_group_width_chroma, 1.0);
5024                                 }
5025 #ifdef __DML_VBA_DEBUG__
5026                                 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5027                                                 __func__, k, dpte_row_width_chroma_ub[k]);
5028                                 dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5029                                                 __func__, k, dpte_group_width_chroma);
5030                                 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5031                                                 __func__, k, dpte_groups_per_row_chroma_ub);
5032 #endif
5033                                 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5034                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5035                                 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5036                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5037                                 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5038                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5039                         }
5040                 } else {
5041                         time_per_pte_group_nom_luma[k] = 0;
5042                         time_per_pte_group_vblank_luma[k] = 0;
5043                         time_per_pte_group_flip_luma[k] = 0;
5044                         time_per_pte_group_nom_chroma[k] = 0;
5045                         time_per_pte_group_vblank_chroma[k] = 0;
5046                         time_per_pte_group_flip_chroma[k] = 0;
5047                 }
5048 #ifdef __DML_VBA_DEBUG__
5049                 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5050                                 __func__, k, DestinationLinesToRequestRowInVBlank[k]);
5051                 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5052                                 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5053                 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5054                                 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5055                 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5056                                 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5057                 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5058                                 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5059                 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5060                                 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5061                 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5062                                 __func__, k, TimePerMetaChunkNominal[k]);
5063                 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5064                                 __func__, k, TimePerMetaChunkVBlank[k]);
5065                 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5066                                 __func__, k, TimePerMetaChunkFlip[k]);
5067                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5068                                 __func__, k, TimePerChromaMetaChunkNominal[k]);
5069                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5070                                 __func__, k, TimePerChromaMetaChunkVBlank[k]);
5071                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5072                                 __func__, k, TimePerChromaMetaChunkFlip[k]);
5073                 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5074                                 __func__, k, time_per_pte_group_nom_luma[k]);
5075                 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5076                                 __func__, k, time_per_pte_group_vblank_luma[k]);
5077                 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5078                                 __func__, k, time_per_pte_group_flip_luma[k]);
5079                 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5080                                 __func__, k, time_per_pte_group_nom_chroma[k]);
5081                 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5082                                 __func__, k, time_per_pte_group_vblank_chroma[k]);
5083                 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5084                                 __func__, k, time_per_pte_group_flip_chroma[k]);
5085 #endif
5086         }
5087 } // CalculateMetaAndPTETimes
5088
5089 void dml32_CalculateVMGroupAndRequestTimes(
5090                 unsigned int     NumberOfActiveSurfaces,
5091                 bool     GPUVMEnable,
5092                 unsigned int     GPUVMMaxPageTableLevels,
5093                 unsigned int     HTotal[],
5094                 unsigned int     BytePerPixelC[],
5095                 double      DestinationLinesToRequestVMInVBlank[],
5096                 double      DestinationLinesToRequestVMInImmediateFlip[],
5097                 bool     DCCEnable[],
5098                 double      PixelClock[],
5099                 unsigned int        dpte_row_width_luma_ub[],
5100                 unsigned int        dpte_row_width_chroma_ub[],
5101                 unsigned int     vm_group_bytes[],
5102                 unsigned int     dpde0_bytes_per_frame_ub_l[],
5103                 unsigned int     dpde0_bytes_per_frame_ub_c[],
5104                 unsigned int        meta_pte_bytes_per_frame_ub_l[],
5105                 unsigned int        meta_pte_bytes_per_frame_ub_c[],
5106
5107                 /* Output */
5108                 double      TimePerVMGroupVBlank[],
5109                 double      TimePerVMGroupFlip[],
5110                 double      TimePerVMRequestVBlank[],
5111                 double      TimePerVMRequestFlip[])
5112 {
5113         unsigned int k;
5114         unsigned int   num_group_per_lower_vm_stage;
5115         unsigned int   num_req_per_lower_vm_stage;
5116
5117 #ifdef __DML_VBA_DEBUG__
5118         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
5119         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
5120 #endif
5121         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5122
5123 #ifdef __DML_VBA_DEBUG__
5124                 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
5125                 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
5126                 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
5127                                 __func__, k, dpde0_bytes_per_frame_ub_l[k]);
5128                 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
5129                                 __func__, k, dpde0_bytes_per_frame_ub_c[k]);
5130                 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
5131                                 __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
5132                 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
5133                                 __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
5134 #endif
5135
5136                 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
5137                         if (DCCEnable[k] == false) {
5138                                 if (BytePerPixelC[k] > 0) {
5139                                         num_group_per_lower_vm_stage = dml_ceil(
5140                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5141                                                         (double) (vm_group_bytes[k]), 1.0) +
5142                                                         dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5143                                                         (double) (vm_group_bytes[k]), 1.0);
5144                                 } else {
5145                                         num_group_per_lower_vm_stage = dml_ceil(
5146                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5147                                                         (double) (vm_group_bytes[k]), 1.0);
5148                                 }
5149                         } else {
5150                                 if (GPUVMMaxPageTableLevels == 1) {
5151                                         if (BytePerPixelC[k] > 0) {
5152                                                 num_group_per_lower_vm_stage = dml_ceil(
5153                                                         (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5154                                                         (double) (vm_group_bytes[k]), 1.0) +
5155                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5156                                                         (double) (vm_group_bytes[k]), 1.0);
5157                                         } else {
5158                                                 num_group_per_lower_vm_stage = dml_ceil(
5159                                                                 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5160                                                                 (double) (vm_group_bytes[k]), 1.0);
5161                                         }
5162                                 } else {
5163                                         if (BytePerPixelC[k] > 0) {
5164                                                 num_group_per_lower_vm_stage = 2 + dml_ceil(
5165                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5166                                                         (double) (vm_group_bytes[k]), 1) +
5167                                                         dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5168                                                         (double) (vm_group_bytes[k]), 1) +
5169                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
5170                                                         (double) (vm_group_bytes[k]), 1) +
5171                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5172                                                         (double) (vm_group_bytes[k]), 1);
5173                                         } else {
5174                                                 num_group_per_lower_vm_stage = 1 + dml_ceil(
5175                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5176                                                         (double) (vm_group_bytes[k]), 1) + dml_ceil(
5177                                                         (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5178                                                         (double) (vm_group_bytes[k]), 1);
5179                                         }
5180                                 }
5181                         }
5182
5183                         if (DCCEnable[k] == false) {
5184                                 if (BytePerPixelC[k] > 0) {
5185                                         num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
5186                                                         dpde0_bytes_per_frame_ub_c[k] / 64;
5187                                 } else {
5188                                         num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
5189                                 }
5190                         } else {
5191                                 if (GPUVMMaxPageTableLevels == 1) {
5192                                         if (BytePerPixelC[k] > 0) {
5193                                                 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
5194                                                                 meta_pte_bytes_per_frame_ub_c[k] / 64;
5195                                         } else {
5196                                                 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
5197                                         }
5198                                 } else {
5199                                         if (BytePerPixelC[k] > 0) {
5200                                                 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5201                                                                 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
5202                                                                 meta_pte_bytes_per_frame_ub_l[k] / 64 +
5203                                                                 meta_pte_bytes_per_frame_ub_c[k] / 64;
5204                                         } else {
5205                                                 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5206                                                                 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
5207                                         }
5208                                 }
5209                         }
5210
5211                         TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5212                                         HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5213                         TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5214                                         HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5215                         TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5216                                         HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5217                         TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5218                                         HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5219
5220                         if (GPUVMMaxPageTableLevels > 2) {
5221                                 TimePerVMGroupVBlank[k]    = TimePerVMGroupVBlank[k] / 2;
5222                                 TimePerVMGroupFlip[k]      = TimePerVMGroupFlip[k] / 2;
5223                                 TimePerVMRequestVBlank[k]  = TimePerVMRequestVBlank[k] / 2;
5224                                 TimePerVMRequestFlip[k]    = TimePerVMRequestFlip[k] / 2;
5225                         }
5226
5227                 } else {
5228                         TimePerVMGroupVBlank[k] = 0;
5229                         TimePerVMGroupFlip[k] = 0;
5230                         TimePerVMRequestVBlank[k] = 0;
5231                         TimePerVMRequestFlip[k] = 0;
5232                 }
5233
5234 #ifdef __DML_VBA_DEBUG__
5235                 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
5236                 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
5237                 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
5238                 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
5239 #endif
5240         }
5241 } // CalculateVMGroupAndRequestTimes
5242
5243 void dml32_CalculateDCCConfiguration(
5244                 bool             DCCEnabled,
5245                 bool             DCCProgrammingAssumesScanDirectionUnknown,
5246                 enum source_format_class SourcePixelFormat,
5247                 unsigned int             SurfaceWidthLuma,
5248                 unsigned int             SurfaceWidthChroma,
5249                 unsigned int             SurfaceHeightLuma,
5250                 unsigned int             SurfaceHeightChroma,
5251                 unsigned int                nomDETInKByte,
5252                 unsigned int             RequestHeight256ByteLuma,
5253                 unsigned int             RequestHeight256ByteChroma,
5254                 enum dm_swizzle_mode     TilingFormat,
5255                 unsigned int             BytePerPixelY,
5256                 unsigned int             BytePerPixelC,
5257                 double              BytePerPixelDETY,
5258                 double              BytePerPixelDETC,
5259                 enum dm_rotation_angle   SourceRotation,
5260                 /* Output */
5261                 unsigned int        *MaxUncompressedBlockLuma,
5262                 unsigned int        *MaxUncompressedBlockChroma,
5263                 unsigned int        *MaxCompressedBlockLuma,
5264                 unsigned int        *MaxCompressedBlockChroma,
5265                 unsigned int        *IndependentBlockLuma,
5266                 unsigned int        *IndependentBlockChroma)
5267 {
5268         typedef enum {
5269                 REQ_256Bytes,
5270                 REQ_128BytesNonContiguous,
5271                 REQ_128BytesContiguous,
5272                 REQ_NA
5273         } RequestType;
5274
5275         RequestType   RequestLuma;
5276         RequestType   RequestChroma;
5277
5278         unsigned int   segment_order_horz_contiguous_luma;
5279         unsigned int   segment_order_horz_contiguous_chroma;
5280         unsigned int   segment_order_vert_contiguous_luma;
5281         unsigned int   segment_order_vert_contiguous_chroma;
5282         unsigned int req128_horz_wc_l;
5283         unsigned int req128_horz_wc_c;
5284         unsigned int req128_vert_wc_l;
5285         unsigned int req128_vert_wc_c;
5286         unsigned int MAS_vp_horz_limit;
5287         unsigned int MAS_vp_vert_limit;
5288         unsigned int max_vp_horz_width;
5289         unsigned int max_vp_vert_height;
5290         unsigned int eff_surf_width_l;
5291         unsigned int eff_surf_width_c;
5292         unsigned int eff_surf_height_l;
5293         unsigned int eff_surf_height_c;
5294         unsigned int full_swath_bytes_horz_wc_l;
5295         unsigned int full_swath_bytes_horz_wc_c;
5296         unsigned int full_swath_bytes_vert_wc_l;
5297         unsigned int full_swath_bytes_vert_wc_c;
5298         unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5299
5300         unsigned int   yuv420;
5301         unsigned int   horz_div_l;
5302         unsigned int   horz_div_c;
5303         unsigned int   vert_div_l;
5304         unsigned int   vert_div_c;
5305
5306         unsigned int     swath_buf_size;
5307         double   detile_buf_vp_horz_limit;
5308         double   detile_buf_vp_vert_limit;
5309
5310         yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5311                         SourcePixelFormat == dm_420_12) ? 1 : 0);
5312         horz_div_l = 1;
5313         horz_div_c = 1;
5314         vert_div_l = 1;
5315         vert_div_c = 1;
5316
5317         if (BytePerPixelY == 1)
5318                 vert_div_l = 0;
5319         if (BytePerPixelC == 1)
5320                 vert_div_c = 0;
5321
5322         if (BytePerPixelC == 0) {
5323                 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5324                 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5325                                 BytePerPixelY / (1 + horz_div_l));
5326                 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5327                                 (1 + vert_div_l));
5328         } else {
5329                 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5330                 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5331                                 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5332                                 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5333                 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5334                                 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5335                                 (1 + vert_div_c) / (1 + yuv420));
5336         }
5337
5338         if (SourcePixelFormat == dm_420_10) {
5339                 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5340                 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5341         }
5342
5343         detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5344         detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5345
5346         MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5347         MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5348         max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5349         max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5350         eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5351         eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5352         eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5353         eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5354
5355         full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5356         full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5357         if (BytePerPixelC > 0) {
5358                 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5359                 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5360         } else {
5361                 full_swath_bytes_horz_wc_c = 0;
5362                 full_swath_bytes_vert_wc_c = 0;
5363         }
5364
5365         if (SourcePixelFormat == dm_420_10) {
5366                 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5367                 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5368                 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5369                 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5370         }
5371
5372         if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5373                 req128_horz_wc_l = 0;
5374                 req128_horz_wc_c = 0;
5375         } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5376                         full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5377                 req128_horz_wc_l = 0;
5378                 req128_horz_wc_c = 1;
5379         } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5380                         full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5381                 req128_horz_wc_l = 1;
5382                 req128_horz_wc_c = 0;
5383         } else {
5384                 req128_horz_wc_l = 1;
5385                 req128_horz_wc_c = 1;
5386         }
5387
5388         if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5389                 req128_vert_wc_l = 0;
5390                 req128_vert_wc_c = 0;
5391         } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5392                         full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5393                 req128_vert_wc_l = 0;
5394                 req128_vert_wc_c = 1;
5395         } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5396                         full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5397                 req128_vert_wc_l = 1;
5398                 req128_vert_wc_c = 0;
5399         } else {
5400                 req128_vert_wc_l = 1;
5401                 req128_vert_wc_c = 1;
5402         }
5403
5404         if (BytePerPixelY == 2) {
5405                 segment_order_horz_contiguous_luma = 0;
5406                 segment_order_vert_contiguous_luma = 1;
5407         } else {
5408                 segment_order_horz_contiguous_luma = 1;
5409                 segment_order_vert_contiguous_luma = 0;
5410         }
5411
5412         if (BytePerPixelC == 2) {
5413                 segment_order_horz_contiguous_chroma = 0;
5414                 segment_order_vert_contiguous_chroma = 1;
5415         } else {
5416                 segment_order_horz_contiguous_chroma = 1;
5417                 segment_order_vert_contiguous_chroma = 0;
5418         }
5419 #ifdef __DML_VBA_DEBUG__
5420         dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5421         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5422         dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5423         dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5424         dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5425         dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5426         dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5427         dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5428         dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5429                         __func__, segment_order_horz_contiguous_chroma);
5430 #endif
5431
5432         if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5433                 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5434                         RequestLuma = REQ_256Bytes;
5435                 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5436                                 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5437                         RequestLuma = REQ_128BytesNonContiguous;
5438                 else
5439                         RequestLuma = REQ_128BytesContiguous;
5440
5441                 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5442                         RequestChroma = REQ_256Bytes;
5443                 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5444                                 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5445                         RequestChroma = REQ_128BytesNonContiguous;
5446                 else
5447                         RequestChroma = REQ_128BytesContiguous;
5448
5449         } else if (!IsVertical(SourceRotation)) {
5450                 if (req128_horz_wc_l == 0)
5451                         RequestLuma = REQ_256Bytes;
5452                 else if (segment_order_horz_contiguous_luma == 0)
5453                         RequestLuma = REQ_128BytesNonContiguous;
5454                 else
5455                         RequestLuma = REQ_128BytesContiguous;
5456
5457                 if (req128_horz_wc_c == 0)
5458                         RequestChroma = REQ_256Bytes;
5459                 else if (segment_order_horz_contiguous_chroma == 0)
5460                         RequestChroma = REQ_128BytesNonContiguous;
5461                 else
5462                         RequestChroma = REQ_128BytesContiguous;
5463
5464         } else {
5465                 if (req128_vert_wc_l == 0)
5466                         RequestLuma = REQ_256Bytes;
5467                 else if (segment_order_vert_contiguous_luma == 0)
5468                         RequestLuma = REQ_128BytesNonContiguous;
5469                 else
5470                         RequestLuma = REQ_128BytesContiguous;
5471
5472                 if (req128_vert_wc_c == 0)
5473                         RequestChroma = REQ_256Bytes;
5474                 else if (segment_order_vert_contiguous_chroma == 0)
5475                         RequestChroma = REQ_128BytesNonContiguous;
5476                 else
5477                         RequestChroma = REQ_128BytesContiguous;
5478         }
5479
5480         if (RequestLuma == REQ_256Bytes) {
5481                 *MaxUncompressedBlockLuma = 256;
5482                 *MaxCompressedBlockLuma = 256;
5483                 *IndependentBlockLuma = 0;
5484         } else if (RequestLuma == REQ_128BytesContiguous) {
5485                 *MaxUncompressedBlockLuma = 256;
5486                 *MaxCompressedBlockLuma = 128;
5487                 *IndependentBlockLuma = 128;
5488         } else {
5489                 *MaxUncompressedBlockLuma = 256;
5490                 *MaxCompressedBlockLuma = 64;
5491                 *IndependentBlockLuma = 64;
5492         }
5493
5494         if (RequestChroma == REQ_256Bytes) {
5495                 *MaxUncompressedBlockChroma = 256;
5496                 *MaxCompressedBlockChroma = 256;
5497                 *IndependentBlockChroma = 0;
5498         } else if (RequestChroma == REQ_128BytesContiguous) {
5499                 *MaxUncompressedBlockChroma = 256;
5500                 *MaxCompressedBlockChroma = 128;
5501                 *IndependentBlockChroma = 128;
5502         } else {
5503                 *MaxUncompressedBlockChroma = 256;
5504                 *MaxCompressedBlockChroma = 64;
5505                 *IndependentBlockChroma = 64;
5506         }
5507
5508         if (DCCEnabled != true || BytePerPixelC == 0) {
5509                 *MaxUncompressedBlockChroma = 0;
5510                 *MaxCompressedBlockChroma = 0;
5511                 *IndependentBlockChroma = 0;
5512         }
5513
5514         if (DCCEnabled != true) {
5515                 *MaxUncompressedBlockLuma = 0;
5516                 *MaxCompressedBlockLuma = 0;
5517                 *IndependentBlockLuma = 0;
5518         }
5519
5520 #ifdef __DML_VBA_DEBUG__
5521         dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5522         dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5523         dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5524         dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5525         dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5526         dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5527 #endif
5528
5529 } // CalculateDCCConfiguration
5530
5531 void dml32_CalculateStutterEfficiency(
5532                 unsigned int      CompressedBufferSizeInkByte,
5533                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5534                 bool   UnboundedRequestEnabled,
5535                 unsigned int      MetaFIFOSizeInKEntries,
5536                 unsigned int      ZeroSizeBufferEntries,
5537                 unsigned int      PixelChunkSizeInKByte,
5538                 unsigned int   NumberOfActiveSurfaces,
5539                 unsigned int      ROBBufferSizeInKByte,
5540                 double    TotalDataReadBandwidth,
5541                 double    DCFCLK,
5542                 double    ReturnBW,
5543                 unsigned int      CompbufReservedSpace64B,
5544                 unsigned int      CompbufReservedSpaceZs,
5545                 double    SRExitTime,
5546                 double    SRExitZ8Time,
5547                 bool   SynchronizeTimingsFinal,
5548                 unsigned int   BlendingAndTiming[],
5549                 double    StutterEnterPlusExitWatermark,
5550                 double    Z8StutterEnterPlusExitWatermark,
5551                 bool   ProgressiveToInterlaceUnitInOPP,
5552                 bool   Interlace[],
5553                 double    MinTTUVBlank[],
5554                 unsigned int   DPPPerSurface[],
5555                 unsigned int      DETBufferSizeY[],
5556                 unsigned int   BytePerPixelY[],
5557                 double    BytePerPixelDETY[],
5558                 double      SwathWidthY[],
5559                 unsigned int   SwathHeightY[],
5560                 unsigned int   SwathHeightC[],
5561                 double    NetDCCRateLuma[],
5562                 double    NetDCCRateChroma[],
5563                 double    DCCFractionOfZeroSizeRequestsLuma[],
5564                 double    DCCFractionOfZeroSizeRequestsChroma[],
5565                 unsigned int      HTotal[],
5566                 unsigned int      VTotal[],
5567                 double    PixelClock[],
5568                 double    VRatio[],
5569                 enum dm_rotation_angle SourceRotation[],
5570                 unsigned int   BlockHeight256BytesY[],
5571                 unsigned int   BlockWidth256BytesY[],
5572                 unsigned int   BlockHeight256BytesC[],
5573                 unsigned int   BlockWidth256BytesC[],
5574                 unsigned int   DCCYMaxUncompressedBlock[],
5575                 unsigned int   DCCCMaxUncompressedBlock[],
5576                 unsigned int      VActive[],
5577                 bool   DCCEnable[],
5578                 bool   WritebackEnable[],
5579                 double    ReadBandwidthSurfaceLuma[],
5580                 double    ReadBandwidthSurfaceChroma[],
5581                 double    meta_row_bw[],
5582                 double    dpte_row_bw[],
5583
5584                 /* Output */
5585                 double   *StutterEfficiencyNotIncludingVBlank,
5586                 double   *StutterEfficiency,
5587                 unsigned int     *NumberOfStutterBurstsPerFrame,
5588                 double   *Z8StutterEfficiencyNotIncludingVBlank,
5589                 double   *Z8StutterEfficiency,
5590                 unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5591                 double   *StutterPeriod,
5592                 bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5593 {
5594
5595         bool FoundCriticalSurface = false;
5596         unsigned int SwathSizeCriticalSurface = 0;
5597         unsigned int LastChunkOfSwathSize;
5598         unsigned int MissingPartOfLastSwathOfDETSize;
5599         double LastZ8StutterPeriod = 0.0;
5600         double LastStutterPeriod = 0.0;
5601         unsigned int TotalNumberOfActiveOTG = 0;
5602         double doublePixelClock;
5603         unsigned int doubleHTotal;
5604         unsigned int doubleVTotal;
5605         bool SameTiming = true;
5606         double DETBufferingTimeY;
5607         double SwathWidthYCriticalSurface = 0.0;
5608         double SwathHeightYCriticalSurface = 0.0;
5609         double VActiveTimeCriticalSurface = 0.0;
5610         double FrameTimeCriticalSurface = 0.0;
5611         unsigned int BytePerPixelYCriticalSurface = 0;
5612         double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5613         unsigned int DETBufferSizeYCriticalSurface = 0;
5614         double MinTTUVBlankCriticalSurface = 0.0;
5615         unsigned int BlockWidth256BytesYCriticalSurface = 0;
5616         bool doublePlaneCriticalSurface = 0;
5617         bool doublePipeCriticalSurface = 0;
5618         double TotalCompressedReadBandwidth;
5619         double TotalRowReadBandwidth;
5620         double AverageDCCCompressionRate;
5621         double EffectiveCompressedBufferSize;
5622         double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5623         double StutterBurstTime;
5624         unsigned int TotalActiveWriteback;
5625         double LinesInDETY;
5626         double LinesInDETYRoundedDownToSwath;
5627         double MaximumEffectiveCompressionLuma;
5628         double MaximumEffectiveCompressionChroma;
5629         double TotalZeroSizeRequestReadBandwidth;
5630         double TotalZeroSizeCompressedReadBandwidth;
5631         double AverageDCCZeroSizeFraction;
5632         double AverageZeroSizeCompressionRate;
5633         unsigned int k;
5634
5635         TotalZeroSizeRequestReadBandwidth = 0;
5636         TotalZeroSizeCompressedReadBandwidth = 0;
5637         TotalRowReadBandwidth = 0;
5638         TotalCompressedReadBandwidth = 0;
5639
5640         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5641                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5642                         if (DCCEnable[k] == true) {
5643                                 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5644                                                 || (!IsVertical(SourceRotation[k])
5645                                                                 && BlockHeight256BytesY[k] > SwathHeightY[k])
5646                                                 || DCCYMaxUncompressedBlock[k] < 256) {
5647                                         MaximumEffectiveCompressionLuma = 2;
5648                                 } else {
5649                                         MaximumEffectiveCompressionLuma = 4;
5650                                 }
5651                                 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5652                                                 + ReadBandwidthSurfaceLuma[k]
5653                                                                 / dml_min(NetDCCRateLuma[k],
5654                                                                                 MaximumEffectiveCompressionLuma);
5655 #ifdef __DML_VBA_DEBUG__
5656                                 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5657                                                 __func__, k, ReadBandwidthSurfaceLuma[k]);
5658                                 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5659                                                 __func__, k, NetDCCRateLuma[k]);
5660                                 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5661                                                 __func__, k, MaximumEffectiveCompressionLuma);
5662 #endif
5663                                 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5664                                                 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5665                                 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5666                                                 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5667                                                                 / MaximumEffectiveCompressionLuma;
5668
5669                                 if (ReadBandwidthSurfaceChroma[k] > 0) {
5670                                         if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5671                                                         || (!IsVertical(SourceRotation[k])
5672                                                                         && BlockHeight256BytesC[k] > SwathHeightC[k])
5673                                                         || DCCCMaxUncompressedBlock[k] < 256) {
5674                                                 MaximumEffectiveCompressionChroma = 2;
5675                                         } else {
5676                                                 MaximumEffectiveCompressionChroma = 4;
5677                                         }
5678                                         TotalCompressedReadBandwidth =
5679                                                         TotalCompressedReadBandwidth
5680                                                         + ReadBandwidthSurfaceChroma[k]
5681                                                         / dml_min(NetDCCRateChroma[k],
5682                                                         MaximumEffectiveCompressionChroma);
5683 #ifdef __DML_VBA_DEBUG__
5684                                         dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5685                                                         __func__, k, ReadBandwidthSurfaceChroma[k]);
5686                                         dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5687                                                         __func__, k, NetDCCRateChroma[k]);
5688                                         dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5689                                                         __func__, k, MaximumEffectiveCompressionChroma);
5690 #endif
5691                                         TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5692                                                         + ReadBandwidthSurfaceChroma[k]
5693                                                                         * DCCFractionOfZeroSizeRequestsChroma[k];
5694                                         TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5695                                                         + ReadBandwidthSurfaceChroma[k]
5696                                                                         * DCCFractionOfZeroSizeRequestsChroma[k]
5697                                                                         / MaximumEffectiveCompressionChroma;
5698                                 }
5699                         } else {
5700                                 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5701                                                 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5702                         }
5703                         TotalRowReadBandwidth = TotalRowReadBandwidth
5704                                         + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5705                 }
5706         }
5707
5708         AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5709         AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5710
5711 #ifdef __DML_VBA_DEBUG__
5712         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5713         dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5714         dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5715         dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5716                         __func__, TotalZeroSizeCompressedReadBandwidth);
5717         dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5718         dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5719         dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5720         dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5721         dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5722         dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5723         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5724 #endif
5725         if (AverageDCCZeroSizeFraction == 1) {
5726                 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5727                                 / TotalZeroSizeCompressedReadBandwidth;
5728                 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5729                                 * AverageZeroSizeCompressionRate
5730                                 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5731                                                 * AverageZeroSizeCompressionRate;
5732         } else if (AverageDCCZeroSizeFraction > 0) {
5733                 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5734                                 / TotalZeroSizeCompressedReadBandwidth;
5735                 EffectiveCompressedBufferSize = dml_min(
5736                                 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5737                                 (double) MetaFIFOSizeInKEntries * 1024 * 64
5738                                         / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5739                                         + 1 / AverageDCCCompressionRate))
5740                                         + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5741                                         * AverageDCCCompressionRate,
5742                                         ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5743                                         / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5744
5745 #ifdef __DML_VBA_DEBUG__
5746                 dml_print("DML::%s: min 1 = %f\n", __func__,
5747                                 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5748                 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5749                                 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5750                                                 AverageDCCCompressionRate));
5751                 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5752                                 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5753                 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5754                                 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5755 #endif
5756         } else {
5757                 EffectiveCompressedBufferSize = dml_min(
5758                                 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5759                                 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5760                                 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5761                                                 * AverageDCCCompressionRate;
5762
5763 #ifdef __DML_VBA_DEBUG__
5764                 dml_print("DML::%s: min 1 = %f\n", __func__,
5765                                 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5766                 dml_print("DML::%s: min 2 = %f\n", __func__,
5767                                 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5768 #endif
5769         }
5770
5771 #ifdef __DML_VBA_DEBUG__
5772         dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5773         dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5774         dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5775 #endif
5776
5777         *StutterPeriod = 0;
5778
5779         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5780                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5781                         LinesInDETY = ((double) DETBufferSizeY[k]
5782                                         + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5783                                                         * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5784                                         / BytePerPixelDETY[k] / SwathWidthY[k];
5785                         LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5786                         DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5787                                         / VRatio[k];
5788 #ifdef __DML_VBA_DEBUG__
5789                         dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5790                         dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5791                         dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5792                         dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5793                                         __func__, k, ReadBandwidthSurfaceLuma[k]);
5794                         dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5795                         dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5796                         dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5797                                         __func__, k, LinesInDETYRoundedDownToSwath);
5798                         dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5799                         dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5800                         dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5801                         dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5802                         dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5803 #endif
5804
5805                         if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5806                                 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5807
5808                                 FoundCriticalSurface = true;
5809                                 *StutterPeriod = DETBufferingTimeY;
5810                                 FrameTimeCriticalSurface = (
5811                                                 isInterlaceTiming ?
5812                                                                 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5813                                                 * (double) HTotal[k] / PixelClock[k];
5814                                 VActiveTimeCriticalSurface = (
5815                                                 isInterlaceTiming ?
5816                                                                 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5817                                                 * (double) HTotal[k] / PixelClock[k];
5818                                 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5819                                 SwathWidthYCriticalSurface = SwathWidthY[k];
5820                                 SwathHeightYCriticalSurface = SwathHeightY[k];
5821                                 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5822                                 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5823                                                 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5824                                 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5825                                 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5826                                 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5827                                 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5828
5829 #ifdef __DML_VBA_DEBUG__
5830                                 dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5831                                                 __func__, k, FoundCriticalSurface);
5832                                 dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5833                                                 __func__, k, *StutterPeriod);
5834                                 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5835                                                 __func__, k, MinTTUVBlankCriticalSurface);
5836                                 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5837                                                 __func__, k, FrameTimeCriticalSurface);
5838                                 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5839                                                 __func__, k, VActiveTimeCriticalSurface);
5840                                 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5841                                                 __func__, k, BytePerPixelYCriticalSurface);
5842                                 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5843                                                 __func__, k, SwathWidthYCriticalSurface);
5844                                 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5845                                                 __func__, k, SwathHeightYCriticalSurface);
5846                                 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5847                                                 __func__, k, BlockWidth256BytesYCriticalSurface);
5848                                 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5849                                                 __func__, k, doublePlaneCriticalSurface);
5850                                 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5851                                                 __func__, k, doublePipeCriticalSurface);
5852                                 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5853                                                 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5854 #endif
5855                         }
5856                 }
5857         }
5858
5859         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5860                         EffectiveCompressedBufferSize);
5861 #ifdef __DML_VBA_DEBUG__
5862         dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5863         dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5864         dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5865                         __func__, *StutterPeriod * TotalDataReadBandwidth);
5866         dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5867         dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5868                         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5869         dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5870         dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5871         dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5872         dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5873 #endif
5874
5875         StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5876                         / ReturnBW
5877                         + (*StutterPeriod * TotalDataReadBandwidth
5878                                         - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5879                         + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5880 #ifdef __DML_VBA_DEBUG__
5881         dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5882                         AverageDCCCompressionRate / ReturnBW);
5883         dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5884                         __func__, (*StutterPeriod * TotalDataReadBandwidth));
5885         dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5886                         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5887         dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5888         dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5889 #endif
5890         StutterBurstTime = dml_max(StutterBurstTime,
5891                         LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5892                                         * SwathWidthYCriticalSurface / ReturnBW);
5893
5894 #ifdef __DML_VBA_DEBUG__
5895         dml_print("DML::%s: Time to finish residue swath=%f\n",
5896                         __func__,
5897                         LinesToFinishSwathTransferStutterCriticalSurface *
5898                         BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5899 #endif
5900
5901         TotalActiveWriteback = 0;
5902         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5903                 if (WritebackEnable[k])
5904                         TotalActiveWriteback = TotalActiveWriteback + 1;
5905         }
5906
5907         if (TotalActiveWriteback == 0) {
5908 #ifdef __DML_VBA_DEBUG__
5909                 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5910                 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5911                 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5912                 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5913 #endif
5914                 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5915                                 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5916                 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5917                                 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5918                 *NumberOfStutterBurstsPerFrame = (
5919                                 *StutterEfficiencyNotIncludingVBlank > 0 ?
5920                                                 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5921                 *Z8NumberOfStutterBurstsPerFrame = (
5922                                 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5923                                                 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5924         } else {
5925                 *StutterEfficiencyNotIncludingVBlank = 0.;
5926                 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5927                 *NumberOfStutterBurstsPerFrame = 0;
5928                 *Z8NumberOfStutterBurstsPerFrame = 0;
5929         }
5930 #ifdef __DML_VBA_DEBUG__
5931         dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5932         dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5933                         __func__, *StutterEfficiencyNotIncludingVBlank);
5934         dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5935                         __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5936         dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5937         dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5938 #endif
5939
5940         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5941                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5942                         if (BlendingAndTiming[k] == k) {
5943                                 if (TotalNumberOfActiveOTG == 0) {
5944                                         doublePixelClock = PixelClock[k];
5945                                         doubleHTotal = HTotal[k];
5946                                         doubleVTotal = VTotal[k];
5947                                 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5948                                                 || doubleVTotal != VTotal[k]) {
5949                                         SameTiming = false;
5950                                 }
5951                                 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5952                         }
5953                 }
5954         }
5955
5956         if (*StutterEfficiencyNotIncludingVBlank > 0) {
5957                 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5958
5959                 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5960                                 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5961                         *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5962                                                 + StutterBurstTime * VActiveTimeCriticalSurface
5963                                                 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5964                 } else {
5965                         *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
5966                 }
5967         } else {
5968                 *StutterEfficiency = 0;
5969         }
5970
5971         if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
5972                 LastZ8StutterPeriod = VActiveTimeCriticalSurface
5973                                 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5974                 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
5975                                 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
5976                         *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
5977                                 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5978                 } else {
5979                         *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
5980                 }
5981         } else {
5982                 *Z8StutterEfficiency = 0.;
5983         }
5984
5985 #ifdef __DML_VBA_DEBUG__
5986         dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
5987         dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
5988         dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5989         dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5990         dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
5991         dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
5992         dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5993                         __func__, *StutterEfficiencyNotIncludingVBlank);
5994         dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5995 #endif
5996
5997         SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
5998                         * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
5999         LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6000         MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6001                         - DETBufferSizeYCriticalSurface;
6002
6003         *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6004                         && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6005                         && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6006                         && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6007
6008 #ifdef __DML_VBA_DEBUG__
6009         dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6010         dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6011         dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6012         dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6013 #endif
6014 } // CalculateStutterEfficiency
6015
/*
 * Derive the total DET budget, the nominal per-DPP DET size and the minimum
 * compressed buffer size from the return buffer configuration.
 *
 * Inputs:
 *   ConfigReturnBufferSizeInKByte - configured return buffer size, in KB
 *   ROBBufferSizeInKByte          - ROB buffer size, in KB
 *   MaxNumDPP                     - divisor used for the nominal DET size
 *   nomDETInKByteOverrideEnable   - when true, the computed nominal DET size
 *                                   is replaced by nomDETInKByteOverrideValue
 *   nomDETInKByteOverrideValue    - replacement value used by the override
 *
 * Outputs:
 *   *MaxTotalDETInKByte             - dml_ceil() of 4/5 of (return buffer + ROB),
 *                                     called with a granularity argument of 64
 *   *nomDETInKByte                  - dml_floor() of the total divided by
 *                                     MaxNumDPP (granularity argument 64), or
 *                                     the override value when enabled
 *   *MinCompressedBufferSizeInKByte - return buffer size minus the total DET
 *                                     (unsigned subtraction)
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, /* VBA_DELTA, allow DV to override default DET size */
		unsigned int nomDETInKByteOverrideValue, /* VBA_DELTA */

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double return_plus_rob_kb = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;
	/* Truncated to unsigned int exactly as when stored through the output pointer. */
	const unsigned int total_det_kb = dml_ceil(return_plus_rob_kb * 4.0 / 5.0, 64);

	*MaxTotalDETInKByte = total_det_kb;
	*nomDETInKByte = dml_floor((double) total_det_kb / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - total_det_kb;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* The override is applied last so the computed value is still logged above. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} /* CalculateMaxDETAndMinCompressedBufferSize */
6052
/*
 * Decide whether the bandwidth demanded by all active surfaces is supported.
 *
 * For each of the first NumberOfActiveSurfaces entries the demand is the sum
 * of luma, chroma and cursor read bandwidth, each scaled by its urgent burst
 * factor, plus the meta and dpte row bandwidth each multiplied by
 * NumberOfDPP[k].
 *
 * Returns true only when the accumulated demand does not exceed ReturnBW and
 * no surface has its NotUrgentLatencyHiding[] entry set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int k;
	bool latency_hiding_failed = false;
	bool supported;
	double demand = 0;

	/* A single offending surface is enough; no need to scan the rest. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (NotUrgentLatencyHiding[k]) {
			latency_hiding_failed = true;
			break;
		}
	}

	/* Terms are added strictly left to right to keep the rounding unchanged. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		demand = demand
				+ ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
				+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
				+ cursor_bw[k] * UrgentBurstFactorCursor[k]
				+ NumberOfDPP[k] * meta_row_bandwidth[k]
				+ NumberOfDPP[k] * dpte_row_bandwidth[k];
	}

	supported = !latency_hiding_failed && (demand <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, latency_hiding_failed);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, demand);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}
6091
/*
 * Accumulate the bandwidth demand across all active surfaces and compare it
 * with the available return bandwidth.
 *
 * Per surface, the contribution is the dml_max3() of three candidates:
 *   - NumberOfDPP[k] * prefetch_vmrow_bw[k]
 *   - read bandwidth (luma, chroma, cursor, each scaled by its urgent burst
 *     factor) plus NumberOfDPP[k] * (meta + dpte row bandwidth)
 *   - NumberOfDPP[k] * (luma + chroma prefetch bandwidth, each scaled by its
 *     "Pre" burst factor) plus the scaled cursor_bw_pre
 *
 * Outputs:
 *   *PrefetchBandwidth         - sum of the per-surface contributions
 *   *PrefetchBandwidthSupport  - true when the sum is <= ReturnBW and no
 *                                surface has NotUrgentLatencyHiding[] set
 *   *FractionOfUrgentBandwidth - the sum divided by ReturnBW
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double  *PrefetchBandwidth,
		double  *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	unsigned int k;
	bool latency_hiding_failed = false;
	double total_bw = 0;

	/* One flagged surface settles the question, so stop at the first hit. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (NotUrgentLatencyHiding[k]) {
			latency_hiding_failed = true;
			break;
		}
	}

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const double vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
		const double active_bw = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
				+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
				+ cursor_bw[k] * UrgentBurstFactorCursor[k]
				+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
		const double prefetch_bw = NumberOfDPP[k]
				* (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
					+ PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
				+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];

		total_bw = total_bw + dml_max3(vm_row_bw, active_bw, prefetch_bw);
	}

	*PrefetchBandwidth = total_bw;
	*PrefetchBandwidthSupport = (total_bw <= ReturnBW) && !latency_hiding_failed;
	*FractionOfUrgentBandwidth = total_bw / ReturnBW;
}
6135
/*
 * Compute the bandwidth left over from ReturnBW after every active surface
 * has taken its share.
 *
 * Each surface consumes the dml_max() of:
 *   - its read bandwidth (luma, chroma and cursor, each scaled by the
 *     matching urgent burst factor), and
 *   - NumberOfDPP[k] * (luma + chroma prefetch bandwidth, each scaled by its
 *     "Pre" burst factor) plus the scaled cursor_bw_pre.
 *
 * Returns ReturnBW minus the sum of those per-surface values; nothing in this
 * function clamps the result, so it may be negative.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int k;
	double remaining_bw = ReturnBW;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const double active_bw = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
				+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
				+ cursor_bw[k] * UrgentBurstFactorCursor[k];
		const double prefetch_bw = NumberOfDPP[k]
				* (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
					+ PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
				+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];

		remaining_bw = remaining_bw - dml_max(active_bw, prefetch_bw);
	}

	return remaining_bw;
}
6162
6163 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6164                 double ReturnBW,
6165                 enum immediate_flip_requirement ImmediateFlipRequirement[],
6166                 double final_flip_bw[],
6167                 double ReadBandwidthLuma[],
6168                 double ReadBandwidthChroma[],
6169                 double PrefetchBandwidthLuma[],
6170                 double PrefetchBandwidthChroma[],
6171                 double cursor_bw[],
6172                 double meta_row_bandwidth[],
6173                 double dpte_row_bandwidth[],
6174                 double cursor_bw_pre[],
6175                 double prefetch_vmrow_bw[],
6176                 unsigned int NumberOfDPP[],
6177                 double UrgentBurstFactorLuma[],
6178                 double UrgentBurstFactorChroma[],
6179                 double UrgentBurstFactorCursor[],
6180                 double UrgentBurstFactorLumaPre[],
6181                 double UrgentBurstFactorChromaPre[],
6182                 double UrgentBurstFactorCursorPre[],
6183
6184                 /* output */
6185                 double  *TotalBandwidth,
6186                 double  *FractionOfUrgentBandwidth,
6187                 bool *ImmediateFlipBandwidthSupport)
6188 {
6189         unsigned int k;
6190         *TotalBandwidth = 0;
6191         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6192                 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6193                         *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6194                                         NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6195                                         NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6196                 } else {
6197                         *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6198                                         NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6199                                         NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6200                 }
6201         }
6202         *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6203         *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6204 }