clk: baikal-t1: Convert to platform device driver
[platform/kernel/linux-starfive.git] / drivers / gpu / drm / amd / display / dc / dml / dcn32 / display_mode_vba_util_32.c
1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29
30 unsigned int dml32_dscceComputeDelay(
31                 unsigned int bpc,
32                 double BPP,
33                 unsigned int sliceWidth,
34                 unsigned int numSlices,
35                 enum output_format_class pixelFormat,
36                 enum output_encoder_class Output)
37 {
38         // valid bpc         = source bits per component in the set of {8, 10, 12}
39         // valid bpp         = increments of 1/16 of a bit
40         //                    min = 6/7/8 in N420/N422/444, respectively
41         //                    max = such that compression is 1:1
42         //valid sliceWidth  = number of pixels per slice line,
43         //      must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
44         //valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
45         //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
46
47         // fixed value
48         unsigned int rcModelSize = 8192;
49
50         // N422/N420 operate at 2 pixels per clock
51         unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
52         Delay, pixels;
53
54         if (pixelFormat == dm_420)
55                 pixelsPerClock = 2;
56         else if (pixelFormat == dm_n422)
57                 pixelsPerClock = 2;
58         // #all other modes operate at 1 pixel per clock
59         else
60                 pixelsPerClock = 1;
61
62         //initial transmit delay as per PPS
63         initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
64
65         //compute ssm delay
66         if (bpc == 8)
67                 D = 81;
68         else if (bpc == 10)
69                 D = 89;
70         else
71                 D = 113;
72
73         //divide by pixel per cycle to compute slice width as seen by DSC
74         w = sliceWidth / pixelsPerClock;
75
76         //422 mode has an additional cycle of delay
77         if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
78                 s = 0;
79         else
80                 s = 1;
81
82         //main calculation for the dscce
83         ix = initalXmitDelay + 45;
84         wx = (w + 2) / 3;
85         p = 3 * wx - w;
86         l0 = ix / w;
87         a = ix + p * l0;
88         ax = (a + 2) / 3 + D + 6 + 1;
89         L = (ax + wx - 1) / wx;
90         if ((ix % w) == 0 && p != 0)
91                 lstall = 1;
92         else
93                 lstall = 0;
94         Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
95
96         //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
97         pixels = Delay * 3 * pixelsPerClock;
98
99 #ifdef __DML_VBA_DEBUG__
100         dml_print("DML::%s: bpc: %d\n", __func__, bpc);
101         dml_print("DML::%s: BPP: %f\n", __func__, BPP);
102         dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
103         dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
104         dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
105         dml_print("DML::%s: Output: %d\n", __func__, Output);
106         dml_print("DML::%s: pixels: %d\n", __func__, pixels);
107 #endif
108
109         return pixels;
110 }
111
112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
113 {
114         unsigned int Delay = 0;
115
116         if (pixelFormat == dm_420) {
117                 //   sfr
118                 Delay = Delay + 2;
119                 //   dsccif
120                 Delay = Delay + 0;
121                 //   dscc - input deserializer
122                 Delay = Delay + 3;
123                 //   dscc gets pixels every other cycle
124                 Delay = Delay + 2;
125                 //   dscc - input cdc fifo
126                 Delay = Delay + 12;
127                 //   dscc gets pixels every other cycle
128                 Delay = Delay + 13;
129                 //   dscc - cdc uncertainty
130                 Delay = Delay + 2;
131                 //   dscc - output cdc fifo
132                 Delay = Delay + 7;
133                 //   dscc gets pixels every other cycle
134                 Delay = Delay + 3;
135                 //   dscc - cdc uncertainty
136                 Delay = Delay + 2;
137                 //   dscc - output serializer
138                 Delay = Delay + 1;
139                 //   sft
140                 Delay = Delay + 1;
141         } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
142                 //   sfr
143                 Delay = Delay + 2;
144                 //   dsccif
145                 Delay = Delay + 1;
146                 //   dscc - input deserializer
147                 Delay = Delay + 5;
148                 //  dscc - input cdc fifo
149                 Delay = Delay + 25;
150                 //   dscc - cdc uncertainty
151                 Delay = Delay + 2;
152                 //   dscc - output cdc fifo
153                 Delay = Delay + 10;
154                 //   dscc - cdc uncertainty
155                 Delay = Delay + 2;
156                 //   dscc - output serializer
157                 Delay = Delay + 1;
158                 //   sft
159                 Delay = Delay + 1;
160         } else {
161                 //   sfr
162                 Delay = Delay + 2;
163                 //   dsccif
164                 Delay = Delay + 0;
165                 //   dscc - input deserializer
166                 Delay = Delay + 3;
167                 //   dscc - input cdc fifo
168                 Delay = Delay + 12;
169                 //   dscc - cdc uncertainty
170                 Delay = Delay + 2;
171                 //   dscc - output cdc fifo
172                 Delay = Delay + 7;
173                 //   dscc - output serializer
174                 Delay = Delay + 1;
175                 //   dscc - cdc uncertainty
176                 Delay = Delay + 2;
177                 //   sft
178                 Delay = Delay + 1;
179         }
180
181         return Delay;
182 }
183
184
185 bool IsVertical(enum dm_rotation_angle Scan)
186 {
187         bool is_vert = false;
188
189         if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
190                 is_vert = true;
191         else
192                 is_vert = false;
193         return is_vert;
194 }
195
196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
197                 double HRatio,
198                 double HRatioChroma,
199                 double VRatio,
200                 double VRatioChroma,
201                 double MaxDCHUBToPSCLThroughput,
202                 double MaxPSCLToLBThroughput,
203                 double PixelClock,
204                 enum source_format_class SourcePixelFormat,
205                 unsigned int HTaps,
206                 unsigned int HTapsChroma,
207                 unsigned int VTaps,
208                 unsigned int VTapsChroma,
209
210                 /* output */
211                 double *PSCL_THROUGHPUT,
212                 double *PSCL_THROUGHPUT_CHROMA,
213                 double *DPPCLKUsingSingleDPP)
214 {
215         double DPPCLKUsingSingleDPPLuma;
216         double DPPCLKUsingSingleDPPChroma;
217
218         if (HRatio > 1) {
219                 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
220                                 dml_ceil((double) HTaps / 6.0, 1.0));
221         } else {
222                 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
223         }
224
225         DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
226                         *PSCL_THROUGHPUT, 1);
227
228         if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
229                 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
230
231         if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
232                         SourcePixelFormat != dm_rgbe_alpha)) {
233                 *PSCL_THROUGHPUT_CHROMA = 0;
234                 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
235         } else {
236                 if (HRatioChroma > 1) {
237                         *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
238                                         HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
239                 } else {
240                         *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
241                 }
242                 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
243                                 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
244                 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
245                         DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
246                 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
247         }
248 }
249
250 void dml32_CalculateBytePerPixelAndBlockSizes(
251                 enum source_format_class SourcePixelFormat,
252                 enum dm_swizzle_mode SurfaceTiling,
253
254                 /* Output */
255                 unsigned int *BytePerPixelY,
256                 unsigned int *BytePerPixelC,
257                 double  *BytePerPixelDETY,
258                 double  *BytePerPixelDETC,
259                 unsigned int *BlockHeight256BytesY,
260                 unsigned int *BlockHeight256BytesC,
261                 unsigned int *BlockWidth256BytesY,
262                 unsigned int *BlockWidth256BytesC,
263                 unsigned int *MacroTileHeightY,
264                 unsigned int *MacroTileHeightC,
265                 unsigned int *MacroTileWidthY,
266                 unsigned int *MacroTileWidthC)
267 {
268         if (SourcePixelFormat == dm_444_64) {
269                 *BytePerPixelDETY = 8;
270                 *BytePerPixelDETC = 0;
271                 *BytePerPixelY = 8;
272                 *BytePerPixelC = 0;
273         } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
274                 *BytePerPixelDETY = 4;
275                 *BytePerPixelDETC = 0;
276                 *BytePerPixelY = 4;
277                 *BytePerPixelC = 0;
278         } else if (SourcePixelFormat == dm_444_16) {
279                 *BytePerPixelDETY = 2;
280                 *BytePerPixelDETC = 0;
281                 *BytePerPixelY = 2;
282                 *BytePerPixelC = 0;
283         } else if (SourcePixelFormat == dm_444_8) {
284                 *BytePerPixelDETY = 1;
285                 *BytePerPixelDETC = 0;
286                 *BytePerPixelY = 1;
287                 *BytePerPixelC = 0;
288         } else if (SourcePixelFormat == dm_rgbe_alpha) {
289                 *BytePerPixelDETY = 4;
290                 *BytePerPixelDETC = 1;
291                 *BytePerPixelY = 4;
292                 *BytePerPixelC = 1;
293         } else if (SourcePixelFormat == dm_420_8) {
294                 *BytePerPixelDETY = 1;
295                 *BytePerPixelDETC = 2;
296                 *BytePerPixelY = 1;
297                 *BytePerPixelC = 2;
298         } else if (SourcePixelFormat == dm_420_12) {
299                 *BytePerPixelDETY = 2;
300                 *BytePerPixelDETC = 4;
301                 *BytePerPixelY = 2;
302                 *BytePerPixelC = 4;
303         } else {
304                 *BytePerPixelDETY = 4.0 / 3;
305                 *BytePerPixelDETC = 8.0 / 3;
306                 *BytePerPixelY = 2;
307                 *BytePerPixelC = 4;
308         }
309 #ifdef __DML_VBA_DEBUG__
310         dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
311         dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
312         dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
313         dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
314         dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
315 #endif
316         if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
317                         || SourcePixelFormat == dm_444_16
318                         || SourcePixelFormat == dm_444_8
319                         || SourcePixelFormat == dm_mono_16
320                         || SourcePixelFormat == dm_mono_8
321                         || SourcePixelFormat == dm_rgbe)) {
322                 if (SurfaceTiling == dm_sw_linear)
323                         *BlockHeight256BytesY = 1;
324                 else if (SourcePixelFormat == dm_444_64)
325                         *BlockHeight256BytesY = 4;
326                 else if (SourcePixelFormat == dm_444_8)
327                         *BlockHeight256BytesY = 16;
328                 else
329                         *BlockHeight256BytesY = 8;
330
331                 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
332                 *BlockHeight256BytesC = 0;
333                 *BlockWidth256BytesC = 0;
334         } else {
335                 if (SurfaceTiling == dm_sw_linear) {
336                         *BlockHeight256BytesY = 1;
337                         *BlockHeight256BytesC = 1;
338                 } else if (SourcePixelFormat == dm_rgbe_alpha) {
339                         *BlockHeight256BytesY = 8;
340                         *BlockHeight256BytesC = 16;
341                 } else if (SourcePixelFormat == dm_420_8) {
342                         *BlockHeight256BytesY = 16;
343                         *BlockHeight256BytesC = 8;
344                 } else {
345                         *BlockHeight256BytesY = 8;
346                         *BlockHeight256BytesC = 8;
347                 }
348                 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
349                 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
350         }
351 #ifdef __DML_VBA_DEBUG__
352         dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
353         dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
354         dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
355         dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
356 #endif
357
358         if (SurfaceTiling == dm_sw_linear) {
359                 *MacroTileHeightY = *BlockHeight256BytesY;
360                 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
361                 *MacroTileHeightC = *BlockHeight256BytesC;
362                 if (*MacroTileHeightC == 0)
363                         *MacroTileWidthC = 0;
364                 else
365                         *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
366         } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
367                         SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
368                 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
369                 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
370                 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
371                 if (*MacroTileHeightC == 0)
372                         *MacroTileWidthC = 0;
373                 else
374                         *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
375         } else {
376                 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
377                 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
378                 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
379                 if (*MacroTileHeightC == 0)
380                         *MacroTileWidthC = 0;
381                 else
382                         *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
383         }
384
385 #ifdef __DML_VBA_DEBUG__
386         dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
387         dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
388         dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
389         dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
390 #endif
391 } // CalculateBytePerPixelAndBlockSizes
392
393 void dml32_CalculateSwathAndDETConfiguration(
394                 struct dml32_CalculateSwathAndDETConfiguration *st_vars,
395                 unsigned int DETSizeOverride[],
396                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
397                 unsigned int ConfigReturnBufferSizeInKByte,
398                 unsigned int MaxTotalDETInKByte,
399                 unsigned int MinCompressedBufferSizeInKByte,
400                 double ForceSingleDPP,
401                 unsigned int NumberOfActiveSurfaces,
402                 unsigned int nomDETInKByte,
403                 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
404                 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
405                 unsigned int PixelChunkSizeKBytes,
406                 unsigned int ROBSizeKBytes,
407                 unsigned int CompressedBufferSegmentSizeInkByteFinal,
408                 enum output_encoder_class Output[],
409                 double ReadBandwidthLuma[],
410                 double ReadBandwidthChroma[],
411                 double MaximumSwathWidthLuma[],
412                 double MaximumSwathWidthChroma[],
413                 enum dm_rotation_angle SourceRotation[],
414                 bool ViewportStationary[],
415                 enum source_format_class SourcePixelFormat[],
416                 enum dm_swizzle_mode SurfaceTiling[],
417                 unsigned int ViewportWidth[],
418                 unsigned int ViewportHeight[],
419                 unsigned int ViewportXStart[],
420                 unsigned int ViewportYStart[],
421                 unsigned int ViewportXStartC[],
422                 unsigned int ViewportYStartC[],
423                 unsigned int SurfaceWidthY[],
424                 unsigned int SurfaceWidthC[],
425                 unsigned int SurfaceHeightY[],
426                 unsigned int SurfaceHeightC[],
427                 unsigned int Read256BytesBlockHeightY[],
428                 unsigned int Read256BytesBlockHeightC[],
429                 unsigned int Read256BytesBlockWidthY[],
430                 unsigned int Read256BytesBlockWidthC[],
431                 enum odm_combine_mode ODMMode[],
432                 unsigned int BlendingAndTiming[],
433                 unsigned int BytePerPixY[],
434                 unsigned int BytePerPixC[],
435                 double BytePerPixDETY[],
436                 double BytePerPixDETC[],
437                 unsigned int HActive[],
438                 double HRatio[],
439                 double HRatioChroma[],
440                 unsigned int DPPPerSurface[],
441
442                 /* Output */
443                 unsigned int swath_width_luma_ub[],
444                 unsigned int swath_width_chroma_ub[],
445                 double SwathWidth[],
446                 double SwathWidthChroma[],
447                 unsigned int SwathHeightY[],
448                 unsigned int SwathHeightC[],
449                 unsigned int DETBufferSizeInKByte[],
450                 unsigned int DETBufferSizeY[],
451                 unsigned int DETBufferSizeC[],
452                 bool *UnboundedRequestEnabled,
453                 unsigned int *CompressedBufferSizeInkByte,
454                 unsigned int *CompBufReservedSpaceKBytes,
455                 bool *CompBufReservedSpaceNeedAdjustment,
456                 bool ViewportSizeSupportPerSurface[],
457                 bool *ViewportSizeSupport)
458 {
459         unsigned int k;
460
461         st_vars->TotalActiveDPP = 0;
462         st_vars->NoChromaSurfaces = true;
463
464 #ifdef __DML_VBA_DEBUG__
465         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
466         dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
467         dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
468 #endif
469         dml32_CalculateSwathWidth(ForceSingleDPP,
470                         NumberOfActiveSurfaces,
471                         SourcePixelFormat,
472                         SourceRotation,
473                         ViewportStationary,
474                         ViewportWidth,
475                         ViewportHeight,
476                         ViewportXStart,
477                         ViewportYStart,
478                         ViewportXStartC,
479                         ViewportYStartC,
480                         SurfaceWidthY,
481                         SurfaceWidthC,
482                         SurfaceHeightY,
483                         SurfaceHeightC,
484                         ODMMode,
485                         BytePerPixY,
486                         BytePerPixC,
487                         Read256BytesBlockHeightY,
488                         Read256BytesBlockHeightC,
489                         Read256BytesBlockWidthY,
490                         Read256BytesBlockWidthC,
491                         BlendingAndTiming,
492                         HActive,
493                         HRatio,
494                         DPPPerSurface,
495
496                         /* Output */
497                         st_vars->SwathWidthdoubleDPP,
498                         st_vars->SwathWidthdoubleDPPChroma,
499                         SwathWidth,
500                         SwathWidthChroma,
501                         st_vars->MaximumSwathHeightY,
502                         st_vars->MaximumSwathHeightC,
503                         swath_width_luma_ub,
504                         swath_width_chroma_ub);
505
506         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
507                 st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k];
508                 st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k];
509 #ifdef __DML_VBA_DEBUG__
510                 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
511                 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
512                 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
513                 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]);
514                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
515                                 st_vars->RoundedUpMaxSwathSizeBytesY[k]);
516                 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
517                 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
518                 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]);
519                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
520                                 st_vars->RoundedUpMaxSwathSizeBytesC[k]);
521 #endif
522
523                 if (SourcePixelFormat[k] == dm_420_10) {
524                         st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256);
525                         st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256);
526                 }
527         }
528
529         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
530                 st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
531                 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
532                                 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
533                         st_vars->NoChromaSurfaces = false;
534                 }
535         }
536
537         // By default, just set the reserved space to 2 pixel chunks size
538         *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
539
540         // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
541         // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
542         // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
543         *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512);
544
545         if (*CompBufReservedSpaceNeedAdjustment == 1) {
546                 *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512;
547         }
548
549         #ifdef __DML_VBA_DEBUG__
550                 dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
551                 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
552         #endif
553
554         *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
555
556         dml32_CalculateDETBufferSize(DETSizeOverride,
557                         UseMALLForPStateChange,
558                         ForceSingleDPP,
559                         NumberOfActiveSurfaces,
560                         *UnboundedRequestEnabled,
561                         nomDETInKByte,
562                         MaxTotalDETInKByte,
563                         ConfigReturnBufferSizeInKByte,
564                         MinCompressedBufferSizeInKByte,
565                         CompressedBufferSegmentSizeInkByteFinal,
566                         SourcePixelFormat,
567                         ReadBandwidthLuma,
568                         ReadBandwidthChroma,
569                         st_vars->RoundedUpMaxSwathSizeBytesY,
570                         st_vars->RoundedUpMaxSwathSizeBytesC,
571                         DPPPerSurface,
572
573                         /* Output */
574                         DETBufferSizeInKByte,    // per hubp pipe
575                         CompressedBufferSizeInkByte);
576
577 #ifdef __DML_VBA_DEBUG__
578         dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP);
579         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
580         dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
581         dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
582         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
583         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
584 #endif
585
586         *ViewportSizeSupport = true;
587         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
588
589                 st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
590                                 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
591 #ifdef __DML_VBA_DEBUG__
592                 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
593                                 st_vars->DETBufferSizeInKByteForSwathCalculation);
594 #endif
595
596                 if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
597                                 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
598                         SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
599                         SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
600                         st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
601                         st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
602                 } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
603                                 st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
604                                 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
605                         SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
606                         SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
607                         st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
608                         st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
609                 } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
610                                 st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <=
611                                 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
612                         SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
613                         SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
614                         st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
615                         st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
616                 } else {
617                         SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
618                         SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
619                         st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
620                         st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
621                 }
622
623                 if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 >
624                                 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
625                                 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
626                                                 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
627                         *ViewportSizeSupport = false;
628                         ViewportSizeSupportPerSurface[k] = false;
629                 } else {
630                         ViewportSizeSupportPerSurface[k] = true;
631                 }
632
633                 if (SwathHeightC[k] == 0) {
634 #ifdef __DML_VBA_DEBUG__
635                         dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
636 #endif
637                         DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
638                         DETBufferSizeC[k] = 0;
639                 } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) {
640 #ifdef __DML_VBA_DEBUG__
641                         dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
642 #endif
643                         DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
644                         DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
645                 } else {
646 #ifdef __DML_VBA_DEBUG__
647                         dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
648 #endif
649                         DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
650                         DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
651                 }
652
653 #ifdef __DML_VBA_DEBUG__
654                 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
655                 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
656                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
657                                 k, st_vars->RoundedUpMaxSwathSizeBytesY[k]);
658                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
659                                 k, st_vars->RoundedUpMaxSwathSizeBytesC[k]);
660                 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY);
661                 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC);
662                 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
663                 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
664                 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
665                 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
666                                 ViewportSizeSupportPerSurface[k]);
667 #endif
668
669         }
670 } // CalculateSwathAndDETConfiguration
671
672 void dml32_CalculateSwathWidth(
673                 bool                            ForceSingleDPP,
674                 unsigned int                    NumberOfActiveSurfaces,
675                 enum source_format_class        SourcePixelFormat[],
676                 enum dm_rotation_angle          SourceRotation[],
677                 bool                            ViewportStationary[],
678                 unsigned int                    ViewportWidth[],
679                 unsigned int                    ViewportHeight[],
680                 unsigned int                    ViewportXStart[],
681                 unsigned int                    ViewportYStart[],
682                 unsigned int                    ViewportXStartC[],
683                 unsigned int                    ViewportYStartC[],
684                 unsigned int                    SurfaceWidthY[],
685                 unsigned int                    SurfaceWidthC[],
686                 unsigned int                    SurfaceHeightY[],
687                 unsigned int                    SurfaceHeightC[],
688                 enum odm_combine_mode           ODMMode[],
689                 unsigned int                    BytePerPixY[],
690                 unsigned int                    BytePerPixC[],
691                 unsigned int                    Read256BytesBlockHeightY[],
692                 unsigned int                    Read256BytesBlockHeightC[],
693                 unsigned int                    Read256BytesBlockWidthY[],
694                 unsigned int                    Read256BytesBlockWidthC[],
695                 unsigned int                    BlendingAndTiming[],
696                 unsigned int                    HActive[],
697                 double                          HRatio[],
698                 unsigned int                    DPPPerSurface[],
699
700                 /* Output */
701                 double                          SwathWidthdoubleDPPY[],
702                 double                          SwathWidthdoubleDPPC[],
703                 double                          SwathWidthY[], // per-pipe
704                 double                          SwathWidthC[], // per-pipe
705                 unsigned int                    MaximumSwathHeightY[],
706                 unsigned int                    MaximumSwathHeightC[],
707                 unsigned int                    swath_width_luma_ub[], // per-pipe
708                 unsigned int                    swath_width_chroma_ub[]) // per-pipe
709 {
710         unsigned int k, j;
711         enum odm_combine_mode MainSurfaceODMMode;
712
713         unsigned int surface_width_ub_l;
714         unsigned int surface_height_ub_l;
715         unsigned int surface_width_ub_c;
716         unsigned int surface_height_ub_c;
717
718 #ifdef __DML_VBA_DEBUG__
719         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
720         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
721 #endif
722
723         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
724                 if (!IsVertical(SourceRotation[k]))
725                         SwathWidthdoubleDPPY[k] = ViewportWidth[k];
726                 else
727                         SwathWidthdoubleDPPY[k] = ViewportHeight[k];
728
729 #ifdef __DML_VBA_DEBUG__
730                 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
731                 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
732 #endif
733
734                 MainSurfaceODMMode = ODMMode[k];
735                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
736                         if (BlendingAndTiming[k] == j)
737                                 MainSurfaceODMMode = ODMMode[j];
738                 }
739
740                 if (ForceSingleDPP) {
741                         SwathWidthY[k] = SwathWidthdoubleDPPY[k];
742                 } else {
743                         if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
744                                 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
745                                                 dml_round(HActive[k] / 4.0 * HRatio[k]));
746                         } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
747                                 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
748                                                 dml_round(HActive[k] / 2.0 * HRatio[k]));
749                         } else if (DPPPerSurface[k] == 2) {
750                                 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
751                         } else {
752                                 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
753                         }
754                 }
755
756 #ifdef __DML_VBA_DEBUG__
757                 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
758                 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
759                 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
760                 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
761                 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
762 #endif
763
764                 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
765                                 SourcePixelFormat[k] == dm_420_12) {
766                         SwathWidthC[k] = SwathWidthY[k] / 2;
767                         SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
768                 } else {
769                         SwathWidthC[k] = SwathWidthY[k];
770                         SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
771                 }
772
773                 if (ForceSingleDPP == true) {
774                         SwathWidthY[k] = SwathWidthdoubleDPPY[k];
775                         SwathWidthC[k] = SwathWidthdoubleDPPC[k];
776                 }
777
778                 surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
779                 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
780                 surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
781                 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
782
783 #ifdef __DML_VBA_DEBUG__
784                 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
785                 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
786                 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
787                 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
788                 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
789                 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
790                 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
791                 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
792                 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
793                 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
794 #endif
795
796                 if (!IsVertical(SourceRotation[k])) {
797                         MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
798                         MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
799                         if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
800                                 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
801                                                 dml_floor(ViewportXStart[k] +
802                                                                 SwathWidthY[k] +
803                                                                 Read256BytesBlockWidthY[k] - 1,
804                                                                 Read256BytesBlockWidthY[k]) -
805                                                                 dml_floor(ViewportXStart[k],
806                                                                 Read256BytesBlockWidthY[k]));
807                         } else {
808                                 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
809                                                 dml_ceil(SwathWidthY[k] - 1,
810                                                                 Read256BytesBlockWidthY[k]) +
811                                                                 Read256BytesBlockWidthY[k]);
812                         }
813                         if (BytePerPixC[k] > 0) {
814                                 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
815                                         swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
816                                                         dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
817                                                                         Read256BytesBlockWidthC[k] - 1,
818                                                                         Read256BytesBlockWidthC[k]) -
819                                                                         dml_floor(ViewportXStartC[k],
820                                                                         Read256BytesBlockWidthC[k]));
821                                 } else {
822                                         swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
823                                                         dml_ceil(SwathWidthC[k] - 1,
824                                                                 Read256BytesBlockWidthC[k]) +
825                                                                 Read256BytesBlockWidthC[k]);
826                                 }
827                         } else {
828                                 swath_width_chroma_ub[k] = 0;
829                         }
830                 } else {
831                         MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
832                         MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
833
834                         if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
835                                 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
836                                                 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
837                                                 Read256BytesBlockHeightY[k]) -
838                                                 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
839                         } else {
840                                 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
841                                                 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
842                         }
843                         if (BytePerPixC[k] > 0) {
844                                 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
845                                         swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
846                                                         dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
847                                                                         Read256BytesBlockHeightC[k] - 1,
848                                                                         Read256BytesBlockHeightC[k]) -
849                                                                         dml_floor(ViewportYStartC[k],
850                                                                                         Read256BytesBlockHeightC[k]));
851                                 } else {
852                                         swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
853                                                         dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
854                                                         Read256BytesBlockHeightC[k]);
855                                 }
856                         } else {
857                                 swath_width_chroma_ub[k] = 0;
858                         }
859                 }
860
861 #ifdef __DML_VBA_DEBUG__
862                 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
863                 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
864                 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
865                 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
866 #endif
867
868         }
869 } // CalculateSwathWidth
870
871 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
872                         unsigned int TotalNumberOfActiveDPP,
873                         bool NoChroma,
874                         enum output_encoder_class Output,
875                         enum dm_swizzle_mode SurfaceTiling,
876                         bool CompBufReservedSpaceNeedAdjustment,
877                         bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
878 {
879         bool ret_val = false;
880
881         ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
882                         TotalNumberOfActiveDPP == 1 && NoChroma);
883         if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
884                 ret_val = false;
885
886         if (SurfaceTiling == dm_sw_linear)
887                 ret_val = false;
888
889         if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
890                 ret_val = false;
891
892 #ifdef __DML_VBA_DEBUG__
893         dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
894         dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
895         dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
896 #endif
897
898         return (ret_val);
899 }
900
901 void dml32_CalculateDETBufferSize(
902                 unsigned int DETSizeOverride[],
903                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
904                 bool ForceSingleDPP,
905                 unsigned int NumberOfActiveSurfaces,
906                 bool UnboundedRequestEnabled,
907                 unsigned int nomDETInKByte,
908                 unsigned int MaxTotalDETInKByte,
909                 unsigned int ConfigReturnBufferSizeInKByte,
910                 unsigned int MinCompressedBufferSizeInKByte,
911                 unsigned int CompressedBufferSegmentSizeInkByteFinal,
912                 enum source_format_class SourcePixelFormat[],
913                 double ReadBandwidthLuma[],
914                 double ReadBandwidthChroma[],
915                 unsigned int RoundedUpMaxSwathSizeBytesY[],
916                 unsigned int RoundedUpMaxSwathSizeBytesC[],
917                 unsigned int DPPPerSurface[],
918                 /* Output */
919                 unsigned int DETBufferSizeInKByte[],
920                 unsigned int *CompressedBufferSizeInkByte)
921 {
922         unsigned int DETBufferSizePoolInKByte;
923         unsigned int NextDETBufferPieceInKByte;
924         bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
925         bool NextPotentialSurfaceToAssignDETPieceFound;
926         unsigned int NextSurfaceToAssignDETPiece;
927         double TotalBandwidth;
928         double BandwidthOfSurfacesNotAssignedDETPiece;
929         unsigned int max_minDET;
930         unsigned int minDET;
931         unsigned int minDET_pipe;
932         unsigned int j, k;
933
934 #ifdef __DML_VBA_DEBUG__
935         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
936         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
937         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
938         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
939         dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
940         dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
941         dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
942         dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
943                         CompressedBufferSegmentSizeInkByteFinal);
944 #endif
945
946         // Note: Will use default det size if that fits 2 swaths
947         if (UnboundedRequestEnabled) {
948                 if (DETSizeOverride[0] > 0) {
949                         DETBufferSizeInKByte[0] = DETSizeOverride[0];
950                 } else {
951                         DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
952                                         ((double) RoundedUpMaxSwathSizeBytesY[0] +
953                                                         (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
954                 }
955                 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
956         } else {
957                 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
958                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
959                         DETBufferSizeInKByte[k] = nomDETInKByte;
960                         if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
961                                         SourcePixelFormat[k] == dm_420_12) {
962                                 max_minDET = nomDETInKByte - 64;
963                         } else {
964                                 max_minDET = nomDETInKByte;
965                         }
966                         minDET = 128;
967                         minDET_pipe = 0;
968
969                         // add DET resource until can hold 2 full swaths
970                         while (minDET <= max_minDET && minDET_pipe == 0) {
971                                 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
972                                                 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
973                                         minDET_pipe = minDET;
974                                 minDET = minDET + 64;
975                         }
976
977 #ifdef __DML_VBA_DEBUG__
978                         dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
979                         dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
980                         dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
981                         dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
982                                         RoundedUpMaxSwathSizeBytesY[k]);
983                         dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
984                                         RoundedUpMaxSwathSizeBytesC[k]);
985 #endif
986
987                         if (minDET_pipe == 0) {
988                                 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
989                                                 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
990 #ifdef __DML_VBA_DEBUG__
991                                 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
992                                                 __func__, k, minDET_pipe);
993 #endif
994                         }
995
996                         if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
997                                 DETBufferSizeInKByte[k] = 0;
998                         } else if (DETSizeOverride[k] > 0) {
999                                 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1000                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1001                                                 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1002                         } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1003                                 DETBufferSizeInKByte[k] = minDET_pipe;
1004                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1005                                                 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1006                         }
1007
1008 #ifdef __DML_VBA_DEBUG__
1009                         dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1010                         dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1011                         dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1012                         dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1013 #endif
1014                 }
1015
1016                 TotalBandwidth = 0;
1017                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1018                         if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1019                                 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1020                 }
1021 #ifdef __DML_VBA_DEBUG__
1022                 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1023                 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1024                         dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1025                 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1026                 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1027 #endif
1028                 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1029                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1030
1031                         if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1032                                 DETPieceAssignedToThisSurfaceAlready[k] = true;
1033                         } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1034                                         (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1035                                         ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1036                                 DETPieceAssignedToThisSurfaceAlready[k] = true;
1037                                 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1038                                                 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1039                         } else {
1040                                 DETPieceAssignedToThisSurfaceAlready[k] = false;
1041                         }
1042 #ifdef __DML_VBA_DEBUG__
1043                         dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1044                                         DETPieceAssignedToThisSurfaceAlready[k]);
1045                         dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1046                                         BandwidthOfSurfacesNotAssignedDETPiece);
1047 #endif
1048                 }
1049
1050                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1051                         NextPotentialSurfaceToAssignDETPieceFound = false;
1052                         NextSurfaceToAssignDETPiece = 0;
1053
1054                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1055 #ifdef __DML_VBA_DEBUG__
1056                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1057                                                 ReadBandwidthLuma[k]);
1058                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1059                                                 ReadBandwidthChroma[k]);
1060                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1061                                                 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1062                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1063                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1064                                 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1065                                                 NextSurfaceToAssignDETPiece);
1066 #endif
1067                                 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1068                                                 (!NextPotentialSurfaceToAssignDETPieceFound ||
1069                                                 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1070                                                 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1071                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1072                                         NextSurfaceToAssignDETPiece = k;
1073                                         NextPotentialSurfaceToAssignDETPieceFound = true;
1074                                 }
1075 #ifdef __DML_VBA_DEBUG__
1076                                 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1077                                                 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1078                                 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1079                                                 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1080 #endif
1081                         }
1082
1083                         if (NextPotentialSurfaceToAssignDETPieceFound) {
1084                                 // Note: To show the banker's rounding behavior in VBA and also the fact
1085                                 // that the DET buffer size varies due to precision issue
1086                                 //
1087                                 //double tmp1 =  ((double) DETBufferSizePoolInKByte *
1088                                 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1089                                 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1090                                 // BandwidthOfSurfacesNotAssignedDETPiece /
1091                                 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1092                                 //double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1093                                 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1094                                 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1095                                  //BandwidthOfSurfacesNotAssignedDETPiece /
1096                                 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1097                                 //
1098                                 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1099                                 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1100
1101                                 NextDETBufferPieceInKByte = dml_min(
1102                                         dml_round((double) DETBufferSizePoolInKByte *
1103                                                 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1104                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1105                                                 BandwidthOfSurfacesNotAssignedDETPiece /
1106                                                 ((ForceSingleDPP ? 1 :
1107                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1108                                                 (ForceSingleDPP ? 1 :
1109                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1110                                                 dml_floor((double) DETBufferSizePoolInKByte,
1111                                                 (ForceSingleDPP ? 1 :
1112                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1113
1114                                 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1115                                 // We should limit the per-pipe DET size to the nominal / max per pipe.
1116                                 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1117                                         if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1118                                                         nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1119                                                 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1120                                                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1121                                         } else {
1122                                                 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1123                                                 // already has the max per-pipe value
1124                                                 NextDETBufferPieceInKByte = 0;
1125                                         }
1126                                 }
1127
1128 #ifdef __DML_VBA_DEBUG__
1129                                 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1130                                         DETBufferSizePoolInKByte);
1131                                 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1132                                         NextSurfaceToAssignDETPiece);
1133                                 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1134                                         NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1135                                 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1136                                         NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1137                                 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1138                                         __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1139                                 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1140                                         NextDETBufferPieceInKByte);
1141                                 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1142                                         __func__, j, NextSurfaceToAssignDETPiece,
1143                                         DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1144 #endif
1145
1146                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1147                                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1148                                                 + NextDETBufferPieceInKByte
1149                                                 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1150 #ifdef __DML_VBA_DEBUG__
1151                                 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1152 #endif
1153
1154                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1155                                 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1156                                 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1157                                                 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1158                                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1159                         }
1160                 }
1161                 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1162         }
1163         *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1164
1165 #ifdef __DML_VBA_DEBUG__
1166         dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1167         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1168         for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1169                 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1170                                 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1171         }
1172 #endif
1173 } // CalculateDETBufferSize
1174
1175 void dml32_CalculateODMMode(
1176                 unsigned int MaximumPixelsPerLinePerDSCUnit,
1177                 unsigned int HActive,
1178                 enum output_encoder_class Output,
1179                 enum odm_combine_policy ODMUse,
1180                 double StateDispclk,
1181                 double MaxDispclk,
1182                 bool DSCEnable,
1183                 unsigned int TotalNumberOfActiveDPP,
1184                 unsigned int MaxNumDPP,
1185                 double PixelClock,
1186                 double DISPCLKDPPCLKDSCCLKDownSpreading,
1187                 double DISPCLKRampingMargin,
1188                 double DISPCLKDPPCLKVCOSpeed,
1189
1190                 /* Output */
1191                 bool *TotalAvailablePipesSupport,
1192                 unsigned int *NumberOfDPP,
1193                 enum odm_combine_mode *ODMMode,
1194                 double *RequiredDISPCLKPerSurface)
1195 {
1196
1197         double SurfaceRequiredDISPCLKWithoutODMCombine;
1198         double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1199         double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1200
1201         SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1202                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1203                         MaxDispclk);
1204         SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1205                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1206                         MaxDispclk);
1207         SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1208                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1209                         MaxDispclk);
1210         *TotalAvailablePipesSupport = true;
1211         *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1212
1213         if (ODMUse == dm_odm_combine_policy_none)
1214                 *ODMMode = dm_odm_combine_mode_disabled;
1215
1216         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1217         *NumberOfDPP = 0;
1218
1219         // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1220         // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1221
1222         if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1223                         ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1224                                         (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) {
1225                 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1226                         *ODMMode = dm_odm_combine_mode_4to1;
1227                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1228                         *NumberOfDPP = 4;
1229                 } else {
1230                         *TotalAvailablePipesSupport = false;
1231                 }
1232         } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1233                         (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1234                                         SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1235                                         (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) {
1236                 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1237                         *ODMMode = dm_odm_combine_mode_2to1;
1238                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1239                         *NumberOfDPP = 2;
1240                 } else {
1241                         *TotalAvailablePipesSupport = false;
1242                 }
1243         } else {
1244                 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1245                         *NumberOfDPP = 1;
1246                 else
1247                         *TotalAvailablePipesSupport = false;
1248         }
1249 }
1250
1251 double dml32_CalculateRequiredDispclk(
1252                 enum odm_combine_mode ODMMode,
1253                 double PixelClock,
1254                 double DISPCLKDPPCLKDSCCLKDownSpreading,
1255                 double DISPCLKRampingMargin,
1256                 double DISPCLKDPPCLKVCOSpeed,
1257                 double MaxDispclk)
1258 {
1259         double RequiredDispclk = 0.;
1260         double PixelClockAfterODM;
1261         double DISPCLKWithRampingRoundedToDFSGranularity;
1262         double DISPCLKWithoutRampingRoundedToDFSGranularity;
1263         double MaxDispclkRoundedDownToDFSGranularity;
1264
1265         if (ODMMode == dm_odm_combine_mode_4to1)
1266                 PixelClockAfterODM = PixelClock / 4;
1267         else if (ODMMode == dm_odm_combine_mode_2to1)
1268                 PixelClockAfterODM = PixelClock / 2;
1269         else
1270                 PixelClockAfterODM = PixelClock;
1271
1272
1273         DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1274                         PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1275                                         * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1276
1277         DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1278                         PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1279
1280         MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1281
1282         if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1283                 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1284         else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1285                 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1286         else
1287                 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1288
1289         return RequiredDispclk;
1290 }
1291
/*
 * Snap Clock to a value of the form (4 * VCOSpeed) / divider.
 *
 * The divider is (4 * VCOSpeed) / Clock passed through dml_floor() when
 * round_up is set (smaller divider, so the result is not below Clock) or
 * through dml_ceil() otherwise, both with a granularity of 1.0.
 * A non-positive Clock yields 0.
 *
 * NOTE(review): presumably dml_floor(x, 1.0) returns 0 for 0 < x < 1, in which
 * case a Clock above 4 * VCOSpeed with round_up set divides by zero - confirm
 * against dml_floor() and the callers.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
        double divider;

        if (Clock <= 0.0)
                return 0.0;

        divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0) :
                        dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

        return VCOSpeed * 4.0 / divider;
}
1302
1303 void dml32_CalculateOutputLink(
1304                 double PHYCLKPerState,
1305                 double PHYCLKD18PerState,
1306                 double PHYCLKD32PerState,
1307                 double Downspreading,
1308                 bool IsMainSurfaceUsingTheIndicatedTiming,
1309                 enum output_encoder_class Output,
1310                 enum output_format_class OutputFormat,
1311                 unsigned int HTotal,
1312                 unsigned int HActive,
1313                 double PixelClockBackEnd,
1314                 double ForcedOutputLinkBPP,
1315                 unsigned int DSCInputBitPerComponent,
1316                 unsigned int NumberOfDSCSlices,
1317                 double AudioSampleRate,
1318                 unsigned int AudioSampleLayout,
1319                 enum odm_combine_mode ODMModeNoDSC,
1320                 enum odm_combine_mode ODMModeDSC,
1321                 bool DSCEnable,
1322                 unsigned int OutputLinkDPLanes,
1323                 enum dm_output_link_dp_rate OutputLinkDPRate,
1324
1325                 /* Output */
1326                 bool *RequiresDSC,
1327                 double *RequiresFEC,
1328                 double  *OutBpp,
1329                 enum dm_output_type *OutputType,
1330                 enum dm_output_rate *OutputRate,
1331                 unsigned int *RequiredSlots)
1332 {
1333         bool LinkDSCEnable;
1334         unsigned int dummy;
1335         *RequiresDSC = false;
1336         *RequiresFEC = false;
1337         *OutBpp = 0;
1338         *OutputType = dm_output_type_unknown;
1339         *OutputRate = dm_output_rate_unknown;
1340
1341         if (IsMainSurfaceUsingTheIndicatedTiming) {
1342                 if (Output == dm_hdmi) {
1343                         *RequiresDSC = false;
1344                         *RequiresFEC = false;
1345                         *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1346                                         PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1347                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1348                                         ODMModeNoDSC, ODMModeDSC, &dummy);
1349                         //OutputTypeAndRate = "HDMI";
1350                         *OutputType = dm_output_type_hdmi;
1351
1352                 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1353                         if (DSCEnable == true) {
1354                                 *RequiresDSC = true;
1355                                 LinkDSCEnable = true;
1356                                 if (Output == dm_dp || Output == dm_dp2p0)
1357                                         *RequiresFEC = true;
1358                                 else
1359                                         *RequiresFEC = false;
1360                         } else {
1361                                 *RequiresDSC = false;
1362                                 LinkDSCEnable = false;
1363                                 if (Output == dm_dp2p0)
1364                                         *RequiresFEC = true;
1365                                 else
1366                                         *RequiresFEC = false;
1367                         }
1368                         if (Output == dm_dp2p0) {
1369                                 *OutBpp = 0;
1370                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1371                                                 PHYCLKD32PerState >= 10000 / 32) {
1372                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1373                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1374                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1375                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1376                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1377                                         if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1378                                                         ForcedOutputLinkBPP == 0) {
1379                                                 *RequiresDSC = true;
1380                                                 LinkDSCEnable = true;
1381                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1382                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1383                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1384                                                                 OutputFormat, DSCInputBitPerComponent,
1385                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1386                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1387                                         }
1388                                         //OutputTypeAndRate = Output & " UHBR10";
1389                                         *OutputType = dm_output_type_dp2p0;
1390                                         *OutputRate = dm_output_rate_dp_rate_uhbr10;
1391                                 }
1392                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1393                                                 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1394                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1395                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1396                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1397                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1398                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1399
1400                                         if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1401                                                         ForcedOutputLinkBPP == 0) {
1402                                                 *RequiresDSC = true;
1403                                                 LinkDSCEnable = true;
1404                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1405                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1406                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1407                                                                 OutputFormat, DSCInputBitPerComponent,
1408                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1409                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410                                         }
1411                                         //OutputTypeAndRate = Output & " UHBR13p5";
1412                                         *OutputType = dm_output_type_dp2p0;
1413                                         *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1414                                 }
1415                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1416                                                 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1417                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1418                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1419                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1420                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1421                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1422                                         if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1423                                                 *RequiresDSC = true;
1424                                                 LinkDSCEnable = true;
1425                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1426                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1427                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1428                                                                 OutputFormat, DSCInputBitPerComponent,
1429                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1430                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1431                                         }
1432                                         //OutputTypeAndRate = Output & " UHBR20";
1433                                         *OutputType = dm_output_type_dp2p0;
1434                                         *OutputRate = dm_output_rate_dp_rate_uhbr20;
1435                                 }
1436                         } else {
1437                                 *OutBpp = 0;
1438                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1439                                                 PHYCLKPerState >= 270) {
1440                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1441                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1442                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1443                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1444                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1445                                         if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1446                                                         ForcedOutputLinkBPP == 0) {
1447                                                 *RequiresDSC = true;
1448                                                 LinkDSCEnable = true;
1449                                                 if (Output == dm_dp)
1450                                                         *RequiresFEC = true;
1451                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1452                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1453                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1454                                                                 OutputFormat, DSCInputBitPerComponent,
1455                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1456                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1457                                         }
1458                                         //OutputTypeAndRate = Output & " HBR";
1459                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1460                                         *OutputRate = dm_output_rate_dp_rate_hbr;
1461                                 }
1462                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1463                                                 *OutBpp == 0 && PHYCLKPerState >= 540) {
1464                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1465                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1466                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1467                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1468                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1469
1470                                         if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1471                                                         ForcedOutputLinkBPP == 0) {
1472                                                 *RequiresDSC = true;
1473                                                 LinkDSCEnable = true;
1474                                                 if (Output == dm_dp)
1475                                                         *RequiresFEC = true;
1476
1477                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1478                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1479                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1480                                                                 OutputFormat, DSCInputBitPerComponent,
1481                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1482                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1483                                         }
1484                                         //OutputTypeAndRate = Output & " HBR2";
1485                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1486                                         *OutputRate = dm_output_rate_dp_rate_hbr2;
1487                                 }
1488                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1489                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1490                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1491                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output,
1492                                                         OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1493                                                         AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1494                                                         RequiredSlots);
1495
1496                                         if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1497                                                 *RequiresDSC = true;
1498                                                 LinkDSCEnable = true;
1499                                                 if (Output == dm_dp)
1500                                                         *RequiresFEC = true;
1501
1502                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1503                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1504                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1505                                                                 OutputFormat, DSCInputBitPerComponent,
1506                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1507                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1508                                         }
1509                                         //OutputTypeAndRate = Output & " HBR3";
1510                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1511                                         *OutputRate = dm_output_rate_dp_rate_hbr3;
1512                                 }
1513                         }
1514                 }
1515         }
1516 }
1517
/*
 * Derive the per-surface DPPCLK and the global DPPCLK.
 *
 * Each surface first gets DPPCLKUsingSingleDPP[k] / DPPPerSurface[k], scaled
 * by the down-spread percentage. The global clock is the largest of those
 * values, rounded up to DFS granularity. Finally every per-surface clock is
 * rounded up to a multiple of GlobalDPPCLK / 255.
 *
 * NOTE(review): the last step divides by *GlobalDPPCLK, which is 0 when every
 * per-surface clock is 0 - confirm callers never pass such input.
 */
void dml32_CalculateDPPCLK(
                unsigned int NumberOfActiveSurfaces,
                double DISPCLKDPPCLKDSCCLKDownSpreading,
                double DISPCLKDPPCLKVCOSpeed,
                double DPPCLKUsingSingleDPP[],
                unsigned int DPPPerSurface[],

                /* output */
                double *GlobalDPPCLK,
                double Dppclk[])
{
        const double spread_factor = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
        double global_clk = 0;
        double clk_step;
        unsigned int surf;

        for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
                Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] * spread_factor;
                global_clk = dml_max(global_clk, Dppclk[surf]);
        }

        global_clk = dml32_RoundToDFSGranularity(global_clk, 1, DISPCLKDPPCLKVCOSpeed);
        *GlobalDPPCLK = global_clk;

        // Per-surface clocks are expressed in 1/255 steps of the global clock.
        clk_step = global_clk / 255;
        for (surf = 0; surf < NumberOfActiveSurfaces; ++surf)
                Dppclk[surf] = clk_step * dml_ceil(Dppclk[surf] * 255.0 / global_clk, 1.0);
}
1539
1540 double dml32_TruncToValidBPP(
1541                 double LinkBitRate,
1542                 unsigned int Lanes,
1543                 unsigned int HTotal,
1544                 unsigned int HActive,
1545                 double PixelClock,
1546                 double DesiredBPP,
1547                 bool DSCEnable,
1548                 enum output_encoder_class Output,
1549                 enum output_format_class Format,
1550                 unsigned int DSCInputBitPerComponent,
1551                 unsigned int DSCSlices,
1552                 unsigned int AudioRate,
1553                 unsigned int AudioLayout,
1554                 enum odm_combine_mode ODMModeNoDSC,
1555                 enum odm_combine_mode ODMModeDSC,
1556                 /* Output */
1557                 unsigned int *RequiredSlots)
1558 {
1559         double    MaxLinkBPP;
1560         unsigned int   MinDSCBPP;
1561         double    MaxDSCBPP;
1562         unsigned int   NonDSCBPP0;
1563         unsigned int   NonDSCBPP1;
1564         unsigned int   NonDSCBPP2;
1565         unsigned int   NonDSCBPP3;
1566
1567         if (Format == dm_420) {
1568                 NonDSCBPP0 = 12;
1569                 NonDSCBPP1 = 15;
1570                 NonDSCBPP2 = 18;
1571                 MinDSCBPP = 6;
1572                 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1573         } else if (Format == dm_444) {
1574                 NonDSCBPP0 = 18;
1575                 NonDSCBPP1 = 24;
1576                 NonDSCBPP2 = 30;
1577                 NonDSCBPP3 = 36;
1578                 MinDSCBPP = 8;
1579                 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1580         } else {
1581                 if (Output == dm_hdmi) {
1582                         NonDSCBPP0 = 24;
1583                         NonDSCBPP1 = 24;
1584                         NonDSCBPP2 = 24;
1585                 } else {
1586                         NonDSCBPP0 = 16;
1587                         NonDSCBPP1 = 20;
1588                         NonDSCBPP2 = 24;
1589                 }
1590                 if (Format == dm_n422) {
1591                         MinDSCBPP = 7;
1592                         MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1593                 } else {
1594                         MinDSCBPP = 8;
1595                         MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1596                 }
1597         }
1598         if (Output == dm_dp2p0) {
1599                 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1600         } else if (DSCEnable && Output == dm_dp) {
1601                 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1602         } else {
1603                 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1604         }
1605
1606         if (DSCEnable) {
1607                 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1608                         MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1609                 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1610                         MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1611                 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1612                         MaxLinkBPP = 2 * MaxLinkBPP;
1613         } else {
1614                 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1615                         MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1616                 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1617                         MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1618                 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1619                         MaxLinkBPP = 2 * MaxLinkBPP;
1620         }
1621
1622         if (DesiredBPP == 0) {
1623                 if (DSCEnable) {
1624                         if (MaxLinkBPP < MinDSCBPP)
1625                                 return BPP_INVALID;
1626                         else if (MaxLinkBPP >= MaxDSCBPP)
1627                                 return MaxDSCBPP;
1628                         else
1629                                 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1630                 } else {
1631                         if (MaxLinkBPP >= NonDSCBPP3)
1632                                 return NonDSCBPP3;
1633                         else if (MaxLinkBPP >= NonDSCBPP2)
1634                                 return NonDSCBPP2;
1635                         else if (MaxLinkBPP >= NonDSCBPP1)
1636                                 return NonDSCBPP1;
1637                         else if (MaxLinkBPP >= NonDSCBPP0)
1638                                 return 16.0;
1639                         else
1640                                 return BPP_INVALID;
1641                 }
1642         } else {
1643                 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1644                                 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1645                                 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1646                         return BPP_INVALID;
1647                 else
1648                         return DesiredBPP;
1649         }
1650
1651         *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1652
1653         return BPP_INVALID;
1654 } // TruncToValidBPP
1655
1656 double dml32_RequiredDTBCLK(
1657                 bool              DSCEnable,
1658                 double               PixelClock,
1659                 enum output_format_class  OutputFormat,
1660                 double               OutputBpp,
1661                 unsigned int              DSCSlices,
1662                 unsigned int                 HTotal,
1663                 unsigned int                 HActive,
1664                 unsigned int              AudioRate,
1665                 unsigned int              AudioLayout)
1666 {
1667         double PixelWordRate;
1668         double HCActive;
1669         double HCBlank;
1670         double AverageTribyteRate;
1671         double HActiveTribyteRate;
1672
1673         if (DSCEnable != true)
1674                 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1675
1676         PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1677         HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1678                         dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1679         HCBlank = 64 + 32 *
1680                         dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1681         AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1682         HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1683         return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1684 }
1685
1686 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1687                 enum odm_combine_mode ODMMode,
1688                 unsigned int DSCInputBitPerComponent,
1689                 double OutputBpp,
1690                 unsigned int HActive,
1691                 unsigned int HTotal,
1692                 unsigned int NumberOfDSCSlices,
1693                 enum output_format_class  OutputFormat,
1694                 enum output_encoder_class Output,
1695                 double PixelClock,
1696                 double PixelClockBackEnd)
1697 {
1698         unsigned int DSCDelayRequirement_val;
1699
1700         if (DSCEnabled == true && OutputBpp != 0) {
1701                 if (ODMMode == dm_odm_combine_mode_4to1) {
1702                         DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1703                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1704                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1705                 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1706                         DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1707                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1708                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1709                 } else {
1710                         DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1711                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1712                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1713                 }
1714
1715                 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1716                                 dml_ceil(DSCDelayRequirement_val / HActive, 1);
1717
1718                 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1719
1720         } else {
1721                 DSCDelayRequirement_val = 0;
1722         }
1723
1724 #ifdef __DML_VBA_DEBUG__
1725         dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1726         dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1727         dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1728         dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1729         dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1730         dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1731         dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1732 #endif
1733
1734         return DSCDelayRequirement_val;
1735 }
1736
1737 void dml32_CalculateSurfaceSizeInMall(
1738                 unsigned int NumberOfActiveSurfaces,
1739                 unsigned int MALLAllocatedForDCN,
1740                 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1741                 bool DCCEnable[],
1742                 bool ViewportStationary[],
1743                 unsigned int ViewportXStartY[],
1744                 unsigned int ViewportYStartY[],
1745                 unsigned int ViewportXStartC[],
1746                 unsigned int ViewportYStartC[],
1747                 unsigned int ViewportWidthY[],
1748                 unsigned int ViewportHeightY[],
1749                 unsigned int BytesPerPixelY[],
1750                 unsigned int ViewportWidthC[],
1751                 unsigned int ViewportHeightC[],
1752                 unsigned int BytesPerPixelC[],
1753                 unsigned int SurfaceWidthY[],
1754                 unsigned int SurfaceWidthC[],
1755                 unsigned int SurfaceHeightY[],
1756                 unsigned int SurfaceHeightC[],
1757                 unsigned int Read256BytesBlockWidthY[],
1758                 unsigned int Read256BytesBlockWidthC[],
1759                 unsigned int Read256BytesBlockHeightY[],
1760                 unsigned int Read256BytesBlockHeightC[],
1761                 unsigned int ReadBlockWidthY[],
1762                 unsigned int ReadBlockWidthC[],
1763                 unsigned int ReadBlockHeightY[],
1764                 unsigned int ReadBlockHeightC[],
1765
1766                 /* Output */
1767                 unsigned int    SurfaceSizeInMALL[],
1768                 bool *ExceededMALLSize)
1769 {
1770         unsigned int TotalSurfaceSizeInMALL  = 0;
1771         unsigned int k;
1772
1773         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1774                 if (ViewportStationary[k]) {
1775                         SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1776                                         dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1777                                                 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1778                                                 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1779                                                 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1780                                                 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1781                                                 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1782
1783                         if (ReadBlockWidthC[k] > 0) {
1784                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1785                                                 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1786                                                         dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1787                                                         ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1788                                                         dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1789                                                         dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1790                                                         dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1791                                                         ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1792                                                         dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1793                                                         BytesPerPixelC[k];
1794                         }
1795                         if (DCCEnable[k] == true) {
1796                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1797                                                 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1798                                                         dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1799                                                         Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1800                                                         - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1801                                                         * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1802                                                         Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1803                                                         ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1804                                                         Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1805                                                         * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1806                                 if (Read256BytesBlockWidthC[k] > 0) {
1807                                         SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1808                                                         dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1809                                                                 Read256BytesBlockWidthC[k]),
1810                                                                 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1811                                                                 * Read256BytesBlockWidthC[k] - 1, 8 *
1812                                                                 Read256BytesBlockWidthC[k]) -
1813                                                                 dml_floor(ViewportXStartC[k], 8 *
1814                                                                 Read256BytesBlockWidthC[k])) *
1815                                                                 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1816                                                                 Read256BytesBlockHeightC[k]),
1817                                                                 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1818                                                                 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1819                                                                 Read256BytesBlockHeightC[k]) -
1820                                                                 dml_floor(ViewportYStartC[k], 8 *
1821                                                                 Read256BytesBlockHeightC[k])) *
1822                                                                 BytesPerPixelC[k] / 256;
1823                                 }
1824                         }
1825                 } else {
1826                         SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1827                                         ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1828                                         dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1829                                                         ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1830                                                         BytesPerPixelY[k];
1831                         if (ReadBlockWidthC[k] > 0) {
1832                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1833                                                 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1834                                                                 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1835                                                 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1836                                                                 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1837                                                                 BytesPerPixelC[k];
1838                         }
1839                         if (DCCEnable[k] == true) {
1840                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1841                                                 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1842                                                                 Read256BytesBlockWidthY[k] - 1), 8 *
1843                                                                 Read256BytesBlockWidthY[k]) *
1844                                                 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1845                                                                 Read256BytesBlockHeightY[k] - 1), 8 *
1846                                                                 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1847
1848                                 if (Read256BytesBlockWidthC[k] > 0) {
1849                                         SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1850                                                         dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1851                                                                         Read256BytesBlockWidthC[k] - 1), 8 *
1852                                                                         Read256BytesBlockWidthC[k]) *
1853                                                         dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1854                                                                         Read256BytesBlockHeightC[k] - 1), 8 *
1855                                                                         Read256BytesBlockHeightC[k]) *
1856                                                                         BytesPerPixelC[k] / 256;
1857                                 }
1858                         }
1859                 }
1860         }
1861
1862         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1863                 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1864                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1865         }
1866         *ExceededMALLSize =  (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
1867 } // CalculateSurfaceSizeInMall
1868
1869 void dml32_CalculateVMRowAndSwath(
1870                 struct dml32_CalculateVMRowAndSwath *st_vars,
1871                 unsigned int NumberOfActiveSurfaces,
1872                 DmlPipe myPipe[],
1873                 unsigned int SurfaceSizeInMALL[],
1874                 unsigned int PTEBufferSizeInRequestsLuma,
1875                 unsigned int PTEBufferSizeInRequestsChroma,
1876                 unsigned int DCCMetaBufferSizeBytes,
1877                 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1878                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1879                 unsigned int MALLAllocatedForDCN,
1880                 double SwathWidthY[],
1881                 double SwathWidthC[],
1882                 bool GPUVMEnable,
1883                 bool HostVMEnable,
1884                 unsigned int HostVMMaxNonCachedPageTableLevels,
1885                 unsigned int GPUVMMaxPageTableLevels,
1886                 unsigned int GPUVMMinPageSizeKBytes[],
1887                 unsigned int HostVMMinPageSize,
1888
1889                 /* Output */
1890                 bool PTEBufferSizeNotExceeded[],
1891                 bool DCCMetaBufferSizeNotExceeded[],
1892                 unsigned int dpte_row_width_luma_ub[],
1893                 unsigned int dpte_row_width_chroma_ub[],
1894                 unsigned int dpte_row_height_luma[],
1895                 unsigned int dpte_row_height_chroma[],
1896                 unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1897                 unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1898                 unsigned int meta_req_width[],
1899                 unsigned int meta_req_width_chroma[],
1900                 unsigned int meta_req_height[],
1901                 unsigned int meta_req_height_chroma[],
1902                 unsigned int meta_row_width[],
1903                 unsigned int meta_row_width_chroma[],
1904                 unsigned int meta_row_height[],
1905                 unsigned int meta_row_height_chroma[],
1906                 unsigned int vm_group_bytes[],
1907                 unsigned int dpte_group_bytes[],
1908                 unsigned int PixelPTEReqWidthY[],
1909                 unsigned int PixelPTEReqHeightY[],
1910                 unsigned int PTERequestSizeY[],
1911                 unsigned int PixelPTEReqWidthC[],
1912                 unsigned int PixelPTEReqHeightC[],
1913                 unsigned int PTERequestSizeC[],
1914                 unsigned int dpde0_bytes_per_frame_ub_l[],
1915                 unsigned int meta_pte_bytes_per_frame_ub_l[],
1916                 unsigned int dpde0_bytes_per_frame_ub_c[],
1917                 unsigned int meta_pte_bytes_per_frame_ub_c[],
1918                 double PrefetchSourceLinesY[],
1919                 double PrefetchSourceLinesC[],
1920                 double VInitPreFillY[],
1921                 double VInitPreFillC[],
1922                 unsigned int MaxNumSwathY[],
1923                 unsigned int MaxNumSwathC[],
1924                 double meta_row_bw[],
1925                 double dpte_row_bw[],
1926                 double PixelPTEBytesPerRow[],
1927                 double PDEAndMetaPTEBytesFrame[],
1928                 double MetaRowByte[],
1929                 bool use_one_row_for_frame[],
1930                 bool use_one_row_for_frame_flip[],
1931                 bool UsesMALLForStaticScreen[],
1932                 bool PTE_BUFFER_MODE[],
1933                 unsigned int BIGK_FRAGMENT_SIZE[])
1934 {
1935         unsigned int k;
1936
1937         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1938                 if (HostVMEnable == true) {
1939                         vm_group_bytes[k] = 512;
1940                         dpte_group_bytes[k] = 512;
1941                 } else if (GPUVMEnable == true) {
1942                         vm_group_bytes[k] = 2048;
1943                         if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1944                                 dpte_group_bytes[k] = 512;
1945                         else
1946                                 dpte_group_bytes[k] = 2048;
1947                 } else {
1948                         vm_group_bytes[k] = 0;
1949                         dpte_group_bytes[k] = 0;
1950                 }
1951
1952                 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
1953                                 myPipe[k].SourcePixelFormat == dm_420_12 ||
1954                                 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
1955                         if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
1956                                         !IsVertical(myPipe[k].SourceRotation)) {
1957                                 st_vars->PTEBufferSizeInRequestsForLuma[k] =
1958                                                 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
1959                                 st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k];
1960                         } else {
1961                                 st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
1962                                 st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
1963                         }
1964
1965                         st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
1966                                         myPipe[k].ViewportStationary,
1967                                         myPipe[k].DCCEnable,
1968                                         myPipe[k].DPPPerSurface,
1969                                         myPipe[k].BlockHeight256BytesC,
1970                                         myPipe[k].BlockWidth256BytesC,
1971                                         myPipe[k].SourcePixelFormat,
1972                                         myPipe[k].SurfaceTiling,
1973                                         myPipe[k].BytePerPixelC,
1974                                         myPipe[k].SourceRotation,
1975                                         SwathWidthC[k],
1976                                         myPipe[k].ViewportHeightChroma,
1977                                         myPipe[k].ViewportXStartC,
1978                                         myPipe[k].ViewportYStartC,
1979                                         GPUVMEnable,
1980                                         HostVMEnable,
1981                                         HostVMMaxNonCachedPageTableLevels,
1982                                         GPUVMMaxPageTableLevels,
1983                                         GPUVMMinPageSizeKBytes[k],
1984                                         HostVMMinPageSize,
1985                                         st_vars->PTEBufferSizeInRequestsForChroma[k],
1986                                         myPipe[k].PitchC,
1987                                         myPipe[k].DCCMetaPitchC,
1988                                         myPipe[k].BlockWidthC,
1989                                         myPipe[k].BlockHeightC,
1990
1991                                         /* Output */
1992                                         &st_vars->MetaRowByteC[k],
1993                                         &st_vars->PixelPTEBytesPerRowC[k],
1994                                         &dpte_row_width_chroma_ub[k],
1995                                         &dpte_row_height_chroma[k],
1996                                         &dpte_row_height_linear_chroma[k],
1997                                         &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k],
1998                                         &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k],
1999                                         &st_vars->dpte_row_height_chroma_one_row_per_frame[k],
2000                                         &meta_req_width_chroma[k],
2001                                         &meta_req_height_chroma[k],
2002                                         &meta_row_width_chroma[k],
2003                                         &meta_row_height_chroma[k],
2004                                         &PixelPTEReqWidthC[k],
2005                                         &PixelPTEReqHeightC[k],
2006                                         &PTERequestSizeC[k],
2007                                         &dpde0_bytes_per_frame_ub_c[k],
2008                                         &meta_pte_bytes_per_frame_ub_c[k]);
2009
2010                         PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2011                                         myPipe[k].VRatioChroma,
2012                                         myPipe[k].VTapsChroma,
2013                                         myPipe[k].InterlaceEnable,
2014                                         myPipe[k].ProgressiveToInterlaceUnitInOPP,
2015                                         myPipe[k].SwathHeightC,
2016                                         myPipe[k].SourceRotation,
2017                                         myPipe[k].ViewportStationary,
2018                                         SwathWidthC[k],
2019                                         myPipe[k].ViewportHeightChroma,
2020                                         myPipe[k].ViewportXStartC,
2021                                         myPipe[k].ViewportYStartC,
2022
2023                                         /* Output */
2024                                         &VInitPreFillC[k],
2025                                         &MaxNumSwathC[k]);
2026                 } else {
2027                         st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2028                         st_vars->PTEBufferSizeInRequestsForChroma[k] = 0;
2029                         st_vars->PixelPTEBytesPerRowC[k] = 0;
2030                         st_vars->PDEAndMetaPTEBytesFrameC = 0;
2031                         st_vars->MetaRowByteC[k] = 0;
2032                         MaxNumSwathC[k] = 0;
2033                         PrefetchSourceLinesC[k] = 0;
2034                         st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0;
2035                         st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2036                         st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2037                 }
2038
2039                 st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2040                                 myPipe[k].ViewportStationary,
2041                                 myPipe[k].DCCEnable,
2042                                 myPipe[k].DPPPerSurface,
2043                                 myPipe[k].BlockHeight256BytesY,
2044                                 myPipe[k].BlockWidth256BytesY,
2045                                 myPipe[k].SourcePixelFormat,
2046                                 myPipe[k].SurfaceTiling,
2047                                 myPipe[k].BytePerPixelY,
2048                                 myPipe[k].SourceRotation,
2049                                 SwathWidthY[k],
2050                                 myPipe[k].ViewportHeight,
2051                                 myPipe[k].ViewportXStart,
2052                                 myPipe[k].ViewportYStart,
2053                                 GPUVMEnable,
2054                                 HostVMEnable,
2055                                 HostVMMaxNonCachedPageTableLevels,
2056                                 GPUVMMaxPageTableLevels,
2057                                 GPUVMMinPageSizeKBytes[k],
2058                                 HostVMMinPageSize,
2059                                 st_vars->PTEBufferSizeInRequestsForLuma[k],
2060                                 myPipe[k].PitchY,
2061                                 myPipe[k].DCCMetaPitchY,
2062                                 myPipe[k].BlockWidthY,
2063                                 myPipe[k].BlockHeightY,
2064
2065                                 /* Output */
2066                                 &st_vars->MetaRowByteY[k],
2067                                 &st_vars->PixelPTEBytesPerRowY[k],
2068                                 &dpte_row_width_luma_ub[k],
2069                                 &dpte_row_height_luma[k],
2070                                 &dpte_row_height_linear_luma[k],
2071                                 &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k],
2072                                 &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k],
2073                                 &st_vars->dpte_row_height_luma_one_row_per_frame[k],
2074                                 &meta_req_width[k],
2075                                 &meta_req_height[k],
2076                                 &meta_row_width[k],
2077                                 &meta_row_height[k],
2078                                 &PixelPTEReqWidthY[k],
2079                                 &PixelPTEReqHeightY[k],
2080                                 &PTERequestSizeY[k],
2081                                 &dpde0_bytes_per_frame_ub_l[k],
2082                                 &meta_pte_bytes_per_frame_ub_l[k]);
2083
2084                 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2085                                 myPipe[k].VRatio,
2086                                 myPipe[k].VTaps,
2087                                 myPipe[k].InterlaceEnable,
2088                                 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2089                                 myPipe[k].SwathHeightY,
2090                                 myPipe[k].SourceRotation,
2091                                 myPipe[k].ViewportStationary,
2092                                 SwathWidthY[k],
2093                                 myPipe[k].ViewportHeight,
2094                                 myPipe[k].ViewportXStart,
2095                                 myPipe[k].ViewportYStart,
2096
2097                                 /* Output */
2098                                 &VInitPreFillY[k],
2099                                 &MaxNumSwathY[k]);
2100
2101                 PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC;
2102                 MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k];
2103
2104                 if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] &&
2105                                 st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) {
2106                         PTEBufferSizeNotExceeded[k] = true;
2107                 } else {
2108                         PTEBufferSizeNotExceeded[k] = false;
2109                 }
2110
2111                 st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2112                         st_vars->PTEBufferSizeInRequestsForLuma[k] &&
2113                         st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]);
2114         }
2115
2116         dml32_CalculateMALLUseForStaticScreen(
2117                         NumberOfActiveSurfaces,
2118                         MALLAllocatedForDCN,
2119                         UseMALLForStaticScreen,   // mode
2120                         SurfaceSizeInMALL,
2121                         st_vars->one_row_per_frame_fits_in_buffer,
2122                         /* Output */
2123                         UsesMALLForStaticScreen); // boolen
2124
2125         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2126                 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2127                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2128                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2129                                 (GPUVMMinPageSizeKBytes[k] > 64);
2130                 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2131         }
2132
2133         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2134 #ifdef __DML_VBA_DEBUG__
2135                 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2136                 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2137 #endif
2138                 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2139                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2140                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2141                                 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2142
2143                 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2144                                 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2145
2146                 if (use_one_row_for_frame[k]) {
2147                         dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k];
2148                         dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k];
2149                         st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k];
2150                         dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k];
2151                         dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k];
2152                         st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k];
2153                         PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k];
2154                 }
2155
2156                 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2157                         DCCMetaBufferSizeNotExceeded[k] = true;
2158                 else
2159                         DCCMetaBufferSizeNotExceeded[k] = false;
2160
2161                 PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k];
2162                 if (use_one_row_for_frame[k])
2163                         PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2164
2165                 dml32_CalculateRowBandwidth(
2166                                 GPUVMEnable,
2167                                 myPipe[k].SourcePixelFormat,
2168                                 myPipe[k].VRatio,
2169                                 myPipe[k].VRatioChroma,
2170                                 myPipe[k].DCCEnable,
2171                                 myPipe[k].HTotal / myPipe[k].PixelClock,
2172                                 st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k],
2173                                 meta_row_height[k],
2174                                 meta_row_height_chroma[k],
2175                                 st_vars->PixelPTEBytesPerRowY[k],
2176                                 st_vars->PixelPTEBytesPerRowC[k],
2177                                 dpte_row_height_luma[k],
2178                                 dpte_row_height_chroma[k],
2179
2180                                 /* Output */
2181                                 &meta_row_bw[k],
2182                                 &dpte_row_bw[k]);
2183 #ifdef __DML_VBA_DEBUG__
2184                 dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2185                 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2186                                 __func__, k, use_one_row_for_frame_flip[k]);
2187                 dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2188                                 __func__, k, UseMALLForPStateChange[k]);
2189                 dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2190                 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2191                                 __func__, k, dpte_row_width_luma_ub[k]);
2192                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, st_vars->PixelPTEBytesPerRowY[k]);
2193                 dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2194                                 __func__, k, dpte_row_height_chroma[k]);
2195                 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2196                                 __func__, k, dpte_row_width_chroma_ub[k]);
2197                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, st_vars->PixelPTEBytesPerRowC[k]);
2198                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2199                 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2200                                 __func__, k, PTEBufferSizeNotExceeded[k]);
2201                 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2202                 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2203 #endif
2204         }
2205 } // CalculateVMRowAndSwath
2206
2207 unsigned int dml32_CalculateVMAndRowBytes(
2208                 bool ViewportStationary,
2209                 bool DCCEnable,
2210                 unsigned int NumberOfDPPs,
2211                 unsigned int BlockHeight256Bytes,
2212                 unsigned int BlockWidth256Bytes,
2213                 enum source_format_class SourcePixelFormat,
2214                 unsigned int SurfaceTiling,
2215                 unsigned int BytePerPixel,
2216                 enum dm_rotation_angle SourceRotation,
2217                 double SwathWidth,
2218                 unsigned int ViewportHeight,
2219                 unsigned int    ViewportXStart,
2220                 unsigned int    ViewportYStart,
2221                 bool GPUVMEnable,
2222                 bool HostVMEnable,
2223                 unsigned int HostVMMaxNonCachedPageTableLevels,
2224                 unsigned int GPUVMMaxPageTableLevels,
2225                 unsigned int GPUVMMinPageSizeKBytes,
2226                 unsigned int HostVMMinPageSize,
2227                 unsigned int PTEBufferSizeInRequests,
2228                 unsigned int Pitch,
2229                 unsigned int DCCMetaPitch,
2230                 unsigned int MacroTileWidth,
2231                 unsigned int MacroTileHeight,
2232
2233                 /* Output */
2234                 unsigned int *MetaRowByte,
2235                 unsigned int *PixelPTEBytesPerRow,
2236                 unsigned int    *dpte_row_width_ub,
2237                 unsigned int *dpte_row_height,
2238                 unsigned int *dpte_row_height_linear,
2239                 unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2240                 unsigned int    *dpte_row_width_ub_one_row_per_frame,
2241                 unsigned int    *dpte_row_height_one_row_per_frame,
2242                 unsigned int *MetaRequestWidth,
2243                 unsigned int *MetaRequestHeight,
2244                 unsigned int *meta_row_width,
2245                 unsigned int *meta_row_height,
2246                 unsigned int *PixelPTEReqWidth,
2247                 unsigned int *PixelPTEReqHeight,
2248                 unsigned int *PTERequestSize,
2249                 unsigned int    *DPDE0BytesFrame,
2250                 unsigned int    *MetaPTEBytesFrame)
2251 {
2252         unsigned int MPDEBytesFrame;
2253         unsigned int DCCMetaSurfaceBytes;
2254         unsigned int ExtraDPDEBytesFrame;
2255         unsigned int PDEAndMetaPTEBytesFrame;
2256         unsigned int HostVMDynamicLevels = 0;
2257         unsigned int    MacroTileSizeBytes;
2258         unsigned int    vp_height_meta_ub;
2259         unsigned int    vp_height_dpte_ub;
2260         unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2261
2262         if (GPUVMEnable == true && HostVMEnable == true) {
2263                 if (HostVMMinPageSize < 2048)
2264                         HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2265                 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2266                         HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2267                 else
2268                         HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2269         }
2270
2271         *MetaRequestHeight = 8 * BlockHeight256Bytes;
2272         *MetaRequestWidth = 8 * BlockWidth256Bytes;
2273         if (SurfaceTiling == dm_sw_linear) {
2274                 *meta_row_height = 32;
2275                 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2276                                 - dml_floor(ViewportXStart, *MetaRequestWidth);
2277         } else if (!IsVertical(SourceRotation)) {
2278                 *meta_row_height = *MetaRequestHeight;
2279                 if (ViewportStationary && NumberOfDPPs == 1) {
2280                         *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2281                                         *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2282                 } else {
2283                         *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2284                 }
2285                 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2286         } else {
2287                 *meta_row_height = *MetaRequestWidth;
2288                 if (ViewportStationary && NumberOfDPPs == 1) {
2289                         *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2290                                         *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2291                 } else {
2292                         *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2293                 }
2294                 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2295         }
2296
2297         if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2298                 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2299                                 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2300         } else if (!IsVertical(SourceRotation)) {
2301                 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2302         } else {
2303                 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2304         }
2305
2306         DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2307
2308         if (GPUVMEnable == true) {
2309                 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2310                                 (8 * 4.0 * 1024), 1) + 1) * 64;
2311                 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2312         } else {
2313                 *MetaPTEBytesFrame = 0;
2314                 MPDEBytesFrame = 0;
2315         }
2316
2317         if (DCCEnable != true) {
2318                 *MetaPTEBytesFrame = 0;
2319                 MPDEBytesFrame = 0;
2320                 *MetaRowByte = 0;
2321         }
2322
2323         MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2324
2325         if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2326                 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2327                         vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2328                                         MacroTileHeight - 1, MacroTileHeight) -
2329                                         dml_floor(ViewportYStart, MacroTileHeight);
2330                 } else if (!IsVertical(SourceRotation)) {
2331                         vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2332                 } else {
2333                         vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2334                 }
2335                 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2336                                 (8 * 2097152), 1) + 1);
2337                 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2338         } else {
2339                 *DPDE0BytesFrame = 0;
2340                 ExtraDPDEBytesFrame = 0;
2341                 vp_height_dpte_ub = 0;
2342         }
2343
2344         PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2345
2346 #ifdef __DML_VBA_DEBUG__
2347         dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2348         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2349         dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2350         dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2351         dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2352         dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2353         dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2354         dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2355         dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2356         dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2357         dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2358         dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2359         dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2360         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2361         dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2362         dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2363         dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2364 #endif
2365
2366         if (HostVMEnable == true)
2367                 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2368
2369         if (SurfaceTiling == dm_sw_linear) {
2370                 *PixelPTEReqHeight = 1;
2371                 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2372                 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2373                 *PTERequestSize = 64;
2374         } else if (GPUVMMinPageSizeKBytes == 4) {
2375                 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2376                 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2377                 *PTERequestSize = 128;
2378         } else {
2379                 *PixelPTEReqHeight = MacroTileHeight;
2380                 *PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2381                 *PTERequestSize = 64;
2382         }
2383 #ifdef __DML_VBA_DEBUG__
2384         dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2385         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2386         dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2387         dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2388         dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2389         dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2390         dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2391 #endif
2392
2393         *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2394         *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2395                         (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2396                                         (double) *PixelPTEReqWidth;
2397         *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2398                         *PTERequestSize;
2399
2400         if (SurfaceTiling == dm_sw_linear) {
2401                 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2402                                 *PixelPTEReqWidth / Pitch), 1));
2403 #ifdef __DML_VBA_DEBUG__
2404                 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2405                                 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2406                 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2407                                 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2408                 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2409                                 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2410                 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2411                                 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2412                                                 *PixelPTEReqWidth / Pitch), 1));
2413                 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2414 #endif
2415                 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2416                                 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2417                 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2418
2419                 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2420                 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2421                                 PixelPTEReqWidth_linear / Pitch), 1);
2422                 if (*dpte_row_height_linear > 128)
2423                         *dpte_row_height_linear = 128;
2424
2425         } else if (!IsVertical(SourceRotation)) {
2426                 *dpte_row_height = *PixelPTEReqHeight;
2427
2428                 if (GPUVMMinPageSizeKBytes > 64) {
2429                         *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2430                                         *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2431                 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2432                         *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2433                                         *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2434                                         dml_floor(ViewportXStart, *PixelPTEReqWidth);
2435                 } else {
2436                         *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2437                                         *PixelPTEReqWidth;
2438                 }
2439
2440                 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2441         } else {
2442                 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2443
2444                 if (ViewportStationary && (NumberOfDPPs == 1)) {
2445                         *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2446                                         *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2447                 } else {
2448                         *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2449                                         * *PixelPTEReqHeight;
2450                 }
2451
2452                 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2453         }
2454
2455         if (GPUVMEnable != true)
2456                 *PixelPTEBytesPerRow = 0;
2457         if (HostVMEnable == true)
2458                 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2459
2460 #ifdef __DML_VBA_DEBUG__
2461         dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2462         dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2463         dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2464         dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2465         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2466         dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2467         dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2468         dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2469                         __func__, *dpte_row_width_ub_one_row_per_frame);
2470         dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2471                         __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2472         dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2473                         *MetaPTEBytesFrame);
2474 #endif
2475
2476         return PDEAndMetaPTEBytesFrame;
2477 } // CalculateVMAndRowBytes
2478
2479 double dml32_CalculatePrefetchSourceLines(
2480                 double VRatio,
2481                 unsigned int VTaps,
2482                 bool Interlace,
2483                 bool ProgressiveToInterlaceUnitInOPP,
2484                 unsigned int SwathHeight,
2485                 enum dm_rotation_angle SourceRotation,
2486                 bool ViewportStationary,
2487                 double SwathWidth,
2488                 unsigned int ViewportHeight,
2489                 unsigned int ViewportXStart,
2490                 unsigned int ViewportYStart,
2491
2492                 /* Output */
2493                 double *VInitPreFill,
2494                 unsigned int *MaxNumSwath)
2495 {
2496
2497         unsigned int vp_start_rot;
2498         unsigned int sw0_tmp;
2499         unsigned int MaxPartialSwath;
2500         double numLines;
2501
2502 #ifdef __DML_VBA_DEBUG__
2503         dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2504         dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2505         dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2506         dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2507         dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2508         dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2509 #endif
2510         if (ProgressiveToInterlaceUnitInOPP)
2511                 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2512         else
2513                 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2514
2515         if (ViewportStationary) {
2516                 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2517                         vp_start_rot = SwathHeight -
2518                                         (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2519                 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2520                         vp_start_rot = ViewportXStart;
2521                 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2522                         vp_start_rot = SwathHeight -
2523                                         (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2524                 } else {
2525                         vp_start_rot = ViewportYStart;
2526                 }
2527                 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2528                 if (sw0_tmp < *VInitPreFill)
2529                         *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2530                 else
2531                         *MaxNumSwath = 1;
2532                 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2533         } else {
2534                 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2535                 if (*VInitPreFill > 1)
2536                         MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2537                 else
2538                         MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2539         }
2540         numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2541
2542 #ifdef __DML_VBA_DEBUG__
2543         dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2544         dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2545         dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2546         dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2547         dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2548 #endif
2549         return numLines;
2550
2551 } // CalculatePrefetchSourceLines
2552
2553 void dml32_CalculateMALLUseForStaticScreen(
2554                 unsigned int NumberOfActiveSurfaces,
2555                 unsigned int MALLAllocatedForDCNFinal,
2556                 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2557                 unsigned int SurfaceSizeInMALL[],
2558                 bool one_row_per_frame_fits_in_buffer[],
2559
2560                 /* output */
2561                 bool UsesMALLForStaticScreen[])
2562 {
2563         unsigned int k;
2564         unsigned int SurfaceToAddToMALL;
2565         bool CanAddAnotherSurfaceToMALL;
2566         unsigned int TotalSurfaceSizeInMALL;
2567
2568         TotalSurfaceSizeInMALL = 0;
2569         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2570                 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2571                 if (UsesMALLForStaticScreen[k])
2572                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2573 #ifdef __DML_VBA_DEBUG__
2574                 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2575                 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2576 #endif
2577         }
2578
2579         SurfaceToAddToMALL = 0;
2580         CanAddAnotherSurfaceToMALL = true;
2581         while (CanAddAnotherSurfaceToMALL) {
2582                 CanAddAnotherSurfaceToMALL = false;
2583                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2584                         if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2585                                         !UsesMALLForStaticScreen[k] &&
2586                                         UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2587                                         one_row_per_frame_fits_in_buffer[k] &&
2588                                         (!CanAddAnotherSurfaceToMALL ||
2589                                         SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2590                                 CanAddAnotherSurfaceToMALL = true;
2591                                 SurfaceToAddToMALL = k;
2592 #ifdef __DML_VBA_DEBUG__
2593                                 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2594                                                 __func__, k, UseMALLForStaticScreen[k]);
2595 #endif
2596                         }
2597                 }
2598                 if (CanAddAnotherSurfaceToMALL) {
2599                         UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2600                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2601
2602 #ifdef __DML_VBA_DEBUG__
2603                         dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2604                         dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2605 #endif
2606
2607                 }
2608         }
2609 }
2610
2611 void dml32_CalculateRowBandwidth(
2612                 bool GPUVMEnable,
2613                 enum source_format_class SourcePixelFormat,
2614                 double VRatio,
2615                 double VRatioChroma,
2616                 bool DCCEnable,
2617                 double LineTime,
2618                 unsigned int MetaRowByteLuma,
2619                 unsigned int MetaRowByteChroma,
2620                 unsigned int meta_row_height_luma,
2621                 unsigned int meta_row_height_chroma,
2622                 unsigned int PixelPTEBytesPerRowLuma,
2623                 unsigned int PixelPTEBytesPerRowChroma,
2624                 unsigned int dpte_row_height_luma,
2625                 unsigned int dpte_row_height_chroma,
2626                 /* Output */
2627                 double *meta_row_bw,
2628                 double *dpte_row_bw)
2629 {
2630         if (DCCEnable != true) {
2631                 *meta_row_bw = 0;
2632         } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2633                         SourcePixelFormat == dm_rgbe_alpha) {
2634                 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2635                                 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2636         } else {
2637                 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2638         }
2639
2640         if (GPUVMEnable != true) {
2641                 *dpte_row_bw = 0;
2642         } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2643                         SourcePixelFormat == dm_rgbe_alpha) {
2644                 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2645                                 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2646         } else {
2647                 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2648         }
2649 }
2650
/*
 * Return the urgent latency: the largest of the three supplied latencies,
 * and, when DoUrgentLatencyAdjustment is set, that value plus
 *     UrgentLatencyAdjustmentFabricClockComponent *
 *     (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double WorstLatency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstLatency;

	/* FabricClock is only used (as a divisor) on this adjusted path. */
	return WorstLatency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2669
2670 void dml32_CalculateUrgentBurstFactor(
2671                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2672                 unsigned int    swath_width_luma_ub,
2673                 unsigned int    swath_width_chroma_ub,
2674                 unsigned int SwathHeightY,
2675                 unsigned int SwathHeightC,
2676                 double  LineTime,
2677                 double  UrgentLatency,
2678                 double  CursorBufferSize,
2679                 unsigned int CursorWidth,
2680                 unsigned int CursorBPP,
2681                 double  VRatio,
2682                 double  VRatioC,
2683                 double  BytePerPixelInDETY,
2684                 double  BytePerPixelInDETC,
2685                 unsigned int    DETBufferSizeY,
2686                 unsigned int    DETBufferSizeC,
2687                 /* Output */
2688                 double *UrgentBurstFactorCursor,
2689                 double *UrgentBurstFactorLuma,
2690                 double *UrgentBurstFactorChroma,
2691                 bool   *NotEnoughUrgentLatencyHiding)
2692 {
2693         double       LinesInDETLuma;
2694         double       LinesInDETChroma;
2695         unsigned int LinesInCursorBuffer;
2696         double       CursorBufferSizeInTime;
2697         double       DETBufferSizeInTimeLuma;
2698         double       DETBufferSizeInTimeChroma;
2699
2700         *NotEnoughUrgentLatencyHiding = 0;
2701
2702         if (CursorWidth > 0) {
2703                 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2704                                 (CursorWidth * CursorBPP / 8.0)), 1.0);
2705                 if (VRatio > 0) {
2706                         CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2707                         if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2708                                 *NotEnoughUrgentLatencyHiding = 1;
2709                                 *UrgentBurstFactorCursor = 0;
2710                         } else {
2711                                 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2712                                                 (CursorBufferSizeInTime - UrgentLatency);
2713                         }
2714                 } else {
2715                         *UrgentBurstFactorCursor = 1;
2716                 }
2717         }
2718
2719         LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2720                         DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2721
2722         if (VRatio > 0) {
2723                 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2724                 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2725                         *NotEnoughUrgentLatencyHiding = 1;
2726                         *UrgentBurstFactorLuma = 0;
2727                 } else {
2728                         *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2729                 }
2730         } else {
2731                 *UrgentBurstFactorLuma = 1;
2732         }
2733
2734         if (BytePerPixelInDETC > 0) {
2735                 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2736                                         1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2737                                         / swath_width_chroma_ub;
2738
2739                 if (VRatio > 0) {
2740                         DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2741                         if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2742                                 *NotEnoughUrgentLatencyHiding = 1;
2743                                 *UrgentBurstFactorChroma = 0;
2744                         } else {
2745                                 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2746                                                 / (DETBufferSizeInTimeChroma - UrgentLatency);
2747                         }
2748                 } else {
2749                         *UrgentBurstFactorChroma = 1;
2750                 }
2751         }
2752 } // CalculateUrgentBurstFactor
2753
2754 void dml32_CalculateDCFCLKDeepSleep(
2755                 unsigned int NumberOfActiveSurfaces,
2756                 unsigned int BytePerPixelY[],
2757                 unsigned int BytePerPixelC[],
2758                 double VRatio[],
2759                 double VRatioChroma[],
2760                 double SwathWidthY[],
2761                 double SwathWidthC[],
2762                 unsigned int DPPPerSurface[],
2763                 double HRatio[],
2764                 double HRatioChroma[],
2765                 double PixelClock[],
2766                 double PSCL_THROUGHPUT[],
2767                 double PSCL_THROUGHPUT_CHROMA[],
2768                 double Dppclk[],
2769                 double ReadBandwidthLuma[],
2770                 double ReadBandwidthChroma[],
2771                 unsigned int ReturnBusWidth,
2772
2773                 /* Output */
2774                 double *DCFClkDeepSleep)
2775 {
2776         unsigned int k;
2777         double   DisplayPipeLineDeliveryTimeLuma;
2778         double   DisplayPipeLineDeliveryTimeChroma;
2779         double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2780         double ReadBandwidth = 0.0;
2781
2782         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2783
2784                 if (VRatio[k] <= 1) {
2785                         DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2786                                         / PixelClock[k];
2787                 } else {
2788                         DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2789                 }
2790                 if (BytePerPixelC[k] == 0) {
2791                         DisplayPipeLineDeliveryTimeChroma = 0;
2792                 } else {
2793                         if (VRatioChroma[k] <= 1) {
2794                                 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2795                                                 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2796                         } else {
2797                                 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2798                                                 / Dppclk[k];
2799                         }
2800                 }
2801
2802                 if (BytePerPixelC[k] > 0) {
2803                         DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2804                                         BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2805                                         __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2806                                         32.0 / DisplayPipeLineDeliveryTimeChroma);
2807                 } else {
2808                         DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2809                                         64.0 / DisplayPipeLineDeliveryTimeLuma;
2810                 }
2811                 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2812
2813 #ifdef __DML_VBA_DEBUG__
2814                 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2815                 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2816 #endif
2817         }
2818
2819         for (k = 0; k < NumberOfActiveSurfaces; ++k)
2820                 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2821
2822         *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2823
2824 #ifdef __DML_VBA_DEBUG__
2825         dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2826         dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2827         dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2828         dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2829 #endif
2830
2831         for (k = 0; k < NumberOfActiveSurfaces; ++k)
2832                 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2833 #ifdef __DML_VBA_DEBUG__
2834         dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2835 #endif
2836 } // CalculateDCFCLKDeepSleep
2837
2838 double dml32_CalculateWriteBackDelay(
2839                 enum source_format_class WritebackPixelFormat,
2840                 double WritebackHRatio,
2841                 double WritebackVRatio,
2842                 unsigned int WritebackVTaps,
2843                 unsigned int         WritebackDestinationWidth,
2844                 unsigned int         WritebackDestinationHeight,
2845                 unsigned int         WritebackSourceHeight,
2846                 unsigned int HTotal)
2847 {
2848         double CalculateWriteBackDelay;
2849         double Line_length;
2850         double Output_lines_last_notclamped;
2851         double WritebackVInit;
2852
2853         WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2854         Line_length = dml_max((double) WritebackDestinationWidth,
2855                         dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2856         Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2857                         dml_ceil(((double)WritebackSourceHeight -
2858                                         (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2859         if (Output_lines_last_notclamped < 0) {
2860                 CalculateWriteBackDelay = 0;
2861         } else {
2862                 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2863                                 (HTotal - WritebackDestinationWidth) + 80;
2864         }
2865         return CalculateWriteBackDelay;
2866 }
2867
2868 void dml32_UseMinimumDCFCLK(
2869                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2870                 bool DRRDisplay[],
2871                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2872                 unsigned int MaxInterDCNTileRepeaters,
2873                 unsigned int MaxPrefetchMode,
2874                 double DRAMClockChangeLatencyFinal,
2875                 double FCLKChangeLatency,
2876                 double SREnterPlusExitTime,
2877                 unsigned int ReturnBusWidth,
2878                 unsigned int RoundTripPingLatencyCycles,
2879                 unsigned int ReorderingBytes,
2880                 unsigned int PixelChunkSizeInKByte,
2881                 unsigned int MetaChunkSize,
2882                 bool GPUVMEnable,
2883                 unsigned int GPUVMMaxPageTableLevels,
2884                 bool HostVMEnable,
2885                 unsigned int NumberOfActiveSurfaces,
2886                 double HostVMMinPageSize,
2887                 unsigned int HostVMMaxNonCachedPageTableLevels,
2888                 bool DynamicMetadataVMEnabled,
2889                 bool ImmediateFlipRequirement,
2890                 bool ProgressiveToInterlaceUnitInOPP,
2891                 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2892                 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2893                 unsigned int VTotal[],
2894                 unsigned int VActive[],
2895                 unsigned int DynamicMetadataTransmittedBytes[],
2896                 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2897                 bool Interlace[],
2898                 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2899                 double RequiredDISPCLK[][2],
2900                 double UrgLatency[],
2901                 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2902                 double ProjectedDCFClkDeepSleep[][2],
2903                 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2904                 unsigned int TotalNumberOfActiveDPP[][2],
2905                 unsigned int TotalNumberOfDCCActiveDPP[][2],
2906                 unsigned int dpte_group_bytes[],
2907                 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2908                 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2909                 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2910                 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2911                 unsigned int BytePerPixelY[],
2912                 unsigned int BytePerPixelC[],
2913                 unsigned int HTotal[],
2914                 double PixelClock[],
2915                 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2916                 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2917                 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2918                 bool DynamicMetadataEnable[],
2919                 double ReadBandwidthLuma[],
2920                 double ReadBandwidthChroma[],
2921                 double DCFCLKPerState[],
2922                 /* Output */
2923                 double DCFCLKState[][2])
2924 {
2925         unsigned int i, j, k;
2926         unsigned int     dummy1;
2927         double dummy2, dummy3;
2928         double   NormalEfficiency;
2929         double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2930
2931         NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2932         for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2933                 for  (j = 0; j <= 1; ++j) {
2934                         double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2935                         double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2936                         double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2937                         double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2938                         double MinimumTWait = 0.0;
2939                         double DPTEBandwidth;
2940                         double DCFCLKRequiredForAverageBandwidth;
2941                         unsigned int ExtraLatencyBytes;
2942                         double ExtraLatencyCycles;
2943                         double DCFCLKRequiredForPeakBandwidth;
2944                         unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2945                         double MinimumTvmPlus2Tr0;
2946
2947                         TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2948                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2949                                 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2950                                                 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2951                                                                 / (15.75 * HTotal[k] / PixelClock[k]);
2952                         }
2953
2954                         for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
2955                                 NoOfDPPState[k] = NoOfDPP[i][j][k];
2956
2957                         DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
2958                         DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
2959
2960                         ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
2961                                         TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
2962                                         TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
2963                                         NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
2964                                         HostVMMaxNonCachedPageTableLevels);
2965                         ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
2966                                         + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
2967                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2968                                 double DCFCLKCyclesRequiredInPrefetch;
2969                                 double PrefetchTime;
2970
2971                                 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
2972                                                 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
2973                                                 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
2974                                                                 * BytePerPixelC[k]) / NormalEfficiency
2975                                                 / ReturnBusWidth;
2976                                 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
2977                                                 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
2978                                                                 / NormalEfficiency / ReturnBusWidth
2979                                                                 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
2980                                                 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
2981                                                                 / ReturnBusWidth
2982                                                 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
2983                                                 + PixelDCFCLKCyclesRequiredInPrefetch[k];
2984                                 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
2985                                                 * HTotal[k] / PixelClock[k];
2986                                 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
2987                                                 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
2988                                                 UrgLatency[i] * GPUVMMaxPageTableLevels *
2989                                                 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
2990
2991                                 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
2992                                                 UseMALLForPStateChange[k],
2993                                                 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2994                                                 DRRDisplay[k],
2995                                                 DRAMClockChangeLatencyFinal,
2996                                                 FCLKChangeLatency,
2997                                                 UrgLatency[i],
2998                                                 SREnterPlusExitTime);
2999
3000                                 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3001                                                 MinimumTWait - UrgLatency[i] *
3002                                                 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3003                                                 GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3004                                                 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3005                                                 DynamicMetadataVMExtraLatency[k];
3006
3007                                 if (PrefetchTime > 0) {
3008                                         double ExpectedVRatioPrefetch;
3009
3010                                         ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3011                                                         PixelDCFCLKCyclesRequiredInPrefetch[k] /
3012                                                         DCFCLKCyclesRequiredInPrefetch);
3013                                         DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3014                                                         PixelDCFCLKCyclesRequiredInPrefetch[k] /
3015                                                         PrefetchPixelLinesTime[k] *
3016                                                         dml_max(1.0, ExpectedVRatioPrefetch) *
3017                                                         dml_max(1.0, ExpectedVRatioPrefetch / 4);
3018                                         if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3019                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3020                                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3021                                                                 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3022                                                                 NormalEfficiency / ReturnBusWidth;
3023                                         }
3024                                 } else {
3025                                         DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3026                                 }
3027                                 if (DynamicMetadataEnable[k] == true) {
3028                                         double TSetupPipe;
3029                                         double TdmbfPipe;
3030                                         double TdmsksPipe;
3031                                         double TdmecPipe;
3032                                         double AllowedTimeForUrgentExtraLatency;
3033
3034                                         dml32_CalculateVUpdateAndDynamicMetadataParameters(
3035                                                         MaxInterDCNTileRepeaters,
3036                                                         RequiredDPPCLKPerSurface[i][j][k],
3037                                                         RequiredDISPCLK[i][j],
3038                                                         ProjectedDCFClkDeepSleep[i][j],
3039                                                         PixelClock[k],
3040                                                         HTotal[k],
3041                                                         VTotal[k] - VActive[k],
3042                                                         DynamicMetadataTransmittedBytes[k],
3043                                                         DynamicMetadataLinesBeforeActiveRequired[k],
3044                                                         Interlace[k],
3045                                                         ProgressiveToInterlaceUnitInOPP,
3046
3047                                                         /* output */
3048                                                         &TSetupPipe,
3049                                                         &TdmbfPipe,
3050                                                         &TdmecPipe,
3051                                                         &TdmsksPipe,
3052                                                         &dummy1,
3053                                                         &dummy2,
3054                                                         &dummy3);
3055                                         AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3056                                                         PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3057                                                         TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3058                                         if (AllowedTimeForUrgentExtraLatency > 0)
3059                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3060                                                                 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3061                                                                 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3062                                         else
3063                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3064                                 }
3065                         }
3066                         DCFCLKRequiredForPeakBandwidth = 0;
3067                         for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3068                                 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3069                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3070                         }
3071                         MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3072                                         (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3073                                         (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3074                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3075                                 double MaximumTvmPlus2Tr0PlusTsw;
3076
3077                                 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3078                                                 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3079                                 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3080                                         DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3081                                 } else {
3082                                         DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3083                                                         2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3084                                                                 MinimumTvmPlus2Tr0 -
3085                                                                 PrefetchPixelLinesTime[k] / 4),
3086                                                         (2 * ExtraLatencyCycles +
3087                                                                 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3088                                                                 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3089                                 }
3090                         }
3091                         DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3092                                         dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3093                 }
3094         }
3095 }
3096
/*
 * Return the number of extra latency bytes.
 *
 * The base amount is
 *     ReorderingBytes
 *     + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *        + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.
 *
 * When GPUVMEnable is set, each surface k additionally contributes
 *     NumberOfDPP[k] * dpte_group_bytes[k]
 *     * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is 0 unless both GPUVMEnable and HostVMEnable are set.
 * In that case it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (for HostVMMinPageSize < 2048, < 1048576, or anything else respectively)
 * and clamped at 0.
 *
 * The accumulated double is converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int k;
	double   ret;
	unsigned int  HostVMDynamicLevels = 0;

	if (GPUVMEnable == true && HostVMEnable == true) {
		unsigned int LevelsRemoved;

		if (HostVMMinPageSize < 2048)
			LevelsRemoved = 0;
		else if (HostVMMinPageSize < 1048576)
			LevelsRemoved = 1;
		else
			LevelsRemoved = 2;

		/*
		 * Clamp at zero in unsigned arithmetic; no need to go through a
		 * signed cast and a floating-point max for a small level count.
		 */
		if (HostVMMaxNonCachedPageTableLevels > LevelsRemoved)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels - LevelsRemoved;
	}

	ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable == true) {
		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
			ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] *
					(1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
		}
	}
	return ret;
}
3137
/*
 * Derives the VUPDATE/VREADY pixel quantities and the dynamic metadata
 * timing terms from the pipe clocks and timing.
 *
 * Outputs:
 *   TSetup           - (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix)
 *                      divided by PixelClock.
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk.
 *   Tdmec            - HTotal / PixelClock.
 *   Tdmsks           - half of VBlank * HTotal / PixelClock when no
 *                      "lines before active" count is given, otherwise that
 *                      count times HTotal / PixelClock; halved again for
 *                      interlaced timing that is not produced by the
 *                      progressive-to-interlace unit in OPP.
 *   VUpdateOffsetPix - ceil(HTotal / 4).
 *   VUpdateWidthPix,
 *   VReadyOffsetPix  - clock-cycle budgets (including the inter-tile repeater
 *                      delay) converted to pixels and rounded up.
 *
 * Division by a zero clock is not guarded here.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	/* Each repeater costs 2 DPPCLK cycles plus 3 DISPCLK cycles. */
	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3192
3193 double dml32_CalculateTWait(
3194                 unsigned int PrefetchMode,
3195                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3196                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3197                 bool DRRDisplay,
3198                 double DRAMClockChangeLatency,
3199                 double FCLKChangeLatency,
3200                 double UrgentLatency,
3201                 double SREnterPlusExitTime)
3202 {
3203         double TWait = 0.0;
3204
3205         if (PrefetchMode == 0 &&
3206                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3207                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3208                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3209                         !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3210                 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3211         } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3212                 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3213         } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3214                 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3215         } else {
3216                 TWait = UrgentLatency;
3217         }
3218
3219 #ifdef __DML_VBA_DEBUG__
3220         dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3221         dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3222 #endif
3223         return TWait;
3224 } // CalculateTWait
3225
3226 // Function: get_return_bw_mbps
3227 // Megabyte per second
3228 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3229                 const int VoltageLevel,
3230                 const bool HostVMEnable,
3231                 const double DCFCLK,
3232                 const double FabricClock,
3233                 const double DRAMSpeed)
3234 {
3235         double ReturnBW = 0.;
3236         double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3237         double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3238         double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3239         double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3240                         IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3241                         IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3242                                         soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3243         double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3244                         IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3245                         IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3246                                         soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3247
3248         if (HostVMEnable != true)
3249                 ReturnBW = PixelDataOnlyReturnBW;
3250         else
3251                 ReturnBW = PixelMixedWithVMDataReturnBW;
3252
3253 #ifdef __DML_VBA_DEBUG__
3254         dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3255         dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3256         dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3257         dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3258         dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3259         dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3260         dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3261         dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3262         dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3263         dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3264         dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3265 #endif
3266         return ReturnBW;
3267 }
3268
3269 // Function: get_return_bw_mbps_vm_only
3270 // Megabyte per second
3271 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3272                 const int VoltageLevel,
3273                 const double DCFCLK,
3274                 const double FabricClock,
3275                 const double DRAMSpeed)
3276 {
3277         double VMDataOnlyReturnBW = dml_min3(
3278                         soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3279                         FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3280                                         * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3281                         DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3282                                         * (VoltageLevel < 2 ?
3283                                                         soc->pct_ideal_dram_bw_after_urgent_strobe :
3284                                                         soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3285 #ifdef __DML_VBA_DEBUG__
3286         dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3287         dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3288         dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3289         dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3290         dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3291 #endif
3292         return VMDataOnlyReturnBW;
3293 }
3294
3295 double dml32_CalculateExtraLatency(
3296                 unsigned int RoundTripPingLatencyCycles,
3297                 unsigned int ReorderingBytes,
3298                 double DCFCLK,
3299                 unsigned int TotalNumberOfActiveDPP,
3300                 unsigned int PixelChunkSizeInKByte,
3301                 unsigned int TotalNumberOfDCCActiveDPP,
3302                 unsigned int MetaChunkSize,
3303                 double ReturnBW,
3304                 bool GPUVMEnable,
3305                 bool HostVMEnable,
3306                 unsigned int NumberOfActiveSurfaces,
3307                 unsigned int NumberOfDPP[],
3308                 unsigned int dpte_group_bytes[],
3309                 double HostVMInefficiencyFactor,
3310                 double HostVMMinPageSize,
3311                 unsigned int HostVMMaxNonCachedPageTableLevels)
3312 {
3313         double ExtraLatencyBytes;
3314         double ExtraLatency;
3315
3316         ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3317                         ReorderingBytes,
3318                         TotalNumberOfActiveDPP,
3319                         PixelChunkSizeInKByte,
3320                         TotalNumberOfDCCActiveDPP,
3321                         MetaChunkSize,
3322                         GPUVMEnable,
3323                         HostVMEnable,
3324                         NumberOfActiveSurfaces,
3325                         NumberOfDPP,
3326                         dpte_group_bytes,
3327                         HostVMInefficiencyFactor,
3328                         HostVMMinPageSize,
3329                         HostVMMaxNonCachedPageTableLevels);
3330
3331         ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3332
3333 #ifdef __DML_VBA_DEBUG__
3334         dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3335         dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3336         dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3337         dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3338         dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3339 #endif
3340
3341         return ExtraLatency;
3342 } // CalculateExtraLatency
3343
3344 bool dml32_CalculatePrefetchSchedule(
3345                 struct dml32_CalculatePrefetchSchedule *st_vars,
3346                 double HostVMInefficiencyFactor,
3347                 DmlPipe *myPipe,
3348                 unsigned int DSCDelay,
3349                 double DPPCLKDelaySubtotalPlusCNVCFormater,
3350                 double DPPCLKDelaySCL,
3351                 double DPPCLKDelaySCLLBOnly,
3352                 double DPPCLKDelayCNVCCursor,
3353                 double DISPCLKDelaySubtotal,
3354                 unsigned int DPP_RECOUT_WIDTH,
3355                 enum output_format_class OutputFormat,
3356                 unsigned int MaxInterDCNTileRepeaters,
3357                 unsigned int VStartup,
3358                 unsigned int MaxVStartup,
3359                 unsigned int GPUVMPageTableLevels,
3360                 bool GPUVMEnable,
3361                 bool HostVMEnable,
3362                 unsigned int HostVMMaxNonCachedPageTableLevels,
3363                 double HostVMMinPageSize,
3364                 bool DynamicMetadataEnable,
3365                 bool DynamicMetadataVMEnabled,
3366                 int DynamicMetadataLinesBeforeActiveRequired,
3367                 unsigned int DynamicMetadataTransmittedBytes,
3368                 double UrgentLatency,
3369                 double UrgentExtraLatency,
3370                 double TCalc,
3371                 unsigned int PDEAndMetaPTEBytesFrame,
3372                 unsigned int MetaRowByte,
3373                 unsigned int PixelPTEBytesPerRow,
3374                 double PrefetchSourceLinesY,
3375                 unsigned int SwathWidthY,
3376                 unsigned int VInitPreFillY,
3377                 unsigned int MaxNumSwathY,
3378                 double PrefetchSourceLinesC,
3379                 unsigned int SwathWidthC,
3380                 unsigned int VInitPreFillC,
3381                 unsigned int MaxNumSwathC,
3382                 unsigned int swath_width_luma_ub,
3383                 unsigned int swath_width_chroma_ub,
3384                 unsigned int SwathHeightY,
3385                 unsigned int SwathHeightC,
3386                 double TWait,
3387                 /* Output */
3388                 double   *DSTXAfterScaler,
3389                 double   *DSTYAfterScaler,
3390                 double *DestinationLinesForPrefetch,
3391                 double *PrefetchBandwidth,
3392                 double *DestinationLinesToRequestVMInVBlank,
3393                 double *DestinationLinesToRequestRowInVBlank,
3394                 double *VRatioPrefetchY,
3395                 double *VRatioPrefetchC,
3396                 double *RequiredPrefetchPixDataBWLuma,
3397                 double *RequiredPrefetchPixDataBWChroma,
3398                 bool   *NotEnoughTimeForDynamicMetadata,
3399                 double *Tno_bw,
3400                 double *prefetch_vmrow_bw,
3401                 double *Tdmdl_vm,
3402                 double *Tdmdl,
3403                 double *TSetup,
3404                 unsigned int   *VUpdateOffsetPix,
3405                 double   *VUpdateWidthPix,
3406                 double   *VReadyOffsetPix)
3407 {
3408         bool MyError = false;
3409
3410         st_vars->TimeForFetchingMetaPTE = 0;
3411         st_vars->TimeForFetchingRowInVBlank = 0;
3412         st_vars->LinesToRequestPrefetchPixelData = 0;
3413         st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3414         st_vars->Tsw_est1 = 0;
3415         st_vars->Tsw_est3 = 0;
3416
3417         if (GPUVMEnable == true && HostVMEnable == true)
3418                 st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3419         else
3420                 st_vars->HostVMDynamicLevelsTrips = 0;
3421 #ifdef __DML_VBA_DEBUG__
3422         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
3423         dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
3424         dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3425         dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3426                         __func__, HostVMEnable, HostVMInefficiencyFactor);
3427 #endif
3428         dml32_CalculateVUpdateAndDynamicMetadataParameters(
3429                         MaxInterDCNTileRepeaters,
3430                         myPipe->Dppclk,
3431                         myPipe->Dispclk,
3432                         myPipe->DCFClkDeepSleep,
3433                         myPipe->PixelClock,
3434                         myPipe->HTotal,
3435                         myPipe->VBlank,
3436                         DynamicMetadataTransmittedBytes,
3437                         DynamicMetadataLinesBeforeActiveRequired,
3438                         myPipe->InterlaceEnable,
3439                         myPipe->ProgressiveToInterlaceUnitInOPP,
3440                         TSetup,
3441
3442                         /* output */
3443                         &st_vars->Tdmbf,
3444                         &st_vars->Tdmec,
3445                         &st_vars->Tdmsks,
3446                         VUpdateOffsetPix,
3447                         VUpdateWidthPix,
3448                         VReadyOffsetPix);
3449
3450         st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock;
3451         st_vars->trip_to_mem = UrgentLatency;
3452         st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
3453
3454         if (DynamicMetadataVMEnabled == true)
3455                 *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem;
3456         else
3457                 *Tdmdl = TWait + UrgentExtraLatency;
3458
3459 #ifdef __DML_VBA_ALLOW_DELTA__
3460         if (DynamicMetadataEnable == false)
3461                 *Tdmdl = 0.0;
3462 #endif
3463
3464         if (DynamicMetadataEnable == true) {
3465                 if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) {
3466                         *NotEnoughTimeForDynamicMetadata = true;
3467 #ifdef __DML_VBA_DEBUG__
3468                         dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3469                         dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3470                                         __func__, st_vars->Tdmbf);
3471                         dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
3472                         dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3473                                         __func__, st_vars->Tdmsks);
3474                         dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3475                                         __func__, *Tdmdl);
3476 #endif
3477                 } else {
3478                         *NotEnoughTimeForDynamicMetadata = false;
3479                 }
3480         } else {
3481                 *NotEnoughTimeForDynamicMetadata = false;
3482         }
3483
3484         *Tdmdl_vm =  (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
3485                         GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0);
3486
3487         if (myPipe->ScalerEnabled)
3488                 st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
3489         else
3490                 st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
3491
3492         st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
3493
3494         st_vars->DISPCLKCycles = DISPCLKDelaySubtotal;
3495
3496         if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3497                 return true;
3498
3499         *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles *
3500                         myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3501
3502         *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3503                         + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3504                         + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3505                                         myPipe->HActive / 2 : 0)
3506                         + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3507
3508 #ifdef __DML_VBA_DEBUG__
3509         dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles);
3510         dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3511         dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3512         dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles);
3513         dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3514         dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3515         dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3516         dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3517         dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3518 #endif
3519
3520         if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3521                 *DSTYAfterScaler = 1;
3522         else
3523                 *DSTYAfterScaler = 0;
3524
3525         st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3526         *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3527         *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3528 #ifdef __DML_VBA_DEBUG__
3529         dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3530         dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3531 #endif
3532
3533         MyError = false;
3534
3535         st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1);
3536
3537         if (GPUVMEnable == true) {
3538                 st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
3539                 st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
3540                 if (GPUVMPageTableLevels >= 3) {
3541                         *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem *
3542                                         (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
3543                 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
3544                         st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) /
3545                                         4.0 * st_vars->LineTime; // VBA_ERROR
3546                         *Tno_bw = UrgentExtraLatency;
3547                 } else {
3548                         *Tno_bw = 0;
3549                 }
3550         } else if (myPipe->DCCEnable == true) {
3551                 st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
3552                 st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
3553                 *Tno_bw = 0;
3554         } else {
3555                 st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
3556                 st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0;
3557                 *Tno_bw = 0;
3558         }
3559         st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0);
3560         st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0);
3561
3562         if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3563                         || myPipe->SourcePixelFormat == dm_420_12) {
3564                 st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3565         } else {
3566                 st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3567         }
3568
3569         st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3570                         + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3571         st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3572                         st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime));
3573
3574         st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre;
3575         st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0);
3576         st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0;
3577
3578         if (GPUVMEnable == true) {
3579                 st_vars->Tvm_oto = dml_max3(
3580                                 st_vars->Tvm_trips,
3581                                 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto,
3582                                 st_vars->LineTime / 4.0);
3583         } else
3584                 st_vars->Tvm_oto = st_vars->LineTime / 4.0;
3585
3586         if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3587                 st_vars->Tr0_oto = dml_max4(
3588                                 st_vars->Tr0_trips,
3589                                 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto,
3590                                 (st_vars->LineTime - st_vars->Tvm_oto)/2.0,
3591                                 st_vars->LineTime / 4.0);
3592 #ifdef __DML_VBA_DEBUG__
3593                 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3594                                 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto);
3595                 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips);
3596                 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto);
3597                 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4);
3598 #endif
3599         } else
3600                 st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0;
3601
3602         st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0;
3603         st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0;
3604         st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto;
3605
3606         st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime -
3607                         (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3608
3609 #ifdef __DML_VBA_DEBUG__
3610         dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3611         dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw);
3612         dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3613         dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3614         dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem);
3615         dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3616         dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3617         dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3618         dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3619         dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3620         dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3621         dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes);
3622         dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp);
3623         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3624         dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3625         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3626         dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3627         dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips);
3628         dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips);
3629         dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto);
3630         dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto);
3631         dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto);
3632         dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines);
3633         dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines);
3634         dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto);
3635         dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto);
3636         dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ);
3637 #endif
3638
3639         st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0;
3640         st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime;
3641 #ifdef __DML_VBA_DEBUG__
3642         dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ);
3643         dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime);
3644         dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3645         dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3646                         __func__, VStartup * st_vars->LineTime);
3647         dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3648         dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3649         dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf);
3650         dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
3651         dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3652         dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3653         dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3654                         __func__, *DSTYAfterScaler);
3655 #endif
3656         st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3657                         MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3658
3659         if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes)
3660                 st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes;
3661
3662         *PrefetchBandwidth = 0;
3663         *DestinationLinesToRequestVMInVBlank = 0;
3664         *DestinationLinesToRequestRowInVBlank = 0;
3665         *VRatioPrefetchY = 0;
3666         *VRatioPrefetchC = 0;
3667         *RequiredPrefetchPixDataBWLuma = 0;
3668         if (st_vars->dst_y_prefetch_equ > 1) {
3669                 double PrefetchBandwidth1;
3670                 double PrefetchBandwidth2;
3671                 double PrefetchBandwidth3;
3672                 double PrefetchBandwidth4;
3673
3674                 if (st_vars->Tpre_rounded - *Tno_bw > 0) {
3675                         PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3676                                         + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3677                                         + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw);
3678                         st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1;
3679                 } else
3680                         PrefetchBandwidth1 = 0;
3681
3682                 if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw)
3683                                 && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) {
3684                         PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3685                                         + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3686                                         / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw);
3687                 }
3688
3689                 if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0)
3690                         PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) /
3691                         (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded);
3692                 else
3693                         PrefetchBandwidth2 = 0;
3694
3695                 if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) {
3696                         PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3697                                         + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded);
3698                         st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3;
3699                 } else
3700                         PrefetchBandwidth3 = 0;
3701
3702
3703                 if (VStartup == MaxVStartup &&
3704                                 (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 *
3705                                 st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) {
3706                         PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3707                                         / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded);
3708                 }
3709
3710                 if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) {
3711                         PrefetchBandwidth4 = st_vars->prefetch_sw_bytes /
3712                                         (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded);
3713                 } else {
3714                         PrefetchBandwidth4 = 0;
3715                 }
3716
3717 #ifdef __DML_VBA_DEBUG__
3718                 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded);
3719                 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3720                 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded);
3721                 dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1);
3722                 dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3);
3723                 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3724                 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3725                 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3726                 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3727 #endif
3728                 {
3729                         bool Case1OK;
3730                         bool Case2OK;
3731                         bool Case3OK;
3732
3733                         if (PrefetchBandwidth1 > 0) {
3734                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3735                                                 >= st_vars->Tvm_trips_rounded
3736                                                 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3737                                                                 / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) {
3738                                         Case1OK = true;
3739                                 } else {
3740                                         Case1OK = false;
3741                                 }
3742                         } else {
3743                                 Case1OK = false;
3744                         }
3745
3746                         if (PrefetchBandwidth2 > 0) {
3747                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3748                                                 >= st_vars->Tvm_trips_rounded
3749                                                 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3750                                                 / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) {
3751                                         Case2OK = true;
3752                                 } else {
3753                                         Case2OK = false;
3754                                 }
3755                         } else {
3756                                 Case2OK = false;
3757                         }
3758
3759                         if (PrefetchBandwidth3 > 0) {
3760                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3761                                                 st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3762                                                                 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3763                                                                 st_vars->Tr0_trips_rounded) {
3764                                         Case3OK = true;
3765                                 } else {
3766                                         Case3OK = false;
3767                                 }
3768                         } else {
3769                                 Case3OK = false;
3770                         }
3771
3772                         if (Case1OK)
3773                                 st_vars->prefetch_bw_equ = PrefetchBandwidth1;
3774                         else if (Case2OK)
3775                                 st_vars->prefetch_bw_equ = PrefetchBandwidth2;
3776                         else if (Case3OK)
3777                                 st_vars->prefetch_bw_equ = PrefetchBandwidth3;
3778                         else
3779                                 st_vars->prefetch_bw_equ = PrefetchBandwidth4;
3780
3781 #ifdef __DML_VBA_DEBUG__
3782                         dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3783                         dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3784                         dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3785                         dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ);
3786 #endif
3787
3788                         if (st_vars->prefetch_bw_equ > 0) {
3789                                 if (GPUVMEnable == true) {
3790                                         st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3791                                                         HostVMInefficiencyFactor / st_vars->prefetch_bw_equ,
3792                                                         st_vars->Tvm_trips, st_vars->LineTime / 4);
3793                                 } else {
3794                                         st_vars->Tvm_equ = st_vars->LineTime / 4;
3795                                 }
3796
3797                                 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3798                                         st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3799                                                         HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips,
3800                                                         (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4);
3801                                 } else {
3802                                         st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2;
3803                                 }
3804                         } else {
3805                                 st_vars->Tvm_equ = 0;
3806                                 st_vars->Tr0_equ = 0;
3807 #ifdef __DML_VBA_DEBUG__
3808                                 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3809 #endif
3810                         }
3811                 }
3812
3813                 if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) {
3814                         *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto;
3815                         st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto;
3816                         st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto;
3817                         *PrefetchBandwidth = st_vars->prefetch_bw_oto;
3818                 } else {
3819                         *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ;
3820                         st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ;
3821                         st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ;
3822                         *PrefetchBandwidth = st_vars->prefetch_bw_equ;
3823                 }
3824
3825                 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0;
3826
3827                 *DestinationLinesToRequestRowInVBlank =
3828                                 dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0;
3829
3830                 st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3831                                 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3832
3833 #ifdef __DML_VBA_DEBUG__
3834                 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3835                 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3836                                 __func__, *DestinationLinesToRequestVMInVBlank);
3837                 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank);
3838                 dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
3839                 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3840                                 __func__, *DestinationLinesToRequestRowInVBlank);
3841                 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3842                 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData);
3843 #endif
3844
3845                 if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) {
3846                         *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData;
3847                         *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3848 #ifdef __DML_VBA_DEBUG__
3849                         dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3850                         dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3851                         dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3852 #endif
3853                         if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3854                                 if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3855                                         *VRatioPrefetchY =
3856                                                         dml_max((double) PrefetchSourceLinesY /
3857                                                                         st_vars->LinesToRequestPrefetchPixelData,
3858                                                                         (double) MaxNumSwathY * SwathHeightY /
3859                                                                         (st_vars->LinesToRequestPrefetchPixelData -
3860                                                                         (VInitPreFillY - 3.0) / 2.0));
3861                                         *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3862                                 } else {
3863                                         MyError = true;
3864                                         *VRatioPrefetchY = 0;
3865                                 }
3866 #ifdef __DML_VBA_DEBUG__
3867                                 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3868                                 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3869                                 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3870 #endif
3871                         }
3872
3873                         *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData;
3874                         *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3875
3876 #ifdef __DML_VBA_DEBUG__
3877                         dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3878                         dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3879                         dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3880 #endif
3881                         if ((SwathHeightC > 4)) {
3882                                 if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3883                                         *VRatioPrefetchC =
3884                                                 dml_max(*VRatioPrefetchC,
3885                                                         (double) MaxNumSwathC * SwathHeightC /
3886                                                         (st_vars->LinesToRequestPrefetchPixelData -
3887                                                         (VInitPreFillC - 3.0) / 2.0));
3888                                         *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3889                                 } else {
3890                                         MyError = true;
3891                                         *VRatioPrefetchC = 0;
3892                                 }
3893 #ifdef __DML_VBA_DEBUG__
3894                                 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3895                                 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3896                                 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3897 #endif
3898                         }
3899
3900                         *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3901                                         / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3902                                         / st_vars->LineTime;
3903
3904 #ifdef __DML_VBA_DEBUG__
3905                         dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3906                         dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3907                         dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
3908                         dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3909                                         __func__, *RequiredPrefetchPixDataBWLuma);
3910 #endif
3911                         *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3912                                         st_vars->LinesToRequestPrefetchPixelData
3913                                         * myPipe->BytePerPixelC
3914                                         * swath_width_chroma_ub / st_vars->LineTime;
3915                 } else {
3916                         MyError = true;
3917 #ifdef __DML_VBA_DEBUG__
3918                         dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3919                                         __func__, st_vars->LinesToRequestPrefetchPixelData);
3920 #endif
3921                         *VRatioPrefetchY = 0;
3922                         *VRatioPrefetchC = 0;
3923                         *RequiredPrefetchPixDataBWLuma = 0;
3924                         *RequiredPrefetchPixDataBWChroma = 0;
3925                 }
3926 #ifdef __DML_VBA_DEBUG__
3927                 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3928                         (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime +
3929                         2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE);
3930                 dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE);
3931                 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
3932                         (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime);
3933                 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
3934                 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime -
3935                         st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
3936                         ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup);
3937                 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
3938                                 PixelPTEBytesPerRow);
3939 #endif
3940         } else {
3941                 MyError = true;
3942 #ifdef __DML_VBA_DEBUG__
3943                 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
3944                                 __func__, st_vars->dst_y_prefetch_equ);
3945 #endif
3946         }
3947
3948         {
3949                 double prefetch_vm_bw;
3950                 double prefetch_row_bw;
3951
3952                 if (PDEAndMetaPTEBytesFrame == 0) {
3953                         prefetch_vm_bw = 0;
3954                 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
3955 #ifdef __DML_VBA_DEBUG__
3956                         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3957                         dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3958                         dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3959                                         __func__, *DestinationLinesToRequestVMInVBlank);
3960                         dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
3961 #endif
3962                         prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
3963                                         (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime);
3964 #ifdef __DML_VBA_DEBUG__
3965                         dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
3966 #endif
3967                 } else {
3968                         prefetch_vm_bw = 0;
3969                         MyError = true;
3970 #ifdef __DML_VBA_DEBUG__
3971                         dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
3972                                         __func__, *DestinationLinesToRequestVMInVBlank);
3973 #endif
3974                 }
3975
3976                 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
3977                         prefetch_row_bw = 0;
3978                 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
3979                         prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
3980                                         (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime);
3981
3982 #ifdef __DML_VBA_DEBUG__
3983                         dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3984                         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3985                         dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3986                                         __func__, *DestinationLinesToRequestRowInVBlank);
3987                         dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
3988 #endif
3989                 } else {
3990                         prefetch_row_bw = 0;
3991                         MyError = true;
3992 #ifdef __DML_VBA_DEBUG__
3993                         dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
3994                                         __func__, *DestinationLinesToRequestRowInVBlank);
3995 #endif
3996                 }
3997
3998                 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
3999         }
4000
4001         if (MyError) {
4002                 *PrefetchBandwidth = 0;
4003                 st_vars->TimeForFetchingMetaPTE = 0;
4004                 st_vars->TimeForFetchingRowInVBlank = 0;
4005                 *DestinationLinesToRequestVMInVBlank = 0;
4006                 *DestinationLinesToRequestRowInVBlank = 0;
4007                 *DestinationLinesForPrefetch = 0;
4008                 st_vars->LinesToRequestPrefetchPixelData = 0;
4009                 *VRatioPrefetchY = 0;
4010                 *VRatioPrefetchC = 0;
4011                 *RequiredPrefetchPixDataBWLuma = 0;
4012                 *RequiredPrefetchPixDataBWChroma = 0;
4013         }
4014
4015         return MyError;
4016 } // CalculatePrefetchSchedule
4017
4018 void dml32_CalculateFlipSchedule(
4019                 double HostVMInefficiencyFactor,
4020                 double UrgentExtraLatency,
4021                 double UrgentLatency,
4022                 unsigned int GPUVMMaxPageTableLevels,
4023                 bool HostVMEnable,
4024                 unsigned int HostVMMaxNonCachedPageTableLevels,
4025                 bool GPUVMEnable,
4026                 double HostVMMinPageSize,
4027                 double PDEAndMetaPTEBytesPerFrame,
4028                 double MetaRowBytes,
4029                 double DPTEBytesPerRow,
4030                 double BandwidthAvailableForImmediateFlip,
4031                 unsigned int TotImmediateFlipBytes,
4032                 enum source_format_class SourcePixelFormat,
4033                 double LineTime,
4034                 double VRatio,
4035                 double VRatioChroma,
4036                 double Tno_bw,
4037                 bool DCCEnable,
4038                 unsigned int dpte_row_height,
4039                 unsigned int meta_row_height,
4040                 unsigned int dpte_row_height_chroma,
4041                 unsigned int meta_row_height_chroma,
4042                 bool    use_one_row_for_frame_flip,
4043
4044                 /* Output */
4045                 double *DestinationLinesToRequestVMInImmediateFlip,
4046                 double *DestinationLinesToRequestRowInImmediateFlip,
4047                 double *final_flip_bw,
4048                 bool *ImmediateFlipSupportedForPipe)
4049 {
4050         double min_row_time = 0.0;
4051         unsigned int HostVMDynamicLevelsTrips;
4052         double TimeForFetchingMetaPTEImmediateFlip;
4053         double TimeForFetchingRowInVBlankImmediateFlip;
4054         double ImmediateFlipBW;
4055
4056         if (GPUVMEnable == true && HostVMEnable == true)
4057                 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4058         else
4059                 HostVMDynamicLevelsTrips = 0;
4060
4061 #ifdef __DML_VBA_DEBUG__
4062         dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4063         dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4064 #endif
4065
4066         if (TotImmediateFlipBytes > 0) {
4067                 if (use_one_row_for_frame_flip) {
4068                         ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4069                                         BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4070                 } else {
4071                         ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4072                                         BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4073                 }
4074                 if (GPUVMEnable == true) {
4075                         TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4076                                         HostVMInefficiencyFactor / ImmediateFlipBW,
4077                                         UrgentExtraLatency + UrgentLatency *
4078                                         (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4079                                         LineTime / 4.0);
4080                 } else {
4081                         TimeForFetchingMetaPTEImmediateFlip = 0;
4082                 }
4083                 if ((GPUVMEnable == true || DCCEnable == true)) {
4084                         TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4085                                         (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4086                                         UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4087                 } else {
4088                         TimeForFetchingRowInVBlankImmediateFlip = 0;
4089                 }
4090
4091                 *DestinationLinesToRequestVMInImmediateFlip =
4092                                 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4093                 *DestinationLinesToRequestRowInImmediateFlip =
4094                                 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4095
4096                 if (GPUVMEnable == true) {
4097                         *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4098                                         (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4099                                         (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4100                                         (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4101                 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4102                         *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4103                                         (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4104                 } else {
4105                         *final_flip_bw = 0;
4106                 }
4107         } else {
4108                 TimeForFetchingMetaPTEImmediateFlip = 0;
4109                 TimeForFetchingRowInVBlankImmediateFlip = 0;
4110                 *DestinationLinesToRequestVMInImmediateFlip = 0;
4111                 *DestinationLinesToRequestRowInImmediateFlip = 0;
4112                 *final_flip_bw = 0;
4113         }
4114
4115         if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4116                 if (GPUVMEnable == true && DCCEnable != true) {
4117                         min_row_time = dml_min(dpte_row_height *
4118                                         LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4119                 } else if (GPUVMEnable != true && DCCEnable == true) {
4120                         min_row_time = dml_min(meta_row_height *
4121                                         LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4122                 } else {
4123                         min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4124                                         LineTime / VRatio, dpte_row_height_chroma * LineTime /
4125                                         VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4126                 }
4127         } else {
4128                 if (GPUVMEnable == true && DCCEnable != true) {
4129                         min_row_time = dpte_row_height * LineTime / VRatio;
4130                 } else if (GPUVMEnable != true && DCCEnable == true) {
4131                         min_row_time = meta_row_height * LineTime / VRatio;
4132                 } else {
4133                         min_row_time =
4134                                 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4135                 }
4136         }
4137
4138         if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4139                         || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4140                                         > min_row_time) {
4141                 *ImmediateFlipSupportedForPipe = false;
4142         } else {
4143                 *ImmediateFlipSupportedForPipe = true;
4144         }
4145
4146 #ifdef __DML_VBA_DEBUG__
4147         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4148         dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4149         dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4150                         __func__, *DestinationLinesToRequestVMInImmediateFlip);
4151         dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4152                         __func__, *DestinationLinesToRequestRowInImmediateFlip);
4153         dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4154         dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4155                         __func__, TimeForFetchingRowInVBlankImmediateFlip);
4156         dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4157         dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4158 #endif
4159 } // CalculateFlipSchedule
4160
4161 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4162                 struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
4163                 bool USRRetrainingRequiredFinal,
4164                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4165                 unsigned int PrefetchMode,
4166                 unsigned int NumberOfActiveSurfaces,
4167                 unsigned int MaxLineBufferLines,
4168                 unsigned int LineBufferSize,
4169                 unsigned int WritebackInterfaceBufferSize,
4170                 double DCFCLK,
4171                 double ReturnBW,
4172                 bool SynchronizeTimingsFinal,
4173                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4174                 bool DRRDisplay[],
4175                 unsigned int dpte_group_bytes[],
4176                 unsigned int meta_row_height[],
4177                 unsigned int meta_row_height_chroma[],
4178                 SOCParametersList mmSOCParameters,
4179                 unsigned int WritebackChunkSize,
4180                 double SOCCLK,
4181                 double DCFClkDeepSleep,
4182                 unsigned int DETBufferSizeY[],
4183                 unsigned int DETBufferSizeC[],
4184                 unsigned int SwathHeightY[],
4185                 unsigned int SwathHeightC[],
4186                 unsigned int LBBitPerPixel[],
4187                 double SwathWidthY[],
4188                 double SwathWidthC[],
4189                 double HRatio[],
4190                 double HRatioChroma[],
4191                 unsigned int VTaps[],
4192                 unsigned int VTapsChroma[],
4193                 double VRatio[],
4194                 double VRatioChroma[],
4195                 unsigned int HTotal[],
4196                 unsigned int VTotal[],
4197                 unsigned int VActive[],
4198                 double PixelClock[],
4199                 unsigned int BlendingAndTiming[],
4200                 unsigned int DPPPerSurface[],
4201                 double BytePerPixelDETY[],
4202                 double BytePerPixelDETC[],
4203                 double DSTXAfterScaler[],
4204                 double DSTYAfterScaler[],
4205                 bool WritebackEnable[],
4206                 enum source_format_class WritebackPixelFormat[],
4207                 double WritebackDestinationWidth[],
4208                 double WritebackDestinationHeight[],
4209                 double WritebackSourceHeight[],
4210                 bool UnboundedRequestEnabled,
4211                 unsigned int CompressedBufferSizeInkByte,
4212
4213                 /* Output */
4214                 Watermarks *Watermark,
4215                 enum clock_change_support *DRAMClockChangeSupport,
4216                 double MaxActiveDRAMClockChangeLatencySupported[],
4217                 unsigned int SubViewportLinesNeededInMALL[],
4218                 enum dm_fclock_change_support *FCLKChangeSupport,
4219                 double *MinActiveFCLKChangeLatencySupported,
4220                 bool *USRRetrainingSupport,
4221                 double ActiveDRAMClockChangeLatencyMargin[])
4222 {
4223         unsigned int i, j, k;
4224
4225         st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0;
4226         st_vars->DRAMClockChangeSupportNumber = 0;
4227         st_vars->DRAMClockChangeMethod = 0;
4228         st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4229         st_vars->MinActiveFCLKChangeMargin = 0.;
4230         st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4231         st_vars->TotalPixelBW = 0.0;
4232         st_vars->TotalActiveWriteback = 0;
4233
4234         Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4235         Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4236                         + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4237         Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
4238         Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
4239         Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4240                         + 10 / DCFClkDeepSleep;
4241         Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4242                         + 10 / DCFClkDeepSleep;
4243         Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4244                         + 10 / DCFClkDeepSleep;
4245         Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4246                         + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4247
4248 #ifdef __DML_VBA_DEBUG__
4249         dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4250         dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4251         dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4252         dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
4253         dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
4254         dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
4255         dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
4256         dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
4257         dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
4258         dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
4259         dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4260                         __func__, Watermark->Z8StutterEnterPlusExitWatermark);
4261 #endif
4262
4263
4264         st_vars->TotalActiveWriteback = 0;
4265         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4266                 if (WritebackEnable[k] == true)
4267                         st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1;
4268         }
4269
4270         if (st_vars->TotalActiveWriteback <= 1) {
4271                 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4272         } else {
4273                 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4274                                 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4275         }
4276         if (USRRetrainingRequiredFinal)
4277                 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
4278                                 + mmSOCParameters.USRRetrainingLatency;
4279
4280         if (st_vars->TotalActiveWriteback <= 1) {
4281                 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4282                                 + mmSOCParameters.WritebackLatency;
4283                 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4284                                 + mmSOCParameters.WritebackLatency;
4285         } else {
4286                 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4287                                 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4288                 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4289                                 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
4290         }
4291
4292         if (USRRetrainingRequiredFinal)
4293                 Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
4294                                 + mmSOCParameters.USRRetrainingLatency;
4295
4296         if (USRRetrainingRequiredFinal)
4297                 Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
4298                                 + mmSOCParameters.USRRetrainingLatency;
4299
4300 #ifdef __DML_VBA_DEBUG__
4301         dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4302                         __func__, Watermark->WritebackDRAMClockChangeWatermark);
4303         dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
4304         dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
4305         dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
4306         dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4307 #endif
4308
4309         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4310                 st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
4311                                 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
4312         }
4313
4314         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4315
4316                 st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
4317                 st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
4318
4319
4320 #ifdef __DML_VBA_DEBUG__
4321                 dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
4322                 dml_print("DML::%s: k=%d, LineBufferSize     = %d\n", __func__, k, LineBufferSize);
4323                 dml_print("DML::%s: k=%d, LBBitPerPixel      = %d\n", __func__, k, LBBitPerPixel[k]);
4324                 dml_print("DML::%s: k=%d, HRatio             = %f\n", __func__, k, HRatio[k]);
4325                 dml_print("DML::%s: k=%d, VTaps              = %d\n", __func__, k, VTaps[k]);
4326 #endif
4327
4328                 st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
4329                 st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
4330                 st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k];
4331
4332                 if (UnboundedRequestEnabled) {
4333                         st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY
4334                                         + CompressedBufferSizeInkByte * 1024
4335                                                         * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
4336                                                         / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW;
4337                 }
4338
4339                 st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4340                 st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]);
4341                 st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
4342
4343                 st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY
4344                                 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
4345
4346                 if (NumberOfActiveSurfaces > 1) {
4347                         st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY
4348                                         - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
4349                                                         / PixelClock[k] / VRatio[k];
4350                 }
4351
4352                 if (BytePerPixelDETC[k] > 0) {
4353                         st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4354                         st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]);
4355                         st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
4356                                         / VRatioChroma[k];
4357                         st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC
4358                                         - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
4359                                                         / PixelClock[k];
4360                         if (NumberOfActiveSurfaces > 1) {
4361                                 st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC
4362                                                 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
4363                                                                 / PixelClock[k] / VRatioChroma[k];
4364                         }
4365                         st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY,
4366                                         st_vars->ActiveClockChangeLatencyHidingC);
4367                 } else {
4368                         st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY;
4369                 }
4370
4371                 ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4372                                 - Watermark->DRAMClockChangeWatermark;
4373                 st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4374                                 - Watermark->FCLKChangeWatermark;
4375                 st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
4376
4377                 if (WritebackEnable[k]) {
4378                         st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
4379                                         / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
4380                                                         / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
4381                         if (WritebackPixelFormat[k] == dm_444_64)
4382                                 st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2;
4383
4384                         st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding
4385                                         - Watermark->WritebackDRAMClockChangeWatermark;
4386
4387                         st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding
4388                                         - Watermark->WritebackFCLKChangeWatermark;
4389
4390                         ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4391                                         st_vars->WritebackFCLKChangeLatencyMargin);
4392                         st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k],
4393                                         st_vars->WritebackDRAMClockChangeLatencyMargin);
4394                 }
4395                 MaxActiveDRAMClockChangeLatencySupported[k] =
4396                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4397                                                 0 :
4398                                                 (ActiveDRAMClockChangeLatencyMargin[k]
4399                                                                 + mmSOCParameters.DRAMClockChangeLatency);
4400         }
4401
4402         for (i = 0; i < NumberOfActiveSurfaces; ++i) {
4403                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
4404                         if (i == j ||
4405                                         (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
4406                                         (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
4407                                         (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
4408                                         (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
4409                                         HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
4410                                         VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4411                                         (DRRDisplay[i] || DRRDisplay[j]))) {
4412                                 st_vars->SynchronizedSurfaces[i][j] = true;
4413                         } else {
4414                                 st_vars->SynchronizedSurfaces[i][j] = false;
4415                         }
4416                 }
4417         }
4418
4419         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4420                 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4421                                 (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4422                                 st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) {
4423                         st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4424                         st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k];
4425                         st_vars->SurfaceWithMinActiveFCLKChangeMargin = k;
4426                 }
4427         }
4428
4429         *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4430
4431         st_vars->SameTimingForFCLKChange = true;
4432         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4433                 if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) {
4434                         if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4435                                         (st_vars->SameTimingForFCLKChange ||
4436                                         st_vars->ActiveFCLKChangeLatencyMargin[k] <
4437                                         st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4438                                 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k];
4439                         }
4440                         st_vars->SameTimingForFCLKChange = false;
4441                 }
4442         }
4443
4444         if (st_vars->MinActiveFCLKChangeMargin > 0) {
4445                 *FCLKChangeSupport = dm_fclock_change_vactive;
4446         } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4447                         (PrefetchMode <= 1)) {
4448                 *FCLKChangeSupport = dm_fclock_change_vblank;
4449         } else {
4450                 *FCLKChangeSupport = dm_fclock_change_unsupported;
4451         }
4452
4453         *USRRetrainingSupport = true;
4454         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4455                 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4456                                 (st_vars->USRRetrainingLatencyMargin[k] < 0)) {
4457                         *USRRetrainingSupport = false;
4458                 }
4459         }
4460
4461         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4462                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4463                                 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4464                                 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4465                                 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4466                         if (PrefetchMode > 0) {
4467                                 st_vars->DRAMClockChangeSupportNumber = 2;
4468                         } else if (st_vars->DRAMClockChangeSupportNumber == 0) {
4469                                 st_vars->DRAMClockChangeSupportNumber = 1;
4470                                 st_vars->LastSurfaceWithoutMargin = k;
4471                         } else if (st_vars->DRAMClockChangeSupportNumber == 1 &&
4472                                         !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) {
4473                                 st_vars->DRAMClockChangeSupportNumber = 2;
4474                         }
4475                 }
4476         }
4477
4478         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4479                 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4480                         st_vars->DRAMClockChangeMethod = 1;
4481                 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4482                         st_vars->DRAMClockChangeMethod = 2;
4483         }
4484
4485         if (st_vars->DRAMClockChangeMethod == 0) {
4486                 if (st_vars->DRAMClockChangeSupportNumber == 0)
4487                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4488                 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4489                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4490                 else
4491                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4492         } else if (st_vars->DRAMClockChangeMethod == 1) {
4493                 if (st_vars->DRAMClockChangeSupportNumber == 0)
4494                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4495                 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4496                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4497                 else
4498                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4499         } else {
4500                 if (st_vars->DRAMClockChangeSupportNumber == 0)
4501                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4502                 else if (st_vars->DRAMClockChangeSupportNumber == 1)
4503                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4504                 else
4505                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4506         }
4507
4508         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4509                 unsigned int dst_y_pstate;
4510                 unsigned int src_y_pstate_l;
4511                 unsigned int src_y_pstate_c;
4512                 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4513
4514                 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
4515                 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
4516                 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k];
4517                 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
4518
4519 #ifdef __DML_VBA_DEBUG__
4520 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4521 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4522 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4523 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4524 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]);
4525 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4526 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4527 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4528 dml_print("DML::%s: k=%d, meta_row_height   = %d\n", __func__, k, meta_row_height[k]);
4529 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4530 #endif
4531                 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4532
4533                 if (BytePerPixelDETC[k] > 0) {
4534                         src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
4535                         src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k];
4536                         sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
4537                         SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4538
4539 #ifdef __DML_VBA_DEBUG__
4540 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4541 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4542 dml_print("DML::%s: k=%d, meta_row_height_chroma    = %d\n", __func__, k, meta_row_height_chroma[k]);
4543 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4544 #endif
4545                 }
4546         }
4547 #ifdef __DML_VBA_DEBUG__
4548         dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4549         dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4550         dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4551                         __func__, *MinActiveFCLKChangeLatencySupported);
4552         dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4553 #endif
4554 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4555
4556 double dml32_CalculateWriteBackDISPCLK(
4557                 enum source_format_class WritebackPixelFormat,
4558                 double PixelClock,
4559                 double WritebackHRatio,
4560                 double WritebackVRatio,
4561                 unsigned int WritebackHTaps,
4562                 unsigned int WritebackVTaps,
4563                 unsigned int   WritebackSourceWidth,
4564                 unsigned int   WritebackDestinationWidth,
4565                 unsigned int HTotal,
4566                 unsigned int WritebackLineBufferSize,
4567                 double DISPCLKDPPCLKVCOSpeed)
4568 {
4569         double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4570
4571         DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4572         DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4573         DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4574                         WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4575         return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4576 }
4577
4578 void dml32_CalculateMinAndMaxPrefetchMode(
4579                 enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4580                 unsigned int             *MinPrefetchMode,
4581                 unsigned int             *MaxPrefetchMode)
4582 {
4583         if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4584                 *MinPrefetchMode = 3;
4585                 *MaxPrefetchMode = 3;
4586         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4587                 *MinPrefetchMode = 2;
4588                 *MaxPrefetchMode = 2;
4589         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4590                 *MinPrefetchMode = 1;
4591                 *MaxPrefetchMode = 1;
4592         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4593                 *MinPrefetchMode = 0;
4594                 *MaxPrefetchMode = 0;
4595         } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4596                         dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4597                 *MinPrefetchMode = 0;
4598                 *MaxPrefetchMode = 3;
4599         } else {
4600                 *MinPrefetchMode = 0;
4601                 *MaxPrefetchMode = 3;
4602         }
4603 } // CalculateMinAndMaxPrefetchMode
4604
4605 void dml32_CalculatePixelDeliveryTimes(
4606                 unsigned int             NumberOfActiveSurfaces,
4607                 double              VRatio[],
4608                 double              VRatioChroma[],
4609                 double              VRatioPrefetchY[],
4610                 double              VRatioPrefetchC[],
4611                 unsigned int             swath_width_luma_ub[],
4612                 unsigned int             swath_width_chroma_ub[],
4613                 unsigned int             DPPPerSurface[],
4614                 double              HRatio[],
4615                 double              HRatioChroma[],
4616                 double              PixelClock[],
4617                 double              PSCL_THROUGHPUT[],
4618                 double              PSCL_THROUGHPUT_CHROMA[],
4619                 double              Dppclk[],
4620                 unsigned int             BytePerPixelC[],
4621                 enum dm_rotation_angle   SourceRotation[],
4622                 unsigned int             NumberOfCursors[],
4623                 unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4624                 unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4625                 unsigned int             BlockWidth256BytesY[],
4626                 unsigned int             BlockHeight256BytesY[],
4627                 unsigned int             BlockWidth256BytesC[],
4628                 unsigned int             BlockHeight256BytesC[],
4629
4630                 /* Output */
4631                 double              DisplayPipeLineDeliveryTimeLuma[],
4632                 double              DisplayPipeLineDeliveryTimeChroma[],
4633                 double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4634                 double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4635                 double              DisplayPipeRequestDeliveryTimeLuma[],
4636                 double              DisplayPipeRequestDeliveryTimeChroma[],
4637                 double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4638                 double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4639                 double              CursorRequestDeliveryTime[],
4640                 double              CursorRequestDeliveryTimePrefetch[])
4641 {
4642         double   req_per_swath_ub;
4643         unsigned int k;
4644
4645         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4646
4647 #ifdef __DML_VBA_DEBUG__
4648                 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4649                 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4650                 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4651                 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4652                 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4653                 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4654                 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4655                 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4656                 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4657                 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4658                 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4659 #endif
4660
4661                 if (VRatio[k] <= 1) {
4662                         DisplayPipeLineDeliveryTimeLuma[k] =
4663                                         swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4664                 } else {
4665                         DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4666                 }
4667
4668                 if (BytePerPixelC[k] == 0) {
4669                         DisplayPipeLineDeliveryTimeChroma[k] = 0;
4670                 } else {
4671                         if (VRatioChroma[k] <= 1) {
4672                                 DisplayPipeLineDeliveryTimeChroma[k] =
4673                                         swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4674                         } else {
4675                                 DisplayPipeLineDeliveryTimeChroma[k] =
4676                                         swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4677                         }
4678                 }
4679
4680                 if (VRatioPrefetchY[k] <= 1) {
4681                         DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4682                                         swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4683                 } else {
4684                         DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4685                                         swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4686                 }
4687
4688                 if (BytePerPixelC[k] == 0) {
4689                         DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4690                 } else {
4691                         if (VRatioPrefetchC[k] <= 1) {
4692                                 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4693                                                 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4694                         } else {
4695                                 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4696                                                 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4697                         }
4698                 }
4699 #ifdef __DML_VBA_DEBUG__
4700                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4701                                 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4702                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4703                                 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4704                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4705                                 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4706                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4707                                 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4708 #endif
4709         }
4710
4711         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4712                 if (!IsVertical(SourceRotation[k]))
4713                         req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4714                 else
4715                         req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4716 #ifdef __DML_VBA_DEBUG__
4717                 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4718 #endif
4719
4720                 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4721                 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4722                                 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4723                 if (BytePerPixelC[k] == 0) {
4724                         DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4725                         DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4726                 } else {
4727                         if (!IsVertical(SourceRotation[k]))
4728                                 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4729                         else
4730                                 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4731 #ifdef __DML_VBA_DEBUG__
4732                         dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4733 #endif
4734                         DisplayPipeRequestDeliveryTimeChroma[k] =
4735                                         DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4736                         DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4737                                         DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4738                 }
4739 #ifdef __DML_VBA_DEBUG__
4740                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4741                                 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4742                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4743                                 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4744                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4745                                 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4746                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4747                                 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4748 #endif
4749         }
4750
4751         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4752                 unsigned int cursor_req_per_width;
4753
4754                 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4755                                 256.0 / 8.0, 1.0);
4756                 if (NumberOfCursors[k] > 0) {
4757                         if (VRatio[k] <= 1) {
4758                                 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4759                                                 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4760                         } else {
4761                                 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4762                                                 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4763                         }
4764                         if (VRatioPrefetchY[k] <= 1) {
4765                                 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4766                                                 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4767                         } else {
4768                                 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4769                                                 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4770                         }
4771                 } else {
4772                         CursorRequestDeliveryTime[k] = 0;
4773                         CursorRequestDeliveryTimePrefetch[k] = 0;
4774                 }
4775 #ifdef __DML_VBA_DEBUG__
4776                 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4777                                 __func__, k, NumberOfCursors[k]);
4778                 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4779                                 __func__, k, CursorRequestDeliveryTime[k]);
4780                 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4781                                 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4782 #endif
4783         }
4784 } // CalculatePixelDeliveryTimes
4785
4786 void dml32_CalculateMetaAndPTETimes(
4787                 bool use_one_row_for_frame[],
4788                 unsigned int NumberOfActiveSurfaces,
4789                 bool GPUVMEnable,
4790                 unsigned int MetaChunkSize,
4791                 unsigned int MinMetaChunkSizeBytes,
4792                 unsigned int    HTotal[],
4793                 double  VRatio[],
4794                 double  VRatioChroma[],
4795                 double  DestinationLinesToRequestRowInVBlank[],
4796                 double  DestinationLinesToRequestRowInImmediateFlip[],
4797                 bool DCCEnable[],
4798                 double  PixelClock[],
4799                 unsigned int BytePerPixelY[],
4800                 unsigned int BytePerPixelC[],
4801                 enum dm_rotation_angle SourceRotation[],
4802                 unsigned int dpte_row_height[],
4803                 unsigned int dpte_row_height_chroma[],
4804                 unsigned int meta_row_width[],
4805                 unsigned int meta_row_width_chroma[],
4806                 unsigned int meta_row_height[],
4807                 unsigned int meta_row_height_chroma[],
4808                 unsigned int meta_req_width[],
4809                 unsigned int meta_req_width_chroma[],
4810                 unsigned int meta_req_height[],
4811                 unsigned int meta_req_height_chroma[],
4812                 unsigned int dpte_group_bytes[],
4813                 unsigned int    PTERequestSizeY[],
4814                 unsigned int    PTERequestSizeC[],
4815                 unsigned int    PixelPTEReqWidthY[],
4816                 unsigned int    PixelPTEReqHeightY[],
4817                 unsigned int    PixelPTEReqWidthC[],
4818                 unsigned int    PixelPTEReqHeightC[],
4819                 unsigned int    dpte_row_width_luma_ub[],
4820                 unsigned int    dpte_row_width_chroma_ub[],
4821
4822                 /* Output */
4823                 double DST_Y_PER_PTE_ROW_NOM_L[],
4824                 double DST_Y_PER_PTE_ROW_NOM_C[],
4825                 double DST_Y_PER_META_ROW_NOM_L[],
4826                 double DST_Y_PER_META_ROW_NOM_C[],
4827                 double TimePerMetaChunkNominal[],
4828                 double TimePerChromaMetaChunkNominal[],
4829                 double TimePerMetaChunkVBlank[],
4830                 double TimePerChromaMetaChunkVBlank[],
4831                 double TimePerMetaChunkFlip[],
4832                 double TimePerChromaMetaChunkFlip[],
4833                 double time_per_pte_group_nom_luma[],
4834                 double time_per_pte_group_vblank_luma[],
4835                 double time_per_pte_group_flip_luma[],
4836                 double time_per_pte_group_nom_chroma[],
4837                 double time_per_pte_group_vblank_chroma[],
4838                 double time_per_pte_group_flip_chroma[])
4839 {
4840         unsigned int   meta_chunk_width;
4841         unsigned int   min_meta_chunk_width;
4842         unsigned int   meta_chunk_per_row_int;
4843         unsigned int   meta_row_remainder;
4844         unsigned int   meta_chunk_threshold;
4845         unsigned int   meta_chunks_per_row_ub;
4846         unsigned int   meta_chunk_width_chroma;
4847         unsigned int   min_meta_chunk_width_chroma;
4848         unsigned int   meta_chunk_per_row_int_chroma;
4849         unsigned int   meta_row_remainder_chroma;
4850         unsigned int   meta_chunk_threshold_chroma;
4851         unsigned int   meta_chunks_per_row_ub_chroma;
4852         unsigned int   dpte_group_width_luma;
4853         unsigned int   dpte_groups_per_row_luma_ub;
4854         unsigned int   dpte_group_width_chroma;
4855         unsigned int   dpte_groups_per_row_chroma_ub;
4856         unsigned int k;
4857
4858         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4859                 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4860                 if (BytePerPixelC[k] == 0)
4861                         DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4862                 else
4863                         DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4864                 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4865                 if (BytePerPixelC[k] == 0)
4866                         DST_Y_PER_META_ROW_NOM_C[k] = 0;
4867                 else
4868                         DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4869         }
4870
4871         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4872                 if (DCCEnable[k] == true) {
4873                         meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4874                         min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4875                         meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4876                         meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4877                         if (!IsVertical(SourceRotation[k]))
4878                                 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4879                         else
4880                                 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4881
4882                         if (meta_row_remainder <= meta_chunk_threshold)
4883                                 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4884                         else
4885                                 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4886
4887                         TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4888                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4889                         TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4890                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4891                         TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4892                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4893                         if (BytePerPixelC[k] == 0) {
4894                                 TimePerChromaMetaChunkNominal[k] = 0;
4895                                 TimePerChromaMetaChunkVBlank[k] = 0;
4896                                 TimePerChromaMetaChunkFlip[k] = 0;
4897                         } else {
4898                                 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4899                                                 meta_row_height_chroma[k];
4900                                 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4901                                                 meta_row_height_chroma[k];
4902                                 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4903                                                 meta_chunk_width_chroma;
4904                                 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4905                                 if (!IsVertical(SourceRotation[k])) {
4906                                         meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4907                                                         meta_req_width_chroma[k];
4908                                 } else {
4909                                         meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4910                                                         meta_req_height_chroma[k];
4911                                 }
4912                                 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4913                                         meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4914                                 else
4915                                         meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4916
4917                                 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4918                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4919                                 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4920                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4921                                 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4922                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4923                         }
4924                 } else {
4925                         TimePerMetaChunkNominal[k] = 0;
4926                         TimePerMetaChunkVBlank[k] = 0;
4927                         TimePerMetaChunkFlip[k] = 0;
4928                         TimePerChromaMetaChunkNominal[k] = 0;
4929                         TimePerChromaMetaChunkVBlank[k] = 0;
4930                         TimePerChromaMetaChunkFlip[k] = 0;
4931                 }
4932         }
4933
4934         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4935                 if (GPUVMEnable == true) {
4936                         if (!IsVertical(SourceRotation[k])) {
4937                                 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4938                                                 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4939                         } else {
4940                                 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4941                                                 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
4942                         }
4943
4944                         if (use_one_row_for_frame[k]) {
4945                                 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4946                                                 (double) dpte_group_width_luma / 2.0, 1.0);
4947                         } else {
4948                                 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4949                                                 (double) dpte_group_width_luma, 1.0);
4950                         }
4951 #ifdef __DML_VBA_DEBUG__
4952                         dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
4953                                         __func__, k, use_one_row_for_frame[k]);
4954                         dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
4955                                         __func__, k, dpte_group_bytes[k]);
4956                         dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
4957                                         __func__, k, PTERequestSizeY[k]);
4958                         dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
4959                                         __func__, k, PixelPTEReqWidthY[k]);
4960                         dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
4961                                         __func__, k, PixelPTEReqHeightY[k]);
4962                         dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
4963                                         __func__, k, dpte_row_width_luma_ub[k]);
4964                         dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
4965                                         __func__, k, dpte_group_width_luma);
4966                         dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
4967                                         __func__, k, dpte_groups_per_row_luma_ub);
4968 #endif
4969
4970                         time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
4971                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
4972                         time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
4973                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
4974                         time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4975                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
4976                         if (BytePerPixelC[k] == 0) {
4977                                 time_per_pte_group_nom_chroma[k] = 0;
4978                                 time_per_pte_group_vblank_chroma[k] = 0;
4979                                 time_per_pte_group_flip_chroma[k] = 0;
4980                         } else {
4981                                 if (!IsVertical(SourceRotation[k])) {
4982                                         dpte_group_width_chroma = (double) dpte_group_bytes[k] /
4983                                                         (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
4984                                 } else {
4985                                         dpte_group_width_chroma = (double) dpte_group_bytes[k] /
4986                                                         (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
4987                                 }
4988
4989                                 if (use_one_row_for_frame[k]) {
4990                                         dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
4991                                                         (double) dpte_group_width_chroma / 2.0, 1.0);
4992                                 } else {
4993                                         dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
4994                                                         (double) dpte_group_width_chroma, 1.0);
4995                                 }
4996 #ifdef __DML_VBA_DEBUG__
4997                                 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
4998                                                 __func__, k, dpte_row_width_chroma_ub[k]);
4999                                 dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5000                                                 __func__, k, dpte_group_width_chroma);
5001                                 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5002                                                 __func__, k, dpte_groups_per_row_chroma_ub);
5003 #endif
5004                                 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5005                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5006                                 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5007                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5008                                 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5009                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5010                         }
5011                 } else {
5012                         time_per_pte_group_nom_luma[k] = 0;
5013                         time_per_pte_group_vblank_luma[k] = 0;
5014                         time_per_pte_group_flip_luma[k] = 0;
5015                         time_per_pte_group_nom_chroma[k] = 0;
5016                         time_per_pte_group_vblank_chroma[k] = 0;
5017                         time_per_pte_group_flip_chroma[k] = 0;
5018                 }
5019 #ifdef __DML_VBA_DEBUG__
5020                 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5021                                 __func__, k, DestinationLinesToRequestRowInVBlank[k]);
5022                 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5023                                 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5024                 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5025                                 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5026                 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5027                                 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5028                 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5029                                 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5030                 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5031                                 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5032                 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5033                                 __func__, k, TimePerMetaChunkNominal[k]);
5034                 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5035                                 __func__, k, TimePerMetaChunkVBlank[k]);
5036                 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5037                                 __func__, k, TimePerMetaChunkFlip[k]);
5038                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5039                                 __func__, k, TimePerChromaMetaChunkNominal[k]);
5040                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5041                                 __func__, k, TimePerChromaMetaChunkVBlank[k]);
5042                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5043                                 __func__, k, TimePerChromaMetaChunkFlip[k]);
5044                 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5045                                 __func__, k, time_per_pte_group_nom_luma[k]);
5046                 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5047                                 __func__, k, time_per_pte_group_vblank_luma[k]);
5048                 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5049                                 __func__, k, time_per_pte_group_flip_luma[k]);
5050                 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5051                                 __func__, k, time_per_pte_group_nom_chroma[k]);
5052                 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5053                                 __func__, k, time_per_pte_group_vblank_chroma[k]);
5054                 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5055                                 __func__, k, time_per_pte_group_flip_chroma[k]);
5056 #endif
5057         }
5058 } // CalculateMetaAndPTETimes
5059
5060 void dml32_CalculateVMGroupAndRequestTimes(
5061                 unsigned int     NumberOfActiveSurfaces,
5062                 bool     GPUVMEnable,
5063                 unsigned int     GPUVMMaxPageTableLevels,
5064                 unsigned int     HTotal[],
5065                 unsigned int     BytePerPixelC[],
5066                 double      DestinationLinesToRequestVMInVBlank[],
5067                 double      DestinationLinesToRequestVMInImmediateFlip[],
5068                 bool     DCCEnable[],
5069                 double      PixelClock[],
5070                 unsigned int        dpte_row_width_luma_ub[],
5071                 unsigned int        dpte_row_width_chroma_ub[],
5072                 unsigned int     vm_group_bytes[],
5073                 unsigned int     dpde0_bytes_per_frame_ub_l[],
5074                 unsigned int     dpde0_bytes_per_frame_ub_c[],
5075                 unsigned int        meta_pte_bytes_per_frame_ub_l[],
5076                 unsigned int        meta_pte_bytes_per_frame_ub_c[],
5077
5078                 /* Output */
5079                 double      TimePerVMGroupVBlank[],
5080                 double      TimePerVMGroupFlip[],
5081                 double      TimePerVMRequestVBlank[],
5082                 double      TimePerVMRequestFlip[])
5083 {
5084         unsigned int k;
5085         unsigned int   num_group_per_lower_vm_stage;
5086         unsigned int   num_req_per_lower_vm_stage;
5087
5088 #ifdef __DML_VBA_DEBUG__
5089         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
5090         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
5091 #endif
5092         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5093
5094 #ifdef __DML_VBA_DEBUG__
5095                 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
5096                 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
5097                 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
5098                                 __func__, k, dpde0_bytes_per_frame_ub_l[k]);
5099                 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
5100                                 __func__, k, dpde0_bytes_per_frame_ub_c[k]);
5101                 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
5102                                 __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
5103                 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
5104                                 __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
5105 #endif
5106
5107                 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
5108                         if (DCCEnable[k] == false) {
5109                                 if (BytePerPixelC[k] > 0) {
5110                                         num_group_per_lower_vm_stage = dml_ceil(
5111                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5112                                                         (double) (vm_group_bytes[k]), 1.0) +
5113                                                         dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5114                                                         (double) (vm_group_bytes[k]), 1.0);
5115                                 } else {
5116                                         num_group_per_lower_vm_stage = dml_ceil(
5117                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5118                                                         (double) (vm_group_bytes[k]), 1.0);
5119                                 }
5120                         } else {
5121                                 if (GPUVMMaxPageTableLevels == 1) {
5122                                         if (BytePerPixelC[k] > 0) {
5123                                                 num_group_per_lower_vm_stage = dml_ceil(
5124                                                         (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5125                                                         (double) (vm_group_bytes[k]), 1.0) +
5126                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5127                                                         (double) (vm_group_bytes[k]), 1.0);
5128                                         } else {
5129                                                 num_group_per_lower_vm_stage = dml_ceil(
5130                                                                 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5131                                                                 (double) (vm_group_bytes[k]), 1.0);
5132                                         }
5133                                 } else {
5134                                         if (BytePerPixelC[k] > 0) {
5135                                                 num_group_per_lower_vm_stage = 2 + dml_ceil(
5136                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5137                                                         (double) (vm_group_bytes[k]), 1) +
5138                                                         dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5139                                                         (double) (vm_group_bytes[k]), 1) +
5140                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
5141                                                         (double) (vm_group_bytes[k]), 1) +
5142                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5143                                                         (double) (vm_group_bytes[k]), 1);
5144                                         } else {
5145                                                 num_group_per_lower_vm_stage = 1 + dml_ceil(
5146                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5147                                                         (double) (vm_group_bytes[k]), 1) + dml_ceil(
5148                                                         (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5149                                                         (double) (vm_group_bytes[k]), 1);
5150                                         }
5151                                 }
5152                         }
5153
5154                         if (DCCEnable[k] == false) {
5155                                 if (BytePerPixelC[k] > 0) {
5156                                         num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
5157                                                         dpde0_bytes_per_frame_ub_c[k] / 64;
5158                                 } else {
5159                                         num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
5160                                 }
5161                         } else {
5162                                 if (GPUVMMaxPageTableLevels == 1) {
5163                                         if (BytePerPixelC[k] > 0) {
5164                                                 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
5165                                                                 meta_pte_bytes_per_frame_ub_c[k] / 64;
5166                                         } else {
5167                                                 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
5168                                         }
5169                                 } else {
5170                                         if (BytePerPixelC[k] > 0) {
5171                                                 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5172                                                                 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
5173                                                                 meta_pte_bytes_per_frame_ub_l[k] / 64 +
5174                                                                 meta_pte_bytes_per_frame_ub_c[k] / 64;
5175                                         } else {
5176                                                 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5177                                                                 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
5178                                         }
5179                                 }
5180                         }
5181
5182                         TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5183                                         HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5184                         TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5185                                         HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5186                         TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5187                                         HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5188                         TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5189                                         HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5190
5191                         if (GPUVMMaxPageTableLevels > 2) {
5192                                 TimePerVMGroupVBlank[k]    = TimePerVMGroupVBlank[k] / 2;
5193                                 TimePerVMGroupFlip[k]      = TimePerVMGroupFlip[k] / 2;
5194                                 TimePerVMRequestVBlank[k]  = TimePerVMRequestVBlank[k] / 2;
5195                                 TimePerVMRequestFlip[k]    = TimePerVMRequestFlip[k] / 2;
5196                         }
5197
5198                 } else {
5199                         TimePerVMGroupVBlank[k] = 0;
5200                         TimePerVMGroupFlip[k] = 0;
5201                         TimePerVMRequestVBlank[k] = 0;
5202                         TimePerVMRequestFlip[k] = 0;
5203                 }
5204
5205 #ifdef __DML_VBA_DEBUG__
5206                 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
5207                 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
5208                 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
5209                 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
5210 #endif
5211         }
5212 } // CalculateVMGroupAndRequestTimes
5213
5214 void dml32_CalculateDCCConfiguration(
5215                 bool             DCCEnabled,
5216                 bool             DCCProgrammingAssumesScanDirectionUnknown,
5217                 enum source_format_class SourcePixelFormat,
5218                 unsigned int             SurfaceWidthLuma,
5219                 unsigned int             SurfaceWidthChroma,
5220                 unsigned int             SurfaceHeightLuma,
5221                 unsigned int             SurfaceHeightChroma,
5222                 unsigned int                nomDETInKByte,
5223                 unsigned int             RequestHeight256ByteLuma,
5224                 unsigned int             RequestHeight256ByteChroma,
5225                 enum dm_swizzle_mode     TilingFormat,
5226                 unsigned int             BytePerPixelY,
5227                 unsigned int             BytePerPixelC,
5228                 double              BytePerPixelDETY,
5229                 double              BytePerPixelDETC,
5230                 enum dm_rotation_angle   SourceRotation,
5231                 /* Output */
5232                 unsigned int        *MaxUncompressedBlockLuma,
5233                 unsigned int        *MaxUncompressedBlockChroma,
5234                 unsigned int        *MaxCompressedBlockLuma,
5235                 unsigned int        *MaxCompressedBlockChroma,
5236                 unsigned int        *IndependentBlockLuma,
5237                 unsigned int        *IndependentBlockChroma)
5238 {
5239         typedef enum {
5240                 REQ_256Bytes,
5241                 REQ_128BytesNonContiguous,
5242                 REQ_128BytesContiguous,
5243                 REQ_NA
5244         } RequestType;
5245
5246         RequestType   RequestLuma;
5247         RequestType   RequestChroma;
5248
5249         unsigned int   segment_order_horz_contiguous_luma;
5250         unsigned int   segment_order_horz_contiguous_chroma;
5251         unsigned int   segment_order_vert_contiguous_luma;
5252         unsigned int   segment_order_vert_contiguous_chroma;
5253         unsigned int req128_horz_wc_l;
5254         unsigned int req128_horz_wc_c;
5255         unsigned int req128_vert_wc_l;
5256         unsigned int req128_vert_wc_c;
5257         unsigned int MAS_vp_horz_limit;
5258         unsigned int MAS_vp_vert_limit;
5259         unsigned int max_vp_horz_width;
5260         unsigned int max_vp_vert_height;
5261         unsigned int eff_surf_width_l;
5262         unsigned int eff_surf_width_c;
5263         unsigned int eff_surf_height_l;
5264         unsigned int eff_surf_height_c;
5265         unsigned int full_swath_bytes_horz_wc_l;
5266         unsigned int full_swath_bytes_horz_wc_c;
5267         unsigned int full_swath_bytes_vert_wc_l;
5268         unsigned int full_swath_bytes_vert_wc_c;
5269         unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5270
5271         unsigned int   yuv420;
5272         unsigned int   horz_div_l;
5273         unsigned int   horz_div_c;
5274         unsigned int   vert_div_l;
5275         unsigned int   vert_div_c;
5276
5277         unsigned int     swath_buf_size;
5278         double   detile_buf_vp_horz_limit;
5279         double   detile_buf_vp_vert_limit;
5280
5281         yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5282                         SourcePixelFormat == dm_420_12) ? 1 : 0);
5283         horz_div_l = 1;
5284         horz_div_c = 1;
5285         vert_div_l = 1;
5286         vert_div_c = 1;
5287
5288         if (BytePerPixelY == 1)
5289                 vert_div_l = 0;
5290         if (BytePerPixelC == 1)
5291                 vert_div_c = 0;
5292
5293         if (BytePerPixelC == 0) {
5294                 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5295                 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5296                                 BytePerPixelY / (1 + horz_div_l));
5297                 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5298                                 (1 + vert_div_l));
5299         } else {
5300                 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5301                 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5302                                 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5303                                 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5304                 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5305                                 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5306                                 (1 + vert_div_c) / (1 + yuv420));
5307         }
5308
5309         if (SourcePixelFormat == dm_420_10) {
5310                 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5311                 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5312         }
5313
5314         detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5315         detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5316
5317         MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5318         MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5319         max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5320         max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5321         eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5322         eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5323         eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5324         eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5325
5326         full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5327         full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5328         if (BytePerPixelC > 0) {
5329                 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5330                 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5331         } else {
5332                 full_swath_bytes_horz_wc_c = 0;
5333                 full_swath_bytes_vert_wc_c = 0;
5334         }
5335
5336         if (SourcePixelFormat == dm_420_10) {
5337                 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5338                 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5339                 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5340                 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5341         }
5342
5343         if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5344                 req128_horz_wc_l = 0;
5345                 req128_horz_wc_c = 0;
5346         } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5347                         full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5348                 req128_horz_wc_l = 0;
5349                 req128_horz_wc_c = 1;
5350         } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5351                         full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5352                 req128_horz_wc_l = 1;
5353                 req128_horz_wc_c = 0;
5354         } else {
5355                 req128_horz_wc_l = 1;
5356                 req128_horz_wc_c = 1;
5357         }
5358
5359         if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5360                 req128_vert_wc_l = 0;
5361                 req128_vert_wc_c = 0;
5362         } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5363                         full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5364                 req128_vert_wc_l = 0;
5365                 req128_vert_wc_c = 1;
5366         } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5367                         full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5368                 req128_vert_wc_l = 1;
5369                 req128_vert_wc_c = 0;
5370         } else {
5371                 req128_vert_wc_l = 1;
5372                 req128_vert_wc_c = 1;
5373         }
5374
5375         if (BytePerPixelY == 2) {
5376                 segment_order_horz_contiguous_luma = 0;
5377                 segment_order_vert_contiguous_luma = 1;
5378         } else {
5379                 segment_order_horz_contiguous_luma = 1;
5380                 segment_order_vert_contiguous_luma = 0;
5381         }
5382
5383         if (BytePerPixelC == 2) {
5384                 segment_order_horz_contiguous_chroma = 0;
5385                 segment_order_vert_contiguous_chroma = 1;
5386         } else {
5387                 segment_order_horz_contiguous_chroma = 1;
5388                 segment_order_vert_contiguous_chroma = 0;
5389         }
5390 #ifdef __DML_VBA_DEBUG__
5391         dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5392         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5393         dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5394         dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5395         dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5396         dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5397         dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5398         dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5399         dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5400                         __func__, segment_order_horz_contiguous_chroma);
5401 #endif
5402
5403         if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5404                 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5405                         RequestLuma = REQ_256Bytes;
5406                 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5407                                 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5408                         RequestLuma = REQ_128BytesNonContiguous;
5409                 else
5410                         RequestLuma = REQ_128BytesContiguous;
5411
5412                 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5413                         RequestChroma = REQ_256Bytes;
5414                 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5415                                 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5416                         RequestChroma = REQ_128BytesNonContiguous;
5417                 else
5418                         RequestChroma = REQ_128BytesContiguous;
5419
5420         } else if (!IsVertical(SourceRotation)) {
5421                 if (req128_horz_wc_l == 0)
5422                         RequestLuma = REQ_256Bytes;
5423                 else if (segment_order_horz_contiguous_luma == 0)
5424                         RequestLuma = REQ_128BytesNonContiguous;
5425                 else
5426                         RequestLuma = REQ_128BytesContiguous;
5427
5428                 if (req128_horz_wc_c == 0)
5429                         RequestChroma = REQ_256Bytes;
5430                 else if (segment_order_horz_contiguous_chroma == 0)
5431                         RequestChroma = REQ_128BytesNonContiguous;
5432                 else
5433                         RequestChroma = REQ_128BytesContiguous;
5434
5435         } else {
5436                 if (req128_vert_wc_l == 0)
5437                         RequestLuma = REQ_256Bytes;
5438                 else if (segment_order_vert_contiguous_luma == 0)
5439                         RequestLuma = REQ_128BytesNonContiguous;
5440                 else
5441                         RequestLuma = REQ_128BytesContiguous;
5442
5443                 if (req128_vert_wc_c == 0)
5444                         RequestChroma = REQ_256Bytes;
5445                 else if (segment_order_vert_contiguous_chroma == 0)
5446                         RequestChroma = REQ_128BytesNonContiguous;
5447                 else
5448                         RequestChroma = REQ_128BytesContiguous;
5449         }
5450
5451         if (RequestLuma == REQ_256Bytes) {
5452                 *MaxUncompressedBlockLuma = 256;
5453                 *MaxCompressedBlockLuma = 256;
5454                 *IndependentBlockLuma = 0;
5455         } else if (RequestLuma == REQ_128BytesContiguous) {
5456                 *MaxUncompressedBlockLuma = 256;
5457                 *MaxCompressedBlockLuma = 128;
5458                 *IndependentBlockLuma = 128;
5459         } else {
5460                 *MaxUncompressedBlockLuma = 256;
5461                 *MaxCompressedBlockLuma = 64;
5462                 *IndependentBlockLuma = 64;
5463         }
5464
5465         if (RequestChroma == REQ_256Bytes) {
5466                 *MaxUncompressedBlockChroma = 256;
5467                 *MaxCompressedBlockChroma = 256;
5468                 *IndependentBlockChroma = 0;
5469         } else if (RequestChroma == REQ_128BytesContiguous) {
5470                 *MaxUncompressedBlockChroma = 256;
5471                 *MaxCompressedBlockChroma = 128;
5472                 *IndependentBlockChroma = 128;
5473         } else {
5474                 *MaxUncompressedBlockChroma = 256;
5475                 *MaxCompressedBlockChroma = 64;
5476                 *IndependentBlockChroma = 64;
5477         }
5478
5479         if (DCCEnabled != true || BytePerPixelC == 0) {
5480                 *MaxUncompressedBlockChroma = 0;
5481                 *MaxCompressedBlockChroma = 0;
5482                 *IndependentBlockChroma = 0;
5483         }
5484
5485         if (DCCEnabled != true) {
5486                 *MaxUncompressedBlockLuma = 0;
5487                 *MaxCompressedBlockLuma = 0;
5488                 *IndependentBlockLuma = 0;
5489         }
5490
5491 #ifdef __DML_VBA_DEBUG__
5492         dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5493         dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5494         dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5495         dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5496         dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5497         dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5498 #endif
5499
5500 } // CalculateDCCConfiguration
5501
5502 void dml32_CalculateStutterEfficiency(
5503                 unsigned int      CompressedBufferSizeInkByte,
5504                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5505                 bool   UnboundedRequestEnabled,
5506                 unsigned int      MetaFIFOSizeInKEntries,
5507                 unsigned int      ZeroSizeBufferEntries,
5508                 unsigned int      PixelChunkSizeInKByte,
5509                 unsigned int   NumberOfActiveSurfaces,
5510                 unsigned int      ROBBufferSizeInKByte,
5511                 double    TotalDataReadBandwidth,
5512                 double    DCFCLK,
5513                 double    ReturnBW,
5514                 unsigned int      CompbufReservedSpace64B,
5515                 unsigned int      CompbufReservedSpaceZs,
5516                 double    SRExitTime,
5517                 double    SRExitZ8Time,
5518                 bool   SynchronizeTimingsFinal,
5519                 unsigned int   BlendingAndTiming[],
5520                 double    StutterEnterPlusExitWatermark,
5521                 double    Z8StutterEnterPlusExitWatermark,
5522                 bool   ProgressiveToInterlaceUnitInOPP,
5523                 bool   Interlace[],
5524                 double    MinTTUVBlank[],
5525                 unsigned int   DPPPerSurface[],
5526                 unsigned int      DETBufferSizeY[],
5527                 unsigned int   BytePerPixelY[],
5528                 double    BytePerPixelDETY[],
5529                 double      SwathWidthY[],
5530                 unsigned int   SwathHeightY[],
5531                 unsigned int   SwathHeightC[],
5532                 double    NetDCCRateLuma[],
5533                 double    NetDCCRateChroma[],
5534                 double    DCCFractionOfZeroSizeRequestsLuma[],
5535                 double    DCCFractionOfZeroSizeRequestsChroma[],
5536                 unsigned int      HTotal[],
5537                 unsigned int      VTotal[],
5538                 double    PixelClock[],
5539                 double    VRatio[],
5540                 enum dm_rotation_angle SourceRotation[],
5541                 unsigned int   BlockHeight256BytesY[],
5542                 unsigned int   BlockWidth256BytesY[],
5543                 unsigned int   BlockHeight256BytesC[],
5544                 unsigned int   BlockWidth256BytesC[],
5545                 unsigned int   DCCYMaxUncompressedBlock[],
5546                 unsigned int   DCCCMaxUncompressedBlock[],
5547                 unsigned int      VActive[],
5548                 bool   DCCEnable[],
5549                 bool   WritebackEnable[],
5550                 double    ReadBandwidthSurfaceLuma[],
5551                 double    ReadBandwidthSurfaceChroma[],
5552                 double    meta_row_bw[],
5553                 double    dpte_row_bw[],
5554
5555                 /* Output */
5556                 double   *StutterEfficiencyNotIncludingVBlank,
5557                 double   *StutterEfficiency,
5558                 unsigned int     *NumberOfStutterBurstsPerFrame,
5559                 double   *Z8StutterEfficiencyNotIncludingVBlank,
5560                 double   *Z8StutterEfficiency,
5561                 unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5562                 double   *StutterPeriod,
5563                 bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5564 {
5565
5566         bool FoundCriticalSurface = false;
5567         unsigned int SwathSizeCriticalSurface = 0;
5568         unsigned int LastChunkOfSwathSize;
5569         unsigned int MissingPartOfLastSwathOfDETSize;
5570         double LastZ8StutterPeriod = 0.0;
5571         double LastStutterPeriod = 0.0;
5572         unsigned int TotalNumberOfActiveOTG = 0;
5573         double doublePixelClock;
5574         unsigned int doubleHTotal;
5575         unsigned int doubleVTotal;
5576         bool SameTiming = true;
5577         double DETBufferingTimeY;
5578         double SwathWidthYCriticalSurface = 0.0;
5579         double SwathHeightYCriticalSurface = 0.0;
5580         double VActiveTimeCriticalSurface = 0.0;
5581         double FrameTimeCriticalSurface = 0.0;
5582         unsigned int BytePerPixelYCriticalSurface = 0;
5583         double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5584         unsigned int DETBufferSizeYCriticalSurface = 0;
5585         double MinTTUVBlankCriticalSurface = 0.0;
5586         unsigned int BlockWidth256BytesYCriticalSurface = 0;
5587         bool doublePlaneCriticalSurface = 0;
5588         bool doublePipeCriticalSurface = 0;
5589         double TotalCompressedReadBandwidth;
5590         double TotalRowReadBandwidth;
5591         double AverageDCCCompressionRate;
5592         double EffectiveCompressedBufferSize;
5593         double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5594         double StutterBurstTime;
5595         unsigned int TotalActiveWriteback;
5596         double LinesInDETY;
5597         double LinesInDETYRoundedDownToSwath;
5598         double MaximumEffectiveCompressionLuma;
5599         double MaximumEffectiveCompressionChroma;
5600         double TotalZeroSizeRequestReadBandwidth;
5601         double TotalZeroSizeCompressedReadBandwidth;
5602         double AverageDCCZeroSizeFraction;
5603         double AverageZeroSizeCompressionRate;
5604         unsigned int k;
5605
5606         TotalZeroSizeRequestReadBandwidth = 0;
5607         TotalZeroSizeCompressedReadBandwidth = 0;
5608         TotalRowReadBandwidth = 0;
5609         TotalCompressedReadBandwidth = 0;
5610
5611         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5612                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5613                         if (DCCEnable[k] == true) {
5614                                 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5615                                                 || (!IsVertical(SourceRotation[k])
5616                                                                 && BlockHeight256BytesY[k] > SwathHeightY[k])
5617                                                 || DCCYMaxUncompressedBlock[k] < 256) {
5618                                         MaximumEffectiveCompressionLuma = 2;
5619                                 } else {
5620                                         MaximumEffectiveCompressionLuma = 4;
5621                                 }
5622                                 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5623                                                 + ReadBandwidthSurfaceLuma[k]
5624                                                                 / dml_min(NetDCCRateLuma[k],
5625                                                                                 MaximumEffectiveCompressionLuma);
5626 #ifdef __DML_VBA_DEBUG__
5627                                 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5628                                                 __func__, k, ReadBandwidthSurfaceLuma[k]);
5629                                 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5630                                                 __func__, k, NetDCCRateLuma[k]);
5631                                 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5632                                                 __func__, k, MaximumEffectiveCompressionLuma);
5633 #endif
5634                                 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5635                                                 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5636                                 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5637                                                 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5638                                                                 / MaximumEffectiveCompressionLuma;
5639
5640                                 if (ReadBandwidthSurfaceChroma[k] > 0) {
5641                                         if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5642                                                         || (!IsVertical(SourceRotation[k])
5643                                                                         && BlockHeight256BytesC[k] > SwathHeightC[k])
5644                                                         || DCCCMaxUncompressedBlock[k] < 256) {
5645                                                 MaximumEffectiveCompressionChroma = 2;
5646                                         } else {
5647                                                 MaximumEffectiveCompressionChroma = 4;
5648                                         }
5649                                         TotalCompressedReadBandwidth =
5650                                                         TotalCompressedReadBandwidth
5651                                                         + ReadBandwidthSurfaceChroma[k]
5652                                                         / dml_min(NetDCCRateChroma[k],
5653                                                         MaximumEffectiveCompressionChroma);
5654 #ifdef __DML_VBA_DEBUG__
5655                                         dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5656                                                         __func__, k, ReadBandwidthSurfaceChroma[k]);
5657                                         dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5658                                                         __func__, k, NetDCCRateChroma[k]);
5659                                         dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5660                                                         __func__, k, MaximumEffectiveCompressionChroma);
5661 #endif
5662                                         TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5663                                                         + ReadBandwidthSurfaceChroma[k]
5664                                                                         * DCCFractionOfZeroSizeRequestsChroma[k];
5665                                         TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5666                                                         + ReadBandwidthSurfaceChroma[k]
5667                                                                         * DCCFractionOfZeroSizeRequestsChroma[k]
5668                                                                         / MaximumEffectiveCompressionChroma;
5669                                 }
5670                         } else {
5671                                 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5672                                                 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5673                         }
5674                         TotalRowReadBandwidth = TotalRowReadBandwidth
5675                                         + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5676                 }
5677         }
5678
5679         AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5680         AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5681
5682 #ifdef __DML_VBA_DEBUG__
5683         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5684         dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5685         dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5686         dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5687                         __func__, TotalZeroSizeCompressedReadBandwidth);
5688         dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5689         dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5690         dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5691         dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5692         dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5693         dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5694         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5695 #endif
5696         if (AverageDCCZeroSizeFraction == 1) {
5697                 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5698                                 / TotalZeroSizeCompressedReadBandwidth;
5699                 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5700                                 * AverageZeroSizeCompressionRate
5701                                 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5702                                                 * AverageZeroSizeCompressionRate;
5703         } else if (AverageDCCZeroSizeFraction > 0) {
5704                 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5705                                 / TotalZeroSizeCompressedReadBandwidth;
5706                 EffectiveCompressedBufferSize = dml_min(
5707                                 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5708                                 (double) MetaFIFOSizeInKEntries * 1024 * 64
5709                                         / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5710                                         + 1 / AverageDCCCompressionRate))
5711                                         + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5712                                         * AverageDCCCompressionRate,
5713                                         ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5714                                         / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5715
5716 #ifdef __DML_VBA_DEBUG__
5717                 dml_print("DML::%s: min 1 = %f\n", __func__,
5718                                 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5719                 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5720                                 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5721                                                 AverageDCCCompressionRate));
5722                 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5723                                 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5724                 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5725                                 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5726 #endif
5727         } else {
5728                 EffectiveCompressedBufferSize = dml_min(
5729                                 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5730                                 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5731                                 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5732                                                 * AverageDCCCompressionRate;
5733
5734 #ifdef __DML_VBA_DEBUG__
5735                 dml_print("DML::%s: min 1 = %f\n", __func__,
5736                                 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5737                 dml_print("DML::%s: min 2 = %f\n", __func__,
5738                                 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5739 #endif
5740         }
5741
5742 #ifdef __DML_VBA_DEBUG__
5743         dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5744         dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5745         dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5746 #endif
5747
5748         *StutterPeriod = 0;
5749
5750         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5751                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5752                         LinesInDETY = ((double) DETBufferSizeY[k]
5753                                         + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5754                                                         * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5755                                         / BytePerPixelDETY[k] / SwathWidthY[k];
5756                         LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5757                         DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5758                                         / VRatio[k];
5759 #ifdef __DML_VBA_DEBUG__
5760                         dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5761                         dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5762                         dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5763                         dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5764                                         __func__, k, ReadBandwidthSurfaceLuma[k]);
5765                         dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5766                         dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5767                         dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5768                                         __func__, k, LinesInDETYRoundedDownToSwath);
5769                         dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5770                         dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5771                         dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5772                         dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5773                         dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5774 #endif
5775
5776                         if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5777                                 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5778
5779                                 FoundCriticalSurface = true;
5780                                 *StutterPeriod = DETBufferingTimeY;
5781                                 FrameTimeCriticalSurface = (
5782                                                 isInterlaceTiming ?
5783                                                                 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5784                                                 * (double) HTotal[k] / PixelClock[k];
5785                                 VActiveTimeCriticalSurface = (
5786                                                 isInterlaceTiming ?
5787                                                                 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5788                                                 * (double) HTotal[k] / PixelClock[k];
5789                                 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5790                                 SwathWidthYCriticalSurface = SwathWidthY[k];
5791                                 SwathHeightYCriticalSurface = SwathHeightY[k];
5792                                 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5793                                 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5794                                                 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5795                                 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5796                                 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5797                                 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5798                                 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5799
5800 #ifdef __DML_VBA_DEBUG__
5801                                 dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5802                                                 __func__, k, FoundCriticalSurface);
5803                                 dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5804                                                 __func__, k, *StutterPeriod);
5805                                 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5806                                                 __func__, k, MinTTUVBlankCriticalSurface);
5807                                 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5808                                                 __func__, k, FrameTimeCriticalSurface);
5809                                 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5810                                                 __func__, k, VActiveTimeCriticalSurface);
5811                                 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5812                                                 __func__, k, BytePerPixelYCriticalSurface);
5813                                 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5814                                                 __func__, k, SwathWidthYCriticalSurface);
5815                                 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5816                                                 __func__, k, SwathHeightYCriticalSurface);
5817                                 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5818                                                 __func__, k, BlockWidth256BytesYCriticalSurface);
5819                                 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5820                                                 __func__, k, doublePlaneCriticalSurface);
5821                                 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5822                                                 __func__, k, doublePipeCriticalSurface);
5823                                 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5824                                                 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5825 #endif
5826                         }
5827                 }
5828         }
5829
5830         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5831                         EffectiveCompressedBufferSize);
5832 #ifdef __DML_VBA_DEBUG__
5833         dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5834         dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5835         dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5836                         __func__, *StutterPeriod * TotalDataReadBandwidth);
5837         dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5838         dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5839                         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5840         dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5841         dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5842         dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5843         dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5844 #endif
5845
5846         StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5847                         / ReturnBW
5848                         + (*StutterPeriod * TotalDataReadBandwidth
5849                                         - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5850                         + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5851 #ifdef __DML_VBA_DEBUG__
5852         dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5853                         AverageDCCCompressionRate / ReturnBW);
5854         dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5855                         __func__, (*StutterPeriod * TotalDataReadBandwidth));
5856         dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5857                         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5858         dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5859         dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5860 #endif
5861         StutterBurstTime = dml_max(StutterBurstTime,
5862                         LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5863                                         * SwathWidthYCriticalSurface / ReturnBW);
5864
5865 #ifdef __DML_VBA_DEBUG__
5866         dml_print("DML::%s: Time to finish residue swath=%f\n",
5867                         __func__,
5868                         LinesToFinishSwathTransferStutterCriticalSurface *
5869                         BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5870 #endif
5871
5872         TotalActiveWriteback = 0;
5873         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5874                 if (WritebackEnable[k])
5875                         TotalActiveWriteback = TotalActiveWriteback + 1;
5876         }
5877
5878         if (TotalActiveWriteback == 0) {
5879 #ifdef __DML_VBA_DEBUG__
5880                 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5881                 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5882                 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5883                 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5884 #endif
5885                 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5886                                 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5887                 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5888                                 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5889                 *NumberOfStutterBurstsPerFrame = (
5890                                 *StutterEfficiencyNotIncludingVBlank > 0 ?
5891                                                 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5892                 *Z8NumberOfStutterBurstsPerFrame = (
5893                                 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5894                                                 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5895         } else {
5896                 *StutterEfficiencyNotIncludingVBlank = 0.;
5897                 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5898                 *NumberOfStutterBurstsPerFrame = 0;
5899                 *Z8NumberOfStutterBurstsPerFrame = 0;
5900         }
5901 #ifdef __DML_VBA_DEBUG__
5902         dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5903         dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5904                         __func__, *StutterEfficiencyNotIncludingVBlank);
5905         dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5906                         __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5907         dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5908         dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5909 #endif
5910
5911         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5912                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5913                         if (BlendingAndTiming[k] == k) {
5914                                 if (TotalNumberOfActiveOTG == 0) {
5915                                         doublePixelClock = PixelClock[k];
5916                                         doubleHTotal = HTotal[k];
5917                                         doubleVTotal = VTotal[k];
5918                                 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5919                                                 || doubleVTotal != VTotal[k]) {
5920                                         SameTiming = false;
5921                                 }
5922                                 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5923                         }
5924                 }
5925         }
5926
5927         if (*StutterEfficiencyNotIncludingVBlank > 0) {
5928                 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5929
5930                 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5931                                 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5932                         *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5933                                                 + StutterBurstTime * VActiveTimeCriticalSurface
5934                                                 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5935                 } else {
5936                         *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
5937                 }
5938         } else {
5939                 *StutterEfficiency = 0;
5940         }
5941
5942         if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
5943                 LastZ8StutterPeriod = VActiveTimeCriticalSurface
5944                                 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5945                 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
5946                                 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
5947                         *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
5948                                 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5949                 } else {
5950                         *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
5951                 }
5952         } else {
5953                 *Z8StutterEfficiency = 0.;
5954         }
5955
5956 #ifdef __DML_VBA_DEBUG__
5957         dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
5958         dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
5959         dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5960         dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5961         dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
5962         dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
5963         dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5964                         __func__, *StutterEfficiencyNotIncludingVBlank);
5965         dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5966 #endif
5967
5968         SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
5969                         * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
5970         LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
5971         MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
5972                         - DETBufferSizeYCriticalSurface;
5973
5974         *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
5975                         && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
5976                         && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
5977                         && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
5978
5979 #ifdef __DML_VBA_DEBUG__
5980         dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
5981         dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
5982         dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
5983         dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
5984 #endif
5985 } // CalculateStutterEfficiency
5986
/*
 * Derive the total DET budget, the nominal per-DPP DET size and the minimum
 * compressed buffer size from the return-buffer configuration.
 *
 * Inputs:
 *   ConfigReturnBufferSizeInKByte - configurable return buffer size
 *   ROBBufferSizeInKByte          - ROB buffer size
 *   MaxNumDPP                     - divisor used for the nominal DET size
 *   nomDETInKByteOverrideEnable   - when true, the nominal DET size is
 *                                   replaced by nomDETInKByteOverrideValue
 *   nomDETInKByteOverrideValue    - replacement nominal DET size
 *
 * Outputs:
 *   *MaxTotalDETInKByte             - dml_ceil() of 4/5 of the two buffer
 *                                     sizes summed, with granularity 64
 *   *nomDETInKByte                  - dml_floor() of MaxTotalDET / MaxNumDPP
 *                                     with granularity 64, or the override
 *   *MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                     *MaxTotalDETInKByte (unsigned math)
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double buffer_total_kbyte = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;
	/* The DET budget is 4/5 of the combined buffers, passed through dml_ceil(). */
	const unsigned int max_total_det_kbyte = dml_ceil(buffer_total_kbyte * 4.0 / 5.0, 64);

	*MaxTotalDETInKByte = max_total_det_kbyte;
	*nomDETInKByte = dml_floor((double) max_total_det_kbyte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - max_total_det_kbyte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* The override is applied last so the computed value is still traced above. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6023
/*
 * Decide whether the summed bandwidth demand of all surfaces fits within
 * ReturnBW.
 *
 * For every surface k in [0, NumberOfActiveSurfaces) the demand is
 *   ReadBandwidthLuma[k]   * UrgentBurstFactorLuma[k]
 * + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 * + cursor_bw[k]           * UrgentBurstFactorCursor[k]
 * + NumberOfDPP[k] * meta_row_bandwidth[k]
 * + NumberOfDPP[k] * dpte_row_bandwidth[k]
 *
 * Returns true only when the total demand is <= ReturnBW and no surface has
 * its NotUrgentLatencyHiding[] flag set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool any_surface_lacks_hiding = false;
	bool supported;
	double total_bw = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		any_surface_lacks_hiding |= NotUrgentLatencyHiding[surf];

		/* Terms are added strictly left to right; do not regroup the sum. */
		total_bw = total_bw
				+ ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * meta_row_bandwidth[surf]
				+ NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	supported = !any_surface_lacks_hiding && (total_bw <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, any_surface_lacks_hiding);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, total_bw);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}
6062
/*
 * Sum the per-surface bandwidth demand and compare it against ReturnBW.
 *
 * For every surface k in [0, NumberOfActiveSurfaces) the contribution is the
 * dml_max3() of three candidates:
 *   - NumberOfDPP[k] * prefetch_vmrow_bw[k]
 *   - the luma, chroma and cursor read bandwidths, each scaled by its
 *     UrgentBurstFactor*, plus NumberOfDPP[k] times the meta and dpte row
 *     bandwidths
 *   - NumberOfDPP[k] times the luma and chroma prefetch bandwidths scaled by
 *     their UrgentBurstFactor*Pre, plus cursor_bw_pre[k] scaled by
 *     UrgentBurstFactorCursorPre[k]
 *
 * Outputs:
 *   *PrefetchBandwidth         - sum of the per-surface contributions
 *   *PrefetchBandwidthSupport  - true when the sum is <= ReturnBW and no
 *                                NotUrgentLatencyHiding[] flag is set
 *   *FractionOfUrgentBandwidth - the sum divided by ReturnBW
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double  *PrefetchBandwidth,
		double  *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	unsigned int surf;
	bool any_surface_lacks_hiding = false;
	double total_bw = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		double vmrow_bw;
		double active_bw;
		double prefetch_bw;

		if (NotUrgentLatencyHiding[surf])
			any_surface_lacks_hiding = true;

		vmrow_bw = NumberOfDPP[surf] * prefetch_vmrow_bw[surf];

		active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * (meta_row_bandwidth[surf] + dpte_row_bandwidth[surf]);

		prefetch_bw = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
						+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		total_bw = total_bw + dml_max3(vmrow_bw, active_bw, prefetch_bw);
	}

	*PrefetchBandwidth = total_bw;
	*PrefetchBandwidthSupport = (total_bw <= ReturnBW) && !any_surface_lacks_hiding;
	*FractionOfUrgentBandwidth = total_bw / ReturnBW;
}
6106
/*
 * Start from ReturnBW and subtract, for each of the first
 * NumberOfActiveSurfaces surfaces, the larger of:
 *   - the luma/chroma/cursor read bandwidths scaled by their urgent burst
 *     factors, and
 *   - NumberOfDPP * the luma/chroma prefetch bandwidths scaled by their
 *     "Pre" burst factors, plus the scaled cursor_bw_pre.
 *
 * Returns the remainder; the result is not clamped, so it may be negative.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	double remaining_bw = ReturnBW;
	unsigned int surf;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		const double active_term =
			ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
			+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
			+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];

		const double prefetch_term =
			NumberOfDPP[surf] * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
					+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
			+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw -= dml_max(active_term, prefetch_term);
	}

	return remaining_bw;
}
6133
6134 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6135                 double ReturnBW,
6136                 enum immediate_flip_requirement ImmediateFlipRequirement[],
6137                 double final_flip_bw[],
6138                 double ReadBandwidthLuma[],
6139                 double ReadBandwidthChroma[],
6140                 double PrefetchBandwidthLuma[],
6141                 double PrefetchBandwidthChroma[],
6142                 double cursor_bw[],
6143                 double meta_row_bandwidth[],
6144                 double dpte_row_bandwidth[],
6145                 double cursor_bw_pre[],
6146                 double prefetch_vmrow_bw[],
6147                 unsigned int NumberOfDPP[],
6148                 double UrgentBurstFactorLuma[],
6149                 double UrgentBurstFactorChroma[],
6150                 double UrgentBurstFactorCursor[],
6151                 double UrgentBurstFactorLumaPre[],
6152                 double UrgentBurstFactorChromaPre[],
6153                 double UrgentBurstFactorCursorPre[],
6154
6155                 /* output */
6156                 double  *TotalBandwidth,
6157                 double  *FractionOfUrgentBandwidth,
6158                 bool *ImmediateFlipBandwidthSupport)
6159 {
6160         unsigned int k;
6161         *TotalBandwidth = 0;
6162         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6163                 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6164                         *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6165                                         NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6166                                         NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6167                 } else {
6168                         *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6169                                         NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6170                                         NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6171                 }
6172         }
6173         *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6174         *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6175 }