drivers/gpu/drm/i915/display/intel_bw.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <drm/drm_atomic_state_helper.h>

#include "intel_atomic.h"
#include "intel_bw.h"
#include "intel_cdclk.h"
#include "intel_display_types.h"
#include "intel_pcode.h"
#include "intel_pm.h"

/* Parameters for Qclk Geyserville (QGV) */
struct intel_qgv_point {
        u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
};

struct intel_psf_gv_point {
        u8 clk; /* clock in multiples of 16.6666 MHz */
};

struct intel_qgv_info {
        struct intel_qgv_point points[I915_NUM_QGV_POINTS];
        struct intel_psf_gv_point psf_points[I915_NUM_PSF_GV_POINTS];
        u8 num_points;
        u8 num_psf_points;
        u8 t_bl;
        u8 max_numchannels;
        u8 channel_width;
        u8 deinterleave;
};

static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv,
                                          struct intel_qgv_point *sp,
                                          int point)
{
        u32 dclk_ratio, dclk_reference;
        u32 val;

        val = intel_uncore_read(&dev_priv->uncore, SA_PERF_STATUS_0_0_0_MCHBAR_PC);
        dclk_ratio = REG_FIELD_GET(DG1_QCLK_RATIO_MASK, val);
        if (val & DG1_QCLK_REFERENCE)
                dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */
        else
                dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */
        sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) + 500, 1000);

        val = intel_uncore_read(&dev_priv->uncore, SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
        if (val & DG1_GEAR_TYPE)
                sp->dclk *= 2;

        if (sp->dclk == 0)
                return -EINVAL;

        val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR);
        sp->t_rp = REG_FIELD_GET(DG1_DRAM_T_RP_MASK, val);
        sp->t_rdpre = REG_FIELD_GET(DG1_DRAM_T_RDPRE_MASK, val);

        val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH);
        sp->t_rcd = REG_FIELD_GET(DG1_DRAM_T_RCD_MASK, val);
        sp->t_ras = REG_FIELD_GET(DG1_DRAM_T_RAS_MASK, val);

        sp->t_rc = sp->t_rp + sp->t_ras;

        return 0;
}
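
/*
 * Editor's note, a worked example of the dclk math above (illustrative
 * numbers, not from bspec): with DG1_QCLK_REFERENCE clear (reference 8,
 * i.e. ~133 MHz) and a ratio of 16, dclk = DIV_ROUND_UP(16667 * 16 * 8 +
 * 500, 1000) = 2134 MHz; if DG1_GEAR_TYPE is set it is doubled to
 * 4268 MHz. The +500 biases the kHz-to-MHz conversion upward.
 */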

static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
                                         struct intel_qgv_point *sp,
                                         int point)
{
        u32 val = 0, val2 = 0;
        u16 dclk;
        int ret;

        ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
                             ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
                             &val, &val2);
        if (ret)
                return ret;

        dclk = val & 0xffff;
        sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000);
        sp->t_rp = (val & 0xff0000) >> 16;
        sp->t_rcd = (val & 0xff000000) >> 24;

        sp->t_rdpre = val2 & 0xff;
        sp->t_ras = (val2 & 0xff00) >> 8;

        sp->t_rc = sp->t_rp + sp->t_ras;

        return 0;
}

static int adls_pcode_read_psf_gv_point_info(struct drm_i915_private *dev_priv,
                                             struct intel_psf_gv_point *points)
{
        u32 val = 0;
        int ret;
        int i;

        ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
                             ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, &val, NULL);
        if (ret)
                return ret;

        for (i = 0; i < I915_NUM_PSF_GV_POINTS; i++) {
                points[i].clk = val & 0xff;
                val >>= 8;
        }

        return 0;
}
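
/*
 * Editor's note, an illustrative unpacking of the PSF reply above
 * (made-up register value, not from bspec): if pcode returned
 * val = 0x0000100c, the loop would yield points[0].clk = 0x0c
 * (12 * 16.666 MHz = 200 MHz) and points[1].clk = 0x10
 * (16 * 16.666 MHz ~= 267 MHz), one 8-bit clock field per PSF GV
 * point, lowest byte first.
 */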

int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
                                  u32 points_mask)
{
        int ret;

        /* bspec says to keep retrying for at least 1 ms */
        ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG,
                                points_mask,
                                ICL_PCODE_POINTS_RESTRICTED_MASK,
                                ICL_PCODE_POINTS_RESTRICTED,
                                1);

        if (ret < 0) {
                drm_err(&dev_priv->drm, "Failed to disable qgv points (%d), points: 0x%x\n", ret, points_mask);
                return ret;
        }

        return 0;
}
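
/*
 * Editor's note on the mask semantics (a sketch of the usual QGV
 * restriction encoding, not a bspec quote): each set bit in points_mask
 * marks one QGV point to be made unavailable, so e.g. points_mask = 0b110
 * with three advertised points would leave only point 0 usable.
 * intel_bw_atomic_check() below builds exactly this "points to disable"
 * mask from the allowed points it computes.
 */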

static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
                              struct intel_qgv_info *qi,
                              bool is_y_tile)
{
        const struct dram_info *dram_info = &dev_priv->dram_info;
        int i, ret;

        qi->num_points = dram_info->num_qgv_points;
        qi->num_psf_points = dram_info->num_psf_gv_points;

        if (DISPLAY_VER(dev_priv) >= 12)
                switch (dram_info->type) {
                case INTEL_DRAM_DDR4:
                        qi->t_bl = is_y_tile ? 8 : 4;
                        qi->max_numchannels = 2;
                        qi->channel_width = 64;
                        qi->deinterleave = is_y_tile ? 1 : 2;
                        break;
                case INTEL_DRAM_DDR5:
                        qi->t_bl = is_y_tile ? 16 : 8;
                        qi->max_numchannels = 4;
                        qi->channel_width = 32;
                        qi->deinterleave = is_y_tile ? 1 : 2;
                        break;
                case INTEL_DRAM_LPDDR4:
                        if (IS_ROCKETLAKE(dev_priv)) {
                                qi->t_bl = 8;
                                qi->max_numchannels = 4;
                                qi->channel_width = 32;
                                qi->deinterleave = 2;
                                break;
                        }
                        fallthrough;
                case INTEL_DRAM_LPDDR5:
                        qi->t_bl = 16;
                        qi->max_numchannels = 8;
                        qi->channel_width = 16;
                        qi->deinterleave = is_y_tile ? 2 : 4;
                        break;
                default:
                        qi->t_bl = 16;
                        qi->max_numchannels = 1;
                        break;
                }
        else if (DISPLAY_VER(dev_priv) == 11) {
                qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8;
                qi->max_numchannels = 1;
        }

        if (drm_WARN_ON(&dev_priv->drm,
                        qi->num_points > ARRAY_SIZE(qi->points)))
                qi->num_points = ARRAY_SIZE(qi->points);

        for (i = 0; i < qi->num_points; i++) {
                struct intel_qgv_point *sp = &qi->points[i];

                if (IS_DG1(dev_priv))
                        ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i);
                else
                        ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);

                if (ret)
                        return ret;

                drm_dbg_kms(&dev_priv->drm,
                            "QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
                            i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
                            sp->t_rcd, sp->t_rc);
        }

        if (qi->num_psf_points > 0) {
                ret = adls_pcode_read_psf_gv_point_info(dev_priv, qi->psf_points);
                if (ret) {
                        drm_err(&dev_priv->drm, "Failed to read PSF point data; PSF points will not be considered in bandwidth calculations.\n");
                        qi->num_psf_points = 0;
                }

                for (i = 0; i < qi->num_psf_points; i++)
                        drm_dbg_kms(&dev_priv->drm,
                                    "PSF GV %d: CLK=%d\n",
                                    i, qi->psf_points[i].clk);
        }

        return 0;
}

static int adl_calc_psf_bw(int clk)
{
        /*
         * clk is in multiples of 16.666 MHz (100/6).
         * According to BSpec, PSF GV bandwidth is
         * calculated as BW = 64 * clk * 16.666 MHz.
         */
        return DIV_ROUND_CLOSEST(64 * clk * 100, 6);
}
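
/*
 * Editor's note, a worked example of the formula above (illustrative
 * value): for clk = 16, BW = DIV_ROUND_CLOSEST(64 * 16 * 100, 6) =
 * DIV_ROUND_CLOSEST(102400, 6) = 17067, i.e. roughly 17 GB/s in the
 * MB/s units used by the deratedbw/psf_bw tables.
 */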

static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
{
        u16 dclk = 0;
        int i;

        for (i = 0; i < qi->num_points; i++)
                dclk = max(dclk, qi->points[i].dclk);

        return dclk;
}

struct intel_sa_info {
        u16 displayrtids;
        u8 deburst, deprogbwlimit, derating;
};

static const struct intel_sa_info icl_sa_info = {
        .deburst = 8,
        .deprogbwlimit = 25, /* GB/s */
        .displayrtids = 128,
        .derating = 10,
};

static const struct intel_sa_info tgl_sa_info = {
        .deburst = 16,
        .deprogbwlimit = 34, /* GB/s */
        .displayrtids = 256,
        .derating = 10,
};

static const struct intel_sa_info rkl_sa_info = {
        .deburst = 8,
        .deprogbwlimit = 20, /* GB/s */
        .displayrtids = 128,
        .derating = 10,
};

static const struct intel_sa_info adls_sa_info = {
        .deburst = 16,
        .deprogbwlimit = 38, /* GB/s */
        .displayrtids = 256,
        .derating = 10,
};

static const struct intel_sa_info adlp_sa_info = {
        .deburst = 16,
        .deprogbwlimit = 38, /* GB/s */
        .displayrtids = 256,
        .derating = 20,
};

static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
{
        struct intel_qgv_info qi = {};
        bool is_y_tile = true; /* assume y tile may be used */
        int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
        int ipqdepth, ipqdepthpch = 16;
        int dclk_max;
        int maxdebw;
        int num_groups = ARRAY_SIZE(dev_priv->max_bw);
        int i, ret;

        ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
        if (ret) {
                drm_dbg_kms(&dev_priv->drm,
                            "Failed to get memory subsystem information, ignoring bandwidth limits");
                return ret;
        }

        dclk_max = icl_sagv_max_dclk(&qi);
        maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10);
        ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
        qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);

        for (i = 0; i < num_groups; i++) {
                struct intel_bw_info *bi = &dev_priv->max_bw[i];
                int clpchgroup;
                int j;

                clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;
                bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;

                bi->num_qgv_points = qi.num_points;
                bi->num_psf_gv_points = qi.num_psf_points;

                for (j = 0; j < qi.num_points; j++) {
                        const struct intel_qgv_point *sp = &qi.points[j];
                        int ct, bw;

                        /*
                         * Max row cycle time
                         *
                         * FIXME what is the logic behind the
                         * assumed burst length?
                         */
                        ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
                                   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
                        bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);

                        bi->deratedbw[j] = min(maxdebw,
                                               bw * (100 - sa->derating) / 100);

                        drm_dbg_kms(&dev_priv->drm,
                                    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
                                    i, j, bi->num_planes, bi->deratedbw[j]);
                }
        }
        /*
         * If SAGV is disabled in BIOS, we always get 1 SAGV point,
         * but we can't send PCode commands to restrict it as they
         * would fail and be pointless anyway.
         */
        if (qi.num_points == 1)
                dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
        else
                dev_priv->sagv_status = I915_SAGV_ENABLED;

        return 0;
}
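
/*
 * Editor's note, a worked pass through the derating math above
 * (illustrative numbers, not a real platform's values): with
 * derating = 10 and a raw QGV bandwidth bw = 30000 MB/s,
 * deratedbw = min(maxdebw, 30000 * (100 - 10) / 100) = min(maxdebw, 27000),
 * i.e. 90% of the theoretical bandwidth, further clamped by the
 * display-engine programmable limit (deprogbwlimit * 1000 MB/s).
 */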

static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
{
        struct intel_qgv_info qi = {};
        const struct dram_info *dram_info = &dev_priv->dram_info;
        bool is_y_tile = true; /* assume y tile may be used */
        int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
        int ipqdepth, ipqdepthpch = 16;
        int dclk_max;
        int maxdebw, peakbw;
        int clperchgroup;
        int num_groups = ARRAY_SIZE(dev_priv->max_bw);
        int i, ret;

        ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
        if (ret) {
                drm_dbg_kms(&dev_priv->drm,
                            "Failed to get memory subsystem information, ignoring bandwidth limits");
                return ret;
        }

        if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5)
                num_channels *= 2;

        qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);

        if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12)
                qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);

        if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels)
                drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels.");
        if (qi.max_numchannels != 0)
                num_channels = min_t(u8, num_channels, qi.max_numchannels);

        dclk_max = icl_sagv_max_dclk(&qi);

        peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) * dclk_max;
        maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% */
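
        /*
         * Editor's note, a worked example of the two lines above
         * (illustrative LPDDR5 numbers): with num_channels = 8,
         * channel_width = 16 bits (2 bytes) and dclk_max = 2000 MHz,
         * peakbw = 8 * 2 * 2000 = 32000 MB/s, so maxdebw =
         * min(deprogbwlimit * 1000, 32000 * 6 / 10) = min(38000, 19200)
         * = 19200 MB/s for the ADL-S/ADL-P limit of 38 GB/s.
         */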

        ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
        /*
         * clperchgroup = 4kpagespermempage * clperchperblock,
         * clperchperblock = 8 / num_channels * interleave
         */
        clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * qi.deinterleave;

        for (i = 0; i < num_groups; i++) {
                struct intel_bw_info *bi = &dev_priv->max_bw[i];
                int clpchgroup;
                int j;

                clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;

                /*
                 * Only touch the next group if it exists; for the last
                 * group there is no bi_next to initialize.
                 */
                if (i < num_groups - 1) {
                        struct intel_bw_info *bi_next = &dev_priv->max_bw[i + 1];

                        if (clpchgroup < clperchgroup)
                                bi_next->num_planes = (ipqdepth - clpchgroup) /
                                                      clpchgroup + 1;
                        else
                                bi_next->num_planes = 0;
                }

                bi->num_qgv_points = qi.num_points;
                bi->num_psf_gv_points = qi.num_psf_points;

                for (j = 0; j < qi.num_points; j++) {
                        const struct intel_qgv_point *sp = &qi.points[j];
                        int ct, bw;

                        /*
                         * Max row cycle time
                         *
                         * FIXME what is the logic behind the
                         * assumed burst length?
                         */
                        ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
                                   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
                        bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);

                        bi->deratedbw[j] = min(maxdebw,
                                               bw * (100 - sa->derating) / 100);

                        drm_dbg_kms(&dev_priv->drm,
                                    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
                                    i, j, bi->num_planes, bi->deratedbw[j]);
                }

                for (j = 0; j < qi.num_psf_points; j++) {
                        const struct intel_psf_gv_point *sp = &qi.psf_points[j];

                        bi->psf_bw[j] = adl_calc_psf_bw(sp->clk);

                        drm_dbg_kms(&dev_priv->drm,
                                    "BW%d / PSF GV %d: num_planes=%d bw=%u\n",
                                    i, j, bi->num_planes, bi->psf_bw[j]);
                }
        }

        /*
         * If SAGV is disabled in BIOS, we always get 1 SAGV point,
         * but we can't send PCode commands to restrict it as they
         * would fail and be pointless anyway.
         */
        if (qi.num_points == 1)
                dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
        else
                dev_priv->sagv_status = I915_SAGV_ENABLED;

        return 0;
}

static void dg2_get_bw_info(struct drm_i915_private *i915)
{
        struct intel_bw_info *bi = &i915->max_bw[0];

        /*
         * DG2 doesn't have SAGV or QGV points, just a constant max bandwidth
         * that doesn't depend on the number of planes enabled.  Create a
         * single dummy QGV point to reflect that.  DG2-G10 platforms have a
         * constant 50 GB/s bandwidth, whereas DG2-G11 platforms have 38 GB/s.
         */
        bi->num_planes = 1;
        bi->num_qgv_points = 1;
        if (IS_DG2_G11(i915))
                bi->deratedbw[0] = 38000;
        else
                bi->deratedbw[0] = 50000;

        i915->sagv_status = I915_SAGV_NOT_CONTROLLED;
}

static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
                               int num_planes, int qgv_point)
{
        int i;

        /*
         * Let's return max bw for 0 planes
         */
        num_planes = max(1, num_planes);

        for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
                const struct intel_bw_info *bi =
                        &dev_priv->max_bw[i];

                /*
                 * Pcode will not expose all QGV points when
                 * SAGV is forced to off/min/med/max.
                 */
                if (qgv_point >= bi->num_qgv_points)
                        return UINT_MAX;

                if (num_planes >= bi->num_planes)
                        return bi->deratedbw[qgv_point];
        }

        return 0;
}

static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
                               int num_planes, int qgv_point)
{
        int i;

        /*
         * Let's return max bw for 0 planes
         */
        num_planes = max(1, num_planes);

        for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) {
                const struct intel_bw_info *bi =
                        &dev_priv->max_bw[i];

                /*
                 * Pcode will not expose all QGV points when
                 * SAGV is forced to off/min/med/max.
                 */
                if (qgv_point >= bi->num_qgv_points)
                        return UINT_MAX;

                if (num_planes <= bi->num_planes)
                        return bi->deratedbw[qgv_point];
        }

        return dev_priv->max_bw[0].deratedbw[qgv_point];
}
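
/*
 * Editor's note on the two lookups above (an observation from the code,
 * not from bspec): icl_max_bw() scans the groups in ascending order and
 * returns the first one with bi->num_planes <= the active plane count,
 * while tgl_max_bw() scans in descending order and returns the first one
 * with bi->num_planes >= the active plane count, falling back to group 0
 * if none matches. Both treat a QGV index beyond what pcode advertised
 * as "no limit" (UINT_MAX) rather than as an error.
 */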

static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv,
                               int psf_gv_point)
{
        const struct intel_bw_info *bi =
                        &dev_priv->max_bw[0];

        return bi->psf_bw[psf_gv_point];
}

void intel_bw_init_hw(struct drm_i915_private *dev_priv)
{
        if (!HAS_DISPLAY(dev_priv))
                return;

        if (IS_DG2(dev_priv))
                dg2_get_bw_info(dev_priv);
        else if (IS_ALDERLAKE_P(dev_priv))
                tgl_get_bw_info(dev_priv, &adlp_sa_info);
        else if (IS_ALDERLAKE_S(dev_priv))
                tgl_get_bw_info(dev_priv, &adls_sa_info);
        else if (IS_ROCKETLAKE(dev_priv))
                tgl_get_bw_info(dev_priv, &rkl_sa_info);
        else if (DISPLAY_VER(dev_priv) == 12)
                tgl_get_bw_info(dev_priv, &tgl_sa_info);
        else if (DISPLAY_VER(dev_priv) == 11)
                icl_get_bw_info(dev_priv, &icl_sa_info);
}

static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
{
        /*
         * We assume cursors are small enough
         * not to cause bandwidth problems.
         */
        return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
}

static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
{
        struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
        unsigned int data_rate = 0;
        enum plane_id plane_id;

        for_each_plane_id_on_crtc(crtc, plane_id) {
                /*
                 * We assume cursors are small enough
                 * not to cause bandwidth problems.
                 */
                if (plane_id == PLANE_CURSOR)
                        continue;

                data_rate += crtc_state->data_rate[plane_id];
        }

        return data_rate;
}

void intel_bw_crtc_update(struct intel_bw_state *bw_state,
                          const struct intel_crtc_state *crtc_state)
{
        struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
        struct drm_i915_private *i915 = to_i915(crtc->base.dev);

        bw_state->data_rate[crtc->pipe] =
                intel_bw_crtc_data_rate(crtc_state);
        bw_state->num_active_planes[crtc->pipe] =
                intel_bw_crtc_num_active_planes(crtc_state);

        drm_dbg_kms(&i915->drm, "pipe %c data rate %u num active planes %u\n",
                    pipe_name(crtc->pipe),
                    bw_state->data_rate[crtc->pipe],
                    bw_state->num_active_planes[crtc->pipe]);
}

static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
                                               const struct intel_bw_state *bw_state)
{
        unsigned int num_active_planes = 0;
        enum pipe pipe;

        for_each_pipe(dev_priv, pipe)
                num_active_planes += bw_state->num_active_planes[pipe];

        return num_active_planes;
}

static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
                                       const struct intel_bw_state *bw_state)
{
        unsigned int data_rate = 0;
        enum pipe pipe;

        for_each_pipe(dev_priv, pipe)
                data_rate += bw_state->data_rate[pipe];

        if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active(dev_priv))
                data_rate = data_rate * 105 / 100;

        return data_rate;
}
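
/*
 * Editor's note on the 105/100 factor above (a reading of the code, not a
 * bspec quote): on display version 13+ with VT-d active, the summed plane
 * data rate is padded by 5% to cover IOMMU translation overhead, e.g. a
 * total of 10000000 in the driver's kB/s-scale units becomes 10500000
 * before it is compared against the QGV/PSF limits.
 */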

struct intel_bw_state *
intel_atomic_get_old_bw_state(struct intel_atomic_state *state)
{
        struct drm_i915_private *dev_priv = to_i915(state->base.dev);
        struct intel_global_state *bw_state;

        bw_state = intel_atomic_get_old_global_obj_state(state, &dev_priv->bw_obj);

        return to_intel_bw_state(bw_state);
}

struct intel_bw_state *
intel_atomic_get_new_bw_state(struct intel_atomic_state *state)
{
        struct drm_i915_private *dev_priv = to_i915(state->base.dev);
        struct intel_global_state *bw_state;

        bw_state = intel_atomic_get_new_global_obj_state(state, &dev_priv->bw_obj);

        return to_intel_bw_state(bw_state);
}

struct intel_bw_state *
intel_atomic_get_bw_state(struct intel_atomic_state *state)
{
        struct drm_i915_private *dev_priv = to_i915(state->base.dev);
        struct intel_global_state *bw_state;

        bw_state = intel_atomic_get_global_obj_state(state, &dev_priv->bw_obj);
        if (IS_ERR(bw_state))
                return ERR_CAST(bw_state);

        return to_intel_bw_state(bw_state);
}

int skl_bw_calc_min_cdclk(struct intel_atomic_state *state)
{
        struct drm_i915_private *dev_priv = to_i915(state->base.dev);
        struct intel_bw_state *new_bw_state = NULL;
        struct intel_bw_state *old_bw_state = NULL;
        const struct intel_crtc_state *crtc_state;
        struct intel_crtc *crtc;
        int max_bw = 0;
        enum pipe pipe;
        int i;

        for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
                enum plane_id plane_id;
                struct intel_dbuf_bw *crtc_bw;

                new_bw_state = intel_atomic_get_bw_state(state);
                if (IS_ERR(new_bw_state))
                        return PTR_ERR(new_bw_state);

                old_bw_state = intel_atomic_get_old_bw_state(state);

                crtc_bw = &new_bw_state->dbuf_bw[crtc->pipe];

                memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw));

                if (!crtc_state->hw.active)
                        continue;

                for_each_plane_id_on_crtc(crtc, plane_id) {
                        const struct skl_ddb_entry *plane_alloc =
                                &crtc_state->wm.skl.plane_ddb_y[plane_id];
                        const struct skl_ddb_entry *uv_plane_alloc =
                                &crtc_state->wm.skl.plane_ddb_uv[plane_id];
                        unsigned int data_rate = crtc_state->data_rate[plane_id];
                        unsigned int dbuf_mask = 0;
                        enum dbuf_slice slice;

                        dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, plane_alloc);
                        dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, uv_plane_alloc);

                        /*
                         * FIXME: To calculate this more properly we probably
                         * need to split the per-plane data_rate into
                         * data_rate_y and data_rate_uv for multi-planar
                         * formats, so that it doesn't get accounted twice if
                         * the two planes happen to reside on different
                         * slices. However, for pre-icl this works anyway
                         * because we have only a single slice, and for icl+
                         * the uv plane has a non-zero data rate. So in the
                         * worst case these calculations are a bit
                         * pessimistic, which shouldn't pose any significant
                         * problem.
                         */
                        for_each_dbuf_slice_in_mask(dev_priv, slice, dbuf_mask)
                                crtc_bw->used_bw[slice] += data_rate;
                }
        }

        if (!old_bw_state)
                return 0;

        for_each_pipe(dev_priv, pipe) {
                struct intel_dbuf_bw *crtc_bw;
                enum dbuf_slice slice;

                crtc_bw = &new_bw_state->dbuf_bw[pipe];

                for_each_dbuf_slice(dev_priv, slice) {
                        /*
                         * Current experimental observations show that,
                         * contrary to BSpec, we get underruns once we exceed
                         * 64 * CDCLK for all slices in total. As a temporary
                         * measure, in order not to keep CDCLK bumped up all
                         * the time, we calculate CDCLK according to this
                         * formula for the overall bandwidth consumed by the
                         * slices.
                         */
                        max_bw += crtc_bw->used_bw[slice];
                }
        }

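        /*
         * Editor's note, a worked example of the 64 * CDCLK rule below
         * (illustrative numbers): if the slices together consume
         * max_bw = 640000 in the driver's data-rate units, the CDCLK
         * floor becomes 640000 / 64 = 10000, i.e. the resulting
         * min_cdclk enforces max_bw <= 64 * CDCLK.
         */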
        new_bw_state->min_cdclk = max_bw / 64;

        if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) {
                int ret = intel_atomic_lock_global_state(&new_bw_state->base);

                if (ret)
                        return ret;
        }

        return 0;
}

int intel_bw_calc_min_cdclk(struct intel_atomic_state *state)
{
        struct drm_i915_private *dev_priv = to_i915(state->base.dev);
        struct intel_bw_state *new_bw_state = NULL;
        struct intel_bw_state *old_bw_state = NULL;
        const struct intel_crtc_state *crtc_state;
        struct intel_crtc *crtc;
        int min_cdclk = 0;
        enum pipe pipe;
        int i;

        for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
                new_bw_state = intel_atomic_get_bw_state(state);
                if (IS_ERR(new_bw_state))
                        return PTR_ERR(new_bw_state);

                old_bw_state = intel_atomic_get_old_bw_state(state);
        }

        if (!old_bw_state)
                return 0;

        for_each_pipe(dev_priv, pipe) {
                struct intel_cdclk_state *cdclk_state;

                cdclk_state = intel_atomic_get_new_cdclk_state(state);
                if (!cdclk_state)
                        return 0;

                min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk);
        }

        new_bw_state->min_cdclk = min_cdclk;

        if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) {
                int ret = intel_atomic_lock_global_state(&new_bw_state->base);

                if (ret)
                        return ret;
        }

        return 0;
}

int intel_bw_atomic_check(struct intel_atomic_state *state)
{
        struct drm_i915_private *dev_priv = to_i915(state->base.dev);
        struct intel_crtc_state *new_crtc_state, *old_crtc_state;
        struct intel_bw_state *new_bw_state = NULL;
        const struct intel_bw_state *old_bw_state = NULL;
        unsigned int data_rate;
        unsigned int num_active_planes;
        struct intel_crtc *crtc;
        int i, ret;
        u32 allowed_points = 0;
        unsigned int max_bw_point = 0, max_bw = 0;
        unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
        unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points;
        u32 mask = 0;

        /* FIXME earlier gens need some checks too */
        if (DISPLAY_VER(dev_priv) < 11)
                return 0;

        /*
         * We can _not_ use the whole ADLS_QGV_PT_MASK here, as PCode rejects
         * the request if we try masking any unadvertised points. So we need
         * to operate only with those returned from PCode.
         */
        if (num_qgv_points > 0)
                mask |= REG_GENMASK(num_qgv_points - 1, 0);

        if (num_psf_gv_points > 0)
                mask |= REG_GENMASK(num_psf_gv_points - 1, 0) << ADLS_PSF_PT_SHIFT;
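
        /*
         * Editor's note, a worked example of the mask layout above
         * (illustrative counts): with num_qgv_points = 3 and
         * num_psf_gv_points = 2, mask = 0b111 for the QGV bits plus
         * 0b11 << ADLS_PSF_PT_SHIFT for the PSF bits, covering exactly
         * the points pcode advertised and nothing more.
         */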

        for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
                                            new_crtc_state, i) {
                unsigned int old_data_rate =
                        intel_bw_crtc_data_rate(old_crtc_state);
                unsigned int new_data_rate =
                        intel_bw_crtc_data_rate(new_crtc_state);
                unsigned int old_active_planes =
                        intel_bw_crtc_num_active_planes(old_crtc_state);
                unsigned int new_active_planes =
                        intel_bw_crtc_num_active_planes(new_crtc_state);

                /*
                 * Avoid locking the bw state when
                 * nothing significant has changed.
                 */
                if (old_data_rate == new_data_rate &&
                    old_active_planes == new_active_planes)
                        continue;

                new_bw_state = intel_atomic_get_bw_state(state);
                if (IS_ERR(new_bw_state))
                        return PTR_ERR(new_bw_state);

                new_bw_state->data_rate[crtc->pipe] = new_data_rate;
                new_bw_state->num_active_planes[crtc->pipe] = new_active_planes;

                drm_dbg_kms(&dev_priv->drm,
                            "pipe %c data rate %u num active planes %u\n",
                            pipe_name(crtc->pipe),
                            new_bw_state->data_rate[crtc->pipe],
                            new_bw_state->num_active_planes[crtc->pipe]);
        }

        if (!new_bw_state)
                return 0;

        ret = intel_atomic_lock_global_state(&new_bw_state->base);
        if (ret)
                return ret;

        data_rate = intel_bw_data_rate(dev_priv, new_bw_state);
        data_rate = DIV_ROUND_UP(data_rate, 1000);

        num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state);

        for (i = 0; i < num_qgv_points; i++) {
                unsigned int max_data_rate;

                if (DISPLAY_VER(dev_priv) > 11)
                        max_data_rate = tgl_max_bw(dev_priv, num_active_planes, i);
                else
                        max_data_rate = icl_max_bw(dev_priv, num_active_planes, i);
                /*
                 * We need to know which qgv point gives us
                 * maximum bandwidth in order to disable SAGV
                 * if we find that we exceed SAGV block time
                 * with watermarks. By that moment we already
                 * have those, as they are calculated earlier
                 * in intel_atomic_check.
                 */
                if (max_data_rate > max_bw) {
                        max_bw_point = i;
                        max_bw = max_data_rate;
                }
                if (max_data_rate >= data_rate)
                        allowed_points |= REG_FIELD_PREP(ADLS_QGV_PT_MASK, BIT(i));

                drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n",
                            i, max_data_rate, data_rate);
        }

        for (i = 0; i < num_psf_gv_points; i++) {
                unsigned int max_data_rate = adl_psf_bw(dev_priv, i);

                if (max_data_rate >= data_rate)
                        allowed_points |= REG_FIELD_PREP(ADLS_PSF_PT_MASK, BIT(i));

                drm_dbg_kms(&dev_priv->drm, "PSF GV point %d: max bw %d"
                            " required %d\n",
                            i, max_data_rate, data_rate);
        }

        /*
         * BSpec states that we should always have at least one allowed
         * point left, so if we couldn't find any, simply reject the
         * configuration.
         */
        if ((allowed_points & ADLS_QGV_PT_MASK) == 0) {
                drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory"
                            " bandwidth %d for display configuration (%d active planes).\n",
                            data_rate, num_active_planes);
                return -EINVAL;
        }

        if (num_psf_gv_points > 0) {
                if ((allowed_points & ADLS_PSF_PT_MASK) == 0) {
                        drm_dbg_kms(&dev_priv->drm, "No PSF GV points provide sufficient memory"
                                    " bandwidth %d for display configuration (%d active planes).\n",
                                    data_rate, num_active_planes);
                        return -EINVAL;
                }
        }

        /*
         * Leave only a single point with the highest bandwidth if we
         * can't enable SAGV due to the increased memory latency it may
         * cause.
         */
        if (!intel_can_enable_sagv(dev_priv, new_bw_state)) {
                allowed_points = BIT(max_bw_point);
                drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n",
                            max_bw_point);
        }
        /*
         * We store the ones which need to be masked as that is what PCode
         * actually accepts as a parameter.
         */
        new_bw_state->qgv_points_mask = ~allowed_points & mask;
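
        /*
         * Editor's note, a worked example of the inversion above
         * (illustrative values): with three QGV points advertised
         * (mask = 0b111) and points 0 and 1 fast enough
         * (allowed_points = 0b011), qgv_points_mask = ~0b011 & 0b111 =
         * 0b100, i.e. only the insufficient point 2 is handed to
         * icl_pcode_restrict_qgv_points() for disabling.
         */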

        old_bw_state = intel_atomic_get_old_bw_state(state);
        /*
         * If the actual mask has changed we need to make sure that the
         * commits are serialized (in case this is a nomodeset, nonblocking
         * commit).
         */
        if (new_bw_state->qgv_points_mask != old_bw_state->qgv_points_mask) {
                ret = intel_atomic_serialize_global_state(&new_bw_state->base);
                if (ret)
                        return ret;
        }

        return 0;
}

static struct intel_global_state *
intel_bw_duplicate_state(struct intel_global_obj *obj)
{
        struct intel_bw_state *state;

        state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
        if (!state)
                return NULL;

        return &state->base;
}

static void intel_bw_destroy_state(struct intel_global_obj *obj,
                                   struct intel_global_state *state)
{
        kfree(state);
}

static const struct intel_global_state_funcs intel_bw_funcs = {
        .atomic_duplicate_state = intel_bw_duplicate_state,
        .atomic_destroy_state = intel_bw_destroy_state,
};

int intel_bw_init(struct drm_i915_private *dev_priv)
{
        struct intel_bw_state *state;

        state = kzalloc(sizeof(*state), GFP_KERNEL);
        if (!state)
                return -ENOMEM;

        intel_atomic_global_obj_init(dev_priv, &dev_priv->bw_obj,
                                     &state->base, &intel_bw_funcs);

        return 0;
}