// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Broadcom
 */

/**
 * DOC: VC4 HVS module.
 *
 * The Hardware Video Scaler (HVS) is the piece of hardware that does
 * translation, scaling, colorspace conversion, and compositing of
 * pixels stored in framebuffers into a FIFO of pixels going out to
 * the Pixel Valve (CRTC).  It operates at the system clock rate (the
 * system audio clock gate, specifically), which is much higher than
 * the pixel clock rate.
 *
 * There is a single global HVS, with multiple output FIFOs that can
 * be consumed by the PVs.  This file just manages the resources for
 * the HVS, while the vc4_crtc.c code actually drives HVS setup for
 * each CRTC.
 */

#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/component.h>
#include <linux/platform_device.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_drv.h>
#include <drm/drm_vblank.h>

#include <soc/bcm2835/raspberrypi-firmware.h>

#include "vc4_drv.h"
#include "vc4_regs.h"

static const struct debugfs_reg32 vc4_hvs_regs[] = {
	VC4_REG32(SCALER_DISPCTRL),
	VC4_REG32(SCALER_DISPSTAT),
	VC4_REG32(SCALER_DISPID),
	VC4_REG32(SCALER_DISPECTRL),
	VC4_REG32(SCALER_DISPPROF),
	VC4_REG32(SCALER_DISPDITHER),
	VC4_REG32(SCALER_DISPEOLN),
	VC4_REG32(SCALER_DISPLIST0),
	VC4_REG32(SCALER_DISPLIST1),
	VC4_REG32(SCALER_DISPLIST2),
	VC4_REG32(SCALER_DISPLSTAT),
	VC4_REG32(SCALER_DISPLACT0),
	VC4_REG32(SCALER_DISPLACT1),
	VC4_REG32(SCALER_DISPLACT2),
	VC4_REG32(SCALER_DISPCTRL0),
	VC4_REG32(SCALER_DISPBKGND0),
	VC4_REG32(SCALER_DISPSTAT0),
	VC4_REG32(SCALER_DISPBASE0),
	VC4_REG32(SCALER_DISPCTRL1),
	VC4_REG32(SCALER_DISPBKGND1),
	VC4_REG32(SCALER_DISPSTAT1),
	VC4_REG32(SCALER_DISPBASE1),
	VC4_REG32(SCALER_DISPCTRL2),
	VC4_REG32(SCALER_DISPBKGND2),
	VC4_REG32(SCALER_DISPSTAT2),
	VC4_REG32(SCALER_DISPBASE2),
	VC4_REG32(SCALER_DISPALPHA2),
	VC4_REG32(SCALER_OLEDOFFS),
	VC4_REG32(SCALER_OLEDCOEF0),
	VC4_REG32(SCALER_OLEDCOEF1),
	VC4_REG32(SCALER_OLEDCOEF2),
};

static const struct debugfs_reg32 vc6_hvs_regs[] = {
	VC4_REG32(SCALER6_VERSION),
	VC4_REG32(SCALER6_CXM_SIZE),
	VC4_REG32(SCALER6_LBM_SIZE),
	VC4_REG32(SCALER6_UBM_SIZE),
	VC4_REG32(SCALER6_COBA_SIZE),
	VC4_REG32(SCALER6_COB_SIZE),
	VC4_REG32(SCALER6_CONTROL),
	VC4_REG32(SCALER6_FETCHER_STATUS),
	VC4_REG32(SCALER6_FETCH_STATUS),
	VC4_REG32(SCALER6_HANDLE_ERROR),
	VC4_REG32(SCALER6_DISP0_CTRL0),
	VC4_REG32(SCALER6_DISP0_CTRL1),
	VC4_REG32(SCALER6_DISP0_BGND),
	VC4_REG32(SCALER6_DISP0_LPTRS),
	VC4_REG32(SCALER6_DISP0_COB),
	VC4_REG32(SCALER6_DISP0_STATUS),
	VC4_REG32(SCALER6_DISP0_DL),
	VC4_REG32(SCALER6_DISP0_RUN),
	VC4_REG32(SCALER6_DISP1_CTRL0),
	VC4_REG32(SCALER6_DISP1_CTRL1),
	VC4_REG32(SCALER6_DISP1_BGND),
	VC4_REG32(SCALER6_DISP1_LPTRS),
	VC4_REG32(SCALER6_DISP1_COB),
	VC4_REG32(SCALER6_DISP1_STATUS),
	VC4_REG32(SCALER6_DISP1_DL),
	VC4_REG32(SCALER6_DISP1_RUN),
	VC4_REG32(SCALER6_DISP2_CTRL0),
	VC4_REG32(SCALER6_DISP2_CTRL1),
	VC4_REG32(SCALER6_DISP2_BGND),
	VC4_REG32(SCALER6_DISP2_LPTRS),
	VC4_REG32(SCALER6_DISP2_COB),
	VC4_REG32(SCALER6_DISP2_STATUS),
	VC4_REG32(SCALER6_DISP2_DL),
	VC4_REG32(SCALER6_DISP2_RUN),
	VC4_REG32(SCALER6_EOLN),
	VC4_REG32(SCALER6_DL_STATUS),
	VC4_REG32(SCALER6_BFG_MISC),
	VC4_REG32(SCALER6_QOS0),
	VC4_REG32(SCALER6_PROF0),
	VC4_REG32(SCALER6_QOS1),
	VC4_REG32(SCALER6_PROF1),
	VC4_REG32(SCALER6_QOS2),
	VC4_REG32(SCALER6_PROF2),
	VC4_REG32(SCALER6_PRI_MAP0),
	VC4_REG32(SCALER6_PRI_MAP1),
	VC4_REG32(SCALER6_HISTCTRL),
	VC4_REG32(SCALER6_HISTBIN0),
	VC4_REG32(SCALER6_HISTBIN1),
	VC4_REG32(SCALER6_HISTBIN2),
	VC4_REG32(SCALER6_HISTBIN3),
	VC4_REG32(SCALER6_HISTBIN4),
	VC4_REG32(SCALER6_HISTBIN5),
	VC4_REG32(SCALER6_HISTBIN6),
	VC4_REG32(SCALER6_HISTBIN7),
	VC4_REG32(SCALER6_HDR_CFG_REMAP),
	VC4_REG32(SCALER6_COL_SPACE),
	VC4_REG32(SCALER6_HVS_ID),
	VC4_REG32(SCALER6_CFC1),
	VC4_REG32(SCALER6_DISP_UPM_ISO0),
	VC4_REG32(SCALER6_DISP_UPM_ISO1),
	VC4_REG32(SCALER6_DISP_UPM_ISO2),
	VC4_REG32(SCALER6_DISP_LBM_ISO0),
	VC4_REG32(SCALER6_DISP_LBM_ISO1),
	VC4_REG32(SCALER6_DISP_LBM_ISO2),
	VC4_REG32(SCALER6_DISP_COB_ISO0),
	VC4_REG32(SCALER6_DISP_COB_ISO1),
	VC4_REG32(SCALER6_DISP_COB_ISO2),
	VC4_REG32(SCALER6_BAD_COB),
	VC4_REG32(SCALER6_BAD_LBM),
	VC4_REG32(SCALER6_BAD_UPM),
	VC4_REG32(SCALER6_BAD_AXI),
};

void vc4_hvs_dump_state(struct vc4_hvs *hvs)
{
	struct drm_device *drm = &hvs->vc4->base;
	struct drm_printer p = drm_info_printer(&hvs->pdev->dev);
	int idx, i;

	if (!drm_dev_enter(drm, &idx))
		return;

	drm_print_regset32(&p, &hvs->regset);

	DRM_INFO("HVS ctx:\n");
	for (i = 0; i < 64; i += 4) {
		DRM_INFO("0x%08x (%s): 0x%08x 0x%08x 0x%08x 0x%08x\n",
			 i * 4, i < HVS_BOOTLOADER_DLIST_END ? "B" : "D",
			 readl((u32 __iomem *)hvs->dlist + i + 0),
			 readl((u32 __iomem *)hvs->dlist + i + 1),
			 readl((u32 __iomem *)hvs->dlist + i + 2),
			 readl((u32 __iomem *)hvs->dlist + i + 3));
	}

	drm_dev_exit(idx);
}

static int vc4_hvs_debugfs_underrun(struct seq_file *m, void *data)
{
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_printer p = drm_seq_file_printer(m);

	drm_printf(&p, "%d\n", atomic_read(&vc4->underrun));

	return 0;
}

static int vc4_hvs_debugfs_dlist(struct seq_file *m, void *data)
{
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct drm_printer p = drm_seq_file_printer(m);
	unsigned int dlist_mem_size = hvs->dlist_mem_size;
	unsigned int next_entry_start;
	unsigned int i, j;
	u32 dlist_word, dispstat;

	for (i = 0; i < SCALER_CHANNELS_COUNT; i++) {
		dispstat = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTATX(i)),
					 SCALER_DISPSTATX_MODE);
		if (dispstat == SCALER_DISPSTATX_MODE_DISABLED ||
		    dispstat == SCALER_DISPSTATX_MODE_EOF) {
			drm_printf(&p, "HVS chan %u disabled\n", i);
			continue;
		}

		drm_printf(&p, "HVS chan %u:\n", i);
		next_entry_start = 0;

		for (j = HVS_READ(SCALER_DISPLISTX(i)); j < dlist_mem_size; j++) {
			dlist_word = readl((u32 __iomem *)vc4->hvs->dlist + j);
			drm_printf(&p, "dlist: %02d: 0x%08x\n", j,
				   dlist_word);
			if (!next_entry_start ||
			    next_entry_start == j) {
				if (dlist_word & SCALER_CTL0_END)
					break;
				next_entry_start = j +
					VC4_GET_FIELD(dlist_word,
						      SCALER_CTL0_SIZE);
			}
		}
	}

	return 0;
}

static int vc6_hvs_debugfs_dlist(struct seq_file *m, void *data)
{
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct drm_printer p = drm_seq_file_printer(m);
	unsigned int dlist_mem_size = hvs->dlist_mem_size;
	unsigned int next_entry_start;
	unsigned int i;

	for (i = 0; i < SCALER_CHANNELS_COUNT; i++) {
		unsigned int active_dlist, dispstat;
		unsigned int j;

		dispstat = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(i)),
					 SCALER6_DISPX_STATUS_MODE);
		if (dispstat == SCALER6_DISPX_STATUS_MODE_DISABLED ||
		    dispstat == SCALER6_DISPX_STATUS_MODE_EOF) {
			drm_printf(&p, "HVS chan %u disabled\n", i);
			continue;
		}

		drm_printf(&p, "HVS chan %u:\n", i);

		active_dlist = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_DL(i)),
					     SCALER6_DISPX_DL_LACT);
		next_entry_start = 0;

		for (j = active_dlist; j < dlist_mem_size; j++) {
			u32 dlist_word;

			dlist_word = readl((u32 __iomem *)vc4->hvs->dlist + j);
			drm_printf(&p, "dlist: %02d: 0x%08x\n", j,
				   dlist_word);
			if (!next_entry_start ||
			    next_entry_start == j) {
				if (dlist_word & SCALER_CTL0_END)
					break;
				next_entry_start = j +
					VC4_GET_FIELD(dlist_word,
						      SCALER_CTL0_SIZE);
			}
		}
	}

	return 0;
}

static int vc5_hvs_debugfs_gamma(struct seq_file *m, void *data)
{
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct drm_printer p = drm_seq_file_printer(m);
	unsigned int i, chan;
	u32 dispstat, dispbkgndx;

	for (chan = 0; chan < SCALER_CHANNELS_COUNT; chan++) {
		u32 x_c, grad;
		u32 offset = SCALER5_DSPGAMMA_START +
			chan * SCALER5_DSPGAMMA_CHAN_OFFSET;

		dispstat = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTATX(chan)),
					 SCALER_DISPSTATX_MODE);
		if (dispstat == SCALER_DISPSTATX_MODE_DISABLED ||
		    dispstat == SCALER_DISPSTATX_MODE_EOF) {
			drm_printf(&p, "HVS channel %u: Channel disabled\n", chan);
			continue;
		}

		dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(chan));
		if (!(dispbkgndx & SCALER_DISPBKGND_GAMMA)) {
			drm_printf(&p, "HVS channel %u: Gamma disabled\n", chan);
			continue;
		}

		drm_printf(&p, "HVS channel %u:\n", chan);
		drm_printf(&p, "  red:\n");
		for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8) {
			x_c = HVS_READ(offset);
			grad = HVS_READ(offset + 4);
			drm_printf(&p, "  %08x %08x - x %u, c %u, grad %u\n",
				   x_c, grad,
				   VC4_GET_FIELD(x_c, SCALER5_DSPGAMMA_OFF_X),
				   VC4_GET_FIELD(x_c, SCALER5_DSPGAMMA_OFF_C),
				   grad);
		}
		drm_printf(&p, "  green:\n");
		for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8) {
			x_c = HVS_READ(offset);
			grad = HVS_READ(offset + 4);
			drm_printf(&p, "  %08x %08x - x %u, c %u, grad %u\n",
				   x_c, grad,
				   VC4_GET_FIELD(x_c, SCALER5_DSPGAMMA_OFF_X),
				   VC4_GET_FIELD(x_c, SCALER5_DSPGAMMA_OFF_C),
				   grad);
		}
		drm_printf(&p, "  blue:\n");
		for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8) {
			x_c = HVS_READ(offset);
			grad = HVS_READ(offset + 4);
			drm_printf(&p, "  %08x %08x - x %u, c %u, grad %u\n",
				   x_c, grad,
				   VC4_GET_FIELD(x_c, SCALER5_DSPGAMMA_OFF_X),
				   VC4_GET_FIELD(x_c, SCALER5_DSPGAMMA_OFF_C),
				   grad);
		}

		/* Alpha only valid on channel 2 */
		if (chan != 2)
			continue;

		drm_printf(&p, "  alpha:\n");
		for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8) {
			x_c = HVS_READ(offset);
			grad = HVS_READ(offset + 4);
			drm_printf(&p, "  %08x %08x - x %u, c %u, grad %u\n",
				   x_c, grad,
				   VC4_GET_FIELD(x_c, SCALER5_DSPGAMMA_OFF_X),
				   VC4_GET_FIELD(x_c, SCALER5_DSPGAMMA_OFF_C),
				   grad);
		}
	}
	return 0;
}

/* The filter kernel is composed of dwords each containing 3 9-bit
 * signed integers packed next to each other.
 */
#define VC4_INT_TO_COEFF(coeff) ((coeff) & 0x1ff)
#define VC4_PPF_FILTER_WORD(c0, c1, c2)				\
	((((c0) & 0x1ff) << 0) |				\
	 (((c1) & 0x1ff) << 9) |				\
	 (((c2) & 0x1ff) << 18))
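
/*
 * Worked example (illustrative only): VC4_PPF_FILTER_WORD(-2, 0, 2)
 * packs -2 as 0x1fe (9-bit two's complement) into bits 8:0, 0 into
 * bits 17:9 and 2 into bits 26:18, giving 0x000801fe.
 */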

/* The whole filter kernel is arranged as the coefficients 0-16 going
 * up, then a pad, then 17-31 going down and reversed within the
 * dwords.  This means that a linear phase kernel (where it's
 * symmetrical at the boundary between 15 and 16) has the last 5
 * dwords matching the first 5, but reversed.
 */
#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8,	\
				c9, c10, c11, c12, c13, c14, c15)	\
	{VC4_PPF_FILTER_WORD(c0, c1, c2),				\
	 VC4_PPF_FILTER_WORD(c3, c4, c5),				\
	 VC4_PPF_FILTER_WORD(c6, c7, c8),				\
	 VC4_PPF_FILTER_WORD(c9, c10, c11),				\
	 VC4_PPF_FILTER_WORD(c12, c13, c14),				\
	 VC4_PPF_FILTER_WORD(c15, c15, 0)}

#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6
#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1)
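
/*
 * A quick sanity check of the layout: with
 * VC4_LINEAR_PHASE_KERNEL_DWORDS = 6, VC4_KERNEL_DWORDS is 11, and
 * vc4_hvs_upload_linear_kernel() below writes dwords 0-5 verbatim
 * while dwords 6-10 mirror dwords 4-0.
 */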

/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali.
 * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
 */
static const u32 mitchell_netravali_1_3_1_3_kernel[] =
	VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18,
				50, 82, 119, 155, 187, 213, 227);

static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs,
					struct drm_mm_node *space,
					const u32 *kernel)
{
	int ret, i;
	u32 __iomem *dst_kernel;

	/*
	 * NOTE: We don't need a call to drm_dev_enter()/drm_dev_exit()
	 * here since that function is only called from vc4_hvs_bind().
	 */

	ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS);
	if (ret) {
		DRM_ERROR("Failed to allocate space for filter kernel: %d\n",
			  ret);
		return ret;
	}

	dst_kernel = hvs->dlist + space->start;

	for (i = 0; i < VC4_KERNEL_DWORDS; i++) {
		if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS) {
			writel(kernel[i], &dst_kernel[i]);
		} else {
			writel(kernel[VC4_KERNEL_DWORDS - i - 1],
			       &dst_kernel[i]);
		}
	}

	return 0;
}

static void vc4_hvs_lut_load(struct vc4_hvs *hvs,
			     struct vc4_crtc *vc4_crtc)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	struct drm_crtc *crtc = &vc4_crtc->base;
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
	int idx;
	u32 i;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_5);

	if (!drm_dev_enter(drm, &idx))
		return;

	/* The LUT memory is laid out with each HVS channel in order,
	 * each of which takes 256 writes for R, 256 for G, then 256
	 * for B.
	 */
	HVS_WRITE(SCALER_GAMADDR,
		  SCALER_GAMADDR_AUTOINC |
		  (vc4_state->assigned_channel * 3 * crtc->gamma_size));

	for (i = 0; i < crtc->gamma_size; i++)
		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_r[i]);
	for (i = 0; i < crtc->gamma_size; i++)
		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_g[i]);
	for (i = 0; i < crtc->gamma_size; i++)
		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]);

	drm_dev_exit(idx);
}
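
/*
 * For example, assuming the 256-entry gamma table the driver sets up
 * elsewhere, channel 1's LUT begins at word offset 1 * 3 * 256 = 768
 * in the gamma SRAM, followed by its 256 R, 256 G and 256 B entries.
 */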

static void vc4_hvs_update_gamma_lut(struct vc4_hvs *hvs,
				     struct vc4_crtc *vc4_crtc)
{
	struct drm_crtc *crtc = &vc4_crtc->base;
	struct drm_crtc_state *crtc_state = crtc->state;
	struct drm_color_lut *lut = crtc_state->gamma_lut->data;
	u32 length = drm_color_lut_size(crtc_state->gamma_lut);
	u32 i;

	for (i = 0; i < length; i++) {
		vc4_crtc->lut_r[i] = drm_color_lut_extract(lut[i].red, 8);
		vc4_crtc->lut_g[i] = drm_color_lut_extract(lut[i].green, 8);
		vc4_crtc->lut_b[i] = drm_color_lut_extract(lut[i].blue, 8);
	}

	vc4_hvs_lut_load(hvs, vc4_crtc);
}

static void vc5_hvs_write_gamma_entry(struct vc4_hvs *hvs,
				      u32 offset,
				      struct vc5_gamma_entry *gamma)
{
	HVS_WRITE(offset, gamma->x_c_terms);
	HVS_WRITE(offset + 4, gamma->grad_term);
}

static void vc5_hvs_lut_load(struct vc4_hvs *hvs,
			     struct vc4_crtc *vc4_crtc)
{
	struct drm_crtc *crtc = &vc4_crtc->base;
	struct drm_crtc_state *crtc_state = crtc->state;
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
	u32 i;
	u32 offset = SCALER5_DSPGAMMA_START +
		vc4_state->assigned_channel * SCALER5_DSPGAMMA_CHAN_OFFSET;

	for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
		vc5_hvs_write_gamma_entry(hvs, offset, &vc4_crtc->pwl_r[i]);
	for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
		vc5_hvs_write_gamma_entry(hvs, offset, &vc4_crtc->pwl_g[i]);
	for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
		vc5_hvs_write_gamma_entry(hvs, offset, &vc4_crtc->pwl_b[i]);

	if (vc4_state->assigned_channel == 2) {
		/* Alpha only valid on channel 2 */
		for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
			vc5_hvs_write_gamma_entry(hvs, offset, &vc4_crtc->pwl_a[i]);
	}
}

static void vc5_hvs_update_gamma_lut(struct vc4_hvs *hvs,
				     struct vc4_crtc *vc4_crtc)
{
	struct drm_crtc *crtc = &vc4_crtc->base;
	struct drm_color_lut *lut = crtc->state->gamma_lut->data;
	unsigned int step, i;
	u32 start, end;

#define VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl, chan)			\
	start = drm_color_lut_extract(lut[i * step].chan, 12);		\
	end = drm_color_lut_extract(lut[(i + 1) * step - 1].chan, 12);	\
									\
	/* Negative gradients not permitted by the hardware, so		\
	 * flatten such points out.					\
	 */								\
	if (end < start)						\
		end = start;						\
									\
	/* Assume 12bit pipeline.					\
	 * X evenly spread over full range (12 bit).			\
	 * C as U12.4 format.						\
	 * Gradient as U4.8 format.					\
	 */								\
	vc4_crtc->pwl[i] =						\
		VC5_HVS_SET_GAMMA_ENTRY(i << 8, start << 4,		\
				((end - start) << 4) / (step - 1))

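/* Rough worked example, assuming the 256-entry gamma table configured
 * elsewhere in the driver: step is 256 / 16 = 16, so entry i samples
 * lut[16 * i] and lut[16 * i + 15], places X at i << 8 within the
 * 12-bit range, C at start << 4 (U12.4), and the gradient at
 * ((end - start) << 4) / 15 (U4.8).
 */
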
	/* HVS5 has a 16 point piecewise linear function for each colour
	 * channel (including alpha on channel 2) on each display channel.
	 *
	 * Currently take a crude subsample of the gamma LUT, but this could
	 * be improved to implement curve fitting.
	 */
	step = crtc->gamma_size / SCALER5_DSPGAMMA_NUM_POINTS;
	for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++) {
		VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_r, red);
		VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_g, green);
		VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_b, blue);
	}

	vc5_hvs_lut_load(hvs, vc4_crtc);
}

static void vc4_hvs_irq_enable_eof(struct vc4_hvs *hvs,
				   unsigned int channel)
{
	struct vc4_dev *vc4 = hvs->vc4;

	if (hvs->eof_irq[channel].enabled)
		return;

	switch (vc4->gen) {
	case VC4_GEN_4:
		HVS_WRITE(SCALER_DISPCTRL,
			  HVS_READ(SCALER_DISPCTRL) |
			  SCALER_DISPCTRL_DSPEIEOF(channel));
		break;

	case VC4_GEN_5:
		HVS_WRITE(SCALER_DISPCTRL,
			  HVS_READ(SCALER_DISPCTRL) |
			  SCALER5_DISPCTRL_DSPEIEOF(channel));
		break;

	case VC4_GEN_6:
		enable_irq(hvs->eof_irq[channel].desc);
		break;

	default:
		break;
	}

	hvs->eof_irq[channel].enabled = true;
}

static void vc4_hvs_irq_clear_eof(struct vc4_hvs *hvs,
				  unsigned int channel)
{
	struct vc4_dev *vc4 = hvs->vc4;

	if (!hvs->eof_irq[channel].enabled)
		return;

	switch (vc4->gen) {
	case VC4_GEN_4:
		HVS_WRITE(SCALER_DISPCTRL,
			  HVS_READ(SCALER_DISPCTRL) &
			  ~SCALER_DISPCTRL_DSPEIEOF(channel));
		break;

	case VC4_GEN_5:
		HVS_WRITE(SCALER_DISPCTRL,
			  HVS_READ(SCALER_DISPCTRL) &
			  ~SCALER5_DISPCTRL_DSPEIEOF(channel));
		break;

	case VC4_GEN_6:
		disable_irq_nosync(hvs->eof_irq[channel].desc);
		break;

	default:
		break;
	}

	hvs->eof_irq[channel].enabled = false;
}

static struct vc4_hvs_dlist_allocation *
vc4_hvs_alloc_dlist_entry(struct vc4_hvs *hvs,
			  unsigned int channel,
			  size_t dlist_count)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *dev = &vc4->base;
	struct vc4_hvs_dlist_allocation *alloc;
	unsigned long flags;
	int ret;

	if (channel == VC4_HVS_CHANNEL_DISABLED)
		return NULL;

	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
	if (!alloc)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&alloc->node);

	spin_lock_irqsave(&hvs->mm_lock, flags);
	ret = drm_mm_insert_node(&hvs->dlist_mm, &alloc->mm_node,
				 dlist_count);
	spin_unlock_irqrestore(&hvs->mm_lock, flags);
	if (ret) {
		drm_err(dev, "Failed to allocate DLIST entry: %d\n", ret);
		return ERR_PTR(ret);
	}

	alloc->channel = channel;

	return alloc;
}

static void vc4_hvs_free_dlist_entry_locked(struct vc4_hvs *hvs,
					    struct vc4_hvs_dlist_allocation *alloc)
{
	lockdep_assert_held(&hvs->mm_lock);

	if (!list_empty(&alloc->node))
		list_del(&alloc->node);

	drm_mm_remove_node(&alloc->mm_node);
	kfree(alloc);
}

void vc4_hvs_mark_dlist_entry_stale(struct vc4_hvs *hvs,
				    struct vc4_hvs_dlist_allocation *alloc)
{
	unsigned long flags;
	u8 frcnt;

	if (!alloc)
		return;

	if (!drm_mm_node_allocated(&alloc->mm_node))
		return;

	/*
	 * Kunit tests run with a mock device and we consider any hardware
	 * access a test failure. Let's free the dlist allocation right away if
	 * we're running under kunit, we won't risk a dlist corruption anyway.
	 */
	if (kunit_get_current_test()) {
		spin_lock_irqsave(&hvs->mm_lock, flags);
		vc4_hvs_free_dlist_entry_locked(hvs, alloc);
		spin_unlock_irqrestore(&hvs->mm_lock, flags);
		return;
	}

	frcnt = vc4_hvs_get_fifo_frame_count(hvs, alloc->channel);
	alloc->target_frame_count = (frcnt + 1) & ((1 << 6) - 1);

	spin_lock_irqsave(&hvs->mm_lock, flags);

	list_add_tail(&alloc->node, &hvs->stale_dlist_entries);

	HVS_WRITE(SCALER_DISPSTAT, SCALER_DISPSTAT_EOF(alloc->channel));
	vc4_hvs_irq_enable_eof(hvs, alloc->channel);

	spin_unlock_irqrestore(&hvs->mm_lock, flags);
}

static void vc4_hvs_schedule_dlist_sweep(struct vc4_hvs *hvs,
					 unsigned int channel)
{
	unsigned long flags;

	spin_lock_irqsave(&hvs->mm_lock, flags);

	if (!list_empty(&hvs->stale_dlist_entries))
		queue_work(system_unbound_wq, &hvs->free_dlist_work);

	vc4_hvs_irq_clear_eof(hvs, channel);

	spin_unlock_irqrestore(&hvs->mm_lock, flags);
}

/*
 * Frame counts are essentially 6-bit sequence numbers, so we can use
 * sequence number arithmetic, following RFC 1982, to implement a
 * proper comparison between them.
 */
static bool vc4_hvs_frcnt_lte(u8 cnt1, u8 cnt2)
{
	return (s8)((cnt1 << 2) - (cnt2 << 2)) <= 0;
}
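
/*
 * Illustrative check of the arithmetic above: with 6-bit frame counts,
 * vc4_hvs_frcnt_lte(62, 1) is true because (62 << 2) - (1 << 2) = 244,
 * which is -12 when interpreted as an s8, so 1 is correctly treated as
 * coming after 62 across the wrap-around.
 */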

/*
 * Some atomic commits (legacy cursor updates, mostly) will not wait for
 * the next vblank and will just return once the commit has been pushed
 * to the hardware.
 *
 * On the hardware side, our HVS stores the plane parameters in its
 * context RAM, and will use part of the RAM to store data during the
 * frame rendering.
 *
 * This interacts badly if we get multiple commits before the next
 * vblank since we could end up overwriting the DLIST entries used by
 * previous commits if our dlist allocation reuses that entry. In such a
 * case, we would overwrite the data currently being used by the
 * hardware, resulting in a corrupted frame.
 *
 * In order to work around this, we'll queue the dlist entries in a list
 * once the associated CRTC state is destroyed. The HVS only lets us
 * know which entry is currently active, not which ones are no longer
 * being used, so in order to avoid freeing entries that are still used
 * by the hardware we add a guesstimate of the frame count at which our
 * entry will no longer be used, and will only free those entries once
 * we have reached that frame count.
 */
static void vc4_hvs_dlist_free_work(struct work_struct *work)
{
	struct vc4_hvs *hvs = container_of(work, struct vc4_hvs, free_dlist_work);
	struct vc4_hvs_dlist_allocation *cur, *next;
	unsigned long flags;

	spin_lock_irqsave(&hvs->mm_lock, flags);
	list_for_each_entry_safe(cur, next, &hvs->stale_dlist_entries, node) {
		u8 frcnt;

		frcnt = vc4_hvs_get_fifo_frame_count(hvs, cur->channel);
		if (!vc4_hvs_frcnt_lte(cur->target_frame_count, frcnt))
			continue;

		vc4_hvs_free_dlist_entry_locked(hvs, cur);
	}
	spin_unlock_irqrestore(&hvs->mm_lock, flags);
}
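
/*
 * Illustrative lifecycle sketch (no additional behaviour is implied):
 * if a commit on channel 0 goes stale while the fifo reports frame
 * count 10, vc4_hvs_mark_dlist_entry_stale() stores a target frame
 * count of 11 and enables the EOF interrupt; once the interrupt kicks
 * off the sweep via vc4_hvs_schedule_dlist_sweep(), the work item
 * above frees the entry as soon as vc4_hvs_frcnt_lte(11, frcnt) holds.
 */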

u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	u8 field = 0;
	int idx;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6);

	if (!drm_dev_enter(drm, &idx))
		return 0;

	if (vc4->gen >= VC4_GEN_6) {
		field = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(fifo)),
				      SCALER6_DISPX_STATUS_FRCNT);
	} else {
		switch (fifo) {
		case 0:
			field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
					      SCALER_DISPSTAT1_FRCNT0);
			break;
		case 1:
			field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
					      SCALER_DISPSTAT1_FRCNT1);
			break;
		case 2:
			field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2),
					      SCALER_DISPSTAT2_FRCNT2);
			break;
		}
	}

	drm_dev_exit(idx);
	return field;
}

int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output)
{
	struct vc4_dev *vc4 = hvs->vc4;
	u32 reg;
	int ret;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6);

	switch (vc4->gen) {
	case VC4_GEN_4:
		return output;

	case VC4_GEN_5:
		/*
		 * NOTE: We should probably use
		 * drm_dev_enter()/drm_dev_exit() here, but this
		 * function is only used during the DRM device
		 * initialization, so we should be fine.
		 */

		switch (output) {
		case 0:
			return 0;

		case 1:
			return 1;

		case 2:
			reg = HVS_READ(SCALER_DISPECTRL);
			ret = FIELD_GET(SCALER_DISPECTRL_DSP2_MUX_MASK, reg);
			if (ret == 0)
				return 2;

			return 0;

		case 3:
			reg = HVS_READ(SCALER_DISPCTRL);
			ret = FIELD_GET(SCALER_DISPCTRL_DSP3_MUX_MASK, reg);
			if (ret == 3)
				return -EPIPE;

			return ret;

		case 4:
			reg = HVS_READ(SCALER_DISPEOLN);
			ret = FIELD_GET(SCALER_DISPEOLN_DSP4_MUX_MASK, reg);
			if (ret == 3)
				return -EPIPE;

			return ret;

		case 5:
			reg = HVS_READ(SCALER_DISPDITHER);
			ret = FIELD_GET(SCALER_DISPDITHER_DSP5_MUX_MASK, reg);
			if (ret == 3)
				return -EPIPE;

			return ret;

		default:
			return -EPIPE;
		}

	case VC4_GEN_6:
		switch (output) {
		case 0:
			return 0;

		case 2:
			return 2;

		case 1:
		case 3:
		case 4:
			return 1;

		default:
			return -EPIPE;
		}
	}

	return -EPIPE;
}

static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc,
				struct drm_display_mode *mode, bool oneshot)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
	struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state);
	unsigned int chan = vc4_crtc_state->assigned_channel;
	bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE;
	u32 dispbkgndx;
	u32 dispctrl;
	int idx;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_5);

	if (!drm_dev_enter(drm, &idx))
		return -ENODEV;

	HVS_WRITE(SCALER_DISPCTRLX(chan), 0);
	HVS_WRITE(SCALER_DISPCTRLX(chan), SCALER_DISPCTRLX_RESET);
	HVS_WRITE(SCALER_DISPCTRLX(chan), 0);

	/* Turn on the scaler, which will wait for vstart to start
	 * compositing.
	 * When feeding the transposer, we should operate in oneshot
	 * mode.
	 */
	dispctrl = SCALER_DISPCTRLX_ENABLE;
	dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(chan));

	if (vc4->gen == VC4_GEN_4) {
		dispctrl |= VC4_SET_FIELD(mode->hdisplay,
					  SCALER_DISPCTRLX_WIDTH) |
			    VC4_SET_FIELD(mode->vdisplay,
					  SCALER_DISPCTRLX_HEIGHT) |
			    (oneshot ? SCALER_DISPCTRLX_ONESHOT : 0);
		dispbkgndx |= SCALER_DISPBKGND_AUTOHS;
	} else {
		dispctrl |= VC4_SET_FIELD(mode->hdisplay,
					  SCALER5_DISPCTRLX_WIDTH) |
			    VC4_SET_FIELD(mode->vdisplay,
					  SCALER5_DISPCTRLX_HEIGHT) |
			    (oneshot ? SCALER5_DISPCTRLX_ONESHOT : 0);
		dispbkgndx &= ~SCALER5_DISPBKGND_BCK2BCK;
	}

	HVS_WRITE(SCALER_DISPCTRLX(chan), dispctrl);

	dispbkgndx &= ~SCALER_DISPBKGND_GAMMA;
	dispbkgndx &= ~SCALER_DISPBKGND_INTERLACE;

	if (crtc->state->gamma_lut)
		/* Enable gamma if required */
		dispbkgndx |= SCALER_DISPBKGND_GAMMA;

	HVS_WRITE(SCALER_DISPBKGNDX(chan), dispbkgndx |
		  (interlace ? SCALER_DISPBKGND_INTERLACE : 0));

	/* Reload the LUT, since the SRAMs would have been disabled if
	 * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once.
	 */
	if (vc4->gen == VC4_GEN_4)
		vc4_hvs_lut_load(hvs, vc4_crtc);
	else
		vc5_hvs_lut_load(hvs, vc4_crtc);

	drm_dev_exit(idx);

	return 0;
}

static int vc6_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc,
				struct drm_display_mode *mode, bool oneshot)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state);
	unsigned int chan = vc4_crtc_state->assigned_channel;
	bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE;
	u32 disp_ctrl1;
	int idx;

	WARN_ON_ONCE(vc4->gen != VC4_GEN_6);

	if (!drm_dev_enter(drm, &idx))
		return -ENODEV;

	HVS_WRITE(SCALER6_DISPX_CTRL0(chan), SCALER6_DISPX_CTRL0_RESET);

	disp_ctrl1 = HVS_READ(SCALER6_DISPX_CTRL1(chan));
	disp_ctrl1 &= ~SCALER6_DISPX_CTRL1_INTLACE;
	HVS_WRITE(SCALER6_DISPX_CTRL1(chan),
		  disp_ctrl1 | (interlace ? SCALER6_DISPX_CTRL1_INTLACE : 0));

	HVS_WRITE(SCALER6_DISPX_CTRL0(chan),
		  SCALER6_DISPX_CTRL0_ENB |
		  VC4_SET_FIELD(mode->hdisplay - 1,
				SCALER6_DISPX_CTRL0_FWIDTH) |
		  (oneshot ? SCALER6_DISPX_CTRL0_ONESHOT : 0) |
		  VC4_SET_FIELD(mode->vdisplay - 1,
				SCALER6_DISPX_CTRL0_LINES));

	drm_dev_exit(idx);

	return 0;
}

static void __vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	int idx;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_5);

	if (!drm_dev_enter(drm, &idx))
		return;

	if (!(HVS_READ(SCALER_DISPCTRLX(chan)) & SCALER_DISPCTRLX_ENABLE))
		goto out;

	HVS_WRITE(SCALER_DISPCTRLX(chan),
		  HVS_READ(SCALER_DISPCTRLX(chan)) | SCALER_DISPCTRLX_RESET);
	HVS_WRITE(SCALER_DISPCTRLX(chan),
		  HVS_READ(SCALER_DISPCTRLX(chan)) & ~SCALER_DISPCTRLX_ENABLE);

	/* Once we leave, the scaler should be disabled and its fifo empty. */
	WARN_ON_ONCE(HVS_READ(SCALER_DISPCTRLX(chan)) & SCALER_DISPCTRLX_RESET);

	WARN_ON_ONCE(VC4_GET_FIELD(HVS_READ(SCALER_DISPSTATX(chan)),
				   SCALER_DISPSTATX_MODE) !=
		     SCALER_DISPSTATX_MODE_DISABLED);

	WARN_ON_ONCE((HVS_READ(SCALER_DISPSTATX(chan)) &
		      (SCALER_DISPSTATX_FULL | SCALER_DISPSTATX_EMPTY)) !=
		     SCALER_DISPSTATX_EMPTY);

out:
	drm_dev_exit(idx);
}

static void __vc6_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	int idx;

	WARN_ON_ONCE(vc4->gen != VC4_GEN_6);

	if (!drm_dev_enter(drm, &idx))
		return;

	if (!(HVS_READ(SCALER6_DISPX_CTRL0(chan)) & SCALER6_DISPX_CTRL0_ENB))
		goto out;

	HVS_WRITE(SCALER6_DISPX_CTRL0(chan),
		  HVS_READ(SCALER6_DISPX_CTRL0(chan)) | SCALER6_DISPX_CTRL0_RESET);

	HVS_WRITE(SCALER6_DISPX_CTRL0(chan),
		  HVS_READ(SCALER6_DISPX_CTRL0(chan)) & ~SCALER6_DISPX_CTRL0_ENB);

	WARN_ON_ONCE(VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(chan)),
				   SCALER6_DISPX_STATUS_MODE) !=
		     SCALER6_DISPX_STATUS_MODE_DISABLED);

out:
	drm_dev_exit(idx);
}

void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
{
	struct vc4_dev *vc4 = hvs->vc4;

	if (vc4->gen >= VC4_GEN_6)
		__vc6_hvs_stop_channel(hvs, chan);
	else
		__vc4_hvs_stop_channel(hvs, chan);
}

static int vc4_hvs_gamma_check(struct drm_crtc *crtc,
			       struct drm_atomic_state *state)
{
	struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
	struct drm_connector_state *conn_state;
	struct drm_connector *connector;
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	if (vc4->gen == VC4_GEN_4)
		return 0;

	if (!crtc_state->color_mgmt_changed)
		return 0;

	if (crtc_state->gamma_lut) {
		unsigned int len = drm_color_lut_size(crtc_state->gamma_lut);

		if (len != crtc->gamma_size) {
			DRM_DEBUG_KMS("Invalid LUT size; got %u, expected %u\n",
				      len, crtc->gamma_size);
			return -EINVAL;
		}
	}

	connector = vc4_get_crtc_connector(crtc, crtc_state);
	if (!connector)
		return -EINVAL;

	if (connector->connector_type != DRM_MODE_CONNECTOR_HDMIA)
		return 0;

	conn_state = drm_atomic_get_connector_state(state, connector);
	if (!conn_state)
		return -EINVAL;

	crtc_state->mode_changed = true;
	return 0;
}

int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
{
	struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
	struct vc4_hvs_dlist_allocation *alloc;
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_plane *plane;
	const struct drm_plane_state *plane_state;
	u32 dlist_count = 0;

	/* The pixelvalve can only feed one encoder (and encoders are
	 * 1:1 with connectors.)
	 */
	if (hweight32(crtc_state->connector_mask) > 1)
		return -EINVAL;

	drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
		u32 plane_dlist_count = vc4_plane_dlist_size(plane_state);

		drm_dbg_driver(dev, "[CRTC:%d:%s] Found [PLANE:%d:%s] with DLIST size: %u\n",
			       crtc->base.id, crtc->name,
			       plane->base.id, plane->name,
			       plane_dlist_count);

		dlist_count += plane_dlist_count;
	}

	dlist_count++; /* Account for SCALER_CTL0_END. */

	drm_dbg_driver(dev, "[CRTC:%d:%s] Allocating DLIST block with size: %u\n",
		       crtc->base.id, crtc->name, dlist_count);

	alloc = vc4_hvs_alloc_dlist_entry(vc4->hvs, vc4_state->assigned_channel, dlist_count);
	if (IS_ERR(alloc))
		return PTR_ERR(alloc);

	vc4_state->mm = alloc;

	return vc4_hvs_gamma_check(crtc, state);
}

static void vc4_hvs_install_dlist(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
	int idx;

	if (!drm_dev_enter(dev, &idx))
		return;

	WARN_ON(!vc4_state->mm);

	if (vc4->gen >= VC4_GEN_6)
		HVS_WRITE(SCALER6_DISPX_LPTRS(vc4_state->assigned_channel),
			  VC4_SET_FIELD(vc4_state->mm->mm_node.start,
					SCALER6_DISPX_LPTRS_HEADE));
	else
		HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel),
			  vc4_state->mm->mm_node.start);

	drm_dev_exit(idx);
}

static void vc4_hvs_update_dlist(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
	unsigned long flags;

	if (crtc->state->event) {
		crtc->state->event->pipe = drm_crtc_index(crtc);

		WARN_ON(drm_crtc_vblank_get(crtc) != 0);

		spin_lock_irqsave(&dev->event_lock, flags);

		if (!vc4_crtc->feeds_txp || vc4_state->txp_armed) {
			vc4_crtc->event = crtc->state->event;
			crtc->state->event = NULL;
		}

		spin_unlock_irqrestore(&dev->event_lock, flags);
	}

	WARN_ON(!vc4_state->mm);

	spin_lock_irqsave(&vc4_crtc->irq_lock, flags);
	vc4_crtc->current_dlist = vc4_state->mm->mm_node.start;
	spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags);
}

void vc4_hvs_atomic_begin(struct drm_crtc *crtc,
			  struct drm_atomic_state *state)
{
	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
	unsigned long flags;

	spin_lock_irqsave(&vc4_crtc->irq_lock, flags);
	vc4_crtc->current_hvs_channel = vc4_state->assigned_channel;
	spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags);
}

void vc4_hvs_atomic_enable(struct drm_crtc *crtc,
			   struct drm_atomic_state *state)
{
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_display_mode *mode = &crtc->state->adjusted_mode;
	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
	bool oneshot = vc4_crtc->feeds_txp;

	vc4_hvs_install_dlist(crtc);
	vc4_hvs_update_dlist(crtc);

	if (vc4->gen >= VC4_GEN_6)
		vc6_hvs_init_channel(vc4->hvs, crtc, mode, oneshot);
	else
		vc4_hvs_init_channel(vc4->hvs, crtc, mode, oneshot);
}

void vc4_hvs_atomic_disable(struct drm_crtc *crtc,
			    struct drm_atomic_state *state)
{
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_crtc_state *old_state = drm_atomic_get_old_crtc_state(state, crtc);
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(old_state);
	unsigned int chan = vc4_state->assigned_channel;

	vc4_hvs_stop_channel(vc4->hvs, chan);
}

void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
			  struct drm_atomic_state *state)
{
	struct drm_crtc_state *old_state = drm_atomic_get_old_crtc_state(state,
									 crtc);
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
	unsigned int channel = vc4_state->assigned_channel;
	struct drm_plane *plane;
	struct vc4_plane_state *vc4_plane_state;
	bool debug_dump_regs = false;
	bool enable_bg_fill = false;
	u32 __iomem *dlist_start, *dlist_next;
	unsigned int zpos = 0;
	bool found = false;
	int idx;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6);

	if (!drm_dev_enter(dev, &idx)) {
		vc4_crtc_send_vblank(crtc);
		return;
	}

	if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED) {
		drm_dev_exit(idx);
		return;
	}

	if (debug_dump_regs) {
		DRM_INFO("CRTC %d HVS before:\n", drm_crtc_index(crtc));
		vc4_hvs_dump_state(hvs);
	}

	dlist_start = vc4->hvs->dlist + vc4_state->mm->mm_node.start;
	dlist_next = dlist_start;

	/* Copy all the active planes' dlist contents to the hardware dlist. */
	do {
		found = false;

		drm_atomic_crtc_for_each_plane(plane, crtc) {
			if (plane->state->normalized_zpos != zpos)
				continue;

			/* Is this the first active plane? */
			if (dlist_next == dlist_start) {
				/* We need to enable background fill when a plane
				 * could be alpha blending from the background, i.e.
				 * where no other plane is underneath. It suffices to
				 * consider the first active plane here since we set
				 * needs_bg_fill such that either the first plane
				 * already needs it or all planes on top blend from
				 * the first or a lower plane.
				 */
				vc4_plane_state = to_vc4_plane_state(plane->state);
				enable_bg_fill = vc4_plane_state->needs_bg_fill;
			}

			dlist_next += vc4_plane_write_dlist(plane, dlist_next);

			found = true;
		}

		zpos++;
	} while (found);

	writel(SCALER_CTL0_END, dlist_next);
	dlist_next++;

	WARN_ON(!vc4_state->mm);
	WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm->mm_node.size);

	if (enable_bg_fill) {
		/* This sets a black background color fill, as is the case
		 * with other DRM drivers.
		 */
		if (vc4->gen >= VC4_GEN_6)
			HVS_WRITE(SCALER6_DISPX_CTRL1(channel),
				  HVS_READ(SCALER6_DISPX_CTRL1(channel)) |
				  SCALER6_DISPX_CTRL1_BGENB);
		else
			HVS_WRITE(SCALER_DISPBKGNDX(channel),
				  HVS_READ(SCALER_DISPBKGNDX(channel)) |
				  SCALER_DISPBKGND_FILL);
	} else {
		if (vc4->gen >= VC4_GEN_6)
			HVS_WRITE(SCALER6_DISPX_CTRL1(channel),
				  HVS_READ(SCALER6_DISPX_CTRL1(channel)) &
				  ~SCALER6_DISPX_CTRL1_BGENB);
		else
			HVS_WRITE(SCALER_DISPBKGNDX(channel),
				  HVS_READ(SCALER_DISPBKGNDX(channel)) &
				  ~SCALER_DISPBKGND_FILL);
	}

	/* Only update DISPLIST if the CRTC was already running and is not
	 * being disabled.
	 * vc4_crtc_enable() takes care of updating the dlist just after
	 * re-enabling VBLANK interrupts and before enabling the engine.
	 * If the CRTC is being disabled, there's no point in updating this
	 * information.
	 */
	if (crtc->state->active && old_state->active) {
		vc4_hvs_install_dlist(crtc);
		vc4_hvs_update_dlist(crtc);
	}

	if (crtc->state->color_mgmt_changed) {
		u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(channel));

		WARN_ON_ONCE(vc4->gen > VC4_GEN_5);

		if (crtc->state->gamma_lut) {
			if (vc4->gen == VC4_GEN_4) {
				vc4_hvs_update_gamma_lut(hvs, vc4_crtc);
				dispbkgndx |= SCALER_DISPBKGND_GAMMA;
			} else {
				vc5_hvs_update_gamma_lut(hvs, vc4_crtc);
			}
		} else {
			/* Unsetting DISPBKGND_GAMMA skips the gamma lut step
			 * in hardware, which is the same as a linear lut that
			 * DRM expects us to use in absence of a user lut.
			 *
			 * Do NOT change state dynamically for hvs5 as it
			 * inserts a delay in the pipeline that will cause
			 * stalls if enabled/disabled whilst running. Other
			 * code paths should already be disabling/enabling the
			 * pipeline when gamma changes.
1364                          */
1365                         if (vc4->gen == VC4_GEN_4)
1366                                 dispbkgndx &= ~SCALER_DISPBKGND_GAMMA;
1367                 }
1368                 HVS_WRITE(SCALER_DISPBKGNDX(channel), dispbkgndx);
1369         }
1370
1371         if (debug_dump_regs) {
1372                 DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
1373                 vc4_hvs_dump_state(hvs);
1374         }
1375
1376         drm_dev_exit(idx);
1377 }
1378
1379 void vc4_hvs_mask_underrun(struct vc4_hvs *hvs, int channel)
1380 {
1381         struct vc4_dev *vc4 = hvs->vc4;
1382         struct drm_device *drm = &vc4->base;
1383         u32 dispctrl;
1384         int idx;
1385
1386         WARN_ON(vc4->gen > VC4_GEN_5);
1387
1388         if (!drm_dev_enter(drm, &idx))
1389                 return;
1390
1391         dispctrl = HVS_READ(SCALER_DISPCTRL);
1392         dispctrl &= ~((vc4->gen == VC4_GEN_5) ?
1393                       SCALER5_DISPCTRL_DSPEISLUR(channel) :
1394                       SCALER_DISPCTRL_DSPEISLUR(channel));
1395
1396         HVS_WRITE(SCALER_DISPCTRL, dispctrl);
1397
1398         drm_dev_exit(idx);
1399 }
1400
1401 void vc4_hvs_unmask_underrun(struct vc4_hvs *hvs, int channel)
1402 {
1403         struct vc4_dev *vc4 = hvs->vc4;
1404         struct drm_device *drm = &vc4->base;
1405         u32 dispctrl;
1406         int idx;
1407
1408         WARN_ON(vc4->gen > VC4_GEN_5);
1409
1410         if (!drm_dev_enter(drm, &idx))
1411                 return;
1412
1413         dispctrl = HVS_READ(SCALER_DISPCTRL);
1414         dispctrl |= ((vc4->gen == VC4_GEN_5) ?
1415                      SCALER5_DISPCTRL_DSPEISLUR(channel) :
1416                      SCALER_DISPCTRL_DSPEISLUR(channel));
1417
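             /* Clear any latched underrun status before re-enabling the
              * interrupt, so that unmasking doesn't immediately retrigger
              * on an event that fired while the interrupt was masked.
              */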
1418         HVS_WRITE(SCALER_DISPSTAT,
1419                   SCALER_DISPSTAT_EUFLOW(channel));
1420         HVS_WRITE(SCALER_DISPCTRL, dispctrl);
1421
1422         drm_dev_exit(idx);
1423 }
1424
1425 static void vc4_hvs_report_underrun(struct drm_device *dev)
1426 {
1427         struct vc4_dev *vc4 = to_vc4_dev(dev);
1428
1429         atomic_inc(&vc4->underrun);
1430         DRM_DEV_ERROR(dev->dev, "HVS underrun\n");
1431 }
1432
1433 static irqreturn_t vc4_hvs_irq_handler(int irq, void *data)
1434 {
1435         struct drm_device *dev = data;
1436         struct vc4_dev *vc4 = to_vc4_dev(dev);
1437         struct vc4_hvs *hvs = vc4->hvs;
1438         irqreturn_t irqret = IRQ_NONE;
1439         int channel;
1440         u32 control;
1441         u32 status;
1442         u32 dspeislur;
1443
1444         WARN_ON(vc4->gen > VC4_GEN_5);
1445
1446         /*
1447          * NOTE: We don't need to protect the register accesses with
1448          * drm_dev_enter() here, because the interrupt handler's
1449          * lifetime is tied to the device itself, not to the DRM device.
1450          *
1451          * So when the device goes away, one of the first things we do
1452          * is unregister the interrupt handler, and only then unregister
1453          * the DRM device. drm_dev_enter() would thus always succeed
1454          * if we are here.
1455          */
1456
1457         status = HVS_READ(SCALER_DISPSTAT);
1458         control = HVS_READ(SCALER_DISPCTRL);
1459
1460         for (channel = 0; channel < SCALER_CHANNELS_COUNT; channel++) {
1461                 dspeislur = (vc4->gen == VC4_GEN_5) ?
1462                         SCALER5_DISPCTRL_DSPEISLUR(channel) :
1463                         SCALER_DISPCTRL_DSPEISLUR(channel);
1464
1465                 /* Interrupt masking is not always honored, so check it here. */
1466                 if (status & SCALER_DISPSTAT_EUFLOW(channel) &&
1467                     control & dspeislur) {
1468                         vc4_hvs_mask_underrun(hvs, channel);
1469                         vc4_hvs_report_underrun(dev);
1470
1471                         irqret = IRQ_HANDLED;
1472                 }
1473
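                     /* End of frame: the channel has finished scanning out
                      * its current display list, so stale dlist allocations
                      * for this channel can now be swept and freed.
                      */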
1474                 if (status & SCALER_DISPSTAT_EOF(channel)) {
1475                         vc4_hvs_schedule_dlist_sweep(hvs, channel);
1476                         irqret = IRQ_HANDLED;
1477                 }
1478         }
1479
1480         /* Clear every per-channel interrupt flag. */
1481         HVS_WRITE(SCALER_DISPSTAT, SCALER_DISPSTAT_IRQMASK(0) |
1482                                    SCALER_DISPSTAT_IRQMASK(1) |
1483                                    SCALER_DISPSTAT_IRQMASK(2));
1484
1485         return irqret;
1486 }
1487
1488 static irqreturn_t vc6_hvs_eof_irq_handler(int irq, void *data)
1489 {
1490         struct drm_device *dev = data;
1491         struct vc4_dev *vc4 = to_vc4_dev(dev);
1492         struct vc4_hvs *hvs = vc4->hvs;
1493         unsigned int i;
1494
1495         WARN_ON(vc4->gen < VC4_GEN_6);
1496
1497         for (i = 0; i < HVS_NUM_CHANNELS; i++) {
1498                 if (!hvs->eof_irq[i].enabled)
1499                         continue;
1500
1501                 if (hvs->eof_irq[i].desc != irq)
1502                         continue;
1503
1504                 vc4_hvs_schedule_dlist_sweep(hvs, i);
1505                 return IRQ_HANDLED;
1506         }
1507
1508         return IRQ_NONE;
1509 }
1510
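     /*
      * Expose HVS state in debugfs. With debugfs mounted, these files
      * typically land under the DRM minor's directory, e.g. (paths shown
      * for illustration only, assuming card minor 0):
      *
      *   cat /sys/kernel/debug/dri/0/hvs_underrun
      *   cat /sys/kernel/debug/dri/0/hvs_regs
      */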
1511 int vc4_hvs_debugfs_init(struct drm_minor *minor)
1512 {
1513         struct drm_device *drm = minor->dev;
1514         struct vc4_dev *vc4 = to_vc4_dev(drm);
1515         struct vc4_hvs *hvs = vc4->hvs;
1516
1517         if (vc4->firmware_kms)
1518                 return 0;
1519
1520         if (!vc4->hvs)
1521                 return -ENODEV;
1522
1523         if (vc4->gen == VC4_GEN_4)
1524                 debugfs_create_bool("hvs_load_tracker", S_IRUGO | S_IWUSR,
1525                                     minor->debugfs_root,
1526                                     &vc4->load_tracker_enabled);
1527
             /* "hvs_gamma" uses the vc5 helper and pairs with the
              * vc5_hvs_update_gamma_lut() path, so it belongs behind a
              * VC4_GEN_5 check rather than the VC4_GEN_4 one.
              */
             if (vc4->gen == VC4_GEN_5)
1528                 drm_debugfs_add_file(drm, "hvs_gamma", vc5_hvs_debugfs_gamma,
1529                                      NULL);
1531
1532         if (vc4->gen >= VC4_GEN_6)
1533                 drm_debugfs_add_file(drm, "hvs_dlists", vc6_hvs_debugfs_dlist, NULL);
1534         else
1535                 drm_debugfs_add_file(drm, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL);
1536
1537         drm_debugfs_add_file(drm, "hvs_underrun", vc4_hvs_debugfs_underrun, NULL);
1538
1539         vc4_debugfs_add_regset32(drm, "hvs_regs", &hvs->regset);
1540
1541         return 0;
1542 }
1543
1544 struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
1545                                 void __iomem *regs,
1546                                 struct platform_device *pdev)
1547 {
1548         struct drm_device *drm = &vc4->base;
1549         struct vc4_hvs *hvs;
1550         unsigned int dlist_start;
1551         size_t dlist_size;
1552         size_t lbm_size;
1553
1554         hvs = drmm_kzalloc(drm, sizeof(*hvs), GFP_KERNEL);
1555         if (!hvs)
1556                 return ERR_PTR(-ENOMEM);
1557
1558         hvs->vc4 = vc4;
1559         hvs->regs = regs;
1560         hvs->pdev = pdev;
1561
1562         spin_lock_init(&hvs->mm_lock);
1563
1564         INIT_LIST_HEAD(&hvs->stale_dlist_entries);
1565         INIT_WORK(&hvs->free_dlist_work, vc4_hvs_dlist_free_work);
1566
1567         switch (vc4->gen) {
1568         case VC4_GEN_4:
1569         case VC4_GEN_5:
1570                 /* Set up the HVS display list memory manager. We never
1571                  * overwrite the setup from the bootloader (just 128b
1572                  * out of our 16K), since we don't want to scramble the
1573                  * screen when transitioning from the firmware's boot
1574                  * setup to runtime.
1575                  */
1576                 dlist_start = HVS_BOOTLOADER_DLIST_END;
1577                 dlist_size = (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END;
1578                 break;
1579
1580         case VC4_GEN_6:
1581                 dlist_start = HVS_BOOTLOADER_DLIST_END;
1582
1583                 /*
1584                  * If we are running a test, we can't access registers;
1585                  * use a plausible size instead.
1586                  */
1587                 if (!kunit_get_current_test())
1588                         dlist_size = HVS_READ(SCALER6_CXM_SIZE);
1589                 else
1590                         dlist_size = 4096;
1591
1592                 break;
1593
1594         default:
1595                 drm_err(drm, "Unknown VC4 generation: %d\n", vc4->gen);
1596                 return ERR_PTR(-ENODEV);
1597         }
1598
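             /* Note that dlist_start and dlist_size are in units of 32-bit
              * dlist words, not bytes: the SCALER_DLIST_SIZE >> 2 above
              * converts the byte size of the region into words.
              */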
1599         drm_mm_init(&hvs->dlist_mm, dlist_start, dlist_size);
1600
1601         hvs->dlist_mem_size = dlist_size;
1602
1603         /* Set up the HVS LBM memory manager.  We could have some more
1604          * complicated data structure that allowed reuse of LBM areas
1605          * between planes when they don't overlap on the screen, but
1606          * for now we just allocate globally.
1607          */
1608
1609         switch (vc4->gen) {
1610         case VC4_GEN_4:
1611                 /* 48k words of 2x12-bit pixels */
1612                 lbm_size = 48 * SZ_1K;
1613                 break;
1614
1615         case VC4_GEN_5:
1616                 /* 60k words of 4x12-bit pixels */
1617                 lbm_size = 60 * SZ_1K;
1618                 break;
1619
1620         case VC4_GEN_6:
1621                 /*
1622                  * We can't read SCALER6_LBM_SIZE here, since kunit also
1623                  * uses this function and can't access registers. Use a
1624                  * plausible fixed size instead.
                      */
1625                 lbm_size = 1024;
1626                 break;
1627
1628         default:
1629                 drm_err(drm, "Unknown VC4 generation: %d\n", vc4->gen);
1630                 return ERR_PTR(-ENODEV);
1631         }
1632
1633         drm_mm_init(&hvs->lbm_mm, 0, lbm_size);
1634
1635         if (vc4->gen >= VC4_GEN_6) {
1636                 ida_init(&hvs->upm_handles);
1637
1638                 /*
1639                  * NOTE: On BCM2712, the size could also be read from
1640                  * the SCALER6_UBM_SIZE register, but that would
1641                  * require a register access, which we can't do when
1642                  * kunit uses this function to create its mock
1643                  * device.
1644                  */
1645                 drm_mm_init(&hvs->upm_mm, 0, 1024 * HVS_UBM_WORD_SIZE);
1646         }
1647
1649         vc4->hvs = hvs;
1650
1651         return hvs;
1652 }
1653
1654 static int vc4_hvs_hw_init(struct vc4_hvs *hvs)
1655 {
1656         struct vc4_dev *vc4 = hvs->vc4;
1657         u32 dispctrl, reg;
1658
1659         dispctrl = HVS_READ(SCALER_DISPCTRL);
1660         dispctrl |= SCALER_DISPCTRL_ENABLE;
1661         HVS_WRITE(SCALER_DISPCTRL, dispctrl);
1662
1663         reg = HVS_READ(SCALER_DISPECTRL);
1664         reg &= ~SCALER_DISPECTRL_DSP2_MUX_MASK;
1665         HVS_WRITE(SCALER_DISPECTRL,
1666                   reg | VC4_SET_FIELD(0, SCALER_DISPECTRL_DSP2_MUX));
1667
1668         reg = HVS_READ(SCALER_DISPCTRL);
1669         reg &= ~SCALER_DISPCTRL_DSP3_MUX_MASK;
1670         HVS_WRITE(SCALER_DISPCTRL,
1671                   reg | VC4_SET_FIELD(3, SCALER_DISPCTRL_DSP3_MUX));
1672
1673         reg = HVS_READ(SCALER_DISPEOLN);
1674         reg &= ~SCALER_DISPEOLN_DSP4_MUX_MASK;
1675         HVS_WRITE(SCALER_DISPEOLN,
1676                   reg | VC4_SET_FIELD(3, SCALER_DISPEOLN_DSP4_MUX));
1677
1678         reg = HVS_READ(SCALER_DISPDITHER);
1679         reg &= ~SCALER_DISPDITHER_DSP5_MUX_MASK;
1680         HVS_WRITE(SCALER_DISPDITHER,
1681                   reg | VC4_SET_FIELD(3, SCALER_DISPDITHER_DSP5_MUX));
1682
1683         dispctrl = HVS_READ(SCALER_DISPCTRL);
1684         dispctrl |= SCALER_DISPCTRL_DISPEIRQ(0) |
1685                     SCALER_DISPCTRL_DISPEIRQ(1) |
1686                     SCALER_DISPCTRL_DISPEIRQ(2);
1687
1688         if (vc4->gen == VC4_GEN_4)
1689                 dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ |
1690                               SCALER_DISPCTRL_SLVWREIRQ |
1691                               SCALER_DISPCTRL_SLVRDEIRQ |
1692                               SCALER_DISPCTRL_DSPEIEOF(0) |
1693                               SCALER_DISPCTRL_DSPEIEOF(1) |
1694                               SCALER_DISPCTRL_DSPEIEOF(2) |
1695                               SCALER_DISPCTRL_DSPEIEOLN(0) |
1696                               SCALER_DISPCTRL_DSPEIEOLN(1) |
1697                               SCALER_DISPCTRL_DSPEIEOLN(2) |
1698                               SCALER_DISPCTRL_DSPEISLUR(0) |
1699                               SCALER_DISPCTRL_DSPEISLUR(1) |
1700                               SCALER_DISPCTRL_DSPEISLUR(2) |
1701                               SCALER_DISPCTRL_SCLEIRQ);
1702         else
1703                 dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ |
1704                               SCALER5_DISPCTRL_SLVEIRQ |
1705                               SCALER5_DISPCTRL_DSPEIEOF(0) |
1706                               SCALER5_DISPCTRL_DSPEIEOF(1) |
1707                               SCALER5_DISPCTRL_DSPEIEOF(2) |
1708                               SCALER5_DISPCTRL_DSPEIEOLN(0) |
1709                               SCALER5_DISPCTRL_DSPEIEOLN(1) |
1710                               SCALER5_DISPCTRL_DSPEIEOLN(2) |
1711                               SCALER5_DISPCTRL_DSPEISLUR(0) |
1712                               SCALER5_DISPCTRL_DSPEISLUR(1) |
1713                               SCALER5_DISPCTRL_DSPEISLUR(2) |
1714                               SCALER_DISPCTRL_SCLEIRQ);
1715
1717         /* Set AXI panic mode.
1718          * VC4 panics when < 2 lines in FIFO.
1719          * VC5 panics when less than 1 line in the FIFO.
1720          */
1721         dispctrl &= ~(SCALER_DISPCTRL_PANIC0_MASK |
1722                       SCALER_DISPCTRL_PANIC1_MASK |
1723                       SCALER_DISPCTRL_PANIC2_MASK);
1724         dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC0);
1725         dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC1);
1726         dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC2);
1727
1739         HVS_WRITE(SCALER_DISPCTRL, dispctrl);
1740
1741         return 0;
1742 }
1743
1744 #define CFC1_N_NL_CSC_CTRL(x)           (0xa000 + ((x) * 0x3000))
1745 #define CFC1_N_MA_CSC_COEFF_C00(x)      (0xa008 + ((x) * 0x3000))
1746 #define CFC1_N_MA_CSC_COEFF_C01(x)      (0xa00c + ((x) * 0x3000))
1747 #define CFC1_N_MA_CSC_COEFF_C02(x)      (0xa010 + ((x) * 0x3000))
1748 #define CFC1_N_MA_CSC_COEFF_C03(x)      (0xa014 + ((x) * 0x3000))
1749 #define CFC1_N_MA_CSC_COEFF_C04(x)      (0xa018 + ((x) * 0x3000))
1750 #define CFC1_N_MA_CSC_COEFF_C10(x)      (0xa01c + ((x) * 0x3000))
1751 #define CFC1_N_MA_CSC_COEFF_C11(x)      (0xa020 + ((x) * 0x3000))
1752 #define CFC1_N_MA_CSC_COEFF_C12(x)      (0xa024 + ((x) * 0x3000))
1753 #define CFC1_N_MA_CSC_COEFF_C13(x)      (0xa028 + ((x) * 0x3000))
1754 #define CFC1_N_MA_CSC_COEFF_C14(x)      (0xa02c + ((x) * 0x3000))
1755 #define CFC1_N_MA_CSC_COEFF_C20(x)      (0xa030 + ((x) * 0x3000))
1756 #define CFC1_N_MA_CSC_COEFF_C21(x)      (0xa034 + ((x) * 0x3000))
1757 #define CFC1_N_MA_CSC_COEFF_C22(x)      (0xa038 + ((x) * 0x3000))
1758 #define CFC1_N_MA_CSC_COEFF_C23(x)      (0xa03c + ((x) * 0x3000))
1759 #define CFC1_N_MA_CSC_COEFF_C24(x)      (0xa040 + ((x) * 0x3000))
1760
1761 /* 4 S2.22 multiplication factors, and 1 S9.15 additive element for each of 3
1762  * output components
1763  */
1764 struct vc6_csc_coeff_entry {
1765         u32 csc[3][5];
1766 };
1767
1768 static const struct vc6_csc_coeff_entry csc_coeffs[2][3] = {
1769         [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
1770                 [DRM_COLOR_YCBCR_BT601] = {
1771                         .csc = {
1772                                 { 0x004A8542, 0x0, 0x0066254A, 0x0, 0xFF908A0D },
1773                                 { 0x004A8542, 0xFFE6ED5D, 0xFFCBF856, 0x0, 0x0043C9A3 },
1774                                 { 0x004A8542, 0x00811A54, 0x0, 0x0, 0xFF759502 }
1775                         }
1776                 },
1777                 [DRM_COLOR_YCBCR_BT709] = {
1778                         .csc = {
1779                                 { 0x004A8542, 0x0, 0x0072BC44, 0x0, 0xFF83F312 },
1780                                 { 0x004A8542, 0xFFF25A22, 0xFFDDE4D0, 0x0, 0x00267064 },
1781                                 { 0x004A8542, 0x00873197, 0x0, 0x0, 0xFF6F7DC0 }
1782                         }
1783                 },
1784                 [DRM_COLOR_YCBCR_BT2020] = {
1785                         .csc = {
1786                                 { 0x004A8542, 0x0, 0x006B4A17, 0x0, 0xFF8B653F },
1787                                 { 0x004A8542, 0xFFF402D9, 0xFFDDE4D0, 0x0, 0x0024C7AE },
1788                                 { 0x004A8542, 0x008912CC, 0x0, 0x0, 0xFF6D9C8B }
1789                         }
1790                 }
1791         },
1792         [DRM_COLOR_YCBCR_FULL_RANGE] = {
1793                 [DRM_COLOR_YCBCR_BT601] = {
1794                         .csc = {
1795                                 { 0x00400000, 0x0, 0x0059BA5E, 0x0, 0xFFA645A1 },
1796                                 { 0x00400000, 0xFFE9F9AC, 0xFFD24B97, 0x0, 0x0043BABB },
1797                                 { 0x00400000, 0x00716872, 0x0, 0x0, 0xFF8E978D }
1798                         }
1799                 },
1800                 [DRM_COLOR_YCBCR_BT709] = {
1801                         .csc = {
1802                                 { 0x00400000, 0x0, 0x0064C985, 0x0, 0xFF9B367A },
1803                                 { 0x00400000, 0xFFF402E1, 0xFFE20A40, 0x0, 0x0029F2DE },
1804                                 { 0x00400000, 0x0076C226, 0x0, 0x0, 0xFF893DD9 }
1805                         }
1806                 },
1807                 [DRM_COLOR_YCBCR_BT2020] = {
1808                         .csc = {
1809                                 { 0x00400000, 0x0, 0x005E3F14, 0x0, 0xFFA1C0EB },
1810                                 { 0x00400000, 0xFFF577F6, 0xFFDB580F, 0x0, 0x002F2FFA },
1811                                 { 0x00400000, 0x007868DB, 0x0, 0x0, 0xFF879724 }
1812                         }
1813                 }
1814         }
1815 };
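     /*
      * Worked example of the fixed-point formats above, using the BT.601
      * limited-range entry: the Y multiplier 255/219 ~= 1.16438 encodes as
      * round(1.16438 * 2^22) ~= 0x004A8542 in S2.22, and the R-channel
      * offset -(16 * 1.16438 + 128 * 1.59603) ~= -222.92 encodes as
      * round(-222.92 * 2^15) ~= 0xFF908A0D once written as 32-bit two's
      * complement. A hypothetical encoder, shown for illustration only
      * (floating point, so not usable as kernel code):
      *
      *   u32 s2_22(double x) { return (u32)(s32)lround(x * (1 << 22)); }
      *   u32 s9_15(double x) { return (u32)(s32)lround(x * (1 << 15)); }
      */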
1816
1817 static int vc6_hvs_hw_init(struct vc4_hvs *hvs)
1818 {
1819         const struct vc6_csc_coeff_entry *coeffs;
1820         unsigned int i;
1821
1822         HVS_WRITE(SCALER6_CONTROL,
1823                   SCALER6_CONTROL_HVS_EN |
1824                   VC4_SET_FIELD(8, SCALER6_CONTROL_PF_LINES) |
1825                   VC4_SET_FIELD(15, SCALER6_CONTROL_MAX_REQS));
1826
1827         /* Set HVS arbiter priority to max */
1828         HVS_WRITE(SCALER6_PRI_MAP0, 0xffffffff);
1829         HVS_WRITE(SCALER6_PRI_MAP1, 0xffffffff);
1830
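             /* Program the six color space converters: i / 3 selects the
              * range (limited, then full) and i % 3 the encoding (BT.601,
              * BT.709, BT.2020), matching the declaration order of
              * csc_coeffs. The BIT(15) write to CFC1_N_NL_CSC_CTRL at the
              * end of the loop presumably enables the converter; the field
              * layout isn't documented here.
              */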
1831         for (i = 0; i < 6; i++) {
1832                 coeffs = &csc_coeffs[i / 3][i % 3];
1833
1834                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C00(i), coeffs->csc[0][0]);
1835                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C01(i), coeffs->csc[0][1]);
1836                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C02(i), coeffs->csc[0][2]);
1837                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C03(i), coeffs->csc[0][3]);
1838                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C04(i), coeffs->csc[0][4]);
1839
1840                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C10(i), coeffs->csc[1][0]);
1841                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C11(i), coeffs->csc[1][1]);
1842                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C12(i), coeffs->csc[1][2]);
1843                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C13(i), coeffs->csc[1][3]);
1844                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C14(i), coeffs->csc[1][4]);
1845
1846                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C20(i), coeffs->csc[2][0]);
1847                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C21(i), coeffs->csc[2][1]);
1848                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C22(i), coeffs->csc[2][2]);
1849                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C23(i), coeffs->csc[2][3]);
1850                 HVS_WRITE(CFC1_N_MA_CSC_COEFF_C24(i), coeffs->csc[2][4]);
1851
1852                 HVS_WRITE(CFC1_N_NL_CSC_CTRL(i), BIT(15));
1853         }
1854
1855         return 0;
1856 }
1857
1858 static int vc4_hvs_cob_init(struct vc4_hvs *hvs)
1859 {
1860         struct vc4_dev *vc4 = hvs->vc4;
1861         u32 reg, top, base;
1862
1863         /*
1864          * Recompute Composite Output Buffer (COB) allocations for the
1865          * displays
1866          */
1867         switch (vc4->gen) {
1868         case VC4_GEN_4:
1869                 /* The COB is 20736 pixels, or just over 10 lines at 2048 wide.
1870                  * The bottom 2048 pixels are full 32bpp RGBA (intended for the
1871                  * TXP composing RGBA to memory), whilst the remainder are only
1872                  * 24bpp RGB.
1873                  *
1874                  * Assign 3 lines to channels 1 & 2, and just over 4 lines to
1875                  * channel 0.
1876                  */
1877                 #define VC4_COB_SIZE            20736
1878                 #define VC4_COB_LINE_WIDTH      2048
1879                 #define VC4_COB_NUM_LINES       3
1880                 reg = 0;
1881                 top = VC4_COB_LINE_WIDTH * VC4_COB_NUM_LINES;
1882                 reg |= (top - 1) << 16;
1883                 HVS_WRITE(SCALER_DISPBASE2, reg);
1884                 reg = top;
1885                 top += VC4_COB_LINE_WIDTH * VC4_COB_NUM_LINES;
1886                 reg |= (top - 1) << 16;
1887                 HVS_WRITE(SCALER_DISPBASE1, reg);
1888                 reg = top;
1889                 top = VC4_COB_SIZE;
1890                 reg |= (top - 1) << 16;
1891                 HVS_WRITE(SCALER_DISPBASE0, reg);
1892                 break;
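                     /* The resulting VC4 split, for reference: channel 2
                      * gets pixels 0..6143 and channel 1 pixels 6144..12287
                      * (3 lines each), while channel 0 gets 12288..20735,
                      * i.e. 8448 pixels or just over 4 lines at 2048 wide.
                      */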
1893
1894         case VC4_GEN_5:
1895                 /* The COB is 44416 pixels, or 10.8 lines at 4096 wide.
1896                  * The bottom 4096 pixels are full RGBA (intended for the TXP
1897                  * composing RGBA to memory), whilst the remainder are only
1898                  * RGB. Addressing is always pixel wide.
1899                  *
1900                  * Assign 3 lines of 4096 to channels 1 & 2, and just over 4
1901                  * lines to channel 0.
1902                  */
1903                 #define VC5_COB_SIZE            44416
1904                 #define VC5_COB_LINE_WIDTH      4096
1905                 #define VC5_COB_NUM_LINES       3
1906                 reg = 0;
1907                 top = VC5_COB_LINE_WIDTH * VC5_COB_NUM_LINES;
1908                 reg |= top << 16;
1909                 HVS_WRITE(SCALER_DISPBASE2, reg);
1910                 top += 16;
1911                 reg = top;
1912                 top += VC5_COB_LINE_WIDTH * VC5_COB_NUM_LINES;
1913                 reg |= top << 16;
1914                 HVS_WRITE(SCALER_DISPBASE1, reg);
1915                 top += 16;
1916                 reg = top;
1917                 top = VC5_COB_SIZE;
1918                 reg |= top << 16;
1919                 HVS_WRITE(SCALER_DISPBASE0, reg);
1920                 break;
1921
1922         case VC4_GEN_6:
1923                 #define VC6_COB_LINE_WIDTH      3840
1924                 #define VC6_COB_NUM_LINES       4
                     /* DISP2 is given a single 3840-pixel line here; DISP1
                      * and DISP0 each get VC6_COB_NUM_LINES lines below.
                      */
1925                 base = 0;
1926                 top = 3840;
1927
1928                 HVS_WRITE(SCALER6_DISP2_COB,
1929                           VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) |
1930                           VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE));
1931
1932                 base = top + 16;
1933                 top += VC6_COB_LINE_WIDTH * VC6_COB_NUM_LINES;
1934
1935                 HVS_WRITE(SCALER6_DISP1_COB,
1936                           VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) |
1937                           VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE));
1938
1939                 base = top + 16;
1940                 top += VC6_COB_LINE_WIDTH * VC6_COB_NUM_LINES;
1941
1942                 HVS_WRITE(SCALER6_DISP0_COB,
1943                           VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) |
1944                           VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE));
1945                 break;
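                     /* The resulting VC6 layout, for reference: DISP2 base 0,
                      * top 3840; DISP1 base 3856, top 19200; DISP0 base
                      * 19216, top 34560; with a 16-pixel guard between
                      * regions.
                      */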
1946
1947         default:
1948                 return -EINVAL;
1949         }
1950
1951         return 0;
1952 }
1953
1954 static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
1955 {
1956         struct platform_device *pdev = to_platform_device(dev);
1957         struct drm_device *drm = dev_get_drvdata(master);
1958         struct vc4_dev *vc4 = to_vc4_dev(drm);
1959         struct vc4_hvs *hvs = NULL;
1960         void __iomem *regs;
1961         int ret;
1962
1963         regs = vc4_ioremap_regs(pdev, 0);
1964         if (IS_ERR(regs))
1965                 return PTR_ERR(regs);
1966
1967         hvs = __vc4_hvs_alloc(vc4, regs, pdev);
1968         if (IS_ERR(hvs))
1969                 return PTR_ERR(hvs);
1970
1971         hvs->regset.base = hvs->regs;
1972
1973         if (vc4->gen >= VC4_GEN_6) {
1974                 hvs->regset.regs = vc6_hvs_regs;
1975                 hvs->regset.nregs = ARRAY_SIZE(vc6_hvs_regs);
1976         } else {
1977                 hvs->regset.regs = vc4_hvs_regs;
1978                 hvs->regset.nregs = ARRAY_SIZE(vc4_hvs_regs);
1979         }
1980
1981         if (vc4->gen >= VC4_GEN_5) {
1982                 struct rpi_firmware *firmware;
1983                 struct device_node *node;
1984                 unsigned int max_rate;
1985
1986                 node = rpi_firmware_find_node();
1987                 if (!node)
1988                         return -EINVAL;
1989
1990                 firmware = rpi_firmware_get(node);
1991                 of_node_put(node);
1992                 if (!firmware)
1993                         return -EPROBE_DEFER;
1994
1995                 hvs->core_clk = devm_clk_get(&pdev->dev,
1996                                              (vc4->gen >= VC4_GEN_6) ? "core" : NULL);
1997                 if (IS_ERR(hvs->core_clk)) {
1998                         dev_err(&pdev->dev, "Couldn't get core clock\n");
1999                         return PTR_ERR(hvs->core_clk);
2000                 }
2001
2002                 hvs->disp_clk = devm_clk_get(&pdev->dev,
2003                                              (vc4->gen >= VC4_GEN_6) ? "disp" : NULL);
2004                 if (IS_ERR(hvs->disp_clk)) {
2005                         dev_err(&pdev->dev, "Couldn't get disp clock\n");
2006                         return PTR_ERR(hvs->disp_clk);
2007                 }
2008
2009                 max_rate = rpi_firmware_clk_get_max_rate(firmware,
2010                                                          RPI_FIRMWARE_CORE_CLK_ID);
2011                 rpi_firmware_put(firmware);
2012                 if (max_rate >= 550000000)
2013                         hvs->vc5_hdmi_enable_hdmi_20 = true;
2014
2015                 if (max_rate >= 600000000)
2016                         hvs->vc5_hdmi_enable_4096by2160 = true;
2017
2018                 hvs->max_core_rate = max_rate;
2019
2020                 ret = clk_prepare_enable(hvs->core_clk);
2021                 if (ret) {
2022                         dev_err(&pdev->dev, "Couldn't enable the core clock\n");
2023                         return ret;
2024                 }
2025
2026                 ret = clk_prepare_enable(hvs->disp_clk);
2027                 if (ret) {
2028                         dev_err(&pdev->dev, "Couldn't enable the disp clock\n");
2029                         return ret;
2030                 }
2031         }
2032
2033         if (vc4->gen >= VC4_GEN_6) {
2034                 unsigned int i;
2035
2036                 for (i = 0; i < HVS_NUM_CHANNELS; i++) {
2037                         char irq_name[16];
2038                         int irq;
2039
2040                         snprintf(irq_name, sizeof(irq_name), "ch%u-eof", i);
2041
2042                         irq = platform_get_irq_byname(pdev, irq_name);
2043                         if (irq < 0) {
2044                                 dev_err(&pdev->dev,
2045                                         "Couldn't get %s interrupt: %d\n",
2046                                         irq_name, irq);
2047                                 return irq;
2048                         }
2049
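                             /* IRQF_NO_AUTOEN leaves the interrupt disabled
                              * after request; it is only enabled later (via
                              * enable_irq()) once a channel actually needs
                              * end-of-frame events, as tracked by
                              * hvs->eof_irq[i].enabled.
                              */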
2050                         ret = devm_request_irq(&pdev->dev,
2051                                                irq,
2052                                                vc6_hvs_eof_irq_handler,
2053                                                IRQF_NO_AUTOEN,
2054                                                dev_name(&pdev->dev),
2055                                                drm);
                             if (ret)
                                     return ret;
2056
2057                         hvs->eof_irq[i].desc = irq;
2058                 }
2059         }
2060
2061         if (vc4->gen >= VC4_GEN_5)
2062                 hvs->dlist = hvs->regs + SCALER5_DLIST_START;
2063         else
2064                 hvs->dlist = hvs->regs + SCALER_DLIST_START;
2065
2066         if (vc4->gen >= VC4_GEN_6)
2067                 ret = vc6_hvs_hw_init(hvs);
2068         else
2069                 ret = vc4_hvs_hw_init(hvs);
2070         if (ret)
2071                 return ret;
2072
2073         /* Upload filter kernels.  We only have the one for now, so we
2074          * keep it around for the lifetime of the driver.
2075          */
2076         ret = vc4_hvs_upload_linear_kernel(hvs,
2077                                            &hvs->mitchell_netravali_filter,
2078                                            mitchell_netravali_1_3_1_3_kernel);
2079         if (ret)
2080                 return ret;
2081
2082         ret = vc4_hvs_cob_init(hvs);
2083         if (ret)
2084                 return ret;
2085
2086         if (vc4->gen < VC4_GEN_6) {
2087                 ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
2088                                        vc4_hvs_irq_handler, 0, "vc4 hvs", drm);
2089                 if (ret)
2090                         return ret;
2091         }
2092
2093         return 0;
2094 }
2095
2096 static void vc4_hvs_unbind(struct device *dev, struct device *master,
2097                            void *data)
2098 {
2099         struct drm_device *drm = dev_get_drvdata(master);
2100         struct vc4_dev *vc4 = to_vc4_dev(drm);
2101         struct vc4_hvs *hvs = vc4->hvs;
2102         struct drm_mm_node *node, *next;
2103
2104         if (drm_mm_node_allocated(&vc4->hvs->mitchell_netravali_filter))
2105                 drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter);
2106
2107         drm_mm_for_each_node_safe(node, next, &vc4->hvs->dlist_mm)
2108                 drm_mm_remove_node(node);
2109
2110         drm_mm_takedown(&vc4->hvs->dlist_mm);
2111
2112         drm_mm_for_each_node_safe(node, next, &vc4->hvs->lbm_mm)
2113                 drm_mm_remove_node(node);
2114         drm_mm_takedown(&vc4->hvs->lbm_mm);
2115
2116         clk_disable_unprepare(hvs->disp_clk);
2117         clk_disable_unprepare(hvs->core_clk);
2118
2119         vc4->hvs = NULL;
2120 }
2121
2122 static const struct component_ops vc4_hvs_ops = {
2123         .bind   = vc4_hvs_bind,
2124         .unbind = vc4_hvs_unbind,
2125 };
2126
2127 static int vc4_hvs_dev_probe(struct platform_device *pdev)
2128 {
2129         return component_add(&pdev->dev, &vc4_hvs_ops);
2130 }
2131
2132 static void vc4_hvs_dev_remove(struct platform_device *pdev)
2133 {
2134         component_del(&pdev->dev, &vc4_hvs_ops);
2135 }
2136
2137 static const struct of_device_id vc4_hvs_dt_match[] = {
2138         { .compatible = "brcm,bcm2711-hvs" },
2139         { .compatible = "brcm,bcm2712-hvs" },
2140         { .compatible = "brcm,bcm2835-hvs" },
2141         {}
2142 };
2143
2144 struct platform_driver vc4_hvs_driver = {
2145         .probe = vc4_hvs_dev_probe,
2146         .remove_new = vc4_hvs_dev_remove,
2147         .driver = {
2148                 .name = "vc4_hvs",
2149                 .of_match_table = vc4_hvs_dt_match,
2150         },
2151 };