Merge tag 'drm-misc-fixes-2017-11-20' of git://anongit.freedesktop.org/drm/drm-misc...
[platform/kernel/linux-starfive.git] / drivers / gpu / drm / vc4 / vc4_plane.c
1 /*
2  * Copyright (C) 2015 Broadcom
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  */
8
9 /**
10  * DOC: VC4 plane module
11  *
12  * Each DRM plane is a layer of pixels being scanned out by the HVS.
13  *
14  * At atomic modeset check time, we compute the HVS display element
15  * state that would be necessary for displaying the plane (giving us a
16  * chance to figure out if a plane configuration is invalid), then at
17  * atomic flush time the CRTC will ask us to write our element state
18  * into the region of the HVS that it has allocated for us.
19  */
20
21 #include <drm/drm_atomic.h>
22 #include <drm/drm_atomic_helper.h>
23 #include <drm/drm_fb_cma_helper.h>
24 #include <drm/drm_plane_helper.h>
25
26 #include "uapi/drm/vc4_drm.h"
27 #include "vc4_drv.h"
28 #include "vc4_regs.h"
29
/* Scaler selection for one axis of one channel of a plane. */
enum vc4_scaling_mode {
        /* Source and destination sizes are equal. */
        VC4_SCALING_NONE = 0,
        /* Picked when the destination is smaller than the source. */
        VC4_SCALING_TPZ = 1,
        /* Picked when the destination is larger than the source. */
        VC4_SCALING_PPF = 2,
};
35
36 struct vc4_plane_state {
37         struct drm_plane_state base;
38         /* System memory copy of the display list for this element, computed
39          * at atomic_check time.
40          */
41         u32 *dlist;
42         u32 dlist_size; /* Number of dwords allocated for the display list */
43         u32 dlist_count; /* Number of used dwords in the display list. */
44
45         /* Offset in the dlist to various words, for pageflip or
46          * cursor updates.
47          */
48         u32 pos0_offset;
49         u32 pos2_offset;
50         u32 ptr0_offset;
51
52         /* Offset where the plane's dlist was last stored in the
53          * hardware at vc4_crtc_atomic_flush() time.
54          */
55         u32 __iomem *hw_dlist;
56
57         /* Clipped coordinates of the plane on the display. */
58         int crtc_x, crtc_y, crtc_w, crtc_h;
59         /* Clipped area being scanned from in the FB. */
60         u32 src_x, src_y;
61
62         u32 src_w[2], src_h[2];
63
64         /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
65         enum vc4_scaling_mode x_scaling[2], y_scaling[2];
66         bool is_unity;
67         bool is_yuv;
68
69         /* Offset to start scanning out from the start of the plane's
70          * BO.
71          */
72         u32 offsets[3];
73
74         /* Our allocation in LBM for temporary storage during scaling. */
75         struct drm_mm_node lbm;
76 };
77
78 static inline struct vc4_plane_state *
79 to_vc4_plane_state(struct drm_plane_state *state)
80 {
81         return (struct vc4_plane_state *)state;
82 }
83
84 static const struct hvs_format {
85         u32 drm; /* DRM_FORMAT_* */
86         u32 hvs; /* HVS_FORMAT_* */
87         u32 pixel_order;
88         bool has_alpha;
89         bool flip_cbcr;
90 } hvs_formats[] = {
91         {
92                 .drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
93                 .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false,
94         },
95         {
96                 .drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
97                 .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true,
98         },
99         {
100                 .drm = DRM_FORMAT_ABGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
101                 .pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = true,
102         },
103         {
104                 .drm = DRM_FORMAT_XBGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
105                 .pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = false,
106         },
107         {
108                 .drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565,
109                 .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false,
110         },
111         {
112                 .drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565,
113                 .pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false,
114         },
115         {
116                 .drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
117                 .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true,
118         },
119         {
120                 .drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
121                 .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false,
122         },
123         {
124                 .drm = DRM_FORMAT_YUV422,
125                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
126         },
127         {
128                 .drm = DRM_FORMAT_YVU422,
129                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
130                 .flip_cbcr = true,
131         },
132         {
133                 .drm = DRM_FORMAT_YUV420,
134                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
135         },
136         {
137                 .drm = DRM_FORMAT_YVU420,
138                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
139                 .flip_cbcr = true,
140         },
141         {
142                 .drm = DRM_FORMAT_NV12,
143                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
144         },
145         {
146                 .drm = DRM_FORMAT_NV16,
147                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
148         },
149 };
150
151 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
152 {
153         unsigned i;
154
155         for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
156                 if (hvs_formats[i].drm == drm_format)
157                         return &hvs_formats[i];
158         }
159
160         return NULL;
161 }
162
163 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
164 {
165         if (dst > src)
166                 return VC4_SCALING_PPF;
167         else if (dst < src)
168                 return VC4_SCALING_TPZ;
169         else
170                 return VC4_SCALING_NONE;
171 }
172
173 static bool plane_enabled(struct drm_plane_state *state)
174 {
175         return state->fb && state->crtc;
176 }
177
178 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
179 {
180         struct vc4_plane_state *vc4_state;
181
182         if (WARN_ON(!plane->state))
183                 return NULL;
184
185         vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
186         if (!vc4_state)
187                 return NULL;
188
189         memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
190
191         __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
192
193         if (vc4_state->dlist) {
194                 vc4_state->dlist = kmemdup(vc4_state->dlist,
195                                            vc4_state->dlist_count * 4,
196                                            GFP_KERNEL);
197                 if (!vc4_state->dlist) {
198                         kfree(vc4_state);
199                         return NULL;
200                 }
201                 vc4_state->dlist_size = vc4_state->dlist_count;
202         }
203
204         return &vc4_state->base;
205 }
206
207 static void vc4_plane_destroy_state(struct drm_plane *plane,
208                                     struct drm_plane_state *state)
209 {
210         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
211         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
212
213         if (vc4_state->lbm.allocated) {
214                 unsigned long irqflags;
215
216                 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
217                 drm_mm_remove_node(&vc4_state->lbm);
218                 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
219         }
220
221         kfree(vc4_state->dlist);
222         __drm_atomic_helper_plane_destroy_state(&vc4_state->base);
223         kfree(state);
224 }
225
226 /* Called during init to allocate the plane's atomic state. */
227 static void vc4_plane_reset(struct drm_plane *plane)
228 {
229         struct vc4_plane_state *vc4_state;
230
231         WARN_ON(plane->state);
232
233         vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
234         if (!vc4_state)
235                 return;
236
237         plane->state = &vc4_state->base;
238         vc4_state->base.plane = plane;
239 }
240
241 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
242 {
243         if (vc4_state->dlist_count == vc4_state->dlist_size) {
244                 u32 new_size = max(4u, vc4_state->dlist_count * 2);
245                 u32 *new_dlist = kmalloc(new_size * 4, GFP_KERNEL);
246
247                 if (!new_dlist)
248                         return;
249                 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
250
251                 kfree(vc4_state->dlist);
252                 vc4_state->dlist = new_dlist;
253                 vc4_state->dlist_size = new_size;
254         }
255
256         vc4_state->dlist[vc4_state->dlist_count++] = val;
257 }
258
259 /* Returns the scl0/scl1 field based on whether the dimensions need to
260  * be up/down/non-scaled.
261  *
262  * This is a replication of a table from the spec.
263  */
264 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
265 {
266         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
267
268         switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
269         case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
270                 return SCALER_CTL0_SCL_H_PPF_V_PPF;
271         case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
272                 return SCALER_CTL0_SCL_H_TPZ_V_PPF;
273         case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
274                 return SCALER_CTL0_SCL_H_PPF_V_TPZ;
275         case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
276                 return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
277         case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
278                 return SCALER_CTL0_SCL_H_PPF_V_NONE;
279         case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
280                 return SCALER_CTL0_SCL_H_NONE_V_PPF;
281         case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
282                 return SCALER_CTL0_SCL_H_NONE_V_TPZ;
283         case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
284                 return SCALER_CTL0_SCL_H_TPZ_V_NONE;
285         default:
286         case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
287                 /* The unity case is independently handled by
288                  * SCALER_CTL0_UNITY.
289                  */
290                 return 0;
291         }
292 }
293
294 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
295 {
296         struct drm_plane *plane = state->plane;
297         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
298         struct drm_framebuffer *fb = state->fb;
299         struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
300         u32 subpixel_src_mask = (1 << 16) - 1;
301         u32 format = fb->format->format;
302         int num_planes = fb->format->num_planes;
303         u32 h_subsample = 1;
304         u32 v_subsample = 1;
305         int i;
306
307         for (i = 0; i < num_planes; i++)
308                 vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
309
310         /* We don't support subpixel source positioning for scaling. */
311         if ((state->src_x & subpixel_src_mask) ||
312             (state->src_y & subpixel_src_mask) ||
313             (state->src_w & subpixel_src_mask) ||
314             (state->src_h & subpixel_src_mask)) {
315                 return -EINVAL;
316         }
317
318         vc4_state->src_x = state->src_x >> 16;
319         vc4_state->src_y = state->src_y >> 16;
320         vc4_state->src_w[0] = state->src_w >> 16;
321         vc4_state->src_h[0] = state->src_h >> 16;
322
323         vc4_state->crtc_x = state->crtc_x;
324         vc4_state->crtc_y = state->crtc_y;
325         vc4_state->crtc_w = state->crtc_w;
326         vc4_state->crtc_h = state->crtc_h;
327
328         vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
329                                                        vc4_state->crtc_w);
330         vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
331                                                        vc4_state->crtc_h);
332
333         if (num_planes > 1) {
334                 vc4_state->is_yuv = true;
335
336                 h_subsample = drm_format_horz_chroma_subsampling(format);
337                 v_subsample = drm_format_vert_chroma_subsampling(format);
338                 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
339                 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
340
341                 vc4_state->x_scaling[1] =
342                         vc4_get_scaling_mode(vc4_state->src_w[1],
343                                              vc4_state->crtc_w);
344                 vc4_state->y_scaling[1] =
345                         vc4_get_scaling_mode(vc4_state->src_h[1],
346                                              vc4_state->crtc_h);
347
348                 /* YUV conversion requires that scaling be enabled,
349                  * even on a plane that's otherwise 1:1.  Choose TPZ
350                  * for simplicity.
351                  */
352                 if (vc4_state->x_scaling[0] == VC4_SCALING_NONE)
353                         vc4_state->x_scaling[0] = VC4_SCALING_TPZ;
354                 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE)
355                         vc4_state->y_scaling[0] = VC4_SCALING_TPZ;
356         }
357
358         vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
359                                vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
360                                vc4_state->x_scaling[1] == VC4_SCALING_NONE &&
361                                vc4_state->y_scaling[1] == VC4_SCALING_NONE);
362
363         /* No configuring scaling on the cursor plane, since it gets
364            non-vblank-synced updates, and scaling requires requires
365            LBM changes which have to be vblank-synced.
366          */
367         if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity)
368                 return -EINVAL;
369
370         /* Clamp the on-screen start x/y to 0.  The hardware doesn't
371          * support negative y, and negative x wastes bandwidth.
372          */
373         if (vc4_state->crtc_x < 0) {
374                 for (i = 0; i < num_planes; i++) {
375                         u32 cpp = fb->format->cpp[i];
376                         u32 subs = ((i == 0) ? 1 : h_subsample);
377
378                         vc4_state->offsets[i] += (cpp *
379                                                   (-vc4_state->crtc_x) / subs);
380                 }
381                 vc4_state->src_w[0] += vc4_state->crtc_x;
382                 vc4_state->src_w[1] += vc4_state->crtc_x / h_subsample;
383                 vc4_state->crtc_x = 0;
384         }
385
386         if (vc4_state->crtc_y < 0) {
387                 for (i = 0; i < num_planes; i++) {
388                         u32 subs = ((i == 0) ? 1 : v_subsample);
389
390                         vc4_state->offsets[i] += (fb->pitches[i] *
391                                                   (-vc4_state->crtc_y) / subs);
392                 }
393                 vc4_state->src_h[0] += vc4_state->crtc_y;
394                 vc4_state->src_h[1] += vc4_state->crtc_y / v_subsample;
395                 vc4_state->crtc_y = 0;
396         }
397
398         return 0;
399 }
400
401 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
402 {
403         u32 scale, recip;
404
405         scale = (1 << 16) * src / dst;
406
407         /* The specs note that while the reciprocal would be defined
408          * as (1<<32)/scale, ~0 is close enough.
409          */
410         recip = ~0 / scale;
411
412         vc4_dlist_write(vc4_state,
413                         VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
414                         VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
415         vc4_dlist_write(vc4_state,
416                         VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
417 }
418
419 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
420 {
421         u32 scale = (1 << 16) * src / dst;
422
423         vc4_dlist_write(vc4_state,
424                         SCALER_PPF_AGC |
425                         VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
426                         VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
427 }
428
429 static u32 vc4_lbm_size(struct drm_plane_state *state)
430 {
431         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
432         /* This is the worst case number.  One of the two sizes will
433          * be used depending on the scaling configuration.
434          */
435         u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w);
436         u32 lbm;
437
438         if (!vc4_state->is_yuv) {
439                 if (vc4_state->is_unity)
440                         return 0;
441                 else if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
442                         lbm = pix_per_line * 8;
443                 else {
444                         /* In special cases, this multiplier might be 12. */
445                         lbm = pix_per_line * 16;
446                 }
447         } else {
448                 /* There are cases for this going down to a multiplier
449                  * of 2, but according to the firmware source, the
450                  * table in the docs is somewhat wrong.
451                  */
452                 lbm = pix_per_line * 16;
453         }
454
455         lbm = roundup(lbm, 32);
456
457         return lbm;
458 }
459
460 static void vc4_write_scaling_parameters(struct drm_plane_state *state,
461                                          int channel)
462 {
463         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
464
465         /* Ch0 H-PPF Word 0: Scaling Parameters */
466         if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
467                 vc4_write_ppf(vc4_state,
468                               vc4_state->src_w[channel], vc4_state->crtc_w);
469         }
470
471         /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
472         if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
473                 vc4_write_ppf(vc4_state,
474                               vc4_state->src_h[channel], vc4_state->crtc_h);
475                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
476         }
477
478         /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
479         if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
480                 vc4_write_tpz(vc4_state,
481                               vc4_state->src_w[channel], vc4_state->crtc_w);
482         }
483
484         /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
485         if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
486                 vc4_write_tpz(vc4_state,
487                               vc4_state->src_h[channel], vc4_state->crtc_h);
488                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
489         }
490 }
491
492 /* Writes out a full display list for an active plane to the plane's
493  * private dlist state.
494  */
495 static int vc4_plane_mode_set(struct drm_plane *plane,
496                               struct drm_plane_state *state)
497 {
498         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
499         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
500         struct drm_framebuffer *fb = state->fb;
501         u32 ctl0_offset = vc4_state->dlist_count;
502         const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
503         int num_planes = drm_format_num_planes(format->drm);
504         u32 scl0, scl1, pitch0;
505         u32 lbm_size, tiling;
506         unsigned long irqflags;
507         int ret, i;
508
509         ret = vc4_plane_setup_clipping_and_scaling(state);
510         if (ret)
511                 return ret;
512
513         /* Allocate the LBM memory that the HVS will use for temporary
514          * storage due to our scaling/format conversion.
515          */
516         lbm_size = vc4_lbm_size(state);
517         if (lbm_size) {
518                 if (!vc4_state->lbm.allocated) {
519                         spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
520                         ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
521                                                          &vc4_state->lbm,
522                                                          lbm_size, 32, 0, 0);
523                         spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
524                 } else {
525                         WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
526                 }
527         }
528
529         if (ret)
530                 return ret;
531
532         /* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
533          * and 4:4:4, scl1 should be set to scl0 so both channels of
534          * the scaler do the same thing.  For YUV, the Y plane needs
535          * to be put in channel 1 and Cb/Cr in channel 0, so we swap
536          * the scl fields here.
537          */
538         if (num_planes == 1) {
539                 scl0 = vc4_get_scl_field(state, 1);
540                 scl1 = scl0;
541         } else {
542                 scl0 = vc4_get_scl_field(state, 1);
543                 scl1 = vc4_get_scl_field(state, 0);
544         }
545
546         switch (fb->modifier) {
547         case DRM_FORMAT_MOD_LINEAR:
548                 tiling = SCALER_CTL0_TILING_LINEAR;
549                 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
550                 break;
551
552         case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
553                 /* For T-tiled, the FB pitch is "how many bytes from
554                  * one row to the next, such that pitch * tile_h ==
555                  * tile_size * tiles_per_row."
556                  */
557                 u32 tile_size_shift = 12; /* T tiles are 4kb */
558                 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
559                 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
560
561                 tiling = SCALER_CTL0_TILING_256B_OR_T;
562
563                 pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET) |
564                           VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L) |
565                           VC4_SET_FIELD(tiles_w, SCALER_PITCH0_TILE_WIDTH_R));
566                 break;
567         }
568
569         default:
570                 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
571                               (long long)fb->modifier);
572                 return -EINVAL;
573         }
574
575         /* Control word */
576         vc4_dlist_write(vc4_state,
577                         SCALER_CTL0_VALID |
578                         (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
579                         (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
580                         VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
581                         (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
582                         VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
583                         VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
584
585         /* Position Word 0: Image Positions and Alpha Value */
586         vc4_state->pos0_offset = vc4_state->dlist_count;
587         vc4_dlist_write(vc4_state,
588                         VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) |
589                         VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
590                         VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
591
592         /* Position Word 1: Scaled Image Dimensions. */
593         if (!vc4_state->is_unity) {
594                 vc4_dlist_write(vc4_state,
595                                 VC4_SET_FIELD(vc4_state->crtc_w,
596                                               SCALER_POS1_SCL_WIDTH) |
597                                 VC4_SET_FIELD(vc4_state->crtc_h,
598                                               SCALER_POS1_SCL_HEIGHT));
599         }
600
601         /* Position Word 2: Source Image Size, Alpha Mode */
602         vc4_state->pos2_offset = vc4_state->dlist_count;
603         vc4_dlist_write(vc4_state,
604                         VC4_SET_FIELD(format->has_alpha ?
605                                       SCALER_POS2_ALPHA_MODE_PIPELINE :
606                                       SCALER_POS2_ALPHA_MODE_FIXED,
607                                       SCALER_POS2_ALPHA_MODE) |
608                         VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) |
609                         VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT));
610
611         /* Position Word 3: Context.  Written by the HVS. */
612         vc4_dlist_write(vc4_state, 0xc0c0c0c0);
613
614
615         /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
616          *
617          * The pointers may be any byte address.
618          */
619         vc4_state->ptr0_offset = vc4_state->dlist_count;
620         if (!format->flip_cbcr) {
621                 for (i = 0; i < num_planes; i++)
622                         vc4_dlist_write(vc4_state, vc4_state->offsets[i]);
623         } else {
624                 WARN_ON_ONCE(num_planes != 3);
625                 vc4_dlist_write(vc4_state, vc4_state->offsets[0]);
626                 vc4_dlist_write(vc4_state, vc4_state->offsets[2]);
627                 vc4_dlist_write(vc4_state, vc4_state->offsets[1]);
628         }
629
630         /* Pointer Context Word 0/1/2: Written by the HVS */
631         for (i = 0; i < num_planes; i++)
632                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
633
634         /* Pitch word 0 */
635         vc4_dlist_write(vc4_state, pitch0);
636
637         /* Pitch word 1/2 */
638         for (i = 1; i < num_planes; i++) {
639                 vc4_dlist_write(vc4_state,
640                                 VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH));
641         }
642
643         /* Colorspace conversion words */
644         if (vc4_state->is_yuv) {
645                 vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
646                 vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
647                 vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
648         }
649
650         if (!vc4_state->is_unity) {
651                 /* LBM Base Address. */
652                 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
653                     vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
654                         vc4_dlist_write(vc4_state, vc4_state->lbm.start);
655                 }
656
657                 if (num_planes > 1) {
658                         /* Emit Cb/Cr as channel 0 and Y as channel
659                          * 1. This matches how we set up scl0/scl1
660                          * above.
661                          */
662                         vc4_write_scaling_parameters(state, 1);
663                 }
664                 vc4_write_scaling_parameters(state, 0);
665
666                 /* If any PPF setup was done, then all the kernel
667                  * pointers get uploaded.
668                  */
669                 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
670                     vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
671                     vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
672                     vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
673                         u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
674                                                    SCALER_PPF_KERNEL_OFFSET);
675
676                         /* HPPF plane 0 */
677                         vc4_dlist_write(vc4_state, kernel);
678                         /* VPPF plane 0 */
679                         vc4_dlist_write(vc4_state, kernel);
680                         /* HPPF plane 1 */
681                         vc4_dlist_write(vc4_state, kernel);
682                         /* VPPF plane 1 */
683                         vc4_dlist_write(vc4_state, kernel);
684                 }
685         }
686
687         vc4_state->dlist[ctl0_offset] |=
688                 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
689
690         return 0;
691 }
692
693 /* If a modeset involves changing the setup of a plane, the atomic
694  * infrastructure will call this to validate a proposed plane setup.
695  * However, if a plane isn't getting updated, this (and the
696  * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
697  * compute the dlist here and have all active plane dlists get updated
698  * in the CRTC's flush.
699  */
700 static int vc4_plane_atomic_check(struct drm_plane *plane,
701                                   struct drm_plane_state *state)
702 {
703         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
704
705         vc4_state->dlist_count = 0;
706
707         if (plane_enabled(state))
708                 return vc4_plane_mode_set(plane, state);
709         else
710                 return 0;
711 }
712
/* Intentionally empty.  The plane cannot know where in the CRTC's
 * dlist it will be placed, so its dlist is written to the hardware by
 * vc4_plane_write_dlist() at CRTC atomic_flush time instead.
 */
static void vc4_plane_atomic_update(struct drm_plane *plane,
                                    struct drm_plane_state *old_state)
{
}
722
723 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
724 {
725         struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
726         int i;
727
728         vc4_state->hw_dlist = dlist;
729
730         /* Can't memcpy_toio() because it needs to be 32-bit writes. */
731         for (i = 0; i < vc4_state->dlist_count; i++)
732                 writel(vc4_state->dlist[i], &dlist[i]);
733
734         return vc4_state->dlist_count;
735 }
736
737 u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
738 {
739         const struct vc4_plane_state *vc4_state =
740                 container_of(state, typeof(*vc4_state), base);
741
742         return vc4_state->dlist_count;
743 }
744
745 /* Updates the plane to immediately (well, once the FIFO needs
746  * refilling) scan out from at a new framebuffer.
747  */
748 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
749 {
750         struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
751         struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
752         uint32_t addr;
753
754         /* We're skipping the address adjustment for negative origin,
755          * because this is only called on the primary plane.
756          */
757         WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
758         addr = bo->paddr + fb->offsets[0];
759
760         /* Write the new address into the hardware immediately.  The
761          * scanout will start from this address as soon as the FIFO
762          * needs to refill with pixels.
763          */
764         writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
765
766         /* Also update the CPU-side dlist copy, so that any later
767          * atomic updates that don't do a new modeset on our plane
768          * also use our updated address.
769          */
770         vc4_state->dlist[vc4_state->ptr0_offset] = addr;
771 }
772
773 static int vc4_prepare_fb(struct drm_plane *plane,
774                           struct drm_plane_state *state)
775 {
776         struct vc4_bo *bo;
777         struct dma_fence *fence;
778         int ret;
779
780         if ((plane->state->fb == state->fb) || !state->fb)
781                 return 0;
782
783         bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
784
785         ret = vc4_bo_inc_usecnt(bo);
786         if (ret)
787                 return ret;
788
789         fence = reservation_object_get_excl_rcu(bo->resv);
790         drm_atomic_set_fence_for_plane(state, fence);
791
792         return 0;
793 }
794
795 static void vc4_cleanup_fb(struct drm_plane *plane,
796                            struct drm_plane_state *state)
797 {
798         struct vc4_bo *bo;
799
800         if (plane->state->fb == state->fb || !state->fb)
801                 return;
802
803         bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
804         vc4_bo_dec_usecnt(bo);
805 }
806
807 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
808         .atomic_check = vc4_plane_atomic_check,
809         .atomic_update = vc4_plane_atomic_update,
810         .prepare_fb = vc4_prepare_fb,
811         .cleanup_fb = vc4_cleanup_fb,
812 };
813
/* .destroy hook for VC4 planes: disables the plane through the plane
 * helper first, then runs the DRM core's plane cleanup.
 */
static void vc4_plane_destroy(struct drm_plane *plane)
{
        drm_plane_helper_disable(plane);
        drm_plane_cleanup(plane);
}
819
820 /* Implements immediate (non-vblank-synced) updates of the cursor
821  * position, or falls back to the atomic helper otherwise.
822  */
823 static int
824 vc4_update_plane(struct drm_plane *plane,
825                  struct drm_crtc *crtc,
826                  struct drm_framebuffer *fb,
827                  int crtc_x, int crtc_y,
828                  unsigned int crtc_w, unsigned int crtc_h,
829                  uint32_t src_x, uint32_t src_y,
830                  uint32_t src_w, uint32_t src_h,
831                  struct drm_modeset_acquire_ctx *ctx)
832 {
833         struct drm_plane_state *plane_state;
834         struct vc4_plane_state *vc4_state;
835
836         if (plane != crtc->cursor)
837                 goto out;
838
839         plane_state = plane->state;
840         vc4_state = to_vc4_plane_state(plane_state);
841
842         if (!plane_state)
843                 goto out;
844
845         /* No configuring new scaling in the fast path. */
846         if (crtc_w != plane_state->crtc_w ||
847             crtc_h != plane_state->crtc_h ||
848             src_w != plane_state->src_w ||
849             src_h != plane_state->src_h) {
850                 goto out;
851         }
852
853         if (fb != plane_state->fb) {
854                 drm_atomic_set_fb_for_plane(plane->state, fb);
855                 vc4_plane_async_set_fb(plane, fb);
856         }
857
858         /* Set the cursor's position on the screen.  This is the
859          * expected change from the drm_mode_cursor_universal()
860          * helper.
861          */
862         plane_state->crtc_x = crtc_x;
863         plane_state->crtc_y = crtc_y;
864
865         /* Allow changing the start position within the cursor BO, if
866          * that matters.
867          */
868         plane_state->src_x = src_x;
869         plane_state->src_y = src_y;
870
871         /* Update the display list based on the new crtc_x/y. */
872         vc4_plane_atomic_check(plane, plane_state);
873
874         /* Note that we can't just call vc4_plane_write_dlist()
875          * because that would smash the context data that the HVS is
876          * currently using.
877          */
878         writel(vc4_state->dlist[vc4_state->pos0_offset],
879                &vc4_state->hw_dlist[vc4_state->pos0_offset]);
880         writel(vc4_state->dlist[vc4_state->pos2_offset],
881                &vc4_state->hw_dlist[vc4_state->pos2_offset]);
882         writel(vc4_state->dlist[vc4_state->ptr0_offset],
883                &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
884
885         return 0;
886
887 out:
888         return drm_atomic_helper_update_plane(plane, crtc, fb,
889                                               crtc_x, crtc_y,
890                                               crtc_w, crtc_h,
891                                               src_x, src_y,
892                                               src_w, src_h,
893                                               ctx);
894 }
895
896 static const struct drm_plane_funcs vc4_plane_funcs = {
897         .update_plane = vc4_update_plane,
898         .disable_plane = drm_atomic_helper_disable_plane,
899         .destroy = vc4_plane_destroy,
900         .set_property = NULL,
901         .reset = vc4_plane_reset,
902         .atomic_duplicate_state = vc4_plane_duplicate_state,
903         .atomic_destroy_state = vc4_plane_destroy_state,
904 };
905
906 struct drm_plane *vc4_plane_init(struct drm_device *dev,
907                                  enum drm_plane_type type)
908 {
909         struct drm_plane *plane = NULL;
910         struct vc4_plane *vc4_plane;
911         u32 formats[ARRAY_SIZE(hvs_formats)];
912         u32 num_formats = 0;
913         int ret = 0;
914         unsigned i;
915
916         vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
917                                  GFP_KERNEL);
918         if (!vc4_plane)
919                 return ERR_PTR(-ENOMEM);
920
921         for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
922                 /* Don't allow YUV in cursor planes, since that means
923                  * tuning on the scaler, which we don't allow for the
924                  * cursor.
925                  */
926                 if (type != DRM_PLANE_TYPE_CURSOR ||
927                     hvs_formats[i].hvs < HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE) {
928                         formats[num_formats++] = hvs_formats[i].drm;
929                 }
930         }
931         plane = &vc4_plane->base;
932         ret = drm_universal_plane_init(dev, plane, 0,
933                                        &vc4_plane_funcs,
934                                        formats, num_formats,
935                                        NULL, type, NULL);
936
937         drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
938
939         return plane;
940 }