drm/vc4: Drop planes that have 0 destination size
[platform/kernel/linux-rpi.git] / drivers / gpu / drm / vc4 / vc4_plane.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Broadcom
4  */
5
6 /**
7  * DOC: VC4 plane module
8  *
9  * Each DRM plane is a layer of pixels being scanned out by the HVS.
10  *
11  * At atomic modeset check time, we compute the HVS display element
12  * state that would be necessary for displaying the plane (giving us a
13  * chance to figure out if a plane configuration is invalid), then at
14  * atomic flush time the CRTC will ask us to write our element state
15  * into the region of the HVS that it has allocated for us.
16  */
17
18 #include <drm/drm_atomic.h>
19 #include <drm/drm_atomic_helper.h>
20 #include <drm/drm_atomic_uapi.h>
21 #include <drm/drm_blend.h>
22 #include <drm/drm_drv.h>
23 #include <drm/drm_fb_dma_helper.h>
24 #include <drm/drm_fourcc.h>
25 #include <drm/drm_framebuffer.h>
26 #include <drm/drm_gem_atomic_helper.h>
27
28 #include "uapi/drm/vc4_drm.h"
29
30 #include "vc4_drv.h"
31 #include "vc4_regs.h"
32
33 static const struct hvs_format {
34         u32 drm; /* DRM_FORMAT_* */
35         u32 hvs; /* HVS_FORMAT_* */
36         u32 pixel_order;
37         u32 pixel_order_hvs5;
38         bool hvs5_only;
39 } hvs_formats[] = {
40         {
41                 .drm = DRM_FORMAT_XRGB8888,
42                 .hvs = HVS_PIXEL_FORMAT_RGBA8888,
43                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
44                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
45         },
46         {
47                 .drm = DRM_FORMAT_ARGB8888,
48                 .hvs = HVS_PIXEL_FORMAT_RGBA8888,
49                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
50                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
51         },
52         {
53                 .drm = DRM_FORMAT_ABGR8888,
54                 .hvs = HVS_PIXEL_FORMAT_RGBA8888,
55                 .pixel_order = HVS_PIXEL_ORDER_ARGB,
56                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
57         },
58         {
59                 .drm = DRM_FORMAT_XBGR8888,
60                 .hvs = HVS_PIXEL_FORMAT_RGBA8888,
61                 .pixel_order = HVS_PIXEL_ORDER_ARGB,
62                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
63         },
64         {
65                 .drm = DRM_FORMAT_RGB565,
66                 .hvs = HVS_PIXEL_FORMAT_RGB565,
67                 .pixel_order = HVS_PIXEL_ORDER_XRGB,
68                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
69         },
70         {
71                 .drm = DRM_FORMAT_BGR565,
72                 .hvs = HVS_PIXEL_FORMAT_RGB565,
73                 .pixel_order = HVS_PIXEL_ORDER_XBGR,
74                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
75         },
76         {
77                 .drm = DRM_FORMAT_ARGB1555,
78                 .hvs = HVS_PIXEL_FORMAT_RGBA5551,
79                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
80                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
81         },
82         {
83                 .drm = DRM_FORMAT_XRGB1555,
84                 .hvs = HVS_PIXEL_FORMAT_RGBA5551,
85                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
86                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
87         },
88         {
89                 .drm = DRM_FORMAT_RGB888,
90                 .hvs = HVS_PIXEL_FORMAT_RGB888,
91                 .pixel_order = HVS_PIXEL_ORDER_XRGB,
92                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
93         },
94         {
95                 .drm = DRM_FORMAT_BGR888,
96                 .hvs = HVS_PIXEL_FORMAT_RGB888,
97                 .pixel_order = HVS_PIXEL_ORDER_XBGR,
98                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
99         },
100         {
101                 .drm = DRM_FORMAT_YUV422,
102                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
103                 .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
104                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
105         },
106         {
107                 .drm = DRM_FORMAT_YVU422,
108                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
109                 .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
110                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
111         },
112         {
113                 .drm = DRM_FORMAT_YUV444,
114                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
115                 .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
116                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
117         },
118         {
119                 .drm = DRM_FORMAT_YVU444,
120                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
121                 .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
122                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
123         },
124         {
125                 .drm = DRM_FORMAT_YUV420,
126                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
127                 .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
128                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
129         },
130         {
131                 .drm = DRM_FORMAT_YVU420,
132                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
133                 .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
134                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
135         },
136         {
137                 .drm = DRM_FORMAT_NV12,
138                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
139                 .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
140                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
141         },
142         {
143                 .drm = DRM_FORMAT_NV21,
144                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
145                 .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
146                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
147         },
148         {
149                 .drm = DRM_FORMAT_NV16,
150                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
151                 .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
152                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
153         },
154         {
155                 .drm = DRM_FORMAT_NV61,
156                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
157                 .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
158                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
159         },
160         {
161                 .drm = DRM_FORMAT_P030,
162                 .hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT,
163                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
164                 .hvs5_only = true,
165         },
166         {
167                 .drm = DRM_FORMAT_XRGB2101010,
168                 .hvs = HVS_PIXEL_FORMAT_RGBA1010102,
169                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
170                 .hvs5_only = true,
171         },
172         {
173                 .drm = DRM_FORMAT_ARGB2101010,
174                 .hvs = HVS_PIXEL_FORMAT_RGBA1010102,
175                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
176                 .hvs5_only = true,
177         },
178         {
179                 .drm = DRM_FORMAT_ABGR2101010,
180                 .hvs = HVS_PIXEL_FORMAT_RGBA1010102,
181                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
182                 .hvs5_only = true,
183         },
184         {
185                 .drm = DRM_FORMAT_XBGR2101010,
186                 .hvs = HVS_PIXEL_FORMAT_RGBA1010102,
187                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
188                 .hvs5_only = true,
189         },
190         {
191                 .drm = DRM_FORMAT_RGB332,
192                 .hvs = HVS_PIXEL_FORMAT_RGB332,
193                 .pixel_order = HVS_PIXEL_ORDER_ARGB,
194                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
195         },
196         {
197                 .drm = DRM_FORMAT_BGR233,
198                 .hvs = HVS_PIXEL_FORMAT_RGB332,
199                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
200                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
201         },
202         {
203                 .drm = DRM_FORMAT_XRGB4444,
204                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
205                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
206                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
207         },
208         {
209                 .drm = DRM_FORMAT_ARGB4444,
210                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
211                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
212                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
213         },
214         {
215                 .drm = DRM_FORMAT_XBGR4444,
216                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
217                 .pixel_order = HVS_PIXEL_ORDER_ARGB,
218                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
219         },
220         {
221                 .drm = DRM_FORMAT_ABGR4444,
222                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
223                 .pixel_order = HVS_PIXEL_ORDER_ARGB,
224                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
225         },
226         {
227                 .drm = DRM_FORMAT_BGRX4444,
228                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
229                 .pixel_order = HVS_PIXEL_ORDER_RGBA,
230                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
231         },
232         {
233                 .drm = DRM_FORMAT_BGRA4444,
234                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
235                 .pixel_order = HVS_PIXEL_ORDER_RGBA,
236                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
237         },
238         {
239                 .drm = DRM_FORMAT_RGBX4444,
240                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
241                 .pixel_order = HVS_PIXEL_ORDER_BGRA,
242                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
243         },
244         {
245                 .drm = DRM_FORMAT_RGBA4444,
246                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
247                 .pixel_order = HVS_PIXEL_ORDER_BGRA,
248                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
249         },
250 };
251
252 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
253 {
254         unsigned i;
255
256         for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
257                 if (hvs_formats[i].drm == drm_format)
258                         return &hvs_formats[i];
259         }
260
261         return NULL;
262 }
263
264 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
265 {
266         if (dst == src >> 16)
267                 return VC4_SCALING_NONE;
268         if (3 * dst >= 2 * (src >> 16))
269                 return VC4_SCALING_PPF;
270         else
271                 return VC4_SCALING_TPZ;
272 }
273
274 static bool plane_enabled(struct drm_plane_state *state)
275 {
276         return state->fb && !WARN_ON(!state->crtc);
277 }
278
279 struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
280 {
281         struct vc4_plane_state *vc4_state;
282         unsigned int i;
283
284         if (WARN_ON(!plane->state))
285                 return NULL;
286
287         vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
288         if (!vc4_state)
289                 return NULL;
290
291         memset(&vc4_state->upm, 0, sizeof(vc4_state->upm));
292
293         for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++)
294                 vc4_state->upm_handle[i] = 0;
295
296         vc4_state->dlist_initialized = 0;
297
298         __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
299
300         if (vc4_state->dlist) {
301                 vc4_state->dlist = kmemdup(vc4_state->dlist,
302                                            vc4_state->dlist_count * 4,
303                                            GFP_KERNEL);
304                 if (!vc4_state->dlist) {
305                         kfree(vc4_state);
306                         return NULL;
307                 }
308                 vc4_state->dlist_size = vc4_state->dlist_count;
309         }
310
311         return &vc4_state->base;
312 }
313
314 void vc4_plane_destroy_state(struct drm_plane *plane,
315                              struct drm_plane_state *state)
316 {
317         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
318         struct vc4_hvs *hvs = vc4->hvs;
319         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
320         unsigned int i;
321
322         for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
323                 unsigned long irqflags;
324
325                 if (!drm_mm_node_allocated(&vc4_state->upm[i]))
326                         continue;
327
328                 spin_lock_irqsave(&hvs->mm_lock, irqflags);
329                 drm_mm_remove_node(&vc4_state->upm[i]);
330                 spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
331
332                 if (vc4_state->upm_handle[i] > 0)
333                         ida_free(&hvs->upm_handles, vc4_state->upm_handle[i]);
334         }
335
336         kfree(vc4_state->dlist);
337         __drm_atomic_helper_plane_destroy_state(&vc4_state->base);
338         kfree(state);
339 }
340
341 /* Called during init to allocate the plane's atomic state. */
342 void vc4_plane_reset(struct drm_plane *plane)
343 {
344         struct vc4_plane_state *vc4_state;
345
346         WARN_ON(plane->state);
347
348         vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
349         if (!vc4_state)
350                 return;
351
352         __drm_atomic_helper_plane_reset(plane, &vc4_state->base);
353 }
354
355 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
356 {
357         if (vc4_state->dlist_count == vc4_state->dlist_size) {
358                 u32 new_size = max(4u, vc4_state->dlist_count * 2);
359                 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
360
361                 if (!new_dlist)
362                         return;
363                 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
364
365                 kfree(vc4_state->dlist);
366                 vc4_state->dlist = new_dlist;
367                 vc4_state->dlist_size = new_size;
368         }
369
370         vc4_state->dlist_count++;
371 }
372
373 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
374 {
375         unsigned int idx = vc4_state->dlist_count;
376
377         vc4_dlist_counter_increment(vc4_state);
378         vc4_state->dlist[idx] = val;
379 }
380
381 /* Returns the scl0/scl1 field based on whether the dimensions need to
382  * be up/down/non-scaled.
383  *
384  * This is a replication of a table from the spec.
385  */
386 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
387 {
388         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
389
390         switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
391         case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
392                 return SCALER_CTL0_SCL_H_PPF_V_PPF;
393         case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
394                 return SCALER_CTL0_SCL_H_TPZ_V_PPF;
395         case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
396                 return SCALER_CTL0_SCL_H_PPF_V_TPZ;
397         case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
398                 return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
399         case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
400                 return SCALER_CTL0_SCL_H_PPF_V_NONE;
401         case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
402                 return SCALER_CTL0_SCL_H_NONE_V_PPF;
403         case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
404                 return SCALER_CTL0_SCL_H_NONE_V_TPZ;
405         case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
406                 return SCALER_CTL0_SCL_H_TPZ_V_NONE;
407         default:
408         case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
409                 /* The unity case is independently handled by
410                  * SCALER_CTL0_UNITY.
411                  */
412                 return 0;
413         }
414 }
415
416 static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
417 {
418         struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
419         unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
420         struct drm_crtc_state *crtc_state;
421
422         crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
423                                                    pstate->crtc);
424
425         vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
426         if (!left && !right && !top && !bottom)
427                 return 0;
428
429         if (left + right >= crtc_state->mode.hdisplay ||
430             top + bottom >= crtc_state->mode.vdisplay)
431                 return -EINVAL;
432
433         adjhdisplay = crtc_state->mode.hdisplay - (left + right);
434         vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
435                                                adjhdisplay,
436                                                crtc_state->mode.hdisplay);
437         vc4_pstate->crtc_x += left;
438         if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
439                 vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;
440
441         adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
442         vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
443                                                adjvdisplay,
444                                                crtc_state->mode.vdisplay);
445         vc4_pstate->crtc_y += top;
446         if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
447                 vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;
448
449         vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
450                                                adjhdisplay,
451                                                crtc_state->mode.hdisplay);
452         vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
453                                                adjvdisplay,
454                                                crtc_state->mode.vdisplay);
455
456         if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
457                 return -EINVAL;
458
459         return 0;
460 }
461
462 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
463 {
464         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
465         struct drm_framebuffer *fb = state->fb;
466         int num_planes = fb->format->num_planes;
467         struct drm_crtc_state *crtc_state;
468         u32 h_subsample = fb->format->hsub;
469         u32 v_subsample = fb->format->vsub;
470         int ret;
471
472         crtc_state = drm_atomic_get_existing_crtc_state(state->state,
473                                                         state->crtc);
474         if (!crtc_state) {
475                 DRM_DEBUG_KMS("Invalid crtc state\n");
476                 return -EINVAL;
477         }
478
479         ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
480                                                   INT_MAX, true, true);
481         if (ret)
482                 return ret;
483
484         vc4_state->src_x = state->src.x1;
485         vc4_state->src_y = state->src.y1;
486         vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
487         vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;
488
489         vc4_state->crtc_x = state->dst.x1;
490         vc4_state->crtc_y = state->dst.y1;
491         vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
492         vc4_state->crtc_h = state->dst.y2 - state->dst.y1;
493
494         if (!vc4_state->crtc_w)
495                 vc4_state->crtc_w = state->crtc->mode.hdisplay;
496         if (!vc4_state->crtc_h)
497                 vc4_state->crtc_h = state->crtc->mode.vdisplay;
498
499         ret = vc4_plane_margins_adj(state);
500         if (ret)
501                 return ret;
502
503         vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
504                                                        vc4_state->crtc_w);
505         vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
506                                                        vc4_state->crtc_h);
507
508         vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
509                                vc4_state->y_scaling[0] == VC4_SCALING_NONE);
510
511         if (num_planes > 1) {
512                 vc4_state->is_yuv = true;
513
514                 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
515                 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
516
517                 vc4_state->x_scaling[1] =
518                         vc4_get_scaling_mode(vc4_state->src_w[1],
519                                              vc4_state->crtc_w);
520                 vc4_state->y_scaling[1] =
521                         vc4_get_scaling_mode(vc4_state->src_h[1],
522                                              vc4_state->crtc_h);
523
524                 /* YUV conversion requires that horizontal scaling be enabled
525                  * on the UV plane even if vc4_get_scaling_mode() returned
526                  * VC4_SCALING_NONE (which can happen when the down-scaling
527                  * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
528                  * case.
529                  */
530                 if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
531                         vc4_state->x_scaling[1] = VC4_SCALING_PPF;
532
533                 /* Similarly UV needs vertical scaling to be enabled.
534                  * Without this a 1:1 scaled YUV422 plane isn't rendered.
535                  */
536                 if (vc4_state->y_scaling[1] == VC4_SCALING_NONE)
537                         vc4_state->y_scaling[1] = VC4_SCALING_PPF;
538         } else {
539                 vc4_state->is_yuv = false;
540                 vc4_state->x_scaling[1] = VC4_SCALING_NONE;
541                 vc4_state->y_scaling[1] = VC4_SCALING_NONE;
542         }
543
544         return 0;
545 }
546
547 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
548 {
549         struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
550         u32 scale, recip;
551
552         WARN_ON_ONCE(vc4->gen > VC4_GEN_6);
553
554         scale = src / dst;
555
556         /* The specs note that while the reciprocal would be defined
557          * as (1<<32)/scale, ~0 is close enough.
558          */
559         recip = ~0 / scale;
560
561         vc4_dlist_write(vc4_state,
562                         /*
563                          * The BCM2712 is lacking BIT(31) compared to
564                          * the previous generations, but we don't use
565                          * it.
566                          */
567                         VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
568                         VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
569         vc4_dlist_write(vc4_state,
570                         VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
571 }
572
573 /* phase magnitude bits */
574 #define PHASE_BITS 6
575
576 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst, u32 xy, int channel, int chroma_offset)
577 {
578         struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
579         u32 scale = src / dst;
580         s32 offset, offset2;
581         s32 phase;
582
583         WARN_ON_ONCE(vc4->gen > VC4_GEN_6);
584
585         /* Start the phase at 1/2 pixel from the 1st pixel at src_x.
586            1/4 pixel for YUV, plus the offset for chroma siting */
587         if (channel) {
588                 /* the phase is relative to scale_src->x, so shift it for display list's x value */
589                 offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
590                 offset -= chroma_offset >> (17 - PHASE_BITS);
591                 offset += -(1 << PHASE_BITS >> 2);
592         } else {
593                 /* the phase is relative to scale_src->x, so shift it for display list's x value */
594                 offset = (xy & 0xffff) >> (16 - PHASE_BITS);
595                 offset += -(1 << PHASE_BITS >> 1);
596
597                 /* This is a kludge to make sure the scaling factors are consitent with YUV's luma scaling.
598                    we lose 1bit precision because of this. */
599                 scale &= ~1;
600         }
601
602         /* There may be a also small error introduced by precision of scale.
603            Add half of that as a compromise */
604         offset2 = src - dst * scale;
605         offset2 >>= 16 - PHASE_BITS;
606         phase = offset + (offset2 >> 1);
607
608         /* Ensure +ve values don't touch the sign bit, then truncate negative values */
609         if (phase >= 1 << PHASE_BITS)
610                 phase = (1 << PHASE_BITS) - 1;
611
612         phase &= SCALER_PPF_IPHASE_MASK;
613
614         vc4_dlist_write(vc4_state,
615                         SCALER_PPF_AGC |
616                         VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
617                         /*
618                          * The register layout documentation is slightly
619                          * different to setup the phase in the BCM2712,
620                          * but they seem equivalent.
621                          */
622                         VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
623 }
624
625 static u32 __vc4_lbm_size(struct drm_plane_state *state)
626 {
627         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
628         struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
629         u32 pix_per_line;
630         u32 lbm;
631
632         /* LBM is not needed when there's no vertical scaling. */
633         if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
634             vc4_state->y_scaling[1] == VC4_SCALING_NONE)
635                 return 0;
636
637         /*
638          * This can be further optimized in the RGB/YUV444 case if the PPF
639          * decimation factor is between 0.5 and 1.0 by using crtc_w.
640          *
641          * It's not an issue though, since in that case since src_w[0] is going
642          * to be greater than or equal to crtc_w.
643          */
644         if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
645                 pix_per_line = vc4_state->crtc_w;
646         else
647                 pix_per_line = vc4_state->src_w[0] >> 16;
648
649         if (!vc4_state->is_yuv) {
650                 if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
651                         lbm = pix_per_line * 8;
652                 else {
653                         /* In special cases, this multiplier might be 12. */
654                         lbm = pix_per_line * 16;
655                 }
656         } else {
657                 /* There are cases for this going down to a multiplier
658                  * of 2, but according to the firmware source, the
659                  * table in the docs is somewhat wrong.
660                  */
661                 lbm = pix_per_line * 16;
662         }
663
664         /* Align it to 64 or 128 (hvs5) bytes */
665         lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64);
666
667         /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
668         lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2;
669
670         return lbm;
671 }
672
673 static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
674                                                 unsigned int channel)
675 {
676         const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
677
678         switch (vc4_state->y_scaling[channel]) {
679         case VC4_SCALING_PPF:
680                 return 4;
681
682         case VC4_SCALING_TPZ:
683                 return 2;
684
685         default:
686                 return 0;
687         }
688 }
689
690 static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
691                                        unsigned int channel)
692 {
693         const struct drm_format_info *info = state->fb->format;
694         const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
695
696         if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
697                 return 0;
698
699         if (info->is_yuv)
700                 return channel ? 2 : 1;
701
702         if (info->has_alpha)
703                 return 4;
704
705         return 3;
706 }
707
708 static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
709                                          unsigned int channel)
710 {
711         const struct drm_format_info *info = state->fb->format;
712         const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
713         unsigned int channels_scaled = 0;
714         unsigned int components, words, wpc;
715         unsigned int width, lines;
716         unsigned int i;
717
718         /* LBM is meant to use the smaller of source or dest width, but there
719          * is a issue with UV scaling that the size required for the second
720          * channel is based on the source width only.
721          */
722         if (info->hsub > 1 && channel == 1)
723                 width = state->src_w >> 16;
724         else
725                 width = min(state->src_w >> 16, state->crtc_w);
726         width = round_up(width / info->hsub, 4);
727
728         wpc = vc4_lbm_words_per_component(state, channel);
729         if (!wpc)
730                 return 0;
731
732         components = vc4_lbm_components(state, channel);
733         if (!components)
734                 return 0;
735
736         if (state->alpha != DRM_BLEND_ALPHA_OPAQUE)
737                 components -= 1;
738
739         words = width * wpc * components;
740
741         lines = DIV_ROUND_UP(words, 128 / info->hsub);
742
743         for (i = 0; i < 2; i++)
744                 if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE)
745                         channels_scaled++;
746
747         if (channels_scaled == 1)
748                 lines = lines / 2;
749
750         return lines;
751 }
752
753 static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
754 {
755         const struct drm_format_info *info = state->fb->format;
756
757         if (info->hsub > 1)
758                 return max(vc4_lbm_channel_size(state, 0),
759                            vc4_lbm_channel_size(state, 1));
760         else
761                 return vc4_lbm_channel_size(state, 0);
762 }
763
764 u32 vc4_lbm_size(struct drm_plane_state *state)
765 {
766         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
767         struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
768
769         /* LBM is not needed when there's no vertical scaling. */
770         if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
771             vc4_state->y_scaling[1] == VC4_SCALING_NONE)
772                 return 0;
773
774         if (vc4->gen >= VC4_GEN_6)
775                 return __vc6_lbm_size(state);
776         else
777                 return __vc4_lbm_size(state);
778 }
779
780 static size_t vc6_upm_size(const struct drm_plane_state *state,
781                            unsigned int plane)
782 {
783         const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
784         unsigned int stride = state->fb->pitches[plane];
785
786         /*
787          * TODO: This only works for raster formats, and is sub-optimal
788          * for buffers with a stride aligned on 32 bytes.
789          */
790         unsigned int words_per_line = (stride + 62) / 32;
791         unsigned int fetch_region_size = words_per_line * 32;
792         unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
793         unsigned int buffer_size = fetch_region_size * buffer_lines;
794
795         return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
796 }
797
798 static void vc4_write_scaling_parameters(struct drm_plane_state *state,
799                                          int channel)
800 {
801         struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
802         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
803
804         WARN_ON_ONCE(vc4->gen > VC4_GEN_6);
805
806         /* Ch0 H-PPF Word 0: Scaling Parameters */
807         if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
808                 vc4_write_ppf(vc4_state,
809                               vc4_state->src_w[channel], vc4_state->crtc_w, vc4_state->src_x, channel,
810                               state->chroma_siting_h);
811         }
812
813         /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
814         if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
815                 vc4_write_ppf(vc4_state,
816                               vc4_state->src_h[channel], vc4_state->crtc_h, vc4_state->src_y, channel,
817                               state->chroma_siting_v);
818                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
819         }
820
821         /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
822         if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
823                 vc4_write_tpz(vc4_state,
824                               vc4_state->src_w[channel], vc4_state->crtc_w);
825         }
826
827         /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
828         if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
829                 vc4_write_tpz(vc4_state,
830                               vc4_state->src_h[channel], vc4_state->crtc_h);
831                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
832         }
833 }
834
835 static void vc4_plane_calc_load(struct drm_plane_state *state)
836 {
837         unsigned int hvs_load_shift, vrefresh, i;
838         struct drm_framebuffer *fb = state->fb;
839         struct vc4_plane_state *vc4_state;
840         struct drm_crtc_state *crtc_state;
841         unsigned int vscale_factor;
842
843         vc4_state = to_vc4_plane_state(state);
844         crtc_state = drm_atomic_get_existing_crtc_state(state->state,
845                                                         state->crtc);
846         vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
847
848         /* The HVS is able to process 2 pixels/cycle when scaling the source,
849          * 4 pixels/cycle otherwise.
850          * Alpha blending step seems to be pipelined and it's always operating
851          * at 4 pixels/cycle, so the limiting aspect here seems to be the
852          * scaler block.
853          * HVS load is expressed in clk-cycles/sec (AKA Hz).
854          */
855         if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
856             vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
857             vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
858             vc4_state->y_scaling[1] != VC4_SCALING_NONE)
859                 hvs_load_shift = 1;
860         else
861                 hvs_load_shift = 2;
862
863         vc4_state->membus_load = 0;
864         vc4_state->hvs_load = 0;
865         for (i = 0; i < fb->format->num_planes; i++) {
866                 /* Even if the bandwidth/plane required for a single frame is
867                  *
868                  * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
869                  *  cpp * vrefresh
870                  *
871                  * when downscaling, we have to read more pixels per line in
872                  * the time frame reserved for a single line, so the bandwidth
873                  * demand can be punctually higher. To account for that, we
874                  * calculate the down-scaling factor and multiply the plane
875                  * load by this number. We're likely over-estimating the read
876                  * demand, but that's better than under-estimating it.
877                  */
878                 vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
879                                              vc4_state->crtc_h);
880                 vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
881                                           (vc4_state->src_h[i] >> 16) *
882                                           vscale_factor * fb->format->cpp[i];
883                 vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
884         }
885
886         vc4_state->hvs_load *= vrefresh;
887         vc4_state->hvs_load >>= hvs_load_shift;
888         vc4_state->membus_load *= vrefresh;
889 }
890
891 static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
892 {
893         struct drm_device *drm = state->plane->dev;
894         struct vc4_dev *vc4 = to_vc4_dev(drm);
895         struct drm_plane *plane = state->plane;
896         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
897         u32 lbm_size;
898
899         lbm_size = vc4_lbm_size(state);
900         if (!lbm_size) {
901                 vc4_state->lbm_size = 0;
902                 return 0;
903         }
904
905         /*
906          * NOTE: BCM2712 doesn't need to be aligned, since the size
907          * returned by vc4_lbm_size() is in words already.
908          */
909         if (vc4->gen == VC4_GEN_5)
910                 lbm_size = ALIGN(lbm_size, 64);
911         else if (vc4->gen == VC4_GEN_4)
912                 lbm_size = ALIGN(lbm_size, 32);
913
914         drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n",
915                        plane->base.id, plane->name, lbm_size);
916
917         if (WARN_ON(!vc4_state->lbm_offset))
918                 return -EINVAL;
919
920         /* FIXME: Add loop here that ensures that the total LBM assigned in this
921          *  state is less than the total lbm size
922          */
923         vc4_state->lbm_size = lbm_size;
924
925         return 0;
926 }
927
928 static int vc6_plane_allocate_upm(struct drm_plane_state *state)
929 {
930         const struct drm_format_info *info = state->fb->format;
931         struct drm_device *drm = state->plane->dev;
932         struct vc4_dev *vc4 = to_vc4_dev(drm);
933         struct vc4_hvs *hvs = vc4->hvs;
934         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
935         unsigned int i;
936         int ret;
937
938         WARN_ON_ONCE(vc4->gen < VC4_GEN_6);
939
940         vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;
941
942         for (i = 0; i < info->num_planes; i++) {
943                 unsigned long irqflags;
944                 size_t upm_size;
945
946                 upm_size = vc6_upm_size(state, i);
947                 if (!upm_size)
948                         return -EINVAL;
949
950                 spin_lock_irqsave(&hvs->mm_lock, irqflags);
951                 ret = drm_mm_insert_node_generic(&hvs->upm_mm,
952                                                  &vc4_state->upm[i],
953                                                  upm_size, HVS_UBM_WORD_SIZE,
954                                                  0, 0);
955                 spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
956                 if (ret) {
957                         drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
958                         return ret;
959                 }
960
961                 ret = ida_alloc_range(&hvs->upm_handles, 1, 32, GFP_KERNEL);
962                 if (ret < 0)
963                         return ret;
964
965                 vc4_state->upm_handle[i] = ret;
966
967                 vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
968                         VC4_SET_FIELD(vc4_state->upm[i].start / HVS_UBM_WORD_SIZE,
969                                       SCALER6_PTR0_UPM_BASE) |
970                         VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
971                                       SCALER6_PTR0_UPM_HANDLE) |
972                         VC4_SET_FIELD(vc4_state->upm_buffer_lines,
973                                       SCALER6_PTR0_UPM_BUFF_SIZE);
974         }
975
976         return 0;
977 }
978
979 /*
980  * The colorspace conversion matrices are held in 3 entries in the dlist.
981  * Create an array of them, with entries for each full and limited mode, and
982  * each supported colorspace.
983  */
984 static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
985         {
986                 /* Limited range */
987                 {
988                         /* BT601 */
989                         SCALER_CSC0_ITR_R_601_5,
990                         SCALER_CSC1_ITR_R_601_5,
991                         SCALER_CSC2_ITR_R_601_5,
992                 }, {
993                         /* BT709 */
994                         SCALER_CSC0_ITR_R_709_3,
995                         SCALER_CSC1_ITR_R_709_3,
996                         SCALER_CSC2_ITR_R_709_3,
997                 }, {
998                         /* BT2020 */
999                         SCALER_CSC0_ITR_R_2020,
1000                         SCALER_CSC1_ITR_R_2020,
1001                         SCALER_CSC2_ITR_R_2020,
1002                 }
1003         }, {
1004                 /* Full range */
1005                 {
1006                         /* JFIF */
1007                         SCALER_CSC0_JPEG_JFIF,
1008                         SCALER_CSC1_JPEG_JFIF,
1009                         SCALER_CSC2_JPEG_JFIF,
1010                 }, {
1011                         /* BT709 */
1012                         SCALER_CSC0_ITR_R_709_3_FR,
1013                         SCALER_CSC1_ITR_R_709_3_FR,
1014                         SCALER_CSC2_ITR_R_709_3_FR,
1015                 }, {
1016                         /* BT2020 */
1017                         SCALER_CSC0_ITR_R_2020_FR,
1018                         SCALER_CSC1_ITR_R_2020_FR,
1019                         SCALER_CSC2_ITR_R_2020_FR,
1020                 }
1021         }
1022 };
1023
1024 static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state)
1025 {
1026         struct drm_device *dev = state->state->dev;
1027         struct vc4_dev *vc4 = to_vc4_dev(dev);
1028
1029         WARN_ON_ONCE(vc4->gen != VC4_GEN_4);
1030
1031         if (!state->fb->format->has_alpha)
1032                 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1033                                      SCALER_POS2_ALPHA_MODE);
1034
1035         switch (state->pixel_blend_mode) {
1036         case DRM_MODE_BLEND_PIXEL_NONE:
1037                 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1038                                      SCALER_POS2_ALPHA_MODE);
1039         default:
1040         case DRM_MODE_BLEND_PREMULTI:
1041                 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1042                                      SCALER_POS2_ALPHA_MODE) |
1043                         SCALER_POS2_ALPHA_PREMULT;
1044         case DRM_MODE_BLEND_COVERAGE:
1045                 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1046                                      SCALER_POS2_ALPHA_MODE);
1047         }
1048 }
1049
1050 static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state)
1051 {
1052         struct drm_device *dev = state->state->dev;
1053         struct vc4_dev *vc4 = to_vc4_dev(dev);
1054
1055         WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6);
1056
1057         if (vc4->gen == VC4_GEN_6 && vc4->step_d0) {
1058                 return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ?
1059                         SCALER5_CTL2_ALPHA_PREMULT : 0;
1060         }
1061
1062
1063         if (!state->fb->format->has_alpha)
1064                 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1065                                      SCALER5_CTL2_ALPHA_MODE);
1066
1067         switch (state->pixel_blend_mode) {
1068         case DRM_MODE_BLEND_PIXEL_NONE:
1069                 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1070                                      SCALER5_CTL2_ALPHA_MODE);
1071         default:
1072         case DRM_MODE_BLEND_PREMULTI:
1073                 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1074                                      SCALER5_CTL2_ALPHA_MODE) |
1075                         SCALER5_CTL2_ALPHA_PREMULT;
1076         case DRM_MODE_BLEND_COVERAGE:
1077                 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1078                                      SCALER5_CTL2_ALPHA_MODE);
1079         }
1080 }
1081
1082 /* Writes out a full display list for an active plane to the plane's
1083  * private dlist state.
1084  */
1085 static int vc4_plane_mode_set(struct drm_plane *plane,
1086                               struct drm_plane_state *state)
1087 {
1088         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
1089         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1090         struct drm_framebuffer *fb = state->fb;
1091         u32 ctl0_offset = vc4_state->dlist_count;
1092         const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1093         u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1094         int num_planes = fb->format->num_planes;
1095         u32 h_subsample = fb->format->hsub;
1096         u32 v_subsample = fb->format->vsub;
1097         bool mix_plane_alpha;
1098         bool covers_screen;
1099         u32 scl0, scl1, pitch0;
1100         u32 tiling, src_x, src_y;
1101         u32 width, height;
1102         u32 hvs_format = format->hvs;
1103         unsigned int rotation;
1104         u32 offsets[3] = { 0 };
1105         int ret, i;
1106
1107         if (vc4_state->dlist_initialized)
1108                 return 0;
1109
1110         ret = vc4_plane_setup_clipping_and_scaling(state);
1111         if (ret)
1112                 return ret;
1113
1114         width = vc4_state->src_w[0] >> 16;
1115         height = vc4_state->src_h[0] >> 16;
1116
1117         if (!width || !height || !vc4_state->crtc_w || !vc4_state->crtc_h) {
1118                 /* 0 source size probably means the plane is offscreen */
1119                 vc4_state->dlist_initialized = 1;
1120                 return 0;
1121         }
1122
1123         /* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
1124          * and 4:4:4, scl1 should be set to scl0 so both channels of
1125          * the scaler do the same thing.  For YUV, the Y plane needs
1126          * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1127          * the scl fields here.
1128          */
1129         if (num_planes == 1) {
1130                 scl0 = vc4_get_scl_field(state, 0);
1131                 scl1 = scl0;
1132         } else {
1133                 scl0 = vc4_get_scl_field(state, 1);
1134                 scl1 = vc4_get_scl_field(state, 0);
1135         }
1136
1137         rotation = drm_rotation_simplify(state->rotation,
1138                                          DRM_MODE_ROTATE_0 |
1139                                          DRM_MODE_REFLECT_X |
1140                                          DRM_MODE_REFLECT_Y);
1141
1142         /* We must point to the last line when Y reflection is enabled. */
1143         src_y = vc4_state->src_y >> 16;
1144         if (rotation & DRM_MODE_REFLECT_Y)
1145                 src_y += height - 1;
1146
1147         src_x = vc4_state->src_x >> 16;
1148
1149         switch (base_format_mod) {
1150         case DRM_FORMAT_MOD_LINEAR:
1151                 tiling = SCALER_CTL0_TILING_LINEAR;
1152                 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
1153
1154                 /* Adjust the base pointer to the first pixel to be scanned
1155                  * out.
1156                  */
1157                 for (i = 0; i < num_planes; i++) {
1158                         offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1159                         offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1160                 }
1161
1162                 break;
1163
1164         case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
1165                 u32 tile_size_shift = 12; /* T tiles are 4kb */
1166                 /* Whole-tile offsets, mostly for setting the pitch. */
1167                 u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
1168                 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
1169                 u32 tile_w_mask = (1 << tile_w_shift) - 1;
1170                 /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
1171                  * the height (in pixels) of a 4k tile.
1172                  */
1173                 u32 tile_h_mask = (2 << tile_h_shift) - 1;
1174                 /* For T-tiled, the FB pitch is "how many bytes from one row to
1175                  * the next, such that
1176                  *
1177                  *      pitch * tile_h == tile_size * tiles_per_row
1178                  */
1179                 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
1180                 u32 tiles_l = src_x >> tile_w_shift;
1181                 u32 tiles_r = tiles_w - tiles_l;
1182                 u32 tiles_t = src_y >> tile_h_shift;
1183                 /* Intra-tile offsets, which modify the base address (the
1184                  * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
1185                  * base address).
1186                  */
1187                 u32 tile_y = (src_y >> 4) & 1;
1188                 u32 subtile_y = (src_y >> 2) & 3;
1189                 u32 utile_y = src_y & 3;
1190                 u32 x_off = src_x & tile_w_mask;
1191                 u32 y_off = src_y & tile_h_mask;
1192
1193                 /* When Y reflection is requested we must set the
1194                  * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
1195                  * after the initial one should be fetched in descending order,
1196                  * which makes sense since we start from the last line and go
1197                  * backward.
1198                  * Don't know why we need y_off = max_y_off - y_off, but it's
1199                  * definitely required (I guess it's also related to the "going
1200                  * backward" situation).
1201                  */
1202                 if (rotation & DRM_MODE_REFLECT_Y) {
1203                         y_off = tile_h_mask - y_off;
1204                         pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
1205                 } else {
1206                         pitch0 = 0;
1207                 }
1208
1209                 tiling = SCALER_CTL0_TILING_256B_OR_T;
1210                 pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
1211                            VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
1212                            VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
1213                            VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
1214                 offsets[0] += tiles_t * (tiles_w << tile_size_shift);
1215                 offsets[0] += subtile_y << 8;
1216                 offsets[0] += utile_y << 4;
1217
1218                 /* Rows of tiles alternate left-to-right and right-to-left. */
1219                 if (tiles_t & 1) {
1220                         pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
1221                         offsets[0] += (tiles_w - tiles_l) << tile_size_shift;
1222                         offsets[0] -= (1 + !tile_y) << 10;
1223                 } else {
1224                         offsets[0] += tiles_l << tile_size_shift;
1225                         offsets[0] += tile_y << 10;
1226                 }
1227
1228                 break;
1229         }
1230
1231         case DRM_FORMAT_MOD_BROADCOM_SAND64:
1232         case DRM_FORMAT_MOD_BROADCOM_SAND128:
1233         case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1234                 uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1235
1236                 if (param > SCALER_TILE_HEIGHT_MASK) {
1237                         DRM_DEBUG_KMS("SAND height too large (%d)\n",
1238                                       param);
1239                         return -EINVAL;
1240                 }
1241
1242                 if (fb->format->format == DRM_FORMAT_P030) {
1243                         hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1244                         tiling = SCALER_CTL0_TILING_128B;
1245                 } else {
1246                         hvs_format = HVS_PIXEL_FORMAT_H264;
1247
1248                         switch (base_format_mod) {
1249                         case DRM_FORMAT_MOD_BROADCOM_SAND64:
1250                                 tiling = SCALER_CTL0_TILING_64B;
1251                                 break;
1252                         case DRM_FORMAT_MOD_BROADCOM_SAND128:
1253                                 tiling = SCALER_CTL0_TILING_128B;
1254                                 break;
1255                         case DRM_FORMAT_MOD_BROADCOM_SAND256:
1256                                 tiling = SCALER_CTL0_TILING_256B_OR_T;
1257                                 break;
1258                         default:
1259                                 return -EINVAL;
1260                         }
1261                 }
1262
1263                 /* Adjust the base pointer to the first pixel to be scanned
1264                  * out.
1265                  *
1266                  * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1267                  * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1268                  * word that should be taken as the first pixel.
1269                  * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1270                  * element within the 128bit word, eg for pixel 3 the value
1271                  * should be 6.
1272                  */
1273                 for (i = 0; i < num_planes; i++) {
1274                         u32 tile_w, tile, x_off, pix_per_tile;
1275
1276                         if (fb->format->format == DRM_FORMAT_P030) {
1277                                 /*
1278                                  * Spec says: bits [31:4] of the given address
1279                                  * should point to the 128-bit word containing
1280                                  * the desired starting pixel, and bits[3:0]
1281                                  * should be between 0 and 11, indicating which
1282                                  * of the 12-pixels in that 128-bit word is the
1283                                  * first pixel to be used
1284                                  */
1285                                 u32 remaining_pixels = src_x % 96;
1286                                 u32 aligned = remaining_pixels / 12;
1287                                 u32 last_bits = remaining_pixels % 12;
1288
1289                                 x_off = aligned * 16 + last_bits;
1290                                 tile_w = 128;
1291                                 pix_per_tile = 96;
1292                         } else {
1293                                 switch (base_format_mod) {
1294                                 case DRM_FORMAT_MOD_BROADCOM_SAND64:
1295                                         tile_w = 64;
1296                                         break;
1297                                 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1298                                         tile_w = 128;
1299                                         break;
1300                                 case DRM_FORMAT_MOD_BROADCOM_SAND256:
1301                                         tile_w = 256;
1302                                         break;
1303                                 default:
1304                                         return -EINVAL;
1305                                 }
1306                                 pix_per_tile = tile_w / fb->format->cpp[0];
1307                                 x_off = (src_x % pix_per_tile) /
1308                                         (i ? h_subsample : 1) *
1309                                         fb->format->cpp[i];
1310                         }
1311
1312                         tile = src_x / pix_per_tile;
1313
1314                         offsets[i] += param * tile_w * tile;
1315                         offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1316                         offsets[i] += x_off & ~(i ? 1 : 0);
1317                 }
1318
1319                 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
1320                 break;
1321         }
1322
1323         default:
1324                 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1325                               (long long)fb->modifier);
1326                 return -EINVAL;
1327         }
1328
1329         /* fetch an extra pixel if we don't actually line up with the left edge. */
1330         if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1331                 width++;
1332
1333         /* same for the right side */
1334         if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1335                vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1336                 width++;
1337
1338         /* now for the top */
1339         if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1340                 height++;
1341
1342         /* and the bottom */
1343         if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1344                vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1345                 height++;
1346
1347         /* for YUV444 hardware wants double the width, otherwise it doesn't fetch full width of chroma */
1348         if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1349                 width <<= 1;
1350
1351         /* Don't waste cycles mixing with plane alpha if the set alpha
1352          * is opaque or there is no per-pixel alpha information.
1353          * In any case we use the alpha property value as the fixed alpha.
1354          */
1355         mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1356                           fb->format->has_alpha;
1357
1358         if (vc4->gen == VC4_GEN_4) {
1359         /* Control word */
1360                 vc4_dlist_write(vc4_state,
1361                                 SCALER_CTL0_VALID |
1362                                 (rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
1363                                 (rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
1364                                 VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
1365                                 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
1366                                 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
1367                                 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
1368                                 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
1369                                 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
1370                                 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
1371
1372                 /* Position Word 0: Image Positions and Alpha Value */
1373                 vc4_state->pos0_offset = vc4_state->dlist_count;
1374                 vc4_dlist_write(vc4_state,
1375                                 VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
1376                                 VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
1377                                 VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
1378
1379                 /* Position Word 1: Scaled Image Dimensions. */
1380                 if (!vc4_state->is_unity) {
1381                         vc4_dlist_write(vc4_state,
1382                                         VC4_SET_FIELD(vc4_state->crtc_w,
1383                                                       SCALER_POS1_SCL_WIDTH) |
1384                                         VC4_SET_FIELD(vc4_state->crtc_h,
1385                                                       SCALER_POS1_SCL_HEIGHT));
1386                 }
1387
1388                 /* Position Word 2: Source Image Size, Alpha */
1389                 vc4_state->pos2_offset = vc4_state->dlist_count;
1390                 vc4_dlist_write(vc4_state,
1391                                 (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
1392                                 vc4_hvs4_get_alpha_blend_mode(state) |
1393                                 VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
1394                                 VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));
1395
1396                 /* Position Word 3: Context.  Written by the HVS. */
1397                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1398
1399         } else {
1400                 /* Control word */
1401                 vc4_dlist_write(vc4_state,
1402                                 SCALER_CTL0_VALID |
1403                                 (format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) |
1404                                 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
1405                                 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
1406                                 (vc4_state->is_unity ?
1407                                                 SCALER5_CTL0_UNITY : 0) |
1408                                 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
1409                                 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
1410                                 SCALER5_CTL0_ALPHA_EXPAND |
1411                                 SCALER5_CTL0_RGB_EXPAND);
1412
1413                 /* Position Word 0: Image Positions and Alpha Value */
1414                 vc4_state->pos0_offset = vc4_state->dlist_count;
1415                 vc4_dlist_write(vc4_state,
1416                                 (rotation & DRM_MODE_REFLECT_Y ?
1417                                                 SCALER5_POS0_VFLIP : 0) |
1418                                 VC4_SET_FIELD(vc4_state->crtc_x,
1419                                               SCALER_POS0_START_X) |
1420                                 (rotation & DRM_MODE_REFLECT_X ?
1421                                               SCALER5_POS0_HFLIP : 0) |
1422                                 VC4_SET_FIELD(vc4_state->crtc_y,
1423                                               SCALER5_POS0_START_Y)
1424                                );
1425
1426                 /* Control Word 2 */
1427                 vc4_dlist_write(vc4_state,
1428                                 VC4_SET_FIELD(state->alpha >> 4,
1429                                               SCALER5_CTL2_ALPHA) |
1430                                 vc4_hvs5_get_alpha_blend_mode(state) |
1431                                 (mix_plane_alpha ?
1432                                         SCALER5_CTL2_ALPHA_MIX : 0)
1433                                );
1434
1435                 /* Position Word 1: Scaled Image Dimensions. */
1436                 if (!vc4_state->is_unity) {
1437                         vc4_dlist_write(vc4_state,
1438                                         VC4_SET_FIELD(vc4_state->crtc_w,
1439                                                       SCALER5_POS1_SCL_WIDTH) |
1440                                         VC4_SET_FIELD(vc4_state->crtc_h,
1441                                                       SCALER5_POS1_SCL_HEIGHT));
1442                 }
1443
1444                 /* Position Word 2: Source Image Size */
1445                 vc4_state->pos2_offset = vc4_state->dlist_count;
1446                 vc4_dlist_write(vc4_state,
1447                                 VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
1448                                 VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));
1449
1450                 /* Position Word 3: Context.  Written by the HVS. */
1451                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1452         }
1453
1454
1455         /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
1456          *
1457          * The pointers may be any byte address.
1458          */
1459         vc4_state->ptr0_offset[0] = vc4_state->dlist_count;
1460
1461         for (i = 0; i < num_planes; i++) {
1462                 struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
1463
1464                 vc4_dlist_write(vc4_state, bo->dma_addr + fb->offsets[i] + offsets[i]);
1465         }
1466
1467         /* Pointer Context Word 0/1/2: Written by the HVS */
1468         for (i = 0; i < num_planes; i++)
1469                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1470
1471         /* Pitch word 0 */
1472         vc4_dlist_write(vc4_state, pitch0);
1473
1474         /* Pitch word 1/2 */
1475         for (i = 1; i < num_planes; i++) {
1476                 if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
1477                     hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
1478                         vc4_dlist_write(vc4_state,
1479                                         VC4_SET_FIELD(fb->pitches[i],
1480                                                       SCALER_SRC_PITCH));
1481                 } else {
1482                         vc4_dlist_write(vc4_state, pitch0);
1483                 }
1484         }
1485
1486         /* Colorspace conversion words */
1487         if (vc4_state->is_yuv) {
1488                 enum drm_color_encoding color_encoding = state->color_encoding;
1489                 enum drm_color_range color_range = state->color_range;
1490                 const u32 *ccm;
1491
1492                 if (color_encoding >= DRM_COLOR_ENCODING_MAX)
1493                         color_encoding = DRM_COLOR_YCBCR_BT601;
1494                 if (color_range >= DRM_COLOR_RANGE_MAX)
1495                         color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1496
1497                 ccm = colorspace_coeffs[color_range][color_encoding];
1498
1499                 vc4_dlist_write(vc4_state, ccm[0]);
1500                 vc4_dlist_write(vc4_state, ccm[1]);
1501                 vc4_dlist_write(vc4_state, ccm[2]);
1502         }
1503
1504         vc4_state->lbm_offset = 0;
1505
1506         if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
1507             vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
1508             vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1509             vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1510                 /* Reserve a slot for the LBM Base Address. The real value will
1511                  * be set when calling vc4_plane_allocate_lbm().
1512                  */
1513                 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1514                     vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1515                         vc4_state->lbm_offset = vc4_state->dlist_count;
1516                         vc4_dlist_counter_increment(vc4_state);
1517                 }
1518
1519                 if (num_planes > 1) {
1520                         /* Emit Cb/Cr as channel 0 and Y as channel
1521                          * 1. This matches how we set up scl0/scl1
1522                          * above.
1523                          */
1524                         vc4_write_scaling_parameters(state, 1);
1525                 }
1526                 vc4_write_scaling_parameters(state, 0);
1527
1528                 /* If any PPF setup was done, then all the kernel
1529                  * pointers get uploaded.
1530                  */
1531                 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
1532                     vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
1533                     vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
1534                     vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
1535                         u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
1536                                                    SCALER_PPF_KERNEL_OFFSET);
1537
1538                         /* HPPF plane 0 */
1539                         vc4_dlist_write(vc4_state, kernel);
1540                         /* VPPF plane 0 */
1541                         vc4_dlist_write(vc4_state, kernel);
1542                         /* HPPF plane 1 */
1543                         vc4_dlist_write(vc4_state, kernel);
1544                         /* VPPF plane 1 */
1545                         vc4_dlist_write(vc4_state, kernel);
1546                 }
1547         }
1548
1549         vc4_state->dlist[ctl0_offset] |=
1550                 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
1551
1552         /* crtc_* are already clipped coordinates. */
1553         covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
1554                         vc4_state->crtc_w == state->crtc->mode.hdisplay &&
1555                         vc4_state->crtc_h == state->crtc->mode.vdisplay;
1556         /* Background fill might be necessary when the plane has per-pixel
1557          * alpha content or a non-opaque plane alpha and could blend from the
1558          * background or does not cover the entire screen.
1559          */
1560         vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
1561                                    state->alpha != DRM_BLEND_ALPHA_OPAQUE;
1562
1563         /* Flag the dlist as initialized to avoid checking it twice in case
1564          * the async update check already called vc4_plane_mode_set() and
1565          * decided to fallback to sync update because async update was not
1566          * possible.
1567          */
1568         vc4_state->dlist_initialized = 1;
1569
1570         vc4_plane_calc_load(state);
1571
1572         return 0;
1573 }
1574
1575 static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
1576 {
1577         struct drm_plane_state *state = &vc4_state->base;
1578         struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
1579         u32 ret = 0;
1580
1581         if (vc4_state->is_yuv) {
1582                 enum drm_color_encoding color_encoding = state->color_encoding;
1583                 enum drm_color_range color_range = state->color_range;
1584
1585                 /* CSC pre-loaded with:
1586                  * 0 = BT601 limited range
1587                  * 1 = BT709 limited range
1588                  * 2 = BT2020 limited range
1589                  * 3 = BT601 full range
1590                  * 4 = BT709 full range
1591                  * 5 = BT2020 full range
1592                  */
1593                 if (color_encoding > DRM_COLOR_YCBCR_BT2020)
1594                         color_encoding = DRM_COLOR_YCBCR_BT601;
1595                 if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
1596                         color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1597
1598                 if (vc4->step_d0) {
1599                         ret |= SCALER6D0_CTL2_CSC_ENABLE;
1600                         ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1601                                         SCALER6D0_CTL2_BRCM_CFC_CONTROL);
1602                 } else {
1603                         ret |= SCALER6_CTL2_CSC_ENABLE;
1604                         ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1605                                         SCALER6_CTL2_BRCM_CFC_CONTROL);
1606                 }
1607         }
1608
1609         return ret;
1610 }
1611
1612 static int vc6_plane_mode_set(struct drm_plane *plane,
1613                               struct drm_plane_state *state)
1614 {
1615         struct drm_device *drm = plane->dev;
1616         struct vc4_dev *vc4 = to_vc4_dev(drm);
1617         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1618         struct drm_framebuffer *fb = state->fb;
1619         const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1620         u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1621         int num_planes = fb->format->num_planes;
1622         u32 h_subsample = fb->format->hsub;
1623         u32 v_subsample = fb->format->vsub;
1624         bool mix_plane_alpha;
1625         bool covers_screen;
1626         u32 scl0, scl1, pitch0;
1627         u32 tiling, src_x, src_y;
1628         u32 width, height;
1629         u32 hvs_format = format->hvs;
1630         u32 offsets[3] = { 0 };
1631         unsigned int rotation;
1632         int ret, i;
1633
1634         if (vc4_state->dlist_initialized)
1635                 return 0;
1636
1637         ret = vc4_plane_setup_clipping_and_scaling(state);
1638         if (ret)
1639                 return ret;
1640
1641         width = vc4_state->src_w[0] >> 16;
1642         height = vc4_state->src_h[0] >> 16;
1643
1644         if (!width || !height || !vc4_state->crtc_w || !vc4_state->crtc_h) {
1645                 /* 0 source size probably means the plane is offscreen.
1646                  * 0 destination size is a redundant plane.
1647                  */
1648                 vc4_state->dlist_initialized = 1;
1649                 return 0;
1650         }
1651
1652         /* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
1653          * and 4:4:4, scl1 should be set to scl0 so both channels of
1654          * the scaler do the same thing.  For YUV, the Y plane needs
1655          * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1656          * the scl fields here.
1657          */
1658         if (num_planes == 1) {
1659                 scl0 = vc4_get_scl_field(state, 0);
1660                 scl1 = scl0;
1661         } else {
1662                 scl0 = vc4_get_scl_field(state, 1);
1663                 scl1 = vc4_get_scl_field(state, 0);
1664         }
1665
1666         rotation = drm_rotation_simplify(state->rotation,
1667                                          DRM_MODE_ROTATE_0 |
1668                                          DRM_MODE_REFLECT_X |
1669                                          DRM_MODE_REFLECT_Y);
1670
1671         /* We must point to the last line when Y reflection is enabled. */
1672         src_y = vc4_state->src_y >> 16;
1673         if (rotation & DRM_MODE_REFLECT_Y)
1674                 src_y += height - 1;
1675
1676         src_x = vc4_state->src_x >> 16;
1677
1678         switch (base_format_mod) {
1679         case DRM_FORMAT_MOD_LINEAR:
1680                 tiling = SCALER6_CTL0_ADDR_MODE_LINEAR;
1681
1682                 /* Adjust the base pointer to the first pixel to be scanned
1683                  * out.
1684                  */
1685                 for (i = 0; i < num_planes; i++) {
1686                         offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1687                         offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1688                 }
1689
1690                 break;
1691
1692         case DRM_FORMAT_MOD_BROADCOM_SAND128:
1693         case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1694                 uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1695                 u32 components_per_word;
1696                 u32 starting_offset;
1697                 u32 fetch_count;
1698
1699                 if (param > SCALER_TILE_HEIGHT_MASK) {
1700                         DRM_DEBUG_KMS("SAND height too large (%d)\n",
1701                                       param);
1702                         return -EINVAL;
1703                 }
1704
1705                 if (fb->format->format == DRM_FORMAT_P030) {
1706                         hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1707                         tiling = SCALER6_CTL0_ADDR_MODE_128B;
1708                 } else {
1709                         hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE;
1710
1711                         switch (base_format_mod) {
1712                         case DRM_FORMAT_MOD_BROADCOM_SAND128:
1713                                 tiling = SCALER6_CTL0_ADDR_MODE_128B;
1714                                 break;
1715                         case DRM_FORMAT_MOD_BROADCOM_SAND256:
1716                                 tiling = SCALER6_CTL0_ADDR_MODE_256B;
1717                                 break;
1718                         default:
1719                                 return -EINVAL;
1720                         }
1721                 }
1722
1723                 /* Adjust the base pointer to the first pixel to be scanned
1724                  * out.
1725                  *
1726                  * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1727                  * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1728                  * word that should be taken as the first pixel.
1729                  * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1730                  * element within the 128bit word, eg for pixel 3 the value
1731                  * should be 6.
1732                  */
1733                 for (i = 0; i < num_planes; i++) {
1734                         u32 tile_w, tile, x_off, pix_per_tile;
1735
1736                         if (fb->format->format == DRM_FORMAT_P030) {
1737                                 /*
1738                                  * Spec says: bits [31:4] of the given address
1739                                  * should point to the 128-bit word containing
1740                                  * the desired starting pixel, and bits[3:0]
1741                                  * should be between 0 and 11, indicating which
1742                                  * of the 12-pixels in that 128-bit word is the
1743                                  * first pixel to be used
1744                                  */
1745                                 u32 remaining_pixels = src_x % 96;
1746                                 u32 aligned = remaining_pixels / 12;
1747                                 u32 last_bits = remaining_pixels % 12;
1748
1749                                 x_off = aligned * 16 + last_bits;
1750                                 tile_w = 128;
1751                                 pix_per_tile = 96;
1752                         } else {
1753                                 switch (base_format_mod) {
1754                                 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1755                                         tile_w = 128;
1756                                         break;
1757                                 case DRM_FORMAT_MOD_BROADCOM_SAND256:
1758                                         tile_w = 256;
1759                                         break;
1760                                 default:
1761                                         return -EINVAL;
1762                                 }
1763                                 pix_per_tile = tile_w / fb->format->cpp[0];
1764                                 x_off = (src_x % pix_per_tile) /
1765                                         (i ? h_subsample : 1) *
1766                                         fb->format->cpp[i];
1767                         }
1768
1769                         tile = src_x / pix_per_tile;
1770
1771                         offsets[i] += param * tile_w * tile;
1772                         offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1773                         offsets[i] += x_off & ~(i ? 1 : 0);
1774                 }
1775
1776                 components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
1777                 starting_offset = src_x % components_per_word;
1778                 fetch_count = (width + starting_offset + components_per_word - 1) /
1779                         components_per_word;
1780
1781                 pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
1782                          VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
1783                 break;
1784         }
1785
1786         default:
1787                 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1788                               (long long)fb->modifier);
1789                 return -EINVAL;
1790         }
1791
1792         /* fetch an extra pixel if we don't actually line up with the left edge. */
1793         if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1794                 width++;
1795
1796         /* same for the right side */
1797         if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1798             vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1799                 width++;
1800
1801         /* now for the top */
1802         if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1803                 height++;
1804
1805         /* and the bottom */
1806         if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1807             vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1808                 height++;
1809
1810         /* for YUV444 hardware wants double the width, otherwise it doesn't
1811          * fetch full width of chroma
1812          */
1813         if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1814                 width <<= 1;
1815
1816         /* Don't waste cycles mixing with plane alpha if the set alpha
1817          * is opaque or there is no per-pixel alpha information.
1818          * In any case we use the alpha property value as the fixed alpha.
1819          */
1820         mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1821                           fb->format->has_alpha;
1822
1823         /* Control Word 0: Scaling Configuration & Element Validity*/
1824         vc4_dlist_write(vc4_state,
1825                         SCALER6_CTL0_VALID |
1826                         VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
1827                         VC4_SET_FIELD(0, SCALER6_CTL0_ALPHA_MASK) |
1828                         (vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
1829                         VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
1830                         VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
1831                         VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
1832                         VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));
1833
1834         /* Position Word 0: Image Position */
1835         vc4_state->pos0_offset = vc4_state->dlist_count;
1836         vc4_dlist_write(vc4_state,
1837                         VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
1838                         (rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
1839                         VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));
1840
1841         /* Control Word 2: Alpha Value & CSC */
1842         vc4_dlist_write(vc4_state,
1843                         vc6_plane_get_csc_mode(vc4_state) |
1844                         vc4_hvs5_get_alpha_blend_mode(state) |
1845                         (mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
1846                         VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));
1847
1848         /* Position Word 1: Scaled Image Dimensions */
1849         if (!vc4_state->is_unity)
1850                 vc4_dlist_write(vc4_state,
1851                                 VC4_SET_FIELD(vc4_state->crtc_h - 1,
1852                                               SCALER6_POS1_SCL_LINES) |
1853                                 VC4_SET_FIELD(vc4_state->crtc_w - 1,
1854                                               SCALER6_POS1_SCL_WIDTH));
1855
1856         /* Position Word 2: Source Image Size */
1857         vc4_state->pos2_offset = vc4_state->dlist_count;
1858         vc4_dlist_write(vc4_state,
1859                         VC4_SET_FIELD(height - 1,
1860                                       SCALER6_POS2_SRC_LINES) |
1861                         VC4_SET_FIELD(width - 1,
1862                                       SCALER6_POS2_SRC_WIDTH));
1863
1864         /* Position Word 3: Context */
1865         vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1866
1867         /*
1868          * TODO: This only covers Raster Scan Order planes
1869          */
1870         for (i = 0; i < num_planes; i++) {
1871                 struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
1872                 dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i];
1873
1874                 /* Pointer Word 0 */
1875                 vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
1876                 vc4_dlist_write(vc4_state,
1877                                 (rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
1878                                 /*
1879                                  * The UPM buffer will be allocated in
1880                                  * vc6_plane_allocate_upm().
1881                                  */
1882                                 VC4_SET_FIELD(upper_32_bits(paddr) & 0xff,
1883                                               SCALER6_PTR0_UPPER_ADDR));
1884
1885                 /* Pointer Word 1 */
1886                 vc4_dlist_write(vc4_state, lower_32_bits(paddr));
1887
1888                 /* Pointer Word 2 */
1889                 if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
1890                     base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
1891                         vc4_dlist_write(vc4_state,
1892                                         VC4_SET_FIELD(fb->pitches[i],
1893                                                       SCALER6_PTR2_PITCH));
1894                 } else {
1895                         vc4_dlist_write(vc4_state, pitch0);
1896                 }
1897         }
1898
1899         /*
1900          * Palette Word 0
1901          * TODO: We're not using the palette mode
1902          */
1903
1904         /*
1905          * Trans Word 0
1906          * TODO: It's only relevant if we set the trans_rgb bit in the
1907          * control word 0, and we don't at the moment.
1908          */
1909
1910         vc4_state->lbm_offset = 0;
1911
1912         if (!vc4_state->is_unity || fb->format->is_yuv) {
1913                 /*
1914                  * Reserve a slot for the LBM Base Address. The real value will
1915                  * be set when calling vc4_plane_allocate_lbm().
1916                  */
1917                 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1918                     vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1919                         vc4_state->lbm_offset = vc4_state->dlist_count;
1920                         vc4_dlist_counter_increment(vc4_state);
1921                 }
1922
1923                 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
1924                     vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
1925                     vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1926                     vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1927                         if (num_planes > 1)
1928                                 /*
1929                                  * Emit Cb/Cr as channel 0 and Y as channel
1930                                  * 1. This matches how we set up scl0/scl1
1931                                  * above.
1932                                  */
1933                                 vc4_write_scaling_parameters(state, 1);
1934
1935                         vc4_write_scaling_parameters(state, 0);
1936                 }
1937
1938                 /*
1939                  * If any PPF setup was done, then all the kernel
1940                  * pointers get uploaded.
1941                  */
1942                 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
1943                     vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
1944                     vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
1945                     vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
1946                         u32 kernel =
1947                                 VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
1948                                               SCALER_PPF_KERNEL_OFFSET);
1949
1950                         /* HPPF plane 0 */
1951                         vc4_dlist_write(vc4_state, kernel);
1952                         /* VPPF plane 0 */
1953                         vc4_dlist_write(vc4_state, kernel);
1954                         /* HPPF plane 1 */
1955                         vc4_dlist_write(vc4_state, kernel);
1956                         /* VPPF plane 1 */
1957                                 vc4_dlist_write(vc4_state, kernel);
1958                 }
1959         }
1960
1961         vc4_dlist_write(vc4_state, SCALER6_CTL0_END);
1962
1963         vc4_state->dlist[0] |=
1964                 VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);
1965
1966         /* crtc_* are already clipped coordinates. */
1967         covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
1968                         vc4_state->crtc_w == state->crtc->mode.hdisplay &&
1969                         vc4_state->crtc_h == state->crtc->mode.vdisplay;
1970
1971         /*
1972          * Background fill might be necessary when the plane has per-pixel
1973          * alpha content or a non-opaque plane alpha and could blend from the
1974          * background or does not cover the entire screen.
1975          */
1976         vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
1977                                    state->alpha != DRM_BLEND_ALPHA_OPAQUE;
1978
1979         /*
1980          * Flag the dlist as initialized to avoid checking it twice in case
1981          * the async update check already called vc4_plane_mode_set() and
1982          * decided to fallback to sync update because async update was not
1983          * possible.
1984          */
1985         vc4_state->dlist_initialized = 1;
1986
1987         vc4_plane_calc_load(state);
1988
1989         drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
1990                        plane->base.id, plane->name, vc4_state->dlist_count);
1991
1992         return 0;
1993 }
1994
1995 /* If a modeset involves changing the setup of a plane, the atomic
1996  * infrastructure will call this to validate a proposed plane setup.
1997  * However, if a plane isn't getting updated, this (and the
1998  * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
1999  * compute the dlist here and have all active plane dlists get updated
2000  * in the CRTC's flush.
2001  */
2002 int vc4_plane_atomic_check(struct drm_plane *plane,
2003                            struct drm_atomic_state *state)
2004 {
2005         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2006         struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2007                                                                                  plane);
2008         struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
2009         int ret;
2010
2011         vc4_state->dlist_count = 0;
2012
2013         if (!plane_enabled(new_plane_state))
2014                 return 0;
2015
2016         if (vc4->gen >= VC4_GEN_6)
2017                 ret = vc6_plane_mode_set(plane, new_plane_state);
2018         else
2019                 ret = vc4_plane_mode_set(plane, new_plane_state);
2020         if (ret)
2021                 return ret;
2022
2023         if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
2024             !vc4_state->crtc_w || !vc4_state->crtc_h)
2025                 return 0;
2026
2027         ret = vc4_plane_allocate_lbm(new_plane_state);
2028         if (ret)
2029                 return ret;
2030
2031         if (vc4->gen >= VC4_GEN_6) {
2032                 ret = vc6_plane_allocate_upm(new_plane_state);
2033                 if (ret)
2034                         return ret;
2035         }
2036
2037         return 0;
2038 }
2039
/* Plane atomic_update hook: deliberately a no-op. */
static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_state *state)
{
	/*
	 * Nothing to do at this point. The plane does not know where in
	 * the CRTC's dlist it will be placed, so its dlist is uploaded to
	 * the hardware by vc4_plane_write_dlist() at CRTC atomic_flush
	 * time instead.
	 */
}
2049
2050 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
2051 {
2052         struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2053         int i;
2054         int idx;
2055
2056         if (!drm_dev_enter(plane->dev, &idx))
2057                 goto out;
2058
2059         vc4_state->hw_dlist = dlist;
2060
2061         /* Can't memcpy_toio() because it needs to be 32-bit writes. */
2062         for (i = 0; i < vc4_state->dlist_count; i++)
2063                 writel(vc4_state->dlist[i], &dlist[i]);
2064
2065         drm_dev_exit(idx);
2066
2067 out:
2068         return vc4_state->dlist_count;
2069 }
2070
2071 u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
2072 {
2073         const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
2074
2075         return vc4_state->dlist_count;
2076 }
2077
2078 /* Updates the plane to immediately (well, once the FIFO needs
2079  * refilling) scan out from at a new framebuffer.
2080  */
2081 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
2082 {
2083         struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2084         struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
2085         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2086         dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0];
2087         int idx;
2088
2089         if (!drm_dev_enter(plane->dev, &idx))
2090                 return;
2091
2092         /* We're skipping the address adjustment for negative origin,
2093          * because this is only called on the primary plane.
2094          */
2095         WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
2096
2097         if (vc4->gen == VC4_GEN_6) {
2098                 u32 value;
2099
2100                 value = vc4_state->dlist[vc4_state->ptr0_offset[0]] &
2101                                         ~SCALER6_PTR0_UPPER_ADDR_MASK;
2102                 value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff,
2103                                        SCALER6_PTR0_UPPER_ADDR);
2104
2105                 writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2106                 vc4_state->dlist[vc4_state->ptr0_offset[0]] = value;
2107
2108                 value = lower_32_bits(dma_addr);
2109                 writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]);
2110                 vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value;
2111         } else {
2112                 u32 addr;
2113
2114                 addr = (u32)dma_addr;
2115
2116                 /* Write the new address into the hardware immediately.  The
2117                  * scanout will start from this address as soon as the FIFO
2118                  * needs to refill with pixels.
2119                  */
2120                 writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2121
2122                 /* Also update the CPU-side dlist copy, so that any later
2123                  * atomic updates that don't do a new modeset on our plane
2124                  * also use our updated address.
2125                  */
2126                 vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
2127         }
2128
2129         drm_dev_exit(idx);
2130 }
2131
2132 static void vc4_plane_atomic_async_update(struct drm_plane *plane,
2133                                           struct drm_atomic_state *state)
2134 {
2135         struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2136                                                                                  plane);
2137         struct vc4_plane_state *vc4_state, *new_vc4_state;
2138         int idx;
2139
2140         if (!drm_dev_enter(plane->dev, &idx))
2141                 return;
2142
2143         swap(plane->state->fb, new_plane_state->fb);
2144         plane->state->crtc_x = new_plane_state->crtc_x;
2145         plane->state->crtc_y = new_plane_state->crtc_y;
2146         plane->state->crtc_w = new_plane_state->crtc_w;
2147         plane->state->crtc_h = new_plane_state->crtc_h;
2148         plane->state->src_x = new_plane_state->src_x;
2149         plane->state->src_y = new_plane_state->src_y;
2150         plane->state->src_w = new_plane_state->src_w;
2151         plane->state->src_h = new_plane_state->src_h;
2152         plane->state->alpha = new_plane_state->alpha;
2153         plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
2154         plane->state->rotation = new_plane_state->rotation;
2155         plane->state->zpos = new_plane_state->zpos;
2156         plane->state->normalized_zpos = new_plane_state->normalized_zpos;
2157         plane->state->color_encoding = new_plane_state->color_encoding;
2158         plane->state->color_range = new_plane_state->color_range;
2159         plane->state->src = new_plane_state->src;
2160         plane->state->dst = new_plane_state->dst;
2161         plane->state->visible = new_plane_state->visible;
2162
2163         new_vc4_state = to_vc4_plane_state(new_plane_state);
2164         vc4_state = to_vc4_plane_state(plane->state);
2165
2166         vc4_state->crtc_x = new_vc4_state->crtc_x;
2167         vc4_state->crtc_y = new_vc4_state->crtc_y;
2168         vc4_state->crtc_h = new_vc4_state->crtc_h;
2169         vc4_state->crtc_w = new_vc4_state->crtc_w;
2170         vc4_state->src_x = new_vc4_state->src_x;
2171         vc4_state->src_y = new_vc4_state->src_y;
2172         memcpy(vc4_state->src_w, new_vc4_state->src_w,
2173                sizeof(vc4_state->src_w));
2174         memcpy(vc4_state->src_h, new_vc4_state->src_h,
2175                sizeof(vc4_state->src_h));
2176         memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
2177                sizeof(vc4_state->x_scaling));
2178         memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
2179                sizeof(vc4_state->y_scaling));
2180         vc4_state->is_unity = new_vc4_state->is_unity;
2181         vc4_state->is_yuv = new_vc4_state->is_yuv;
2182         vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;
2183
2184         /* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
2185         vc4_state->dlist[vc4_state->pos0_offset] =
2186                 new_vc4_state->dlist[vc4_state->pos0_offset];
2187         vc4_state->dlist[vc4_state->pos2_offset] =
2188                 new_vc4_state->dlist[vc4_state->pos2_offset];
2189         vc4_state->dlist[vc4_state->ptr0_offset[0]] =
2190                 new_vc4_state->dlist[vc4_state->ptr0_offset[0]];
2191
2192         /* Note that we can't just call vc4_plane_write_dlist()
2193          * because that would smash the context data that the HVS is
2194          * currently using.
2195          */
2196         writel(vc4_state->dlist[vc4_state->pos0_offset],
2197                &vc4_state->hw_dlist[vc4_state->pos0_offset]);
2198         writel(vc4_state->dlist[vc4_state->pos2_offset],
2199                &vc4_state->hw_dlist[vc4_state->pos2_offset]);
2200         writel(vc4_state->dlist[vc4_state->ptr0_offset[0]],
2201                &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2202
2203         drm_dev_exit(idx);
2204 }
2205
2206 static int vc4_plane_atomic_async_check(struct drm_plane *plane,
2207                                         struct drm_atomic_state *state)
2208 {
2209         struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2210                                                                                  plane);
2211         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2212         struct vc4_plane_state *old_vc4_state, *new_vc4_state;
2213         int ret;
2214         u32 i;
2215
2216         if (vc4->gen >= VC4_GEN_6)
2217                 ret = vc6_plane_mode_set(plane, new_plane_state);
2218         else
2219                 ret = vc4_plane_mode_set(plane, new_plane_state);
2220         if (ret)
2221                 return ret;
2222
2223         old_vc4_state = to_vc4_plane_state(plane->state);
2224         new_vc4_state = to_vc4_plane_state(new_plane_state);
2225
2226         if (!new_vc4_state->hw_dlist)
2227                 return -EINVAL;
2228
2229         if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
2230             old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
2231             old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
2232             old_vc4_state->ptr0_offset[0] != new_vc4_state->ptr0_offset[0] ||
2233             vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
2234                 return -EINVAL;
2235
2236         /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
2237          * if anything else has changed, fallback to a sync update.
2238          */
2239         for (i = 0; i < new_vc4_state->dlist_count; i++) {
2240                 if (i == new_vc4_state->pos0_offset ||
2241                     i == new_vc4_state->pos2_offset ||
2242                     i == new_vc4_state->ptr0_offset[0] ||
2243                     (new_vc4_state->lbm_offset &&
2244                      i == new_vc4_state->lbm_offset))
2245                         continue;
2246
2247                 if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
2248                         return -EINVAL;
2249         }
2250
2251         return 0;
2252 }
2253
2254 static int vc4_prepare_fb(struct drm_plane *plane,
2255                           struct drm_plane_state *state)
2256 {
2257         struct vc4_bo *bo;
2258
2259         if (!state->fb)
2260                 return 0;
2261
2262         bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2263
2264         drm_gem_plane_helper_prepare_fb(plane, state);
2265
2266         return vc4_bo_inc_usecnt(bo);
2267 }
2268
2269 static void vc4_cleanup_fb(struct drm_plane *plane,
2270                            struct drm_plane_state *state)
2271 {
2272         struct vc4_bo *bo;
2273
2274         if (!state->fb)
2275                 return;
2276
2277         bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2278         vc4_bo_dec_usecnt(bo);
2279 }
2280
2281 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
2282         .atomic_check = vc4_plane_atomic_check,
2283         .atomic_update = vc4_plane_atomic_update,
2284         .prepare_fb = vc4_prepare_fb,
2285         .cleanup_fb = vc4_cleanup_fb,
2286         .atomic_async_check = vc4_plane_atomic_async_check,
2287         .atomic_async_update = vc4_plane_atomic_async_update,
2288 };
2289
2290 static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
2291         .atomic_check = vc4_plane_atomic_check,
2292         .atomic_update = vc4_plane_atomic_update,
2293         .atomic_async_check = vc4_plane_atomic_async_check,
2294         .atomic_async_update = vc4_plane_atomic_async_update,
2295 };
2296
2297 static bool vc4_format_mod_supported(struct drm_plane *plane,
2298                                      uint32_t format,
2299                                      uint64_t modifier)
2300 {
2301         /* Support T_TILING for RGB formats only. */
2302         switch (format) {
2303         case DRM_FORMAT_XRGB8888:
2304         case DRM_FORMAT_ARGB8888:
2305         case DRM_FORMAT_ABGR8888:
2306         case DRM_FORMAT_XBGR8888:
2307         case DRM_FORMAT_RGB565:
2308         case DRM_FORMAT_BGR565:
2309         case DRM_FORMAT_ARGB1555:
2310         case DRM_FORMAT_XRGB1555:
2311                 switch (fourcc_mod_broadcom_mod(modifier)) {
2312                 case DRM_FORMAT_MOD_LINEAR:
2313                 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
2314                         return true;
2315                 default:
2316                         return false;
2317                 }
2318         case DRM_FORMAT_NV12:
2319         case DRM_FORMAT_NV21:
2320                 switch (fourcc_mod_broadcom_mod(modifier)) {
2321                 case DRM_FORMAT_MOD_LINEAR:
2322                 case DRM_FORMAT_MOD_BROADCOM_SAND64:
2323                 case DRM_FORMAT_MOD_BROADCOM_SAND128:
2324                 case DRM_FORMAT_MOD_BROADCOM_SAND256:
2325                         return true;
2326                 default:
2327                         return false;
2328                 }
2329         case DRM_FORMAT_P030:
2330                 switch (fourcc_mod_broadcom_mod(modifier)) {
2331                 case DRM_FORMAT_MOD_BROADCOM_SAND128:
2332                         return true;
2333                 default:
2334                         return false;
2335                 }
2336         case DRM_FORMAT_RGBX1010102:
2337         case DRM_FORMAT_BGRX1010102:
2338         case DRM_FORMAT_RGBA1010102:
2339         case DRM_FORMAT_BGRA1010102:
2340         case DRM_FORMAT_XRGB4444:
2341         case DRM_FORMAT_ARGB4444:
2342         case DRM_FORMAT_XBGR4444:
2343         case DRM_FORMAT_ABGR4444:
2344         case DRM_FORMAT_RGBX4444:
2345         case DRM_FORMAT_RGBA4444:
2346         case DRM_FORMAT_BGRX4444:
2347         case DRM_FORMAT_BGRA4444:
2348         case DRM_FORMAT_RGB332:
2349         case DRM_FORMAT_BGR233:
2350         case DRM_FORMAT_YUV422:
2351         case DRM_FORMAT_YVU422:
2352         case DRM_FORMAT_YUV420:
2353         case DRM_FORMAT_YVU420:
2354         case DRM_FORMAT_NV16:
2355         case DRM_FORMAT_NV61:
2356         default:
2357                 return (modifier == DRM_FORMAT_MOD_LINEAR);
2358         }
2359 }
2360
2361 static const struct drm_plane_funcs vc4_plane_funcs = {
2362         .update_plane = drm_atomic_helper_update_plane,
2363         .disable_plane = drm_atomic_helper_disable_plane,
2364         .reset = vc4_plane_reset,
2365         .atomic_duplicate_state = vc4_plane_duplicate_state,
2366         .atomic_destroy_state = vc4_plane_destroy_state,
2367         .format_mod_supported = vc4_format_mod_supported,
2368 };
2369
2370 struct drm_plane *vc4_plane_init(struct drm_device *dev,
2371                                  enum drm_plane_type type,
2372                                  uint32_t possible_crtcs)
2373 {
2374         struct vc4_dev *vc4 = to_vc4_dev(dev);
2375         struct drm_plane *plane;
2376         struct vc4_plane *vc4_plane;
2377         u32 formats[ARRAY_SIZE(hvs_formats)];
2378         int num_formats = 0;
2379         unsigned i;
2380         static const uint64_t modifiers[] = {
2381                 DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
2382                 DRM_FORMAT_MOD_BROADCOM_SAND128,
2383                 DRM_FORMAT_MOD_BROADCOM_SAND64,
2384                 DRM_FORMAT_MOD_BROADCOM_SAND256,
2385                 DRM_FORMAT_MOD_LINEAR,
2386                 DRM_FORMAT_MOD_INVALID
2387         };
2388
2389         for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
2390                 if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) {
2391                         formats[num_formats] = hvs_formats[i].drm;
2392                         num_formats++;
2393                 }
2394         }
2395
2396         vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
2397                                                possible_crtcs,
2398                                                &vc4_plane_funcs,
2399                                                formats, num_formats,
2400                                                modifiers, type, NULL);
2401         if (IS_ERR(vc4_plane))
2402                 return ERR_CAST(vc4_plane);
2403         plane = &vc4_plane->base;
2404
2405         if (vc4->gen >= VC4_GEN_5)
2406                 drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
2407         else
2408                 drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
2409
2410         drm_plane_create_alpha_property(plane);
2411         drm_plane_create_blend_mode_property(plane,
2412                                              BIT(DRM_MODE_BLEND_PIXEL_NONE) |
2413                                              BIT(DRM_MODE_BLEND_PREMULTI) |
2414                                              BIT(DRM_MODE_BLEND_COVERAGE));
2415         drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
2416                                            DRM_MODE_ROTATE_0 |
2417                                            DRM_MODE_ROTATE_180 |
2418                                            DRM_MODE_REFLECT_X |
2419                                            DRM_MODE_REFLECT_Y);
2420
2421         drm_plane_create_color_properties(plane,
2422                                           BIT(DRM_COLOR_YCBCR_BT601) |
2423                                           BIT(DRM_COLOR_YCBCR_BT709) |
2424                                           BIT(DRM_COLOR_YCBCR_BT2020),
2425                                           BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
2426                                           BIT(DRM_COLOR_YCBCR_FULL_RANGE),
2427                                           DRM_COLOR_YCBCR_BT709,
2428                                           DRM_COLOR_YCBCR_LIMITED_RANGE);
2429
2430         drm_plane_create_chroma_siting_properties(plane, 0, 0);
2431
2432         if (type == DRM_PLANE_TYPE_PRIMARY)
2433                 drm_plane_create_zpos_immutable_property(plane, 0);
2434
2435         return plane;
2436 }
2437
2438 #define VC4_NUM_OVERLAY_PLANES  16
2439
2440 int vc4_plane_create_additional_planes(struct drm_device *drm)
2441 {
2442         struct drm_plane *cursor_plane;
2443         struct drm_crtc *crtc;
2444         unsigned int i;
2445
2446         /* Set up some arbitrary number of planes.  We're not limited
2447          * by a set number of physical registers, just the space in
2448          * the HVS (16k) and how small an plane can be (28 bytes).
2449          * However, each plane we set up takes up some memory, and
2450          * increases the cost of looping over planes, which atomic
2451          * modesetting does quite a bit.  As a result, we pick a
2452          * modest number of planes to expose, that should hopefully
2453          * still cover any sane usecase.
2454          */
2455         for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) {
2456                 struct drm_plane *plane =
2457                         vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
2458                                        GENMASK(drm->mode_config.num_crtc - 1, 0));
2459
2460                 if (IS_ERR(plane))
2461                         continue;
2462
2463                 /* Create zpos property. Max of all the overlays + 1 primary +
2464                  * 1 cursor plane on a crtc.
2465                  */
2466                 drm_plane_create_zpos_property(plane, i + 1, 1,
2467                                                VC4_NUM_OVERLAY_PLANES + 1);
2468         }
2469
2470         drm_for_each_crtc(crtc, drm) {
2471                 /* Set up the legacy cursor after overlay initialization,
2472                  * since the zpos fallback is that planes are rendered by plane
2473                  * ID order, and that then puts the cursor on top.
2474                  */
2475                 cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
2476                                               drm_crtc_mask(crtc));
2477                 if (!IS_ERR(cursor_plane)) {
2478                         crtc->cursor = cursor_plane;
2479
2480                         drm_plane_create_zpos_property(cursor_plane,
2481                                                        VC4_NUM_OVERLAY_PLANES + 1,
2482                                                        1,
2483                                                        VC4_NUM_OVERLAY_PLANES + 1);
2484                 }
2485         }
2486
2487         return 0;
2488 }