drm/vc4: Assign LBM memory during atomic_flush.
[platform/kernel/linux-rpi.git] / drivers / gpu / drm / vc4 / vc4_plane.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Broadcom
4  */
5
6 /**
7  * DOC: VC4 plane module
8  *
9  * Each DRM plane is a layer of pixels being scanned out by the HVS.
10  *
11  * At atomic modeset check time, we compute the HVS display element
12  * state that would be necessary for displaying the plane (giving us a
13  * chance to figure out if a plane configuration is invalid), then at
14  * atomic flush time the CRTC will ask us to write our element state
15  * into the region of the HVS that it has allocated for us.
16  */
17
18 #include <drm/drm_atomic.h>
19 #include <drm/drm_atomic_helper.h>
20 #include <drm/drm_atomic_uapi.h>
21 #include <drm/drm_blend.h>
22 #include <drm/drm_drv.h>
23 #include <drm/drm_fb_dma_helper.h>
24 #include <drm/drm_fourcc.h>
25 #include <drm/drm_framebuffer.h>
26 #include <drm/drm_gem_atomic_helper.h>
27
28 #include "uapi/drm/vc4_drm.h"
29
30 #include "vc4_drv.h"
31 #include "vc4_regs.h"
32
33 static const struct hvs_format {
34         u32 drm; /* DRM_FORMAT_* */
35         u32 hvs; /* HVS_FORMAT_* */
36         u32 pixel_order;
37         u32 pixel_order_hvs5;
38         bool hvs5_only;
39 } hvs_formats[] = {
40         {
41                 .drm = DRM_FORMAT_XRGB8888,
42                 .hvs = HVS_PIXEL_FORMAT_RGBA8888,
43                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
44                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
45         },
46         {
47                 .drm = DRM_FORMAT_ARGB8888,
48                 .hvs = HVS_PIXEL_FORMAT_RGBA8888,
49                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
50                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
51         },
52         {
53                 .drm = DRM_FORMAT_ABGR8888,
54                 .hvs = HVS_PIXEL_FORMAT_RGBA8888,
55                 .pixel_order = HVS_PIXEL_ORDER_ARGB,
56                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
57         },
58         {
59                 .drm = DRM_FORMAT_XBGR8888,
60                 .hvs = HVS_PIXEL_FORMAT_RGBA8888,
61                 .pixel_order = HVS_PIXEL_ORDER_ARGB,
62                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
63         },
64         {
65                 .drm = DRM_FORMAT_RGB565,
66                 .hvs = HVS_PIXEL_FORMAT_RGB565,
67                 .pixel_order = HVS_PIXEL_ORDER_XRGB,
68                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
69         },
70         {
71                 .drm = DRM_FORMAT_BGR565,
72                 .hvs = HVS_PIXEL_FORMAT_RGB565,
73                 .pixel_order = HVS_PIXEL_ORDER_XBGR,
74                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
75         },
76         {
77                 .drm = DRM_FORMAT_ARGB1555,
78                 .hvs = HVS_PIXEL_FORMAT_RGBA5551,
79                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
80                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
81         },
82         {
83                 .drm = DRM_FORMAT_XRGB1555,
84                 .hvs = HVS_PIXEL_FORMAT_RGBA5551,
85                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
86                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
87         },
88         {
89                 .drm = DRM_FORMAT_RGB888,
90                 .hvs = HVS_PIXEL_FORMAT_RGB888,
91                 .pixel_order = HVS_PIXEL_ORDER_XRGB,
92                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
93         },
94         {
95                 .drm = DRM_FORMAT_BGR888,
96                 .hvs = HVS_PIXEL_FORMAT_RGB888,
97                 .pixel_order = HVS_PIXEL_ORDER_XBGR,
98                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
99         },
100         {
101                 .drm = DRM_FORMAT_YUV422,
102                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
103                 .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
104                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
105         },
106         {
107                 .drm = DRM_FORMAT_YVU422,
108                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
109                 .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
110                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
111         },
112         {
113                 .drm = DRM_FORMAT_YUV444,
114                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
115                 .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
116                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
117         },
118         {
119                 .drm = DRM_FORMAT_YVU444,
120                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
121                 .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
122                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
123         },
124         {
125                 .drm = DRM_FORMAT_YUV420,
126                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
127                 .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
128                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
129         },
130         {
131                 .drm = DRM_FORMAT_YVU420,
132                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
133                 .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
134                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
135         },
136         {
137                 .drm = DRM_FORMAT_NV12,
138                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
139                 .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
140                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
141         },
142         {
143                 .drm = DRM_FORMAT_NV21,
144                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
145                 .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
146                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
147         },
148         {
149                 .drm = DRM_FORMAT_NV16,
150                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
151                 .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
152                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
153         },
154         {
155                 .drm = DRM_FORMAT_NV61,
156                 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
157                 .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
158                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
159         },
160         {
161                 .drm = DRM_FORMAT_P030,
162                 .hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT,
163                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
164                 .hvs5_only = true,
165         },
166         {
167                 .drm = DRM_FORMAT_XRGB2101010,
168                 .hvs = HVS_PIXEL_FORMAT_RGBA1010102,
169                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
170                 .hvs5_only = true,
171         },
172         {
173                 .drm = DRM_FORMAT_ARGB2101010,
174                 .hvs = HVS_PIXEL_FORMAT_RGBA1010102,
175                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
176                 .hvs5_only = true,
177         },
178         {
179                 .drm = DRM_FORMAT_ABGR2101010,
180                 .hvs = HVS_PIXEL_FORMAT_RGBA1010102,
181                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
182                 .hvs5_only = true,
183         },
184         {
185                 .drm = DRM_FORMAT_XBGR2101010,
186                 .hvs = HVS_PIXEL_FORMAT_RGBA1010102,
187                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
188                 .hvs5_only = true,
189         },
190         {
191                 .drm = DRM_FORMAT_RGB332,
192                 .hvs = HVS_PIXEL_FORMAT_RGB332,
193                 .pixel_order = HVS_PIXEL_ORDER_ARGB,
194                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
195         },
196         {
197                 .drm = DRM_FORMAT_BGR233,
198                 .hvs = HVS_PIXEL_FORMAT_RGB332,
199                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
200                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
201         },
202         {
203                 .drm = DRM_FORMAT_XRGB4444,
204                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
205                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
206                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
207         },
208         {
209                 .drm = DRM_FORMAT_ARGB4444,
210                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
211                 .pixel_order = HVS_PIXEL_ORDER_ABGR,
212                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
213         },
214         {
215                 .drm = DRM_FORMAT_XBGR4444,
216                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
217                 .pixel_order = HVS_PIXEL_ORDER_ARGB,
218                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
219         },
220         {
221                 .drm = DRM_FORMAT_ABGR4444,
222                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
223                 .pixel_order = HVS_PIXEL_ORDER_ARGB,
224                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
225         },
226         {
227                 .drm = DRM_FORMAT_BGRX4444,
228                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
229                 .pixel_order = HVS_PIXEL_ORDER_RGBA,
230                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
231         },
232         {
233                 .drm = DRM_FORMAT_BGRA4444,
234                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
235                 .pixel_order = HVS_PIXEL_ORDER_RGBA,
236                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
237         },
238         {
239                 .drm = DRM_FORMAT_RGBX4444,
240                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
241                 .pixel_order = HVS_PIXEL_ORDER_BGRA,
242                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
243         },
244         {
245                 .drm = DRM_FORMAT_RGBA4444,
246                 .hvs = HVS_PIXEL_FORMAT_RGBA4444,
247                 .pixel_order = HVS_PIXEL_ORDER_BGRA,
248                 .pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
249         },
250 };
251
252 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
253 {
254         unsigned i;
255
256         for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
257                 if (hvs_formats[i].drm == drm_format)
258                         return &hvs_formats[i];
259         }
260
261         return NULL;
262 }
263
264 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
265 {
266         if (dst == src >> 16)
267                 return VC4_SCALING_NONE;
268         if (3 * dst >= 2 * (src >> 16))
269                 return VC4_SCALING_PPF;
270         else
271                 return VC4_SCALING_TPZ;
272 }
273
274 static bool plane_enabled(struct drm_plane_state *state)
275 {
276         return state->fb && !WARN_ON(!state->crtc);
277 }
278
279 struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
280 {
281         struct vc4_plane_state *vc4_state;
282         unsigned int i;
283
284         if (WARN_ON(!plane->state))
285                 return NULL;
286
287         vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
288         if (!vc4_state)
289                 return NULL;
290
291         memset(&vc4_state->upm, 0, sizeof(vc4_state->upm));
292
293         for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++)
294                 vc4_state->upm_handle[i] = 0;
295
296         vc4_state->dlist_initialized = 0;
297
298         __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
299
300         if (vc4_state->dlist) {
301                 vc4_state->dlist = kmemdup(vc4_state->dlist,
302                                            vc4_state->dlist_count * 4,
303                                            GFP_KERNEL);
304                 if (!vc4_state->dlist) {
305                         kfree(vc4_state);
306                         return NULL;
307                 }
308                 vc4_state->dlist_size = vc4_state->dlist_count;
309         }
310
311         return &vc4_state->base;
312 }
313
314 void vc4_plane_destroy_state(struct drm_plane *plane,
315                              struct drm_plane_state *state)
316 {
317         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
318         struct vc4_hvs *hvs = vc4->hvs;
319         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
320         unsigned int i;
321
322         for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
323                 unsigned long irqflags;
324
325                 if (!drm_mm_node_allocated(&vc4_state->upm[i]))
326                         continue;
327
328                 spin_lock_irqsave(&hvs->mm_lock, irqflags);
329                 drm_mm_remove_node(&vc4_state->upm[i]);
330                 spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
331
332                 if (vc4_state->upm_handle[i] > 0)
333                         ida_free(&hvs->upm_handles, vc4_state->upm_handle[i]);
334         }
335
336         kfree(vc4_state->dlist);
337         __drm_atomic_helper_plane_destroy_state(&vc4_state->base);
338         kfree(state);
339 }
340
341 /* Called during init to allocate the plane's atomic state. */
342 void vc4_plane_reset(struct drm_plane *plane)
343 {
344         struct vc4_plane_state *vc4_state;
345
346         WARN_ON(plane->state);
347
348         vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
349         if (!vc4_state)
350                 return;
351
352         __drm_atomic_helper_plane_reset(plane, &vc4_state->base);
353 }
354
355 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
356 {
357         if (vc4_state->dlist_count == vc4_state->dlist_size) {
358                 u32 new_size = max(4u, vc4_state->dlist_count * 2);
359                 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
360
361                 if (!new_dlist)
362                         return;
363                 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
364
365                 kfree(vc4_state->dlist);
366                 vc4_state->dlist = new_dlist;
367                 vc4_state->dlist_size = new_size;
368         }
369
370         vc4_state->dlist_count++;
371 }
372
373 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
374 {
375         unsigned int idx = vc4_state->dlist_count;
376
377         vc4_dlist_counter_increment(vc4_state);
378         vc4_state->dlist[idx] = val;
379 }
380
381 /* Returns the scl0/scl1 field based on whether the dimensions need to
382  * be up/down/non-scaled.
383  *
384  * This is a replication of a table from the spec.
385  */
386 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
387 {
388         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
389
390         switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
391         case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
392                 return SCALER_CTL0_SCL_H_PPF_V_PPF;
393         case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
394                 return SCALER_CTL0_SCL_H_TPZ_V_PPF;
395         case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
396                 return SCALER_CTL0_SCL_H_PPF_V_TPZ;
397         case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
398                 return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
399         case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
400                 return SCALER_CTL0_SCL_H_PPF_V_NONE;
401         case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
402                 return SCALER_CTL0_SCL_H_NONE_V_PPF;
403         case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
404                 return SCALER_CTL0_SCL_H_NONE_V_TPZ;
405         case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
406                 return SCALER_CTL0_SCL_H_TPZ_V_NONE;
407         default:
408         case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
409                 /* The unity case is independently handled by
410                  * SCALER_CTL0_UNITY.
411                  */
412                 return 0;
413         }
414 }
415
416 static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
417 {
418         struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
419         unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
420         struct drm_crtc_state *crtc_state;
421
422         crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
423                                                    pstate->crtc);
424
425         vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
426         if (!left && !right && !top && !bottom)
427                 return 0;
428
429         if (left + right >= crtc_state->mode.hdisplay ||
430             top + bottom >= crtc_state->mode.vdisplay)
431                 return -EINVAL;
432
433         adjhdisplay = crtc_state->mode.hdisplay - (left + right);
434         vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
435                                                adjhdisplay,
436                                                crtc_state->mode.hdisplay);
437         vc4_pstate->crtc_x += left;
438         if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
439                 vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;
440
441         adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
442         vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
443                                                adjvdisplay,
444                                                crtc_state->mode.vdisplay);
445         vc4_pstate->crtc_y += top;
446         if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
447                 vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;
448
449         vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
450                                                adjhdisplay,
451                                                crtc_state->mode.hdisplay);
452         vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
453                                                adjvdisplay,
454                                                crtc_state->mode.vdisplay);
455
456         if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
457                 return -EINVAL;
458
459         return 0;
460 }
461
462 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
463 {
464         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
465         struct drm_framebuffer *fb = state->fb;
466         int num_planes = fb->format->num_planes;
467         struct drm_crtc_state *crtc_state;
468         u32 h_subsample = fb->format->hsub;
469         u32 v_subsample = fb->format->vsub;
470         int ret;
471
472         crtc_state = drm_atomic_get_existing_crtc_state(state->state,
473                                                         state->crtc);
474         if (!crtc_state) {
475                 DRM_DEBUG_KMS("Invalid crtc state\n");
476                 return -EINVAL;
477         }
478
479         ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
480                                                   INT_MAX, true, true);
481         if (ret)
482                 return ret;
483
484         vc4_state->src_x = state->src.x1;
485         vc4_state->src_y = state->src.y1;
486         vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
487         vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;
488
489         vc4_state->crtc_x = state->dst.x1;
490         vc4_state->crtc_y = state->dst.y1;
491         vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
492         vc4_state->crtc_h = state->dst.y2 - state->dst.y1;
493
494         if (!vc4_state->crtc_w)
495                 vc4_state->crtc_w = state->crtc->mode.hdisplay;
496         if (!vc4_state->crtc_h)
497                 vc4_state->crtc_h = state->crtc->mode.vdisplay;
498
499         ret = vc4_plane_margins_adj(state);
500         if (ret)
501                 return ret;
502
503         vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
504                                                        vc4_state->crtc_w);
505         vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
506                                                        vc4_state->crtc_h);
507
508         vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
509                                vc4_state->y_scaling[0] == VC4_SCALING_NONE);
510
511         if (num_planes > 1) {
512                 vc4_state->is_yuv = true;
513
514                 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
515                 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
516
517                 vc4_state->x_scaling[1] =
518                         vc4_get_scaling_mode(vc4_state->src_w[1],
519                                              vc4_state->crtc_w);
520                 vc4_state->y_scaling[1] =
521                         vc4_get_scaling_mode(vc4_state->src_h[1],
522                                              vc4_state->crtc_h);
523
524                 /* YUV conversion requires that horizontal scaling be enabled
525                  * on the UV plane even if vc4_get_scaling_mode() returned
526                  * VC4_SCALING_NONE (which can happen when the down-scaling
527                  * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
528                  * case.
529                  */
530                 if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
531                         vc4_state->x_scaling[1] = VC4_SCALING_PPF;
532
533                 /* Similarly UV needs vertical scaling to be enabled.
534                  * Without this a 1:1 scaled YUV422 plane isn't rendered.
535                  */
536                 if (vc4_state->y_scaling[1] == VC4_SCALING_NONE)
537                         vc4_state->y_scaling[1] = VC4_SCALING_PPF;
538         } else {
539                 vc4_state->is_yuv = false;
540                 vc4_state->x_scaling[1] = VC4_SCALING_NONE;
541                 vc4_state->y_scaling[1] = VC4_SCALING_NONE;
542         }
543
544         return 0;
545 }
546
547 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
548 {
549         struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
550         u32 scale, recip;
551
552         WARN_ON_ONCE(vc4->gen > VC4_GEN_6);
553
554         scale = src / dst;
555
556         /* The specs note that while the reciprocal would be defined
557          * as (1<<32)/scale, ~0 is close enough.
558          */
559         recip = ~0 / scale;
560
561         vc4_dlist_write(vc4_state,
562                         /*
563                          * The BCM2712 is lacking BIT(31) compared to
564                          * the previous generations, but we don't use
565                          * it.
566                          */
567                         VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
568                         VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
569         vc4_dlist_write(vc4_state,
570                         VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
571 }
572
573 /* phase magnitude bits */
574 #define PHASE_BITS 6
575
576 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst, u32 xy, int channel, int chroma_offset)
577 {
578         struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
579         u32 scale = src / dst;
580         s32 offset, offset2;
581         s32 phase;
582
583         WARN_ON_ONCE(vc4->gen > VC4_GEN_6);
584
585         /* Start the phase at 1/2 pixel from the 1st pixel at src_x.
586            1/4 pixel for YUV, plus the offset for chroma siting */
587         if (channel) {
588                 /* the phase is relative to scale_src->x, so shift it for display list's x value */
589                 offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
590                 offset -= chroma_offset >> (17 - PHASE_BITS);
591                 offset += -(1 << PHASE_BITS >> 2);
592         } else {
593                 /* the phase is relative to scale_src->x, so shift it for display list's x value */
594                 offset = (xy & 0xffff) >> (16 - PHASE_BITS);
595                 offset += -(1 << PHASE_BITS >> 1);
596
597                 /* This is a kludge to make sure the scaling factors are consitent with YUV's luma scaling.
598                    we lose 1bit precision because of this. */
599                 scale &= ~1;
600         }
601
602         /* There may be a also small error introduced by precision of scale.
603            Add half of that as a compromise */
604         offset2 = src - dst * scale;
605         offset2 >>= 16 - PHASE_BITS;
606         phase = offset + (offset2 >> 1);
607
608         /* Ensure +ve values don't touch the sign bit, then truncate negative values */
609         if (phase >= 1 << PHASE_BITS)
610                 phase = (1 << PHASE_BITS) - 1;
611
612         phase &= SCALER_PPF_IPHASE_MASK;
613
614         vc4_dlist_write(vc4_state,
615                         SCALER_PPF_AGC |
616                         VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
617                         /*
618                          * The register layout documentation is slightly
619                          * different to setup the phase in the BCM2712,
620                          * but they seem equivalent.
621                          */
622                         VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
623 }
624
625 static u32 __vc4_lbm_size(struct drm_plane_state *state)
626 {
627         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
628         struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
629         u32 pix_per_line;
630         u32 lbm;
631
632         /* LBM is not needed when there's no vertical scaling. */
633         if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
634             vc4_state->y_scaling[1] == VC4_SCALING_NONE)
635                 return 0;
636
637         /*
638          * This can be further optimized in the RGB/YUV444 case if the PPF
639          * decimation factor is between 0.5 and 1.0 by using crtc_w.
640          *
641          * It's not an issue though, since in that case since src_w[0] is going
642          * to be greater than or equal to crtc_w.
643          */
644         if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
645                 pix_per_line = vc4_state->crtc_w;
646         else
647                 pix_per_line = vc4_state->src_w[0] >> 16;
648
649         if (!vc4_state->is_yuv) {
650                 if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
651                         lbm = pix_per_line * 8;
652                 else {
653                         /* In special cases, this multiplier might be 12. */
654                         lbm = pix_per_line * 16;
655                 }
656         } else {
657                 /* There are cases for this going down to a multiplier
658                  * of 2, but according to the firmware source, the
659                  * table in the docs is somewhat wrong.
660                  */
661                 lbm = pix_per_line * 16;
662         }
663
664         /* Align it to 64 or 128 (hvs5) bytes */
665         lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64);
666
667         /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
668         lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2;
669
670         return lbm;
671 }
672
673 static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
674                                                 unsigned int channel)
675 {
676         const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
677
678         switch (vc4_state->y_scaling[channel]) {
679         case VC4_SCALING_PPF:
680                 return 4;
681
682         case VC4_SCALING_TPZ:
683                 return 2;
684
685         default:
686                 return 0;
687         }
688 }
689
690 static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
691                                        unsigned int channel)
692 {
693         const struct drm_format_info *info = state->fb->format;
694         const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
695
696         if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
697                 return 0;
698
699         if (info->is_yuv)
700                 return channel ? 2 : 1;
701
702         if (info->has_alpha)
703                 return 4;
704
705         return 3;
706 }
707
708 static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
709                                          unsigned int channel)
710 {
711         const struct drm_format_info *info = state->fb->format;
712         const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
713         unsigned int channels_scaled = 0;
714         unsigned int components, words, wpc;
715         unsigned int width, lines;
716         unsigned int i;
717
718         /* LBM is meant to use the smaller of source or dest width, but there
719          * is a issue with UV scaling that the size required for the second
720          * channel is based on the source width only.
721          */
722         if (info->hsub > 1 && channel == 1)
723                 width = state->src_w >> 16;
724         else
725                 width = min(state->src_w >> 16, state->crtc_w);
726         width = round_up(width / info->hsub, 4);
727
728         wpc = vc4_lbm_words_per_component(state, channel);
729         if (!wpc)
730                 return 0;
731
732         components = vc4_lbm_components(state, channel);
733         if (!components)
734                 return 0;
735
736         if (state->alpha != DRM_BLEND_ALPHA_OPAQUE)
737                 components -= 1;
738
739         words = width * wpc * components;
740
741         lines = DIV_ROUND_UP(words, 128 / info->hsub);
742
743         for (i = 0; i < 2; i++)
744                 if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE)
745                         channels_scaled++;
746
747         if (channels_scaled == 1)
748                 lines = lines / 2;
749
750         return lines;
751 }
752
753 static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
754 {
755         const struct drm_format_info *info = state->fb->format;
756
757         if (info->hsub > 1)
758                 return max(vc4_lbm_channel_size(state, 0),
759                            vc4_lbm_channel_size(state, 1));
760         else
761                 return vc4_lbm_channel_size(state, 0);
762 }
763
764 u32 vc4_lbm_size(struct drm_plane_state *state)
765 {
766         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
767         struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
768
769         /* LBM is not needed when there's no vertical scaling. */
770         if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
771             vc4_state->y_scaling[1] == VC4_SCALING_NONE)
772                 return 0;
773
774         if (vc4->gen >= VC4_GEN_6)
775                 return __vc6_lbm_size(state);
776         else
777                 return __vc4_lbm_size(state);
778 }
779
780 static size_t vc6_upm_size(const struct drm_plane_state *state,
781                            unsigned int plane)
782 {
783         const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
784         unsigned int stride = state->fb->pitches[plane];
785
786         /*
787          * TODO: This only works for raster formats, and is sub-optimal
788          * for buffers with a stride aligned on 32 bytes.
789          */
790         unsigned int words_per_line = (stride + 62) / 32;
791         unsigned int fetch_region_size = words_per_line * 32;
792         unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
793         unsigned int buffer_size = fetch_region_size * buffer_lines;
794
795         return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
796 }
797
798 static void vc4_write_scaling_parameters(struct drm_plane_state *state,
799                                          int channel)
800 {
801         struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
802         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
803
804         WARN_ON_ONCE(vc4->gen > VC4_GEN_6);
805
806         /* Ch0 H-PPF Word 0: Scaling Parameters */
807         if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
808                 vc4_write_ppf(vc4_state,
809                               vc4_state->src_w[channel], vc4_state->crtc_w, vc4_state->src_x, channel,
810                               state->chroma_siting_h);
811         }
812
813         /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
814         if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
815                 vc4_write_ppf(vc4_state,
816                               vc4_state->src_h[channel], vc4_state->crtc_h, vc4_state->src_y, channel,
817                               state->chroma_siting_v);
818                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
819         }
820
821         /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
822         if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
823                 vc4_write_tpz(vc4_state,
824                               vc4_state->src_w[channel], vc4_state->crtc_w);
825         }
826
827         /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
828         if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
829                 vc4_write_tpz(vc4_state,
830                               vc4_state->src_h[channel], vc4_state->crtc_h);
831                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
832         }
833 }
834
835 static void vc4_plane_calc_load(struct drm_plane_state *state)
836 {
837         unsigned int hvs_load_shift, vrefresh, i;
838         struct drm_framebuffer *fb = state->fb;
839         struct vc4_plane_state *vc4_state;
840         struct drm_crtc_state *crtc_state;
841         unsigned int vscale_factor;
842
843         vc4_state = to_vc4_plane_state(state);
844         crtc_state = drm_atomic_get_existing_crtc_state(state->state,
845                                                         state->crtc);
846         vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
847
848         /* The HVS is able to process 2 pixels/cycle when scaling the source,
849          * 4 pixels/cycle otherwise.
850          * Alpha blending step seems to be pipelined and it's always operating
851          * at 4 pixels/cycle, so the limiting aspect here seems to be the
852          * scaler block.
853          * HVS load is expressed in clk-cycles/sec (AKA Hz).
854          */
855         if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
856             vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
857             vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
858             vc4_state->y_scaling[1] != VC4_SCALING_NONE)
859                 hvs_load_shift = 1;
860         else
861                 hvs_load_shift = 2;
862
863         vc4_state->membus_load = 0;
864         vc4_state->hvs_load = 0;
865         for (i = 0; i < fb->format->num_planes; i++) {
866                 /* Even if the bandwidth/plane required for a single frame is
867                  *
868                  * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
869                  *  cpp * vrefresh
870                  *
871                  * when downscaling, we have to read more pixels per line in
872                  * the time frame reserved for a single line, so the bandwidth
873                  * demand can be punctually higher. To account for that, we
874                  * calculate the down-scaling factor and multiply the plane
875                  * load by this number. We're likely over-estimating the read
876                  * demand, but that's better than under-estimating it.
877                  */
878                 vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
879                                              vc4_state->crtc_h);
880                 vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
881                                           (vc4_state->src_h[i] >> 16) *
882                                           vscale_factor * fb->format->cpp[i];
883                 vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
884         }
885
886         vc4_state->hvs_load *= vrefresh;
887         vc4_state->hvs_load >>= hvs_load_shift;
888         vc4_state->membus_load *= vrefresh;
889 }
890
891 static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
892 {
893         struct drm_device *drm = state->plane->dev;
894         struct vc4_dev *vc4 = to_vc4_dev(drm);
895         struct drm_plane *plane = state->plane;
896         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
897         u32 lbm_size;
898
899         lbm_size = vc4_lbm_size(state);
900         if (!lbm_size) {
901                 vc4_state->lbm_size = 0;
902                 return 0;
903         }
904
905         /*
906          * NOTE: BCM2712 doesn't need to be aligned, since the size
907          * returned by vc4_lbm_size() is in words already.
908          */
909         if (vc4->gen == VC4_GEN_5)
910                 lbm_size = ALIGN(lbm_size, 64);
911         else if (vc4->gen == VC4_GEN_4)
912                 lbm_size = ALIGN(lbm_size, 32);
913
914         drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n",
915                        plane->base.id, plane->name, lbm_size);
916
917         if (WARN_ON(!vc4_state->lbm_offset))
918                 return -EINVAL;
919
920         /* FIXME: Add loop here that ensures that the total LBM assigned in this
921          *  state is less than the total lbm size
922          */
923         vc4_state->lbm_size = lbm_size;
924
925         return 0;
926 }
927
928 static int vc6_plane_allocate_upm(struct drm_plane_state *state)
929 {
930         const struct drm_format_info *info = state->fb->format;
931         struct drm_device *drm = state->plane->dev;
932         struct vc4_dev *vc4 = to_vc4_dev(drm);
933         struct vc4_hvs *hvs = vc4->hvs;
934         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
935         unsigned int i;
936         int ret;
937
938         WARN_ON_ONCE(vc4->gen < VC4_GEN_6);
939
940         vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;
941
942         for (i = 0; i < info->num_planes; i++) {
943                 unsigned long irqflags;
944                 size_t upm_size;
945
946                 upm_size = vc6_upm_size(state, i);
947                 if (!upm_size)
948                         return -EINVAL;
949
950                 spin_lock_irqsave(&hvs->mm_lock, irqflags);
951                 ret = drm_mm_insert_node_generic(&hvs->upm_mm,
952                                                  &vc4_state->upm[i],
953                                                  upm_size, HVS_UBM_WORD_SIZE,
954                                                  0, 0);
955                 spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
956                 if (ret) {
957                         drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
958                         return ret;
959                 }
960
961                 ret = ida_alloc_range(&hvs->upm_handles, 1, 32, GFP_KERNEL);
962                 if (ret < 0)
963                         return ret;
964
965                 vc4_state->upm_handle[i] = ret;
966
967                 vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
968                         VC4_SET_FIELD(vc4_state->upm[i].start / HVS_UBM_WORD_SIZE,
969                                       SCALER6_PTR0_UPM_BASE) |
970                         VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
971                                       SCALER6_PTR0_UPM_HANDLE) |
972                         VC4_SET_FIELD(vc4_state->upm_buffer_lines,
973                                       SCALER6_PTR0_UPM_BUFF_SIZE);
974         }
975
976         return 0;
977 }
978
979 /*
980  * The colorspace conversion matrices are held in 3 entries in the dlist.
981  * Create an array of them, with entries for each full and limited mode, and
982  * each supported colorspace.
983  */
984 static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
985         {
986                 /* Limited range */
987                 {
988                         /* BT601 */
989                         SCALER_CSC0_ITR_R_601_5,
990                         SCALER_CSC1_ITR_R_601_5,
991                         SCALER_CSC2_ITR_R_601_5,
992                 }, {
993                         /* BT709 */
994                         SCALER_CSC0_ITR_R_709_3,
995                         SCALER_CSC1_ITR_R_709_3,
996                         SCALER_CSC2_ITR_R_709_3,
997                 }, {
998                         /* BT2020 */
999                         SCALER_CSC0_ITR_R_2020,
1000                         SCALER_CSC1_ITR_R_2020,
1001                         SCALER_CSC2_ITR_R_2020,
1002                 }
1003         }, {
1004                 /* Full range */
1005                 {
1006                         /* JFIF */
1007                         SCALER_CSC0_JPEG_JFIF,
1008                         SCALER_CSC1_JPEG_JFIF,
1009                         SCALER_CSC2_JPEG_JFIF,
1010                 }, {
1011                         /* BT709 */
1012                         SCALER_CSC0_ITR_R_709_3_FR,
1013                         SCALER_CSC1_ITR_R_709_3_FR,
1014                         SCALER_CSC2_ITR_R_709_3_FR,
1015                 }, {
1016                         /* BT2020 */
1017                         SCALER_CSC0_ITR_R_2020_FR,
1018                         SCALER_CSC1_ITR_R_2020_FR,
1019                         SCALER_CSC2_ITR_R_2020_FR,
1020                 }
1021         }
1022 };
1023
1024 static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state)
1025 {
1026         struct drm_device *dev = state->state->dev;
1027         struct vc4_dev *vc4 = to_vc4_dev(dev);
1028
1029         WARN_ON_ONCE(vc4->gen != VC4_GEN_4);
1030
1031         if (!state->fb->format->has_alpha)
1032                 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1033                                      SCALER_POS2_ALPHA_MODE);
1034
1035         switch (state->pixel_blend_mode) {
1036         case DRM_MODE_BLEND_PIXEL_NONE:
1037                 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1038                                      SCALER_POS2_ALPHA_MODE);
1039         default:
1040         case DRM_MODE_BLEND_PREMULTI:
1041                 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1042                                      SCALER_POS2_ALPHA_MODE) |
1043                         SCALER_POS2_ALPHA_PREMULT;
1044         case DRM_MODE_BLEND_COVERAGE:
1045                 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1046                                      SCALER_POS2_ALPHA_MODE);
1047         }
1048 }
1049
1050 static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state)
1051 {
1052         struct drm_device *dev = state->state->dev;
1053         struct vc4_dev *vc4 = to_vc4_dev(dev);
1054
1055         WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6);
1056
1057         if (!state->fb->format->has_alpha)
1058                 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1059                                      SCALER5_CTL2_ALPHA_MODE);
1060
1061         switch (state->pixel_blend_mode) {
1062         case DRM_MODE_BLEND_PIXEL_NONE:
1063                 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1064                                      SCALER5_CTL2_ALPHA_MODE);
1065         default:
1066         case DRM_MODE_BLEND_PREMULTI:
1067                 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1068                                      SCALER5_CTL2_ALPHA_MODE) |
1069                         SCALER5_CTL2_ALPHA_PREMULT;
1070         case DRM_MODE_BLEND_COVERAGE:
1071                 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1072                                      SCALER5_CTL2_ALPHA_MODE);
1073         }
1074 }
1075
1076 /* Writes out a full display list for an active plane to the plane's
1077  * private dlist state.
1078  */
1079 static int vc4_plane_mode_set(struct drm_plane *plane,
1080                               struct drm_plane_state *state)
1081 {
1082         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
1083         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1084         struct drm_framebuffer *fb = state->fb;
1085         u32 ctl0_offset = vc4_state->dlist_count;
1086         const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1087         u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1088         int num_planes = fb->format->num_planes;
1089         u32 h_subsample = fb->format->hsub;
1090         u32 v_subsample = fb->format->vsub;
1091         bool mix_plane_alpha;
1092         bool covers_screen;
1093         u32 scl0, scl1, pitch0;
1094         u32 tiling, src_x, src_y;
1095         u32 width, height;
1096         u32 hvs_format = format->hvs;
1097         unsigned int rotation;
1098         u32 offsets[3] = { 0 };
1099         int ret, i;
1100
1101         if (vc4_state->dlist_initialized)
1102                 return 0;
1103
1104         ret = vc4_plane_setup_clipping_and_scaling(state);
1105         if (ret)
1106                 return ret;
1107
1108         width = vc4_state->src_w[0] >> 16;
1109         height = vc4_state->src_h[0] >> 16;
1110
1111         /* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
1112          * and 4:4:4, scl1 should be set to scl0 so both channels of
1113          * the scaler do the same thing.  For YUV, the Y plane needs
1114          * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1115          * the scl fields here.
1116          */
1117         if (num_planes == 1) {
1118                 scl0 = vc4_get_scl_field(state, 0);
1119                 scl1 = scl0;
1120         } else {
1121                 scl0 = vc4_get_scl_field(state, 1);
1122                 scl1 = vc4_get_scl_field(state, 0);
1123         }
1124
1125         rotation = drm_rotation_simplify(state->rotation,
1126                                          DRM_MODE_ROTATE_0 |
1127                                          DRM_MODE_REFLECT_X |
1128                                          DRM_MODE_REFLECT_Y);
1129
1130         /* We must point to the last line when Y reflection is enabled. */
1131         src_y = vc4_state->src_y >> 16;
1132         if (rotation & DRM_MODE_REFLECT_Y)
1133                 src_y += height - 1;
1134
1135         src_x = vc4_state->src_x >> 16;
1136
1137         switch (base_format_mod) {
1138         case DRM_FORMAT_MOD_LINEAR:
1139                 tiling = SCALER_CTL0_TILING_LINEAR;
1140                 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
1141
1142                 /* Adjust the base pointer to the first pixel to be scanned
1143                  * out.
1144                  */
1145                 for (i = 0; i < num_planes; i++) {
1146                         offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1147                         offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1148                 }
1149
1150                 break;
1151
1152         case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
1153                 u32 tile_size_shift = 12; /* T tiles are 4kb */
1154                 /* Whole-tile offsets, mostly for setting the pitch. */
1155                 u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
1156                 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
1157                 u32 tile_w_mask = (1 << tile_w_shift) - 1;
1158                 /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
1159                  * the height (in pixels) of a 4k tile.
1160                  */
1161                 u32 tile_h_mask = (2 << tile_h_shift) - 1;
1162                 /* For T-tiled, the FB pitch is "how many bytes from one row to
1163                  * the next, such that
1164                  *
1165                  *      pitch * tile_h == tile_size * tiles_per_row
1166                  */
1167                 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
1168                 u32 tiles_l = src_x >> tile_w_shift;
1169                 u32 tiles_r = tiles_w - tiles_l;
1170                 u32 tiles_t = src_y >> tile_h_shift;
1171                 /* Intra-tile offsets, which modify the base address (the
1172                  * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
1173                  * base address).
1174                  */
1175                 u32 tile_y = (src_y >> 4) & 1;
1176                 u32 subtile_y = (src_y >> 2) & 3;
1177                 u32 utile_y = src_y & 3;
1178                 u32 x_off = src_x & tile_w_mask;
1179                 u32 y_off = src_y & tile_h_mask;
1180
1181                 /* When Y reflection is requested we must set the
1182                  * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
1183                  * after the initial one should be fetched in descending order,
1184                  * which makes sense since we start from the last line and go
1185                  * backward.
1186                  * Don't know why we need y_off = max_y_off - y_off, but it's
1187                  * definitely required (I guess it's also related to the "going
1188                  * backward" situation).
1189                  */
1190                 if (rotation & DRM_MODE_REFLECT_Y) {
1191                         y_off = tile_h_mask - y_off;
1192                         pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
1193                 } else {
1194                         pitch0 = 0;
1195                 }
1196
1197                 tiling = SCALER_CTL0_TILING_256B_OR_T;
1198                 pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
1199                            VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
1200                            VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
1201                            VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
1202                 offsets[0] += tiles_t * (tiles_w << tile_size_shift);
1203                 offsets[0] += subtile_y << 8;
1204                 offsets[0] += utile_y << 4;
1205
1206                 /* Rows of tiles alternate left-to-right and right-to-left. */
1207                 if (tiles_t & 1) {
1208                         pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
1209                         offsets[0] += (tiles_w - tiles_l) << tile_size_shift;
1210                         offsets[0] -= (1 + !tile_y) << 10;
1211                 } else {
1212                         offsets[0] += tiles_l << tile_size_shift;
1213                         offsets[0] += tile_y << 10;
1214                 }
1215
1216                 break;
1217         }
1218
1219         case DRM_FORMAT_MOD_BROADCOM_SAND64:
1220         case DRM_FORMAT_MOD_BROADCOM_SAND128:
1221         case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1222                 uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1223
1224                 if (param > SCALER_TILE_HEIGHT_MASK) {
1225                         DRM_DEBUG_KMS("SAND height too large (%d)\n",
1226                                       param);
1227                         return -EINVAL;
1228                 }
1229
1230                 if (fb->format->format == DRM_FORMAT_P030) {
1231                         hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1232                         tiling = SCALER_CTL0_TILING_128B;
1233                 } else {
1234                         hvs_format = HVS_PIXEL_FORMAT_H264;
1235
1236                         switch (base_format_mod) {
1237                         case DRM_FORMAT_MOD_BROADCOM_SAND64:
1238                                 tiling = SCALER_CTL0_TILING_64B;
1239                                 break;
1240                         case DRM_FORMAT_MOD_BROADCOM_SAND128:
1241                                 tiling = SCALER_CTL0_TILING_128B;
1242                                 break;
1243                         case DRM_FORMAT_MOD_BROADCOM_SAND256:
1244                                 tiling = SCALER_CTL0_TILING_256B_OR_T;
1245                                 break;
1246                         default:
1247                                 return -EINVAL;
1248                         }
1249                 }
1250
1251                 /* Adjust the base pointer to the first pixel to be scanned
1252                  * out.
1253                  *
1254                  * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1255                  * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1256                  * word that should be taken as the first pixel.
1257                  * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1258                  * element within the 128bit word, eg for pixel 3 the value
1259                  * should be 6.
1260                  */
1261                 for (i = 0; i < num_planes; i++) {
1262                         u32 tile_w, tile, x_off, pix_per_tile;
1263
1264                         if (fb->format->format == DRM_FORMAT_P030) {
1265                                 /*
1266                                  * Spec says: bits [31:4] of the given address
1267                                  * should point to the 128-bit word containing
1268                                  * the desired starting pixel, and bits[3:0]
1269                                  * should be between 0 and 11, indicating which
1270                                  * of the 12-pixels in that 128-bit word is the
1271                                  * first pixel to be used
1272                                  */
1273                                 u32 remaining_pixels = src_x % 96;
1274                                 u32 aligned = remaining_pixels / 12;
1275                                 u32 last_bits = remaining_pixels % 12;
1276
1277                                 x_off = aligned * 16 + last_bits;
1278                                 tile_w = 128;
1279                                 pix_per_tile = 96;
1280                         } else {
1281                                 switch (base_format_mod) {
1282                                 case DRM_FORMAT_MOD_BROADCOM_SAND64:
1283                                         tile_w = 64;
1284                                         break;
1285                                 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1286                                         tile_w = 128;
1287                                         break;
1288                                 case DRM_FORMAT_MOD_BROADCOM_SAND256:
1289                                         tile_w = 256;
1290                                         break;
1291                                 default:
1292                                         return -EINVAL;
1293                                 }
1294                                 pix_per_tile = tile_w / fb->format->cpp[0];
1295                                 x_off = (src_x % pix_per_tile) /
1296                                         (i ? h_subsample : 1) *
1297                                         fb->format->cpp[i];
1298                         }
1299
1300                         tile = src_x / pix_per_tile;
1301
1302                         offsets[i] += param * tile_w * tile;
1303                         offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1304                         offsets[i] += x_off & ~(i ? 1 : 0);
1305                 }
1306
1307                 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
1308                 break;
1309         }
1310
1311         default:
1312                 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1313                               (long long)fb->modifier);
1314                 return -EINVAL;
1315         }
1316
1317         /* fetch an extra pixel if we don't actually line up with the left edge. */
1318         if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1319                 width++;
1320
1321         /* same for the right side */
1322         if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1323                vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1324                 width++;
1325
1326         /* now for the top */
1327         if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1328                 height++;
1329
1330         /* and the bottom */
1331         if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1332                vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1333                 height++;
1334
1335         /* for YUV444 hardware wants double the width, otherwise it doesn't fetch full width of chroma */
1336         if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1337                 width <<= 1;
1338
1339         /* Don't waste cycles mixing with plane alpha if the set alpha
1340          * is opaque or there is no per-pixel alpha information.
1341          * In any case we use the alpha property value as the fixed alpha.
1342          */
1343         mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1344                           fb->format->has_alpha;
1345
1346         if (vc4->gen == VC4_GEN_4) {
1347         /* Control word */
1348                 vc4_dlist_write(vc4_state,
1349                                 SCALER_CTL0_VALID |
1350                                 (rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
1351                                 (rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
1352                                 VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
1353                                 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
1354                                 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
1355                                 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
1356                                 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
1357                                 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
1358                                 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
1359
1360                 /* Position Word 0: Image Positions and Alpha Value */
1361                 vc4_state->pos0_offset = vc4_state->dlist_count;
1362                 vc4_dlist_write(vc4_state,
1363                                 VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
1364                                 VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
1365                                 VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
1366
1367                 /* Position Word 1: Scaled Image Dimensions. */
1368                 if (!vc4_state->is_unity) {
1369                         vc4_dlist_write(vc4_state,
1370                                         VC4_SET_FIELD(vc4_state->crtc_w,
1371                                                       SCALER_POS1_SCL_WIDTH) |
1372                                         VC4_SET_FIELD(vc4_state->crtc_h,
1373                                                       SCALER_POS1_SCL_HEIGHT));
1374                 }
1375
1376                 /* Position Word 2: Source Image Size, Alpha */
1377                 vc4_state->pos2_offset = vc4_state->dlist_count;
1378                 vc4_dlist_write(vc4_state,
1379                                 (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
1380                                 vc4_hvs4_get_alpha_blend_mode(state) |
1381                                 VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
1382                                 VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));
1383
1384                 /* Position Word 3: Context.  Written by the HVS. */
1385                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1386
1387         } else {
1388                 /* Control word */
1389                 vc4_dlist_write(vc4_state,
1390                                 SCALER_CTL0_VALID |
1391                                 (format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) |
1392                                 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
1393                                 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
1394                                 (vc4_state->is_unity ?
1395                                                 SCALER5_CTL0_UNITY : 0) |
1396                                 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
1397                                 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
1398                                 SCALER5_CTL0_ALPHA_EXPAND |
1399                                 SCALER5_CTL0_RGB_EXPAND);
1400
1401                 /* Position Word 0: Image Positions and Alpha Value */
1402                 vc4_state->pos0_offset = vc4_state->dlist_count;
1403                 vc4_dlist_write(vc4_state,
1404                                 (rotation & DRM_MODE_REFLECT_Y ?
1405                                                 SCALER5_POS0_VFLIP : 0) |
1406                                 VC4_SET_FIELD(vc4_state->crtc_x,
1407                                               SCALER_POS0_START_X) |
1408                                 (rotation & DRM_MODE_REFLECT_X ?
1409                                               SCALER5_POS0_HFLIP : 0) |
1410                                 VC4_SET_FIELD(vc4_state->crtc_y,
1411                                               SCALER5_POS0_START_Y)
1412                                );
1413
1414                 /* Control Word 2 */
1415                 vc4_dlist_write(vc4_state,
1416                                 VC4_SET_FIELD(state->alpha >> 4,
1417                                               SCALER5_CTL2_ALPHA) |
1418                                 vc4_hvs5_get_alpha_blend_mode(state) |
1419                                 (mix_plane_alpha ?
1420                                         SCALER5_CTL2_ALPHA_MIX : 0)
1421                                );
1422
1423                 /* Position Word 1: Scaled Image Dimensions. */
1424                 if (!vc4_state->is_unity) {
1425                         vc4_dlist_write(vc4_state,
1426                                         VC4_SET_FIELD(vc4_state->crtc_w,
1427                                                       SCALER5_POS1_SCL_WIDTH) |
1428                                         VC4_SET_FIELD(vc4_state->crtc_h,
1429                                                       SCALER5_POS1_SCL_HEIGHT));
1430                 }
1431
1432                 /* Position Word 2: Source Image Size */
1433                 vc4_state->pos2_offset = vc4_state->dlist_count;
1434                 vc4_dlist_write(vc4_state,
1435                                 VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
1436                                 VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));
1437
1438                 /* Position Word 3: Context.  Written by the HVS. */
1439                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1440         }
1441
1442
1443         /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
1444          *
1445          * The pointers may be any byte address.
1446          */
1447         vc4_state->ptr0_offset[0] = vc4_state->dlist_count;
1448
1449         for (i = 0; i < num_planes; i++) {
1450                 dma_addr_t paddr = drm_fb_dma_get_gem_addr(fb, state, i);
1451
1452                 vc4_dlist_write(vc4_state, paddr + offsets[i]);
1453         }
1454
1455         /* Pointer Context Word 0/1/2: Written by the HVS */
1456         for (i = 0; i < num_planes; i++)
1457                 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1458
1459         /* Pitch word 0 */
1460         vc4_dlist_write(vc4_state, pitch0);
1461
1462         /* Pitch word 1/2 */
1463         for (i = 1; i < num_planes; i++) {
1464                 if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
1465                     hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
1466                         vc4_dlist_write(vc4_state,
1467                                         VC4_SET_FIELD(fb->pitches[i],
1468                                                       SCALER_SRC_PITCH));
1469                 } else {
1470                         vc4_dlist_write(vc4_state, pitch0);
1471                 }
1472         }
1473
1474         /* Colorspace conversion words */
1475         if (vc4_state->is_yuv) {
1476                 enum drm_color_encoding color_encoding = state->color_encoding;
1477                 enum drm_color_range color_range = state->color_range;
1478                 const u32 *ccm;
1479
1480                 if (color_encoding >= DRM_COLOR_ENCODING_MAX)
1481                         color_encoding = DRM_COLOR_YCBCR_BT601;
1482                 if (color_range >= DRM_COLOR_RANGE_MAX)
1483                         color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1484
1485                 ccm = colorspace_coeffs[color_range][color_encoding];
1486
1487                 vc4_dlist_write(vc4_state, ccm[0]);
1488                 vc4_dlist_write(vc4_state, ccm[1]);
1489                 vc4_dlist_write(vc4_state, ccm[2]);
1490         }
1491
1492         vc4_state->lbm_offset = 0;
1493
1494         if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
1495             vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
1496             vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1497             vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1498                 /* Reserve a slot for the LBM Base Address. The real value will
1499                  * be set when calling vc4_plane_allocate_lbm().
1500                  */
1501                 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1502                     vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1503                         vc4_state->lbm_offset = vc4_state->dlist_count;
1504                         vc4_dlist_counter_increment(vc4_state);
1505                 }
1506
1507                 if (num_planes > 1) {
1508                         /* Emit Cb/Cr as channel 0 and Y as channel
1509                          * 1. This matches how we set up scl0/scl1
1510                          * above.
1511                          */
1512                         vc4_write_scaling_parameters(state, 1);
1513                 }
1514                 vc4_write_scaling_parameters(state, 0);
1515
1516                 /* If any PPF setup was done, then all the kernel
1517                  * pointers get uploaded.
1518                  */
1519                 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
1520                     vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
1521                     vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
1522                     vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
1523                         u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
1524                                                    SCALER_PPF_KERNEL_OFFSET);
1525
1526                         /* HPPF plane 0 */
1527                         vc4_dlist_write(vc4_state, kernel);
1528                         /* VPPF plane 0 */
1529                         vc4_dlist_write(vc4_state, kernel);
1530                         /* HPPF plane 1 */
1531                         vc4_dlist_write(vc4_state, kernel);
1532                         /* VPPF plane 1 */
1533                         vc4_dlist_write(vc4_state, kernel);
1534                 }
1535         }
1536
1537         vc4_state->dlist[ctl0_offset] |=
1538                 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
1539
1540         /* crtc_* are already clipped coordinates. */
1541         covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
1542                         vc4_state->crtc_w == state->crtc->mode.hdisplay &&
1543                         vc4_state->crtc_h == state->crtc->mode.vdisplay;
1544         /* Background fill might be necessary when the plane has per-pixel
1545          * alpha content or a non-opaque plane alpha and could blend from the
1546          * background or does not cover the entire screen.
1547          */
1548         vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
1549                                    state->alpha != DRM_BLEND_ALPHA_OPAQUE;
1550
1551         /* Flag the dlist as initialized to avoid checking it twice in case
1552          * the async update check already called vc4_plane_mode_set() and
1553          * decided to fallback to sync update because async update was not
1554          * possible.
1555          */
1556         vc4_state->dlist_initialized = 1;
1557
1558         vc4_plane_calc_load(state);
1559
1560         return 0;
1561 }
1562
1563 static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
1564 {
1565         struct drm_plane_state *state = &vc4_state->base;
1566         u32 ret = 0;
1567
1568         if (vc4_state->is_yuv) {
1569                 enum drm_color_encoding color_encoding = state->color_encoding;
1570                 enum drm_color_range color_range = state->color_range;
1571
1572                 ret |= SCALER6_CTL2_CSC_ENABLE;
1573
1574                 /* CSC pre-loaded with:
1575                  * 0 = BT601 limited range
1576                  * 1 = BT709 limited range
1577                  * 2 = BT2020 limited range
1578                  * 3 = BT601 full range
1579                  * 4 = BT709 full range
1580                  * 5 = BT2020 full range
1581                  */
1582                 if (color_encoding > DRM_COLOR_YCBCR_BT2020)
1583                         color_encoding = DRM_COLOR_YCBCR_BT601;
1584                 if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
1585                         color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1586
1587                 ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1588                                      SCALER6_CTL2_BRCM_CFC_CONTROL);
1589         }
1590
1591         return ret;
1592 }
1593
1594 static int vc6_plane_mode_set(struct drm_plane *plane,
1595                               struct drm_plane_state *state)
1596 {
1597         struct drm_device *drm = plane->dev;
1598         struct vc4_dev *vc4 = to_vc4_dev(drm);
1599         struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1600         struct drm_framebuffer *fb = state->fb;
1601         const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1602         u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1603         int num_planes = fb->format->num_planes;
1604         u32 h_subsample = fb->format->hsub;
1605         u32 v_subsample = fb->format->vsub;
1606         bool mix_plane_alpha;
1607         bool covers_screen;
1608         u32 scl0, scl1, pitch0;
1609         u32 tiling, src_x, src_y;
1610         u32 width, height;
1611         u32 hvs_format = format->hvs;
1612         u32 offsets[3] = { 0 };
1613         unsigned int rotation;
1614         int ret, i;
1615
1616         if (vc4_state->dlist_initialized)
1617                 return 0;
1618
1619         ret = vc4_plane_setup_clipping_and_scaling(state);
1620         if (ret)
1621                 return ret;
1622
1623         width = vc4_state->src_w[0] >> 16;
1624         height = vc4_state->src_h[0] >> 16;
1625
1626         /* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
1627          * and 4:4:4, scl1 should be set to scl0 so both channels of
1628          * the scaler do the same thing.  For YUV, the Y plane needs
1629          * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1630          * the scl fields here.
1631          */
1632         if (num_planes == 1) {
1633                 scl0 = vc4_get_scl_field(state, 0);
1634                 scl1 = scl0;
1635         } else {
1636                 scl0 = vc4_get_scl_field(state, 1);
1637                 scl1 = vc4_get_scl_field(state, 0);
1638         }
1639
1640         rotation = drm_rotation_simplify(state->rotation,
1641                                          DRM_MODE_ROTATE_0 |
1642                                          DRM_MODE_REFLECT_X |
1643                                          DRM_MODE_REFLECT_Y);
1644
1645         /* We must point to the last line when Y reflection is enabled. */
1646         src_y = vc4_state->src_y >> 16;
1647         if (rotation & DRM_MODE_REFLECT_Y)
1648                 src_y += height - 1;
1649
1650         src_x = vc4_state->src_x >> 16;
1651
1652         switch (base_format_mod) {
1653         case DRM_FORMAT_MOD_LINEAR:
1654                 tiling = SCALER6_CTL0_ADDR_MODE_LINEAR;
1655
1656                 /* Adjust the base pointer to the first pixel to be scanned
1657                  * out.
1658                  */
1659                 for (i = 0; i < num_planes; i++) {
1660                         offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1661                         offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1662                 }
1663
1664                 break;
1665
1666         case DRM_FORMAT_MOD_BROADCOM_SAND128:
1667         case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1668                 uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1669                 u32 components_per_word;
1670                 u32 starting_offset;
1671                 u32 fetch_count;
1672
1673                 if (param > SCALER_TILE_HEIGHT_MASK) {
1674                         DRM_DEBUG_KMS("SAND height too large (%d)\n",
1675                                       param);
1676                         return -EINVAL;
1677                 }
1678
1679                 if (fb->format->format == DRM_FORMAT_P030) {
1680                         hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1681                         tiling = SCALER6_CTL0_ADDR_MODE_128B;
1682                 } else {
1683                         hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE;
1684
1685                         switch (base_format_mod) {
1686                         case DRM_FORMAT_MOD_BROADCOM_SAND128:
1687                                 tiling = SCALER6_CTL0_ADDR_MODE_128B;
1688                                 break;
1689                         case DRM_FORMAT_MOD_BROADCOM_SAND256:
1690                                 tiling = SCALER6_CTL0_ADDR_MODE_256B;
1691                                 break;
1692                         default:
1693                                 return -EINVAL;
1694                         }
1695                 }
1696
1697                 /* Adjust the base pointer to the first pixel to be scanned
1698                  * out.
1699                  *
1700                  * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1701                  * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1702                  * word that should be taken as the first pixel.
1703                  * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1704                  * element within the 128bit word, eg for pixel 3 the value
1705                  * should be 6.
1706                  */
1707                 for (i = 0; i < num_planes; i++) {
1708                         u32 tile_w, tile, x_off, pix_per_tile;
1709
1710                         if (fb->format->format == DRM_FORMAT_P030) {
1711                                 /*
1712                                  * Spec says: bits [31:4] of the given address
1713                                  * should point to the 128-bit word containing
1714                                  * the desired starting pixel, and bits[3:0]
1715                                  * should be between 0 and 11, indicating which
1716                                  * of the 12-pixels in that 128-bit word is the
1717                                  * first pixel to be used
1718                                  */
1719                                 u32 remaining_pixels = src_x % 96;
1720                                 u32 aligned = remaining_pixels / 12;
1721                                 u32 last_bits = remaining_pixels % 12;
1722
1723                                 x_off = aligned * 16 + last_bits;
1724                                 tile_w = 128;
1725                                 pix_per_tile = 96;
1726                         } else {
1727                                 switch (base_format_mod) {
1728                                 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1729                                         tile_w = 128;
1730                                         break;
1731                                 case DRM_FORMAT_MOD_BROADCOM_SAND256:
1732                                         tile_w = 256;
1733                                         break;
1734                                 default:
1735                                         return -EINVAL;
1736                                 }
1737                                 pix_per_tile = tile_w / fb->format->cpp[0];
1738                                 x_off = (src_x % pix_per_tile) /
1739                                         (i ? h_subsample : 1) *
1740                                         fb->format->cpp[i];
1741                         }
1742
1743                         tile = src_x / pix_per_tile;
1744
1745                         offsets[i] += param * tile_w * tile;
1746                         offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1747                         offsets[i] += x_off & ~(i ? 1 : 0);
1748                 }
1749
1750                 components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
1751                 starting_offset = src_x % components_per_word;
1752                 fetch_count = (width + starting_offset + components_per_word - 1) /
1753                         components_per_word;
1754
1755                 pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
1756                          VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
1757                 break;
1758         }
1759
1760         default:
1761                 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1762                               (long long)fb->modifier);
1763                 return -EINVAL;
1764         }
1765
1766         /* fetch an extra pixel if we don't actually line up with the left edge. */
1767         if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1768                 width++;
1769
1770         /* same for the right side */
1771         if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1772             vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1773                 width++;
1774
1775         /* now for the top */
1776         if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1777                 height++;
1778
1779         /* and the bottom */
1780         if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1781             vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1782                 height++;
1783
1784         /* for YUV444 hardware wants double the width, otherwise it doesn't
1785          * fetch full width of chroma
1786          */
1787         if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1788                 width <<= 1;
1789
1790         /* Don't waste cycles mixing with plane alpha if the set alpha
1791          * is opaque or there is no per-pixel alpha information.
1792          * In any case we use the alpha property value as the fixed alpha.
1793          */
1794         mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1795                           fb->format->has_alpha;
1796
1797         /* Control Word 0: Scaling Configuration & Element Validity*/
1798         vc4_dlist_write(vc4_state,
1799                         SCALER6_CTL0_VALID |
1800                         VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
1801                         VC4_SET_FIELD(0, SCALER6_CTL0_ALPHA_MASK) |
1802                         (vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
1803                         VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
1804                         VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
1805                         VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
1806                         VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));
1807
1808         /* Position Word 0: Image Position */
1809         vc4_state->pos0_offset = vc4_state->dlist_count;
1810         vc4_dlist_write(vc4_state,
1811                         VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
1812                         (rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
1813                         VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));
1814
1815         /* Control Word 2: Alpha Value & CSC */
1816         vc4_dlist_write(vc4_state,
1817                         vc6_plane_get_csc_mode(vc4_state) |
1818                         vc4_hvs5_get_alpha_blend_mode(state) |
1819                         (mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
1820                         VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));
1821
1822         /* Position Word 1: Scaled Image Dimensions */
1823         if (!vc4_state->is_unity)
1824                 vc4_dlist_write(vc4_state,
1825                                 VC4_SET_FIELD(vc4_state->crtc_h - 1,
1826                                               SCALER6_POS1_SCL_LINES) |
1827                                 VC4_SET_FIELD(vc4_state->crtc_w - 1,
1828                                               SCALER6_POS1_SCL_WIDTH));
1829
1830         /* Position Word 2: Source Image Size */
1831         vc4_state->pos2_offset = vc4_state->dlist_count;
1832         vc4_dlist_write(vc4_state,
1833                         VC4_SET_FIELD(height - 1,
1834                                       SCALER6_POS2_SRC_LINES) |
1835                         VC4_SET_FIELD(width - 1,
1836                                       SCALER6_POS2_SRC_WIDTH));
1837
1838         /* Position Word 3: Context */
1839         vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1840
1841         /*
1842          * TODO: This only covers Raster Scan Order planes
1843          */
1844         for (i = 0; i < num_planes; i++) {
1845                 dma_addr_t paddr = drm_fb_dma_get_gem_addr(fb, state, i);
1846
1847                 paddr += offsets[i];
1848
1849                 /* Pointer Word 0 */
1850                 vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
1851                 vc4_dlist_write(vc4_state,
1852                                 (rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
1853                                 /*
1854                                  * The UPM buffer will be allocated in
1855                                  * vc6_plane_allocate_upm().
1856                                  */
1857                                 VC4_SET_FIELD(upper_32_bits(paddr) & 0xf,
1858                                               SCALER6_PTR0_UPPER_ADDR));
1859
1860                 /* Pointer Word 1 */
1861                 vc4_dlist_write(vc4_state, lower_32_bits(paddr));
1862
1863                 /* Pointer Word 2 */
1864                 if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
1865                     base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
1866                         vc4_dlist_write(vc4_state,
1867                                         VC4_SET_FIELD(fb->pitches[i],
1868                                                       SCALER6_PTR2_PITCH));
1869                 } else {
1870                         vc4_dlist_write(vc4_state, pitch0);
1871                 }
1872         }
1873
1874         /*
1875          * Palette Word 0
1876          * TODO: We're not using the palette mode
1877          */
1878
1879         /*
1880          * Trans Word 0
1881          * TODO: It's only relevant if we set the trans_rgb bit in the
1882          * control word 0, and we don't at the moment.
1883          */
1884
1885         vc4_state->lbm_offset = 0;
1886
1887         if (!vc4_state->is_unity || fb->format->is_yuv) {
1888                 /*
1889                  * Reserve a slot for the LBM Base Address. The real value will
1890                  * be set when calling vc4_plane_allocate_lbm().
1891                  */
1892                 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1893                     vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1894                         vc4_state->lbm_offset = vc4_state->dlist_count;
1895                         vc4_dlist_counter_increment(vc4_state);
1896                 }
1897
1898                 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
1899                     vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
1900                     vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1901                     vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1902                         if (num_planes > 1)
1903                                 /*
1904                                  * Emit Cb/Cr as channel 0 and Y as channel
1905                                  * 1. This matches how we set up scl0/scl1
1906                                  * above.
1907                                  */
1908                                 vc4_write_scaling_parameters(state, 1);
1909
1910                         vc4_write_scaling_parameters(state, 0);
1911                 }
1912
1913                 /*
1914                  * If any PPF setup was done, then all the kernel
1915                  * pointers get uploaded.
1916                  */
1917                 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
1918                     vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
1919                     vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
1920                     vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
1921                         u32 kernel =
1922                                 VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
1923                                               SCALER_PPF_KERNEL_OFFSET);
1924
1925                         /* HPPF plane 0 */
1926                         vc4_dlist_write(vc4_state, kernel);
1927                         /* VPPF plane 0 */
1928                         vc4_dlist_write(vc4_state, kernel);
1929                         /* HPPF plane 1 */
1930                         vc4_dlist_write(vc4_state, kernel);
1931                         /* VPPF plane 1 */
1932                                 vc4_dlist_write(vc4_state, kernel);
1933                 }
1934         }
1935
1936         vc4_dlist_write(vc4_state, SCALER6_CTL0_END);
1937
1938         vc4_state->dlist[0] |=
1939                 VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);
1940
1941         /* crtc_* are already clipped coordinates. */
1942         covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
1943                         vc4_state->crtc_w == state->crtc->mode.hdisplay &&
1944                         vc4_state->crtc_h == state->crtc->mode.vdisplay;
1945
1946         /*
1947          * Background fill might be necessary when the plane has per-pixel
1948          * alpha content or a non-opaque plane alpha and could blend from the
1949          * background or does not cover the entire screen.
1950          */
1951         vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
1952                                    state->alpha != DRM_BLEND_ALPHA_OPAQUE;
1953
1954         /*
1955          * Flag the dlist as initialized to avoid checking it twice in case
1956          * the async update check already called vc4_plane_mode_set() and
1957          * decided to fallback to sync update because async update was not
1958          * possible.
1959          */
1960         vc4_state->dlist_initialized = 1;
1961
1962         vc4_plane_calc_load(state);
1963
1964         drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
1965                        plane->base.id, plane->name, vc4_state->dlist_count);
1966
1967         return 0;
1968 }
1969
1970 /* If a modeset involves changing the setup of a plane, the atomic
1971  * infrastructure will call this to validate a proposed plane setup.
1972  * However, if a plane isn't getting updated, this (and the
1973  * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
1974  * compute the dlist here and have all active plane dlists get updated
1975  * in the CRTC's flush.
1976  */
1977 int vc4_plane_atomic_check(struct drm_plane *plane,
1978                            struct drm_atomic_state *state)
1979 {
1980         struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
1981         struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
1982                                                                                  plane);
1983         struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
1984         int ret;
1985
1986         vc4_state->dlist_count = 0;
1987
1988         if (!plane_enabled(new_plane_state))
1989                 return 0;
1990
1991         if (vc4->gen >= VC4_GEN_6)
1992                 ret = vc6_plane_mode_set(plane, new_plane_state);
1993         else
1994                 ret = vc4_plane_mode_set(plane, new_plane_state);
1995         if (ret)
1996                 return ret;
1997
1998         ret = vc4_plane_allocate_lbm(new_plane_state);
1999         if (ret)
2000                 return ret;
2001
2002         if (vc4->gen >= VC4_GEN_6) {
2003                 ret = vc6_plane_allocate_upm(new_plane_state);
2004                 if (ret)
2005                         return ret;
2006         }
2007
2008         return 0;
2009 }
2010
/*
 * Deliberately empty.  At this point we don't yet know where in the
 * CRTC's dlist this plane will live, so the upload to the hardware is
 * done by vc4_plane_write_dlist() at CRTC atomic_flush time instead.
 */
static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_state *state)
{
}
2020
2021 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
2022 {
2023         struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2024         int i;
2025         int idx;
2026
2027         if (!drm_dev_enter(plane->dev, &idx))
2028                 goto out;
2029
2030         vc4_state->hw_dlist = dlist;
2031
2032         /* Can't memcpy_toio() because it needs to be 32-bit writes. */
2033         for (i = 0; i < vc4_state->dlist_count; i++)
2034                 writel(vc4_state->dlist[i], &dlist[i]);
2035
2036         drm_dev_exit(idx);
2037
2038 out:
2039         return vc4_state->dlist_count;
2040 }
2041
2042 u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
2043 {
2044         const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
2045
2046         return vc4_state->dlist_count;
2047 }
2048
2049 /* Updates the plane to immediately (well, once the FIFO needs
2050  * refilling) scan out from at a new framebuffer.
2051  */
2052 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
2053 {
2054         struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2055         struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
2056         uint32_t addr;
2057         int idx;
2058
2059         if (!drm_dev_enter(plane->dev, &idx))
2060                 return;
2061
2062         /* We're skipping the address adjustment for negative origin,
2063          * because this is only called on the primary plane.
2064          */
2065         WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
2066         addr = bo->dma_addr + fb->offsets[0];
2067
2068         /* Write the new address into the hardware immediately.  The
2069          * scanout will start from this address as soon as the FIFO
2070          * needs to refill with pixels.
2071          */
2072         writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2073
2074         /* Also update the CPU-side dlist copy, so that any later
2075          * atomic updates that don't do a new modeset on our plane
2076          * also use our updated address.
2077          */
2078         vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
2079
2080         drm_dev_exit(idx);
2081 }
2082
2083 static void vc4_plane_atomic_async_update(struct drm_plane *plane,
2084                                           struct drm_atomic_state *state)
2085 {
2086         struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2087                                                                                  plane);
2088         struct vc4_plane_state *vc4_state, *new_vc4_state;
2089         int idx;
2090
2091         if (!drm_dev_enter(plane->dev, &idx))
2092                 return;
2093
2094         swap(plane->state->fb, new_plane_state->fb);
2095         plane->state->crtc_x = new_plane_state->crtc_x;
2096         plane->state->crtc_y = new_plane_state->crtc_y;
2097         plane->state->crtc_w = new_plane_state->crtc_w;
2098         plane->state->crtc_h = new_plane_state->crtc_h;
2099         plane->state->src_x = new_plane_state->src_x;
2100         plane->state->src_y = new_plane_state->src_y;
2101         plane->state->src_w = new_plane_state->src_w;
2102         plane->state->src_h = new_plane_state->src_h;
2103         plane->state->alpha = new_plane_state->alpha;
2104         plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
2105         plane->state->rotation = new_plane_state->rotation;
2106         plane->state->zpos = new_plane_state->zpos;
2107         plane->state->normalized_zpos = new_plane_state->normalized_zpos;
2108         plane->state->color_encoding = new_plane_state->color_encoding;
2109         plane->state->color_range = new_plane_state->color_range;
2110         plane->state->src = new_plane_state->src;
2111         plane->state->dst = new_plane_state->dst;
2112         plane->state->visible = new_plane_state->visible;
2113
2114         new_vc4_state = to_vc4_plane_state(new_plane_state);
2115         vc4_state = to_vc4_plane_state(plane->state);
2116
2117         vc4_state->crtc_x = new_vc4_state->crtc_x;
2118         vc4_state->crtc_y = new_vc4_state->crtc_y;
2119         vc4_state->crtc_h = new_vc4_state->crtc_h;
2120         vc4_state->crtc_w = new_vc4_state->crtc_w;
2121         vc4_state->src_x = new_vc4_state->src_x;
2122         vc4_state->src_y = new_vc4_state->src_y;
2123         memcpy(vc4_state->src_w, new_vc4_state->src_w,
2124                sizeof(vc4_state->src_w));
2125         memcpy(vc4_state->src_h, new_vc4_state->src_h,
2126                sizeof(vc4_state->src_h));
2127         memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
2128                sizeof(vc4_state->x_scaling));
2129         memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
2130                sizeof(vc4_state->y_scaling));
2131         vc4_state->is_unity = new_vc4_state->is_unity;
2132         vc4_state->is_yuv = new_vc4_state->is_yuv;
2133         vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;
2134
2135         /* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
2136         vc4_state->dlist[vc4_state->pos0_offset] =
2137                 new_vc4_state->dlist[vc4_state->pos0_offset];
2138         vc4_state->dlist[vc4_state->pos2_offset] =
2139                 new_vc4_state->dlist[vc4_state->pos2_offset];
2140         vc4_state->dlist[vc4_state->ptr0_offset[0]] =
2141                 new_vc4_state->dlist[vc4_state->ptr0_offset[0]];
2142
2143         /* Note that we can't just call vc4_plane_write_dlist()
2144          * because that would smash the context data that the HVS is
2145          * currently using.
2146          */
2147         writel(vc4_state->dlist[vc4_state->pos0_offset],
2148                &vc4_state->hw_dlist[vc4_state->pos0_offset]);
2149         writel(vc4_state->dlist[vc4_state->pos2_offset],
2150                &vc4_state->hw_dlist[vc4_state->pos2_offset]);
2151         writel(vc4_state->dlist[vc4_state->ptr0_offset[0]],
2152                &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2153
2154         drm_dev_exit(idx);
2155 }
2156
2157 static int vc4_plane_atomic_async_check(struct drm_plane *plane,
2158                                         struct drm_atomic_state *state)
2159 {
2160         struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2161                                                                                  plane);
2162         struct vc4_plane_state *old_vc4_state, *new_vc4_state;
2163         int ret;
2164         u32 i;
2165
2166         ret = vc4_plane_mode_set(plane, new_plane_state);
2167         if (ret)
2168                 return ret;
2169
2170         old_vc4_state = to_vc4_plane_state(plane->state);
2171         new_vc4_state = to_vc4_plane_state(new_plane_state);
2172
2173         if (!new_vc4_state->hw_dlist)
2174                 return -EINVAL;
2175
2176         if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
2177             old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
2178             old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
2179             old_vc4_state->ptr0_offset[0] != new_vc4_state->ptr0_offset[0] ||
2180             vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
2181                 return -EINVAL;
2182
2183         /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
2184          * if anything else has changed, fallback to a sync update.
2185          */
2186         for (i = 0; i < new_vc4_state->dlist_count; i++) {
2187                 if (i == new_vc4_state->pos0_offset ||
2188                     i == new_vc4_state->pos2_offset ||
2189                     i == new_vc4_state->ptr0_offset[0] ||
2190                     (new_vc4_state->lbm_offset &&
2191                      i == new_vc4_state->lbm_offset))
2192                         continue;
2193
2194                 if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
2195                         return -EINVAL;
2196         }
2197
2198         return 0;
2199 }
2200
2201 static int vc4_prepare_fb(struct drm_plane *plane,
2202                           struct drm_plane_state *state)
2203 {
2204         struct vc4_bo *bo;
2205
2206         if (!state->fb)
2207                 return 0;
2208
2209         bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2210
2211         drm_gem_plane_helper_prepare_fb(plane, state);
2212
2213         if (plane->state->fb == state->fb)
2214                 return 0;
2215
2216         return vc4_bo_inc_usecnt(bo);
2217 }
2218
2219 static void vc4_cleanup_fb(struct drm_plane *plane,
2220                            struct drm_plane_state *state)
2221 {
2222         struct vc4_bo *bo;
2223
2224         if (plane->state->fb == state->fb || !state->fb)
2225                 return;
2226
2227         bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2228         vc4_bo_dec_usecnt(bo);
2229 }
2230
2231 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
2232         .atomic_check = vc4_plane_atomic_check,
2233         .atomic_update = vc4_plane_atomic_update,
2234         .prepare_fb = vc4_prepare_fb,
2235         .cleanup_fb = vc4_cleanup_fb,
2236         .atomic_async_check = vc4_plane_atomic_async_check,
2237         .atomic_async_update = vc4_plane_atomic_async_update,
2238 };
2239
2240 static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
2241         .atomic_check = vc4_plane_atomic_check,
2242         .atomic_update = vc4_plane_atomic_update,
2243         .atomic_async_check = vc4_plane_atomic_async_check,
2244         .atomic_async_update = vc4_plane_atomic_async_update,
2245 };
2246
2247 static bool vc4_format_mod_supported(struct drm_plane *plane,
2248                                      uint32_t format,
2249                                      uint64_t modifier)
2250 {
2251         /* Support T_TILING for RGB formats only. */
2252         switch (format) {
2253         case DRM_FORMAT_XRGB8888:
2254         case DRM_FORMAT_ARGB8888:
2255         case DRM_FORMAT_ABGR8888:
2256         case DRM_FORMAT_XBGR8888:
2257         case DRM_FORMAT_RGB565:
2258         case DRM_FORMAT_BGR565:
2259         case DRM_FORMAT_ARGB1555:
2260         case DRM_FORMAT_XRGB1555:
2261                 switch (fourcc_mod_broadcom_mod(modifier)) {
2262                 case DRM_FORMAT_MOD_LINEAR:
2263                 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
2264                         return true;
2265                 default:
2266                         return false;
2267                 }
2268         case DRM_FORMAT_NV12:
2269         case DRM_FORMAT_NV21:
2270                 switch (fourcc_mod_broadcom_mod(modifier)) {
2271                 case DRM_FORMAT_MOD_LINEAR:
2272                 case DRM_FORMAT_MOD_BROADCOM_SAND64:
2273                 case DRM_FORMAT_MOD_BROADCOM_SAND128:
2274                 case DRM_FORMAT_MOD_BROADCOM_SAND256:
2275                         return true;
2276                 default:
2277                         return false;
2278                 }
2279         case DRM_FORMAT_P030:
2280                 switch (fourcc_mod_broadcom_mod(modifier)) {
2281                 case DRM_FORMAT_MOD_BROADCOM_SAND128:
2282                         return true;
2283                 default:
2284                         return false;
2285                 }
2286         case DRM_FORMAT_RGBX1010102:
2287         case DRM_FORMAT_BGRX1010102:
2288         case DRM_FORMAT_RGBA1010102:
2289         case DRM_FORMAT_BGRA1010102:
2290         case DRM_FORMAT_XRGB4444:
2291         case DRM_FORMAT_ARGB4444:
2292         case DRM_FORMAT_XBGR4444:
2293         case DRM_FORMAT_ABGR4444:
2294         case DRM_FORMAT_RGBX4444:
2295         case DRM_FORMAT_RGBA4444:
2296         case DRM_FORMAT_BGRX4444:
2297         case DRM_FORMAT_BGRA4444:
2298         case DRM_FORMAT_RGB332:
2299         case DRM_FORMAT_BGR233:
2300         case DRM_FORMAT_YUV422:
2301         case DRM_FORMAT_YVU422:
2302         case DRM_FORMAT_YUV420:
2303         case DRM_FORMAT_YVU420:
2304         case DRM_FORMAT_NV16:
2305         case DRM_FORMAT_NV61:
2306         default:
2307                 return (modifier == DRM_FORMAT_MOD_LINEAR);
2308         }
2309 }
2310
2311 static const struct drm_plane_funcs vc4_plane_funcs = {
2312         .update_plane = drm_atomic_helper_update_plane,
2313         .disable_plane = drm_atomic_helper_disable_plane,
2314         .reset = vc4_plane_reset,
2315         .atomic_duplicate_state = vc4_plane_duplicate_state,
2316         .atomic_destroy_state = vc4_plane_destroy_state,
2317         .format_mod_supported = vc4_format_mod_supported,
2318 };
2319
2320 struct drm_plane *vc4_plane_init(struct drm_device *dev,
2321                                  enum drm_plane_type type,
2322                                  uint32_t possible_crtcs)
2323 {
2324         struct vc4_dev *vc4 = to_vc4_dev(dev);
2325         struct drm_plane *plane;
2326         struct vc4_plane *vc4_plane;
2327         u32 formats[ARRAY_SIZE(hvs_formats)];
2328         int num_formats = 0;
2329         unsigned i;
2330         static const uint64_t modifiers[] = {
2331                 DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
2332                 DRM_FORMAT_MOD_BROADCOM_SAND128,
2333                 DRM_FORMAT_MOD_BROADCOM_SAND64,
2334                 DRM_FORMAT_MOD_BROADCOM_SAND256,
2335                 DRM_FORMAT_MOD_LINEAR,
2336                 DRM_FORMAT_MOD_INVALID
2337         };
2338
2339         for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
2340                 if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) {
2341                         formats[num_formats] = hvs_formats[i].drm;
2342                         num_formats++;
2343                 }
2344         }
2345
2346         vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
2347                                                possible_crtcs,
2348                                                &vc4_plane_funcs,
2349                                                formats, num_formats,
2350                                                modifiers, type, NULL);
2351         if (IS_ERR(vc4_plane))
2352                 return ERR_CAST(vc4_plane);
2353         plane = &vc4_plane->base;
2354
2355         if (vc4->gen >= VC4_GEN_5)
2356                 drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
2357         else
2358                 drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
2359
2360         drm_plane_create_alpha_property(plane);
2361         drm_plane_create_blend_mode_property(plane,
2362                                              BIT(DRM_MODE_BLEND_PIXEL_NONE) |
2363                                              BIT(DRM_MODE_BLEND_PREMULTI) |
2364                                              BIT(DRM_MODE_BLEND_COVERAGE));
2365         drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
2366                                            DRM_MODE_ROTATE_0 |
2367                                            DRM_MODE_ROTATE_180 |
2368                                            DRM_MODE_REFLECT_X |
2369                                            DRM_MODE_REFLECT_Y);
2370
2371         drm_plane_create_color_properties(plane,
2372                                           BIT(DRM_COLOR_YCBCR_BT601) |
2373                                           BIT(DRM_COLOR_YCBCR_BT709) |
2374                                           BIT(DRM_COLOR_YCBCR_BT2020),
2375                                           BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
2376                                           BIT(DRM_COLOR_YCBCR_FULL_RANGE),
2377                                           DRM_COLOR_YCBCR_BT709,
2378                                           DRM_COLOR_YCBCR_LIMITED_RANGE);
2379
2380         drm_plane_create_chroma_siting_properties(plane, 0, 0);
2381
2382         if (type == DRM_PLANE_TYPE_PRIMARY)
2383                 drm_plane_create_zpos_immutable_property(plane, 0);
2384
2385         return plane;
2386 }
2387
2388 #define VC4_NUM_OVERLAY_PLANES  16
2389
2390 int vc4_plane_create_additional_planes(struct drm_device *drm)
2391 {
2392         struct drm_plane *cursor_plane;
2393         struct drm_crtc *crtc;
2394         unsigned int i;
2395
2396         /* Set up some arbitrary number of planes.  We're not limited
2397          * by a set number of physical registers, just the space in
2398          * the HVS (16k) and how small an plane can be (28 bytes).
2399          * However, each plane we set up takes up some memory, and
2400          * increases the cost of looping over planes, which atomic
2401          * modesetting does quite a bit.  As a result, we pick a
2402          * modest number of planes to expose, that should hopefully
2403          * still cover any sane usecase.
2404          */
2405         for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) {
2406                 struct drm_plane *plane =
2407                         vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
2408                                        GENMASK(drm->mode_config.num_crtc - 1, 0));
2409
2410                 if (IS_ERR(plane))
2411                         continue;
2412
2413                 /* Create zpos property. Max of all the overlays + 1 primary +
2414                  * 1 cursor plane on a crtc.
2415                  */
2416                 drm_plane_create_zpos_property(plane, i + 1, 1,
2417                                                VC4_NUM_OVERLAY_PLANES + 1);
2418         }
2419
2420         drm_for_each_crtc(crtc, drm) {
2421                 /* Set up the legacy cursor after overlay initialization,
2422                  * since the zpos fallback is that planes are rendered by plane
2423                  * ID order, and that then puts the cursor on top.
2424                  */
2425                 cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
2426                                               drm_crtc_mask(crtc));
2427                 if (!IS_ERR(cursor_plane)) {
2428                         crtc->cursor = cursor_plane;
2429
2430                         drm_plane_create_zpos_property(cursor_plane,
2431                                                        VC4_NUM_OVERLAY_PLANES + 1,
2432                                                        1,
2433                                                        VC4_NUM_OVERLAY_PLANES + 1);
2434                 }
2435         }
2436
2437         return 0;
2438 }