drm/vc4: Assign LBM memory during atomic_flush.
Author:     Dave Stevenson <dave.stevenson@raspberrypi.com>
AuthorDate: Thu, 31 Aug 2023 10:45:38 +0000 (11:45 +0100)
Committer:  Dom Cobley <popcornmix@gmail.com>
CommitDate: Mon, 19 Feb 2024 11:35:01 +0000 (11:35 +0000)

Avoid double-buffering LBM allocations by switching to a single LBM
allocation per CRTC, made at atomic_flush time. Each plane now only
records the LBM size it requires at atomic_check; the CRTC then
allocates the total and hands each plane its offset when the display
lists are written.
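
In outline, the new flow in vc4_hvs_atomic_flush() is roughly the
following (condensed from the hunks below; the irq_lock handling, the
error path and the z-ordered plane walk are omitted):

        u32 lbm_size = 0, lbm_offset;

        /* Sum the per-plane LBM requirements recorded at atomic_check. */
        drm_atomic_crtc_for_each_plane(plane, crtc)
                lbm_size += to_vc4_plane_state(plane->state)->lbm_size;

        /* Drop any previous per-CRTC allocation and make one new node. */
        if (drm_mm_node_allocated(&vc4_crtc->lbm))
                drm_mm_remove_node(&vc4_crtc->lbm);
        if (lbm_size)
                drm_mm_insert_node_generic(&vc4->hvs->lbm_mm, &vc4_crtc->lbm,
                                           lbm_size, 1, 0, 0);

        /* While writing each plane's dlist, patch in its slice of the
         * CRTC allocation and advance the running offset.
         */
        lbm_offset = vc4_crtc->lbm.start;
        vc4_plane_state->dlist[vc4_plane_state->lbm_offset] = lbm_offset;
        lbm_offset += vc4_plane_state->lbm_size;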

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
drivers/gpu/drm/vc4/tests/vc4_test_lbm_size.c
drivers/gpu/drm/vc4/vc4_drv.h
drivers/gpu/drm/vc4/vc4_hvs.c
drivers/gpu/drm/vc4/vc4_plane.c

diff --git a/drivers/gpu/drm/vc4/tests/vc4_test_lbm_size.c b/drivers/gpu/drm/vc4/tests/vc4_test_lbm_size.c
index c1479cf..5f4e301 100644
--- a/drivers/gpu/drm/vc4/tests/vc4_test_lbm_size.c
+++ b/drivers/gpu/drm/vc4/tests/vc4_test_lbm_size.c
@@ -248,7 +248,7 @@ static void drm_vc4_test_vc4_lbm_size(struct kunit *test)
        ret = drm_atomic_check_only(state);
        KUNIT_ASSERT_EQ(test, ret, 0);
 
-       KUNIT_EXPECT_EQ(test, vc4_plane_state->lbm.size, params->expected_lbm_size);
+       KUNIT_EXPECT_EQ(test, vc4_plane_state->lbm_size, params->expected_lbm_size);
 
        for (i = 0; i < 2; i++) {
                KUNIT_EXPECT_EQ(test,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 4d9d446..deeeeac 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -417,6 +417,8 @@ struct vc4_plane_state {
        u32 dlist_size; /* Number of dwords allocated for the display list */
        u32 dlist_count; /* Number of used dwords in the display list. */
 
+       u32 lbm_size; /* LBM requirements for this plane */
+
        /* Offset in the dlist to various words, for pageflip or
         * cursor updates.
         */
@@ -442,9 +444,6 @@ struct vc4_plane_state {
        bool is_unity;
        bool is_yuv;
 
-       /* Our allocation in LBM for temporary storage during scaling. */
-       struct drm_mm_node lbm;
-
        /* Our allocation in UPM for prefetching. */
        struct drm_mm_node upm[DRM_FORMAT_MAX_PLANES];
 
@@ -635,6 +634,9 @@ struct vc4_crtc {
         * access to that value.
         */
        unsigned int current_hvs_channel;
+
+       /* @lbm: Our allocation in LBM for temporary storage during scaling. */
+       struct drm_mm_node lbm;
 };
 
 #define to_vc4_crtc(_crtc)                                     \
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index e88908b..b443a32 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -1103,6 +1103,7 @@ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
        struct drm_plane *plane;
        const struct drm_plane_state *plane_state;
        u32 dlist_count = 0;
+       u32 lbm_count = 0;
 
        /* The pixelvalve can only feed one encoder (and encoders are
         * 1:1 with connectors.)
@@ -1111,6 +1112,8 @@ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
                return -EINVAL;
 
        drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
+               const struct vc4_plane_state *vc4_plane_state =
+                                               to_vc4_plane_state(plane_state);
                u32 plane_dlist_count = vc4_plane_dlist_size(plane_state);
 
                drm_dbg_driver(dev, "[CRTC:%d:%s] Found [PLANE:%d:%s] with DLIST size: %u\n",
@@ -1119,6 +1122,7 @@ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
                               plane_dlist_count);
 
                dlist_count += plane_dlist_count;
+               lbm_count += vc4_plane_state->lbm_size;
        }
 
        dlist_count++; /* Account for SCALER_CTL0_END. */
@@ -1132,6 +1136,8 @@ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
 
        vc4_state->mm = alloc;
 
+       /* FIXME: Check total lbm allocation here */
+
        return vc4_hvs_gamma_check(crtc, state);
 }
 
@@ -1246,7 +1252,10 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
        bool debug_dump_regs = false;
        bool enable_bg_fill = false;
        u32 __iomem *dlist_start, *dlist_next;
+       unsigned long irqflags;
        unsigned int zpos = 0;
+       u32 lbm_offset = 0;
+       u32 lbm_size = 0;
        bool found = false;
        int idx;
 
@@ -1265,6 +1274,35 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
                vc4_hvs_dump_state(hvs);
        }
 
+       drm_atomic_crtc_for_each_plane(plane, crtc) {
+               vc4_plane_state = to_vc4_plane_state(plane->state);
+               lbm_size += vc4_plane_state->lbm_size;
+       }
+
+       if (drm_mm_node_allocated(&vc4_crtc->lbm)) {
+               spin_lock_irqsave(&vc4_crtc->irq_lock, irqflags);
+               drm_mm_remove_node(&vc4_crtc->lbm);
+               spin_unlock_irqrestore(&vc4_crtc->irq_lock, irqflags);
+       }
+
+       if (lbm_size) {
+               int ret;
+
+               spin_lock_irqsave(&vc4_crtc->irq_lock, irqflags);
+               ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
+                                                &vc4_crtc->lbm,
+                                                lbm_size, 1,
+                                                0, 0);
+               spin_unlock_irqrestore(&vc4_crtc->irq_lock, irqflags);
+
+               if (ret) {
+                       pr_err("Failed to allocate LBM ret %d\n", ret);
+                       return;
+               }
+       }
+
+       lbm_offset = vc4_crtc->lbm.start;
+
        dlist_start = vc4->hvs->dlist + vc4_state->mm->mm_node.start;
        dlist_next = dlist_start;
 
@@ -1276,6 +1314,8 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
                        if (plane->state->normalized_zpos != zpos)
                                continue;
 
+                       vc4_plane_state = to_vc4_plane_state(plane->state);
+
                        /* Is this the first active plane? */
                        if (dlist_next == dlist_start) {
                                /* We need to enable background fill when a plane
@@ -1286,10 +1326,15 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
                                 * already needs it or all planes on top blend from
                                 * the first or a lower plane.
                                 */
-                               vc4_plane_state = to_vc4_plane_state(plane->state);
                                enable_bg_fill = vc4_plane_state->needs_bg_fill;
                        }
 
+                       if (vc4_plane_state->lbm_size) {
+                               vc4_plane_state->dlist[vc4_plane_state->lbm_offset] =
+                                                               lbm_offset;
+                               lbm_offset += vc4_plane_state->lbm_size;
+                       }
+
                        dlist_next += vc4_plane_write_dlist(plane, dlist_next);
 
                        found = true;
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 3059b60..15d1dd4 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -288,7 +288,6 @@ struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
        if (!vc4_state)
                return NULL;
 
-       memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
        memset(&vc4_state->upm, 0, sizeof(vc4_state->upm));
 
        for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++)
@@ -320,14 +319,6 @@ void vc4_plane_destroy_state(struct drm_plane *plane,
        struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
        unsigned int i;
 
-       if (drm_mm_node_allocated(&vc4_state->lbm)) {
-               unsigned long irqflags;
-
-               spin_lock_irqsave(&hvs->mm_lock, irqflags);
-               drm_mm_remove_node(&vc4_state->lbm);
-               spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
-       }
-
        for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
                unsigned long irqflags;
 
@@ -903,12 +894,13 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
        struct vc4_dev *vc4 = to_vc4_dev(drm);
        struct drm_plane *plane = state->plane;
        struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
-       unsigned long irqflags;
        u32 lbm_size;
 
        lbm_size = vc4_lbm_size(state);
-       if (!lbm_size)
+       if (!lbm_size) {
+               vc4_state->lbm_size = 0;
                return 0;
+       }
 
        /*
         * NOTE: BCM2712 doesn't need to be aligned, since the size
@@ -925,28 +917,10 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
        if (WARN_ON(!vc4_state->lbm_offset))
                return -EINVAL;
 
-       /* Allocate the LBM memory that the HVS will use for temporary
-        * storage due to our scaling/format conversion.
+       /* FIXME: Add loop here that ensures that the total LBM assigned in this
+        *  state is less than the total lbm size
         */
-       if (!drm_mm_node_allocated(&vc4_state->lbm)) {
-               int ret;
-
-               spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
-               ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
-                                                &vc4_state->lbm,
-                                                lbm_size, 1,
-                                                0, 0);
-               spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
-
-               if (ret) {
-                       drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
-                       return ret;
-               }
-       } else {
-               WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
-       }
-
-       vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
+       vc4_state->lbm_size = lbm_size;
 
        return 0;
 }