From 6e55d273708b2a9485eb7c035efd2757fdae1869 Mon Sep 17 00:00:00 2001
From: Alexey Sheplyakov <asheplyakov@basealt.ru>
Date: Sat, 15 Jan 2022 20:06:58 +0400
Subject: [PATCH] drm/panfrost: initial dual core group GPUs support

On a dual core group GPUs (such as T628) fragment shading can be
performed over all cores (because a fragment shader job doesn't
need coherency between threads), however vertex shading requires
to be run on the same core group as the tiler (which always lives
in core group 0).

As a first step to support T628 power on only the first core group
(so no jobs are scheduled on the second one). This makes T628 look
like every other Midgard GPU (and throws away up to half the cores).

With this patch panfrost is able to drive T628 (r1p0) GPU on some
armv8 SoCs (in particular BE-M1000). Without the patch rendering
is horribly broken (desktop is completely unusable) and eventually
the GPU locks up (it takes from a few seconds to a couple of
minutes).

Using the second core group requires support in Mesa (and an UABI
change): the userspace should

1) set PANFROST_JD_DOESNT_NEED_COHERENCY_ON_GPU flag to opt-in
   to allowing the job to run across all cores.
2) set PANFROST_RUN_ON_SECOND_CORE_GROUP flag to allow compute
   jobs to be run on the second core group (at the moment Mesa
   does not advertise compute support on anything older than
   Mali T760)

But there's little point adding such flags until someone (myself)
steps up to do the Mesa work.

Signed-off-by: Alexey Sheplyakov <asheplyakov@basealt.ru>
Signed-off-by: Vadim V. Vlasov <vadim.vlasov@elpitech.ru>
Tested-by: Alexey Sheplyakov <asheplyakov@basealt.ru>
Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220115160658.582646-1-asheplyakov@basealt.ru
---
 drivers/gpu/drm/panfrost/panfrost_gpu.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index f8355de..15cec83 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -320,19 +320,36 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 {
 	int ret;
 	u32 val;
+	u64 core_mask = U64_MAX;
 
 	panfrost_gpu_init_quirks(pfdev);
 
-	/* Just turn on everything for now */
-	gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present);
+	if (pfdev->features.l2_present != 1) {
+		/*
+		 * Only support one core group now.
+		 * ~(l2_present - 1) unsets all bits in l2_present except
+		 * the bottom bit. (l2_present - 2) has all the bits in
+		 * the first core group set. AND them together to generate
+		 * a mask of cores in the first core group.
+		 */
+		core_mask = ~(pfdev->features.l2_present - 1) &
+			     (pfdev->features.l2_present - 2);
+		dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n",
+			      hweight64(core_mask),
+			      hweight64(pfdev->features.shader_present));
+	}
+	gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present & core_mask);
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO,
-		val, val == pfdev->features.l2_present, 100, 20000);
+		val, val == (pfdev->features.l2_present & core_mask),
+		100, 20000);
 	if (ret)
 		dev_err(pfdev->dev, "error powering up gpu L2");
 
-	gpu_write(pfdev, SHADER_PWRON_LO, pfdev->features.shader_present);
+	gpu_write(pfdev, SHADER_PWRON_LO,
+		  pfdev->features.shader_present & core_mask);
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO,
-		val, val == pfdev->features.shader_present, 100, 20000);
+		val, val == (pfdev->features.shader_present & core_mask),
+		100, 20000);
 	if (ret)
 		dev_err(pfdev->dev, "error powering up gpu shader");
 
-- 
2.7.4