drm/i915/gt: Move TLB invalidation to its own file
author: Chris Wilson <chris.p.wilson@linux.intel.com>
Tue, 1 Aug 2023 14:19:52 +0000 (16:19 +0200)
committer: Andi Shyti <andi.shyti@linux.intel.com>
Wed, 2 Aug 2023 13:40:11 +0000 (15:40 +0200)
Prepare for supporting more TLB invalidation scenarios by moving
the current MMIO invalidation to its own file.

Signed-off-by: Chris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230801141955.383305-2-andi.shyti@linux.intel.com
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/gem/i915_gem_pages.c
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/gt/intel_gt.h
drivers/gpu/drm/i915/gt/intel_tlb.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_tlb.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/selftest_tlb.c
drivers/gpu/drm/i915/i915_vma.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 7a3d5da..ccf5b1d 100644 (file)
@@ -131,6 +131,7 @@ gt-y += \
        gt/intel_sseu.o \
        gt/intel_sseu_debugfs.o \
        gt/intel_timeline.o \
+       gt/intel_tlb.o \
        gt/intel_wopcm.o \
        gt/intel_workarounds.o \
        gt/shmem_utils.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 89fc8ea..83852f7 100644 (file)
@@ -7,7 +7,7 @@
 #include <drm/drm_cache.h>
 
 #include "gt/intel_gt.h"
-#include "gt/intel_gt_pm.h"
+#include "gt/intel_tlb.h"
 
 #include "i915_drv.h"
 #include "i915_gem_object.h"
@@ -198,7 +198,7 @@ static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
        if (!obj->mm.tlb)
                return;
 
-       intel_gt_invalidate_tlb(gt, obj->mm.tlb);
+       intel_gt_invalidate_tlb_full(gt, obj->mm.tlb);
        obj->mm.tlb = 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 1cf325f..15a865a 100644 (file)
@@ -33,6 +33,7 @@
 #include "intel_rps.h"
 #include "intel_sa_media.h"
 #include "intel_gt_sysfs.h"
+#include "intel_tlb.h"
 #include "intel_uncore.h"
 #include "shmem_utils.h"
 
@@ -50,8 +51,7 @@ void intel_gt_common_init_early(struct intel_gt *gt)
        intel_gt_init_reset(gt);
        intel_gt_init_requests(gt);
        intel_gt_init_timelines(gt);
-       mutex_init(&gt->tlb.invalidate_lock);
-       seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
+       intel_gt_init_tlb(gt);
        intel_gt_pm_init_early(gt);
 
        intel_wopcm_init_early(&gt->wopcm);
@@ -846,7 +846,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
                intel_gt_fini_requests(gt);
                intel_gt_fini_reset(gt);
                intel_gt_fini_timelines(gt);
-               mutex_destroy(&gt->tlb.invalidate_lock);
+               intel_gt_fini_tlb(gt);
                intel_engines_free(gt);
        }
 }
@@ -1003,137 +1003,3 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 
        intel_sseu_dump(&info->sseu, p);
 }
-
-/*
- * HW architecture suggest typical invalidation time at 40us,
- * with pessimistic cases up to 100us and a recommendation to
- * cap at 1ms. We go a bit higher just in case.
- */
-#define TLB_INVAL_TIMEOUT_US 100
-#define TLB_INVAL_TIMEOUT_MS 4
-
-/*
- * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
- * but are now considered MCR registers.  Since they exist within a GAM range,
- * the primary instance of the register rolls up the status from each unit.
- */
-static int wait_for_invalidate(struct intel_engine_cs *engine)
-{
-       if (engine->tlb_inv.mcr)
-               return intel_gt_mcr_wait_for_reg(engine->gt,
-                                                engine->tlb_inv.reg.mcr_reg,
-                                                engine->tlb_inv.done,
-                                                0,
-                                                TLB_INVAL_TIMEOUT_US,
-                                                TLB_INVAL_TIMEOUT_MS);
-       else
-               return __intel_wait_for_register_fw(engine->gt->uncore,
-                                                   engine->tlb_inv.reg.reg,
-                                                   engine->tlb_inv.done,
-                                                   0,
-                                                   TLB_INVAL_TIMEOUT_US,
-                                                   TLB_INVAL_TIMEOUT_MS,
-                                                   NULL);
-}
-
-static void mmio_invalidate_full(struct intel_gt *gt)
-{
-       struct drm_i915_private *i915 = gt->i915;
-       struct intel_uncore *uncore = gt->uncore;
-       struct intel_engine_cs *engine;
-       intel_engine_mask_t awake, tmp;
-       enum intel_engine_id id;
-       unsigned long flags;
-
-       if (GRAPHICS_VER(i915) < 8)
-               return;
-
-       intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-
-       intel_gt_mcr_lock(gt, &flags);
-       spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
-
-       awake = 0;
-       for_each_engine(engine, gt, id) {
-               if (!intel_engine_pm_is_awake(engine))
-                       continue;
-
-               if (engine->tlb_inv.mcr)
-                       intel_gt_mcr_multicast_write_fw(gt,
-                                                       engine->tlb_inv.reg.mcr_reg,
-                                                       engine->tlb_inv.request);
-               else
-                       intel_uncore_write_fw(uncore,
-                                             engine->tlb_inv.reg.reg,
-                                             engine->tlb_inv.request);
-
-               awake |= engine->mask;
-       }
-
-       GT_TRACE(gt, "invalidated engines %08x\n", awake);
-
-       /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
-       if (awake &&
-           (IS_TIGERLAKE(i915) ||
-            IS_DG1(i915) ||
-            IS_ROCKETLAKE(i915) ||
-            IS_ALDERLAKE_S(i915) ||
-            IS_ALDERLAKE_P(i915)))
-               intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
-
-       spin_unlock(&uncore->lock);
-       intel_gt_mcr_unlock(gt, flags);
-
-       for_each_engine_masked(engine, gt, awake, tmp) {
-               if (wait_for_invalidate(engine))
-                       gt_err_ratelimited(gt,
-                                          "%s TLB invalidation did not complete in %ums!\n",
-                                          engine->name, TLB_INVAL_TIMEOUT_MS);
-       }
-
-       /*
-        * Use delayed put since a) we mostly expect a flurry of TLB
-        * invalidations so it is good to avoid paying the forcewake cost and
-        * b) it works around a bug in Icelake which cannot cope with too rapid
-        * transitions.
-        */
-       intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
-}
-
-static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
-{
-       u32 cur = intel_gt_tlb_seqno(gt);
-
-       /* Only skip if a *full* TLB invalidate barrier has passed */
-       return (s32)(cur - ALIGN(seqno, 2)) > 0;
-}
-
-void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
-{
-       intel_wakeref_t wakeref;
-
-       if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
-               return;
-
-       if (intel_gt_is_wedged(gt))
-               return;
-
-       if (tlb_seqno_passed(gt, seqno))
-               return;
-
-       with_intel_gt_pm_if_awake(gt, wakeref) {
-               mutex_lock(&gt->tlb.invalidate_lock);
-               if (tlb_seqno_passed(gt, seqno))
-                       goto unlock;
-
-               mmio_invalidate_full(gt);
-
-               write_seqcount_invalidate(&gt->tlb.seqno);
-unlock:
-               mutex_unlock(&gt->tlb.invalidate_lock);
-       }
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_tlb.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index d2f4fbd..6549e28 100644 (file)
@@ -107,16 +107,4 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 
 void intel_gt_watchdog_work(struct work_struct *work);
 
-static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
-{
-       return seqprop_sequence(&gt->tlb.seqno);
-}
-
-static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
-{
-       return intel_gt_tlb_seqno(gt) | 1;
-}
-
-void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);
-
 #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
new file mode 100644 (file)
index 0000000..139608c
--- /dev/null
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_perf_oa_regs.h"
+#include "intel_engine_pm.h"
+#include "intel_gt.h"
+#include "intel_gt_mcr.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_print.h"
+#include "intel_gt_regs.h"
+#include "intel_tlb.h"
+
+/*
+ * HW architecture suggest typical invalidation time at 40us,
+ * with pessimistic cases up to 100us and a recommendation to
+ * cap at 1ms. We go a bit higher just in case.
+ */
+#define TLB_INVAL_TIMEOUT_US 100
+#define TLB_INVAL_TIMEOUT_MS 4
+
+/*
+ * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
+ * but are now considered MCR registers.  Since they exist within a GAM range,
+ * the primary instance of the register rolls up the status from each unit.
+ */
+static int wait_for_invalidate(struct intel_engine_cs *engine)
+{
+       if (engine->tlb_inv.mcr)
+               return intel_gt_mcr_wait_for_reg(engine->gt,
+                                                engine->tlb_inv.reg.mcr_reg,
+                                                engine->tlb_inv.done,
+                                                0,
+                                                TLB_INVAL_TIMEOUT_US,
+                                                TLB_INVAL_TIMEOUT_MS);
+       else
+               return __intel_wait_for_register_fw(engine->gt->uncore,
+                                                   engine->tlb_inv.reg.reg,
+                                                   engine->tlb_inv.done,
+                                                   0,
+                                                   TLB_INVAL_TIMEOUT_US,
+                                                   TLB_INVAL_TIMEOUT_MS,
+                                                   NULL);
+}
+
+static void mmio_invalidate_full(struct intel_gt *gt)
+{
+       struct drm_i915_private *i915 = gt->i915;
+       struct intel_uncore *uncore = gt->uncore;
+       struct intel_engine_cs *engine;
+       intel_engine_mask_t awake, tmp;
+       enum intel_engine_id id;
+       unsigned long flags;
+
+       if (GRAPHICS_VER(i915) < 8)
+               return;
+
+       intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+       intel_gt_mcr_lock(gt, &flags);
+       spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
+
+       awake = 0;
+       for_each_engine(engine, gt, id) {
+               if (!intel_engine_pm_is_awake(engine))
+                       continue;
+
+               if (engine->tlb_inv.mcr)
+                       intel_gt_mcr_multicast_write_fw(gt,
+                                                       engine->tlb_inv.reg.mcr_reg,
+                                                       engine->tlb_inv.request);
+               else
+                       intel_uncore_write_fw(uncore,
+                                             engine->tlb_inv.reg.reg,
+                                             engine->tlb_inv.request);
+
+               awake |= engine->mask;
+       }
+
+       GT_TRACE(gt, "invalidated engines %08x\n", awake);
+
+       /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
+       if (awake &&
+           (IS_TIGERLAKE(i915) ||
+            IS_DG1(i915) ||
+            IS_ROCKETLAKE(i915) ||
+            IS_ALDERLAKE_S(i915) ||
+            IS_ALDERLAKE_P(i915)))
+               intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
+
+       spin_unlock(&uncore->lock);
+       intel_gt_mcr_unlock(gt, flags);
+
+       for_each_engine_masked(engine, gt, awake, tmp) {
+               if (wait_for_invalidate(engine))
+                       gt_err_ratelimited(gt,
+                                          "%s TLB invalidation did not complete in %ums!\n",
+                                          engine->name, TLB_INVAL_TIMEOUT_MS);
+       }
+
+       /*
+        * Use delayed put since a) we mostly expect a flurry of TLB
+        * invalidations so it is good to avoid paying the forcewake cost and
+        * b) it works around a bug in Icelake which cannot cope with too rapid
+        * transitions.
+        */
+       intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+}
+
+static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+{
+       u32 cur = intel_gt_tlb_seqno(gt);
+
+       /* Only skip if a *full* TLB invalidate barrier has passed */
+       return (s32)(cur - ALIGN(seqno, 2)) > 0;
+}
+
+void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
+{
+       intel_wakeref_t wakeref;
+
+       if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+               return;
+
+       if (intel_gt_is_wedged(gt))
+               return;
+
+       if (tlb_seqno_passed(gt, seqno))
+               return;
+
+       with_intel_gt_pm_if_awake(gt, wakeref) {
+               mutex_lock(&gt->tlb.invalidate_lock);
+               if (tlb_seqno_passed(gt, seqno))
+                       goto unlock;
+
+               mmio_invalidate_full(gt);
+
+               write_seqcount_invalidate(&gt->tlb.seqno);
+unlock:
+               mutex_unlock(&gt->tlb.invalidate_lock);
+       }
+}
+
+void intel_gt_init_tlb(struct intel_gt *gt)
+{
+       mutex_init(&gt->tlb.invalidate_lock);
+       seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
+}
+
+void intel_gt_fini_tlb(struct intel_gt *gt)
+{
+       mutex_destroy(&gt->tlb.invalidate_lock);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_tlb.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.h b/drivers/gpu/drm/i915/gt/intel_tlb.h
new file mode 100644 (file)
index 0000000..337327a
--- /dev/null
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef INTEL_TLB_H
+#define INTEL_TLB_H
+
+#include <linux/seqlock.h>
+#include <linux/types.h>
+
+#include "intel_gt_types.h"
+
+void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno);
+
+void intel_gt_init_tlb(struct intel_gt *gt);
+void intel_gt_fini_tlb(struct intel_gt *gt);
+
+static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
+{
+       return seqprop_sequence(&gt->tlb.seqno);
+}
+
+static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
+{
+       return intel_gt_tlb_seqno(gt) | 1;
+}
+
+#endif /* INTEL_TLB_H */
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 3bd6b54..7e41f69 100644 (file)
@@ -6,6 +6,7 @@
 #include "i915_selftest.h"
 
 #include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 
 #include "gen8_engine_cs.h"
@@ -354,7 +355,7 @@ out_a:
 
 static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
 {
-       intel_gt_invalidate_tlb(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
+       intel_gt_invalidate_tlb_full(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
 }
 
 static int invalidate_full(void *arg)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index a814775..b9b8dae 100644 (file)
@@ -34,6 +34,7 @@
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_requests.h"
+#include "gt/intel_tlb.h"
 
 #include "i915_drv.h"
 #include "i915_gem_evict.h"