drm/i915/gt: Ignore TLB invalidations on idle engines
author Chris Wilson <chris.p.wilson@intel.com>
Wed, 27 Jul 2022 12:29:51 +0000 (14:29 +0200)
committer Andi Shyti <andi.shyti@linux.intel.com>
Thu, 28 Jul 2022 11:47:01 +0000 (13:47 +0200)
Before touching any engine, check whether the device is powered down.
In that case all the TLBs have already been invalidated, so an
explicit TLB invalidation is not needed. Skipping it reduces the
performance regression caused by the explicit TLB invalidations.

This becomes more significant with GuC, as the invalidation can only
be performed while the connection to the GuC is awake.

Cc: stable@vger.kernel.org
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Cc: Fei Yang <fei.yang@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/278a57a672edac75683f0818b292e95da583a5fe.1658924372.git.mchehab@kernel.org
drivers/gpu/drm/i915/gem/i915_gem_pages.c
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/gt/intel_gt_pm.h

index 97c820eee115adc91641bce392f1a052dce9eba7..6835279943df849d4be8e0086f982b3d2a62728c 100644 (file)
@@ -6,14 +6,15 @@
 
 #include <drm/drm_cache.h>
 
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+
 #include "i915_drv.h"
 #include "i915_gem_object.h"
 #include "i915_scatterlist.h"
 #include "i915_gem_lmem.h"
 #include "i915_gem_mman.h"
 
-#include "gt/intel_gt.h"
-
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
                                 struct sg_table *pages,
                                 unsigned int sg_page_sizes)
@@ -217,10 +218,11 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 
        if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
                struct drm_i915_private *i915 = to_i915(obj->base.dev);
+               struct intel_gt *gt = to_gt(i915);
                intel_wakeref_t wakeref;
 
-               with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
-                       intel_gt_invalidate_tlbs(to_gt(i915));
+               with_intel_gt_pm_if_awake(gt, wakeref)
+                       intel_gt_invalidate_tlbs(gt);
        }
 
        return pages;
index 68c2b0d8f18761d4866aa665316b71f631bf863f..c4d43da84d8edd4cf6a535381718c1c3ae663997 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "i915_drv.h"
 #include "intel_context.h"
+#include "intel_engine_pm.h"
 #include "intel_engine_regs.h"
 #include "intel_ggtt_gmch.h"
 #include "intel_gt.h"
@@ -924,6 +925,7 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
        struct drm_i915_private *i915 = gt->i915;
        struct intel_uncore *uncore = gt->uncore;
        struct intel_engine_cs *engine;
+       intel_engine_mask_t awake, tmp;
        enum intel_engine_id id;
        const i915_reg_t *regs;
        unsigned int num = 0;
@@ -947,26 +949,31 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 
        GEM_TRACE("\n");
 
-       assert_rpm_wakelock_held(&i915->runtime_pm);
-
        mutex_lock(&gt->tlb_invalidate_lock);
        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
        spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
 
+       awake = 0;
        for_each_engine(engine, gt, id) {
                struct reg_and_bit rb;
 
+               if (!intel_engine_pm_is_awake(engine))
+                       continue;
+
                rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
                if (!i915_mmio_reg_offset(rb.reg))
                        continue;
 
                intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+               awake |= engine->mask;
        }
 
        spin_unlock_irq(&uncore->lock);
 
-       for_each_engine(engine, gt, id) {
+       for_each_engine_masked(engine, gt, awake, tmp) {
+               struct reg_and_bit rb;
+
                /*
                 * HW architecture suggest typical invalidation time at 40us,
                 * with pessimistic cases up to 100us and a recommendation to
@@ -974,12 +981,8 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
                 */
                const unsigned int timeout_us = 100;
                const unsigned int timeout_ms = 4;
-               struct reg_and_bit rb;
 
                rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-               if (!i915_mmio_reg_offset(rb.reg))
-                       continue;
-
                if (__intel_wait_for_register_fw(uncore,
                                                 rb.reg, rb.bit, 0,
                                                 timeout_us, timeout_ms,
index bc898df7a48ccfe930f6e32fd5a1a59d736fbd17..a334787a4939f76ef0e5d6e25ea259d56dde24f0 100644 (file)
@@ -55,6 +55,9 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
        for (tmp = 1, intel_gt_pm_get(gt); tmp; \
             intel_gt_pm_put(gt), tmp = 0)
 
+#define with_intel_gt_pm_if_awake(gt, wf) \
+       for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+
 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
 {
        return intel_wakeref_wait_for_idle(&gt->wakeref);