drm/i915: Flush TLBs before releasing backing store
[platform/kernel/linux-starfive.git] / drivers / gpu / drm / i915 / gt / intel_gt.c
index 62d40c9..35d0fcd 100644 (file)
@@ -3,7 +3,9 @@
  * Copyright © 2019 Intel Corporation
  */
 
-#include "debugfs_gt.h"
+#include <drm/intel-gtt.h>
+
+#include "intel_gt_debugfs.h"
 
 #include "gem/i915_gem_lmem.h"
 #include "i915_drv.h"
 #include "intel_gt_requests.h"
 #include "intel_migrate.h"
 #include "intel_mocs.h"
+#include "intel_pm.h"
 #include "intel_rc6.h"
 #include "intel_renderstate.h"
 #include "intel_rps.h"
 #include "intel_uncore.h"
-#include "intel_pm.h"
 #include "shmem_utils.h"
+#include "pxp/intel_pxp.h"
 
-void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 {
-       gt->i915 = i915;
-       gt->uncore = &i915->uncore;
-
        spin_lock_init(&gt->irq_lock);
 
+       mutex_init(&gt->tlb_invalidate_lock);
+
        INIT_LIST_HEAD(&gt->closed_vma);
        spin_lock_init(&gt->closed_lock);
 
@@ -45,6 +47,12 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
        intel_rps_init_early(&gt->rps);
 }
 
+void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+{
+       gt->i915 = i915;
+       gt->uncore = &i915->uncore;
+}
+
 int intel_gt_probe_lmem(struct intel_gt *gt)
 {
        struct drm_i915_private *i915 = gt->i915;
@@ -434,7 +442,7 @@ void intel_gt_driver_register(struct intel_gt *gt)
 {
        intel_rps_driver_register(&gt->rps);
 
-       debugfs_gt_register(gt);
+       intel_gt_debugfs_register(gt);
 }
 
 static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
@@ -481,7 +489,7 @@ static void intel_gt_fini_scratch(struct intel_gt *gt)
 static struct i915_address_space *kernel_vm(struct intel_gt *gt)
 {
        if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
-               return &i915_ppgtt_create(gt)->vm;
+               return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
        else
                return i915_vm_get(&gt->ggtt->vm);
 }
@@ -660,6 +668,8 @@ int intel_gt_init(struct intel_gt *gt)
        if (err)
                return err;
 
+       intel_gt_init_workarounds(gt);
+
        /*
         * This is just a security blanket to placate dragons.
         * On some systems, we very sporadically observe that the first TLBs
@@ -682,6 +692,8 @@ int intel_gt_init(struct intel_gt *gt)
                goto err_pm;
        }
 
+       intel_set_mocs_index(gt);
+
        err = intel_engines_init(gt);
        if (err)
                goto err_engines;
@@ -710,6 +722,8 @@ int intel_gt_init(struct intel_gt *gt)
 
        intel_migrate_init(&gt->migrate, gt);
 
+       intel_pxp_init(&gt->pxp);
+
        goto out_fw;
 err_gt:
        __intel_gt_disable(gt);
@@ -737,6 +751,8 @@ void intel_gt_driver_remove(struct intel_gt *gt)
        intel_uc_driver_remove(&gt->uc);
 
        intel_engines_release(gt);
+
+       intel_gt_flush_buffer_pool(gt);
 }
 
 void intel_gt_driver_unregister(struct intel_gt *gt)
@@ -745,12 +761,14 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
 
        intel_rps_driver_unregister(&gt->rps);
 
+       intel_pxp_fini(&gt->pxp);
+
        /*
         * Upon unregistering the device to prevent any new users, cancel
         * all in-flight requests so that we can quickly unbind the active
         * resources.
         */
-       intel_gt_set_wedged(gt);
+       intel_gt_set_wedged_on_fini(gt);
 
        /* Scrub all HW state upon release */
        with_intel_runtime_pm(gt->uncore->rpm, wakeref)
@@ -765,6 +783,7 @@ void intel_gt_driver_release(struct intel_gt *gt)
        if (vm) /* FIXME being called twice on error paths :( */
                i915_vm_put(vm);
 
+       intel_wa_list_free(&gt->wa_list);
        intel_gt_pm_fini(gt);
        intel_gt_fini_scratch(gt);
        intel_gt_fini_buffer_pool(gt);
@@ -895,3 +914,109 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 
        intel_sseu_dump(&info->sseu, p);
 }
+
+struct reg_and_bit {
+       i915_reg_t reg;
+       u32 bit;
+};
+
+static struct reg_and_bit
+get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
+               const i915_reg_t *regs, const unsigned int num)
+{
+       const unsigned int class = engine->class;
+       struct reg_and_bit rb = { };
+
+       if (drm_WARN_ON_ONCE(&engine->i915->drm,
+                            class >= num || !regs[class].reg))
+               return rb;
+
+       rb.reg = regs[class];
+       if (gen8 && class == VIDEO_DECODE_CLASS)
+               rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+       else
+               rb.bit = engine->instance;
+
+       rb.bit = BIT(rb.bit);
+
+       return rb;
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+{
+       static const i915_reg_t gen8_regs[] = {
+               [RENDER_CLASS]                  = GEN8_RTCR,
+               [VIDEO_DECODE_CLASS]            = GEN8_M1TCR, /* , GEN8_M2TCR */
+               [VIDEO_ENHANCEMENT_CLASS]       = GEN8_VTCR,
+               [COPY_ENGINE_CLASS]             = GEN8_BTCR,
+       };
+       static const i915_reg_t gen12_regs[] = {
+               [RENDER_CLASS]                  = GEN12_GFX_TLB_INV_CR,
+               [VIDEO_DECODE_CLASS]            = GEN12_VD_TLB_INV_CR,
+               [VIDEO_ENHANCEMENT_CLASS]       = GEN12_VE_TLB_INV_CR,
+               [COPY_ENGINE_CLASS]             = GEN12_BLT_TLB_INV_CR,
+       };
+       struct drm_i915_private *i915 = gt->i915;
+       struct intel_uncore *uncore = gt->uncore;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       const i915_reg_t *regs;
+       unsigned int num = 0;
+
+       if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+               return;
+
+       if (GRAPHICS_VER(i915) == 12) {
+               regs = gen12_regs;
+               num = ARRAY_SIZE(gen12_regs);
+       } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+               regs = gen8_regs;
+               num = ARRAY_SIZE(gen8_regs);
+       } else if (GRAPHICS_VER(i915) < 8) {
+               return;
+       }
+
+       if (drm_WARN_ONCE(&i915->drm, !num,
+                         "Platform does not implement TLB invalidation!"))
+               return;
+
+       GEM_TRACE("\n");
+
+       assert_rpm_wakelock_held(&i915->runtime_pm);
+
+       mutex_lock(&gt->tlb_invalidate_lock);
+       intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+       for_each_engine(engine, gt, id) {
+               /*
+                * HW architecture suggests a typical invalidation time at 40us,
+                * with pessimistic cases up to 100us and a recommendation to
+                * cap at 1ms. We go a bit higher just in case.
+                */
+               const unsigned int timeout_us = 100;
+               const unsigned int timeout_ms = 4;
+               struct reg_and_bit rb;
+
+               rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+               if (!i915_mmio_reg_offset(rb.reg))
+                       continue;
+
+               intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+               if (__intel_wait_for_register_fw(uncore,
+                                                rb.reg, rb.bit, 0,
+                                                timeout_us, timeout_ms,
+                                                NULL))
+                       drm_err_ratelimited(&gt->i915->drm,
+                                           "%s TLB invalidation did not complete in %ums!\n",
+                                           engine->name, timeout_ms);
+       }
+
+       /*
+        * Use delayed put since a) we mostly expect a flurry of TLB
+        * invalidations so it is good to avoid paying the forcewake cost and
+        * b) it works around a bug in Icelake which cannot cope with too rapid
+        * transitions.
+        */
+       intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+       mutex_unlock(&gt->tlb_invalidate_lock);
+}