Merge drm/drm-next into drm-intel-gt-next
author: Rodrigo Vivi <rodrigo.vivi@intel.com>
Fri, 30 Dec 2022 09:09:09 +0000 (04:09 -0500)
committer: Rodrigo Vivi <rodrigo.vivi@intel.com>
Fri, 30 Dec 2022 09:18:36 +0000 (04:18 -0500)
Sync after v6.2-rc1 landed in drm-next.

We need to get some dependencies in place before we can merge
the fixes series from Gwan-gyeong and Chris.

References: https://lore.kernel.org/all/Y6x5JCDnh2rvh4lA@intel.com/
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
19 files changed:
1  2 
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/display/intel_fbdev.c
drivers/gpu/drm/i915/display/skl_universal_plane.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/gt/intel_gt_pm.c
drivers/gpu/drm/i915/gt/intel_gt_regs.h
drivers/gpu/drm/i915/gt/intel_gtt.c
drivers/gpu/drm/i915/gt/intel_ring_submission.c
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_pci.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_utils.h
drivers/gpu/drm/i915/intel_uncore.c

@@@ -188,9 -188,9 +188,9 @@@ i915-y += 
          i915_vma_resource.o
  
  # general-purpose microcontroller (GuC) support
 -i915-y += gt/uc/intel_uc.o \
 -        gt/uc/intel_uc_debugfs.o \
 -        gt/uc/intel_uc_fw.o \
 +i915-y += \
 +        gt/uc/intel_gsc_fw.o \
 +        gt/uc/intel_gsc_uc.o \
          gt/uc/intel_guc.o \
          gt/uc/intel_guc_ads.o \
          gt/uc/intel_guc_capture.o \
          gt/uc/intel_guc_submission.o \
          gt/uc/intel_huc.o \
          gt/uc/intel_huc_debugfs.o \
 -        gt/uc/intel_huc_fw.o
 +        gt/uc/intel_huc_fw.o \
 +        gt/uc/intel_uc.o \
 +        gt/uc/intel_uc_debugfs.o \
 +        gt/uc/intel_uc_fw.o
  
  # graphics system controller (GSC) support
  i915-y += gt/intel_gsc.o
@@@ -251,6 -248,7 +251,7 @@@ i915-y += 
        display/intel_global_state.o \
        display/intel_hdcp.o \
        display/intel_hotplug.o \
+       display/intel_hti.o \
        display/intel_lpe_audio.o \
        display/intel_modeset_verify.o \
        display/intel_modeset_setup.o \
@@@ -124,6 -124,8 +124,8 @@@ static const struct fb_ops intelfb_ops 
        .owner = THIS_MODULE,
        DRM_FB_HELPER_DEFAULT_OPS,
        .fb_set_par = intel_fbdev_set_par,
+       .fb_read = drm_fb_helper_cfb_read,
+       .fb_write = drm_fb_helper_cfb_write,
        .fb_fillrect = drm_fb_helper_cfb_fillrect,
        .fb_copyarea = drm_fb_helper_cfb_copyarea,
        .fb_imageblit = drm_fb_helper_cfb_imageblit,
@@@ -254,7 -256,7 +256,7 @@@ static int intelfb_create(struct drm_fb
                goto out_unlock;
        }
  
-       info = drm_fb_helper_alloc_fbi(helper);
+       info = drm_fb_helper_alloc_info(helper);
        if (IS_ERR(info)) {
                drm_err(&dev_priv->drm, "Failed to allocate fb_info (%pe)\n", info);
                ret = PTR_ERR(info);
  
                /* Our framebuffer is the entirety of fbdev's system memory */
                info->fix.smem_start =
 -                      (unsigned long)(ggtt->gmadr.start + vma->node.start);
 +                      (unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma));
                info->fix.smem_len = vma->size;
        }
  
@@@ -584,7 -586,7 +586,7 @@@ void intel_fbdev_unregister(struct drm_
        if (!current_is_async())
                intel_fbdev_sync(ifbdev);
  
-       drm_fb_helper_unregister_fbi(&ifbdev->helper);
+       drm_fb_helper_unregister_info(&ifbdev->helper);
  }
  
  void intel_fbdev_fini(struct drm_i915_private *dev_priv)
@@@ -627,7 -629,7 +629,7 @@@ void intel_fbdev_set_suspend(struct drm
        if (!ifbdev || !ifbdev->vma)
                goto set_suspend;
  
-       info = ifbdev->helper.fbdev;
+       info = ifbdev->helper.info;
  
        if (synchronous) {
                /* Flush any pending work to turn the console on, and then
@@@ -9,6 -9,8 +9,8 @@@
  #include <drm/drm_fourcc.h>
  
  #include "i915_drv.h"
+ #include "i915_irq.h"
+ #include "i915_reg.h"
  #include "intel_atomic_plane.h"
  #include "intel_de.h"
  #include "intel_display_types.h"
@@@ -246,6 -248,11 +248,11 @@@ bool icl_is_nv12_y_plane(struct drm_i91
                icl_nv12_y_plane_mask(dev_priv) & BIT(plane_id);
  }
  
+ u8 icl_hdr_plane_mask(void)
+ {
+       return BIT(PLANE_PRIMARY) | BIT(PLANE_SPRITE0) | BIT(PLANE_SPRITE1);
+ }
  bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id)
  {
        return DISPLAY_VER(dev_priv) >= 11 &&
@@@ -1841,7 -1848,7 +1848,7 @@@ static bool bo_has_valid_encryption(str
  {
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
  
 -      return intel_pxp_key_check(&to_gt(i915)->pxp, obj, false) == 0;
 +      return intel_pxp_key_check(i915->pxp, obj, false) == 0;
  }
  
  static bool pxp_is_borked(struct drm_i915_gem_object *obj)
@@@ -30,6 -30,7 +30,7 @@@
  #include "i915_gem_context.h"
  #include "i915_gem_evict.h"
  #include "i915_gem_ioctls.h"
+ #include "i915_reg.h"
  #include "i915_trace.h"
  #include "i915_user_extensions.h"
  
@@@ -378,25 -379,22 +379,25 @@@ eb_vma_misplaced(const struct drm_i915_
                 const struct i915_vma *vma,
                 unsigned int flags)
  {
 -      if (vma->node.size < entry->pad_to_size)
 +      const u64 start = i915_vma_offset(vma);
 +      const u64 size = i915_vma_size(vma);
 +
 +      if (size < entry->pad_to_size)
                return true;
  
 -      if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
 +      if (entry->alignment && !IS_ALIGNED(start, entry->alignment))
                return true;
  
        if (flags & EXEC_OBJECT_PINNED &&
 -          vma->node.start != entry->offset)
 +          start != entry->offset)
                return true;
  
        if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
 -          vma->node.start < BATCH_OFFSET_BIAS)
 +          start < BATCH_OFFSET_BIAS)
                return true;
  
        if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
 -          (vma->node.start + vma->node.size + 4095) >> 32)
 +          (start + size + 4095) >> 32)
                return true;
  
        if (flags & __EXEC_OBJECT_NEEDS_MAP &&
@@@ -442,7 -440,7 +443,7 @@@ eb_pin_vma(struct i915_execbuffer *eb
        int err;
  
        if (vma->node.size)
 -              pin_flags = vma->node.start;
 +              pin_flags =  __i915_vma_offset(vma);
        else
                pin_flags = entry->offset & PIN_OFFSET_MASK;
  
@@@ -665,8 -663,8 +666,8 @@@ static int eb_reserve_vma(struct i915_e
        if (err)
                return err;
  
 -      if (entry->offset != vma->node.start) {
 -              entry->offset = vma->node.start | UPDATE;
 +      if (entry->offset != i915_vma_offset(vma)) {
 +              entry->offset = i915_vma_offset(vma) | UPDATE;
                eb->args->flags |= __EXEC_HAS_RELOC;
        }
  
@@@ -732,74 -730,37 +733,74 @@@ static int eb_reserve(struct i915_execb
        bool unpinned;
  
        /*
 -       * Attempt to pin all of the buffers into the GTT.
 -       * This is done in 2 phases:
 +       * We have one or more buffers that we couldn't bind, which could be due to
 +       * various reasons. To resolve this we have 4 passes, with every next
 +       * level turning the screws tighter:
 +       *
 +       * 0. Unbind all objects that do not match the GTT constraints for the
 +       * execbuffer (fenceable, mappable, alignment etc). Bind all new
 +       * objects.  This avoids unnecessary unbinding of later objects in order
 +       * to make room for the earlier objects *unless* we need to defragment.
         *
 -       * 1. Unbind all objects that do not match the GTT constraints for
 -       *    the execbuffer (fenceable, mappable, alignment etc).
 -       * 2. Bind new objects.
 +       * 1. Reorder the buffers, where objects with the most restrictive
 +       * placement requirements go first (ignoring fixed location buffers for
 +       * now).  For example, objects needing the mappable aperture (the first
 +       * 256M of GTT), should go first vs objects that can be placed just
 +       * about anywhere. Repeat the previous pass.
         *
 -       * This avoid unnecessary unbinding of later objects in order to make
 -       * room for the earlier objects *unless* we need to defragment.
 +       * 2. Consider buffers that are pinned at a fixed location. Also try to
 +       * evict the entire VM this time, leaving only objects that we were
 +       * unable to lock. Try again to bind the buffers. (still using the new
 +       * buffer order).
         *
 -       * Defragmenting is skipped if all objects are pinned at a fixed location.
 +       * 3. We likely have object lock contention for one or more stubborn
 +       * objects in the VM, for which we need to evict to make forward
 +       * progress (perhaps we are fighting the shrinker?). When evicting the
 +       * VM this time around, anything that we can't lock we now track using
 +       * the busy_bo, using the full lock (after dropping the vm->mutex to
 +       * prevent deadlocks), instead of trylock. We then continue to evict the
 +       * VM, this time with the stubborn object locked, which we can now
 +       * hopefully unbind (if still bound in the VM). Repeat until the VM is
 +       * evicted. Finally we should be able to bind everything.
         */
 -      for (pass = 0; pass <= 2; pass++) {
 +      for (pass = 0; pass <= 3; pass++) {
                int pin_flags = PIN_USER | PIN_VALIDATE;
  
                if (pass == 0)
                        pin_flags |= PIN_NONBLOCK;
  
                if (pass >= 1)
 -                      unpinned = eb_unbind(eb, pass == 2);
 +                      unpinned = eb_unbind(eb, pass >= 2);
  
                if (pass == 2) {
                        err = mutex_lock_interruptible(&eb->context->vm->mutex);
                        if (!err) {
 -                              err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
 +                              err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
                                mutex_unlock(&eb->context->vm->mutex);
                        }
                        if (err)
                                return err;
                }
  
 +              if (pass == 3) {
 +retry:
 +                      err = mutex_lock_interruptible(&eb->context->vm->mutex);
 +                      if (!err) {
 +                              struct drm_i915_gem_object *busy_bo = NULL;
 +
 +                              err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
 +                              mutex_unlock(&eb->context->vm->mutex);
 +                              if (err && busy_bo) {
 +                                      err = i915_gem_object_lock(busy_bo, &eb->ww);
 +                                      i915_gem_object_put(busy_bo);
 +                                      if (!err)
 +                                              goto retry;
 +                              }
 +                      }
 +                      if (err)
 +                              return err;
 +              }
 +
                list_for_each_entry(ev, &eb->unbound, bind_link) {
                        err = eb_reserve_vma(eb, ev, pin_flags);
                        if (err)
@@@ -908,7 -869,7 +909,7 @@@ static struct i915_vma *eb_lookup_vma(s
                 */
                if (i915_gem_context_uses_protected_content(eb->gem_context) &&
                    i915_gem_object_is_protected(obj)) {
 -                      err = intel_pxp_key_check(&vm->gt->pxp, obj, true);
 +                      err = intel_pxp_key_check(eb->i915->pxp, obj, true);
                        if (err) {
                                i915_gem_object_put(obj);
                                return ERR_PTR(err);
@@@ -1023,8 -984,8 +1024,8 @@@ static int eb_validate_vmas(struct i915
                        return err;
  
                if (!err) {
 -                      if (entry->offset != vma->node.start) {
 -                              entry->offset = vma->node.start | UPDATE;
 +                      if (entry->offset != i915_vma_offset(vma)) {
 +                              entry->offset = i915_vma_offset(vma) | UPDATE;
                                eb->args->flags |= __EXEC_HAS_RELOC;
                        }
                } else {
@@@ -1105,7 -1066,7 +1106,7 @@@ static inline u6
  relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
                  const struct i915_vma *target)
  {
 -      return gen8_canonical_addr((int)reloc->delta + target->node.start);
 +      return gen8_canonical_addr((int)reloc->delta + i915_vma_offset(target));
  }
  
  static void reloc_cache_init(struct reloc_cache *cache,
@@@ -1314,7 -1275,7 +1315,7 @@@ static void *reloc_iomap(struct i915_vm
                        if (err) /* no inactive aperture space, use cpu reloc */
                                return NULL;
                } else {
 -                      cache->node.start = vma->node.start;
 +                      cache->node.start = i915_ggtt_offset(vma);
                        cache->node.mm = (void *)vma;
                }
        }
@@@ -1477,7 -1438,7 +1478,7 @@@ eb_relocate_entry(struct i915_execbuffe
         * more work needs to be done.
         */
        if (!DBG_FORCE_RELOC &&
 -          gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
 +          gen8_canonical_addr(i915_vma_offset(target->vma)) == reloc->presumed_offset)
                return 0;
  
        /* Check that the relocation address is valid... */
@@@ -2407,7 -2368,7 +2408,7 @@@ static int eb_request_submit(struct i91
        }
  
        err = rq->context->engine->emit_bb_start(rq,
 -                                               batch->node.start +
 +                                               i915_vma_offset(batch) +
                                                 eb->batch_start_offset,
                                                 batch_len,
                                                 eb->batch_flags);
                GEM_BUG_ON(intel_context_is_parallel(rq->context));
                GEM_BUG_ON(eb->batch_start_offset);
                err = rq->context->engine->emit_bb_start(rq,
 -                                                       eb->trampoline->node.start +
 +                                                       i915_vma_offset(eb->trampoline) +
                                                         batch_len, 0, 0);
                if (err)
                        return err;
@@@ -2466,7 -2427,7 +2467,7 @@@ gen8_dispatch_bsd_engine(struct drm_i91
        /* Check whether the file_priv has already selected one ring. */
        if ((int)file_priv->bsd_engine < 0)
                file_priv->bsd_engine =
-                       prandom_u32_max(num_vcs_engines(dev_priv));
+                       get_random_u32_below(num_vcs_engines(dev_priv));
  
        return file_priv->bsd_engine;
  }
@@@ -17,6 -17,7 +17,7 @@@
  #include "gt/intel_gt.h"
  #include "gt/intel_gt_pm.h"
  #include "gt/intel_migrate.h"
+ #include "i915_reg.h"
  #include "i915_ttm_buddy_manager.h"
  
  #include "huge_gem_object.h"
@@@ -1608,7 -1609,7 +1609,7 @@@ retry
  
                err = i915_vma_move_to_active(vma, rq, 0);
  
 -              err = engine->emit_bb_start(rq, vma->node.start, 0, 0);
 +              err = engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
                i915_request_get(rq);
                i915_request_add(rq);
  
@@@ -13,6 -13,8 +13,8 @@@
  
  #include "i915_cmd_parser.h"
  #include "i915_drv.h"
+ #include "i915_irq.h"
+ #include "i915_reg.h"
  #include "intel_breadcrumbs.h"
  #include "intel_context.h"
  #include "intel_engine.h"
@@@ -892,24 -894,6 +894,24 @@@ static intel_engine_mask_t init_engine_
        engine_mask_apply_compute_fuses(gt);
        engine_mask_apply_copy_fuses(gt);
  
 +      /*
 +       * The only use of the GSC CS is to load and communicate with the GSC
 +       * FW, so we have no use for it if we don't have the FW.
 +       *
 +       * IMPORTANT: in cases where we don't have the GSC FW, we have a
 +       * catch-22 situation that breaks media C6 due to 2 requirements:
 +       * 1) once turned on, the GSC power well will not go to sleep unless the
 +       *    GSC FW is loaded.
 +       * 2) to enable idling (which is required for media C6) we need to
 +       *    initialize the IDLE_MSG register for the GSC CS and do at least 1
 +       *    submission, which will wake up the GSC power well.
 +       */
 +      if (__HAS_ENGINE(info->engine_mask, GSC0) && !intel_uc_wants_gsc_uc(&gt->uc)) {
 +              drm_notice(&gt->i915->drm,
 +                         "No GSC FW selected, disabling GSC CS and media C6\n");
 +              info->engine_mask &= ~BIT(GSC0);
 +      }
 +
        return info->engine_mask;
  }
  
@@@ -8,9 -8,11 +8,10 @@@
  
  #include "gem/i915_gem_internal.h"
  #include "gem/i915_gem_lmem.h"
 -#include "pxp/intel_pxp.h"
  
  #include "i915_drv.h"
  #include "i915_perf_oa_regs.h"
+ #include "i915_reg.h"
  #include "intel_context.h"
  #include "intel_engine_pm.h"
  #include "intel_engine_regs.h"
@@@ -108,18 -110,9 +109,18 @@@ static int intel_gt_probe_lmem(struct i
  
  int intel_gt_assign_ggtt(struct intel_gt *gt)
  {
 -      gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL);
 +      /* Media GT shares primary GT's GGTT */
 +      if (gt->type == GT_MEDIA) {
 +              gt->ggtt = to_gt(gt->i915)->ggtt;
 +      } else {
 +              gt->ggtt = i915_ggtt_create(gt->i915);
 +              if (IS_ERR(gt->ggtt))
 +                      return PTR_ERR(gt->ggtt);
 +      }
 +
 +      list_add_tail(&gt->ggtt_link, &gt->ggtt->gt_list);
  
 -      return gt->ggtt ? 0 : -ENOMEM;
 +      return 0;
  }
  
  int intel_gt_init_mmio(struct intel_gt *gt)
@@@ -217,6 -210,21 +218,6 @@@ out
        return ret;
  }
  
 -static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
 -{
 -      intel_uncore_rmw(uncore, reg, 0, set);
 -}
 -
 -static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
 -{
 -      intel_uncore_rmw(uncore, reg, clr, 0);
 -}
 -
 -static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
 -{
 -      intel_uncore_rmw(uncore, reg, 0, 0);
 -}
 -
  static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
  {
        GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
@@@ -242,14 -250,14 +243,14 @@@ intel_gt_clear_error_registers(struct i
        u32 eir;
  
        if (GRAPHICS_VER(i915) != 2)
 -              clear_register(uncore, PGTBL_ER);
 +              intel_uncore_write(uncore, PGTBL_ER, 0);
  
        if (GRAPHICS_VER(i915) < 4)
 -              clear_register(uncore, IPEIR(RENDER_RING_BASE));
 +              intel_uncore_write(uncore, IPEIR(RENDER_RING_BASE), 0);
        else
 -              clear_register(uncore, IPEIR_I965);
 +              intel_uncore_write(uncore, IPEIR_I965, 0);
  
 -      clear_register(uncore, EIR);
 +      intel_uncore_write(uncore, EIR, 0);
        eir = intel_uncore_read(uncore, EIR);
        if (eir) {
                /*
                 * mask them.
                 */
                drm_dbg(&gt->i915->drm, "EIR stuck: 0x%08x, masking\n", eir);
 -              rmw_set(uncore, EMR, eir);
 +              intel_uncore_rmw(uncore, EMR, 0, eir);
                intel_uncore_write(uncore, GEN2_IIR,
                                   I915_MASTER_ERROR_INTERRUPT);
        }
                                           RING_FAULT_VALID, 0);
                intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
        } else if (GRAPHICS_VER(i915) >= 12) {
 -              rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
 +              intel_uncore_rmw(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID, 0);
                intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
        } else if (GRAPHICS_VER(i915) >= 8) {
 -              rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
 +              intel_uncore_rmw(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID, 0);
                intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
        } else if (GRAPHICS_VER(i915) >= 6) {
                struct intel_engine_cs *engine;
@@@ -745,6 -753,8 +746,6 @@@ int intel_gt_init(struct intel_gt *gt
  
        intel_migrate_init(&gt->migrate, gt);
  
 -      intel_pxp_init(&gt->pxp);
 -
        goto out_fw;
  err_gt:
        __intel_gt_disable(gt);
@@@ -784,6 -794,8 +785,6 @@@ void intel_gt_driver_unregister(struct 
        intel_rps_driver_unregister(&gt->rps);
        intel_gsc_fini(&gt->gsc);
  
 -      intel_pxp_fini(&gt->pxp);
 -
        /*
         * Upon unregistering the device to prevent any new users, cancel
         * all in-flight requests so that we can quickly unbind the active
@@@ -1067,7 -1079,6 +1068,7 @@@ static void mmio_invalidate_full(struc
        enum intel_engine_id id;
        const i915_reg_t *regs;
        unsigned int num = 0;
 +      unsigned long flags;
  
        if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
                regs = NULL;
  
        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
  
 -      spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
 +      intel_gt_mcr_lock(gt, &flags);
 +      spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
  
        awake = 0;
        for_each_engine(engine, gt, id) {
                        continue;
  
                if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
 +                      u32 val = BIT(engine->instance);
 +
 +                      if (engine->class == VIDEO_DECODE_CLASS ||
 +                          engine->class == VIDEO_ENHANCEMENT_CLASS ||
 +                          engine->class == COMPUTE_CLASS)
 +                              val = _MASKED_BIT_ENABLE(val);
                        intel_gt_mcr_multicast_write_fw(gt,
                                                        xehp_regs[engine->class],
 -                                                      BIT(engine->instance));
 +                                                      val);
                } else {
                        rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
                        if (!i915_mmio_reg_offset(rb.reg))
                                continue;
  
+                       if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
+                           engine->class == VIDEO_ENHANCEMENT_CLASS ||
+                           engine->class == COMPUTE_CLASS))
+                               rb.bit = _MASKED_BIT_ENABLE(rb.bit);
                        intel_uncore_write_fw(uncore, rb.reg, rb.bit);
                }
                awake |= engine->mask;
             IS_ALDERLAKE_P(i915)))
                intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
  
 -      spin_unlock_irq(&uncore->lock);
 +      spin_unlock(&uncore->lock);
 +      intel_gt_mcr_unlock(gt, flags);
  
        for_each_engine_masked(engine, gt, awake, tmp) {
                struct reg_and_bit rb;
@@@ -7,6 -7,7 +7,7 @@@
  #include <linux/suspend.h>
  
  #include "i915_drv.h"
+ #include "i915_irq.h"
  #include "i915_params.h"
  #include "intel_context.h"
  #include "intel_engine_pm.h"
@@@ -303,6 -304,8 +304,6 @@@ int intel_gt_resume(struct intel_gt *gt
  
        intel_uc_resume(&gt->uc);
  
 -      intel_pxp_resume(&gt->pxp);
 -
        user_forcewake(gt, false);
  
  out_fw:
@@@ -336,6 -339,8 +337,6 @@@ void intel_gt_suspend_prepare(struct in
  {
        user_forcewake(gt, true);
        wait_for_suspend(gt);
 -
 -      intel_pxp_suspend_prepare(&gt->pxp);
  }
  
  static suspend_state_t pm_suspend_target(void)
@@@ -360,6 -365,7 +361,6 @@@ void intel_gt_suspend_late(struct intel
        GEM_BUG_ON(gt->awake);
  
        intel_uc_suspend(&gt->uc);
 -      intel_pxp_suspend(&gt->pxp);
  
        /*
         * On disabling the device, we want to turn off HW access to memory
  
  void intel_gt_runtime_suspend(struct intel_gt *gt)
  {
 -      intel_pxp_runtime_suspend(&gt->pxp);
        intel_uc_runtime_suspend(&gt->uc);
  
        GT_TRACE(gt, "\n");
@@@ -404,6 -411,8 +405,6 @@@ int intel_gt_runtime_resume(struct inte
        if (ret)
                return ret;
  
 -      intel_pxp_runtime_resume(&gt->pxp);
 -
        return 0;
  }
  
@@@ -7,6 -7,7 +7,7 @@@
  #define __INTEL_GT_REGS__
  
  #include "i915_reg_defs.h"
+ #include "display/intel_display_reg_defs.h"   /* VLV_DISPLAY_BASE */
  
  #define MCR_REG(offset)       ((const i915_mcr_reg_t){ .reg = (offset) })
  
@@@ -66,7 -67,6 +67,7 @@@
  #define GMD_ID_MEDIA                          _MMIO(MTL_MEDIA_GSI_BASE + 0xd8c)
  
  #define MCFG_MCR_SELECTOR                     _MMIO(0xfd0)
 +#define MTL_STEER_SEMAPHORE                   _MMIO(0xfd0)
  #define MTL_MCR_SELECTOR                      _MMIO(0xfd4)
  #define SF_MCR_SELECTOR                               _MMIO(0xfd8)
  #define GEN8_MCR_SELECTOR                     _MMIO(0xfdc)
  #define   RC_OP_FLUSH_ENABLE                  (1 << 0)
  #define   HIZ_RAW_STALL_OPT_DISABLE           (1 << 2)
  #define CACHE_MODE_1                          _MMIO(0x7004) /* IVB+ */
 -#define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE   (1 << 6)
 -#define   GEN8_4x4_STC_OPTIMIZATION_DISABLE   (1 << 6)
 -#define   GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE  (1 << 1)
 +#define   MSAA_OPTIMIZATION_REDUC_DISABLE     REG_BIT(11)
 +#define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE   REG_BIT(6)
 +#define   GEN8_4x4_STC_OPTIMIZATION_DISABLE   REG_BIT(6)
 +#define   GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE  REG_BIT(1)
  
  #define GEN7_GT_MODE                          _MMIO(0x7008)
  #define   GEN9_IZ_HASHING_MASK(slice)         (0x3 << ((slice) * 2))
  #define GEN8_L3CNTLREG                                _MMIO(0x7034)
  #define   GEN8_ERRDETBCTRL                    (1 << 9)
  
 +#define PSS_MODE2                             _MMIO(0x703c)
 +#define   SCOREBOARD_STALL_FLUSH_CONTROL      REG_BIT(5)
 +
  #define GEN7_SC_INSTDONE                      _MMIO(0x7100)
  #define GEN12_SC_INSTDONE_EXTRA                       _MMIO(0x7104)
  #define GEN12_SC_INSTDONE_EXTRA2              _MMIO(0x7108)
  #define  MSG_IDLE_FW_MASK     REG_GENMASK(13, 9)
  #define  MSG_IDLE_FW_SHIFT    9
  
 +#define       RC_PSMI_CTRL_GSCCS      _MMIO(0x11a050)
 +#define         IDLE_MSG_DISABLE      REG_BIT(0)
 +#define       PWRCTX_MAXCNT_GSCCS     _MMIO(0x11a054)
 +
  #define FORCEWAKE_MEDIA_GEN9                  _MMIO(0xa270)
  #define FORCEWAKE_RENDER_GEN9                 _MMIO(0xa278)
  
  #define   GEN7_DISABLE_SAMPLER_PREFETCH               (1 << 30)
  
  #define GEN8_GARBCNTL                         _MMIO(0xb004)
 -#define   GEN9_GAPS_TSV_CREDIT_DISABLE                (1 << 7)
 -#define   GEN11_ARBITRATION_PRIO_ORDER_MASK   (0x3f << 22)
 -#define   GEN11_HASH_CTRL_EXCL_MASK           (0x7f << 0)
 -#define   GEN11_HASH_CTRL_EXCL_BIT0           (1 << 0)
 +#define   GEN11_ARBITRATION_PRIO_ORDER_MASK   REG_GENMASK(27, 22)
 +#define   GEN12_BUS_HASH_CTL_BIT_EXC          REG_BIT(7)
 +#define   GEN9_GAPS_TSV_CREDIT_DISABLE                REG_BIT(7)
 +#define   GEN11_HASH_CTRL_EXCL_MASK           REG_GENMASK(6, 0)
 +#define   GEN11_HASH_CTRL_EXCL_BIT0           REG_FIELD_PREP(GEN11_HASH_CTRL_EXCL_MASK, 0x1)
  
  #define GEN9_SCRATCH_LNCF1                    _MMIO(0xb008)
  #define   GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE     REG_BIT(0)
  #define   GEN7_L3AGDIS                                (1 << 19)
  
  #define XEHPC_LNCFMISCCFGREG0                 _MMIO(0xb01c)
 +#define   XEHPC_HOSTCACHEEN                   REG_BIT(1)
  #define   XEHPC_OVRLSCCC                      REG_BIT(0)
  
  #define GEN7_L3CNTLREG2                               _MMIO(0xb020)
@@@ -12,6 -12,7 +12,7 @@@
  
  #include "gem/i915_gem_internal.h"
  #include "gem/i915_gem_lmem.h"
+ #include "i915_reg.h"
  #include "i915_trace.h"
  #include "i915_utils.h"
  #include "intel_gt.h"
@@@ -481,25 -482,14 +482,25 @@@ static void tgl_setup_private_ppat(stru
  
  static void xehp_setup_private_ppat(struct intel_gt *gt)
  {
 -      intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
 -      intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
 -      intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
 -      intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
 -      intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
 -      intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
 -      intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
 -      intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
 +      enum forcewake_domains fw;
 +      unsigned long flags;
 +
 +      fw = intel_uncore_forcewake_for_reg(gt->uncore, _MMIO(XEHP_PAT_INDEX(0).reg),
 +                                          FW_REG_WRITE);
 +      intel_uncore_forcewake_get(gt->uncore, fw);
 +
 +      intel_gt_mcr_lock(gt, &flags);
 +      intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
 +      intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
 +      intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
 +      intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
 +      intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
 +      intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
 +      intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
 +      intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
 +      intel_gt_mcr_unlock(gt, flags);
 +
 +      intel_uncore_forcewake_put(gt->uncore, fw);
  }
  
  static void icl_setup_private_ppat(struct intel_uncore *uncore)
@@@ -12,7 -12,9 +12,9 @@@
  #include "gen6_ppgtt.h"
  #include "gen7_renderclear.h"
  #include "i915_drv.h"
+ #include "i915_irq.h"
  #include "i915_mitigations.h"
+ #include "i915_reg.h"
  #include "intel_breadcrumbs.h"
  #include "intel_context.h"
  #include "intel_engine_regs.h"
@@@ -895,7 -897,7 +897,7 @@@ static int clear_residuals(struct i915_
        }
  
        ret = engine->emit_bb_start(rq,
 -                                  engine->wa_ctx.vma->node.start, 0,
 +                                  i915_vma_offset(engine->wa_ctx.vma), 0,
                                    0);
        if (ret)
                return ret;
@@@ -4,6 -4,7 +4,7 @@@
   */
  
  #include "i915_drv.h"
+ #include "i915_reg.h"
  #include "intel_context.h"
  #include "intel_engine_pm.h"
  #include "intel_engine_regs.h"
@@@ -770,19 -771,11 +771,19 @@@ static void dg2_ctx_workarounds_init(st
  
        /* Wa_14014947963:dg2 */
        if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
 -              IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
 +          IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
                wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
  
 +      /* Wa_18018764978:dg2 */
 +      if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) ||
 +          IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
 +              wa_masked_en(wal, PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
 +
        /* Wa_15010599737:dg2 */
        wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
 +
 +      /* Wa_18019271663:dg2 */
 +      wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
  }
  
  static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
@@@ -1759,8 -1752,7 +1760,8 @@@ static void wa_list_apply(const struct 
  
        fw = wal_get_fw_for_rmw(uncore, wal);
  
 -      spin_lock_irqsave(&uncore->lock, flags);
 +      intel_gt_mcr_lock(gt, &flags);
 +      spin_lock(&uncore->lock);
        intel_uncore_forcewake_get__locked(uncore, fw);
  
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
        }
  
        intel_uncore_forcewake_put__locked(uncore, fw);
 -      spin_unlock_irqrestore(&uncore->lock, flags);
 +      spin_unlock(&uncore->lock);
 +      intel_gt_mcr_unlock(gt, flags);
  }
  
  void intel_gt_apply_workarounds(struct intel_gt *gt)
@@@ -1811,8 -1802,7 +1812,8 @@@ static bool wa_list_verify(struct intel
  
        fw = wal_get_fw_for_rmw(uncore, wal);
  
 -      spin_lock_irqsave(&uncore->lock, flags);
 +      intel_gt_mcr_lock(gt, &flags);
 +      spin_lock(&uncore->lock);
        intel_uncore_forcewake_get__locked(uncore, fw);
  
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                                wal->name, from);
  
        intel_uncore_forcewake_put__locked(uncore, fw);
 -      spin_unlock_irqrestore(&uncore->lock, flags);
 +      spin_unlock(&uncore->lock);
 +      intel_gt_mcr_unlock(gt, flags);
  
        return ok;
  }
@@@ -2906,12 -2895,25 +2907,12 @@@ add_render_compute_tuning_settings(stru
        if (IS_PONTEVECCHIO(i915)) {
                wa_write(wal, XEHPC_L3SCRUB,
                         SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
 +              wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
        }
  
        if (IS_DG2(i915)) {
                wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
                wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
 -
 -              /*
 -               * This is also listed as Wa_22012654132 for certain DG2
 -               * steppings, but the tuning setting programming is a superset
 -               * since it applies to all DG2 variants and steppings.
 -               *
 -               * Note that register 0xE420 is write-only and cannot be read
 -               * back for verification on DG2 (due to Wa_14012342262), so
 -               * we need to explicitly skip the readback.
 -               */
 -              wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
 -                         _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
 -                         0 /* write-only, so skip validation */,
 -                         true);
        }
  
        /*
        if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
                wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
                                        THREAD_EX_ARB_MODE_RR_AFTER_DEP);
 +
 +      if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
 +              wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
  }
  
  /*
@@@ -3007,19 -3006,6 +3008,19 @@@ general_render_compute_wa_init(struct i
                /* Wa_18017747507:dg2 */
                wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
        }
 +
 +      if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915))
 +              /*
 +               * Wa_22012654132
 +               *
 +               * Note that register 0xE420 is write-only and cannot be read
 +               * back for verification on DG2 (due to Wa_14012342262), so
 +               * we need to explicitly skip the readback.
 +               */
 +              wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
 +                         _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
 +                         0 /* write-only, so skip validation */,
 +                         true);
  }
  
  static void
@@@ -30,6 -30,7 +30,7 @@@
  #include "intel_guc_submission.h"
  
  #include "i915_drv.h"
+ #include "i915_reg.h"
  #include "i915_trace.h"
  
  /**
@@@ -1889,7 -1890,7 +1890,7 @@@ int intel_guc_submission_init(struct in
        if (guc->submission_initialized)
                return 0;
  
 -      if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) {
 +      if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) {
                ret = guc_lrc_desc_pool_create_v69(guc);
                if (ret)
                        return ret;
@@@ -2329,7 -2330,7 +2330,7 @@@ static int register_context(struct inte
        GEM_BUG_ON(intel_context_is_child(ce));
        trace_intel_context_register(ce);
  
 -      if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
 +      if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0))
                ret = register_context_v70(guc, ce, loop);
        else
                ret = register_context_v69(guc, ce, loop);
                set_context_registered(ce);
                spin_unlock_irqrestore(&ce->guc_state.lock, flags);
  
 -              if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
 +              if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0))
                        guc_context_policy_init_v70(ce, loop);
        }
  
@@@ -2533,7 -2534,6 +2534,7 @@@ static void prepare_context_registratio
                   i915_gem_object_is_lmem(ce->ring->vma->obj));
  
        desc = __get_lrc_desc_v69(guc, ctx_id);
 +      GEM_BUG_ON(!desc);
        desc->engine_class = engine_class_to_guc_class(engine->class);
        desc->engine_submit_mask = engine->logical_mask;
        desc->hw_context_desc = ce->lrc.lrca;
@@@ -2956,7 -2956,7 +2957,7 @@@ static void __guc_context_set_preemptio
                                                 u16 guc_id,
                                                 u32 preemption_timeout)
  {
 -      if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) {
 +      if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) {
                struct context_policy policy;
  
                __guc_context_policy_start_klv(&policy, guc_id);
@@@ -3283,7 -3283,7 +3284,7 @@@ static int guc_context_alloc(struct int
  static void __guc_context_set_prio(struct intel_guc *guc,
                                   struct intel_context *ce)
  {
 -      if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) {
 +      if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) {
                struct context_policy policy;
  
                __guc_context_policy_start_klv(&policy, ce->guc_id.id);
@@@ -4366,7 -4366,7 +4367,7 @@@ static int guc_init_global_schedule_pol
        intel_wakeref_t wakeref;
        int ret = 0;
  
 -      if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0))
 +      if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
                return 0;
  
        __guc_scheduling_policy_start_klv(&policy);
@@@ -4905,9 -4905,6 +4906,9 @@@ void intel_guc_submission_print_info(st
        if (!sched_engine)
                return;
  
 +      drm_printf(p, "GuC Submission API Version: %d.%d.%d\n",
 +                 guc->submission_version.major, guc->submission_version.minor,
 +                 guc->submission_version.patch);
        drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
                   atomic_read(&guc->outstanding_submission_g2h));
        drm_printf(p, "GuC tasklet count: %u\n",
@@@ -72,7 -72,6 +72,7 @@@ struct intel_encoder
  struct intel_limit;
  struct intel_overlay_error_state;
  struct vlv_s0ix_state;
 +struct intel_pxp;
  
  #define I915_GEM_GPU_DOMAINS \
        (I915_GEM_DOMAIN_RENDER | \
@@@ -285,28 -284,13 +285,13 @@@ struct drm_i915_private 
  
        unsigned long gem_quirks;
  
-       struct drm_atomic_state *modeset_restore_state;
-       struct drm_modeset_acquire_ctx reset_ctx;
        struct i915_gem_mm mm;
  
-       /* Kernel Modesetting */
-       struct list_head global_obj_list;
        bool mchbar_need_disable;
  
        struct intel_l3_parity l3_parity;
  
        /*
-        * HTI (aka HDPORT) state read during initial hw readout.  Most
-        * platforms don't have HTI, so this will just stay 0.  Those that do
-        * will use this later to figure out which PLLs and PHYs are unavailable
-        * for driver usage.
-        */
-       u32 hti_state;
-       /*
         * edram size in MB.
         * Cannot be determined by PCIID. You must always read a register.
         */
                struct file *mmap_singleton;
        } gem;
  
 +      struct intel_pxp *pxp;
 +
        u8 pch_ssc_use;
  
        /* For i915gm/i945gm vblank irq workaround */
@@@ -936,6 -918,10 +921,6 @@@ IS_SUBPLATFORM(const struct drm_i915_pr
  
  #define HAS_GLOBAL_MOCS_REGISTERS(dev_priv)   (INTEL_INFO(dev_priv)->has_global_mocs)
  
 -#define HAS_PXP(dev_priv)  ((IS_ENABLED(CONFIG_DRM_I915_PXP) && \
 -                          INTEL_INFO(dev_priv)->has_pxp) && \
 -                          VDBOX_MASK(to_gt(dev_priv)))
 -
  #define HAS_GMCH(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch)
  
  #define HAS_GMD_ID(i915)      (INTEL_INFO(i915)->has_gmd_id)
@@@ -1078,7 -1078,6 +1078,6 @@@ static const struct intel_device_info d
        XE_LPD_FEATURES,
        .__runtime.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) |
                               BIT(TRANSCODER_C) | BIT(TRANSCODER_D),
-       .require_force_probe = 1,
  };
  
  static const struct intel_device_info ats_m_info = {
@@@ -1125,7 -1124,7 +1124,7 @@@ static const struct intel_gt_definitio
                .type = GT_MEDIA,
                .name = "Standalone Media GT",
                .gsi_offset = MTL_MEDIA_GSI_BASE,
 -              .engine_mask = BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
 +              .engine_mask = BIT(VECS0) | BIT(VCS0) | BIT(VCS2) | BIT(GSC0),
        },
        {}
  };
  #include "i915_file_private.h"
  #include "i915_perf.h"
  #include "i915_perf_oa_regs.h"
+ #include "i915_reg.h"
  
  /* HW requires this to be a power of two, between 128k and 16M, though driver
   * is currently generally designed assuming the largest 16M size is used such
@@@ -1845,7 -1846,8 +1846,7 @@@ static u32 *save_restore_register(struc
        for (d = 0; d < dword_count; d++) {
                *cs++ = cmd;
                *cs++ = i915_mmio_reg_offset(reg) + 4 * d;
 -              *cs++ = intel_gt_scratch_offset(stream->engine->gt,
 -                                              offset) + 4 * d;
 +              *cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d;
                *cs++ = 0;
        }
  
@@@ -1878,13 -1880,7 +1879,13 @@@ static int alloc_noa_wait(struct i915_p
                                          MI_PREDICATE_RESULT_2_ENGINE(base) :
                                          MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
  
 -      bo = i915_gem_object_create_internal(i915, 4096);
 +      /*
 +       * gt->scratch was being used to save/restore the GPR registers, but on
 +       * MTL the scratch uses stolen lmem. An MI_SRM to this memory region
 +       * causes an engine hang. Instead allocate an additional page here to
 +       * save/restore GPR registers
 +       */
 +      bo = i915_gem_object_create_internal(i915, 8192);
        if (IS_ERR(bo)) {
                drm_err(&i915->drm,
                        "Failed to allocate NOA wait batchbuffer\n");
@@@ -1918,19 -1914,14 +1919,19 @@@ retry
                goto err_unpin;
        }
  
 +      stream->noa_wait = vma;
 +
 +#define GPR_SAVE_OFFSET 4096
 +#define PREDICATE_SAVE_OFFSET 4160
 +
        /* Save registers. */
        for (i = 0; i < N_CS_GPR; i++)
                cs = save_restore_register(
                        stream, cs, true /* save */, CS_GPR(i),
 -                      INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
 +                      GPR_SAVE_OFFSET + 8 * i, 2);
        cs = save_restore_register(
                stream, cs, true /* save */, mi_predicate_result,
 -              INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
 +              PREDICATE_SAVE_OFFSET, 1);
  
        /* First timestamp snapshot location. */
        ts0 = cs;
        for (i = 0; i < N_CS_GPR; i++)
                cs = save_restore_register(
                        stream, cs, false /* restore */, CS_GPR(i),
 -                      INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
 +                      GPR_SAVE_OFFSET + 8 * i, 2);
        cs = save_restore_register(
                stream, cs, false /* restore */, mi_predicate_result,
 -              INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
 +              PREDICATE_SAVE_OFFSET, 1);
  
        /* And return to the ring. */
        *cs++ = MI_BATCH_BUFFER_END;
        i915_gem_object_flush_map(bo);
        __i915_gem_object_release_map(bo);
  
 -      stream->noa_wait = vma;
        goto out_ww;
  
  err_unpin:
@@@ -2271,7 -2263,7 +2272,7 @@@ retry
                goto err_add_request;
  
        err = rq->engine->emit_bb_start(rq,
 -                                      vma->node.start, 0,
 +                                      i915_vma_offset(vma), 0,
                                        I915_DISPATCH_SECURE);
        if (err)
                goto err_add_request;
@@@ -3139,11 -3131,8 +3140,11 @@@ get_sseu_config(struct intel_sseu *out_
   */
  u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
  {
 -      /* Wa_18013179988:dg2 */
 -      if (IS_DG2(i915)) {
 +      /*
 +       * Wa_18013179988:dg2
 +       * Wa_14015846243:mtl
 +       */
 +      if (IS_DG2(i915) || IS_METEORLAKE(i915)) {
                intel_wakeref_t wakeref;
                u32 reg, shift;
  
@@@ -4321,17 -4310,6 +4322,17 @@@ static const struct i915_range gen12_oa
        {}
  };
  
 +/*
 + * Ref: 14010536224:
 + * 0x20cc is repurposed on MTL, so use a separate array for MTL.
 + */
 +static const struct i915_range mtl_oa_mux_regs[] = {
 +      { .start = 0x0d00, .end = 0x0d04 },     /* RPM_CONFIG[0-1] */
 +      { .start = 0x0d0c, .end = 0x0d2c },     /* NOA_CONFIG[0-8] */
 +      { .start = 0x9840, .end = 0x9840 },     /* GDT_CHICKEN_BITS */
 +      { .start = 0x9884, .end = 0x9888 },     /* NOA_WRITE */
 +};
 +
  static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
  {
        return reg_in_range_table(addr, gen7_oa_b_counters);
@@@ -4375,10 -4353,7 +4376,10 @@@ static bool xehp_is_valid_b_counter_add
  
  static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
  {
 -      return reg_in_range_table(addr, gen12_oa_mux_regs);
 +      if (IS_METEORLAKE(perf->i915))
 +              return reg_in_range_table(addr, mtl_oa_mux_regs);
 +      else
 +              return reg_in_range_table(addr, gen12_oa_mux_regs);
  }
  
  static u32 mask_reg_value(u32 reg, u32 val)
@@@ -4775,7 -4750,6 +4776,7 @@@ static void oa_init_supported_formats(s
                break;
  
        case INTEL_DG2:
 +      case INTEL_METEORLAKE:
                oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
                oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
                break;
@@@ -26,6 -26,7 +26,7 @@@
  #define _I915_REG_H_
  
  #include "i915_reg_defs.h"
+ #include "display/intel_display_reg_defs.h"
  
  /**
   * DOC: The i915 register macro definition style guide
   *  #define GEN8_BAR                    _MMIO(0xb888)
   */
  
- #define DISPLAY_MMIO_BASE(dev_priv)   (INTEL_INFO(dev_priv)->display.mmio_offset)
- /*
-  * Given the first two numbers __a and __b of arbitrarily many evenly spaced
-  * numbers, pick the 0-based __index'th value.
-  *
-  * Always prefer this over _PICK() if the numbers are evenly spaced.
-  */
- #define _PICK_EVEN(__index, __a, __b) ((__a) + (__index) * ((__b) - (__a)))
- /*
-  * Given the arbitrary numbers in varargs, pick the 0-based __index'th number.
-  *
-  * Always prefer _PICK_EVEN() over this if the numbers are evenly spaced.
-  */
- #define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index])
- /*
-  * Named helper wrappers around _PICK_EVEN() and _PICK().
-  */
- #define _PIPE(pipe, a, b)             _PICK_EVEN(pipe, a, b)
- #define _PLANE(plane, a, b)           _PICK_EVEN(plane, a, b)
- #define _TRANS(tran, a, b)            _PICK_EVEN(tran, a, b)
- #define _PORT(port, a, b)             _PICK_EVEN(port, a, b)
- #define _PLL(pll, a, b)                       _PICK_EVEN(pll, a, b)
- #define _PHY(phy, a, b)                       _PICK_EVEN(phy, a, b)
- #define _MMIO_PIPE(pipe, a, b)                _MMIO(_PIPE(pipe, a, b))
- #define _MMIO_PLANE(plane, a, b)      _MMIO(_PLANE(plane, a, b))
- #define _MMIO_TRANS(tran, a, b)               _MMIO(_TRANS(tran, a, b))
- #define _MMIO_PORT(port, a, b)                _MMIO(_PORT(port, a, b))
- #define _MMIO_PLL(pll, a, b)          _MMIO(_PLL(pll, a, b))
- #define _MMIO_PHY(phy, a, b)          _MMIO(_PHY(phy, a, b))
- #define _PHY3(phy, ...)                       _PICK(phy, __VA_ARGS__)
- #define _MMIO_PIPE3(pipe, a, b, c)    _MMIO(_PICK(pipe, a, b, c))
- #define _MMIO_PORT3(pipe, a, b, c)    _MMIO(_PICK(pipe, a, b, c))
- #define _MMIO_PHY3(phy, a, b, c)      _MMIO(_PHY3(phy, a, b, c))
- #define _MMIO_PLL3(pll, ...)          _MMIO(_PICK(pll, __VA_ARGS__))
- /*
-  * Device info offset array based helpers for groups of registers with unevenly
-  * spaced base offsets.
-  */
- #define _MMIO_PIPE2(pipe, reg)                _MMIO(INTEL_INFO(dev_priv)->display.pipe_offsets[(pipe)] - \
-                                             INTEL_INFO(dev_priv)->display.pipe_offsets[PIPE_A] + \
-                                             DISPLAY_MMIO_BASE(dev_priv) + (reg))
- #define _MMIO_TRANS2(tran, reg)               _MMIO(INTEL_INFO(dev_priv)->display.trans_offsets[(tran)] - \
-                                             INTEL_INFO(dev_priv)->display.trans_offsets[TRANSCODER_A] + \
-                                             DISPLAY_MMIO_BASE(dev_priv) + (reg))
- #define _MMIO_CURSOR2(pipe, reg)      _MMIO(INTEL_INFO(dev_priv)->display.cursor_offsets[(pipe)] - \
-                                             INTEL_INFO(dev_priv)->display.cursor_offsets[PIPE_A] + \
-                                             DISPLAY_MMIO_BASE(dev_priv) + (reg))
- #define __MASKED_FIELD(mask, value) ((mask) << 16 | (value))
- #define _MASKED_FIELD(mask, value) ({                                    \
-       if (__builtin_constant_p(mask))                                    \
-               BUILD_BUG_ON_MSG(((mask) & 0xffff0000), "Incorrect mask"); \
-       if (__builtin_constant_p(value))                                   \
-               BUILD_BUG_ON_MSG((value) & 0xffff0000, "Incorrect value"); \
-       if (__builtin_constant_p(mask) && __builtin_constant_p(value))     \
-               BUILD_BUG_ON_MSG((value) & ~(mask),                        \
-                                "Incorrect value for mask");              \
-       __MASKED_FIELD(mask, value); })
- #define _MASKED_BIT_ENABLE(a) ({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); })
- #define _MASKED_BIT_DISABLE(a)        (_MASKED_FIELD((a), 0))
  #define GU_CNTL                               _MMIO(0x101010)
  #define   LMEM_INIT                   REG_BIT(7)
 +#define   DRIVERFLR                   REG_BIT(31)
 +#define GU_DEBUG                      _MMIO(0x101018)
 +#define   DRIVERFLR_STATUS            REG_BIT(31)
  
  #define GEN6_STOLEN_RESERVED          _MMIO(0x1082C0)
  #define GEN6_STOLEN_RESERVED_ADDR_MASK        (0xFFF << 20)
  #define MBUS_JOIN_PIPE_SELECT(pipe)   REG_FIELD_PREP(MBUS_JOIN_PIPE_SELECT_MASK, pipe)
  #define MBUS_JOIN_PIPE_SELECT_NONE    MBUS_JOIN_PIPE_SELECT(7)
  
- #define HDPORT_STATE                  _MMIO(0x45050)
- #define   HDPORT_DPLL_USED_MASK               REG_GENMASK(15, 12)
- #define   HDPORT_DDI_USED(phy)                REG_BIT(2 * (phy) + 1)
- #define   HDPORT_ENABLED              REG_BIT(0)
  /* Make render/texture TLB fetches lower priorty than associated data
   *   fetches. This is not turned on by default
   */
  #define _PALETTE_A            0xa000
  #define _PALETTE_B            0xa800
  #define _CHV_PALETTE_C                0xc000
- #define PALETTE_RED_MASK        REG_GENMASK(23, 16)
- #define PALETTE_GREEN_MASK      REG_GENMASK(15, 8)
- #define PALETTE_BLUE_MASK       REG_GENMASK(7, 0)
+ /* 8bit mode / i965+ 10.6 interpolated mode ldw/udw */
+ #define   PALETTE_RED_MASK            REG_GENMASK(23, 16)
+ #define   PALETTE_GREEN_MASK          REG_GENMASK(15, 8)
+ #define   PALETTE_BLUE_MASK           REG_GENMASK(7, 0)
  #define PALETTE(pipe, i)      _MMIO(DISPLAY_MMIO_BASE(dev_priv) + \
                                      _PICK((pipe), _PALETTE_A,         \
                                            _PALETTE_B, _CHV_PALETTE_C) + \
  
  /* Skylake+ pipe bottom (background) color */
  #define _SKL_BOTTOM_COLOR_A           0x70034
+ #define _SKL_BOTTOM_COLOR_B           0x71034
  #define   SKL_BOTTOM_COLOR_GAMMA_ENABLE               REG_BIT(31)
  #define   SKL_BOTTOM_COLOR_CSC_ENABLE         REG_BIT(30)
- #define SKL_BOTTOM_COLOR(pipe)                _MMIO_PIPE2(pipe, _SKL_BOTTOM_COLOR_A)
+ #define SKL_BOTTOM_COLOR(pipe)                _MMIO_PIPE(pipe, _SKL_BOTTOM_COLOR_A, _SKL_BOTTOM_COLOR_B)
  
  #define _ICL_PIPE_A_STATUS                    0x70058
  #define ICL_PIPESTATUS(pipe)                  _MMIO_PIPE2(pipe, _ICL_PIPE_A_STATUS)
  /* legacy palette */
  #define _LGC_PALETTE_A           0x4a000
  #define _LGC_PALETTE_B           0x4a800
- #define LGC_PALETTE_RED_MASK     REG_GENMASK(23, 16)
- #define LGC_PALETTE_GREEN_MASK   REG_GENMASK(15, 8)
- #define LGC_PALETTE_BLUE_MASK    REG_GENMASK(7, 0)
+ /* see PALETTE_* for the bits */
  #define LGC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _LGC_PALETTE_A, _LGC_PALETTE_B) + (i) * 4)
  
  /* ilk/snb precision palette */
  #define _PREC_PALETTE_A           0x4b000
  #define _PREC_PALETTE_B           0x4c000
- #define   PREC_PALETTE_RED_MASK   REG_GENMASK(29, 20)
- #define   PREC_PALETTE_GREEN_MASK REG_GENMASK(19, 10)
- #define   PREC_PALETTE_BLUE_MASK  REG_GENMASK(9, 0)
+ /* 10bit mode */
+ #define   PREC_PALETTE_10_RED_MASK            REG_GENMASK(29, 20)
+ #define   PREC_PALETTE_10_GREEN_MASK          REG_GENMASK(19, 10)
+ #define   PREC_PALETTE_10_BLUE_MASK           REG_GENMASK(9, 0)
+ /* 12.4 interpolated mode ldw */
+ #define   PREC_PALETTE_12P4_RED_LDW_MASK      REG_GENMASK(29, 24)
+ #define   PREC_PALETTE_12P4_GREEN_LDW_MASK    REG_GENMASK(19, 14)
+ #define   PREC_PALETTE_12P4_BLUE_LDW_MASK     REG_GENMASK(9, 4)
+ /* 12.4 interpolated mode udw */
+ #define   PREC_PALETTE_12P4_RED_UDW_MASK      REG_GENMASK(29, 20)
+ #define   PREC_PALETTE_12P4_GREEN_UDW_MASK    REG_GENMASK(19, 10)
+ #define   PREC_PALETTE_12P4_BLUE_UDW_MASK     REG_GENMASK(9, 0)
  #define PREC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _PREC_PALETTE_A, _PREC_PALETTE_B) + (i) * 4)
  
  #define  _PREC_PIPEAGCMAX              0x4d000
@@@ -7631,12 -7564,10 +7567,10 @@@ enum skl_power_gate 
  #define _PAL_PREC_DATA_A      0x4A404
  #define _PAL_PREC_DATA_B      0x4AC04
  #define _PAL_PREC_DATA_C      0x4B404
+ /* see PREC_PALETTE_* for the bits */
  #define _PAL_PREC_GC_MAX_A    0x4A410
  #define _PAL_PREC_GC_MAX_B    0x4AC10
  #define _PAL_PREC_GC_MAX_C    0x4B410
- #define   PREC_PAL_DATA_RED_MASK      REG_GENMASK(29, 20)
- #define   PREC_PAL_DATA_GREEN_MASK    REG_GENMASK(19, 10)
- #define   PREC_PAL_DATA_BLUE_MASK     REG_GENMASK(9, 0)
  #define _PAL_PREC_EXT_GC_MAX_A        0x4A420
  #define _PAL_PREC_EXT_GC_MAX_B        0x4AC20
  #define _PAL_PREC_EXT_GC_MAX_C        0x4B420
  
  #define _PAL_PREC_MULTI_SEG_DATA_A    0x4A40C
  #define _PAL_PREC_MULTI_SEG_DATA_B    0x4AC0C
- #define  PAL_PREC_MULTI_SEG_RED_LDW_MASK   REG_GENMASK(29, 24)
- #define  PAL_PREC_MULTI_SEG_RED_UDW_MASK   REG_GENMASK(29, 20)
- #define  PAL_PREC_MULTI_SEG_GREEN_LDW_MASK REG_GENMASK(19, 14)
- #define  PAL_PREC_MULTI_SEG_GREEN_UDW_MASK REG_GENMASK(19, 10)
- #define  PAL_PREC_MULTI_SEG_BLUE_LDW_MASK  REG_GENMASK(9, 4)
- #define  PAL_PREC_MULTI_SEG_BLUE_UDW_MASK  REG_GENMASK(9, 0)
+ /* see PREC_PALETTE_12P4_* for the bits */
  
  #define PREC_PAL_MULTI_SEG_INDEX(pipe)        _MMIO_PIPE(pipe, \
                                        _PAL_PREC_MULTI_SEG_INDEX_A, \
  #define _CGM_PIPE_A_CSC_COEFF67       (VLV_DISPLAY_BASE + 0x6790C)
  #define _CGM_PIPE_A_CSC_COEFF8        (VLV_DISPLAY_BASE + 0x67910)
  #define _CGM_PIPE_A_DEGAMMA   (VLV_DISPLAY_BASE + 0x66000)
- #define   CGM_PIPE_DEGAMMA_RED_MASK   REG_GENMASK(13, 0)
- #define   CGM_PIPE_DEGAMMA_GREEN_MASK REG_GENMASK(29, 16)
- #define   CGM_PIPE_DEGAMMA_BLUE_MASK  REG_GENMASK(13, 0)
+ /* cgm degamma ldw */
+ #define   CGM_PIPE_DEGAMMA_GREEN_LDW_MASK     REG_GENMASK(29, 16)
+ #define   CGM_PIPE_DEGAMMA_BLUE_LDW_MASK      REG_GENMASK(13, 0)
+ /* cgm degamma udw */
+ #define   CGM_PIPE_DEGAMMA_RED_UDW_MASK               REG_GENMASK(13, 0)
  #define _CGM_PIPE_A_GAMMA     (VLV_DISPLAY_BASE + 0x67000)
- #define   CGM_PIPE_GAMMA_RED_MASK     REG_GENMASK(9, 0)
- #define   CGM_PIPE_GAMMA_GREEN_MASK   REG_GENMASK(25, 16)
- #define   CGM_PIPE_GAMMA_BLUE_MASK    REG_GENMASK(9, 0)
+ /* cgm gamma ldw */
+ #define   CGM_PIPE_GAMMA_GREEN_LDW_MASK               REG_GENMASK(25, 16)
+ #define   CGM_PIPE_GAMMA_BLUE_LDW_MASK                REG_GENMASK(9, 0)
+ /* cgm gamma udw */
+ #define   CGM_PIPE_GAMMA_RED_UDW_MASK         REG_GENMASK(9, 0)
  #define _CGM_PIPE_A_MODE      (VLV_DISPLAY_BASE + 0x67A00)
  #define   CGM_PIPE_MODE_GAMMA (1 << 2)
  #define   CGM_PIPE_MODE_CSC   (1 << 1)
@@@ -111,10 -111,6 +111,6 @@@ bool i915_error_injected(void)
  #define range_overflows_end_t(type, start, size, max) \
        range_overflows_end((type)(start), (type)(size), (type)(max))
  
- /* Note we don't consider signbits :| */
- #define overflows_type(x, T) \
-       (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
  #define ptr_mask_bits(ptr, n) ({                                      \
        unsigned long __v = (unsigned long)(ptr);                       \
        (typeof(ptr))(__v & -BIT(n));                                   \
  #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
  #define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
  
 -#define struct_member(T, member) (((T *)0)->member)
 -
  #define fetch_and_zero(ptr) ({                                                \
        typeof(*ptr) __T = *(ptr);                                      \
        *(ptr) = (typeof(*ptr))0;                                       \
@@@ -168,7 -166,7 +164,7 @@@ static __always_inline ptrdiff_t ptrdif
   */
  #define container_of_user(ptr, type, member) ({                               \
        void __user *__mptr = (void __user *)(ptr);                     \
 -      BUILD_BUG_ON_MSG(!__same_type(*(ptr), struct_member(type, member)) && \
 +      BUILD_BUG_ON_MSG(!__same_type(*(ptr), typeof_member(type, member)) && \
                         !__same_type(*(ptr), void),                    \
                         "pointer type mismatch in container_of()");    \
        ((type __user *)(__mptr - offsetof(type, member))); })
@@@ -29,6 -29,7 +29,7 @@@
  
  #include "i915_drv.h"
  #include "i915_iosf_mbi.h"
+ #include "i915_reg.h"
  #include "i915_trace.h"
  #include "i915_vgpu.h"
  #include "intel_pm.h"
@@@ -823,9 -824,9 +824,9 @@@ void intel_uncore_forcewake_flush(struc
  }
  
  /**
-  * intel_uncore_forcewake_put__locked - grab forcewake domain references
+  * intel_uncore_forcewake_put__locked - release forcewake domain references
   * @uncore: the intel_uncore structure
-  * @fw_domains: forcewake domains to get reference on
+  * @fw_domains: forcewake domains to put references
   *
   * See intel_uncore_forcewake_put(). This variant places the onus
   * on the caller to explicitly handle the dev_priv->uncore.lock spinlock.
@@@ -2700,62 -2701,6 +2701,62 @@@ void intel_uncore_prune_engine_fw_domai
                if (fw_domains & BIT(domain_id))
                        fw_domain_fini(uncore, domain_id);
        }
 +
 +      if ((fw_domains & BIT(FW_DOMAIN_ID_GSC)) && !HAS_ENGINE(gt, GSC0))
 +              fw_domain_fini(uncore, FW_DOMAIN_ID_GSC);
 +}
 +
 +/*
 + * The driver-initiated FLR is the highest level of reset that we can trigger
 + * from within the driver. It is different from the PCI FLR in that it doesn't
 + * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 + * it doesn't require a re-enumeration of the PCI BARs. However, the
 + * driver-initiated FLR does still cause a reset of both GT and display and a
 + * memory wipe of local and stolen memory, so recovery would require a full HW
 + * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 + * perform the FLR as the very last action before releasing access to the HW
 + * during the driver release flow, we don't attempt recovery at all, because
 + * if/when a new instance of i915 is bound to the device it will do a full
 + * re-init anyway.
 + */
 +static void driver_initiated_flr(struct intel_uncore *uncore)
 +{
 +      struct drm_i915_private *i915 = uncore->i915;
 +      const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait */
 +      int ret;
 +
 +      drm_dbg(&i915->drm, "Triggering Driver-FLR\n");
 +
 +      /*
 +       * Make sure any pending FLR requests have cleared by waiting for the
 +       * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
 +       * to make sure it's not still set from a prior attempt (it's a write to
 +       * clear bit).
 +       * Note that we should never be in a situation where a previous attempt
 +       * is still pending (unless the HW is totally dead), but better to be
 +       * safe in case something unexpected happens
 +       */
 +      ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms);
 +      if (ret) {
 +              drm_err(&i915->drm,
 +                      "Failed to wait for Driver-FLR bit to clear! %d\n",
 +                      ret);
 +              return;
 +      }
 +      intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
 +
 +      /* Trigger the actual Driver-FLR */
 +      intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR);
 +
 +      ret = intel_wait_for_register_fw(uncore, GU_DEBUG,
 +                                       DRIVERFLR_STATUS, DRIVERFLR_STATUS,
 +                                       flr_timeout_ms);
 +      if (ret) {
 +              drm_err(&i915->drm, "wait for Driver-FLR completion failed! %d\n", ret);
 +              return;
 +      }
 +
 +      intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
  }
  
  /* Called via drm-managed action */
@@@ -2771,9 -2716,6 +2772,9 @@@ void intel_uncore_fini_mmio(struct drm_
                intel_uncore_fw_domains_fini(uncore);
                iosf_mbi_punit_release();
        }
 +
 +      if (intel_uncore_needs_flr_on_fini(uncore))
 +              driver_initiated_flr(uncore);
  }
  
  /**