Merge tag 'gvt-next-fixes-2019-09-06' of https://github.com/intel/gvt-linux into...
author Rodrigo Vivi <rodrigo.vivi@intel.com>
Fri, 6 Sep 2019 16:52:43 +0000 (09:52 -0700)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Fri, 6 Sep 2019 16:52:44 +0000 (09:52 -0700)
gvt-next-fixes-2019-09-06

- Fix guest context head pointer update for hang (Xiaolin)
- Fix guest context ring state for reset (Weinan)

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190906054255.GC3458@zhen-hp.sh.intel.com
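
For context on the first fix above: when a new workload is created for a ring, the guest context image may not have an up-to-date head pointer yet, so the head is now taken from the most recent still-queued workload of the same context (the list_for_each_entry_reverse() hunk in the diff below). The following is a minimal standalone sketch of that selection, with simplified stand-in types and a plain array in place of the kernel's workload list; it is illustrative only, not the GVT-g code.

/*
 * Sketch: choose the ring-buffer head for a new workload. If an earlier
 * workload for the same guest context is still queued, its tail is the
 * only reliable head, because the guest context image may not have been
 * written back yet. struct workload and its fields are simplified
 * stand-ins for struct intel_vgpu_workload.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct workload {
	uint32_t context_id;
	uint32_t lrca;
	uint32_t rb_head;
	uint32_t rb_tail;
};

static bool same_context(const struct workload *a, const struct workload *b)
{
	return a->context_id == b->context_id && a->lrca == b->lrca;
}

/* Walk the queue newest-to-oldest, mirroring list_for_each_entry_reverse(). */
static uint32_t pick_rb_head(const struct workload *queue, size_t n,
			     const struct workload *new_wl)
{
	for (size_t i = n; i-- > 0; ) {
		if (same_context(&queue[i], new_wl))
			return queue[i].rb_tail;	/* last known-good head */
	}
	return new_wl->rb_head;				/* fall back to the guest value */
}

int main(void)
{
	struct workload queued[] = {
		{ .context_id = 1, .lrca = 0x100, .rb_head = 0x00, .rb_tail = 0x40 },
		{ .context_id = 2, .lrca = 0x200, .rb_head = 0x10, .rb_tail = 0x80 },
	};
	struct workload next = { .context_id = 1, .lrca = 0x100, .rb_head = 0x00, .rb_tail = 0x60 };

	/* Prints head = 0x40: the tail of the queued same-context workload. */
	printf("head = 0x%x\n", pick_rb_head(queued, 2, &next));
	return 0;
}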
drivers/gpu/drm/i915/gvt/scheduler.c
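
The second fix keeps the vGPU's virtual ring registers in step with what is actually dispatched: before a workload runs, the ring start address parsed from the guest submission is written back into the virtual RING_START register (update_vreg_in_ctx() in the diff below), so a guest that inspects or resets the ring sees consistent state. Below is a rough sketch of the idea under a much-simplified virtual-MMIO model; vreg[], ring_start_reg() and the register offset are illustrative assumptions, not the GVT-g API.

/*
 * Sketch: a vGPU's virtual MMIO space modelled as a flat dword array.
 * Before dispatch, mirror the workload's ring start address into the
 * virtual RING_START slot for its engine, so the guest-visible ring
 * state matches the commands being executed.
 */
#include <stdint.h>
#include <stdio.h>

#define VREG_BYTES	(2u * 1024 * 1024)	/* assumed virtual MMIO size */

struct vgpu {
	uint32_t vreg[VREG_BYTES / 4];
};

struct workload {
	uint32_t ring_mmio_base;	/* engine MMIO base, e.g. 0x2000 for the render ring */
	uint32_t rb_start;		/* ring buffer start taken from guest commands */
};

static inline uint32_t ring_start_reg(uint32_t mmio_base)
{
	return mmio_base + 0x38;	/* RING_START offset from the engine base (assumed) */
}

/* Simplified counterpart of update_vreg_in_ctx() from the diff. */
static void update_vreg_in_ctx(struct vgpu *vgpu, const struct workload *wl)
{
	vgpu->vreg[ring_start_reg(wl->ring_mmio_base) / 4] = wl->rb_start;
}

int main(void)
{
	static struct vgpu vgpu;	/* static: the vreg array is too big for the stack */
	struct workload wl = { .ring_mmio_base = 0x2000, .rb_start = 0x00100000 };

	update_vreg_in_ctx(&vgpu, &wl);
	printf("virtual RING_START = 0x%08x\n",
	       vgpu.vreg[ring_start_reg(wl.ring_mmio_base) / 4]);
	return 0;
}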

  
  #include <linux/kthread.h>
  
 +#include "gem/i915_gem_context.h"
 +#include "gem/i915_gem_pm.h"
 +#include "gt/intel_context.h"
 +
  #include "i915_drv.h"
  #include "gvt.h"
  
@@@ -84,8 -80,8 +84,8 @@@ static void sr_oa_regs(struct intel_vgp
                u32 *reg_state, bool save)
  {
        struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
 -      u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
 -      u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
 +      u32 ctx_oactxctrl = dev_priv->perf.ctx_oactxctrl_offset;
 +      u32 ctx_flexeu0 = dev_priv->perf.ctx_flexeu0_offset;
        int i = 0;
        u32 flex_mmio[] = {
                i915_mmio_reg_offset(EU_PERF_CNTL0),
@@@ -281,19 -277,17 +281,19 @@@ static int shadow_context_status_change
        return NOTIFY_OK;
  }
  
 -static void shadow_context_descriptor_update(struct intel_context *ce)
 +static void
 +shadow_context_descriptor_update(struct intel_context *ce,
 +                               struct intel_vgpu_workload *workload)
  {
 -      u64 desc = 0;
 -
 -      desc = ce->lrc_desc;
 +      u64 desc = ce->lrc_desc;
  
 -      /* Update bits 0-11 of the context descriptor which includes flags
 +      /*
 +       * Update bits 0-11 of the context descriptor which includes flags
         * like GEN8_CTX_* cached in desc_template
         */
 -      desc &= U64_MAX << 12;
 -      desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1);
 +      desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
 +      desc |= workload->ctx_desc.addressing_mode <<
 +              GEN8_CTX_ADDRESSING_MODE_SHIFT;
  
        ce->lrc_desc = desc;
  }
@@@ -304,29 -298,12 +304,29 @@@ static int copy_workload_to_ring_buffer
        struct i915_request *req = workload->req;
        void *shadow_ring_buffer_va;
        u32 *cs;
 +      int err;
  
 -      if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915)
 -              || IS_COFFEELAKE(req->i915))
 -              && is_inhibit_context(req->hw_context))
 +      if (IS_GEN(req->i915, 9) && is_inhibit_context(req->hw_context))
                intel_vgpu_restore_inhibit_context(vgpu, req);
  
 +      /*
 +       * To track whether a request has started on HW, we can emit a
 +       * breadcrumb at the beginning of the request and check its
 +       * timeline's HWSP to see if the breadcrumb has advanced past the
 +       * start of this request. The request must carry an init breadcrumb
 +       * whenever its timeline has has_init_breadcrumb set, otherwise the
 +       * scheduler may read a stale state for it during reset. Since
 +       * requests from GVT always set has_init_breadcrumb, emit the init
 +       * breadcrumb for every request here.
 +       */
 +      if (req->engine->emit_init_breadcrumb) {
 +              err = req->engine->emit_init_breadcrumb(req);
 +              if (err) {
 +                      gvt_vgpu_err("fail to emit init breadcrumb\n");
 +                      return err;
 +              }
 +      }
 +
        /* allocate shadow ring buffer */
        cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
        if (IS_ERR(cs)) {
@@@ -361,23 -338,26 +361,23 @@@ static void release_shadow_wa_ctx(struc
        wa_ctx->indirect_ctx.shadow_va = NULL;
  }
  
 -static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
 -                                       struct i915_gem_context *ctx)
 +static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
 +                                        struct i915_gem_context *ctx)
  {
        struct intel_vgpu_mm *mm = workload->shadow_mm;
 -      struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
 +      struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm);
        int i = 0;
  
 -      if (mm->type != INTEL_GVT_MM_PPGTT || !mm->ppgtt_mm.shadowed)
 -              return -EINVAL;
 -
        if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
 -              px_dma(&ppgtt->pml4) = mm->ppgtt_mm.shadow_pdps[0];
 +              px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0];
        } else {
                for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
 -                      px_dma(ppgtt->pdp.page_directory[i]) =
 -                              mm->ppgtt_mm.shadow_pdps[i];
 +                      struct i915_page_directory * const pd =
 +                              i915_pd_entry(ppgtt->pd, i);
 +
 +                      px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
                }
        }
 -
 -      return 0;
  }
  
  static int
@@@ -385,22 -365,26 +385,22 @@@ intel_gvt_workload_req_alloc(struct int
  {
        struct intel_vgpu *vgpu = workload->vgpu;
        struct intel_vgpu_submission *s = &vgpu->submission;
 -      struct i915_gem_context *shadow_ctx = s->shadow_ctx;
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 -      struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
        struct i915_request *rq;
 -      int ret = 0;
  
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
  
        if (workload->req)
 -              goto out;
 +              return 0;
  
 -      rq = i915_request_alloc(engine, shadow_ctx);
 +      rq = i915_request_create(s->shadow[workload->ring_id]);
        if (IS_ERR(rq)) {
                gvt_vgpu_err("fail to allocate gem request\n");
 -              ret = PTR_ERR(rq);
 -              goto out;
 +              return PTR_ERR(rq);
        }
 +
        workload->req = i915_request_get(rq);
 -out:
 -      return ret;
 +      return 0;
  }
  
  /**
@@@ -415,7 -399,10 +415,7 @@@ int intel_gvt_scan_and_shadow_workload(
  {
        struct intel_vgpu *vgpu = workload->vgpu;
        struct intel_vgpu_submission *s = &vgpu->submission;
 -      struct i915_gem_context *shadow_ctx = s->shadow_ctx;
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 -      struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
 -      struct intel_context *ce;
        int ret;
  
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
        if (workload->shadow)
                return 0;
  
 -      /* pin shadow context by gvt even the shadow context will be pinned
 -       * when i915 alloc request. That is because gvt will update the guest
 -       * context from shadow context when workload is completed, and at that
 -       * moment, i915 may already unpined the shadow context to make the
 -       * shadow_ctx pages invalid. So gvt need to pin itself. After update
 -       * the guest context, gvt can unpin the shadow_ctx safely.
 -       */
 -      ce = intel_context_pin(shadow_ctx, engine);
 -      if (IS_ERR(ce)) {
 -              gvt_vgpu_err("fail to pin shadow context\n");
 -              return PTR_ERR(ce);
 -      }
 -
 -      shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
 -      shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
 -                                  GEN8_CTX_ADDRESSING_MODE_SHIFT;
 -
        if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated))
 -              shadow_context_descriptor_update(ce);
 +              shadow_context_descriptor_update(s->shadow[workload->ring_id],
 +                                               workload);
  
        ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
        if (ret)
 -              goto err_unpin;
 +              return ret;
  
        if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) {
                ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
        return 0;
  err_shadow:
        release_shadow_wa_ctx(&workload->wa_ctx);
 -err_unpin:
 -      intel_context_unpin(ce);
        return ret;
  }
  
@@@ -479,7 -484,7 +479,7 @@@ static int prepare_shadow_batch_buffer(
                                                bb->obj->base.size);
                                bb->clflush &= ~CLFLUSH_AFTER;
                        }
 -                      i915_gem_obj_finish_shmem_access(bb->obj);
 +                      i915_gem_object_finish_access(bb->obj);
                        bb->accessing = false;
  
                } else {
                        }
  
                        ret = i915_gem_object_set_to_gtt_domain(bb->obj,
 -                                      false);
 +                                                              false);
                        if (ret)
                                goto err;
  
 -                      i915_gem_obj_finish_shmem_access(bb->obj);
 -                      bb->accessing = false;
 -
                        ret = i915_vma_move_to_active(bb->vma,
                                                      workload->req,
                                                      0);
                        if (ret)
                                goto err;
 +
 +                      i915_gem_object_finish_access(bb->obj);
 +                      bb->accessing = false;
                }
        }
        return 0;
@@@ -568,6 -573,16 +568,16 @@@ static int prepare_shadow_wa_ctx(struc
        return 0;
  }
  
+ static void update_vreg_in_ctx(struct intel_vgpu_workload *workload)
+ {
+       struct intel_vgpu *vgpu = workload->vgpu;
+       struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+       u32 ring_base;
+
+       ring_base = dev_priv->engine[workload->ring_id]->mmio_base;
+       vgpu_vreg_t(vgpu, RING_START(ring_base)) = workload->rb_start;
+ }
+
  static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
  {
        struct intel_vgpu *vgpu = workload->vgpu;
        list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
                if (bb->obj) {
                        if (bb->accessing)
 -                              i915_gem_obj_finish_shmem_access(bb->obj);
 +                              i915_gem_object_finish_access(bb->obj);
  
                        if (bb->va && !IS_ERR(bb->va))
                                i915_gem_object_unpin_map(bb->obj);
                                i915_vma_unpin(bb->vma);
                                i915_vma_close(bb->vma);
                        }
 -                      __i915_gem_object_release_unless_active(bb->obj);
 +                      i915_gem_object_put(bb->obj);
                }
                list_del(&bb->list);
                kfree(bb);
  static int prepare_workload(struct intel_vgpu_workload *workload)
  {
        struct intel_vgpu *vgpu = workload->vgpu;
 +      struct intel_vgpu_submission *s = &vgpu->submission;
 +      int ring = workload->ring_id;
        int ret = 0;
  
        ret = intel_vgpu_pin_mm(workload->shadow_mm);
                return ret;
        }
  
 +      if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT ||
 +          !workload->shadow_mm->ppgtt_mm.shadowed) {
 +              gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
 +              return -EINVAL;
 +      }
 +
        update_shadow_pdps(workload);
  
 +      set_context_ppgtt_from_shadow(workload, s->shadow[ring]->gem_context);
 +
        ret = intel_vgpu_sync_oos_pages(workload->vgpu);
        if (ret) {
                gvt_vgpu_err("fail to vgpu sync oos pages\n");
@@@ -676,6 -681,8 +686,6 @@@ static int dispatch_workload(struct int
  {
        struct intel_vgpu *vgpu = workload->vgpu;
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 -      struct intel_vgpu_submission *s = &vgpu->submission;
 -      struct i915_gem_context *shadow_ctx = s->shadow_ctx;
        struct i915_request *rq;
        int ring_id = workload->ring_id;
        int ret;
        mutex_lock(&vgpu->vgpu_lock);
        mutex_lock(&dev_priv->drm.struct_mutex);
  
 -      ret = set_context_ppgtt_from_shadow(workload, shadow_ctx);
 -      if (ret < 0) {
 -              gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
 -              goto err_req;
 -      }
 -
        ret = intel_gvt_workload_req_alloc(workload);
        if (ret)
                goto err_req;
@@@ -792,31 -805,10 +802,31 @@@ static void update_guest_context(struc
        void *src;
        unsigned long context_gpa, context_page_num;
        int i;
 +      struct drm_i915_private *dev_priv = gvt->dev_priv;
 +      u32 ring_base;
 +      u32 head, tail;
 +      u16 wrap_count;
  
        gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
                      workload->ctx_desc.lrca);
  
 +      head = workload->rb_head;
 +      tail = workload->rb_tail;
 +      wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF;
 +
 +      if (tail < head) {
 +              if (wrap_count == RB_HEAD_WRAP_CNT_MAX)
 +                      wrap_count = 0;
 +              else
 +                      wrap_count += 1;
 +      }
 +
 +      head = (wrap_count << RB_HEAD_WRAP_CNT_OFF) | tail;
 +
 +      ring_base = dev_priv->engine[workload->ring_id]->mmio_base;
 +      vgpu_vreg_t(vgpu, RING_TAIL(ring_base)) = tail;
 +      vgpu_vreg_t(vgpu, RING_HEAD(ring_base)) = head;
 +
        context_page_num = rq->engine->context_size;
        context_page_num = context_page_num >> PAGE_SHIFT;
  
@@@ -929,6 -921,11 +939,6 @@@ static void complete_current_workload(s
                                intel_vgpu_trigger_virtual_event(vgpu, event);
                }
  
 -              /* unpin shadow ctx as the shadow_ctx update is done */
 -              mutex_lock(&rq->i915->drm.struct_mutex);
 -              intel_context_unpin(rq->hw_context);
 -              mutex_unlock(&rq->i915->drm.struct_mutex);
 -
                i915_request_put(fetch_and_zero(&workload->req));
        }
  
@@@ -984,7 -981,6 +994,7 @@@ static int workload_thread(void *priv
        int ret;
        bool need_force_wake = (INTEL_GEN(gvt->dev_priv) >= 9);
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
 +      struct intel_runtime_pm *rpm = &gvt->dev_priv->runtime_pm;
  
        kfree(p);
  
                                workload->ring_id, workload,
                                workload->vgpu->id);
  
 -              intel_runtime_pm_get(gvt->dev_priv);
 +              intel_runtime_pm_get(rpm);
  
                gvt_dbg_sched("ring id %d will dispatch workload %p\n",
                                workload->ring_id, workload);
                if (need_force_wake)
                        intel_uncore_forcewake_get(&gvt->dev_priv->uncore,
                                        FORCEWAKE_ALL);
+               /*
+                * Update the vReg of the vGPU which submitted this
+                * workload. The vGPU may use these registers for checking
+                * the context state. The value comes from GPU commands
+                * in this workload.
+                */
+               update_vreg_in_ctx(workload);
  
                ret = dispatch_workload(workload);
  
@@@ -1039,7 -1042,7 +1056,7 @@@ complete
                        intel_uncore_forcewake_put(&gvt->dev_priv->uncore,
                                        FORCEWAKE_ALL);
  
 -              intel_runtime_pm_put_unchecked(gvt->dev_priv);
 +              intel_runtime_pm_put_unchecked(rpm);
                if (ret && (vgpu_is_vm_unhealthy(ret)))
                        enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
        }
  }
  
  static void
 -i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
 +i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s,
 +                              struct i915_ppgtt *ppgtt)
  {
 -      struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
        int i;
  
 -      if (i915_vm_is_4lvl(&i915_ppgtt->vm)) {
 -              px_dma(&i915_ppgtt->pml4) = s->i915_context_pml4;
 +      if (i915_vm_is_4lvl(&ppgtt->vm)) {
 +              px_dma(ppgtt->pd) = s->i915_context_pml4;
        } else {
 -              for (i = 0; i < GEN8_3LVL_PDPES; i++)
 -                      px_dma(i915_ppgtt->pdp.page_directory[i]) =
 -                                              s->i915_context_pdps[i];
 +              for (i = 0; i < GEN8_3LVL_PDPES; i++) {
 +                      struct i915_page_directory * const pd =
 +                              i915_pd_entry(ppgtt->pd, i);
 +
 +                      px_dma(pd) = s->i915_context_pdps[i];
 +              }
        }
  }
  
  void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
  {
        struct intel_vgpu_submission *s = &vgpu->submission;
 +      struct intel_engine_cs *engine;
 +      enum intel_engine_id id;
  
        intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
 -      i915_context_ppgtt_root_restore(s);
 -      i915_gem_context_put(s->shadow_ctx);
 +
 +      i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm));
 +      for_each_engine(engine, vgpu->gvt->dev_priv, id)
 +              intel_context_unpin(s->shadow[id]);
 +
        kmem_cache_destroy(s->workloads);
  }
  
@@@ -1183,20 -1178,17 +1200,20 @@@ void intel_vgpu_reset_submission(struc
  }
  
  static void
 -i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
 +i915_context_ppgtt_root_save(struct intel_vgpu_submission *s,
 +                           struct i915_ppgtt *ppgtt)
  {
 -      struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
        int i;
  
 -      if (i915_vm_is_4lvl(&i915_ppgtt->vm))
 -              s->i915_context_pml4 = px_dma(&i915_ppgtt->pml4);
 -      else {
 -              for (i = 0; i < GEN8_3LVL_PDPES; i++)
 -                      s->i915_context_pdps[i] =
 -                              px_dma(i915_ppgtt->pdp.page_directory[i]);
 +      if (i915_vm_is_4lvl(&ppgtt->vm)) {
 +              s->i915_context_pml4 = px_dma(ppgtt->pd);
 +      } else {
 +              for (i = 0; i < GEN8_3LVL_PDPES; i++) {
 +                      struct i915_page_directory * const pd =
 +                              i915_pd_entry(ppgtt->pd, i);
 +
 +                      s->i915_context_pdps[i] = px_dma(pd);
 +              }
        }
  }
  
   */
  int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
  {
 +      struct drm_i915_private *i915 = vgpu->gvt->dev_priv;
        struct intel_vgpu_submission *s = &vgpu->submission;
 -      enum intel_engine_id i;
        struct intel_engine_cs *engine;
 +      struct i915_gem_context *ctx;
 +      enum intel_engine_id i;
        int ret;
  
 -      s->shadow_ctx = i915_gem_context_create_gvt(
 -                      &vgpu->gvt->dev_priv->drm);
 -      if (IS_ERR(s->shadow_ctx))
 -              return PTR_ERR(s->shadow_ctx);
 +      mutex_lock(&i915->drm.struct_mutex);
 +
 +      ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX);
 +      if (IS_ERR(ctx)) {
 +              ret = PTR_ERR(ctx);
 +              goto out_unlock;
 +      }
 +
 +      i915_gem_context_set_force_single_submission(ctx);
  
 -      i915_context_ppgtt_root_save(s);
 +      i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm));
 +
 +      for_each_engine(engine, i915, i) {
 +              struct intel_context *ce;
 +
 +              INIT_LIST_HEAD(&s->workload_q_head[i]);
 +              s->shadow[i] = ERR_PTR(-EINVAL);
 +
 +              ce = intel_context_create(ctx, engine);
 +              if (IS_ERR(ce)) {
 +                      ret = PTR_ERR(ce);
 +                      goto out_shadow_ctx;
 +              }
 +
 +              if (!USES_GUC_SUBMISSION(i915)) { /* Max ring buffer size */
 +                      const unsigned int ring_size = 512 * SZ_4K;
 +
 +                      ce->ring = __intel_context_ring_size(ring_size);
 +              }
 +
 +              ret = intel_context_pin(ce);
 +              intel_context_put(ce);
 +              if (ret)
 +                      goto out_shadow_ctx;
 +
 +              s->shadow[i] = ce;
 +      }
  
        bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
  
                goto out_shadow_ctx;
        }
  
 -      for_each_engine(engine, vgpu->gvt->dev_priv, i)
 -              INIT_LIST_HEAD(&s->workload_q_head[i]);
 -
        atomic_set(&s->running_workload_num, 0);
        bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
  
 +      i915_gem_context_put(ctx);
 +      mutex_unlock(&i915->drm.struct_mutex);
        return 0;
  
  out_shadow_ctx:
 -      i915_gem_context_put(s->shadow_ctx);
 +      i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm));
 +      for_each_engine(engine, i915, i) {
 +              if (IS_ERR(s->shadow[i]))
 +                      break;
 +
 +              intel_context_unpin(s->shadow[i]);
 +              intel_context_put(s->shadow[i]);
 +      }
 +      i915_gem_context_put(ctx);
 +out_unlock:
 +      mutex_unlock(&i915->drm.struct_mutex);
        return ret;
  }
  
@@@ -1438,9 -1388,6 +1455,6 @@@ static int prepare_mm(struct intel_vgpu
  #define same_context(a, b) (((a)->context_id == (b)->context_id) && \
                ((a)->lrca == (b)->lrca))
  
- #define get_last_workload(q) \
-       (list_empty(q) ? NULL : container_of(q->prev, \
-       struct intel_vgpu_workload, list))
  /**
   * intel_vgpu_create_workload - create a vGPU workload
   * @vgpu: a vGPU
@@@ -1460,12 -1407,11 +1474,12 @@@ intel_vgpu_create_workload(struct intel
  {
        struct intel_vgpu_submission *s = &vgpu->submission;
        struct list_head *q = workload_q_head(vgpu, ring_id);
-       struct intel_vgpu_workload *last_workload = get_last_workload(q);
+       struct intel_vgpu_workload *last_workload = NULL;
        struct intel_vgpu_workload *workload = NULL;
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        u64 ring_context_gpa;
        u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
 +      u32 guest_head;
        int ret;
  
        ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(ring_tail.val), &tail, 4);
  
 +      guest_head = head;
 +
        head &= RB_HEAD_OFF_MASK;
        tail &= RB_TAIL_OFF_MASK;
  
-       if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
-               gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
-               gvt_dbg_el("ctx head %x real head %lx\n", head,
-                               last_workload->rb_tail);
-               /*
-                * cannot use guest context head pointer here,
-                * as it might not be updated at this time
-                */
-               head = last_workload->rb_tail;
+       list_for_each_entry_reverse(last_workload, q, list) {
+               if (same_context(&last_workload->ctx_desc, desc)) {
+                       gvt_dbg_el("ring id %d cur workload == last\n",
+                                       ring_id);
+                       gvt_dbg_el("ctx head %x real head %lx\n", head,
+                                       last_workload->rb_tail);
+                       /*
+                        * cannot use guest context head pointer here,
+                        * as it might not be updated at this time
+                        */
+                       head = last_workload->rb_tail;
+                       break;
+               }
        }
  
        gvt_dbg_el("ring id %d begin a new workload\n", ring_id);
        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);
  
 +      if (!intel_gvt_ggtt_validate_range(vgpu, start,
 +                              _RING_CTL_BUF_SIZE(ctl))) {
 +              gvt_vgpu_err("context contain invalid rb at: 0x%x\n", start);
 +              return ERR_PTR(-EINVAL);
 +      }
 +
        workload = alloc_workload(vgpu);
        if (IS_ERR(workload))
                return workload;
        workload->ctx_desc = *desc;
        workload->ring_context_gpa = ring_context_gpa;
        workload->rb_head = head;
 +      workload->guest_rb_head = guest_head;
        workload->rb_tail = tail;
        workload->rb_start = start;
        workload->rb_ctl = ctl;
                workload->wa_ctx.indirect_ctx.size =
                        (indirect_ctx & INDIRECT_CTX_SIZE_MASK) *
                        CACHELINE_BYTES;
 +
 +              if (workload->wa_ctx.indirect_ctx.size != 0) {
 +                      if (!intel_gvt_ggtt_validate_range(vgpu,
 +                              workload->wa_ctx.indirect_ctx.guest_gma,
 +                              workload->wa_ctx.indirect_ctx.size)) {
 +                              kmem_cache_free(s->workloads, workload);
 +                              gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n",
 +                                  workload->wa_ctx.indirect_ctx.guest_gma);
 +                              return ERR_PTR(-EINVAL);
 +                      }
 +              }
 +
                workload->wa_ctx.per_ctx.guest_gma =
                        per_ctx & PER_CTX_ADDR_MASK;
                workload->wa_ctx.per_ctx.valid = per_ctx & 1;
 +              if (workload->wa_ctx.per_ctx.valid) {
 +                      if (!intel_gvt_ggtt_validate_range(vgpu,
 +                              workload->wa_ctx.per_ctx.guest_gma,
 +                              CACHELINE_BYTES)) {
 +                              kmem_cache_free(s->workloads, workload);
 +                              gvt_vgpu_err("invalid per_ctx at: 0x%lx\n",
 +                                      workload->wa_ctx.per_ctx.guest_gma);
 +                              return ERR_PTR(-EINVAL);
 +                      }
 +              }
        }
  
        gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
         * as there is only one pre-allocated buf-obj for shadow.
         */
        if (list_empty(workload_q_head(vgpu, ring_id))) {
 -              intel_runtime_pm_get(dev_priv);
 +              intel_runtime_pm_get(&dev_priv->runtime_pm);
                mutex_lock(&dev_priv->drm.struct_mutex);
                ret = intel_gvt_scan_and_shadow_workload(workload);
                mutex_unlock(&dev_priv->drm.struct_mutex);
 -              intel_runtime_pm_put_unchecked(dev_priv);
 +              intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
        }
  
        if (ret) {