Currently we allocate a scratch page for each engine, but since we only
ever write into it for post-sync operations, it is not exposed to
userspace nor do we care for coherency. As we then do not care about its
contents, we can use one page for all, reducing our allocations and
avoid complications by not assuming per-engine isolation.
For later use, it simplifies engine initialisation (by removing the
allocation that required struct_mutex!) and means that we can always rely
on there being a scratch page.
v2: Check that we allocated a large enough scratch for I830 w/a
Fixes:
06e562e7f515 ("drm/i915/ringbuffer: Delay after EMIT_INVALIDATE for gen4/gen5") # v4.18.20
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108850
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181204141522.13640-1-chris@chris-wilson.co.uk
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.18.20+
(cherry picked from commit
5179749925933575a67f9d8f16d0cc204f98a29f)
[Joonas: Use new function in gen9_init_indirectctx_bb too]
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
struct delayed_work idle_work;
ktime_t last_init_time;
+
+ struct i915_vma *scratch;
} gt;
/* perform PHY state sanity checks? */
return I915_HWS_CSB_WRITE_INDEX;
}
+static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
+{
+ return i915_ggtt_offset(i915->gt.scratch);
+}
+
#endif
goto out_ctx;
}
+static int
+i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int ret;
+
+ obj = i915_gem_object_create_stolen(i915, size);
+ if (!obj)
+ obj = i915_gem_object_create_internal(i915, size);
+ if (IS_ERR(obj)) {
+ DRM_ERROR("Failed to allocate scratch page\n");
+ return PTR_ERR(obj);
+ }
+
+ vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err_unref;
+ }
+
+ ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (ret)
+ goto err_unref;
+
+ i915->gt.scratch = vma;
+ return 0;
+
+err_unref:
+ i915_gem_object_put(obj);
+ return ret;
+}
+
+static void i915_gem_fini_scratch(struct drm_i915_private *i915)
+{
+ i915_vma_unpin_and_release(&i915->gt.scratch, 0);
+}
+
int i915_gem_init(struct drm_i915_private *dev_priv)
{
int ret;
goto err_unlock;
}
- ret = i915_gem_contexts_init(dev_priv);
+ ret = i915_gem_init_scratch(dev_priv,
+ IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE);
if (ret) {
GEM_BUG_ON(ret == -EIO);
goto err_ggtt;
}
+ ret = i915_gem_contexts_init(dev_priv);
+ if (ret) {
+ GEM_BUG_ON(ret == -EIO);
+ goto err_scratch;
+ }
+
ret = intel_engines_init(dev_priv);
if (ret) {
GEM_BUG_ON(ret == -EIO);
err_context:
if (ret != -EIO)
i915_gem_contexts_fini(dev_priv);
+err_scratch:
+ i915_gem_fini_scratch(dev_priv);
err_ggtt:
err_unlock:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
intel_uc_fini(dev_priv);
i915_gem_cleanup_engines(dev_priv);
i915_gem_contexts_fini(dev_priv);
+ i915_gem_fini_scratch(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
intel_wa_list_free(&dev_priv->gt_wa_list);
if (HAS_BROKEN_CS_TLB(i915))
ee->wa_batchbuffer =
i915_error_object_create(i915,
- engine->scratch);
+ i915->gt.scratch);
request_record_user_bo(request, ee);
ee->ctx =
intel_engine_init_cmd_parser(engine);
}
-int intel_engine_create_scratch(struct intel_engine_cs *engine,
- unsigned int size)
-{
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- int ret;
-
- WARN_ON(engine->scratch);
-
- obj = i915_gem_object_create_stolen(engine->i915, size);
- if (!obj)
- obj = i915_gem_object_create_internal(engine->i915, size);
- if (IS_ERR(obj)) {
- DRM_ERROR("Failed to allocate scratch page\n");
- return PTR_ERR(obj);
- }
-
- vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
- goto err_unref;
- }
-
- ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
- if (ret)
- goto err_unref;
-
- engine->scratch = vma;
- return 0;
-
-err_unref:
- i915_gem_object_put(obj);
- return ret;
-}
-
-void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
-{
- i915_vma_unpin_and_release(&engine->scratch, 0);
-}
-
static void cleanup_status_page(struct intel_engine_cs *engine)
{
if (HWS_NEEDS_PHYSICAL(engine->i915)) {
{
struct drm_i915_private *i915 = engine->i915;
- intel_engine_cleanup_scratch(engine);
-
cleanup_status_page(engine);
intel_engine_fini_breadcrumbs(engine);
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
+ /* NB no one else is allowed to scribble over scratch + 256! */
*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = i915_ggtt_offset(engine->scratch) + 256;
+ *batch++ = i915_scratch_offset(engine->i915) + 256;
*batch++ = 0;
*batch++ = MI_LOAD_REGISTER_IMM(1);
*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = i915_ggtt_offset(engine->scratch) + 256;
+ *batch++ = i915_scratch_offset(engine->i915) + 256;
*batch++ = 0;
return batch;
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_QW_WRITE,
- i915_ggtt_offset(engine->scratch) +
+ i915_scratch_offset(engine->i915) +
2 * CACHELINE_BYTES);
*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_QW_WRITE,
- i915_ggtt_offset(engine->scratch)
+ i915_scratch_offset(engine->i915)
+ 2 * CACHELINE_BYTES);
}
{
struct intel_engine_cs *engine = request->engine;
u32 scratch_addr =
- i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
+ i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
bool vf_flush_wa = false, dc_flush_wa = false;
u32 *cs, flags = 0;
int len;
if (ret)
return ret;
- ret = intel_engine_create_scratch(engine, PAGE_SIZE);
- if (ret)
- goto err_cleanup_common;
-
ret = intel_init_workaround_bb(engine);
if (ret) {
/*
intel_engine_init_workarounds(engine);
return 0;
-
-err_cleanup_common:
- intel_engine_cleanup_common(engine);
- return ret;
}
int logical_xcs_ring_init(struct intel_engine_cs *engine)
*/
if (mode & EMIT_INVALIDATE) {
*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
- *cs++ = i915_ggtt_offset(rq->engine->scratch) |
- PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = 0;
*cs++ = 0;
*cs++ = MI_FLUSH;
*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
- *cs++ = i915_ggtt_offset(rq->engine->scratch) |
- PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = 0;
*cs++ = 0;
}
static int
intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
{
- u32 scratch_addr =
- i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+ u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
u32 *cs;
cs = intel_ring_begin(rq, 6);
static int
gen6_render_ring_flush(struct i915_request *rq, u32 mode)
{
- u32 scratch_addr =
- i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+ u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
u32 *cs, flags = 0;
int ret;
static int
gen7_render_ring_flush(struct i915_request *rq, u32 mode)
{
- u32 scratch_addr =
- i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+ u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
u32 *cs, flags = 0;
/*
}
/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
-#define I830_BATCH_LIMIT (256*1024)
+#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
static int
u64 offset, u32 len,
unsigned int dispatch_flags)
{
- u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);
+ u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
+
+ GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
{
struct i915_timeline *timeline;
struct intel_ring *ring;
- unsigned int size;
int err;
intel_engine_setup_common(engine);
GEM_BUG_ON(engine->buffer);
engine->buffer = ring;
- size = PAGE_SIZE;
- if (HAS_BROKEN_CS_TLB(engine->i915))
- size = I830_WA_SIZE;
- err = intel_engine_create_scratch(engine, size);
- if (err)
- goto err_unpin;
-
err = intel_engine_init_common(engine);
if (err)
- goto err_scratch;
+ goto err_unpin;
return 0;
-err_scratch:
- intel_engine_cleanup_scratch(engine);
err_unpin:
intel_ring_unpin(ring);
err_ring:
/* Stall until the page table load is complete */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
- *cs++ = i915_ggtt_offset(engine->scratch);
+ *cs++ = i915_scratch_offset(rq->i915);
*cs++ = MI_NOOP;
intel_ring_advance(rq, cs);
/* Insert a delay before the next switch! */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(last_reg);
- *cs++ = i915_ggtt_offset(engine->scratch);
+ *cs++ = i915_scratch_offset(rq->i915);
*cs++ = MI_NOOP;
}
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
struct intel_hw_status_page status_page;
struct i915_ctx_workarounds wa_ctx;
struct i915_wa_list wa_list;
- struct i915_vma *scratch;
u32 irq_keep_mask; /* always keep these interrupts */
u32 irq_enable_mask; /* bitmask to enable ring interrupt */
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);
-int intel_engine_create_scratch(struct intel_engine_cs *engine,
- unsigned int size);
-void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);
-
int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);