static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ if (obj->cache_dirty)
return false;
if (!i915_gem_object_is_coherent(obj))
return st;
}
+static void __start_cpu_write(struct drm_i915_gem_object *obj)
+{
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ if (cpu_write_needs_clflush(obj))
+ obj->cache_dirty = true;
+}
+
static void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages,
!i915_gem_object_is_coherent(obj))
drm_clflush_sg(pages);
- obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ __start_cpu_write(obj);
}
static void
args->size, &args->handle);
}
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ return !(obj->cache_level == I915_CACHE_NONE ||
+ obj->cache_level == I915_CACHE_WT);
+}
+
/**
* Creates a new mm object and returns a handle to it.
* @dev: drm device pointer
case I915_GEM_DOMAIN_CPU:
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
break;
+
+ case I915_GEM_DOMAIN_RENDER:
+ if (gpu_write_needs_clflush(obj))
+ obj->cache_dirty = true;
+ break;
}
obj->base.write_domain = 0;
* optimizes for the case when the gpu will dirty the data
* anyway again before the next pread happens.
*/
- if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+ if (!obj->cache_dirty &&
+ !(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
*needs_clflush = CLFLUSH_BEFORE;
out:
* This optimizes for the case when the gpu will use the data
* right away and we therefore have to clflush anyway.
*/
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+ if (!obj->cache_dirty) {
*needs_clflush |= CLFLUSH_AFTER;
- /* Same trick applies to invalidate partially written cachelines read
- * before writing.
- */
- if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
- *needs_clflush |= CLFLUSH_BEFORE;
+ /*
+ * Same trick applies to invalidate partially written
+ * cachelines read before writing.
+ */
+ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+ *needs_clflush |= CLFLUSH_BEFORE;
+ }
out:
intel_fb_obj_invalidate(obj, ORIGIN_CPU);
static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
- return;
-
- i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+ /*
+ * We manually flush the CPU domain so that we can override and
+ * force the flush for the display, and perform it asyncrhonously.
+ */
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+ if (obj->cache_dirty)
+ i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
obj->base.write_domain = 0;
}
}
}
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
- i915_gem_object_is_coherent(obj))
- obj->cache_dirty = true;
-
list_for_each_entry(vma, &obj->vma_list, obj_link)
vma->node.color = cache_level;
obj->cache_level = cache_level;
+ obj->cache_dirty = true; /* Always invalidate stale cachelines */
return 0;
}
if (ret)
return ret;
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
- return 0;
-
flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* Flush the CPU cache if it's still invalid. */
/* It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
- GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+ GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
/* If we're writing through the CPU, then the GPU read domains will
* need to be invalidated at next use.
*/
- if (write) {
- obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- }
+ if (write)
+ __start_cpu_write(obj);
return 0;
}
} else
obj->cache_level = I915_CACHE_NONE;
+ obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+
trace_i915_gem_object_create(obj);
return obj;
mutex_lock(&dev_priv->drm.struct_mutex);
for (p = phases; *p; p++) {
- list_for_each_entry(obj, *p, global_link) {
- obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- }
+ list_for_each_entry(obj, *p, global_link)
+ __start_cpu_write(obj);
}
mutex_unlock(&dev_priv->drm.struct_mutex);
static void __i915_do_clflush(struct drm_i915_gem_object *obj)
{
drm_clflush_sg(obj->mm.pages);
- obj->cache_dirty = false;
-
intel_fb_obj_flush(obj, ORIGIN_CPU);
}
struct clflush *clflush = container_of(work, typeof(*clflush), work);
struct drm_i915_gem_object *obj = clflush->obj;
- if (!obj->cache_dirty)
- goto out;
-
if (i915_gem_object_pin_pages(obj)) {
DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
goto out;
* anything not backed by physical memory we consider to be always
* coherent and not need clflushing.
*/
- if (!i915_gem_object_has_struct_page(obj))
+ if (!i915_gem_object_has_struct_page(obj)) {
+ obj->cache_dirty = false;
return;
-
- obj->cache_dirty = true;
+ }
/* If the GPU is snooping the contents of the CPU cache,
* we do not need to manually clear the CPU cache lines. However,
if (!(flags & I915_CLFLUSH_SYNC))
clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
if (clflush) {
+ GEM_BUG_ON(!obj->cache_dirty);
+
dma_fence_init(&clflush->dma,
&i915_clflush_ops,
&clflush_lock,
} else {
GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
}
+
+ obj->cache_dirty = false;
}
return DBG_USE_CPU_RELOC > 0;
return (HAS_LLC(to_i915(obj->base.dev)) ||
- obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+ obj->cache_dirty ||
obj->cache_level != I915_CACHE_NONE);
}
if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
continue;
- if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
+ if (obj->cache_dirty)
i915_gem_clflush_object(obj, 0);
- obj->base.write_domain = 0;
- }
ret = i915_gem_request_await_object
(eb->request, obj, obj->base.pending_write_domain);
return 0;
}
-static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
- return !(obj->cache_level == I915_CACHE_NONE ||
- obj->cache_level == I915_CACHE_WT);
-}
-
void i915_vma_move_to_active(struct i915_vma *vma,
struct drm_i915_gem_request *req,
unsigned int flags)
i915_gem_active_set(&vma->last_read[idx], req);
list_move_tail(&vma->vm_link, &vma->vm->active_list);
+ obj->base.write_domain = 0;
if (flags & EXEC_OBJECT_WRITE) {
+ obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
+
if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
i915_gem_active_set(&obj->frontbuffer_write, req);
- /* update for the implicit flush after a batch */
- obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
- if (!obj->cache_dirty && gpu_write_needs_clflush(obj))
- obj->cache_dirty = true;
+ obj->base.read_domains = 0;
}
+ obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
if (flags & EXEC_OBJECT_NEEDS_FENCE)
i915_gem_active_set(&vma->last_fence, req);