drm/i915: Replace the complex flushing logic with simple invalidate/flush all
authorDaniel Vetter <daniel.vetter@ffwll.ch>
Sat, 21 Jul 2012 10:25:01 +0000 (12:25 +0200)
committerDaniel Vetter <daniel.vetter@ffwll.ch>
Wed, 25 Jul 2012 16:23:54 +0000 (18:23 +0200)
Now that we unconditionally flush and invalidate between every batch
buffer, we no longer need the complex logic to decide which domains
require flushing. Remove it and rejoice.

v2 (danvet): Keep around the flip waiting logic. It's gross and
broken, I know, but we can't just kill that thing ... even if we just
keep it around as a reminder that things are broken.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_gem_execbuffer.c

index 36c940c..6c81079 100644 (file)
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
 
-struct change_domains {
-       uint32_t invalidate_domains;
-       uint32_t flush_domains;
-       uint32_t flush_rings;
-       uint32_t flips;
-};
-
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invaliding though,
- * as that may want to be batched with other set_domain operations
- *
- * This is (we hope) the only really tricky part of gem. The goal
- * is fairly simple -- track which caches hold bits of the object
- * and make sure they remain coherent. A few concrete examples may
- * help to explain how it works. For shorthand, we use the notation
- * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
- * a pair of read and write domain masks.
- *
- * Case 1: the batch buffer
- *
- *     1. Allocated
- *     2. Written by CPU
- *     3. Mapped to GTT
- *     4. Read by GPU
- *     5. Unmapped from GTT
- *     6. Freed
- *
- *     Let's take these a step at a time
- *
- *     1. Allocated
- *             Pages allocated from the kernel may still have
- *             cache contents, so we set them to (CPU, CPU) always.
- *     2. Written by CPU (using pwrite)
- *             The pwrite function calls set_domain (CPU, CPU) and
- *             this function does nothing (as nothing changes)
- *     3. Mapped by GTT
- *             This function asserts that the object is not
- *             currently in any GPU-based read or write domains
- *     4. Read by GPU
- *             i915_gem_execbuffer calls set_domain (COMMAND, 0).
- *             As write_domain is zero, this function adds in the
- *             current read domains (CPU+COMMAND, 0).
- *             flush_domains is set to CPU.
- *             invalidate_domains is set to COMMAND
- *             clflush is run to get data out of the CPU caches
- *             then i915_dev_set_domain calls i915_gem_flush to
- *             emit an MI_FLUSH and drm_agp_chipset_flush
- *     5. Unmapped from GTT
- *             i915_gem_object_unbind calls set_domain (CPU, CPU)
- *             flush_domains and invalidate_domains end up both zero
- *             so no flushing/invalidating happens
- *     6. Freed
- *             yay, done
- *
- * Case 2: The shared render buffer
- *
- *     1. Allocated
- *     2. Mapped to GTT
- *     3. Read/written by GPU
- *     4. set_domain to (CPU,CPU)
- *     5. Read/written by CPU
- *     6. Read/written by GPU
- *
- *     1. Allocated
- *             Same as last example, (CPU, CPU)
- *     2. Mapped to GTT
- *             Nothing changes (assertions find that it is not in the GPU)
- *     3. Read/written by GPU
- *             execbuffer calls set_domain (RENDER, RENDER)
- *             flush_domains gets CPU
- *             invalidate_domains gets GPU
- *             clflush (obj)
- *             MI_FLUSH and drm_agp_chipset_flush
- *     4. set_domain (CPU, CPU)
- *             flush_domains gets GPU
- *             invalidate_domains gets CPU
- *             wait_rendering (obj) to make sure all drawing is complete.
- *             This will include an MI_FLUSH to get the data from GPU
- *             to memory
- *             clflush (obj) to invalidate the CPU cache
- *             Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
- *     5. Read/written by CPU
- *             cache lines are loaded and dirtied
- *     6. Read written by GPU
- *             Same as last GPU access
- *
- * Case 3: The constant buffer
- *
- *     1. Allocated
- *     2. Written by CPU
- *     3. Read by GPU
- *     4. Updated (written) by CPU again
- *     5. Read by GPU
- *
- *     1. Allocated
- *             (CPU, CPU)
- *     2. Written by CPU
- *             (CPU, CPU)
- *     3. Read by GPU
- *             (CPU+RENDER, 0)
- *             flush_domains = CPU
- *             invalidate_domains = RENDER
- *             clflush (obj)
- *             MI_FLUSH
- *             drm_agp_chipset_flush
- *     4. Updated (written) by CPU again
- *             (CPU, CPU)
- *             flush_domains = 0 (no previous write domain)
- *             invalidate_domains = 0 (no new read domains)
- *     5. Read by GPU
- *             (CPU+RENDER, 0)
- *             flush_domains = CPU
- *             invalidate_domains = RENDER
- *             clflush (obj)
- *             MI_FLUSH
- *             drm_agp_chipset_flush
- */
-static void
-i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
-                                 struct intel_ring_buffer *ring,
-                                 struct change_domains *cd)
-{
-       uint32_t invalidate_domains = 0, flush_domains = 0;
-
-       /*
-        * If the object isn't moving to a new write domain,
-        * let the object stay in multiple read domains
-        */
-       if (obj->base.pending_write_domain == 0)
-               obj->base.pending_read_domains |= obj->base.read_domains;
-
-       /*
-        * Flush the current write domain if
-        * the new read domains don't match. Invalidate
-        * any read domains which differ from the old
-        * write domain
-        */
-       if (obj->base.write_domain &&
-           (((obj->base.write_domain != obj->base.pending_read_domains ||
-              obj->ring != ring)) ||
-            (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
-               flush_domains |= obj->base.write_domain;
-               invalidate_domains |=
-                       obj->base.pending_read_domains & ~obj->base.write_domain;
-       }
-       /*
-        * Invalidate any read caches which may have
-        * stale data. That is, any new read domains.
-        */
-       invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
-       if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
-               i915_gem_clflush_object(obj);
-
-       if (obj->base.pending_write_domain)
-               cd->flips |= atomic_read(&obj->pending_flip);
-
-       /* The actual obj->write_domain will be updated with
-        * pending_write_domain after we emit the accumulated flush for all
-        * of our domain changes in execbuffers (which clears objects'
-        * write_domains).  So if we have a current write domain that we
-        * aren't changing, set pending_write_domain to that.
-        */
-       if (flush_domains == 0 && obj->base.pending_write_domain == 0)
-               obj->base.pending_write_domain = obj->base.write_domain;
-
-       cd->invalidate_domains |= invalidate_domains;
-       cd->flush_domains |= flush_domains;
-       if (flush_domains & I915_GEM_GPU_DOMAINS)
-               cd->flush_rings |= intel_ring_flag(obj->ring);
-       if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-               cd->flush_rings |= intel_ring_flag(ring);
-}
-
 struct eb_objects {
        int and;
        struct hlist_head buckets[0];
@@ -810,18 +636,6 @@ err:
        return ret;
 }
 
-static void
-i915_gem_execbuffer_flush(struct drm_device *dev,
-                         uint32_t invalidate_domains,
-                         uint32_t flush_domains)
-{
-       if (flush_domains & I915_GEM_DOMAIN_CPU)
-               intel_gtt_chipset_flush();
-
-       if (flush_domains & I915_GEM_DOMAIN_GTT)
-               wmb();
-}
-
 static int
 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 {
@@ -854,37 +668,41 @@ i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
        return 0;
 }
 
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                                struct list_head *objects)
 {
        struct drm_i915_gem_object *obj;
-       struct change_domains cd;
+       uint32_t flush_domains = 0;
+       uint32_t flips = 0;
        int ret;
 
-       memset(&cd, 0, sizeof(cd));
-       list_for_each_entry(obj, objects, exec_list)
-               i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
-
-       if (cd.invalidate_domains | cd.flush_domains) {
-               i915_gem_execbuffer_flush(ring->dev,
-                                         cd.invalidate_domains,
-                                         cd.flush_domains);
-       }
-
-       if (cd.flips) {
-               ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+       list_for_each_entry(obj, objects, exec_list) {
+               ret = i915_gem_object_sync(obj, ring);
                if (ret)
                        return ret;
+
+               if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+                       i915_gem_clflush_object(obj);
+
+               if (obj->base.pending_write_domain)
+                       flips |= atomic_read(&obj->pending_flip);
+
+               flush_domains |= obj->base.write_domain;
        }
 
-       list_for_each_entry(obj, objects, exec_list) {
-               ret = i915_gem_object_sync(obj, ring);
+       if (flips) {
+               ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
                if (ret)
                        return ret;
        }
 
+       if (flush_domains & I915_GEM_DOMAIN_CPU)
+               intel_gtt_chipset_flush();
+
+       if (flush_domains & I915_GEM_DOMAIN_GTT)
+               wmb();
+
        /* Unconditionally invalidate gpu caches and ensure that we do flush
         * any residual writes from the previous batch.
         */