drm/i915: merge get_gtt_alignment/get_unfenced_gtt_alignment()
[platform/adaptation/renesas_rcar/renesas_kernel.git] / drivers / gpu / drm / i915 / i915_gem.c
index 742206e..2166b61 100644 (file)
@@ -163,8 +163,8 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data,
                return -ENODEV;
 
        mutex_lock(&dev->struct_mutex);
-       i915_gem_init_global_gtt(dev, args->gtt_start,
-                                args->gtt_end, args->gtt_end);
+       i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
+                                 args->gtt_end);
        mutex_unlock(&dev->struct_mutex);
 
        return 0;
@@ -192,6 +192,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
        return 0;
 }
 
+void *i915_gem_object_alloc(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private; /* holds the object slab */
+       return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO); /* zero-filled; callers test for NULL */
+}
+
+void i915_gem_object_free(struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *dev_priv = obj->base.dev->dev_private; /* reached via the object's own device */
+       kmem_cache_free(dev_priv->slab, obj); /* counterpart of i915_gem_object_alloc() */
+}
+
 static int
 i915_gem_create(struct drm_file *file,
                struct drm_device *dev,
@@ -215,7 +227,7 @@ i915_gem_create(struct drm_file *file,
        if (ret) {
                drm_gem_object_release(&obj->base);
                i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
-               kfree(obj);
+               i915_gem_object_free(obj);
                return ret;
        }
 
@@ -259,14 +271,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
                               args->size, &args->handle);
 }
 
-static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
-{
-       drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
-
-       return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
-               obj->tiling_mode != I915_TILING_NONE;
-}
-
 static inline int
 __copy_to_user_swizzled(char __user *cpu_vaddr,
                        const char *gpu_vaddr, int gpu_offset,
@@ -407,7 +411,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
        loff_t offset;
        int shmem_page_offset, page_length, ret = 0;
        int obj_do_bit17_swizzling, page_do_bit17_swizzling;
-       int hit_slowpath = 0;
        int prefaulted = 0;
        int needs_clflush = 0;
        struct scatterlist *sg;
@@ -469,7 +472,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
                if (ret == 0)
                        goto next_page;
 
-               hit_slowpath = 1;
                mutex_unlock(&dev->struct_mutex);
 
                if (!prefaulted) {
@@ -502,12 +504,6 @@ next_page:
 out:
        i915_gem_object_unpin_pages(obj);
 
-       if (hit_slowpath) {
-               /* Fixup: Kill any reinstated backing storage pages */
-               if (obj->madv == __I915_MADV_PURGED)
-                       i915_gem_object_truncate(obj);
-       }
-
        return ret;
 }
 
@@ -838,12 +834,13 @@ out:
        i915_gem_object_unpin_pages(obj);
 
        if (hit_slowpath) {
-               /* Fixup: Kill any reinstated backing storage pages */
-               if (obj->madv == __I915_MADV_PURGED)
-                       i915_gem_object_truncate(obj);
-               /* and flush dirty cachelines in case the object isn't in the cpu write
-                * domain anymore. */
-               if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+               /*
+                * Fixup: Flush cpu caches in case we didn't flush the dirty
+                * cachelines in-line while writing and the object moved
+                * out of the cpu write domain while we've dropped the lock.
+                */
+               if (!needs_clflush_after &&
+                   obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
                        i915_gem_clflush_object(obj);
                        i915_gem_chipset_flush(dev);
                }
@@ -1344,6 +1341,12 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        trace_i915_gem_object_fault(obj, page_offset, true, write);
 
+       /* Access to snoopable pages through the GTT is incoherent. */
+       if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
+               ret = -EINVAL;
+               goto unlock;
+       }
+
        /* Now bind it into the GTT if needed */
        ret = i915_gem_object_pin(obj, 0, true, false);
        if (ret)
@@ -1460,16 +1463,15 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
  * Return the required GTT alignment for an object, taking into account
  * potential fence register mapping.
  */
-static uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev,
-                          uint32_t size,
-                          int tiling_mode)
+uint32_t
+i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
+                          int tiling_mode, bool fenced)
 {
        /*
         * Minimum alignment is 4k (GTT page size), but might be greater
         * if a fence register is needed for the object.
         */
-       if (INTEL_INFO(dev)->gen >= 4 ||
+       if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
            tiling_mode == I915_TILING_NONE)
                return 4096;
 
@@ -1480,35 +1482,6 @@ i915_gem_get_gtt_alignment(struct drm_device *dev,
        return i915_gem_get_gtt_size(dev, size, tiling_mode);
 }
 
-/**
- * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
- *                                      unfenced object
- * @dev: the device
- * @size: size of the object
- * @tiling_mode: tiling mode of the object
- *
- * Return the required GTT alignment for an object, only taking into account
- * unfenced tiled surface requirements.
- */
-uint32_t
-i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
-                                   uint32_t size,
-                                   int tiling_mode)
-{
-       /*
-        * Minimum alignment is 4k (GTT page size) for sane hw.
-        */
-       if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
-           tiling_mode == I915_TILING_NONE)
-               return 4096;
-
-       /* Previous hardware however needs to be aligned to a power-of-two
-        * tile height. The simplest method for determining this is to reuse
-        * the power-of-tile object size.
-        */
-       return i915_gem_get_gtt_size(dev, size, tiling_mode);
-}
-
 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 {
        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
@@ -1517,9 +1490,11 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
        if (obj->base.map_list.map)
                return 0;
 
+       dev_priv->mm.shrinker_no_lock_stealing = true;
+
        ret = drm_gem_create_mmap_offset(&obj->base);
        if (ret != -ENOSPC)
-               return ret;
+               goto out;
 
        /* Badly fragmented mmap space? The only way we can recover
         * space is by destroying unwanted objects. We can't randomly release
@@ -1531,10 +1506,14 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
        i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
        ret = drm_gem_create_mmap_offset(&obj->base);
        if (ret != -ENOSPC)
-               return ret;
+               goto out;
 
        i915_gem_shrink_all(dev_priv);
-       return drm_gem_create_mmap_offset(&obj->base);
+       ret = drm_gem_create_mmap_offset(&obj->base);
+out:
+       dev_priv->mm.shrinker_no_lock_stealing = false;
+
+       return ret;
 }
 
 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
@@ -1927,30 +1906,24 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 }
 
 static int
-i915_gem_handle_seqno_wrap(struct drm_device *dev)
+i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;
        int ret, i, j;
 
-       /* The hardware uses various monotonic 32-bit counters, if we
-        * detect that they will wraparound we need to idle the GPU
-        * and reset those counters.
-        */
-       ret = 0;
+       /* Carefully retire all requests without writing to the rings */
        for_each_ring(ring, dev_priv, i) {
-               for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
-                       ret |= ring->sync_seqno[j] != 0;
+               ret = intel_ring_idle(ring);
+               if (ret)
+                       return ret;
        }
-       if (ret == 0)
-               return ret;
-
-       ret = i915_gpu_idle(dev);
-       if (ret)
-               return ret;
-
        i915_gem_retire_requests(dev);
+
+       /* Finally reset hw state */
        for_each_ring(ring, dev_priv, i) {
+               intel_ring_init_seqno(ring, seqno);
+
                for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
                        ring->sync_seqno[j] = 0;
        }
@@ -1958,6 +1931,32 @@ i915_gem_handle_seqno_wrap(struct drm_device *dev)
        return 0;
 }
 
+int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int ret;
+
+       if (seqno == 0) /* i915_gem_get_seqno() reserves 0 for "non-seqno" */
+               return -EINVAL;
+
+       /* The seqno in the HWS page must be lower than the next one
+        * we will inject into the ring, hence seqno - 1 here.
+        */
+       ret = i915_gem_init_seqno(dev, seqno - 1);
+       if (ret)
+               return ret;
+
+       /* Keep last_seqno just behind next_seqno so that wrap detection
+        * still works; it is never left at 0 (decremented once more).
+        */
+       dev_priv->next_seqno = seqno;
+       dev_priv->last_seqno = seqno - 1;
+       if (dev_priv->last_seqno == 0)
+               dev_priv->last_seqno--;
+
+       return 0;
+}
+
 int
 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 {
@@ -1965,14 +1964,14 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 
        /* reserve 0 for non-seqno */
        if (dev_priv->next_seqno == 0) {
-               int ret = i915_gem_handle_seqno_wrap(dev);
+               int ret = i915_gem_init_seqno(dev, 0);
                if (ret)
                        return ret;
 
                dev_priv->next_seqno = 1;
        }
 
-       *seqno = dev_priv->next_seqno++;
+       *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
        return 0;
 }
 
@@ -2642,7 +2641,7 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
        case 4: i965_write_fence_reg(dev, reg, obj); break;
        case 3: i915_write_fence_reg(dev, reg, obj); break;
        case 2: i830_write_fence_reg(dev, reg, obj); break;
-       default: break;
+       default: BUG();
        }
 }
 
@@ -2817,7 +2816,7 @@ static bool i915_gem_valid_gtt_space(struct drm_device *dev,
 
        /* On non-LLC machines we have to be careful when putting differing
         * types of snoopable memory together to avoid the prefetcher
-        * crossing memory domains and dieing.
+        * crossing memory domains and dying.
         */
        if (HAS_LLC(dev))
                return true;
@@ -2890,7 +2889,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 {
        struct drm_device *dev = obj->base.dev;
        drm_i915_private_t *dev_priv = dev->dev_private;
-       struct drm_mm_node *free_space;
+       struct drm_mm_node *node;
        u32 size, fence_size, fence_alignment, unfenced_alignment;
        bool mappable, fenceable;
        int ret;
@@ -2905,11 +2904,11 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
                                           obj->tiling_mode);
        fence_alignment = i915_gem_get_gtt_alignment(dev,
                                                     obj->base.size,
-                                                    obj->tiling_mode);
+                                                    obj->tiling_mode, true);
        unfenced_alignment =
-               i915_gem_get_unfenced_gtt_alignment(dev,
+               i915_gem_get_gtt_alignment(dev,
                                                    obj->base.size,
-                                                   obj->tiling_mode);
+                                                   obj->tiling_mode, false);
 
        if (alignment == 0)
                alignment = map_and_fenceable ? fence_alignment :
@@ -2936,66 +2935,54 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 
        i915_gem_object_pin_pages(obj);
 
+       node = kzalloc(sizeof(*node), GFP_KERNEL);
+       if (node == NULL) {
+               i915_gem_object_unpin_pages(obj);
+               return -ENOMEM;
+       }
+
  search_free:
        if (map_and_fenceable)
-               free_space = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
-                                                              size, alignment, obj->cache_level,
-                                                              0, dev_priv->mm.gtt_mappable_end,
-                                                              false);
+               ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node,
+                                                         size, alignment, obj->cache_level,
+                                                         0, dev_priv->mm.gtt_mappable_end);
        else
-               free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
-                                                     size, alignment, obj->cache_level,
-                                                     false);
-
-       if (free_space != NULL) {
-               if (map_and_fenceable)
-                       free_space =
-                               drm_mm_get_block_range_generic(free_space,
-                                                              size, alignment, obj->cache_level,
-                                                              0, dev_priv->mm.gtt_mappable_end,
-                                                              false);
-               else
-                       free_space =
-                               drm_mm_get_block_generic(free_space,
-                                                        size, alignment, obj->cache_level,
-                                                        false);
-       }
-       if (free_space == NULL) {
+               ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node,
+                                                size, alignment, obj->cache_level);
+       if (ret) {
                ret = i915_gem_evict_something(dev, size, alignment,
                                               obj->cache_level,
                                               map_and_fenceable,
                                               nonblocking);
-               if (ret) {
-                       i915_gem_object_unpin_pages(obj);
-                       return ret;
-               }
+               if (ret == 0)
+                       goto search_free;
 
-               goto search_free;
+               i915_gem_object_unpin_pages(obj);
+               kfree(node);
+               return ret;
        }
-       if (WARN_ON(!i915_gem_valid_gtt_space(dev,
-                                             free_space,
-                                             obj->cache_level))) {
+       if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) {
                i915_gem_object_unpin_pages(obj);
-               drm_mm_put_block(free_space);
+               drm_mm_put_block(node);
                return -EINVAL;
        }
 
        ret = i915_gem_gtt_prepare_object(obj);
        if (ret) {
                i915_gem_object_unpin_pages(obj);
-               drm_mm_put_block(free_space);
+               drm_mm_put_block(node);
                return ret;
        }
 
        list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
        list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-       obj->gtt_space = free_space;
-       obj->gtt_offset = free_space->start;
+       obj->gtt_space = node;
+       obj->gtt_offset = node->start;
 
        fenceable =
-               free_space->size == fence_size &&
-               (free_space->start & (fence_alignment - 1)) == 0;
+               node->size == fence_size &&
+               (node->start & (fence_alignment - 1)) == 0;
 
        mappable =
                obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
@@ -3704,14 +3691,14 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 {
        struct drm_i915_gem_object *obj;
        struct address_space *mapping;
-       u32 mask;
+       gfp_t mask;
 
-       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+       obj = i915_gem_object_alloc(dev);
        if (obj == NULL)
                return NULL;
 
        if (drm_gem_object_init(dev, &obj->base, size) != 0) {
-               kfree(obj);
+               i915_gem_object_free(obj);
                return NULL;
        }
 
@@ -3783,6 +3770,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
        obj->pages_pin_count = 0;
        i915_gem_object_put_pages(obj);
        i915_gem_object_free_mmap_offset(obj);
+       i915_gem_object_release_stolen(obj);
 
        BUG_ON(obj->pages);
 
@@ -3793,7 +3781,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
        i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
        kfree(obj->bit_17);
-       kfree(obj);
+       i915_gem_object_free(obj);
 }
 
 int
@@ -3889,8 +3877,10 @@ void i915_gem_init_swizzling(struct drm_device *dev)
        I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
        if (IS_GEN6(dev))
                I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
-       else
+       else if (IS_GEN7(dev))
                I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
+       else
+               BUG();
 }
 
 static bool
@@ -3925,6 +3915,8 @@ i915_gem_init_hw(struct drm_device *dev)
 
        i915_gem_init_swizzling(dev);
 
+       dev_priv->next_seqno = dev_priv->last_seqno = (u32)~0 - 0x1000;
+
        ret = intel_init_render_ring_buffer(dev);
        if (ret)
                return ret;
@@ -3941,8 +3933,6 @@ i915_gem_init_hw(struct drm_device *dev)
                        goto cleanup_bsd_ring;
        }
 
-       dev_priv->next_seqno = 1;
-
        /*
         * XXX: There was some w/a described somewhere suggesting loading
         * contexts before PPGTT.
@@ -3959,58 +3949,13 @@ cleanup_render_ring:
        return ret;
 }
 
-static bool
-intel_enable_ppgtt(struct drm_device *dev)
-{
-       if (i915_enable_ppgtt >= 0)
-               return i915_enable_ppgtt;
-
-#ifdef CONFIG_INTEL_IOMMU
-       /* Disable ppgtt on SNB if VT-d is on. */
-       if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
-               return false;
-#endif
-
-       return true;
-}
-
 int i915_gem_init(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       unsigned long gtt_size, mappable_size;
        int ret;
 
-       gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
-       mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
-
        mutex_lock(&dev->struct_mutex);
-       if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
-               /* PPGTT pdes are stolen from global gtt ptes, so shrink the
-                * aperture accordingly when using aliasing ppgtt. */
-               gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
-
-               i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);
-
-               ret = i915_gem_init_aliasing_ppgtt(dev);
-               if (ret) {
-                       mutex_unlock(&dev->struct_mutex);
-                       return ret;
-               }
-       } else {
-               /* Let GEM Manage all of the aperture.
-                *
-                * However, leave one page at the end still bound to the scratch
-                * page.  There are a number of places where the hardware
-                * apparently prefetches past the end of the object, and we've
-                * seen multiple hangs with the GPU head pointer stuck in a
-                * batchbuffer bound at the last page of the aperture.  One page
-                * should be enough to keep any prefetching inside of the
-                * aperture.
-                */
-               i915_gem_init_global_gtt(dev, 0, mappable_size,
-                                        gtt_size);
-       }
-
+       i915_gem_init_global_gtt(dev);
        ret = i915_gem_init_hw(dev);
        mutex_unlock(&dev->struct_mutex);
        if (ret) {
@@ -4111,8 +4056,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
 void
 i915_gem_load(struct drm_device *dev)
 {
-       int i;
        drm_i915_private_t *dev_priv = dev->dev_private;
+       int i;
+
+       dev_priv->slab =
+               kmem_cache_create("i915_gem_object",
+                                 sizeof(struct drm_i915_gem_object), 0,
+                                 SLAB_HWCACHE_ALIGN,
+                                 NULL);
 
        INIT_LIST_HEAD(&dev_priv->mm.active_list);
        INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
@@ -4392,6 +4343,9 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
                if (!mutex_is_locked_by(&dev->struct_mutex, current))
                        return 0;
 
+               if (dev_priv->mm.shrinker_no_lock_stealing)
+                       return 0;
+
                unlock = false;
        }