drivers/gpu/drm/i915/i915_gem_gtt.c

   1 /*
   2  * Copyright © 2010 Daniel Vetter
   3  * Copyright © 2011-2014 Intel Corporation
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  *
  24  */
  25
  26 #include <linux/slab.h> /* fault-inject.h is not standalone! */
  27
  28 #include <linux/fault-inject.h>
  29 #include <linux/log2.h>
  30 #include <linux/random.h>
  31 #include <linux/seq_file.h>
  32 #include <linux/stop_machine.h>
  33
  34 #include <asm/set_memory.h>
  35
  36 #include <drm/drmP.h>
  37 #include <drm/i915_drm.h>
  38
  39 #include "i915_drv.h"
  40 #include "i915_vgpu.h"
  41 #include "i915_trace.h"
  42 #include "intel_drv.h"
  43 #include "intel_frontbuffer.h"
  44
  45 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
  46
  47 /**
  48  * DOC: Global GTT views
  49  *
  50  * Background and previous state
  51  *
   52  * Historically objects could exist (be bound) in global GTT space only as
  53  * singular instances with a view representing all of the object's backing pages
  54  * in a linear fashion. This view will be called a normal view.
  55  *
  56  * To support multiple views of the same object, where the number of mapped
  57  * pages is not equal to the backing store, or where the layout of the pages
   58  * is not linear, the concept of a GGTT view was added.
  59  *
  60  * One example of an alternative view is a stereo display driven by a single
  61  * image. In this case we would have a framebuffer looking like this
  62  * (2x2 pages):
  63  *
  64  *    12
  65  *    34
  66  *
  67  * Above would represent a normal GGTT view as normally mapped for GPU or CPU
  68  * rendering. In contrast, fed to the display engine would be an alternative
  69  * view which could look something like this:
  70  *
  71  *   1212
  72  *   3434
  73  *
  74  * In this example both the size and layout of pages in the alternative view is
  75  * different from the normal view.
  76  *
  77  * Implementation and usage
  78  *
  79  * GGTT views are implemented using VMAs and are distinguished via enum
  80  * i915_ggtt_view_type and struct i915_ggtt_view.
  81  *
   82  * A new flavour of core GEM functions which work with GGTT bound objects was
   83  * added with the _ggtt_ infix, and sometimes with _view postfix to avoid
   84  * renaming in large amounts of code. They take the struct i915_ggtt_view
  85  * parameter encapsulating all metadata required to implement a view.
  86  *
  87  * As a helper for callers which are only interested in the normal view,
  88  * globally const i915_ggtt_view_normal singleton instance exists. All old core
  89  * GEM API functions, the ones not taking the view parameter, are operating on,
  90  * or with the normal GGTT view.
  91  *
  92  * Code wanting to add or use a new GGTT view needs to:
  93  *
  94  * 1. Add a new enum with a suitable name.
  95  * 2. Extend the metadata in the i915_ggtt_view structure if required.
  96  * 3. Add support to i915_get_vma_pages().
  97  *
  98  * New views are required to build a scatter-gather table from within the
  99  * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
  100  * exists for the lifetime of a VMA.
 101  *
 102  * Core API is designed to have copy semantics which means that passed in
 103  * struct i915_ggtt_view does not need to be persistent (left around after
 104  * calling the core API functions).
 105  *
 106  */
 107
 108 static int
 109 i915_get_ggtt_vma_pages(struct i915_vma *vma);
 110
 111 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
 112 {
 113         /*
 114          * Note that as an uncached mmio write, this will flush the
 115          * WCB of the writes into the GGTT before it triggers the invalidate.
 116          */
 117         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 118 }
 119
 120 static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
 121 {
 122         gen6_ggtt_invalidate(dev_priv);
 123         I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 124 }
 125
 126 static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
 127 {
 128         intel_gtt_chipset_flush();
 129 }
 130
 131 static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
 132 {
 133         i915->ggtt.invalidate(i915);
 134 }
 135
 136 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 137                                 int enable_ppgtt)
 138 {
 139         bool has_full_ppgtt;
 140         bool has_full_48bit_ppgtt;
 141
 142         if (!dev_priv->info.has_aliasing_ppgtt)
 143                 return 0;
 144
 145         has_full_ppgtt = dev_priv->info.has_full_ppgtt;
 146         has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;
 147
 148         if (intel_vgpu_active(dev_priv)) {
 149                 /* GVT-g has no support for 32bit ppgtt */
 150                 has_full_ppgtt = false;
 151                 has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
 152         }
 153
 154         /*
 155          * We don't allow disabling PPGTT for gen9+ as it's a requirement for
 156          * execlists, the sole mechanism available to submit work.
 157          */
 158         if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
 159                 return 0;
 160
 161         if (enable_ppgtt == 1)
 162                 return 1;
 163
 164         if (enable_ppgtt == 2 && has_full_ppgtt)
 165                 return 2;
 166
 167         if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
 168                 return 3;
 169
 170         /* Disable ppgtt on SNB if VT-d is on. */
 171         if (IS_GEN6(dev_priv) && intel_vtd_active()) {
 172                 DRM_INFO("Disabling PPGTT because VT-d is on\n");
 173                 return 0;
 174         }
 175
 176         /* Early VLV doesn't have this */
 177         if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
 178                 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
 179                 return 0;
 180         }
 181
 182         if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
 183                 if (has_full_48bit_ppgtt)
 184                         return 3;
 185
 186                 if (has_full_ppgtt)
 187                         return 2;
 188         }
 189
 190         return 1;
 191 }
 192
 193 static int ppgtt_bind_vma(struct i915_vma *vma,
 194                           enum i915_cache_level cache_level,
 195                           u32 unused)
 196 {
 197         u32 pte_flags;
 198         int err;
 199
 200         if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
 201                 err = vma->vm->allocate_va_range(vma->vm,
 202                                                  vma->node.start, vma->size);
 203                 if (err)
 204                         return err;
 205         }
 206
 207         /* Applicable to VLV, and gen8+ */
 208         pte_flags = 0;
 209         if (i915_gem_object_is_readonly(vma->obj))
 210                 pte_flags |= PTE_READ_ONLY;
 211
 212         vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
 213
 214         return 0;
 215 }
 216
 217 static void ppgtt_unbind_vma(struct i915_vma *vma)
 218 {
 219         vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 220 }
 221
 222 static int ppgtt_set_pages(struct i915_vma *vma)
 223 {
 224         GEM_BUG_ON(vma->pages);
 225
 226         vma->pages = vma->obj->mm.pages;
 227
 228         vma->page_sizes = vma->obj->mm.page_sizes;
 229
 230         return 0;
 231 }
 232
 233 static void clear_pages(struct i915_vma *vma)
 234 {
 235         GEM_BUG_ON(!vma->pages);
 236
 237         if (vma->pages != vma->obj->mm.pages) {
 238                 sg_free_table(vma->pages);
 239                 kfree(vma->pages);
 240         }
 241         vma->pages = NULL;
 242
 243         memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
 244 }
 245
 246 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
 247                                   enum i915_cache_level level,
 248                                   u32 flags)
 249 {
 250         gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
 251
 252         if (unlikely(flags & PTE_READ_ONLY))
 253                 pte &= ~_PAGE_RW;
 254
 255         switch (level) {
 256         case I915_CACHE_NONE:
 257                 pte |= PPAT_UNCACHED;
 258                 break;
 259         case I915_CACHE_WT:
 260                 pte |= PPAT_DISPLAY_ELLC;
 261                 break;
 262         default:
 263                 pte |= PPAT_CACHED;
 264                 break;
 265         }
 266
 267         return pte;
 268 }
 269
 270 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
 271                                   const enum i915_cache_level level)
 272 {
 273         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
 274         pde |= addr;
 275         if (level != I915_CACHE_NONE)
 276                 pde |= PPAT_CACHED_PDE;
 277         else
 278                 pde |= PPAT_UNCACHED;
 279         return pde;
 280 }
 281
 282 #define gen8_pdpe_encode gen8_pde_encode
 283 #define gen8_pml4e_encode gen8_pde_encode
 284
 285 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
 286                                  enum i915_cache_level level,
 287                                  u32 unused)
 288 {
 289         gen6_pte_t pte = GEN6_PTE_VALID;
 290         pte |= GEN6_PTE_ADDR_ENCODE(addr);
 291
 292         switch (level) {
 293         case I915_CACHE_L3_LLC:
 294         case I915_CACHE_LLC:
 295                 pte |= GEN6_PTE_CACHE_LLC;
 296                 break;
 297         case I915_CACHE_NONE:
 298                 pte |= GEN6_PTE_UNCACHED;
 299                 break;
 300         default:
 301                 MISSING_CASE(level);
 302         }
 303
 304         return pte;
 305 }
 306
 307 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
 308                                  enum i915_cache_level level,
 309                                  u32 unused)
 310 {
 311         gen6_pte_t pte = GEN6_PTE_VALID;
 312         pte |= GEN6_PTE_ADDR_ENCODE(addr);
 313
 314         switch (level) {
 315         case I915_CACHE_L3_LLC:
 316                 pte |= GEN7_PTE_CACHE_L3_LLC;
 317                 break;
 318         case I915_CACHE_LLC:
 319                 pte |= GEN6_PTE_CACHE_LLC;
 320                 break;
 321         case I915_CACHE_NONE:
 322                 pte |= GEN6_PTE_UNCACHED;
 323                 break;
 324         default:
 325                 MISSING_CASE(level);
 326         }
 327
 328         return pte;
 329 }
 330
 331 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
 332                                  enum i915_cache_level level,
 333                                  u32 flags)
 334 {
 335         gen6_pte_t pte = GEN6_PTE_VALID;
 336         pte |= GEN6_PTE_ADDR_ENCODE(addr);
 337
 338         if (!(flags & PTE_READ_ONLY))
 339                 pte |= BYT_PTE_WRITEABLE;
 340
 341         if (level != I915_CACHE_NONE)
 342                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
 343
 344         return pte;
 345 }
 346
 347 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
 348                                  enum i915_cache_level level,
 349                                  u32 unused)
 350 {
 351         gen6_pte_t pte = GEN6_PTE_VALID;
 352         pte |= HSW_PTE_ADDR_ENCODE(addr);
 353
 354         if (level != I915_CACHE_NONE)
 355                 pte |= HSW_WB_LLC_AGE3;
 356
 357         return pte;
 358 }
 359
 360 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
 361                                   enum i915_cache_level level,
 362                                   u32 unused)
 363 {
 364         gen6_pte_t pte = GEN6_PTE_VALID;
 365         pte |= HSW_PTE_ADDR_ENCODE(addr);
 366
 367         switch (level) {
 368         case I915_CACHE_NONE:
 369                 break;
 370         case I915_CACHE_WT:
 371                 pte |= HSW_WT_ELLC_LLC_AGE3;
 372                 break;
 373         default:
 374                 pte |= HSW_WB_ELLC_LLC_AGE3;
 375                 break;
 376         }
 377
 378         return pte;
 379 }
 380
 381 static void stash_init(struct pagestash *stash)
 382 {
 383         pagevec_init(&stash->pvec);
 384         spin_lock_init(&stash->lock);
 385 }
 386
 387 static struct page *stash_pop_page(struct pagestash *stash)
 388 {
 389         struct page *page = NULL;
 390
 391         spin_lock(&stash->lock);
 392         if (likely(stash->pvec.nr))
 393                 page = stash->pvec.pages[--stash->pvec.nr];
 394         spin_unlock(&stash->lock);
 395
 396         return page;
 397 }
 398
 399 static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
 400 {
 401         int nr;
 402
 403         spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
 404
 405         nr = min_t(int, pvec->nr, pagevec_space(&stash->pvec));
 406         memcpy(stash->pvec.pages + stash->pvec.nr,
 407                pvec->pages + pvec->nr - nr,
 408                sizeof(pvec->pages[0]) * nr);
 409         stash->pvec.nr += nr;
 410
 411         spin_unlock(&stash->lock);
 412
 413         pvec->nr -= nr;
 414 }
 415
 416 static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 417 {
 418         struct pagevec stack;
 419         struct page *page;
 420
 421         if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
 422                 i915_gem_shrink_all(vm->i915);
 423
 424         page = stash_pop_page(&vm->free_pages);
 425         if (page)
 426                 return page;
 427
 428         if (!vm->pt_kmap_wc)
 429                 return alloc_page(gfp);
 430
 431         /* Look in our global stash of WC pages... */
 432         page = stash_pop_page(&vm->i915->mm.wc_stash);
 433         if (page)
 434                 return page;
 435
 436         /*
 437          * Otherwise batch allocate pages to amortize cost of set_pages_wc.
 438          *
 439          * We have to be careful as page allocation may trigger the shrinker
 440          * (via direct reclaim) which will fill up the WC stash underneath us.
 441          * So we add our WB pages into a temporary pvec on the stack and merge
 442          * them into the WC stash after all the allocations are complete.
 443          */
 444         pagevec_init(&stack);
 445         do {
 446                 struct page *page;
 447
 448                 page = alloc_page(gfp);
 449                 if (unlikely(!page))
 450                         break;
 451
 452                 stack.pages[stack.nr++] = page;
 453         } while (pagevec_space(&stack));
 454
 455         if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
 456                 page = stack.pages[--stack.nr];
 457
 458                 /* Merge spare WC pages to the global stash */
 459                 stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
 460
 461                 /* Push any surplus WC pages onto the local VM stash */
 462                 if (stack.nr)
 463                         stash_push_pagevec(&vm->free_pages, &stack);
 464         }
 465
 466         /* Return unwanted leftovers */
 467         if (unlikely(stack.nr)) {
 468                 WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
 469                 __pagevec_release(&stack);
 470         }
 471
 472         return page;
 473 }
 474
 475 static void vm_free_pages_release(struct i915_address_space *vm,
 476                                   bool immediate)
 477 {
 478         struct pagevec *pvec = &vm->free_pages.pvec;
 479         struct pagevec stack;
 480
 481         lockdep_assert_held(&vm->free_pages.lock);
 482         GEM_BUG_ON(!pagevec_count(pvec));
 483
 484         if (vm->pt_kmap_wc) {
 485                 /*
 486                  * When we use WC, first fill up the global stash and then
 487                  * only if full immediately free the overflow.
 488                  */
 489                 stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
 490
 491                 /*
 492                  * As we have made some room in the VM's free_pages,
 493                  * we can wait for it to fill again. Unless we are
 494                  * inside i915_address_space_fini() and must
 495                  * immediately release the pages!
 496                  */
 497                 if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
 498                         return;
 499
 500                 /*
 501                  * We have to drop the lock to allow ourselves to sleep,
 502                  * so take a copy of the pvec and clear the stash for
 503                  * others to use it as we sleep.
 504                  */
 505                 stack = *pvec;
 506                 pagevec_reinit(pvec);
 507                 spin_unlock(&vm->free_pages.lock);
 508
 509                 pvec = &stack;
 510                 set_pages_array_wb(pvec->pages, pvec->nr);
 511
 512                 spin_lock(&vm->free_pages.lock);
 513         }
 514
 515         __pagevec_release(pvec);
 516 }
 517
 518 static void vm_free_page(struct i915_address_space *vm, struct page *page)
 519 {
 520         /*
 521          * On !llc, we need to change the pages back to WB. We only do so
 522          * in bulk, so we rarely need to change the page attributes here,
 523          * but doing so requires a stop_machine() from deep inside arch/x86/mm.
 524          * To make detection of the possible sleep more likely, use an
 525          * unconditional might_sleep() for everybody.
 526          */
 527         might_sleep();
 528         spin_lock(&vm->free_pages.lock);
 529         if (!pagevec_add(&vm->free_pages.pvec, page))
 530                 vm_free_pages_release(vm, false);
 531         spin_unlock(&vm->free_pages.lock);
 532 }
 533
 534 static void i915_address_space_init(struct i915_address_space *vm,
 535                                     struct drm_i915_private *dev_priv)
 536 {
 537         /*
 538          * The vm->mutex must be reclaim safe (for use in the shrinker).
 539          * Do a dummy acquire now under fs_reclaim so that any allocation
 540          * attempt holding the lock is immediately reported by lockdep.
 541          */
 542         mutex_init(&vm->mutex);
 543         i915_gem_shrinker_taints_mutex(&vm->mutex);
 544
 545         GEM_BUG_ON(!vm->total);
 546         drm_mm_init(&vm->mm, 0, vm->total);
 547         vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 548
 549         stash_init(&vm->free_pages);
 550
 551         INIT_LIST_HEAD(&vm->active_list);
 552         INIT_LIST_HEAD(&vm->inactive_list);
 553         INIT_LIST_HEAD(&vm->unbound_list);
 554 }
 555
 556 static void i915_address_space_fini(struct i915_address_space *vm)
 557 {
 558         spin_lock(&vm->free_pages.lock);
 559         if (pagevec_count(&vm->free_pages.pvec))
 560                 vm_free_pages_release(vm, true);
 561         GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
 562         spin_unlock(&vm->free_pages.lock);
 563
 564         drm_mm_takedown(&vm->mm);
 565
 566         mutex_destroy(&vm->mutex);
 567 }
 568
 569 static int __setup_page_dma(struct i915_address_space *vm,
 570                             struct i915_page_dma *p,
 571                             gfp_t gfp)
 572 {
 573         p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
 574         if (unlikely(!p->page))
 575                 return -ENOMEM;
 576
 577         p->daddr = dma_map_page_attrs(vm->dma,
 578                                       p->page, 0, PAGE_SIZE,
 579                                       PCI_DMA_BIDIRECTIONAL,
 580                                       DMA_ATTR_SKIP_CPU_SYNC |
 581                                       DMA_ATTR_NO_WARN);
 582         if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
 583                 vm_free_page(vm, p->page);
 584                 return -ENOMEM;
 585         }
 586
 587         return 0;
 588 }
 589
 590 static int setup_page_dma(struct i915_address_space *vm,
 591                           struct i915_page_dma *p)
 592 {
 593         return __setup_page_dma(vm, p, __GFP_HIGHMEM);
 594 }
 595
 596 static void cleanup_page_dma(struct i915_address_space *vm,
 597                              struct i915_page_dma *p)
 598 {
 599         dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 600         vm_free_page(vm, p->page);
 601 }
 602
 603 #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
 604
 605 #define setup_px(vm, px) setup_page_dma((vm), px_base(px))
 606 #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
 607 #define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
 608 #define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
 609
 610 static void fill_page_dma(struct i915_address_space *vm,
 611                           struct i915_page_dma *p,
 612                           const u64 val)
 613 {
 614         u64 * const vaddr = kmap_atomic(p->page);
 615
 616         memset64(vaddr, val, PAGE_SIZE / sizeof(val));
 617
 618         kunmap_atomic(vaddr);
 619 }
 620
 621 static void fill_page_dma_32(struct i915_address_space *vm,
 622                              struct i915_page_dma *p,
 623                              const u32 v)
 624 {
 625         fill_page_dma(vm, p, (u64)v << 32 | v);
 626 }
 627
 628 static int
 629 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 630 {
 631         unsigned long size;
 632
 633         /*
 634          * In order to utilize 64K pages for an object with a size < 2M, we will
 635          * need to support a 64K scratch page, given that every 16th entry for a
 636          * page-table operating in 64K mode must point to a properly aligned 64K
 637          * region, including any PTEs which happen to point to scratch.
 638          *
 639          * This is only relevant for the 48b PPGTT where we support
 640          * huge-gtt-pages, see also i915_vma_insert().
 641          *
 642          * TODO: we should really consider write-protecting the scratch-page and
 643          * sharing between ppgtt
 644          */
 645         size = I915_GTT_PAGE_SIZE_4K;
 646         if (i915_vm_is_48bit(vm) &&
 647             HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
 648                 size = I915_GTT_PAGE_SIZE_64K;
 649                 gfp |= __GFP_NOWARN;
 650         }
 651         gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
 652
 653         do {
 654                 int order = get_order(size);
 655                 struct page *page;
 656                 dma_addr_t addr;
 657
 658                 page = alloc_pages(gfp, order);
 659                 if (unlikely(!page))
 660                         goto skip;
 661
 662                 addr = dma_map_page_attrs(vm->dma,
 663                                           page, 0, size,
 664                                           PCI_DMA_BIDIRECTIONAL,
 665                                           DMA_ATTR_SKIP_CPU_SYNC |
 666                                           DMA_ATTR_NO_WARN);
 667                 if (unlikely(dma_mapping_error(vm->dma, addr)))
 668                         goto free_page;
 669
 670                 if (unlikely(!IS_ALIGNED(addr, size)))
 671                         goto unmap_page;
 672
 673                 vm->scratch_page.page = page;
 674                 vm->scratch_page.daddr = addr;
 675                 vm->scratch_page.order = order;
 676                 return 0;
 677
 678 unmap_page:
 679                 dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
 680 free_page:
 681                 __free_pages(page, order);
 682 skip:
 683                 if (size == I915_GTT_PAGE_SIZE_4K)
 684                         return -ENOMEM;
 685
 686                 size = I915_GTT_PAGE_SIZE_4K;
 687                 gfp &= ~__GFP_NOWARN;
 688         } while (1);
 689 }
 690
 691 static void cleanup_scratch_page(struct i915_address_space *vm)
 692 {
 693         struct i915_page_dma *p = &vm->scratch_page;
 694
 695         dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
 696                        PCI_DMA_BIDIRECTIONAL);
 697         __free_pages(p->page, p->order);
 698 }
 699
 700 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 701 {
 702         struct i915_page_table *pt;
 703
 704         pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
 705         if (unlikely(!pt))
 706                 return ERR_PTR(-ENOMEM);
 707
 708         if (unlikely(setup_px(vm, pt))) {
 709                 kfree(pt);
 710                 return ERR_PTR(-ENOMEM);
 711         }
 712
 713         pt->used_ptes = 0;
 714         return pt;
 715 }
 716
 717 static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
 718 {
 719         cleanup_px(vm, pt);
 720         kfree(pt);
 721 }
 722
 723 static void gen8_initialize_pt(struct i915_address_space *vm,
 724                                struct i915_page_table *pt)
 725 {
 726         fill_px(vm, pt,
 727                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
 728 }
 729
 730 static void gen6_initialize_pt(struct gen6_hw_ppgtt *ppgtt,
 731                                struct i915_page_table *pt)
 732 {
 733         fill32_px(&ppgtt->base.vm, pt, ppgtt->scratch_pte);
 734 }
 735
 736 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 737 {
 738         struct i915_page_directory *pd;
 739
 740         pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
 741         if (unlikely(!pd))
 742                 return ERR_PTR(-ENOMEM);
 743
 744         if (unlikely(setup_px(vm, pd))) {
 745                 kfree(pd);
 746                 return ERR_PTR(-ENOMEM);
 747         }
 748
 749         pd->used_pdes = 0;
 750         return pd;
 751 }
 752
 753 static void free_pd(struct i915_address_space *vm,
 754                     struct i915_page_directory *pd)
 755 {
 756         cleanup_px(vm, pd);
 757         kfree(pd);
 758 }
 759
 760 static void gen8_initialize_pd(struct i915_address_space *vm,
 761                                struct i915_page_directory *pd)
 762 {
 763         fill_px(vm, pd,
 764                 gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
 765         memset_p((void **)pd->page_table, vm->scratch_pt, I915_PDES);
 766 }
 767
 768 static int __pdp_init(struct i915_address_space *vm,
 769                       struct i915_page_directory_pointer *pdp)
 770 {
 771         const unsigned int pdpes = i915_pdpes_per_pdp(vm);
 772
 773         pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
 774                                             I915_GFP_ALLOW_FAIL);
 775         if (unlikely(!pdp->page_directory))
 776                 return -ENOMEM;
 777
 778         memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes);
 779
 780         return 0;
 781 }
 782
 783 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 784 {
 785         kfree(pdp->page_directory);
 786         pdp->page_directory = NULL;
 787 }
 788
 789 static inline bool use_4lvl(const struct i915_address_space *vm)
 790 {
 791         return i915_vm_is_48bit(vm);
 792 }
 793
 794 static struct i915_page_directory_pointer *
 795 alloc_pdp(struct i915_address_space *vm)
 796 {
 797         struct i915_page_directory_pointer *pdp;
 798         int ret = -ENOMEM;
 799
 800         GEM_BUG_ON(!use_4lvl(vm));
 801
 802         pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
 803         if (!pdp)
 804                 return ERR_PTR(-ENOMEM);
 805
 806         ret = __pdp_init(vm, pdp);
 807         if (ret)
 808                 goto fail_bitmap;
 809
 810         ret = setup_px(vm, pdp);
 811         if (ret)
 812                 goto fail_page_m;
 813
 814         return pdp;
 815
 816 fail_page_m:
 817         __pdp_fini(pdp);
 818 fail_bitmap:
 819         kfree(pdp);
 820
 821         return ERR_PTR(ret);
 822 }
 823
 824 static void free_pdp(struct i915_address_space *vm,
 825                      struct i915_page_directory_pointer *pdp)
 826 {
 827         __pdp_fini(pdp);
 828
 829         if (!use_4lvl(vm))
 830                 return;
 831
 832         cleanup_px(vm, pdp);
 833         kfree(pdp);
 834 }
 835
 836 static void gen8_initialize_pdp(struct i915_address_space *vm,
 837                                 struct i915_page_directory_pointer *pdp)
 838 {
 839         gen8_ppgtt_pdpe_t scratch_pdpe;
 840
 841         scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
 842
 843         fill_px(vm, pdp, scratch_pdpe);
 844 }
 845
 846 static void gen8_initialize_pml4(struct i915_address_space *vm,
 847                                  struct i915_pml4 *pml4)
 848 {
 849         fill_px(vm, pml4,
 850                 gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
 851         memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4);
 852 }
 853
 854 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
 855  * the page table structures, we mark them dirty so that
 856  * context switching/execlist queuing code takes extra steps
 857  * to ensure that tlbs are flushed.
 858  */
 859 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
 860 {
 861         ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->vm.i915)->ring_mask;
 862 }
 863
 864 /* Removes entries from a single page table, releasing it if it's empty.
 865  * Caller can use the return value to update higher-level entries.
 866  */
 867 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 868                                 struct i915_page_table *pt,
 869                                 u64 start, u64 length)
 870 {
 871         unsigned int num_entries = gen8_pte_count(start, length);
 872         unsigned int pte = gen8_pte_index(start);
 873         unsigned int pte_end = pte + num_entries;
 874         const gen8_pte_t scratch_pte =
 875                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
 876         gen8_pte_t *vaddr;
 877
 878         GEM_BUG_ON(num_entries > pt->used_ptes);
 879
 880         pt->used_ptes -= num_entries;
 881         if (!pt->used_ptes)
 882                 return true;
 883
 884         vaddr = kmap_atomic_px(pt);
 885         while (pte < pte_end)
 886                 vaddr[pte++] = scratch_pte;
 887         kunmap_atomic(vaddr);
 888
 889         return false;
 890 }
 891
 892 static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
 893                                struct i915_page_directory *pd,
 894                                struct i915_page_table *pt,
 895                                unsigned int pde)
 896 {
 897         gen8_pde_t *vaddr;
 898
 899         pd->page_table[pde] = pt;
 900
 901         vaddr = kmap_atomic_px(pd);
 902         vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
 903         kunmap_atomic(vaddr);
 904 }
 905
 906 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 907                                 struct i915_page_directory *pd,
 908                                 u64 start, u64 length)
 909 {
 910         struct i915_page_table *pt;
 911         u32 pde;
 912
 913         gen8_for_each_pde(pt, pd, start, length, pde) {
 914                 GEM_BUG_ON(pt == vm->scratch_pt);
 915
 916                 if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
 917                         continue;
 918
 919                 gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
 920                 GEM_BUG_ON(!pd->used_pdes);
 921                 pd->used_pdes--;
 922
 923                 free_pt(vm, pt);
 924         }
 925
 926         return !pd->used_pdes;
 927 }
 928
 929 static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
 930                                 struct i915_page_directory_pointer *pdp,
 931                                 struct i915_page_directory *pd,
 932                                 unsigned int pdpe)
 933 {
 934         gen8_ppgtt_pdpe_t *vaddr;
 935
 936         pdp->page_directory[pdpe] = pd;
 937         if (!use_4lvl(vm))
 938                 return;
 939
 940         vaddr = kmap_atomic_px(pdp);
 941         vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
 942         kunmap_atomic(vaddr);
 943 }
 944
 945 /* Removes entries from a single page dir pointer, releasing it if it's empty.
 946  * Caller can use the return value to update higher-level entries
 947  */
 948 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 949                                  struct i915_page_directory_pointer *pdp,
 950                                  u64 start, u64 length)
 951 {
 952         struct i915_page_directory *pd;
 953         unsigned int pdpe;
 954
 955         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 956                 GEM_BUG_ON(pd == vm->scratch_pd);
 957
 958                 if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
 959                         continue;
 960
 961                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
 962                 GEM_BUG_ON(!pdp->used_pdpes);
 963                 pdp->used_pdpes--;
 964
 965                 free_pd(vm, pd);
 966         }
 967
 968         return !pdp->used_pdpes;
 969 }
 970
 971 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
 972                                   u64 start, u64 length)
 973 {
 974         gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
 975 }
 976
 977 static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
 978                                  struct i915_page_directory_pointer *pdp,
 979                                  unsigned int pml4e)
 980 {
 981         gen8_ppgtt_pml4e_t *vaddr;
 982
 983         pml4->pdps[pml4e] = pdp;
 984
 985         vaddr = kmap_atomic_px(pml4);
 986         vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
 987         kunmap_atomic(vaddr);
 988 }
 989
 990 /* Removes entries from a single pml4.
 991  * This is the top-level structure in 4-level page tables used on gen8+.
 992  * Empty entries are always scratch pml4e.
 993  */
 994 static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 995                                   u64 start, u64 length)
 996 {
 997         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 998         struct i915_pml4 *pml4 = &ppgtt->pml4;
 999         struct i915_page_directory_pointer *pdp;
1000         unsigned int pml4e;
1001
1002         GEM_BUG_ON(!use_4lvl(vm));
1003
1004         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1005                 GEM_BUG_ON(pdp == vm->scratch_pdp);
1006
1007                 if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
1008                         continue;
1009
1010                 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1011
1012                 free_pdp(vm, pdp);
1013         }
1014 }
1015
1016 static inline struct sgt_dma {
1017         struct scatterlist *sg;
1018         dma_addr_t dma, max;
1019 } sgt_dma(struct i915_vma *vma) {
1020         struct scatterlist *sg = vma->pages->sgl;
1021         dma_addr_t addr = sg_dma_address(sg);
1022         return (struct sgt_dma) { sg, addr, addr + sg->length };
1023 }
1024
1025 struct gen8_insert_pte {
1026         u16 pml4e;
1027         u16 pdpe;
1028         u16 pde;
1029         u16 pte;
1030 };
1031
1032 static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
1033 {
1034         return (struct gen8_insert_pte) {
1035                  gen8_pml4e_index(start),
1036                  gen8_pdpe_index(start),
1037                  gen8_pde_index(start),
1038                  gen8_pte_index(start),
1039         };
1040 }
1041
1042 static __always_inline bool
1043 gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
1044                               struct i915_page_directory_pointer *pdp,
1045                               struct sgt_dma *iter,
1046                               struct gen8_insert_pte *idx,
1047                               enum i915_cache_level cache_level,
1048                               u32 flags)
1049 {
1050         struct i915_page_directory *pd;
1051         const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
1052         gen8_pte_t *vaddr;
1053         bool ret;
1054
1055         GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
1056         pd = pdp->page_directory[idx->pdpe];
1057         vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1058         do {
1059                 vaddr[idx->pte] = pte_encode | iter->dma;
1060
1061                 iter->dma += PAGE_SIZE;
1062                 if (iter->dma >= iter->max) {
1063                         iter->sg = __sg_next(iter->sg);
1064                         if (!iter->sg) {
1065                                 ret = false;
1066                                 break;
1067                         }
1068
1069                         iter->dma = sg_dma_address(iter->sg);
1070                         iter->max = iter->dma + iter->sg->length;
1071                 }
1072
1073                 if (++idx->pte == GEN8_PTES) {
1074                         idx->pte = 0;
1075
1076                         if (++idx->pde == I915_PDES) {
1077                                 idx->pde = 0;
1078
1079                                 /* Limited by sg length for 3lvl */
1080                                 if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
1081                                         idx->pdpe = 0;
1082                                         ret = true;
1083                                         break;
1084                                 }
1085
1086                                 GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
1087                                 pd = pdp->page_directory[idx->pdpe];
1088                         }
1089
1090                         kunmap_atomic(vaddr);
1091                         vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1092                 }
1093         } while (1);
1094         kunmap_atomic(vaddr);
1095
1096         return ret;
1097 }
1098
1099 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
1100                                    struct i915_vma *vma,
1101                                    enum i915_cache_level cache_level,
1102                                    u32 flags)
1103 {
1104         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1105         struct sgt_dma iter = sgt_dma(vma);
1106         struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1107
1108         gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
1109                                       cache_level, flags);
1110
1111         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1112 }
1113
1114 static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
1115                                            struct i915_page_directory_pointer **pdps,
1116                                            struct sgt_dma *iter,
1117                                            enum i915_cache_level cache_level,
1118                                            u32 flags)
1119 {
1120         const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
1121         u64 start = vma->node.start;
1122         dma_addr_t rem = iter->sg->length;
1123
1124         do {
1125                 struct gen8_insert_pte idx = gen8_insert_pte(start);
1126                 struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
1127                 struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
1128                 unsigned int page_size;
1129                 bool maybe_64K = false;
1130                 gen8_pte_t encode = pte_encode;
1131                 gen8_pte_t *vaddr;
1132                 u16 index, max;
1133
1134                 if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
1135                     IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
1136                     rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
1137                         index = idx.pde;
1138                         max = I915_PDES;
1139                         page_size = I915_GTT_PAGE_SIZE_2M;
1140
1141                         encode |= GEN8_PDE_PS_2M;
1142
1143                         vaddr = kmap_atomic_px(pd);
1144                 } else {
1145                         struct i915_page_table *pt = pd->page_table[idx.pde];
1146
1147                         index = idx.pte;
1148                         max = GEN8_PTES;
1149                         page_size = I915_GTT_PAGE_SIZE;
1150
1151                         if (!index &&
1152                             vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
1153                             IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1154                             (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1155                              rem >= (max - index) << PAGE_SHIFT))
1156                                 maybe_64K = true;
1157
1158                         vaddr = kmap_atomic_px(pt);
1159                 }
1160
1161                 do {
1162                         GEM_BUG_ON(iter->sg->length < page_size);
1163                         vaddr[index++] = encode | iter->dma;
1164
1165                         start += page_size;
1166                         iter->dma += page_size;
1167                         rem -= page_size;
1168                         if (iter->dma >= iter->max) {
1169                                 iter->sg = __sg_next(iter->sg);
1170                                 if (!iter->sg)
1171                                         break;
1172
1173                                 rem = iter->sg->length;
1174                                 iter->dma = sg_dma_address(iter->sg);
1175                                 iter->max = iter->dma + rem;
1176
1177                                 if (maybe_64K && index < max &&
1178                                     !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1179                                       (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1180                                        rem >= (max - index) << PAGE_SHIFT)))
1181                                         maybe_64K = false;
1182
1183                                 if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
1184                                         break;
1185                         }
1186                 } while (rem >= page_size && index < max);
1187
1188                 kunmap_atomic(vaddr);
1189
1190                 /*
1191                  * Is it safe to mark the 2M block as 64K? -- Either we have
1192                  * filled whole page-table with 64K entries, or filled part of
1193                  * it and have reached the end of the sg table and we have
1194                  * enough padding.
1195                  */
1196                 if (maybe_64K &&
1197                     (index == max ||
1198                      (i915_vm_has_scratch_64K(vma->vm) &&
1199                       !iter->sg && IS_ALIGNED(vma->node.start +
1200                                               vma->node.size,
1201                                               I915_GTT_PAGE_SIZE_2M)))) {
1202                         vaddr = kmap_atomic_px(pd);
1203                         vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
1204                         kunmap_atomic(vaddr);
1205                         page_size = I915_GTT_PAGE_SIZE_64K;
1206
1207                         /*
1208                          * We write all 4K page entries, even when using 64K
1209                          * pages. In order to verify that the HW isn't cheating
1210                          * by using the 4K PTE instead of the 64K PTE, we want
1211                          * to remove all the surplus entries. If the HW skipped
1212                          * the 64K PTE, it will read/write into the scratch page
1213                          * instead - which we detect as missing results during
1214                          * selftests.
1215                          */
1216                         if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
1217                                 u16 i;
1218
1219                                 encode = pte_encode | vma->vm->scratch_page.daddr;
1220                                 vaddr = kmap_atomic_px(pd->page_table[idx.pde]);
1221
1222                                 for (i = 1; i < index; i += 16)
1223                                         memset64(vaddr + i, encode, 15);
1224
1225                                 kunmap_atomic(vaddr);
1226                         }
1227                 }
1228
1229                 vma->page_sizes.gtt |= page_size;
1230         } while (iter->sg);
1231 }
1232
1233 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
1234                                    struct i915_vma *vma,
1235                                    enum i915_cache_level cache_level,
1236                                    u32 flags)
1237 {
1238         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1239         struct sgt_dma iter = sgt_dma(vma);
1240         struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
1241
1242         if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
1243                 gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level,
1244                                                flags);
1245         } else {
1246                 struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1247
1248                 while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
1249                                                      &iter, &idx, cache_level,
1250                                                      flags))
1251                         GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
1252
1253                 vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1254         }
1255 }
1256
1257 static void gen8_free_page_tables(struct i915_address_space *vm,
1258                                   struct i915_page_directory *pd)
1259 {
1260         int i;
1261
1262         if (!px_page(pd))
1263                 return;
1264
1265         for (i = 0; i < I915_PDES; i++) {
1266                 if (pd->page_table[i] != vm->scratch_pt)
1267                         free_pt(vm, pd->page_table[i]);
1268         }
1269 }
1270
1271 static int gen8_init_scratch(struct i915_address_space *vm)
1272 {
1273         int ret;
1274
1275         ret = setup_scratch_page(vm, __GFP_HIGHMEM);
1276         if (ret)
1277                 return ret;
1278
1279         vm->scratch_pt = alloc_pt(vm);
1280         if (IS_ERR(vm->scratch_pt)) {
1281                 ret = PTR_ERR(vm->scratch_pt);
1282                 goto free_scratch_page;
1283         }
1284
1285         vm->scratch_pd = alloc_pd(vm);
1286         if (IS_ERR(vm->scratch_pd)) {
1287                 ret = PTR_ERR(vm->scratch_pd);
1288                 goto free_pt;
1289         }
1290
1291         if (use_4lvl(vm)) {
1292                 vm->scratch_pdp = alloc_pdp(vm);
1293                 if (IS_ERR(vm->scratch_pdp)) {
1294                         ret = PTR_ERR(vm->scratch_pdp);
1295                         goto free_pd;
1296                 }
1297         }
1298
1299         gen8_initialize_pt(vm, vm->scratch_pt);
1300         gen8_initialize_pd(vm, vm->scratch_pd);
1301         if (use_4lvl(vm))
1302                 gen8_initialize_pdp(vm, vm->scratch_pdp);
1303
1304         return 0;
1305
1306 free_pd:
1307         free_pd(vm, vm->scratch_pd);
1308 free_pt:
1309         free_pt(vm, vm->scratch_pt);
1310 free_scratch_page:
1311         cleanup_scratch_page(vm);
1312
1313         return ret;
1314 }
1315
1316 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
1317 {
1318         struct i915_address_space *vm = &ppgtt->vm;
1319         struct drm_i915_private *dev_priv = vm->i915;
1320         enum vgt_g2v_type msg;
1321         int i;
1322
1323         if (use_4lvl(vm)) {
1324                 const u64 daddr = px_dma(&ppgtt->pml4);
1325
1326                 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
1327                 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
1328
1329                 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1330                                 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1331         } else {
1332                 for (i = 0; i < GEN8_3LVL_PDPES; i++) {
1333                         const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1334
1335                         I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
1336                         I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
1337                 }
1338
1339                 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1340                                 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1341         }
1342
1343         I915_WRITE(vgtif_reg(g2v_notify), msg);
1344
1345         return 0;
1346 }
1347
1348 static void gen8_free_scratch(struct i915_address_space *vm)
1349 {
1350         if (use_4lvl(vm))
1351                 free_pdp(vm, vm->scratch_pdp);
1352         free_pd(vm, vm->scratch_pd);
1353         free_pt(vm, vm->scratch_pt);
1354         cleanup_scratch_page(vm);
1355 }
1356
1357 static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
1358                                     struct i915_page_directory_pointer *pdp)
1359 {
1360         const unsigned int pdpes = i915_pdpes_per_pdp(vm);
1361         int i;
1362
1363         for (i = 0; i < pdpes; i++) {
1364                 if (pdp->page_directory[i] == vm->scratch_pd)
1365                         continue;
1366
1367                 gen8_free_page_tables(vm, pdp->page_directory[i]);
1368                 free_pd(vm, pdp->page_directory[i]);
1369         }
1370
1371         free_pdp(vm, pdp);
1372 }
1373
1374 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1375 {
1376         int i;
1377
1378         for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
1379                 if (ppgtt->pml4.pdps[i] == ppgtt->vm.scratch_pdp)
1380                         continue;
1381
1382                 gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pml4.pdps[i]);
1383         }
1384
1385         cleanup_px(&ppgtt->vm, &ppgtt->pml4);
1386 }
1387
1388 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1389 {
1390         struct drm_i915_private *dev_priv = vm->i915;
1391         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1392
1393         if (intel_vgpu_active(dev_priv))
1394                 gen8_ppgtt_notify_vgt(ppgtt, false);
1395
1396         if (use_4lvl(vm))
1397                 gen8_ppgtt_cleanup_4lvl(ppgtt);
1398         else
1399                 gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, &ppgtt->pdp);
1400
1401         gen8_free_scratch(vm);
1402 }
1403
1404 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
1405                                struct i915_page_directory *pd,
1406                                u64 start, u64 length)
1407 {
1408         struct i915_page_table *pt;
1409         u64 from = start;
1410         unsigned int pde;
1411
1412         gen8_for_each_pde(pt, pd, start, length, pde) {
1413                 int count = gen8_pte_count(start, length);
1414
1415                 if (pt == vm->scratch_pt) {
1416                         pd->used_pdes++;
1417
1418                         pt = alloc_pt(vm);
1419                         if (IS_ERR(pt)) {
1420                                 pd->used_pdes--;
1421                                 goto unwind;
1422                         }
1423
1424                         if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
1425                                 gen8_initialize_pt(vm, pt);
1426
1427                         gen8_ppgtt_set_pde(vm, pd, pt, pde);
1428                         GEM_BUG_ON(pd->used_pdes > I915_PDES);
1429                 }
1430
1431                 pt->used_ptes += count;
1432         }
1433         return 0;
1434
1435 unwind:
1436         gen8_ppgtt_clear_pd(vm, pd, from, start - from);
1437         return -ENOMEM;
1438 }
1439
1440 static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
1441                                 struct i915_page_directory_pointer *pdp,
1442                                 u64 start, u64 length)
1443 {
1444         struct i915_page_directory *pd;
1445         u64 from = start;
1446         unsigned int pdpe;
1447         int ret;
1448
1449         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1450                 if (pd == vm->scratch_pd) {
1451                         pdp->used_pdpes++;
1452
1453                         pd = alloc_pd(vm);
1454                         if (IS_ERR(pd)) {
1455                                 pdp->used_pdpes--;
1456                                 goto unwind;
1457                         }
1458
1459                         gen8_initialize_pd(vm, pd);
1460                         gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1461                         GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
1462
1463                         mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
1464                 }
1465
1466                 ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
1467                 if (unlikely(ret))
1468                         goto unwind_pd;
1469         }
1470
1471         return 0;
1472
1473 unwind_pd:
1474         if (!pd->used_pdes) {
1475                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1476                 GEM_BUG_ON(!pdp->used_pdpes);
1477                 pdp->used_pdpes--;
1478                 free_pd(vm, pd);
1479         }
1480 unwind:
1481         gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
1482         return -ENOMEM;
1483 }
1484
1485 static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
1486                                  u64 start, u64 length)
1487 {
1488         return gen8_ppgtt_alloc_pdp(vm,
1489                                     &i915_vm_to_ppgtt(vm)->pdp, start, length);
1490 }
1491
1492 static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
1493                                  u64 start, u64 length)
1494 {
1495         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1496         struct i915_pml4 *pml4 = &ppgtt->pml4;
1497         struct i915_page_directory_pointer *pdp;
1498         u64 from = start;
1499         u32 pml4e;
1500         int ret;
1501
1502         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1503                 if (pml4->pdps[pml4e] == vm->scratch_pdp) {
1504                         pdp = alloc_pdp(vm);
1505                         if (IS_ERR(pdp))
1506                                 goto unwind;
1507
1508                         gen8_initialize_pdp(vm, pdp);
1509                         gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
1510                 }
1511
1512                 ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
1513                 if (unlikely(ret))
1514                         goto unwind_pdp;
1515         }
1516
1517         return 0;
1518
1519 unwind_pdp:
1520         if (!pdp->used_pdpes) {
1521                 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1522                 free_pdp(vm, pdp);
1523         }
1524 unwind:
1525         gen8_ppgtt_clear_4lvl(vm, from, start - from);
1526         return -ENOMEM;
1527 }
1528
1529 static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
1530                           struct i915_page_directory_pointer *pdp,
1531                           u64 start, u64 length,
1532                           gen8_pte_t scratch_pte,
1533                           struct seq_file *m)
1534 {
1535         struct i915_address_space *vm = &ppgtt->vm;
1536         struct i915_page_directory *pd;
1537         u32 pdpe;
1538
1539         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1540                 struct i915_page_table *pt;
1541                 u64 pd_len = length;
1542                 u64 pd_start = start;
1543                 u32 pde;
1544
1545                 if (pdp->page_directory[pdpe] == ppgtt->vm.scratch_pd)
1546                         continue;
1547
1548                 seq_printf(m, "\tPDPE #%d\n", pdpe);
1549                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1550                         u32 pte;
1551                         gen8_pte_t *pt_vaddr;
1552
1553                         if (pd->page_table[pde] == ppgtt->vm.scratch_pt)
1554                                 continue;
1555
1556                         pt_vaddr = kmap_atomic_px(pt);
1557                         for (pte = 0; pte < GEN8_PTES; pte += 4) {
1558                                 u64 va = (pdpe << GEN8_PDPE_SHIFT |
1559                                           pde << GEN8_PDE_SHIFT |
1560                                           pte << GEN8_PTE_SHIFT);
1561                                 int i;
1562                                 bool found = false;
1563
1564                                 for (i = 0; i < 4; i++)
1565                                         if (pt_vaddr[pte + i] != scratch_pte)
1566                                                 found = true;
1567                                 if (!found)
1568                                         continue;
1569
1570                                 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1571                                 for (i = 0; i < 4; i++) {
1572                                         if (pt_vaddr[pte + i] != scratch_pte)
1573                                                 seq_printf(m, " %llx", pt_vaddr[pte + i]);
1574                                         else
1575                                                 seq_puts(m, "  SCRATCH ");
1576                                 }
1577                                 seq_puts(m, "\n");
1578                         }
1579                         kunmap_atomic(pt_vaddr);
1580                 }
1581         }
1582 }
1583
1584 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1585 {
1586         struct i915_address_space *vm = &ppgtt->vm;
1587         const gen8_pte_t scratch_pte =
1588                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
1589         u64 start = 0, length = ppgtt->vm.total;
1590
1591         if (use_4lvl(vm)) {
1592                 u64 pml4e;
1593                 struct i915_pml4 *pml4 = &ppgtt->pml4;
1594                 struct i915_page_directory_pointer *pdp;
1595
1596                 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1597                         if (pml4->pdps[pml4e] == ppgtt->vm.scratch_pdp)
1598                                 continue;
1599
1600                         seq_printf(m, "    PML4E #%llu\n", pml4e);
1601                         gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
1602                 }
1603         } else {
1604                 gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
1605         }
1606 }
1607
1608 static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
1609 {
1610         struct i915_address_space *vm = &ppgtt->vm;
1611         struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
1612         struct i915_page_directory *pd;
1613         u64 start = 0, length = ppgtt->vm.total;
1614         u64 from = start;
1615         unsigned int pdpe;
1616
1617         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1618                 pd = alloc_pd(vm);
1619                 if (IS_ERR(pd))
1620                         goto unwind;
1621
1622                 gen8_initialize_pd(vm, pd);
1623                 gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1624                 pdp->used_pdpes++;
1625         }
1626
1627         pdp->used_pdpes++; /* never remove */
1628         return 0;
1629
1630 unwind:
1631         start -= from;
1632         gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
1633                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1634                 free_pd(vm, pd);
1635         }
1636         pdp->used_pdpes = 0;
1637         return -ENOMEM;
1638 }
1639
1640 /*
1641  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
1642  * with a net effect resembling a 2-level page table in normal x86 terms. Each
1643  * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
1644  * space.
1645  *
1646  */
1647 static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
1648 {
1649         struct i915_hw_ppgtt *ppgtt;
1650         int err;
1651
1652         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1653         if (!ppgtt)
1654                 return ERR_PTR(-ENOMEM);
1655
1656         kref_init(&ppgtt->ref);
1657
1658         ppgtt->vm.i915 = i915;
1659         ppgtt->vm.dma = &i915->drm.pdev->dev;
1660
1661         ppgtt->vm.total = USES_FULL_48BIT_PPGTT(i915) ?
1662                 1ULL << 48 :
1663                 1ULL << 32;
1664
1665         /*
1666          * From bdw, there is support for read-only pages in the PPGTT.
1667          *
1668          * XXX GVT is not honouring the lack of RW in the PTE bits.
1669          */
1670         ppgtt->vm.has_read_only = !intel_vgpu_active(i915);
1671
1672         i915_address_space_init(&ppgtt->vm, i915);
1673
1674         /* There are only few exceptions for gen >=6. chv and bxt.
1675          * And we are not sure about the latter so play safe for now.
1676          */
1677         if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
1678                 ppgtt->vm.pt_kmap_wc = true;
1679
1680         err = gen8_init_scratch(&ppgtt->vm);
1681         if (err)
1682                 goto err_free;
1683
1684         if (use_4lvl(&ppgtt->vm)) {
1685                 err = setup_px(&ppgtt->vm, &ppgtt->pml4);
1686                 if (err)
1687                         goto err_scratch;
1688
1689                 gen8_initialize_pml4(&ppgtt->vm, &ppgtt->pml4);
1690
1691                 ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
1692                 ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
1693                 ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
1694         } else {
1695                 err = __pdp_init(&ppgtt->vm, &ppgtt->pdp);
1696                 if (err)
1697                         goto err_scratch;
1698
1699                 if (intel_vgpu_active(i915)) {
1700                         err = gen8_preallocate_top_level_pdp(ppgtt);
1701                         if (err) {
1702                                 __pdp_fini(&ppgtt->pdp);
1703                                 goto err_scratch;
1704                         }
1705                 }
1706
1707                 ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
1708                 ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
1709                 ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
1710         }
1711
1712         if (intel_vgpu_active(i915))
1713                 gen8_ppgtt_notify_vgt(ppgtt, true);
1714
1715         ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
1716         ppgtt->debug_dump = gen8_dump_ppgtt;
1717
1718         ppgtt->vm.vma_ops.bind_vma    = ppgtt_bind_vma;
1719         ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
1720         ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
1721         ppgtt->vm.vma_ops.clear_pages = clear_pages;
1722
1723         return ppgtt;
1724
1725 err_scratch:
1726         gen8_free_scratch(&ppgtt->vm);
1727 err_free:
1728         kfree(ppgtt);
1729         return ERR_PTR(err);
1730 }
1731
1732 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m)
1733 {
1734         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
1735         const gen6_pte_t scratch_pte = ppgtt->scratch_pte;
1736         struct i915_page_table *pt;
1737         u32 pte, pde;
1738
1739         gen6_for_all_pdes(pt, &base->pd, pde) {
1740                 gen6_pte_t *vaddr;
1741
1742                 if (pt == base->vm.scratch_pt)
1743                         continue;
1744
1745                 if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
1746                         u32 expected =
1747                                 GEN6_PDE_ADDR_ENCODE(px_dma(pt)) |
1748                                 GEN6_PDE_VALID;
1749                         u32 pd_entry = readl(ppgtt->pd_addr + pde);
1750
1751                         if (pd_entry != expected)
1752                                 seq_printf(m,
1753                                            "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1754                                            pde,
1755                                            pd_entry,
1756                                            expected);
1757
1758                         seq_printf(m, "\tPDE: %x\n", pd_entry);
1759                 }
1760
1761                 vaddr = kmap_atomic_px(base->pd.page_table[pde]);
1762                 for (pte = 0; pte < GEN6_PTES; pte += 4) {
1763                         int i;
1764
1765                         for (i = 0; i < 4; i++)
1766                                 if (vaddr[pte + i] != scratch_pte)
1767                                         break;
1768                         if (i == 4)
1769                                 continue;
1770
1771                         seq_printf(m, "\t\t(%03d, %04d) %08lx: ",
1772                                    pde, pte,
1773                                    (pde * GEN6_PTES + pte) * PAGE_SIZE);
1774                         for (i = 0; i < 4; i++) {
1775                                 if (vaddr[pte + i] != scratch_pte)
1776                                         seq_printf(m, " %08x", vaddr[pte + i]);
1777                                 else
1778                                         seq_puts(m, "  SCRATCH");
1779                         }
1780                         seq_puts(m, "\n");
1781                 }
1782                 kunmap_atomic(vaddr);
1783         }
1784 }
1785
1786 /* Write pde (index) from the page directory @pd to the page table @pt */
1787 static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
1788                                   const unsigned int pde,
1789                                   const struct i915_page_table *pt)
1790 {
1791         /* Caller needs to make sure the write completes if necessary */
1792         iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
1793                   ppgtt->pd_addr + pde);
1794 }
1795
1796 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
1797 {
1798         struct intel_engine_cs *engine;
1799         enum intel_engine_id id;
1800
1801         for_each_engine(engine, dev_priv, id) {
1802                 u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ?
1803                                  GEN8_GFX_PPGTT_48B : 0;
1804                 I915_WRITE(RING_MODE_GEN7(engine),
1805                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1806         }
1807 }
1808
1809 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
1810 {
1811         struct intel_engine_cs *engine;
1812         u32 ecochk, ecobits;
1813         enum intel_engine_id id;
1814
1815         ecobits = I915_READ(GAC_ECO_BITS);
1816         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1817
1818         ecochk = I915_READ(GAM_ECOCHK);
1819         if (IS_HASWELL(dev_priv)) {
1820                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1821         } else {
1822                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1823                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1824         }
1825         I915_WRITE(GAM_ECOCHK, ecochk);
1826
1827         for_each_engine(engine, dev_priv, id) {
1828                 /* GFX_MODE is per-ring on gen7+ */
1829                 I915_WRITE(RING_MODE_GEN7(engine),
1830                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1831         }
1832 }
1833
1834 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
1835 {
1836         u32 ecochk, gab_ctl, ecobits;
1837
1838         ecobits = I915_READ(GAC_ECO_BITS);
1839         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1840                    ECOBITS_PPGTT_CACHE64B);
1841
1842         gab_ctl = I915_READ(GAB_CTL);
1843         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1844
1845         ecochk = I915_READ(GAM_ECOCHK);
1846         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1847
1848         I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1849 }
1850
1851 /* PPGTT support for Sandybdrige/Gen6 and later */
1852 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1853                                    u64 start, u64 length)
1854 {
1855         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1856         unsigned int first_entry = start >> PAGE_SHIFT;
1857         unsigned int pde = first_entry / GEN6_PTES;
1858         unsigned int pte = first_entry % GEN6_PTES;
1859         unsigned int num_entries = length >> PAGE_SHIFT;
1860         const gen6_pte_t scratch_pte = ppgtt->scratch_pte;
1861
1862         while (num_entries) {
1863                 struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++];
1864                 const unsigned int end = min(pte + num_entries, GEN6_PTES);
1865                 const unsigned int count = end - pte;
1866                 gen6_pte_t *vaddr;
1867
1868                 GEM_BUG_ON(pt == vm->scratch_pt);
1869
1870                 num_entries -= count;
1871
1872                 GEM_BUG_ON(count > pt->used_ptes);
1873                 pt->used_ptes -= count;
1874                 if (!pt->used_ptes)
1875                         ppgtt->scan_for_unused_pt = true;
1876
1877                 /*
1878                  * Note that the hw doesn't support removing PDE on the fly
1879                  * (they are cached inside the context with no means to
1880                  * invalidate the cache), so we can only reset the PTE
1881                  * entries back to scratch.
1882                  */
1883
1884                 vaddr = kmap_atomic_px(pt);
1885                 do {
1886                         vaddr[pte++] = scratch_pte;
1887                 } while (pte < end);
1888                 kunmap_atomic(vaddr);
1889
1890                 pte = 0;
1891         }
1892 }
1893
1894 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1895                                       struct i915_vma *vma,
1896                                       enum i915_cache_level cache_level,
1897                                       u32 flags)
1898 {
1899         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1900         unsigned first_entry = vma->node.start >> PAGE_SHIFT;
1901         unsigned act_pt = first_entry / GEN6_PTES;
1902         unsigned act_pte = first_entry % GEN6_PTES;
1903         const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
1904         struct sgt_dma iter = sgt_dma(vma);
1905         gen6_pte_t *vaddr;
1906
1907         GEM_BUG_ON(ppgtt->pd.page_table[act_pt] == vm->scratch_pt);
1908
1909         vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
1910         do {
1911                 vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
1912
1913                 iter.dma += PAGE_SIZE;
1914                 if (iter.dma == iter.max) {
1915                         iter.sg = __sg_next(iter.sg);
1916                         if (!iter.sg)
1917                                 break;
1918
1919                         iter.dma = sg_dma_address(iter.sg);
1920                         iter.max = iter.dma + iter.sg->length;
1921                 }
1922
1923                 if (++act_pte == GEN6_PTES) {
1924                         kunmap_atomic(vaddr);
1925                         vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
1926                         act_pte = 0;
1927                 }
1928         } while (1);
1929         kunmap_atomic(vaddr);
1930
1931         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1932 }
1933
1934 static int gen6_alloc_va_range(struct i915_address_space *vm,
1935                                u64 start, u64 length)
1936 {
1937         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1938         struct i915_page_table *pt;
1939         u64 from = start;
1940         unsigned int pde;
1941         bool flush = false;
1942
1943         gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) {
1944                 const unsigned int count = gen6_pte_count(start, length);
1945
1946                 if (pt == vm->scratch_pt) {
1947                         pt = alloc_pt(vm);
1948                         if (IS_ERR(pt))
1949                                 goto unwind_out;
1950
1951                         gen6_initialize_pt(ppgtt, pt);
1952                         ppgtt->base.pd.page_table[pde] = pt;
1953
1954                         if (i915_vma_is_bound(ppgtt->vma,
1955                                               I915_VMA_GLOBAL_BIND)) {
1956                                 gen6_write_pde(ppgtt, pde, pt);
1957                                 flush = true;
1958                         }
1959
1960                         GEM_BUG_ON(pt->used_ptes);
1961                 }
1962
1963                 pt->used_ptes += count;
1964         }
1965
1966         if (flush) {
1967                 mark_tlbs_dirty(&ppgtt->base);
1968                 gen6_ggtt_invalidate(ppgtt->base.vm.i915);
1969         }
1970
1971         return 0;
1972
1973 unwind_out:
1974         gen6_ppgtt_clear_range(vm, from, start - from);
1975         return -ENOMEM;
1976 }
1977
1978 static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
1979 {
1980         struct i915_address_space * const vm = &ppgtt->base.vm;
1981         struct i915_page_table *unused;
1982         u32 pde;
1983         int ret;
1984
1985         ret = setup_scratch_page(vm, __GFP_HIGHMEM);
1986         if (ret)
1987                 return ret;
1988
1989         ppgtt->scratch_pte =
1990                 vm->pte_encode(vm->scratch_page.daddr,
1991                                I915_CACHE_NONE, PTE_READ_ONLY);
1992
1993         vm->scratch_pt = alloc_pt(vm);
1994         if (IS_ERR(vm->scratch_pt)) {
1995                 cleanup_scratch_page(vm);
1996                 return PTR_ERR(vm->scratch_pt);
1997         }
1998
1999         gen6_initialize_pt(ppgtt, vm->scratch_pt);
2000         gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
2001                 ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
2002
2003         return 0;
2004 }
2005
2006 static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
2007 {
2008         free_pt(vm, vm->scratch_pt);
2009         cleanup_scratch_page(vm);
2010 }
2011
2012 static void gen6_ppgtt_free_pd(struct gen6_hw_ppgtt *ppgtt)
2013 {
2014         struct i915_page_table *pt;
2015         u32 pde;
2016
2017         gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
2018                 if (pt != ppgtt->base.vm.scratch_pt)
2019                         free_pt(&ppgtt->base.vm, pt);
2020 }
2021
2022 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
2023 {
2024         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
2025
2026         i915_vma_destroy(ppgtt->vma);
2027
2028         gen6_ppgtt_free_pd(ppgtt);
2029         gen6_ppgtt_free_scratch(vm);
2030 }
2031
2032 static int pd_vma_set_pages(struct i915_vma *vma)
2033 {
2034         vma->pages = ERR_PTR(-ENODEV);
2035         return 0;
2036 }
2037
2038 static void pd_vma_clear_pages(struct i915_vma *vma)
2039 {
2040         GEM_BUG_ON(!vma->pages);
2041
2042         vma->pages = NULL;
2043 }
2044
2045 static int pd_vma_bind(struct i915_vma *vma,
2046                        enum i915_cache_level cache_level,
2047                        u32 unused)
2048 {
2049         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
2050         struct gen6_hw_ppgtt *ppgtt = vma->private;
2051         u32 ggtt_offset = i915_ggtt_offset(vma) / PAGE_SIZE;
2052         struct i915_page_table *pt;
2053         unsigned int pde;
2054
2055         ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
2056         ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
2057
2058         gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
2059                 gen6_write_pde(ppgtt, pde, pt);
2060
2061         mark_tlbs_dirty(&ppgtt->base);
2062         gen6_ggtt_invalidate(ppgtt->base.vm.i915);
2063
2064         return 0;
2065 }
2066
2067 static void pd_vma_unbind(struct i915_vma *vma)
2068 {
2069         struct gen6_hw_ppgtt *ppgtt = vma->private;
2070         struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt;
2071         struct i915_page_table *pt;
2072         unsigned int pde;
2073
2074         if (!ppgtt->scan_for_unused_pt)
2075                 return;
2076
2077         /* Free all no longer used page tables */
2078         gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) {
2079                 if (pt->used_ptes || pt == scratch_pt)
2080                         continue;
2081
2082                 free_pt(&ppgtt->base.vm, pt);
2083                 ppgtt->base.pd.page_table[pde] = scratch_pt;
2084         }
2085
2086         ppgtt->scan_for_unused_pt = false;
2087 }
2088
2089 static const struct i915_vma_ops pd_vma_ops = {
2090         .set_pages = pd_vma_set_pages,
2091         .clear_pages = pd_vma_clear_pages,
2092         .bind_vma = pd_vma_bind,
2093         .unbind_vma = pd_vma_unbind,
2094 };
2095
2096 static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
2097 {
2098         struct drm_i915_private *i915 = ppgtt->base.vm.i915;
2099         struct i915_ggtt *ggtt = &i915->ggtt;
2100         struct i915_vma *vma;
2101
2102         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
2103         GEM_BUG_ON(size > ggtt->vm.total);
2104
2105         vma = kmem_cache_zalloc(i915->vmas, GFP_KERNEL);
2106         if (!vma)
2107                 return ERR_PTR(-ENOMEM);
2108
2109         init_request_active(&vma->last_fence, NULL);
2110
2111         vma->vm = &ggtt->vm;
2112         vma->ops = &pd_vma_ops;
2113         vma->private = ppgtt;
2114
2115         vma->active = RB_ROOT;
2116
2117         vma->size = size;
2118         vma->fence_size = size;
2119         vma->flags = I915_VMA_GGTT;
2120         vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
2121
2122         INIT_LIST_HEAD(&vma->obj_link);
2123         list_add(&vma->vm_link, &vma->vm->unbound_list);
2124
2125         return vma;
2126 }
2127
2128 int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
2129 {
2130         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
2131
2132         /*
2133          * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
2134          * which will be pinned into every active context.
2135          * (When vma->pin_count becomes atomic, I expect we will naturally
2136          * need a larger, unpacked, type and kill this redundancy.)
2137          */
2138         if (ppgtt->pin_count++)
2139                 return 0;
2140
2141         /*
2142          * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
2143          * allocator works in address space sizes, so it's multiplied by page
2144          * size. We allocate at the top of the GTT to avoid fragmentation.
2145          */
2146         return i915_vma_pin(ppgtt->vma,
2147                             0, GEN6_PD_ALIGN,
2148                             PIN_GLOBAL | PIN_HIGH);
2149 }
2150
2151 void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
2152 {
2153         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
2154
2155         GEM_BUG_ON(!ppgtt->pin_count);
2156         if (--ppgtt->pin_count)
2157                 return;
2158
2159         i915_vma_unpin(ppgtt->vma);
2160 }
2161
2162 static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
2163 {
2164         struct i915_ggtt * const ggtt = &i915->ggtt;
2165         struct gen6_hw_ppgtt *ppgtt;
2166         int err;
2167
2168         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2169         if (!ppgtt)
2170                 return ERR_PTR(-ENOMEM);
2171
2172         kref_init(&ppgtt->base.ref);
2173
2174         ppgtt->base.vm.i915 = i915;
2175         ppgtt->base.vm.dma = &i915->drm.pdev->dev;
2176
2177         ppgtt->base.vm.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2178
2179         i915_address_space_init(&ppgtt->base.vm, i915);
2180
2181         ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
2182         ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
2183         ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
2184         ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
2185         ppgtt->base.debug_dump = gen6_dump_ppgtt;
2186
2187         ppgtt->base.vm.vma_ops.bind_vma    = ppgtt_bind_vma;
2188         ppgtt->base.vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
2189         ppgtt->base.vm.vma_ops.set_pages   = ppgtt_set_pages;
2190         ppgtt->base.vm.vma_ops.clear_pages = clear_pages;
2191
2192         ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
2193
2194         err = gen6_ppgtt_init_scratch(ppgtt);
2195         if (err)
2196                 goto err_free;
2197
2198         ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
2199         if (IS_ERR(ppgtt->vma)) {
2200                 err = PTR_ERR(ppgtt->vma);
2201                 goto err_scratch;
2202         }
2203
2204         return &ppgtt->base;
2205
2206 err_scratch:
2207         gen6_ppgtt_free_scratch(&ppgtt->base.vm);
2208 err_free:
2209         kfree(ppgtt);
2210         return ERR_PTR(err);
2211 }
2212
2213 static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
2214 {
2215         /* This function is for gtt related workarounds. This function is
2216          * called on driver load and after a GPU reset, so you can place
2217          * workarounds here even if they get overwritten by GPU reset.
2218          */
2219         /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
2220         if (IS_BROADWELL(dev_priv))
2221                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2222         else if (IS_CHERRYVIEW(dev_priv))
2223                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2224         else if (IS_GEN9_LP(dev_priv))
2225                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2226         else if (INTEL_GEN(dev_priv) >= 9)
2227                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2228
2229         /*
2230          * To support 64K PTEs we need to first enable the use of the
2231          * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
2232          * mmio, otherwise the page-walker will simply ignore the IPS bit. This
2233          * shouldn't be needed after GEN10.
2234          *
2235          * 64K pages were first introduced from BDW+, although technically they
2236          * only *work* from gen9+. For pre-BDW we instead have the option for
2237          * 32K pages, but we don't currently have any support for it in our
2238          * driver.
2239          */
2240         if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
2241             INTEL_GEN(dev_priv) <= 10)
2242                 I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
2243                            I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
2244                            GAMW_ECO_ENABLE_64K_IPS_FIELD);
2245 }
2246
/*
 * Apply the GTT workarounds and, where the hardware needs it, enable PPGTT
 * through the generation specific register sequence. Always returns 0.
 */
int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
{
	gtt_write_workarounds(dev_priv);

	/*
	 * In the case of execlists, PPGTT is enabled by the context descriptor
	 * and the PDPs are contained within the context itself.  We don't
	 * need to do anything here. Nor is there anything to enable when
	 * PPGTT is not in use.
	 */
	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) || !USES_PPGTT(dev_priv))
		return 0;

	if (IS_GEN6(dev_priv)) {
		gen6_ppgtt_enable(dev_priv);
	} else if (IS_GEN7(dev_priv)) {
		gen7_ppgtt_enable(dev_priv);
	} else if (INTEL_GEN(dev_priv) >= 8) {
		gen8_ppgtt_enable(dev_priv);
	} else {
		MISSING_CASE(INTEL_GEN(dev_priv));
	}

	return 0;
}
2271
/* Create a ppgtt using the gen6 layout before gen8, the gen8 layout after. */
static struct i915_hw_ppgtt *
__hw_ppgtt_create(struct drm_i915_private *i915)
{
	if (INTEL_GEN(i915) >= 8)
		return gen8_ppgtt_create(i915);

	return gen6_ppgtt_create(i915);
}
2280
2281 struct i915_hw_ppgtt *
2282 i915_ppgtt_create(struct drm_i915_private *i915,
2283                   struct drm_i915_file_private *fpriv)
2284 {
2285         struct i915_hw_ppgtt *ppgtt;
2286
2287         ppgtt = __hw_ppgtt_create(i915);
2288         if (IS_ERR(ppgtt))
2289                 return ppgtt;
2290
2291         ppgtt->vm.file = fpriv;
2292
2293         trace_i915_ppgtt_create(&ppgtt->vm);
2294
2295         return ppgtt;
2296 }
2297
2298 void i915_ppgtt_close(struct i915_address_space *vm)
2299 {
2300         GEM_BUG_ON(vm->closed);
2301         vm->closed = true;
2302 }
2303
2304 static void ppgtt_destroy_vma(struct i915_address_space *vm)
2305 {
2306         struct list_head *phases[] = {
2307                 &vm->active_list,
2308                 &vm->inactive_list,
2309                 &vm->unbound_list,
2310                 NULL,
2311         }, **phase;
2312
2313         vm->closed = true;
2314         for (phase = phases; *phase; phase++) {
2315                 struct i915_vma *vma, *vn;
2316
2317                 list_for_each_entry_safe(vma, vn, *phase, vm_link)
2318                         i915_vma_destroy(vma);
2319         }
2320 }
2321
2322 void i915_ppgtt_release(struct kref *kref)
2323 {
2324         struct i915_hw_ppgtt *ppgtt =
2325                 container_of(kref, struct i915_hw_ppgtt, ref);
2326
2327         trace_i915_ppgtt_release(&ppgtt->vm);
2328
2329         ppgtt_destroy_vma(&ppgtt->vm);
2330
2331         GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list));
2332         GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list));
2333         GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
2334
2335         ppgtt->vm.cleanup(&ppgtt->vm);
2336         i915_address_space_fini(&ppgtt->vm);
2337         kfree(ppgtt);
2338 }
2339
2340 /* Certain Gen5 chipsets require require idling the GPU before
2341  * unmapping anything from the GTT when VT-d is enabled.
2342  */
2343 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2344 {
2345         /* Query intel_iommu to see if we need the workaround. Presumably that
2346          * was loaded first.
2347          */
2348         return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
2349 }
2350
2351 static void gen6_check_and_clear_faults(struct drm_i915_private *dev_priv)
2352 {
2353         struct intel_engine_cs *engine;
2354         enum intel_engine_id id;
2355         u32 fault;
2356
2357         for_each_engine(engine, dev_priv, id) {
2358                 fault = I915_READ(RING_FAULT_REG(engine));
2359                 if (fault & RING_FAULT_VALID) {
2360                         DRM_DEBUG_DRIVER("Unexpected fault\n"
2361                                          "\tAddr: 0x%08lx\n"
2362                                          "\tAddress space: %s\n"
2363                                          "\tSource ID: %d\n"
2364                                          "\tType: %d\n",
2365                                          fault & PAGE_MASK,
2366                                          fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2367                                          RING_FAULT_SRCID(fault),
2368                                          RING_FAULT_FAULT_TYPE(fault));
2369                         I915_WRITE(RING_FAULT_REG(engine),
2370                                    fault & ~RING_FAULT_VALID);
2371                 }
2372         }
2373
2374         POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
2375 }
2376
2377 static void gen8_check_and_clear_faults(struct drm_i915_private *dev_priv)
2378 {
2379         u32 fault = I915_READ(GEN8_RING_FAULT_REG);
2380
2381         if (fault & RING_FAULT_VALID) {
2382                 u32 fault_data0, fault_data1;
2383                 u64 fault_addr;
2384
2385                 fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
2386                 fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
2387                 fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
2388                              ((u64)fault_data0 << 12);
2389
2390                 DRM_DEBUG_DRIVER("Unexpected fault\n"
2391                                  "\tAddr: 0x%08x_%08x\n"
2392                                  "\tAddress space: %s\n"
2393                                  "\tEngine ID: %d\n"
2394                                  "\tSource ID: %d\n"
2395                                  "\tType: %d\n",
2396                                  upper_32_bits(fault_addr),
2397                                  lower_32_bits(fault_addr),
2398                                  fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
2399                                  GEN8_RING_FAULT_ENGINE_ID(fault),
2400                                  RING_FAULT_SRCID(fault),
2401                                  RING_FAULT_FAULT_TYPE(fault));
2402                 I915_WRITE(GEN8_RING_FAULT_REG,
2403                            fault & ~RING_FAULT_VALID);
2404         }
2405
2406         POSTING_READ(GEN8_RING_FAULT_REG);
2407 }
2408
/*
 * Check for and clear pending GPU faults using the register layout of the
 * running generation. Nothing is done before gen6.
 */
void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
{
	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
	if (INTEL_GEN(dev_priv) >= 8) {
		gen8_check_and_clear_faults(dev_priv);
		return;
	}

	if (INTEL_GEN(dev_priv) >= 6)
		gen6_check_and_clear_faults(dev_priv);
}
2419
2420 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
2421 {
2422         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2423
2424         /* Don't bother messing with faults pre GEN6 as we have little
2425          * documentation supporting that it's a good idea.
2426          */
2427         if (INTEL_GEN(dev_priv) < 6)
2428                 return;
2429
2430         i915_check_and_clear_faults(dev_priv);
2431
2432         ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
2433
2434         i915_ggtt_invalidate(dev_priv);
2435 }
2436
2437 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
2438                                struct sg_table *pages)
2439 {
2440         do {
2441                 if (dma_map_sg_attrs(&obj->base.dev->pdev->dev,
2442                                      pages->sgl, pages->nents,
2443                                      PCI_DMA_BIDIRECTIONAL,
2444                                      DMA_ATTR_NO_WARN))
2445                         return 0;
2446
2447                 /* If the DMA remap fails, one cause can be that we have
2448                  * too many objects pinned in a small remapping table,
2449                  * such as swiotlb. Incrementally purge all other objects and
2450                  * try again - if there are no more pages to remove from
2451                  * the DMA remapper, i915_gem_shrink will return 0.
2452                  */
2453                 GEM_BUG_ON(obj->mm.pages == pages);
2454         } while (i915_gem_shrink(to_i915(obj->base.dev),
2455                                  obj->base.size >> PAGE_SHIFT, NULL,
2456                                  I915_SHRINK_BOUND |
2457                                  I915_SHRINK_UNBOUND |
2458                                  I915_SHRINK_ACTIVE));
2459
2460         return -ENOSPC;
2461 }
2462
2463 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2464 {
2465         writeq(pte, addr);
2466 }
2467
2468 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2469                                   dma_addr_t addr,
2470                                   u64 offset,
2471                                   enum i915_cache_level level,
2472                                   u32 unused)
2473 {
2474         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2475         gen8_pte_t __iomem *pte =
2476                 (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2477
2478         gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
2479
2480         ggtt->invalidate(vm->i915);
2481 }
2482
2483 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2484                                      struct i915_vma *vma,
2485                                      enum i915_cache_level level,
2486                                      u32 flags)
2487 {
2488         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2489         struct sgt_iter sgt_iter;
2490         gen8_pte_t __iomem *gtt_entries;
2491         const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
2492         dma_addr_t addr;
2493
2494         /*
2495          * Note that we ignore PTE_READ_ONLY here. The caller must be careful
2496          * not to allow the user to override access to a read only page.
2497          */
2498
2499         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
2500         gtt_entries += vma->node.start >> PAGE_SHIFT;
2501         for_each_sgt_dma(addr, sgt_iter, vma->pages)
2502                 gen8_set_pte(gtt_entries++, pte_encode | addr);
2503
2504         /*
2505          * We want to flush the TLBs only after we're certain all the PTE
2506          * updates have finished.
2507          */
2508         ggtt->invalidate(vm->i915);
2509 }
2510
2511 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2512                                   dma_addr_t addr,
2513                                   u64 offset,
2514                                   enum i915_cache_level level,
2515                                   u32 flags)
2516 {
2517         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2518         gen6_pte_t __iomem *pte =
2519                 (gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2520
2521         iowrite32(vm->pte_encode(addr, level, flags), pte);
2522
2523         ggtt->invalidate(vm->i915);
2524 }
2525
2526 /*
2527  * Binds an object into the global gtt with the specified cache level. The object
2528  * will be accessible to the GPU via commands whose operands reference offsets
2529  * within the global GTT as well as accessible by the GPU through the GMADR
2530  * mapped BAR (dev_priv->mm.gtt->gtt).
2531  */
2532 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2533                                      struct i915_vma *vma,
2534                                      enum i915_cache_level level,
2535                                      u32 flags)
2536 {
2537         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2538         gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
2539         unsigned int i = vma->node.start >> PAGE_SHIFT;
2540         struct sgt_iter iter;
2541         dma_addr_t addr;
2542         for_each_sgt_dma(addr, iter, vma->pages)
2543                 iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
2544
2545         /*
2546          * We want to flush the TLBs only after we're certain all the PTE
2547          * updates have finished.
2548          */
2549         ggtt->invalidate(vm->i915);
2550 }
2551
2552 static void nop_clear_range(struct i915_address_space *vm,
2553                             u64 start, u64 length)
2554 {
2555 }
2556
2557 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2558                                   u64 start, u64 length)
2559 {
2560         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2561         unsigned first_entry = start >> PAGE_SHIFT;
2562         unsigned num_entries = length >> PAGE_SHIFT;
2563         const gen8_pte_t scratch_pte =
2564                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
2565         gen8_pte_t __iomem *gtt_base =
2566                 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2567         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2568         int i;
2569
2570         if (WARN(num_entries > max_entries,
2571                  "First entry = %d; Num entries = %d (max=%d)\n",
2572                  first_entry, num_entries, max_entries))
2573                 num_entries = max_entries;
2574
2575         for (i = 0; i < num_entries; i++)
2576                 gen8_set_pte(&gtt_base[i], scratch_pte);
2577 }
2578
2579 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2580 {
2581         struct drm_i915_private *dev_priv = vm->i915;
2582
2583         /*
2584          * Make sure the internal GAM fifo has been cleared of all GTT
2585          * writes before exiting stop_machine(). This guarantees that
2586          * any aperture accesses waiting to start in another process
2587          * cannot back up behind the GTT writes causing a hang.
2588          * The register can be any arbitrary GAM register.
2589          */
2590         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2591 }
2592
2593 struct insert_page {
2594         struct i915_address_space *vm;
2595         dma_addr_t addr;
2596         u64 offset;
2597         enum i915_cache_level level;
2598 };
2599
2600 static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2601 {
2602         struct insert_page *arg = _arg;
2603
2604         gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2605         bxt_vtd_ggtt_wa(arg->vm);
2606
2607         return 0;
2608 }
2609
2610 static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
2611                                           dma_addr_t addr,
2612                                           u64 offset,
2613                                           enum i915_cache_level level,
2614                                           u32 unused)
2615 {
2616         struct insert_page arg = { vm, addr, offset, level };
2617
2618         stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
2619 }
2620
2621 struct insert_entries {
2622         struct i915_address_space *vm;
2623         struct i915_vma *vma;
2624         enum i915_cache_level level;
2625         u32 flags;
2626 };
2627
2628 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2629 {
2630         struct insert_entries *arg = _arg;
2631
2632         gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
2633         bxt_vtd_ggtt_wa(arg->vm);
2634
2635         return 0;
2636 }
2637
2638 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2639                                              struct i915_vma *vma,
2640                                              enum i915_cache_level level,
2641                                              u32 flags)
2642 {
2643         struct insert_entries arg = { vm, vma, level, flags };
2644
2645         stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
2646 }
2647
2648 struct clear_range {
2649         struct i915_address_space *vm;
2650         u64 start;
2651         u64 length;
2652 };
2653
2654 static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2655 {
2656         struct clear_range *arg = _arg;
2657
2658         gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2659         bxt_vtd_ggtt_wa(arg->vm);
2660
2661         return 0;
2662 }
2663
2664 static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
2665                                           u64 start,
2666                                           u64 length)
2667 {
2668         struct clear_range arg = { vm, start, length };
2669
2670         stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
2671 }
2672
2673 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2674                                   u64 start, u64 length)
2675 {
2676         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2677         unsigned first_entry = start >> PAGE_SHIFT;
2678         unsigned num_entries = length >> PAGE_SHIFT;
2679         gen6_pte_t scratch_pte, __iomem *gtt_base =
2680                 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2681         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2682         int i;
2683
2684         if (WARN(num_entries > max_entries,
2685                  "First entry = %d; Num entries = %d (max=%d)\n",
2686                  first_entry, num_entries, max_entries))
2687                 num_entries = max_entries;
2688
2689         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
2690                                      I915_CACHE_LLC, 0);
2691
2692         for (i = 0; i < num_entries; i++)
2693                 iowrite32(scratch_pte, &gtt_base[i]);
2694 }
2695
2696 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2697                                   dma_addr_t addr,
2698                                   u64 offset,
2699                                   enum i915_cache_level cache_level,
2700                                   u32 unused)
2701 {
2702         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2703                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2704
2705         intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2706 }
2707
2708 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2709                                      struct i915_vma *vma,
2710                                      enum i915_cache_level cache_level,
2711                                      u32 unused)
2712 {
2713         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2714                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2715
2716         intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
2717                                     flags);
2718 }
2719
2720 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2721                                   u64 start, u64 length)
2722 {
2723         intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
2724 }
2725
2726 static int ggtt_bind_vma(struct i915_vma *vma,
2727                          enum i915_cache_level cache_level,
2728                          u32 flags)
2729 {
2730         struct drm_i915_private *i915 = vma->vm->i915;
2731         struct drm_i915_gem_object *obj = vma->obj;
2732         u32 pte_flags;
2733
2734         /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
2735         pte_flags = 0;
2736         if (i915_gem_object_is_readonly(obj))
2737                 pte_flags |= PTE_READ_ONLY;
2738
2739         intel_runtime_pm_get(i915);
2740         vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2741         intel_runtime_pm_put(i915);
2742
2743         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
2744
2745         /*
2746          * Without aliasing PPGTT there's no difference between
2747          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2748          * upgrade to both bound if we bind either to avoid double-binding.
2749          */
2750         vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2751
2752         return 0;
2753 }
2754
2755 static void ggtt_unbind_vma(struct i915_vma *vma)
2756 {
2757         struct drm_i915_private *i915 = vma->vm->i915;
2758
2759         intel_runtime_pm_get(i915);
2760         vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2761         intel_runtime_pm_put(i915);
2762 }
2763
2764 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2765                                  enum i915_cache_level cache_level,
2766                                  u32 flags)
2767 {
2768         struct drm_i915_private *i915 = vma->vm->i915;
2769         u32 pte_flags;
2770         int ret;
2771
2772         /* Currently applicable only to VLV */
2773         pte_flags = 0;
2774         if (i915_gem_object_is_readonly(vma->obj))
2775                 pte_flags |= PTE_READ_ONLY;
2776
2777         if (flags & I915_VMA_LOCAL_BIND) {
2778                 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
2779
2780                 if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
2781                         ret = appgtt->vm.allocate_va_range(&appgtt->vm,
2782                                                            vma->node.start,
2783                                                            vma->size);
2784                         if (ret)
2785                                 return ret;
2786                 }
2787
2788                 appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level,
2789                                           pte_flags);
2790         }
2791
2792         if (flags & I915_VMA_GLOBAL_BIND) {
2793                 intel_runtime_pm_get(i915);
2794                 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2795                 intel_runtime_pm_put(i915);
2796         }
2797
2798         return 0;
2799 }
2800
2801 static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
2802 {
2803         struct drm_i915_private *i915 = vma->vm->i915;
2804
2805         if (vma->flags & I915_VMA_GLOBAL_BIND) {
2806                 intel_runtime_pm_get(i915);
2807                 vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2808                 intel_runtime_pm_put(i915);
2809         }
2810
2811         if (vma->flags & I915_VMA_LOCAL_BIND) {
2812                 struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm;
2813
2814                 vm->clear_range(vm, vma->node.start, vma->size);
2815         }
2816 }
2817
2818 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
2819                                struct sg_table *pages)
2820 {
2821         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2822         struct device *kdev = &dev_priv->drm.pdev->dev;
2823         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2824
2825         if (unlikely(ggtt->do_idle_maps)) {
2826                 if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
2827                         DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2828                         /* Wait a bit, in hopes it avoids the hang */
2829                         udelay(10);
2830                 }
2831         }
2832
2833         dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
2834 }
2835
2836 static int ggtt_set_pages(struct i915_vma *vma)
2837 {
2838         int ret;
2839
2840         GEM_BUG_ON(vma->pages);
2841
2842         ret = i915_get_ggtt_vma_pages(vma);
2843         if (ret)
2844                 return ret;
2845
2846         vma->page_sizes = vma->obj->mm.page_sizes;
2847
2848         return 0;
2849 }
2850
2851 static void i915_gtt_color_adjust(const struct drm_mm_node *node,
2852                                   unsigned long color,
2853                                   u64 *start,
2854                                   u64 *end)
2855 {
2856         if (node->allocated && node->color != color)
2857                 *start += I915_GTT_PAGE_SIZE;
2858
2859         /* Also leave a space between the unallocated reserved node after the
2860          * GTT and any objects within the GTT, i.e. we use the color adjustment
2861          * to insert a guard page to prevent prefetches crossing over the
2862          * GTT boundary.
2863          */
2864         node = list_next_entry(node, node_list);
2865         if (node->color != color)
2866                 *end -= I915_GTT_PAGE_SIZE;
2867 }
2868
2869 int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
2870 {
2871         struct i915_ggtt *ggtt = &i915->ggtt;
2872         struct i915_hw_ppgtt *ppgtt;
2873         int err;
2874
2875         ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM));
2876         if (IS_ERR(ppgtt))
2877                 return PTR_ERR(ppgtt);
2878
2879         if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
2880                 err = -ENODEV;
2881                 goto err_ppgtt;
2882         }
2883
2884         /*
2885          * Note we only pre-allocate as far as the end of the global
2886          * GTT. On 48b / 4-level page-tables, the difference is very,
2887          * very significant! We have to preallocate as GVT/vgpu does
2888          * not like the page directory disappearing.
2889          */
2890         err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
2891         if (err)
2892                 goto err_ppgtt;
2893
2894         i915->mm.aliasing_ppgtt = ppgtt;
2895
2896         GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
2897         ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
2898
2899         GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
2900         ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
2901
2902         return 0;
2903
2904 err_ppgtt:
2905         i915_ppgtt_put(ppgtt);
2906         return err;
2907 }
2908
2909 void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
2910 {
2911         struct i915_ggtt *ggtt = &i915->ggtt;
2912         struct i915_hw_ppgtt *ppgtt;
2913
2914         ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
2915         if (!ppgtt)
2916                 return;
2917
2918         i915_ppgtt_put(ppgtt);
2919
2920         ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
2921         ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
2922 }
2923
2924 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2925 {
2926         /* Let GEM Manage all of the aperture.
2927          *
2928          * However, leave one page at the end still bound to the scratch page.
2929          * There are a number of places where the hardware apparently prefetches
2930          * past the end of the object, and we've seen multiple hangs with the
2931          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2932          * aperture.  One page should be enough to keep any prefetching inside
2933          * of the aperture.
2934          */
2935         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2936         unsigned long hole_start, hole_end;
2937         struct drm_mm_node *entry;
2938         int ret;
2939
2940         ret = intel_vgt_balloon(dev_priv);
2941         if (ret)
2942                 return ret;
2943
2944         /* Reserve a mappable slot for our lockless error capture */
2945         ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
2946                                           PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
2947                                           0, ggtt->mappable_end,
2948                                           DRM_MM_INSERT_LOW);
2949         if (ret)
2950                 return ret;
2951
2952         /* Clear any non-preallocated blocks */
2953         drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
2954                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2955                               hole_start, hole_end);
2956                 ggtt->vm.clear_range(&ggtt->vm, hole_start,
2957                                      hole_end - hole_start);
2958         }
2959
2960         /* And finally clear the reserved guard page */
2961         ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
2962
2963         if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
2964                 ret = i915_gem_init_aliasing_ppgtt(dev_priv);
2965                 if (ret)
2966                         goto err;
2967         }
2968
2969         return 0;
2970
2971 err:
2972         drm_mm_remove_node(&ggtt->error_capture);
2973         return ret;
2974 }
2975
2976 /**
2977  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2978  * @dev_priv: i915 device
2979  */
2980 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2981 {
2982         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2983         struct i915_vma *vma, *vn;
2984         struct pagevec *pvec;
2985
2986         ggtt->vm.closed = true;
2987
2988         mutex_lock(&dev_priv->drm.struct_mutex);
2989         i915_gem_fini_aliasing_ppgtt(dev_priv);
2990
2991         GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
2992         list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link)
2993                 WARN_ON(i915_vma_unbind(vma));
2994
2995         if (drm_mm_node_allocated(&ggtt->error_capture))
2996                 drm_mm_remove_node(&ggtt->error_capture);
2997
2998         if (drm_mm_initialized(&ggtt->vm.mm)) {
2999                 intel_vgt_deballoon(dev_priv);
3000                 i915_address_space_fini(&ggtt->vm);
3001         }
3002
3003         ggtt->vm.cleanup(&ggtt->vm);
3004
3005         pvec = &dev_priv->mm.wc_stash.pvec;
3006         if (pvec->nr) {
3007                 set_pages_array_wb(pvec->pages, pvec->nr);
3008                 __pagevec_release(pvec);
3009         }
3010
3011         mutex_unlock(&dev_priv->drm.struct_mutex);
3012
3013         arch_phys_wc_del(ggtt->mtrr);
3014         io_mapping_fini(&ggtt->iomap);
3015
3016         i915_gem_cleanup_stolen(&dev_priv->drm);
3017 }
3018
3019 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
3020 {
3021         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
3022         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
3023         return snb_gmch_ctl << 20;
3024 }
3025
3026 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
3027 {
3028         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
3029         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
3030         if (bdw_gmch_ctl)
3031                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
3032
3033 #ifdef CONFIG_X86_32
3034         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
3035         if (bdw_gmch_ctl > 4)
3036                 bdw_gmch_ctl = 4;
3037 #endif
3038
3039         return bdw_gmch_ctl << 20;
3040 }
3041
3042 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
3043 {
3044         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
3045         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
3046
3047         if (gmch_ctrl)
3048                 return 1 << (20 + gmch_ctrl);
3049
3050         return 0;
3051 }
3052
3053 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
3054 {
3055         struct drm_i915_private *dev_priv = ggtt->vm.i915;
3056         struct pci_dev *pdev = dev_priv->drm.pdev;
3057         phys_addr_t phys_addr;
3058         int ret;
3059
3060         /* For Modern GENs the PTEs and register space are split in the BAR */
3061         phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
3062
3063         /*
3064          * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
3065          * will be dropped. For WC mappings in general we have 64 byte burst
3066          * writes when the WC buffer is flushed, so we can't use it, but have to
3067          * resort to an uncached mapping. The WC issue is easily caught by the
3068          * readback check when writing GTT PTE entries.
3069          */
3070         if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
3071                 ggtt->gsm = ioremap_nocache(phys_addr, size);
3072         else
3073                 ggtt->gsm = ioremap_wc(phys_addr, size);
3074         if (!ggtt->gsm) {
3075                 DRM_ERROR("Failed to map the ggtt page table\n");
3076                 return -ENOMEM;
3077         }
3078
3079         ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
3080         if (ret) {
3081                 DRM_ERROR("Scratch setup failed\n");
3082                 /* iounmap will also get called at remove, but meh */
3083                 iounmap(ggtt->gsm);
3084                 return ret;
3085         }
3086
3087         return 0;
3088 }
3089
3090 static struct intel_ppat_entry *
3091 __alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
3092 {
3093         struct intel_ppat_entry *entry = &ppat->entries[index];
3094
3095         GEM_BUG_ON(index >= ppat->max_entries);
3096         GEM_BUG_ON(test_bit(index, ppat->used));
3097
3098         entry->ppat = ppat;
3099         entry->value = value;
3100         kref_init(&entry->ref);
3101         set_bit(index, ppat->used);
3102         set_bit(index, ppat->dirty);
3103
3104         return entry;
3105 }
3106
3107 static void __free_ppat_entry(struct intel_ppat_entry *entry)
3108 {
3109         struct intel_ppat *ppat = entry->ppat;
3110         unsigned int index = entry - ppat->entries;
3111
3112         GEM_BUG_ON(index >= ppat->max_entries);
3113         GEM_BUG_ON(!test_bit(index, ppat->used));
3114
3115         entry->value = ppat->clear_value;
3116         clear_bit(index, ppat->used);
3117         set_bit(index, ppat->dirty);
3118 }
3119
3120 /**
3121  * intel_ppat_get - get a usable PPAT entry
3122  * @i915: i915 device instance
3123  * @value: the PPAT value required by the caller
3124  *
3125  * The function tries to search if there is an existing PPAT entry which
3126  * matches with the required value. If perfectly matched, the existing PPAT
3127  * entry will be used. If only partially matched, it will try to check if
3128  * there is any available PPAT index. If yes, it will allocate a new PPAT
3129  * index for the required entry and update the HW. If not, the partially
3130  * matched entry will be used.
3131  */
3132 const struct intel_ppat_entry *
3133 intel_ppat_get(struct drm_i915_private *i915, u8 value)
3134 {
3135         struct intel_ppat *ppat = &i915->ppat;
3136         struct intel_ppat_entry *entry = NULL;
3137         unsigned int scanned, best_score;
3138         int i;
3139
3140         GEM_BUG_ON(!ppat->max_entries);
3141
3142         scanned = best_score = 0;
3143         for_each_set_bit(i, ppat->used, ppat->max_entries) {
3144                 unsigned int score;
3145
3146                 score = ppat->match(ppat->entries[i].value, value);
3147                 if (score > best_score) {
3148                         entry = &ppat->entries[i];
3149                         if (score == INTEL_PPAT_PERFECT_MATCH) {
3150                                 kref_get(&entry->ref);
3151                                 return entry;
3152                         }
3153                         best_score = score;
3154                 }
3155                 scanned++;
3156         }
3157
3158         if (scanned == ppat->max_entries) {
3159                 if (!entry)
3160                         return ERR_PTR(-ENOSPC);
3161
3162                 kref_get(&entry->ref);
3163                 return entry;
3164         }
3165
3166         i = find_first_zero_bit(ppat->used, ppat->max_entries);
3167         entry = __alloc_ppat_entry(ppat, i, value);
3168         ppat->update_hw(i915);
3169         return entry;
3170 }
3171
3172 static void release_ppat(struct kref *kref)
3173 {
3174         struct intel_ppat_entry *entry =
3175                 container_of(kref, struct intel_ppat_entry, ref);
3176         struct drm_i915_private *i915 = entry->ppat->i915;
3177
3178         __free_ppat_entry(entry);
3179         entry->ppat->update_hw(i915);
3180 }
3181
3182 /**
3183  * intel_ppat_put - put back the PPAT entry got from intel_ppat_get()
3184  * @entry: an intel PPAT entry
3185  *
3186  * Put back the PPAT entry got from intel_ppat_get(). If the PPAT index of the
3187  * entry is dynamically allocated, its reference count will be decreased. Once
3188  * the reference count becomes into zero, the PPAT index becomes free again.
3189  */
3190 void intel_ppat_put(const struct intel_ppat_entry *entry)
3191 {
3192         struct intel_ppat *ppat = entry->ppat;
3193         unsigned int index = entry - ppat->entries;
3194
3195         GEM_BUG_ON(!ppat->max_entries);
3196
3197         kref_put(&ppat->entries[index].ref, release_ppat);
3198 }
3199
3200 static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
3201 {
3202         struct intel_ppat *ppat = &dev_priv->ppat;
3203         int i;
3204
3205         for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
3206                 I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
3207                 clear_bit(i, ppat->dirty);
3208         }
3209 }
3210
3211 static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
3212 {
3213         struct intel_ppat *ppat = &dev_priv->ppat;
3214         u64 pat = 0;
3215         int i;
3216
3217         for (i = 0; i < ppat->max_entries; i++)
3218                 pat |= GEN8_PPAT(i, ppat->entries[i].value);
3219
3220         bitmap_clear(ppat->dirty, 0, ppat->max_entries);
3221
3222         I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
3223         I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
3224 }
3225
3226 static unsigned int bdw_private_pat_match(u8 src, u8 dst)
3227 {
3228         unsigned int score = 0;
3229         enum {
3230                 AGE_MATCH = BIT(0),
3231                 TC_MATCH = BIT(1),
3232                 CA_MATCH = BIT(2),
3233         };
3234
3235         /* Cache attribute has to be matched. */
3236         if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
3237                 return 0;
3238
3239         score |= CA_MATCH;
3240
3241         if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
3242                 score |= TC_MATCH;
3243
3244         if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
3245                 score |= AGE_MATCH;
3246
3247         if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
3248                 return INTEL_PPAT_PERFECT_MATCH;
3249
3250         return score;
3251 }
3252
3253 static unsigned int chv_private_pat_match(u8 src, u8 dst)
3254 {
3255         return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
3256                 INTEL_PPAT_PERFECT_MATCH : 0;
3257 }
3258
3259 static void cnl_setup_private_ppat(struct intel_ppat *ppat)
3260 {
3261         ppat->max_entries = 8;
3262         ppat->update_hw = cnl_private_pat_update_hw;
3263         ppat->match = bdw_private_pat_match;
3264         ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3265
3266         __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
3267         __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
3268         __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
3269         __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
3270         __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3271         __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3272         __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3273         __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3274 }
3275
3276 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3277  * bits. When using advanced contexts each context stores its own PAT, but
3278  * writing this data shouldn't be harmful even in those cases. */
3279 static void bdw_setup_private_ppat(struct intel_ppat *ppat)
3280 {
3281         ppat->max_entries = 8;
3282         ppat->update_hw = bdw_private_pat_update_hw;
3283         ppat->match = bdw_private_pat_match;
3284         ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3285
3286         if (!USES_PPGTT(ppat->i915)) {
3287                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3288                  * so RTL will always use the value corresponding to
3289                  * pat_sel = 000".
3290                  * So let's disable cache for GGTT to avoid screen corruptions.
3291                  * MOCS still can be used though.
3292                  * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3293                  * before this patch, i.e. the same uncached + snooping access
3294                  * like on gen6/7 seems to be in effect.
3295                  * - So this just fixes blitter/render access. Again it looks
3296                  * like it's not just uncached access, but uncached + snooping.
3297                  * So we can still hold onto all our assumptions wrt cpu
3298                  * clflushing on LLC machines.
3299                  */
3300                 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3301                 return;
3302         }
3303
3304         __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);      /* for normal objects, no eLLC */
3305         __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);  /* for something pointing to ptes? */
3306         __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);  /* for scanout with eLLC */
3307         __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                      /* Uncached objects, mostly for scanout */
3308         __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3309         __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3310         __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3311         __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3312 }
3313
3314 static void chv_setup_private_ppat(struct intel_ppat *ppat)
3315 {
3316         ppat->max_entries = 8;
3317         ppat->update_hw = bdw_private_pat_update_hw;
3318         ppat->match = chv_private_pat_match;
3319         ppat->clear_value = CHV_PPAT_SNOOP;
3320
3321         /*
3322          * Map WB on BDW to snooped on CHV.
3323          *
3324          * Only the snoop bit has meaning for CHV, the rest is
3325          * ignored.
3326          *
3327          * The hardware will never snoop for certain types of accesses:
3328          * - CPU GTT (GMADR->GGTT->no snoop->memory)
3329          * - PPGTT page tables
3330          * - some other special cycles
3331          *
3332          * As with BDW, we also need to consider the following for GT accesses:
3333          * "For GGTT, there is NO pat_sel[2:0] from the entry,
3334          * so RTL will always use the value corresponding to
3335          * pat_sel = 000".
3336          * Which means we must set the snoop bit in PAT entry 0
3337          * in order to keep the global status page working.
3338          */
3339
3340         __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
3341         __alloc_ppat_entry(ppat, 1, 0);
3342         __alloc_ppat_entry(ppat, 2, 0);
3343         __alloc_ppat_entry(ppat, 3, 0);
3344         __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
3345         __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
3346         __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
3347         __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
3348 }
3349
3350 static void gen6_gmch_remove(struct i915_address_space *vm)
3351 {
3352         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3353
3354         iounmap(ggtt->gsm);
3355         cleanup_scratch_page(vm);
3356 }
3357
3358 static void setup_private_pat(struct drm_i915_private *dev_priv)
3359 {
3360         struct intel_ppat *ppat = &dev_priv->ppat;
3361         int i;
3362
3363         ppat->i915 = dev_priv;
3364
3365         if (INTEL_GEN(dev_priv) >= 10)
3366                 cnl_setup_private_ppat(ppat);
3367         else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
3368                 chv_setup_private_ppat(ppat);
3369         else
3370                 bdw_setup_private_ppat(ppat);
3371
3372         GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
3373
3374         for_each_clear_bit(i, ppat->used, ppat->max_entries) {
3375                 ppat->entries[i].value = ppat->clear_value;
3376                 ppat->entries[i].ppat = ppat;
3377                 set_bit(i, ppat->dirty);
3378         }
3379
3380         ppat->update_hw(dev_priv);
3381 }
3382
3383 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3384 {
3385         struct drm_i915_private *dev_priv = ggtt->vm.i915;
3386         struct pci_dev *pdev = dev_priv->drm.pdev;
3387         unsigned int size;
3388         u16 snb_gmch_ctl;
3389         int err;
3390
3391         /* TODO: We're not aware of mappable constraints on gen8 yet */
3392         ggtt->gmadr =
3393                 (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3394                                                  pci_resource_len(pdev, 2));
3395         ggtt->mappable_end = resource_size(&ggtt->gmadr);
3396
3397         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
3398         if (!err)
3399                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3400         if (err)
3401                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3402
3403         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3404         if (IS_CHERRYVIEW(dev_priv))
3405                 size = chv_get_total_gtt_size(snb_gmch_ctl);
3406         else
3407                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3408
3409         ggtt->vm.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3410         ggtt->vm.cleanup = gen6_gmch_remove;
3411         ggtt->vm.insert_page = gen8_ggtt_insert_page;
3412         ggtt->vm.clear_range = nop_clear_range;
3413         if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3414                 ggtt->vm.clear_range = gen8_ggtt_clear_range;
3415
3416         ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
3417
3418         /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3419         if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
3420                 ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3421                 ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
3422                 if (ggtt->vm.clear_range != nop_clear_range)
3423                         ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3424         }
3425
3426         ggtt->invalidate = gen6_ggtt_invalidate;
3427
3428         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3429         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3430         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3431         ggtt->vm.vma_ops.clear_pages = clear_pages;
3432
3433         setup_private_pat(dev_priv);
3434
3435         return ggtt_probe_common(ggtt, size);
3436 }
3437
3438 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3439 {
3440         struct drm_i915_private *dev_priv = ggtt->vm.i915;
3441         struct pci_dev *pdev = dev_priv->drm.pdev;
3442         unsigned int size;
3443         u16 snb_gmch_ctl;
3444         int err;
3445
3446         ggtt->gmadr =
3447                 (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3448                                                  pci_resource_len(pdev, 2));
3449         ggtt->mappable_end = resource_size(&ggtt->gmadr);
3450
3451         /* 64/512MB is the current min/max we actually know of, but this is just
3452          * a coarse sanity check.
3453          */
3454         if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3455                 DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
3456                 return -ENXIO;
3457         }
3458
3459         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
3460         if (!err)
3461                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3462         if (err)
3463                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3464         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3465
3466         size = gen6_get_total_gtt_size(snb_gmch_ctl);
3467         ggtt->vm.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3468
3469         ggtt->vm.clear_range = gen6_ggtt_clear_range;
3470         ggtt->vm.insert_page = gen6_ggtt_insert_page;
3471         ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
3472         ggtt->vm.cleanup = gen6_gmch_remove;
3473
3474         ggtt->invalidate = gen6_ggtt_invalidate;
3475
3476         if (HAS_EDRAM(dev_priv))
3477                 ggtt->vm.pte_encode = iris_pte_encode;
3478         else if (IS_HASWELL(dev_priv))
3479                 ggtt->vm.pte_encode = hsw_pte_encode;
3480         else if (IS_VALLEYVIEW(dev_priv))
3481                 ggtt->vm.pte_encode = byt_pte_encode;
3482         else if (INTEL_GEN(dev_priv) >= 7)
3483                 ggtt->vm.pte_encode = ivb_pte_encode;
3484         else
3485                 ggtt->vm.pte_encode = snb_pte_encode;
3486
3487         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3488         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3489         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3490         ggtt->vm.vma_ops.clear_pages = clear_pages;
3491
3492         return ggtt_probe_common(ggtt, size);
3493 }
3494
/*
 * Cleanup hook installed as vm.cleanup by i915_gmch_probe().
 *
 * intel_gmch_remove() takes no arguments, so @vm is not consulted here.
 */
static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}
3499
3500 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3501 {
3502         struct drm_i915_private *dev_priv = ggtt->vm.i915;
3503         phys_addr_t gmadr_base;
3504         int ret;
3505
3506         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3507         if (!ret) {
3508                 DRM_ERROR("failed to set up gmch\n");
3509                 return -EIO;
3510         }
3511
3512         intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
3513
3514         ggtt->gmadr =
3515                 (struct resource) DEFINE_RES_MEM(gmadr_base,
3516                                                  ggtt->mappable_end);
3517
3518         ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3519         ggtt->vm.insert_page = i915_ggtt_insert_page;
3520         ggtt->vm.insert_entries = i915_ggtt_insert_entries;
3521         ggtt->vm.clear_range = i915_ggtt_clear_range;
3522         ggtt->vm.cleanup = i915_gmch_remove;
3523
3524         ggtt->invalidate = gmch_ggtt_invalidate;
3525
3526         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3527         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3528         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3529         ggtt->vm.vma_ops.clear_pages = clear_pages;
3530
3531         if (unlikely(ggtt->do_idle_maps))
3532                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3533
3534         return 0;
3535 }
3536
3537 /**
3538  * i915_ggtt_probe_hw - Probe GGTT hardware location
3539  * @dev_priv: i915 device
3540  */
3541 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3542 {
3543         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3544         int ret;
3545
3546         ggtt->vm.i915 = dev_priv;
3547         ggtt->vm.dma = &dev_priv->drm.pdev->dev;
3548
3549         if (INTEL_GEN(dev_priv) <= 5)
3550                 ret = i915_gmch_probe(ggtt);
3551         else if (INTEL_GEN(dev_priv) < 8)
3552                 ret = gen6_gmch_probe(ggtt);
3553         else
3554                 ret = gen8_gmch_probe(ggtt);
3555         if (ret)
3556                 return ret;
3557
3558         /* Trim the GGTT to fit the GuC mappable upper range (when enabled).
3559          * This is easier than doing range restriction on the fly, as we
3560          * currently don't have any bits spare to pass in this upper
3561          * restriction!
3562          */
3563         if (USES_GUC(dev_priv)) {
3564                 ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP);
3565                 ggtt->mappable_end =
3566                         min_t(u64, ggtt->mappable_end, ggtt->vm.total);
3567         }
3568
3569         if ((ggtt->vm.total - 1) >> 32) {
3570                 DRM_ERROR("We never expected a Global GTT with more than 32bits"
3571                           " of address space! Found %lldM!\n",
3572                           ggtt->vm.total >> 20);
3573                 ggtt->vm.total = 1ULL << 32;
3574                 ggtt->mappable_end =
3575                         min_t(u64, ggtt->mappable_end, ggtt->vm.total);
3576         }
3577
3578         if (ggtt->mappable_end > ggtt->vm.total) {
3579                 DRM_ERROR("mappable aperture extends past end of GGTT,"
3580                           " aperture=%pa, total=%llx\n",
3581                           &ggtt->mappable_end, ggtt->vm.total);
3582                 ggtt->mappable_end = ggtt->vm.total;
3583         }
3584
3585         /* GMADR is the PCI mmio aperture into the global GTT. */
3586         DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
3587         DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
3588         DRM_DEBUG_DRIVER("DSM size = %lluM\n",
3589                          (u64)resource_size(&intel_graphics_stolen_res) >> 20);
3590         if (intel_vtd_active())
3591                 DRM_INFO("VT-d active for gfx access\n");
3592
3593         return 0;
3594 }
3595
3596 /**
3597  * i915_ggtt_init_hw - Initialize GGTT hardware
3598  * @dev_priv: i915 device
3599  */
3600 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3601 {
3602         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3603         int ret;
3604
3605         stash_init(&dev_priv->mm.wc_stash);
3606
3607         /* Note that we use page colouring to enforce a guard page at the
3608          * end of the address space. This is required as the CS may prefetch
3609          * beyond the end of the batch buffer, across the page boundary,
3610          * and beyond the end of the GTT if we do not provide a guard.
3611          */
3612         mutex_lock(&dev_priv->drm.struct_mutex);
3613         i915_address_space_init(&ggtt->vm, dev_priv);
3614
3615         /* Only VLV supports read-only GGTT mappings */
3616         ggtt->vm.has_read_only = IS_VALLEYVIEW(dev_priv);
3617
3618         if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
3619                 ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
3620         mutex_unlock(&dev_priv->drm.struct_mutex);
3621
3622         if (!io_mapping_init_wc(&dev_priv->ggtt.iomap,
3623                                 dev_priv->ggtt.gmadr.start,
3624                                 dev_priv->ggtt.mappable_end)) {
3625                 ret = -EIO;
3626                 goto out_gtt_cleanup;
3627         }
3628
3629         ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
3630
3631         /*
3632          * Initialise stolen early so that we may reserve preallocated
3633          * objects for the BIOS to KMS transition.
3634          */
3635         ret = i915_gem_init_stolen(dev_priv);
3636         if (ret)
3637                 goto out_gtt_cleanup;
3638
3639         return 0;
3640
3641 out_gtt_cleanup:
3642         ggtt->vm.cleanup(&ggtt->vm);
3643         return ret;
3644 }
3645
3646 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3647 {
3648         if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3649                 return -EIO;
3650
3651         return 0;
3652 }
3653
3654 void i915_ggtt_enable_guc(struct drm_i915_private *i915)
3655 {
3656         GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
3657
3658         i915->ggtt.invalidate = guc_ggtt_invalidate;
3659
3660         i915_ggtt_invalidate(i915);
3661 }
3662
3663 void i915_ggtt_disable_guc(struct drm_i915_private *i915)
3664 {
3665         /* We should only be called after i915_ggtt_enable_guc() */
3666         GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
3667
3668         i915->ggtt.invalidate = gen6_ggtt_invalidate;
3669
3670         i915_ggtt_invalidate(i915);
3671 }
3672
3673 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
3674 {
3675         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3676         struct i915_vma *vma, *vn;
3677
3678         i915_check_and_clear_faults(dev_priv);
3679
3680         /* First fill our portion of the GTT with scratch pages */
3681         ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
3682
3683         ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
3684
3685         /* clflush objects bound into the GGTT and rebind them. */
3686         GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
3687         list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) {
3688                 struct drm_i915_gem_object *obj = vma->obj;
3689
3690                 if (!(vma->flags & I915_VMA_GLOBAL_BIND))
3691                         continue;
3692
3693                 if (!i915_vma_unbind(vma))
3694                         continue;
3695
3696                 WARN_ON(i915_vma_bind(vma,
3697                                       obj ? obj->cache_level : 0,
3698                                       PIN_UPDATE));
3699                 if (obj)
3700                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3701         }
3702
3703         ggtt->vm.closed = false;
3704         i915_ggtt_invalidate(dev_priv);
3705
3706         if (INTEL_GEN(dev_priv) >= 8) {
3707                 struct intel_ppat *ppat = &dev_priv->ppat;
3708
3709                 bitmap_set(ppat->dirty, 0, ppat->max_entries);
3710                 dev_priv->ppat.update_hw(dev_priv);
3711                 return;
3712         }
3713 }
3714
3715 static struct scatterlist *
3716 rotate_pages(const dma_addr_t *in, unsigned int offset,
3717              unsigned int width, unsigned int height,
3718              unsigned int stride,
3719              struct sg_table *st, struct scatterlist *sg)
3720 {
3721         unsigned int column, row;
3722         unsigned int src_idx;
3723
3724         for (column = 0; column < width; column++) {
3725                 src_idx = stride * (height - 1) + column;
3726                 for (row = 0; row < height; row++) {
3727                         st->nents++;
3728                         /* We don't need the pages, but need to initialize
3729                          * the entries so the sg list can be happily traversed.
3730                          * The only thing we need are DMA addresses.
3731                          */
3732                         sg_set_page(sg, NULL, PAGE_SIZE, 0);
3733                         sg_dma_address(sg) = in[offset + src_idx];
3734                         sg_dma_len(sg) = PAGE_SIZE;
3735                         sg = sg_next(sg);
3736                         src_idx -= stride;
3737                 }
3738         }
3739
3740         return sg;
3741 }
3742
3743 static noinline struct sg_table *
3744 intel_rotate_pages(struct intel_rotation_info *rot_info,
3745                    struct drm_i915_gem_object *obj)
3746 {
3747         const unsigned long n_pages = obj->base.size / PAGE_SIZE;
3748         unsigned int size = intel_rotation_info_size(rot_info);
3749         struct sgt_iter sgt_iter;
3750         dma_addr_t dma_addr;
3751         unsigned long i;
3752         dma_addr_t *page_addr_list;
3753         struct sg_table *st;
3754         struct scatterlist *sg;
3755         int ret = -ENOMEM;
3756
3757         /* Allocate a temporary list of source pages for random access. */
3758         page_addr_list = kvmalloc_array(n_pages,
3759                                         sizeof(dma_addr_t),
3760                                         GFP_KERNEL);
3761         if (!page_addr_list)
3762                 return ERR_PTR(ret);
3763
3764         /* Allocate target SG list. */
3765         st = kmalloc(sizeof(*st), GFP_KERNEL);
3766         if (!st)
3767                 goto err_st_alloc;
3768
3769         ret = sg_alloc_table(st, size, GFP_KERNEL);
3770         if (ret)
3771                 goto err_sg_alloc;
3772
3773         /* Populate source page list from the object. */
3774         i = 0;
3775         for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
3776                 page_addr_list[i++] = dma_addr;
3777
3778         GEM_BUG_ON(i != n_pages);
3779         st->nents = 0;
3780         sg = st->sgl;
3781
3782         for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
3783                 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
3784                                   rot_info->plane[i].width, rot_info->plane[i].height,
3785                                   rot_info->plane[i].stride, st, sg);
3786         }
3787
3788         kvfree(page_addr_list);
3789
3790         return st;
3791
3792 err_sg_alloc:
3793         kfree(st);
3794 err_st_alloc:
3795         kvfree(page_addr_list);
3796
3797         DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3798                          obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3799
3800         return ERR_PTR(ret);
3801 }
3802
3803 static noinline struct sg_table *
3804 intel_partial_pages(const struct i915_ggtt_view *view,
3805                     struct drm_i915_gem_object *obj)
3806 {
3807         struct sg_table *st;
3808         struct scatterlist *sg, *iter;
3809         unsigned int count = view->partial.size;
3810         unsigned int offset;
3811         int ret = -ENOMEM;
3812
3813         st = kmalloc(sizeof(*st), GFP_KERNEL);
3814         if (!st)
3815                 goto err_st_alloc;
3816
3817         ret = sg_alloc_table(st, count, GFP_KERNEL);
3818         if (ret)
3819                 goto err_sg_alloc;
3820
3821         iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
3822         GEM_BUG_ON(!iter);
3823
3824         sg = st->sgl;
3825         st->nents = 0;
3826         do {
3827                 unsigned int len;
3828
3829                 len = min(iter->length - (offset << PAGE_SHIFT),
3830                           count << PAGE_SHIFT);
3831                 sg_set_page(sg, NULL, len, 0);
3832                 sg_dma_address(sg) =
3833                         sg_dma_address(iter) + (offset << PAGE_SHIFT);
3834                 sg_dma_len(sg) = len;
3835
3836                 st->nents++;
3837                 count -= len >> PAGE_SHIFT;
3838                 if (count == 0) {
3839                         sg_mark_end(sg);
3840                         return st;
3841                 }
3842
3843                 sg = __sg_next(sg);
3844                 iter = __sg_next(iter);
3845                 offset = 0;
3846         } while (1);
3847
3848 err_sg_alloc:
3849         kfree(st);
3850 err_st_alloc:
3851         return ERR_PTR(ret);
3852 }
3853
3854 static int
3855 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3856 {
3857         int ret;
3858
3859         /* The vma->pages are only valid within the lifespan of the borrowed
3860          * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
3861          * must be the vma->pages. A simple rule is that vma->pages must only
3862          * be accessed when the obj->mm.pages are pinned.
3863          */
3864         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
3865
3866         switch (vma->ggtt_view.type) {
3867         default:
3868                 GEM_BUG_ON(vma->ggtt_view.type);
3869                 /* fall through */
3870         case I915_GGTT_VIEW_NORMAL:
3871                 vma->pages = vma->obj->mm.pages;
3872                 return 0;
3873
3874         case I915_GGTT_VIEW_ROTATED:
3875                 vma->pages =
3876                         intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
3877                 break;
3878
3879         case I915_GGTT_VIEW_PARTIAL:
3880                 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3881                 break;
3882         }
3883
3884         ret = 0;
3885         if (unlikely(IS_ERR(vma->pages))) {
3886                 ret = PTR_ERR(vma->pages);
3887                 vma->pages = NULL;
3888                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3889                           vma->ggtt_view.type, ret);
3890         }
3891         return ret;
3892 }
3893
3894 /**
3895  * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
3896  * @vm: the &struct i915_address_space
3897  * @node: the &struct drm_mm_node (typically i915_vma.mode)
3898  * @size: how much space to allocate inside the GTT,
3899  *        must be #I915_GTT_PAGE_SIZE aligned
3900  * @offset: where to insert inside the GTT,
3901  *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
3902  *          (@offset + @size) must fit within the address space
3903  * @color: color to apply to node, if this node is not from a VMA,
3904  *         color must be #I915_COLOR_UNEVICTABLE
3905  * @flags: control search and eviction behaviour
3906  *
3907  * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
3908  * the address space (using @size and @color). If the @node does not fit, it
3909  * tries to evict any overlapping nodes from the GTT, including any
3910  * neighbouring nodes if the colors do not match (to ensure guard pages between
3911  * differing domains). See i915_gem_evict_for_node() for the gory details
3912  * on the eviction algorithm. #PIN_NONBLOCK may used to prevent waiting on
3913  * evicting active overlapping objects, and any overlapping node that is pinned
3914  * or marked as unevictable will also result in failure.
3915  *
3916  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3917  * asked to wait for eviction and interrupted.
3918  */
3919 int i915_gem_gtt_reserve(struct i915_address_space *vm,
3920                          struct drm_mm_node *node,
3921                          u64 size, u64 offset, unsigned long color,
3922                          unsigned int flags)
3923 {
3924         int err;
3925
3926         GEM_BUG_ON(!size);
3927         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3928         GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
3929         GEM_BUG_ON(range_overflows(offset, size, vm->total));
3930         GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
3931         GEM_BUG_ON(drm_mm_node_allocated(node));
3932
3933         node->size = size;
3934         node->start = offset;
3935         node->color = color;
3936
3937         err = drm_mm_reserve_node(&vm->mm, node);
3938         if (err != -ENOSPC)
3939                 return err;
3940
3941         if (flags & PIN_NOEVICT)
3942                 return -ENOSPC;
3943
3944         err = i915_gem_evict_for_node(vm, node, flags);
3945         if (err == 0)
3946                 err = drm_mm_reserve_node(&vm->mm, node);
3947
3948         return err;
3949 }
3950
3951 static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
3952 {
3953         u64 range, addr;
3954
3955         GEM_BUG_ON(range_overflows(start, len, end));
3956         GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
3957
3958         range = round_down(end - len, align) - round_up(start, align);
3959         if (range) {
3960                 if (sizeof(unsigned long) == sizeof(u64)) {
3961                         addr = get_random_long();
3962                 } else {
3963                         addr = get_random_int();
3964                         if (range > U32_MAX) {
3965                                 addr <<= 32;
3966                                 addr |= get_random_int();
3967                         }
3968                 }
3969                 div64_u64_rem(addr, range, &addr);
3970                 start += addr;
3971         }
3972
3973         return round_up(start, align);
3974 }
3975
3976 /**
3977  * i915_gem_gtt_insert - insert a node into an address_space (GTT)
3978  * @vm: the &struct i915_address_space
3979  * @node: the &struct drm_mm_node (typically i915_vma.node)
3980  * @size: how much space to allocate inside the GTT,
3981  *        must be #I915_GTT_PAGE_SIZE aligned
3982  * @alignment: required alignment of starting offset, may be 0 but
3983  *             if specified, this must be a power-of-two and at least
3984  *             #I915_GTT_MIN_ALIGNMENT
3985  * @color: color to apply to node
3986  * @start: start of any range restriction inside GTT (0 for all),
3987  *         must be #I915_GTT_PAGE_SIZE aligned
3988  * @end: end of any range restriction inside GTT (U64_MAX for all),
3989  *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
3990  * @flags: control search and eviction behaviour
3991  *
3992  * i915_gem_gtt_insert() first searches for an available hole into which
3993  * is can insert the node. The hole address is aligned to @alignment and
3994  * its @size must then fit entirely within the [@start, @end] bounds. The
3995  * nodes on either side of the hole must match @color, or else a guard page
3996  * will be inserted between the two nodes (or the node evicted). If no
3997  * suitable hole is found, first a victim is randomly selected and tested
3998  * for eviction, otherwise then the LRU list of objects within the GTT
3999  * is scanned to find the first set of replacement nodes to create the hole.
4000  * Those old overlapping nodes are evicted from the GTT (and so must be
4001  * rebound before any future use). Any node that is currently pinned cannot
4002  * be evicted (see i915_vma_pin()). Similar if the node's VMA is currently
4003  * active and #PIN_NONBLOCK is specified, that node is also skipped when
4004  * searching for an eviction candidate. See i915_gem_evict_something() for
4005  * the gory details on the eviction algorithm.
4006  *
4007  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
4008  * asked to wait for eviction and interrupted.
4009  */
4010 int i915_gem_gtt_insert(struct i915_address_space *vm,
4011                         struct drm_mm_node *node,
4012                         u64 size, u64 alignment, unsigned long color,
4013                         u64 start, u64 end, unsigned int flags)
4014 {
4015         enum drm_mm_insert_mode mode;
4016         u64 offset;
4017         int err;
4018
4019         lockdep_assert_held(&vm->i915->drm.struct_mutex);
4020         GEM_BUG_ON(!size);
4021         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
4022         GEM_BUG_ON(alignment && !is_power_of_2(alignment));
4023         GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
4024         GEM_BUG_ON(start >= end);
4025         GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
4026         GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
4027         GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
4028         GEM_BUG_ON(drm_mm_node_allocated(node));
4029
4030         if (unlikely(range_overflows(start, size, end)))
4031                 return -ENOSPC;
4032
4033         if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
4034                 return -ENOSPC;
4035
4036         mode = DRM_MM_INSERT_BEST;
4037         if (flags & PIN_HIGH)
4038                 mode = DRM_MM_INSERT_HIGH;
4039         if (flags & PIN_MAPPABLE)
4040                 mode = DRM_MM_INSERT_LOW;
4041
4042         /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
4043          * so we know that we always have a minimum alignment of 4096.
4044          * The drm_mm range manager is optimised to return results
4045          * with zero alignment, so where possible use the optimal
4046          * path.
4047          */
4048         BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
4049         if (alignment <= I915_GTT_MIN_ALIGNMENT)
4050                 alignment = 0;
4051
4052         err = drm_mm_insert_node_in_range(&vm->mm, node,
4053                                           size, alignment, color,
4054                                           start, end, mode);
4055         if (err != -ENOSPC)
4056                 return err;
4057
4058         if (flags & PIN_NOEVICT)
4059                 return -ENOSPC;
4060
4061         /* No free space, pick a slot at random.
4062          *
4063          * There is a pathological case here using a GTT shared between
4064          * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
4065          *
4066          *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
4067          *         (64k objects)             (448k objects)
4068          *
4069          * Now imagine that the eviction LRU is ordered top-down (just because
4070          * pathology meets real life), and that we need to evict an object to
4071          * make room inside the aperture. The eviction scan then has to walk
4072          * the 448k list before it finds one within range. And now imagine that
4073          * it has to search for a new hole between every byte inside the memcpy,
4074          * for several simultaneous clients.
4075          *
4076          * On a full-ppgtt system, if we have run out of available space, there
4077          * will be lots and lots of objects in the eviction list! Again,
4078          * searching that LRU list may be slow if we are also applying any
4079          * range restrictions (e.g. restriction to low 4GiB) and so, for
4080          * simplicity and similarilty between different GTT, try the single
4081          * random replacement first.
4082          */
4083         offset = random_offset(start, end,
4084                                size, alignment ?: I915_GTT_MIN_ALIGNMENT);
4085         err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
4086         if (err != -ENOSPC)
4087                 return err;
4088
4089         /* Randomly selected placement is pinned, do a search */
4090         err = i915_gem_evict_something(vm, size, alignment, color,
4091                                        start, end, flags);
4092         if (err)
4093                 return err;
4094
4095         return drm_mm_insert_node_in_range(&vm->mm, node,
4096                                            size, alignment, color,
4097                                            start, end, DRM_MM_INSERT_EVICT);
4098 }
4099
4100 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4101 #include "selftests/mock_gtt.c"
4102 #include "selftests/i915_gem_gtt.c"
4103 #endif