[platform/kernel/linux-exynos.git] drivers/gpu/drm/i915/i915_gem_gtt.c
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25
26 #include <linux/seq_file.h>
27 #include <linux/stop_machine.h>
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_vgpu.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34
35 /**
36  * DOC: Global GTT views
37  *
38  * Background and previous state
39  *
40  * Historically objects could exist (be bound) in global GTT space only as
41  * singular instances with a view representing all of the object's backing pages
42  * in a linear fashion. This view is called the normal view.
43  *
44  * To support multiple views of the same object, where the number of mapped
45  * pages is not equal to the backing store, or where the layout of the pages
46  * is not linear, the concept of a GGTT view was added.
47  *
48  * One example of an alternative view is a stereo display driven by a single
49  * image. In this case we would have a framebuffer looking like this
50  * (2x2 pages):
51  *
52  *    12
53  *    34
54  *
55  * The above represents a normal GGTT view, as normally mapped for GPU or CPU
56  * rendering. In contrast, the display engine would be fed an alternative
57  * view, which could look something like this:
58  *
59  *   1212
60  *   3434
61  *
62  * In this example both the size and the layout of pages in the alternative
63  * view differ from the normal view.
64  *
65  * Implementation and usage
66  *
67  * GGTT views are implemented using VMAs and are distinguished via enum
68  * i915_ggtt_view_type and struct i915_ggtt_view.
69  *
70  * A new flavour of core GEM functions which work with GGTT bound objects was
71  * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
72  * renaming large amounts of code. They take a struct i915_ggtt_view
73  * parameter encapsulating all the metadata required to implement a view.
74  *
75  * As a helper for callers which are only interested in the normal view, a
76  * globally const i915_ggtt_view_normal singleton instance exists. All old core
77  * GEM API functions, the ones not taking the view parameter, operate on, or
78  * with, the normal GGTT view.
79  *
80  * Code wanting to add or use a new GGTT view needs to:
81  *
82  * 1. Add a new enum with a suitable name.
83  * 2. Extend the metadata in the i915_ggtt_view structure if required.
84  * 3. Add support to i915_get_vma_pages().
85  *
86  * New views are required to build a scatter-gather table from within the
87  * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
88  * exists for the lifetime of a VMA.
89  *
90  * The core API is designed to have copy semantics, which means that the
91  * passed-in struct i915_ggtt_view does not need to be persistent (left around
92  * after calling the core API functions).
93  *
94  */
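
/*
 * Illustrative sketch (not part of the driver code): because of the copy
 * semantics described above, a caller that wants a non-normal view can build
 * the struct i915_ggtt_view on its own stack, set the type plus any
 * view-specific metadata, and pass its address to one of the _ggtt_/_view
 * helpers; the core code copies what it needs into the VMA, so the local can
 * go out of scope right after the call. The helper and the metadata fields
 * are assumptions here, shown only to illustrate the flow:
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *
 *	// fill in the rotation metadata, then pass &view to a view-aware
 *	// pin/bind helper; "view" does not need to outlive that call.
 */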
95
96 static inline struct i915_ggtt *
97 i915_vm_to_ggtt(struct i915_address_space *vm)
98 {
99         GEM_BUG_ON(!i915_is_ggtt(vm));
100         return container_of(vm, struct i915_ggtt, base);
101 }
102
103 static int
104 i915_get_ggtt_vma_pages(struct i915_vma *vma);
105
106 const struct i915_ggtt_view i915_ggtt_view_normal = {
107         .type = I915_GGTT_VIEW_NORMAL,
108 };
109 const struct i915_ggtt_view i915_ggtt_view_rotated = {
110         .type = I915_GGTT_VIEW_ROTATED,
111 };
112
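/*
 * Note: this helper sanitizes the i915.enable_ppgtt module parameter against
 * what the platform actually supports. The return value is the effective
 * PPGTT mode: 0 = disabled, 1 = aliasing PPGTT, 2 = full PPGTT,
 * 3 = full 48-bit PPGTT.
 */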
113 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
114                                 int enable_ppgtt)
115 {
116         bool has_aliasing_ppgtt;
117         bool has_full_ppgtt;
118         bool has_full_48bit_ppgtt;
119
120         has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6;
121         has_full_ppgtt = INTEL_GEN(dev_priv) >= 7;
122         has_full_48bit_ppgtt =
123                 IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;
124
125         if (intel_vgpu_active(dev_priv)) {
126                 /* emulation is too hard */
127                 has_full_ppgtt = false;
128                 has_full_48bit_ppgtt = false;
129         }
130
131         if (!has_aliasing_ppgtt)
132                 return 0;
133
134         /*
135          * We don't allow disabling PPGTT for gen9+ as it's a requirement for
136          * execlists, the sole mechanism available to submit work.
137          */
138         if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
139                 return 0;
140
141         if (enable_ppgtt == 1)
142                 return 1;
143
144         if (enable_ppgtt == 2 && has_full_ppgtt)
145                 return 2;
146
147         if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
148                 return 3;
149
150 #ifdef CONFIG_INTEL_IOMMU
151         /* Disable ppgtt on SNB if VT-d is on. */
152         if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
153                 DRM_INFO("Disabling PPGTT because VT-d is on\n");
154                 return 0;
155         }
156 #endif
157
158         /* Early VLV (pre-B3 stepping) doesn't have PPGTT */
159         if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
160                 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
161                 return 0;
162         }
163
164         if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
165                 return has_full_48bit_ppgtt ? 3 : 2;
166         else
167                 return has_aliasing_ppgtt ? 1 : 0;
168 }
169
170 static int ppgtt_bind_vma(struct i915_vma *vma,
171                           enum i915_cache_level cache_level,
172                           u32 unused)
173 {
174         u32 pte_flags = 0;
175
176         /* Currently applicable only to VLV */
177         if (vma->obj->gt_ro)
178                 pte_flags |= PTE_READ_ONLY;
179
180         vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
181                                 cache_level, pte_flags);
182
183         return 0;
184 }
185
186 static void ppgtt_unbind_vma(struct i915_vma *vma)
187 {
188         vma->vm->clear_range(vma->vm,
189                              vma->node.start,
190                              vma->obj->base.size,
191                              true);
192 }
193
194 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
195                                   enum i915_cache_level level,
196                                   bool valid)
197 {
198         gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
199         pte |= addr;
200
201         switch (level) {
202         case I915_CACHE_NONE:
203                 pte |= PPAT_UNCACHED_INDEX;
204                 break;
205         case I915_CACHE_WT:
206                 pte |= PPAT_DISPLAY_ELLC_INDEX;
207                 break;
208         default:
209                 pte |= PPAT_CACHED_INDEX;
210                 break;
211         }
212
213         return pte;
214 }
215
216 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
217                                   const enum i915_cache_level level)
218 {
219         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
220         pde |= addr;
221         if (level != I915_CACHE_NONE)
222                 pde |= PPAT_CACHED_PDE_INDEX;
223         else
224                 pde |= PPAT_UNCACHED_INDEX;
225         return pde;
226 }
227
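/* Gen8 PDP and PML4 entries use the same bit layout as PDEs, so the PDE
 * encoder above can simply be reused for them.
 */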
228 #define gen8_pdpe_encode gen8_pde_encode
229 #define gen8_pml4e_encode gen8_pde_encode
230
231 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
232                                  enum i915_cache_level level,
233                                  bool valid, u32 unused)
234 {
235         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
236         pte |= GEN6_PTE_ADDR_ENCODE(addr);
237
238         switch (level) {
239         case I915_CACHE_L3_LLC:
240         case I915_CACHE_LLC:
241                 pte |= GEN6_PTE_CACHE_LLC;
242                 break;
243         case I915_CACHE_NONE:
244                 pte |= GEN6_PTE_UNCACHED;
245                 break;
246         default:
247                 MISSING_CASE(level);
248         }
249
250         return pte;
251 }
252
253 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
254                                  enum i915_cache_level level,
255                                  bool valid, u32 unused)
256 {
257         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
258         pte |= GEN6_PTE_ADDR_ENCODE(addr);
259
260         switch (level) {
261         case I915_CACHE_L3_LLC:
262                 pte |= GEN7_PTE_CACHE_L3_LLC;
263                 break;
264         case I915_CACHE_LLC:
265                 pte |= GEN6_PTE_CACHE_LLC;
266                 break;
267         case I915_CACHE_NONE:
268                 pte |= GEN6_PTE_UNCACHED;
269                 break;
270         default:
271                 MISSING_CASE(level);
272         }
273
274         return pte;
275 }
276
277 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
278                                  enum i915_cache_level level,
279                                  bool valid, u32 flags)
280 {
281         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
282         pte |= GEN6_PTE_ADDR_ENCODE(addr);
283
284         if (!(flags & PTE_READ_ONLY))
285                 pte |= BYT_PTE_WRITEABLE;
286
287         if (level != I915_CACHE_NONE)
288                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
289
290         return pte;
291 }
292
293 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
294                                  enum i915_cache_level level,
295                                  bool valid, u32 unused)
296 {
297         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
298         pte |= HSW_PTE_ADDR_ENCODE(addr);
299
300         if (level != I915_CACHE_NONE)
301                 pte |= HSW_WB_LLC_AGE3;
302
303         return pte;
304 }
305
306 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
307                                   enum i915_cache_level level,
308                                   bool valid, u32 unused)
309 {
310         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
311         pte |= HSW_PTE_ADDR_ENCODE(addr);
312
313         switch (level) {
314         case I915_CACHE_NONE:
315                 break;
316         case I915_CACHE_WT:
317                 pte |= HSW_WT_ELLC_LLC_AGE3;
318                 break;
319         default:
320                 pte |= HSW_WB_ELLC_LLC_AGE3;
321                 break;
322         }
323
324         return pte;
325 }
326
327 static int __setup_page_dma(struct drm_device *dev,
328                             struct i915_page_dma *p, gfp_t flags)
329 {
330         struct device *device = &dev->pdev->dev;
331
332         p->page = alloc_page(flags);
333         if (!p->page)
334                 return -ENOMEM;
335
336         p->daddr = dma_map_page(device,
337                                 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
338
339         if (dma_mapping_error(device, p->daddr)) {
340                 __free_page(p->page);
341                 return -EINVAL;
342         }
343
344         return 0;
345 }
346
347 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
348 {
349         return __setup_page_dma(dev, p, GFP_KERNEL);
350 }
351
352 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
353 {
354         if (WARN_ON(!p->page))
355                 return;
356
357         dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
358         __free_page(p->page);
359         memset(p, 0, sizeof(*p));
360 }
361
362 static void *kmap_page_dma(struct i915_page_dma *p)
363 {
364         return kmap_atomic(p->page);
365 }
366
367 /* We use the flushing unmap only with ppgtt structures:
368  * page directories, page tables and scratch pages.
369  */
370 static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
371 {
372         /* There are only a few exceptions for gen >= 6: chv and bxt.
373          * And we are not sure about the latter, so play safe for now.
374          */
375         if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
376                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
377
378         kunmap_atomic(vaddr);
379 }
380
381 #define kmap_px(px) kmap_page_dma(px_base(px))
382 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
383
384 #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
385 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
386 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
387 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
388
389 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
390                           const uint64_t val)
391 {
392         int i;
393         uint64_t * const vaddr = kmap_page_dma(p);
394
395         for (i = 0; i < 512; i++)
396                 vaddr[i] = val;
397
398         kunmap_page_dma(dev, vaddr);
399 }
400
401 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
402                              const uint32_t val32)
403 {
404         uint64_t v = val32;
405
406         v = v << 32 | val32;
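        /* Replicate the 32-bit value into both halves of a 64-bit word. */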
407
408         fill_page_dma(dev, p, v);
409 }
410
411 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
412 {
413         struct i915_page_scratch *sp;
414         int ret;
415
416         sp = kzalloc(sizeof(*sp), GFP_KERNEL);
417         if (sp == NULL)
418                 return ERR_PTR(-ENOMEM);
419
420         ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
421         if (ret) {
422                 kfree(sp);
423                 return ERR_PTR(ret);
424         }
425
426         set_pages_uc(px_page(sp), 1);
427
428         return sp;
429 }
430
431 static void free_scratch_page(struct drm_device *dev,
432                               struct i915_page_scratch *sp)
433 {
434         set_pages_wb(px_page(sp), 1);
435
436         cleanup_px(dev, sp);
437         kfree(sp);
438 }
439
440 static struct i915_page_table *alloc_pt(struct drm_device *dev)
441 {
442         struct i915_page_table *pt;
443         const size_t count = INTEL_INFO(dev)->gen >= 8 ?
444                 GEN8_PTES : GEN6_PTES;
445         int ret = -ENOMEM;
446
447         pt = kzalloc(sizeof(*pt), GFP_KERNEL);
448         if (!pt)
449                 return ERR_PTR(-ENOMEM);
450
451         pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
452                                 GFP_KERNEL);
453
454         if (!pt->used_ptes)
455                 goto fail_bitmap;
456
457         ret = setup_px(dev, pt);
458         if (ret)
459                 goto fail_page_m;
460
461         return pt;
462
463 fail_page_m:
464         kfree(pt->used_ptes);
465 fail_bitmap:
466         kfree(pt);
467
468         return ERR_PTR(ret);
469 }
470
471 static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
472 {
473         cleanup_px(dev, pt);
474         kfree(pt->used_ptes);
475         kfree(pt);
476 }
477
478 static void gen8_initialize_pt(struct i915_address_space *vm,
479                                struct i915_page_table *pt)
480 {
481         gen8_pte_t scratch_pte;
482
483         scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
484                                       I915_CACHE_LLC, true);
485
486         fill_px(vm->dev, pt, scratch_pte);
487 }
488
489 static void gen6_initialize_pt(struct i915_address_space *vm,
490                                struct i915_page_table *pt)
491 {
492         gen6_pte_t scratch_pte;
493
494         WARN_ON(px_dma(vm->scratch_page) == 0);
495
496         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
497                                      I915_CACHE_LLC, true, 0);
498
499         fill32_px(vm->dev, pt, scratch_pte);
500 }
501
502 static struct i915_page_directory *alloc_pd(struct drm_device *dev)
503 {
504         struct i915_page_directory *pd;
505         int ret = -ENOMEM;
506
507         pd = kzalloc(sizeof(*pd), GFP_KERNEL);
508         if (!pd)
509                 return ERR_PTR(-ENOMEM);
510
511         pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
512                                 sizeof(*pd->used_pdes), GFP_KERNEL);
513         if (!pd->used_pdes)
514                 goto fail_bitmap;
515
516         ret = setup_px(dev, pd);
517         if (ret)
518                 goto fail_page_m;
519
520         return pd;
521
522 fail_page_m:
523         kfree(pd->used_pdes);
524 fail_bitmap:
525         kfree(pd);
526
527         return ERR_PTR(ret);
528 }
529
530 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
531 {
532         if (px_page(pd)) {
533                 cleanup_px(dev, pd);
534                 kfree(pd->used_pdes);
535                 kfree(pd);
536         }
537 }
538
539 static void gen8_initialize_pd(struct i915_address_space *vm,
540                                struct i915_page_directory *pd)
541 {
542         gen8_pde_t scratch_pde;
543
544         scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
545
546         fill_px(vm->dev, pd, scratch_pde);
547 }
548
549 static int __pdp_init(struct drm_device *dev,
550                       struct i915_page_directory_pointer *pdp)
551 {
552         size_t pdpes = I915_PDPES_PER_PDP(dev);
553
554         pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
555                                   sizeof(unsigned long),
556                                   GFP_KERNEL);
557         if (!pdp->used_pdpes)
558                 return -ENOMEM;
559
560         pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
561                                       GFP_KERNEL);
562         if (!pdp->page_directory) {
563                 kfree(pdp->used_pdpes);
564                 /* the PDP might be the statically allocated top level. Keep it
565                  * as clean as possible */
566                 pdp->used_pdpes = NULL;
567                 return -ENOMEM;
568         }
569
570         return 0;
571 }
572
573 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
574 {
575         kfree(pdp->used_pdpes);
576         kfree(pdp->page_directory);
577         pdp->page_directory = NULL;
578 }
579
580 static struct
581 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
582 {
583         struct i915_page_directory_pointer *pdp;
584         int ret = -ENOMEM;
585
586         WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
587
588         pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
589         if (!pdp)
590                 return ERR_PTR(-ENOMEM);
591
592         ret = __pdp_init(dev, pdp);
593         if (ret)
594                 goto fail_bitmap;
595
596         ret = setup_px(dev, pdp);
597         if (ret)
598                 goto fail_page_m;
599
600         return pdp;
601
602 fail_page_m:
603         __pdp_fini(pdp);
604 fail_bitmap:
605         kfree(pdp);
606
607         return ERR_PTR(ret);
608 }
609
610 static void free_pdp(struct drm_device *dev,
611                      struct i915_page_directory_pointer *pdp)
612 {
613         __pdp_fini(pdp);
614         if (USES_FULL_48BIT_PPGTT(dev)) {
615                 cleanup_px(dev, pdp);
616                 kfree(pdp);
617         }
618 }
619
620 static void gen8_initialize_pdp(struct i915_address_space *vm,
621                                 struct i915_page_directory_pointer *pdp)
622 {
623         gen8_ppgtt_pdpe_t scratch_pdpe;
624
625         scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
626
627         fill_px(vm->dev, pdp, scratch_pdpe);
628 }
629
630 static void gen8_initialize_pml4(struct i915_address_space *vm,
631                                  struct i915_pml4 *pml4)
632 {
633         gen8_ppgtt_pml4e_t scratch_pml4e;
634
635         scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
636                                           I915_CACHE_LLC);
637
638         fill_px(vm->dev, pml4, scratch_pml4e);
639 }
640
641 static void
642 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
643                           struct i915_page_directory_pointer *pdp,
644                           struct i915_page_directory *pd,
645                           int index)
646 {
647         gen8_ppgtt_pdpe_t *page_directorypo;
648
649         if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
650                 return;
651
652         page_directorypo = kmap_px(pdp);
653         page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
654         kunmap_px(ppgtt, page_directorypo);
655 }
656
657 static void
658 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
659                                   struct i915_pml4 *pml4,
660                                   struct i915_page_directory_pointer *pdp,
661                                   int index)
662 {
663         gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
664
665         WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
666         pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
667         kunmap_px(ppgtt, pagemap);
668 }
669
670 /* Broadwell Page Directory Pointer Descriptors */
671 static int gen8_write_pdp(struct drm_i915_gem_request *req,
672                           unsigned entry,
673                           dma_addr_t addr)
674 {
675         struct intel_engine_cs *engine = req->engine;
676         int ret;
677
678         BUG_ON(entry >= 4);
679
680         ret = intel_ring_begin(req, 6);
681         if (ret)
682                 return ret;
683
684         intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
685         intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry));
686         intel_ring_emit(engine, upper_32_bits(addr));
687         intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
688         intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry));
689         intel_ring_emit(engine, lower_32_bits(addr));
690         intel_ring_advance(engine);
691
692         return 0;
693 }
694
695 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
696                                  struct drm_i915_gem_request *req)
697 {
698         int i, ret;
699
700         for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
701                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
702
703                 ret = gen8_write_pdp(req, i, pd_daddr);
704                 if (ret)
705                         return ret;
706         }
707
708         return 0;
709 }
710
711 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
712                               struct drm_i915_gem_request *req)
713 {
714         return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
715 }
716
717 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
718                                        struct i915_page_directory_pointer *pdp,
719                                        uint64_t start,
720                                        uint64_t length,
721                                        gen8_pte_t scratch_pte)
722 {
723         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
724         gen8_pte_t *pt_vaddr;
725         unsigned pdpe = gen8_pdpe_index(start);
726         unsigned pde = gen8_pde_index(start);
727         unsigned pte = gen8_pte_index(start);
728         unsigned num_entries = length >> PAGE_SHIFT;
729         unsigned last_pte, i;
730
731         if (WARN_ON(!pdp))
732                 return;
733
734         while (num_entries) {
735                 struct i915_page_directory *pd;
736                 struct i915_page_table *pt;
737
738                 if (WARN_ON(!pdp->page_directory[pdpe]))
739                         break;
740
741                 pd = pdp->page_directory[pdpe];
742
743                 if (WARN_ON(!pd->page_table[pde]))
744                         break;
745
746                 pt = pd->page_table[pde];
747
748                 if (WARN_ON(!px_page(pt)))
749                         break;
750
751                 last_pte = pte + num_entries;
752                 if (last_pte > GEN8_PTES)
753                         last_pte = GEN8_PTES;
754
755                 pt_vaddr = kmap_px(pt);
756
757                 for (i = pte; i < last_pte; i++) {
758                         pt_vaddr[i] = scratch_pte;
759                         num_entries--;
760                 }
761
762                 kunmap_px(ppgtt, pt_vaddr);
763
764                 pte = 0;
765                 if (++pde == I915_PDES) {
766                         if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
767                                 break;
768                         pde = 0;
769                 }
770         }
771 }
772
773 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
774                                    uint64_t start,
775                                    uint64_t length,
776                                    bool use_scratch)
777 {
778         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
779         gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
780                                                  I915_CACHE_LLC, use_scratch);
781
782         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
783                 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
784                                            scratch_pte);
785         } else {
786                 uint64_t pml4e;
787                 struct i915_page_directory_pointer *pdp;
788
789                 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
790                         gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
791                                                    scratch_pte);
792                 }
793         }
794 }
795
796 static void
797 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
798                               struct i915_page_directory_pointer *pdp,
799                               struct sg_page_iter *sg_iter,
800                               uint64_t start,
801                               enum i915_cache_level cache_level)
802 {
803         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
804         gen8_pte_t *pt_vaddr;
805         unsigned pdpe = gen8_pdpe_index(start);
806         unsigned pde = gen8_pde_index(start);
807         unsigned pte = gen8_pte_index(start);
808
809         pt_vaddr = NULL;
810
811         while (__sg_page_iter_next(sg_iter)) {
812                 if (pt_vaddr == NULL) {
813                         struct i915_page_directory *pd = pdp->page_directory[pdpe];
814                         struct i915_page_table *pt = pd->page_table[pde];
815                         pt_vaddr = kmap_px(pt);
816                 }
817
818                 pt_vaddr[pte] =
819                         gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
820                                         cache_level, true);
821                 if (++pte == GEN8_PTES) {
822                         kunmap_px(ppgtt, pt_vaddr);
823                         pt_vaddr = NULL;
824                         if (++pde == I915_PDES) {
825                                 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
826                                         break;
827                                 pde = 0;
828                         }
829                         pte = 0;
830                 }
831         }
832
833         if (pt_vaddr)
834                 kunmap_px(ppgtt, pt_vaddr);
835 }
836
837 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
838                                       struct sg_table *pages,
839                                       uint64_t start,
840                                       enum i915_cache_level cache_level,
841                                       u32 unused)
842 {
843         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
844         struct sg_page_iter sg_iter;
845
846         __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
847
848         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
849                 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
850                                               cache_level);
851         } else {
852                 struct i915_page_directory_pointer *pdp;
853                 uint64_t pml4e;
854                 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
855
856                 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
857                         gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
858                                                       start, cache_level);
859                 }
860         }
861 }
862
863 static void gen8_free_page_tables(struct drm_device *dev,
864                                   struct i915_page_directory *pd)
865 {
866         int i;
867
868         if (!px_page(pd))
869                 return;
870
871         for_each_set_bit(i, pd->used_pdes, I915_PDES) {
872                 if (WARN_ON(!pd->page_table[i]))
873                         continue;
874
875                 free_pt(dev, pd->page_table[i]);
876                 pd->page_table[i] = NULL;
877         }
878 }
879
880 static int gen8_init_scratch(struct i915_address_space *vm)
881 {
882         struct drm_device *dev = vm->dev;
883         int ret;
884
885         vm->scratch_page = alloc_scratch_page(dev);
886         if (IS_ERR(vm->scratch_page))
887                 return PTR_ERR(vm->scratch_page);
888
889         vm->scratch_pt = alloc_pt(dev);
890         if (IS_ERR(vm->scratch_pt)) {
891                 ret = PTR_ERR(vm->scratch_pt);
892                 goto free_scratch_page;
893         }
894
895         vm->scratch_pd = alloc_pd(dev);
896         if (IS_ERR(vm->scratch_pd)) {
897                 ret = PTR_ERR(vm->scratch_pd);
898                 goto free_pt;
899         }
900
901         if (USES_FULL_48BIT_PPGTT(dev)) {
902                 vm->scratch_pdp = alloc_pdp(dev);
903                 if (IS_ERR(vm->scratch_pdp)) {
904                         ret = PTR_ERR(vm->scratch_pdp);
905                         goto free_pd;
906                 }
907         }
908
909         gen8_initialize_pt(vm, vm->scratch_pt);
910         gen8_initialize_pd(vm, vm->scratch_pd);
911         if (USES_FULL_48BIT_PPGTT(dev))
912                 gen8_initialize_pdp(vm, vm->scratch_pdp);
913
914         return 0;
915
916 free_pd:
917         free_pd(dev, vm->scratch_pd);
918 free_pt:
919         free_pt(dev, vm->scratch_pt);
920 free_scratch_page:
921         free_scratch_page(dev, vm->scratch_page);
922
923         return ret;
924 }
925
926 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
927 {
928         enum vgt_g2v_type msg;
929         struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
930         int i;
931
932         if (USES_FULL_48BIT_PPGTT(dev_priv)) {
933                 u64 daddr = px_dma(&ppgtt->pml4);
934
935                 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
936                 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
937
938                 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
939                                 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
940         } else {
941                 for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
942                         u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
943
944                         I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
945                         I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
946                 }
947
948                 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
949                                 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
950         }
951
952         I915_WRITE(vgtif_reg(g2v_notify), msg);
953
954         return 0;
955 }
956
957 static void gen8_free_scratch(struct i915_address_space *vm)
958 {
959         struct drm_device *dev = vm->dev;
960
961         if (USES_FULL_48BIT_PPGTT(dev))
962                 free_pdp(dev, vm->scratch_pdp);
963         free_pd(dev, vm->scratch_pd);
964         free_pt(dev, vm->scratch_pt);
965         free_scratch_page(dev, vm->scratch_page);
966 }
967
968 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
969                                     struct i915_page_directory_pointer *pdp)
970 {
971         int i;
972
973         for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
974                 if (WARN_ON(!pdp->page_directory[i]))
975                         continue;
976
977                 gen8_free_page_tables(dev, pdp->page_directory[i]);
978                 free_pd(dev, pdp->page_directory[i]);
979         }
980
981         free_pdp(dev, pdp);
982 }
983
984 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
985 {
986         int i;
987
988         for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
989                 if (WARN_ON(!ppgtt->pml4.pdps[i]))
990                         continue;
991
992                 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
993         }
994
995         cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
996 }
997
998 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
999 {
1000         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1001
1002         if (intel_vgpu_active(to_i915(vm->dev)))
1003                 gen8_ppgtt_notify_vgt(ppgtt, false);
1004
1005         if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
1006                 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
1007         else
1008                 gen8_ppgtt_cleanup_4lvl(ppgtt);
1009
1010         gen8_free_scratch(vm);
1011 }
1012
1013 /**
1014  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
1015  * @vm: Master vm structure.
1016  * @pd: Page directory for this address range.
1017  * @start:      Starting virtual address to begin allocations.
1018  * @length:     Size of the allocations.
1019  * @new_pts:    Bitmap set by function with new allocations. Likely used by the
1020  *              caller to free on error.
1021  *
1022  * Allocate the required number of page tables. Extremely similar to
1023  * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
1024  * the page directory boundary (instead of the page directory pointer). That
1025  * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
1026  * possible, and likely that the caller will need to use multiple calls of this
1027  * function to achieve the appropriate allocation.
1028  *
1029  * Return: 0 if success; negative error code otherwise.
1030  */
1031 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1032                                      struct i915_page_directory *pd,
1033                                      uint64_t start,
1034                                      uint64_t length,
1035                                      unsigned long *new_pts)
1036 {
1037         struct drm_device *dev = vm->dev;
1038         struct i915_page_table *pt;
1039         uint32_t pde;
1040
1041         gen8_for_each_pde(pt, pd, start, length, pde) {
1042                 /* Don't reallocate page tables */
1043                 if (test_bit(pde, pd->used_pdes)) {
1044                         /* Scratch is never allocated this way */
1045                         WARN_ON(pt == vm->scratch_pt);
1046                         continue;
1047                 }
1048
1049                 pt = alloc_pt(dev);
1050                 if (IS_ERR(pt))
1051                         goto unwind_out;
1052
1053                 gen8_initialize_pt(vm, pt);
1054                 pd->page_table[pde] = pt;
1055                 __set_bit(pde, new_pts);
1056                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1057         }
1058
1059         return 0;
1060
1061 unwind_out:
1062         for_each_set_bit(pde, new_pts, I915_PDES)
1063                 free_pt(dev, pd->page_table[pde]);
1064
1065         return -ENOMEM;
1066 }
1067
1068 /**
1069  * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1070  * @vm: Master vm structure.
1071  * @pdp:        Page directory pointer for this address range.
1072  * @start:      Starting virtual address to begin allocations.
1073  * @length:     Size of the allocations.
1074  * @new_pds:    Bitmap set by function with new allocations. Likely used by the
1075  *              caller to free on error.
1076  *
1077  * Allocate the required number of page directories starting at the pdpe index
1078  * of @start, and ending at the pdpe index of @start + @length. This function will skip
1079  * over already allocated page directories within the range, and only allocate
1080  * new ones, setting the appropriate pointer within the pdp as well as the
1081  * correct position in the bitmap @new_pds.
1082  *
1083  * The function will only allocate the pages within the range for a given page
1084  * directory pointer. In other words, if @start + @length straddles a virtually
1085  * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1086  * required by the caller. This is not currently possible, and the BUG in the
1087  * code will prevent it.
1088  *
1089  * Return: 0 if success; negative error code otherwise.
1090  */
1091 static int
1092 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1093                                   struct i915_page_directory_pointer *pdp,
1094                                   uint64_t start,
1095                                   uint64_t length,
1096                                   unsigned long *new_pds)
1097 {
1098         struct drm_device *dev = vm->dev;
1099         struct i915_page_directory *pd;
1100         uint32_t pdpe;
1101         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1102
1103         WARN_ON(!bitmap_empty(new_pds, pdpes));
1104
1105         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1106                 if (test_bit(pdpe, pdp->used_pdpes))
1107                         continue;
1108
1109                 pd = alloc_pd(dev);
1110                 if (IS_ERR(pd))
1111                         goto unwind_out;
1112
1113                 gen8_initialize_pd(vm, pd);
1114                 pdp->page_directory[pdpe] = pd;
1115                 __set_bit(pdpe, new_pds);
1116                 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1117         }
1118
1119         return 0;
1120
1121 unwind_out:
1122         for_each_set_bit(pdpe, new_pds, pdpes)
1123                 free_pd(dev, pdp->page_directory[pdpe]);
1124
1125         return -ENOMEM;
1126 }
1127
1128 /**
1129  * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1130  * @vm: Master vm structure.
1131  * @pml4:       Page map level 4 for this address range.
1132  * @start:      Starting virtual address to begin allocations.
1133  * @length:     Size of the allocations.
1134  * @new_pdps:   Bitmap set by function with new allocations. Likely used by the
1135  *              caller to free on error.
1136  *
1137  * Allocate the required number of page directory pointers. Extremely similar to
1138  * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1139  * The main difference is here we are limited by the pml4 boundary (instead of
1140  * the page directory pointer).
1141  *
1142  * Return: 0 if success; negative error code otherwise.
1143  */
1144 static int
1145 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1146                                   struct i915_pml4 *pml4,
1147                                   uint64_t start,
1148                                   uint64_t length,
1149                                   unsigned long *new_pdps)
1150 {
1151         struct drm_device *dev = vm->dev;
1152         struct i915_page_directory_pointer *pdp;
1153         uint32_t pml4e;
1154
1155         WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1156
1157         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1158                 if (!test_bit(pml4e, pml4->used_pml4es)) {
1159                         pdp = alloc_pdp(dev);
1160                         if (IS_ERR(pdp))
1161                                 goto unwind_out;
1162
1163                         gen8_initialize_pdp(vm, pdp);
1164                         pml4->pdps[pml4e] = pdp;
1165                         __set_bit(pml4e, new_pdps);
1166                         trace_i915_page_directory_pointer_entry_alloc(vm,
1167                                                                       pml4e,
1168                                                                       start,
1169                                                                       GEN8_PML4E_SHIFT);
1170                 }
1171         }
1172
1173         return 0;
1174
1175 unwind_out:
1176         for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1177                 free_pdp(dev, pml4->pdps[pml4e]);
1178
1179         return -ENOMEM;
1180 }
1181
1182 static void
1183 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1184 {
1185         kfree(new_pts);
1186         kfree(new_pds);
1187 }
1188
1189 /* Allocates the page directory bitmap, and the array of page table bitmaps. Both
1190  * of these are sized based on the number of PDPEs in the system.
1191  */
1192 static
1193 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1194                                          unsigned long **new_pts,
1195                                          uint32_t pdpes)
1196 {
1197         unsigned long *pds;
1198         unsigned long *pts;
1199
1200         pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1201         if (!pds)
1202                 return -ENOMEM;
1203
1204         pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1205                       GFP_TEMPORARY);
1206         if (!pts)
1207                 goto err_out;
1208
1209         *new_pds = pds;
1210         *new_pts = pts;
1211
1212         return 0;
1213
1214 err_out:
1215         free_gen8_temp_bitmaps(pds, pts);
1216         return -ENOMEM;
1217 }
1218
1219 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1220  * the page table structures, we mark them dirty so that
1221  * context switching/execlist queuing code takes extra steps
1222  * to ensure that tlbs are flushed.
1223  */
1224 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1225 {
1226         ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1227 }
1228
1229 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1230                                     struct i915_page_directory_pointer *pdp,
1231                                     uint64_t start,
1232                                     uint64_t length)
1233 {
1234         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1235         unsigned long *new_page_dirs, *new_page_tables;
1236         struct drm_device *dev = vm->dev;
1237         struct i915_page_directory *pd;
1238         const uint64_t orig_start = start;
1239         const uint64_t orig_length = length;
1240         uint32_t pdpe;
1241         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1242         int ret;
1243
1244         /* Wrap is never okay since we can only represent 48b, and we don't
1245          * actually use the other side of the canonical address space.
1246          */
1247         if (WARN_ON(start + length < start))
1248                 return -ENODEV;
1249
1250         if (WARN_ON(start + length > vm->total))
1251                 return -ENODEV;
1252
1253         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1254         if (ret)
1255                 return ret;
1256
1257         /* Do the allocations first so we can easily bail out */
1258         ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1259                                                 new_page_dirs);
1260         if (ret) {
1261                 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1262                 return ret;
1263         }
1264
1265         /* For every page directory referenced, allocate page tables */
1266         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1267                 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1268                                                 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1269                 if (ret)
1270                         goto err_out;
1271         }
1272
1273         start = orig_start;
1274         length = orig_length;
1275
1276         /* Allocations have completed successfully, so set the bitmaps, and do
1277          * the mappings. */
1278         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1279                 gen8_pde_t *const page_directory = kmap_px(pd);
1280                 struct i915_page_table *pt;
1281                 uint64_t pd_len = length;
1282                 uint64_t pd_start = start;
1283                 uint32_t pde;
1284
1285                 /* Every pd should be allocated; we just did that above. */
1286                 WARN_ON(!pd);
1287
1288                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1289                         /* Same reasoning as pd */
1290                         WARN_ON(!pt);
1291                         WARN_ON(!pd_len);
1292                         WARN_ON(!gen8_pte_count(pd_start, pd_len));
1293
1294                         /* Set our used ptes within the page table */
1295                         bitmap_set(pt->used_ptes,
1296                                    gen8_pte_index(pd_start),
1297                                    gen8_pte_count(pd_start, pd_len));
1298
1299                         /* Our pde is now pointing to the pagetable, pt */
1300                         __set_bit(pde, pd->used_pdes);
1301
1302                         /* Map the PDE to the page table */
1303                         page_directory[pde] = gen8_pde_encode(px_dma(pt),
1304                                                               I915_CACHE_LLC);
1305                         trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1306                                                         gen8_pte_index(start),
1307                                                         gen8_pte_count(start, length),
1308                                                         GEN8_PTES);
1309
1310                         /* NB: We haven't yet mapped ptes to pages. At this
1311                          * point we're still relying on insert_entries() */
1312                 }
1313
1314                 kunmap_px(ppgtt, page_directory);
1315                 __set_bit(pdpe, pdp->used_pdpes);
1316                 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1317         }
1318
1319         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1320         mark_tlbs_dirty(ppgtt);
1321         return 0;
1322
1323 err_out:
1324         while (pdpe--) {
1325                 unsigned long temp;
1326
1327                 for_each_set_bit(temp, new_page_tables + pdpe *
1328                                 BITS_TO_LONGS(I915_PDES), I915_PDES)
1329                         free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1330         }
1331
1332         for_each_set_bit(pdpe, new_page_dirs, pdpes)
1333                 free_pd(dev, pdp->page_directory[pdpe]);
1334
1335         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1336         mark_tlbs_dirty(ppgtt);
1337         return ret;
1338 }
1339
1340 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1341                                     struct i915_pml4 *pml4,
1342                                     uint64_t start,
1343                                     uint64_t length)
1344 {
1345         DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1346         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1347         struct i915_page_directory_pointer *pdp;
1348         uint64_t pml4e;
1349         int ret = 0;
1350
1351         /* Do the pml4 allocations first, so we don't need to track the newly
1352          * allocated tables below the pdp */
1353         bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1354
1355         /* The page directory and page table allocations are done in the shared 3
1356          * and 4 level code. Just allocate the pdps.
1357          */
1358         ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1359                                                 new_pdps);
1360         if (ret)
1361                 return ret;
1362
1363         WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1364              "The allocation has spanned more than 512GB. "
1365              "It is highly likely this is incorrect.");
1366
1367         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1368                 WARN_ON(!pdp);
1369
1370                 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1371                 if (ret)
1372                         goto err_out;
1373
1374                 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1375         }
1376
1377         bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1378                   GEN8_PML4ES_PER_PML4);
1379
1380         return 0;
1381
1382 err_out:
1383         for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1384                 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1385
1386         return ret;
1387 }
1388
1389 static int gen8_alloc_va_range(struct i915_address_space *vm,
1390                                uint64_t start, uint64_t length)
1391 {
1392         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1393
1394         if (USES_FULL_48BIT_PPGTT(vm->dev))
1395                 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1396         else
1397                 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1398 }
1399
1400 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1401                           uint64_t start, uint64_t length,
1402                           gen8_pte_t scratch_pte,
1403                           struct seq_file *m)
1404 {
1405         struct i915_page_directory *pd;
1406         uint32_t pdpe;
1407
1408         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1409                 struct i915_page_table *pt;
1410                 uint64_t pd_len = length;
1411                 uint64_t pd_start = start;
1412                 uint32_t pde;
1413
1414                 if (!test_bit(pdpe, pdp->used_pdpes))
1415                         continue;
1416
1417                 seq_printf(m, "\tPDPE #%d\n", pdpe);
1418                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1419                         uint32_t  pte;
1420                         gen8_pte_t *pt_vaddr;
1421
1422                         if (!test_bit(pde, pd->used_pdes))
1423                                 continue;
1424
1425                         pt_vaddr = kmap_px(pt);
1426                         for (pte = 0; pte < GEN8_PTES; pte += 4) {
1427                                 uint64_t va =
1428                                         (pdpe << GEN8_PDPE_SHIFT) |
1429                                         (pde << GEN8_PDE_SHIFT) |
1430                                         (pte << GEN8_PTE_SHIFT);
1431                                 int i;
1432                                 bool found = false;
1433
1434                                 for (i = 0; i < 4; i++)
1435                                         if (pt_vaddr[pte + i] != scratch_pte)
1436                                                 found = true;
1437                                 if (!found)
1438                                         continue;
1439
1440                                 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1441                                 for (i = 0; i < 4; i++) {
1442                                         if (pt_vaddr[pte + i] != scratch_pte)
1443                                                 seq_printf(m, " %llx", pt_vaddr[pte + i]);
1444                                         else
1445                                                 seq_puts(m, "  SCRATCH ");
1446                                 }
1447                                 seq_puts(m, "\n");
1448                         }
1449                         /* don't use kunmap_px, it could trigger
1450                          * an unnecessary flush.
1451                          */
1452                         kunmap_atomic(pt_vaddr);
1453                 }
1454         }
1455 }
1456
1457 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1458 {
1459         struct i915_address_space *vm = &ppgtt->base;
1460         uint64_t start = ppgtt->base.start;
1461         uint64_t length = ppgtt->base.total;
1462         gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1463                                                  I915_CACHE_LLC, true);
1464
1465         if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1466                 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1467         } else {
1468                 uint64_t pml4e;
1469                 struct i915_pml4 *pml4 = &ppgtt->pml4;
1470                 struct i915_page_directory_pointer *pdp;
1471
1472                 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1473                         if (!test_bit(pml4e, pml4->used_pml4es))
1474                                 continue;
1475
1476                         seq_printf(m, "    PML4E #%llu\n", pml4e);
1477                         gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1478                 }
1479         }
1480 }
1481
1482 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1483 {
1484         unsigned long *new_page_dirs, *new_page_tables;
1485         uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1486         int ret;
1487
1488         /* We allocate a temp bitmap for page tables for no gain,
1489          * but as this is for init only, let's keep things simple.
1490          */
1491         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1492         if (ret)
1493                 return ret;
1494
1495         /* Allocate for all pdps regardless of how the ppgtt
1496          * was defined.
1497          */
1498         ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1499                                                 0, 1ULL << 32,
1500                                                 new_page_dirs);
1501         if (!ret)
1502                 *ppgtt->pdp.used_pdpes = *new_page_dirs;
1503
1504         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1505
1506         return ret;
1507 }
1508
1509 /*
1510  * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
1511  * registers, with a net effect resembling a 2-level page table in normal x86
1512  * terms. Each PDP entry covers 1GB of memory (512 PDEs * 512 PTEs * 4096
1513  * bytes), so the 4 entries together span the 4GB legacy 32b address space.
1514  *
1515  */
1516 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1517 {
1518         int ret;
1519
1520         ret = gen8_init_scratch(&ppgtt->base);
1521         if (ret)
1522                 return ret;
1523
1524         ppgtt->base.start = 0;
1525         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1526         ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1527         ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1528         ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1529         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1530         ppgtt->base.bind_vma = ppgtt_bind_vma;
1531         ppgtt->debug_dump = gen8_dump_ppgtt;
1532
1533         if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1534                 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1535                 if (ret)
1536                         goto free_scratch;
1537
1538                 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1539
1540                 ppgtt->base.total = 1ULL << 48;
1541                 ppgtt->switch_mm = gen8_48b_mm_switch;
1542         } else {
1543                 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1544                 if (ret)
1545                         goto free_scratch;
1546
1547                 ppgtt->base.total = 1ULL << 32;
1548                 ppgtt->switch_mm = gen8_legacy_mm_switch;
1549                 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1550                                                               0, 0,
1551                                                               GEN8_PML4E_SHIFT);
1552
1553                 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) {
1554                         ret = gen8_preallocate_top_level_pdps(ppgtt);
1555                         if (ret)
1556                                 goto free_scratch;
1557                 }
1558         }
1559
1560         if (intel_vgpu_active(to_i915(ppgtt->base.dev)))
1561                 gen8_ppgtt_notify_vgt(ppgtt, true);
1562
1563         return 0;
1564
1565 free_scratch:
1566         gen8_free_scratch(&ppgtt->base);
1567         return ret;
1568 }
1569
1570 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1571 {
1572         struct i915_address_space *vm = &ppgtt->base;
1573         struct i915_page_table *unused;
1574         gen6_pte_t scratch_pte;
1575         uint32_t pd_entry;
1576         uint32_t  pte, pde;
1577         uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1578
1579         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1580                                      I915_CACHE_LLC, true, 0);
1581
1582         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
1583                 u32 expected;
1584                 gen6_pte_t *pt_vaddr;
1585                 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1586                 pd_entry = readl(ppgtt->pd_addr + pde);
1587                 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1588
1589                 if (pd_entry != expected)
1590                         seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1591                                    pde,
1592                                    pd_entry,
1593                                    expected);
1594                 seq_printf(m, "\tPDE: %x\n", pd_entry);
1595
1596                 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1597
1598                 for (pte = 0; pte < GEN6_PTES; pte+=4) {
1599                         unsigned long va =
1600                                 (pde * PAGE_SIZE * GEN6_PTES) +
1601                                 (pte * PAGE_SIZE);
1602                         int i;
1603                         bool found = false;
1604                         for (i = 0; i < 4; i++)
1605                                 if (pt_vaddr[pte + i] != scratch_pte)
1606                                         found = true;
1607                         if (!found)
1608                                 continue;
1609
1610                         seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1611                         for (i = 0; i < 4; i++) {
1612                                 if (pt_vaddr[pte + i] != scratch_pte)
1613                                         seq_printf(m, " %08x", pt_vaddr[pte + i]);
1614                                 else
1615                                         seq_puts(m, "  SCRATCH ");
1616                         }
1617                         seq_puts(m, "\n");
1618                 }
1619                 kunmap_px(ppgtt, pt_vaddr);
1620         }
1621 }
1622
1623 /* Write the PDE at index @pde in page directory @pd to point at page table @pt */
1624 static void gen6_write_pde(struct i915_page_directory *pd,
1625                             const int pde, struct i915_page_table *pt)
1626 {
1627         /* Caller needs to make sure the write completes if necessary */
1628         struct i915_hw_ppgtt *ppgtt =
1629                 container_of(pd, struct i915_hw_ppgtt, pd);
1630         u32 pd_entry;
1631
1632         pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1633         pd_entry |= GEN6_PDE_VALID;
1634
1635         writel(pd_entry, ppgtt->pd_addr + pde);
1636 }
1637
1638 /* Write all the page tables found in the given page directory to their
1639  * consecutive page directory entries. */
1640 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1641                                   struct i915_page_directory *pd,
1642                                   uint32_t start, uint32_t length)
1643 {
1644         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1645         struct i915_page_table *pt;
1646         uint32_t pde;
1647
1648         gen6_for_each_pde(pt, pd, start, length, pde)
1649                 gen6_write_pde(pd, pde, pt);
1650
1651         /* Make sure write is complete before other code can use this page
1652          * table. Also required for WC mapped PTEs. */
1653         readl(ggtt->gsm);
1654 }
1655
1656 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1657 {
1658         BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1659
1660         return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1661 }
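/*
 * Worked example of the encoding above (illustrative numbers only): the
 * gen6/7 page directory lives in the GGTT, and get_pd_offset() packs its
 * byte offset into the value written to RING_PP_DIR_BASE by the mm-switch
 * helpers below. A directory at GGTT offset 0x800 (64-byte aligned, as the
 * BUG_ON requires) yields (0x800 / 64) << 16 == 0x200000.
 */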
1662
1663 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1664                          struct drm_i915_gem_request *req)
1665 {
1666         struct intel_engine_cs *engine = req->engine;
1667         int ret;
1668
1669         /* NB: TLBs must be flushed and invalidated before a switch */
1670         ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1671         if (ret)
1672                 return ret;
1673
1674         ret = intel_ring_begin(req, 6);
1675         if (ret)
1676                 return ret;
1677
1678         intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2));
1679         intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine));
1680         intel_ring_emit(engine, PP_DIR_DCLV_2G);
1681         intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine));
1682         intel_ring_emit(engine, get_pd_offset(ppgtt));
1683         intel_ring_emit(engine, MI_NOOP);
1684         intel_ring_advance(engine);
1685
1686         return 0;
1687 }
1688
1689 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1690                           struct drm_i915_gem_request *req)
1691 {
1692         struct intel_engine_cs *engine = req->engine;
1693         int ret;
1694
1695         /* NB: TLBs must be flushed and invalidated before a switch */
1696         ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1697         if (ret)
1698                 return ret;
1699
1700         ret = intel_ring_begin(req, 6);
1701         if (ret)
1702                 return ret;
1703
1704         intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2));
1705         intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine));
1706         intel_ring_emit(engine, PP_DIR_DCLV_2G);
1707         intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine));
1708         intel_ring_emit(engine, get_pd_offset(ppgtt));
1709         intel_ring_emit(engine, MI_NOOP);
1710         intel_ring_advance(engine);
1711
1712         /* XXX: RCS is the only one to auto invalidate the TLBs? */
1713         if (engine->id != RCS) {
1714                 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1715                 if (ret)
1716                         return ret;
1717         }
1718
1719         return 0;
1720 }
1721
1722 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1723                           struct drm_i915_gem_request *req)
1724 {
1725         struct intel_engine_cs *engine = req->engine;
1726         struct drm_i915_private *dev_priv = req->i915;
1727
1728         I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1729         I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1730         return 0;
1731 }
1732
1733 static void gen8_ppgtt_enable(struct drm_device *dev)
1734 {
1735         struct drm_i915_private *dev_priv = to_i915(dev);
1736         struct intel_engine_cs *engine;
1737
1738         for_each_engine(engine, dev_priv) {
1739                 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1740                 I915_WRITE(RING_MODE_GEN7(engine),
1741                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1742         }
1743 }
1744
1745 static void gen7_ppgtt_enable(struct drm_device *dev)
1746 {
1747         struct drm_i915_private *dev_priv = to_i915(dev);
1748         struct intel_engine_cs *engine;
1749         uint32_t ecochk, ecobits;
1750
1751         ecobits = I915_READ(GAC_ECO_BITS);
1752         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1753
1754         ecochk = I915_READ(GAM_ECOCHK);
1755         if (IS_HASWELL(dev)) {
1756                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1757         } else {
1758                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1759                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1760         }
1761         I915_WRITE(GAM_ECOCHK, ecochk);
1762
1763         for_each_engine(engine, dev_priv) {
1764                 /* GFX_MODE is per-ring on gen7+ */
1765                 I915_WRITE(RING_MODE_GEN7(engine),
1766                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1767         }
1768 }
1769
1770 static void gen6_ppgtt_enable(struct drm_device *dev)
1771 {
1772         struct drm_i915_private *dev_priv = to_i915(dev);
1773         uint32_t ecochk, gab_ctl, ecobits;
1774
1775         ecobits = I915_READ(GAC_ECO_BITS);
1776         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1777                    ECOBITS_PPGTT_CACHE64B);
1778
1779         gab_ctl = I915_READ(GAB_CTL);
1780         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1781
1782         ecochk = I915_READ(GAM_ECOCHK);
1783         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1784
1785         I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1786 }
1787
1788 /* PPGTT support for Sandybridge/Gen6 and later */
1789 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1790                                    uint64_t start,
1791                                    uint64_t length,
1792                                    bool use_scratch)
1793 {
1794         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1795         gen6_pte_t *pt_vaddr, scratch_pte;
1796         unsigned first_entry = start >> PAGE_SHIFT;
1797         unsigned num_entries = length >> PAGE_SHIFT;
1798         unsigned act_pt = first_entry / GEN6_PTES;
1799         unsigned first_pte = first_entry % GEN6_PTES;
1800         unsigned last_pte, i;
1801
1802         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1803                                      I915_CACHE_LLC, true, 0);
1804
1805         while (num_entries) {
1806                 last_pte = first_pte + num_entries;
1807                 if (last_pte > GEN6_PTES)
1808                         last_pte = GEN6_PTES;
1809
1810                 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1811
1812                 for (i = first_pte; i < last_pte; i++)
1813                         pt_vaddr[i] = scratch_pte;
1814
1815                 kunmap_px(ppgtt, pt_vaddr);
1816
1817                 num_entries -= last_pte - first_pte;
1818                 first_pte = 0;
1819                 act_pt++;
1820         }
1821 }
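/*
 * Index arithmetic sketch for the gen6 walkers above and below (numbers are
 * illustrative, assuming GEN6_PTES == 1024 and I915_PDES == 512): a clear
 * starting at offset 0x00801000 gives first_entry = 0x801 = 2049, i.e. page
 * table 2, PTE 1. With 512 page tables of 1024 PTEs each, the full gen6
 * PPGTT spans 512 * 1024 * 4096 bytes = 2 GiB, matching ppgtt->base.total
 * as set in gen6_ppgtt_init().
 */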
1822
1823 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1824                                       struct sg_table *pages,
1825                                       uint64_t start,
1826                                       enum i915_cache_level cache_level, u32 flags)
1827 {
1828         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1829         unsigned first_entry = start >> PAGE_SHIFT;
1830         unsigned act_pt = first_entry / GEN6_PTES;
1831         unsigned act_pte = first_entry % GEN6_PTES;
1832         gen6_pte_t *pt_vaddr = NULL;
1833         struct sgt_iter sgt_iter;
1834         dma_addr_t addr;
1835
1836         for_each_sgt_dma(addr, sgt_iter, pages) {
1837                 if (pt_vaddr == NULL)
1838                         pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1839
1840                 pt_vaddr[act_pte] =
1841                         vm->pte_encode(addr, cache_level, true, flags);
1842
1843                 if (++act_pte == GEN6_PTES) {
1844                         kunmap_px(ppgtt, pt_vaddr);
1845                         pt_vaddr = NULL;
1846                         act_pt++;
1847                         act_pte = 0;
1848                 }
1849         }
1850
1851         if (pt_vaddr)
1852                 kunmap_px(ppgtt, pt_vaddr);
1853 }
1854
1855 static int gen6_alloc_va_range(struct i915_address_space *vm,
1856                                uint64_t start_in, uint64_t length_in)
1857 {
1858         DECLARE_BITMAP(new_page_tables, I915_PDES);
1859         struct drm_device *dev = vm->dev;
1860         struct drm_i915_private *dev_priv = to_i915(dev);
1861         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1862         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1863         struct i915_page_table *pt;
1864         uint32_t start, length, start_save, length_save;
1865         uint32_t pde;
1866         int ret;
1867
1868         if (WARN_ON(start_in + length_in > ppgtt->base.total))
1869                 return -ENODEV;
1870
1871         start = start_save = start_in;
1872         length = length_save = length_in;
1873
1874         bitmap_zero(new_page_tables, I915_PDES);
1875
1876         /* The allocation is done in two stages so that we can bail out with a
1877          * minimal amount of pain. The first stage finds new page tables that
1878          * need allocation. The second stage marks the PTEs in use within those
1879          * page tables.
1880          */
1881         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1882                 if (pt != vm->scratch_pt) {
1883                         WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1884                         continue;
1885                 }
1886
1887                 /* We've already allocated a page table */
1888                 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1889
1890                 pt = alloc_pt(dev);
1891                 if (IS_ERR(pt)) {
1892                         ret = PTR_ERR(pt);
1893                         goto unwind_out;
1894                 }
1895
1896                 gen6_initialize_pt(vm, pt);
1897
1898                 ppgtt->pd.page_table[pde] = pt;
1899                 __set_bit(pde, new_page_tables);
1900                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1901         }
1902
1903         start = start_save;
1904         length = length_save;
1905
1906         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1907                 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1908
1909                 bitmap_zero(tmp_bitmap, GEN6_PTES);
1910                 bitmap_set(tmp_bitmap, gen6_pte_index(start),
1911                            gen6_pte_count(start, length));
1912
1913                 if (__test_and_clear_bit(pde, new_page_tables))
1914                         gen6_write_pde(&ppgtt->pd, pde, pt);
1915
1916                 trace_i915_page_table_entry_map(vm, pde, pt,
1917                                          gen6_pte_index(start),
1918                                          gen6_pte_count(start, length),
1919                                          GEN6_PTES);
1920                 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1921                                 GEN6_PTES);
1922         }
1923
1924         WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1925
1926         /* Make sure write is complete before other code can use this page
1927          * table. Also required for WC mapped PTEs. */
1928         readl(ggtt->gsm);
1929
1930         mark_tlbs_dirty(ppgtt);
1931         return 0;
1932
1933 unwind_out:
1934         for_each_set_bit(pde, new_page_tables, I915_PDES) {
1935                 struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1936
1937                 ppgtt->pd.page_table[pde] = vm->scratch_pt;
1938                 free_pt(vm->dev, pt);
1939         }
1940
1941         mark_tlbs_dirty(ppgtt);
1942         return ret;
1943 }
1944
1945 static int gen6_init_scratch(struct i915_address_space *vm)
1946 {
1947         struct drm_device *dev = vm->dev;
1948
1949         vm->scratch_page = alloc_scratch_page(dev);
1950         if (IS_ERR(vm->scratch_page))
1951                 return PTR_ERR(vm->scratch_page);
1952
1953         vm->scratch_pt = alloc_pt(dev);
1954         if (IS_ERR(vm->scratch_pt)) {
1955                 free_scratch_page(dev, vm->scratch_page);
1956                 return PTR_ERR(vm->scratch_pt);
1957         }
1958
1959         gen6_initialize_pt(vm, vm->scratch_pt);
1960
1961         return 0;
1962 }
1963
1964 static void gen6_free_scratch(struct i915_address_space *vm)
1965 {
1966         struct drm_device *dev = vm->dev;
1967
1968         free_pt(dev, vm->scratch_pt);
1969         free_scratch_page(dev, vm->scratch_page);
1970 }
1971
1972 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1973 {
1974         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1975         struct i915_page_directory *pd = &ppgtt->pd;
1976         struct drm_device *dev = vm->dev;
1977         struct i915_page_table *pt;
1978         uint32_t pde;
1979
1980         drm_mm_remove_node(&ppgtt->node);
1981
1982         gen6_for_all_pdes(pt, pd, pde)
1983                 if (pt != vm->scratch_pt)
1984                         free_pt(dev, pt);
1985
1986         gen6_free_scratch(vm);
1987 }
1988
1989 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1990 {
1991         struct i915_address_space *vm = &ppgtt->base;
1992         struct drm_device *dev = ppgtt->base.dev;
1993         struct drm_i915_private *dev_priv = to_i915(dev);
1994         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1995         bool retried = false;
1996         int ret;
1997
1998         /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1999          * allocator works in address space sizes, so it's multiplied by page
2000          * size. We allocate at the top of the GTT to avoid fragmentation.
2001          */
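	/* Worked example (illustrative, assuming I915_PDES == 512 and 4 KiB
	 * pages): each of the 512 PDEs occupies one GGTT PTE slot, and since
	 * one GGTT PTE maps one page of address space, the reservation below
	 * is presumably GEN6_PD_SIZE == 512 * 4096 == 2 MiB of GGTT range.
	 */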
2002         BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
2003
2004         ret = gen6_init_scratch(vm);
2005         if (ret)
2006                 return ret;
2007
2008 alloc:
2009         ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
2010                                                   &ppgtt->node, GEN6_PD_SIZE,
2011                                                   GEN6_PD_ALIGN, 0,
2012                                                   0, ggtt->base.total,
2013                                                   DRM_MM_TOPDOWN);
2014         if (ret == -ENOSPC && !retried) {
2015                 ret = i915_gem_evict_something(dev, &ggtt->base,
2016                                                GEN6_PD_SIZE, GEN6_PD_ALIGN,
2017                                                I915_CACHE_NONE,
2018                                                0, ggtt->base.total,
2019                                                0);
2020                 if (ret)
2021                         goto err_out;
2022
2023                 retried = true;
2024                 goto alloc;
2025         }
2026
2027         if (ret)
2028                 goto err_out;
2029
2030
2031         if (ppgtt->node.start < ggtt->mappable_end)
2032                 DRM_DEBUG("Forced to use aperture for PDEs\n");
2033
2034         return 0;
2035
2036 err_out:
2037         gen6_free_scratch(vm);
2038         return ret;
2039 }
2040
2041 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2042 {
2043         return gen6_ppgtt_allocate_page_directories(ppgtt);
2044 }
2045
2046 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2047                                   uint64_t start, uint64_t length)
2048 {
2049         struct i915_page_table *unused;
2050         uint32_t pde;
2051
2052         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
2053                 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2054 }
2055
2056 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2057 {
2058         struct drm_device *dev = ppgtt->base.dev;
2059         struct drm_i915_private *dev_priv = to_i915(dev);
2060         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2061         int ret;
2062
2063         ppgtt->base.pte_encode = ggtt->base.pte_encode;
2064         if (intel_vgpu_active(dev_priv) || IS_GEN6(dev))
2065                 ppgtt->switch_mm = gen6_mm_switch;
2066         else if (IS_HASWELL(dev))
2067                 ppgtt->switch_mm = hsw_mm_switch;
2068         else if (IS_GEN7(dev))
2069                 ppgtt->switch_mm = gen7_mm_switch;
2070         else
2071                 BUG();
2072
2073         ret = gen6_ppgtt_alloc(ppgtt);
2074         if (ret)
2075                 return ret;
2076
2077         ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2078         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2079         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2080         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2081         ppgtt->base.bind_vma = ppgtt_bind_vma;
2082         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2083         ppgtt->base.start = 0;
2084         ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2085         ppgtt->debug_dump = gen6_dump_ppgtt;
2086
2087         ppgtt->pd.base.ggtt_offset =
2088                 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2089
2090         ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
2091                 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
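	/* Illustrative numbers (assuming 4-byte gen6 PTEs): if the PD node
	 * landed at GGTT address 0x00200000, then ggtt_offset is
	 * (0x00200000 / 4096) * 4 = 0x800, pd_addr points at GGTT PTE slot
	 * 512, and get_pd_offset() above would report (0x800 / 64) << 16 =
	 * 0x200000.
	 */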
2092
2093         gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2094
2095         gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2096
2097         DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2098                          ppgtt->node.size >> 20,
2099                          ppgtt->node.start / PAGE_SIZE);
2100
2101         DRM_DEBUG("Adding PPGTT at offset %x\n",
2102                   ppgtt->pd.base.ggtt_offset << 10);
2103
2104         return 0;
2105 }
2106
2107 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2108 {
2109         ppgtt->base.dev = dev;
2110
2111         if (INTEL_INFO(dev)->gen < 8)
2112                 return gen6_ppgtt_init(ppgtt);
2113         else
2114                 return gen8_ppgtt_init(ppgtt);
2115 }
2116
2117 static void i915_address_space_init(struct i915_address_space *vm,
2118                                     struct drm_i915_private *dev_priv)
2119 {
2120         drm_mm_init(&vm->mm, vm->start, vm->total);
2121         vm->dev = &dev_priv->drm;
2122         INIT_LIST_HEAD(&vm->active_list);
2123         INIT_LIST_HEAD(&vm->inactive_list);
2124         list_add_tail(&vm->global_link, &dev_priv->vm_list);
2125 }
2126
2127 static void gtt_write_workarounds(struct drm_device *dev)
2128 {
2129         struct drm_i915_private *dev_priv = to_i915(dev);
2130
2131         /* This function is for GTT-related workarounds. It is called on
2132          * driver load and after a GPU reset, so you can place workarounds
2133          * here even if they get overwritten by a GPU reset.
2134          */
2135         /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
2136         if (IS_BROADWELL(dev))
2137                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2138         else if (IS_CHERRYVIEW(dev))
2139                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2140         else if (IS_SKYLAKE(dev))
2141                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2142         else if (IS_BROXTON(dev))
2143                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2144 }
2145
2146 static int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2147 {
2148         struct drm_i915_private *dev_priv = to_i915(dev);
2149         int ret = 0;
2150
2151         ret = __hw_ppgtt_init(dev, ppgtt);
2152         if (ret == 0) {
2153                 kref_init(&ppgtt->ref);
2154                 i915_address_space_init(&ppgtt->base, dev_priv);
2155         }
2156
2157         return ret;
2158 }
2159
2160 int i915_ppgtt_init_hw(struct drm_device *dev)
2161 {
2162         gtt_write_workarounds(dev);
2163
2164         /* In the case of execlists, PPGTT is enabled by the context descriptor
2165          * and the PDPs are contained within the context itself.  We don't
2166          * need to do anything here. */
2167         if (i915.enable_execlists)
2168                 return 0;
2169
2170         if (!USES_PPGTT(dev))
2171                 return 0;
2172
2173         if (IS_GEN6(dev))
2174                 gen6_ppgtt_enable(dev);
2175         else if (IS_GEN7(dev))
2176                 gen7_ppgtt_enable(dev);
2177         else if (INTEL_INFO(dev)->gen >= 8)
2178                 gen8_ppgtt_enable(dev);
2179         else
2180                 MISSING_CASE(INTEL_INFO(dev)->gen);
2181
2182         return 0;
2183 }
2184
2185 struct i915_hw_ppgtt *
2186 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2187 {
2188         struct i915_hw_ppgtt *ppgtt;
2189         int ret;
2190
2191         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2192         if (!ppgtt)
2193                 return ERR_PTR(-ENOMEM);
2194
2195         ret = i915_ppgtt_init(dev, ppgtt);
2196         if (ret) {
2197                 kfree(ppgtt);
2198                 return ERR_PTR(ret);
2199         }
2200
2201         ppgtt->file_priv = fpriv;
2202
2203         trace_i915_ppgtt_create(&ppgtt->base);
2204
2205         return ppgtt;
2206 }
2207
2208 void i915_ppgtt_release(struct kref *kref)
2209 {
2210         struct i915_hw_ppgtt *ppgtt =
2211                 container_of(kref, struct i915_hw_ppgtt, ref);
2212
2213         trace_i915_ppgtt_release(&ppgtt->base);
2214
2215         /* vmas should already be unbound */
2216         WARN_ON(!list_empty(&ppgtt->base.active_list));
2217         WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2218
2219         list_del(&ppgtt->base.global_link);
2220         drm_mm_takedown(&ppgtt->base.mm);
2221
2222         ppgtt->base.cleanup(&ppgtt->base);
2223         kfree(ppgtt);
2224 }
2225
2226 extern int intel_iommu_gfx_mapped;
2227 /* Certain Gen5 chipsets require idling the GPU before
2228  * unmapping anything from the GTT when VT-d is enabled.
2229  */
2230 static bool needs_idle_maps(struct drm_device *dev)
2231 {
2232 #ifdef CONFIG_INTEL_IOMMU
2233         /* Query intel_iommu to see if we need the workaround. Presumably that
2234          * was loaded first.
2235          */
2236         if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2237                 return true;
2238 #endif
2239         return false;
2240 }
2241
2242 static bool do_idling(struct drm_i915_private *dev_priv)
2243 {
2244         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2245         bool ret = dev_priv->mm.interruptible;
2246
2247         if (unlikely(ggtt->do_idle_maps)) {
2248                 dev_priv->mm.interruptible = false;
2249                 if (i915_gem_wait_for_idle(dev_priv)) {
2250                         DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2251                         /* Wait a bit, in hopes it avoids the hang */
2252                         udelay(10);
2253                 }
2254         }
2255
2256         return ret;
2257 }
2258
2259 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2260 {
2261         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2262
2263         if (unlikely(ggtt->do_idle_maps))
2264                 dev_priv->mm.interruptible = interruptible;
2265 }
2266
2267 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2268 {
2269         struct intel_engine_cs *engine;
2270
2271         if (INTEL_INFO(dev_priv)->gen < 6)
2272                 return;
2273
2274         for_each_engine(engine, dev_priv) {
2275                 u32 fault_reg;
2276                 fault_reg = I915_READ(RING_FAULT_REG(engine));
2277                 if (fault_reg & RING_FAULT_VALID) {
2278                         DRM_DEBUG_DRIVER("Unexpected fault\n"
2279                                          "\tAddr: 0x%08lx\n"
2280                                          "\tAddress space: %s\n"
2281                                          "\tSource ID: %d\n"
2282                                          "\tType: %d\n",
2283                                          fault_reg & PAGE_MASK,
2284                                          fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2285                                          RING_FAULT_SRCID(fault_reg),
2286                                          RING_FAULT_FAULT_TYPE(fault_reg));
2287                         I915_WRITE(RING_FAULT_REG(engine),
2288                                    fault_reg & ~RING_FAULT_VALID);
2289                 }
2290         }
2291         POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS]));
2292 }
2293
2294 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2295 {
2296         if (INTEL_INFO(dev_priv)->gen < 6) {
2297                 intel_gtt_chipset_flush();
2298         } else {
2299                 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2300                 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2301         }
2302 }
2303
2304 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2305 {
2306         struct drm_i915_private *dev_priv = to_i915(dev);
2307         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2308
2309         /* Don't bother messing with faults pre GEN6 as we have little
2310          * documentation supporting that it's a good idea.
2311          */
2312         if (INTEL_INFO(dev)->gen < 6)
2313                 return;
2314
2315         i915_check_and_clear_faults(dev_priv);
2316
2317         ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
2318                              true);
2319
2320         i915_ggtt_flush(dev_priv);
2321 }
2322
2323 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2324 {
2325         if (!dma_map_sg(&obj->base.dev->pdev->dev,
2326                         obj->pages->sgl, obj->pages->nents,
2327                         PCI_DMA_BIDIRECTIONAL))
2328                 return -ENOSPC;
2329
2330         return 0;
2331 }
2332
2333 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2334 {
2335 #ifdef writeq
2336         writeq(pte, addr);
2337 #else
2338         iowrite32((u32)pte, addr);
2339         iowrite32(pte >> 32, addr + 4);
2340 #endif
2341 }
2342
2343 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2344                                   dma_addr_t addr,
2345                                   uint64_t offset,
2346                                   enum i915_cache_level level,
2347                                   u32 unused)
2348 {
2349         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2350         gen8_pte_t __iomem *pte =
2351                 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
2352                 (offset >> PAGE_SHIFT);
2353         int rpm_atomic_seq;
2354
2355         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2356
2357         gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
2358
2359         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2360         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2361
2362         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2363 }
2364
2365 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2366                                      struct sg_table *st,
2367                                      uint64_t start,
2368                                      enum i915_cache_level level, u32 unused)
2369 {
2370         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2371         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2372         struct sgt_iter sgt_iter;
2373         gen8_pte_t __iomem *gtt_entries;
2374         gen8_pte_t gtt_entry;
2375         dma_addr_t addr;
2376         int rpm_atomic_seq;
2377         int i = 0;
2378
2379         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2380
2381         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2382
2383         for_each_sgt_dma(addr, sgt_iter, st) {
2384                 gtt_entry = gen8_pte_encode(addr, level, true);
2385                 gen8_set_pte(&gtt_entries[i++], gtt_entry);
2386         }
2387
2388         /*
2389          * XXX: This serves as a posting read to make sure that the PTE has
2390          * actually been updated. There is some concern that even though
2391          * registers and PTEs are within the same BAR, they may be subject to
2392          * NUMA-like access patterns. Therefore, even with the way we assume
2393          * hardware should work, we must keep this posting read for paranoia.
2394          */
2395         if (i != 0)
2396                 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
2397
2398         /* This next bit makes the above posting read even more important. We
2399          * want to flush the TLBs only after we're certain all the PTE updates
2400          * have finished.
2401          */
2402         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2403         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2404
2405         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2406 }
2407
2408 struct insert_entries {
2409         struct i915_address_space *vm;
2410         struct sg_table *st;
2411         uint64_t start;
2412         enum i915_cache_level level;
2413         u32 flags;
2414 };
2415
2416 static int gen8_ggtt_insert_entries__cb(void *_arg)
2417 {
2418         struct insert_entries *arg = _arg;
2419         gen8_ggtt_insert_entries(arg->vm, arg->st,
2420                                  arg->start, arg->level, arg->flags);
2421         return 0;
2422 }
2423
2424 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2425                                           struct sg_table *st,
2426                                           uint64_t start,
2427                                           enum i915_cache_level level,
2428                                           u32 flags)
2429 {
2430         struct insert_entries arg = { vm, st, start, level, flags };
2431         stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
2432 }
2433
2434 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2435                                   dma_addr_t addr,
2436                                   uint64_t offset,
2437                                   enum i915_cache_level level,
2438                                   u32 flags)
2439 {
2440         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2441         gen6_pte_t __iomem *pte =
2442                 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
2443                 (offset >> PAGE_SHIFT);
2444         int rpm_atomic_seq;
2445
2446         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2447
2448         iowrite32(vm->pte_encode(addr, level, true, flags), pte);
2449
2450         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2451         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2452
2453         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2454 }
2455
2456 /*
2457  * Binds an object into the global gtt with the specified cache level. The object
2458  * will be accessible to the GPU via commands whose operands reference offsets
2459  * within the global GTT as well as accessible by the GPU through the GMADR
2460  * mapped BAR (dev_priv->mm.gtt->gtt).
2461  */
2462 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2463                                      struct sg_table *st,
2464                                      uint64_t start,
2465                                      enum i915_cache_level level, u32 flags)
2466 {
2467         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2468         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2469         struct sgt_iter sgt_iter;
2470         gen6_pte_t __iomem *gtt_entries;
2471         gen6_pte_t gtt_entry;
2472         dma_addr_t addr;
2473         int rpm_atomic_seq;
2474         int i = 0;
2475
2476         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2477
2478         gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2479
2480         for_each_sgt_dma(addr, sgt_iter, st) {
2481                 gtt_entry = vm->pte_encode(addr, level, true, flags);
2482                 iowrite32(gtt_entry, &gtt_entries[i++]);
2483         }
2484
2485         /* XXX: This serves as a posting read to make sure that the PTE has
2486          * actually been updated. There is some concern that even though
2487          * registers and PTEs are within the same BAR, they may be subject to
2488          * NUMA-like access patterns. Therefore, even with the way we assume
2489          * hardware should work, we must keep this posting read for paranoia.
2490          */
2491         if (i != 0)
2492                 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
2493
2494         /* This next bit makes the above posting read even more important. We
2495          * want to flush the TLBs only after we're certain all the PTE updates
2496          * have finished.
2497          */
2498         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2499         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2500
2501         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2502 }
2503
2504 static void nop_clear_range(struct i915_address_space *vm,
2505                             uint64_t start,
2506                             uint64_t length,
2507                             bool use_scratch)
2508 {
2509 }
2510
2511 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2512                                   uint64_t start,
2513                                   uint64_t length,
2514                                   bool use_scratch)
2515 {
2516         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2517         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2518         unsigned first_entry = start >> PAGE_SHIFT;
2519         unsigned num_entries = length >> PAGE_SHIFT;
2520         gen8_pte_t scratch_pte, __iomem *gtt_base =
2521                 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2522         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2523         int i;
2524         int rpm_atomic_seq;
2525
2526         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2527
2528         if (WARN(num_entries > max_entries,
2529                  "First entry = %d; Num entries = %d (max=%d)\n",
2530                  first_entry, num_entries, max_entries))
2531                 num_entries = max_entries;
2532
2533         scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
2534                                       I915_CACHE_LLC,
2535                                       use_scratch);
2536         for (i = 0; i < num_entries; i++)
2537                 gen8_set_pte(&gtt_base[i], scratch_pte);
2538         readl(gtt_base);
2539
2540         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2541 }
2542
2543 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2544                                   uint64_t start,
2545                                   uint64_t length,
2546                                   bool use_scratch)
2547 {
2548         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2549         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2550         unsigned first_entry = start >> PAGE_SHIFT;
2551         unsigned num_entries = length >> PAGE_SHIFT;
2552         gen6_pte_t scratch_pte, __iomem *gtt_base =
2553                 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2554         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2555         int i;
2556         int rpm_atomic_seq;
2557
2558         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2559
2560         if (WARN(num_entries > max_entries,
2561                  "First entry = %d; Num entries = %d (max=%d)\n",
2562                  first_entry, num_entries, max_entries))
2563                 num_entries = max_entries;
2564
2565         scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2566                                      I915_CACHE_LLC, use_scratch, 0);
2567
2568         for (i = 0; i < num_entries; i++)
2569                 iowrite32(scratch_pte, &gtt_base[i]);
2570         readl(gtt_base);
2571
2572         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2573 }
2574
2575 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2576                                   dma_addr_t addr,
2577                                   uint64_t offset,
2578                                   enum i915_cache_level cache_level,
2579                                   u32 unused)
2580 {
2581         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2582         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2583                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2584         int rpm_atomic_seq;
2585
2586         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2587
2588         intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2589
2590         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2591 }
2592
2593 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2594                                      struct sg_table *pages,
2595                                      uint64_t start,
2596                                      enum i915_cache_level cache_level, u32 unused)
2597 {
2598         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2599         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2600                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2601         int rpm_atomic_seq;
2602
2603         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2604
2605         intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2606
2607         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2608
2609 }
2610
2611 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2612                                   uint64_t start,
2613                                   uint64_t length,
2614                                   bool unused)
2615 {
2616         struct drm_i915_private *dev_priv = to_i915(vm->dev);
2617         unsigned first_entry = start >> PAGE_SHIFT;
2618         unsigned num_entries = length >> PAGE_SHIFT;
2619         int rpm_atomic_seq;
2620
2621         rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2622
2623         intel_gtt_clear_range(first_entry, num_entries);
2624
2625         assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2626 }
2627
2628 static int ggtt_bind_vma(struct i915_vma *vma,
2629                          enum i915_cache_level cache_level,
2630                          u32 flags)
2631 {
2632         struct drm_i915_gem_object *obj = vma->obj;
2633         u32 pte_flags = 0;
2634         int ret;
2635
2636         ret = i915_get_ggtt_vma_pages(vma);
2637         if (ret)
2638                 return ret;
2639
2640         /* Currently applicable only to VLV */
2641         if (obj->gt_ro)
2642                 pte_flags |= PTE_READ_ONLY;
2643
2644         vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2645                                 vma->node.start,
2646                                 cache_level, pte_flags);
2647
2648         /*
2649          * Without aliasing PPGTT there's no difference between
2650          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2651          * upgrade to both bound if we bind either to avoid double-binding.
2652          */
2653         vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2654
2655         return 0;
2656 }
2657
2658 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2659                                  enum i915_cache_level cache_level,
2660                                  u32 flags)
2661 {
2662         u32 pte_flags;
2663         int ret;
2664
2665         ret = i915_get_ggtt_vma_pages(vma);
2666         if (ret)
2667                 return ret;
2668
2669         /* Currently applicable only to VLV */
2670         pte_flags = 0;
2671         if (vma->obj->gt_ro)
2672                 pte_flags |= PTE_READ_ONLY;
2673
2674
2675         if (flags & GLOBAL_BIND) {
2676                 vma->vm->insert_entries(vma->vm,
2677                                         vma->ggtt_view.pages,
2678                                         vma->node.start,
2679                                         cache_level, pte_flags);
2680         }
2681
2682         if (flags & LOCAL_BIND) {
2683                 struct i915_hw_ppgtt *appgtt =
2684                         to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2685                 appgtt->base.insert_entries(&appgtt->base,
2686                                             vma->ggtt_view.pages,
2687                                             vma->node.start,
2688                                             cache_level, pte_flags);
2689         }
2690
2691         return 0;
2692 }
2693
2694 static void ggtt_unbind_vma(struct i915_vma *vma)
2695 {
2696         struct drm_device *dev = vma->vm->dev;
2697         struct drm_i915_private *dev_priv = to_i915(dev);
2698         struct drm_i915_gem_object *obj = vma->obj;
2699         const uint64_t size = min_t(uint64_t,
2700                                     obj->base.size,
2701                                     vma->node.size);
2702
2703         if (vma->bound & GLOBAL_BIND) {
2704                 vma->vm->clear_range(vma->vm,
2705                                      vma->node.start,
2706                                      size,
2707                                      true);
2708         }
2709
2710         if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2711                 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2712
2713                 appgtt->base.clear_range(&appgtt->base,
2714                                          vma->node.start,
2715                                          size,
2716                                          true);
2717         }
2718 }
2719
2720 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2721 {
2722         struct drm_device *dev = obj->base.dev;
2723         struct drm_i915_private *dev_priv = to_i915(dev);
2724         bool interruptible;
2725
2726         interruptible = do_idling(dev_priv);
2727
2728         dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2729                      PCI_DMA_BIDIRECTIONAL);
2730
2731         undo_idling(dev_priv, interruptible);
2732 }
2733
2734 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2735                                   unsigned long color,
2736                                   u64 *start,
2737                                   u64 *end)
2738 {
2739         if (node->color != color)
2740                 *start += 4096;
2741
2742         if (!list_empty(&node->node_list)) {
2743                 node = list_entry(node->node_list.next,
2744                                   struct drm_mm_node,
2745                                   node_list);
2746                 if (node->allocated && node->color != color)
2747                         *end -= 4096;
2748         }
2749 }
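/*
 * Example of the adjustment above (illustrative): the "color" here is
 * presumably the object's cache level, and the callback is only installed on
 * non-LLC platforms (see the HAS_LLC check in i915_gem_setup_global_gtt()).
 * When the hole under consideration borders a neighbour of a different color
 * on either side, the usable range is shrunk by one 4096-byte page at that
 * end, leaving a scratch-page guard between objects of differing
 * cacheability.
 */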
2750
2751 static int i915_gem_setup_global_gtt(struct drm_device *dev,
2752                                      u64 start,
2753                                      u64 mappable_end,
2754                                      u64 end)
2755 {
2756         /* Let GEM manage all of the aperture.
2757          *
2758          * However, leave one page at the end still bound to the scratch page.
2759          * There are a number of places where the hardware apparently prefetches
2760          * past the end of the object, and we've seen multiple hangs with the
2761          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2762          * aperture.  One page should be enough to keep any prefetching inside
2763          * of the aperture.
2764          */
2765         struct drm_i915_private *dev_priv = to_i915(dev);
2766         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2767         struct drm_mm_node *entry;
2768         struct drm_i915_gem_object *obj;
2769         unsigned long hole_start, hole_end;
2770         int ret;
2771
2772         BUG_ON(mappable_end > end);
2773
2774         ggtt->base.start = start;
2775
2776         /* Subtract the guard page before address space initialization to
2777          * shrink the range used by drm_mm */
2778         ggtt->base.total = end - start - PAGE_SIZE;
2779         i915_address_space_init(&ggtt->base, dev_priv);
2780         ggtt->base.total += PAGE_SIZE;
2781
2782         ret = intel_vgt_balloon(dev_priv);
2783         if (ret)
2784                 return ret;
2785
2786         if (!HAS_LLC(dev))
2787                 ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
2788
2789         /* Mark any preallocated objects as occupied */
2790         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2791                 struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base);
2792
2793                 DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
2794                               i915_gem_obj_ggtt_offset(obj), obj->base.size);
2795
2796                 WARN_ON(i915_gem_obj_ggtt_bound(obj));
2797                 ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
2798                 if (ret) {
2799                         DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2800                         return ret;
2801                 }
2802                 vma->bound |= GLOBAL_BIND;
2803                 __i915_vma_set_map_and_fenceable(vma);
2804                 list_add_tail(&vma->vm_link, &ggtt->base.inactive_list);
2805         }
2806
2807         /* Clear any non-preallocated blocks */
2808         drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2809                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2810                               hole_start, hole_end);
2811                 ggtt->base.clear_range(&ggtt->base, hole_start,
2812                                      hole_end - hole_start, true);
2813         }
2814
2815         /* And finally clear the reserved guard page */
2816         ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true);
2817
2818         if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2819                 struct i915_hw_ppgtt *ppgtt;
2820
2821                 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2822                 if (!ppgtt)
2823                         return -ENOMEM;
2824
2825                 ret = __hw_ppgtt_init(dev, ppgtt);
2826                 if (ret) {
2827                         ppgtt->base.cleanup(&ppgtt->base);
2828                         kfree(ppgtt);
2829                         return ret;
2830                 }
2831
2832                 if (ppgtt->base.allocate_va_range)
2833                         ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2834                                                             ppgtt->base.total);
2835                 if (ret) {
2836                         ppgtt->base.cleanup(&ppgtt->base);
2837                         kfree(ppgtt);
2838                         return ret;
2839                 }
2840
2841                 ppgtt->base.clear_range(&ppgtt->base,
2842                                         ppgtt->base.start,
2843                                         ppgtt->base.total,
2844                                         true);
2845
2846                 dev_priv->mm.aliasing_ppgtt = ppgtt;
2847                 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2848                 ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2849         }
2850
2851         return 0;
2852 }
2853
2854 /**
2855  * i915_gem_init_ggtt - Initialize GEM for Global GTT
2856  * @dev: DRM device
2857  */
2858 void i915_gem_init_ggtt(struct drm_device *dev)
2859 {
2860         struct drm_i915_private *dev_priv = to_i915(dev);
2861         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2862
2863         i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total);
2864 }
2865
2866 /**
2867  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2868  * @dev: DRM device
2869  */
2870 void i915_ggtt_cleanup_hw(struct drm_device *dev)
2871 {
2872         struct drm_i915_private *dev_priv = to_i915(dev);
2873         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2874
2875         if (dev_priv->mm.aliasing_ppgtt) {
2876                 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2877
2878                 ppgtt->base.cleanup(&ppgtt->base);
2879                 kfree(ppgtt);
2880         }
2881
2882         i915_gem_cleanup_stolen(dev);
2883
2884         if (drm_mm_initialized(&ggtt->base.mm)) {
2885                 intel_vgt_deballoon(dev_priv);
2886
2887                 drm_mm_takedown(&ggtt->base.mm);
2888                 list_del(&ggtt->base.global_link);
2889         }
2890
2891         ggtt->base.cleanup(&ggtt->base);
2892 }
2893
2894 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2895 {
2896         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2897         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2898         return snb_gmch_ctl << 20;
2899 }
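/* Decode example for the helper above (illustrative): a GGMS field of 2
 * yields 2 << 20 = 2 MiB of GTT entries; at 4 bytes per gen6 PTE that is
 * 512K entries, i.e. a 2 GiB mappable GGTT.
 */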
2900
2901 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2902 {
2903         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2904         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2905         if (bdw_gmch_ctl)
2906                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2907
2908 #ifdef CONFIG_X86_32
2909         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2910         if (bdw_gmch_ctl > 4)
2911                 bdw_gmch_ctl = 4;
2912 #endif
2913
2914         return bdw_gmch_ctl << 20;
2915 }
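/* Decode example for the helper above (illustrative): on gen8 the GGMS field
 * is a power-of-two exponent, so a value of 3 becomes 1 << 3 = 8, i.e.
 * 8 << 20 = 8 MiB of GTT entries; with 8-byte gen8 PTEs that is 1M entries,
 * i.e. a 4 GiB GGTT.
 */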
2916
2917 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2918 {
2919         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2920         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2921
2922         if (gmch_ctrl)
2923                 return 1 << (20 + gmch_ctrl);
2924
2925         return 0;
2926 }
2927
2928 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2929 {
2930         snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2931         snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2932         return snb_gmch_ctl << 25; /* 32 MB units */
2933 }
2934
2935 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2936 {
2937         bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2938         bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2939         return bdw_gmch_ctl << 25; /* 32 MB units */
2940 }
2941
2942 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2943 {
2944         gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2945         gmch_ctrl &= SNB_GMCH_GMS_MASK;
2946
2947         /*
2948          * 0x0  to 0x10: 32MB increments starting at 0MB
2949          * 0x11 to 0x16: 4MB increments starting at 8MB
2950          * 0x17 to 0x1d: 4MB increments starting at 36MB
2951          */
2952         if (gmch_ctrl < 0x11)
2953                 return gmch_ctrl << 25;
2954         else if (gmch_ctrl < 0x17)
2955                 return (gmch_ctrl - 0x11 + 2) << 22;
2956         else
2957                 return (gmch_ctrl - 0x17 + 9) << 22;
2958 }
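/* Worked examples of the piecewise decode above (illustrative):
 *   gmch_ctrl = 0x08 -> 0x08 << 25              = 256 MB
 *   gmch_ctrl = 0x13 -> (0x13 - 0x11 + 2) << 22 =  16 MB
 *   gmch_ctrl = 0x17 -> (0x17 - 0x17 + 9) << 22 =  36 MB
 */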
2959
2960 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2961 {
2962         gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2963         gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2964
2965         if (gen9_gmch_ctl < 0xf0)
2966                 return gen9_gmch_ctl << 25; /* 32 MB units */
2967         else
2968                 /* 0xf0 and above: 4MB increments starting at 4MB */
2969                 return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2970 }
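
/*
 * Worked example (illustrative only): a GMS value of 0x2 decodes to
 * 0x2 << 25 = 64MB of stolen memory, while 0xf1 takes the 4MB-granular
 * branch and decodes to (0xf1 - 0xf0 + 1) << 22 = 8MB.
 */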
2971
2972 static int ggtt_probe_common(struct drm_device *dev,
2973                              size_t gtt_size)
2974 {
2975         struct drm_i915_private *dev_priv = to_i915(dev);
2976         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2977         struct i915_page_scratch *scratch_page;
2978         phys_addr_t ggtt_phys_addr;
2979
2980         /* For modern GENs, the PTEs and register space are split in the BAR */
2981         ggtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2982                          (pci_resource_len(dev->pdev, 0) / 2);
2983
2984         /*
2985          * On BXT, writes larger than 64 bits to the GTT pagetable range will be
2986          * dropped. For WC mappings in general we have 64 byte burst writes when
2987          * the WC buffer is flushed, so we can't use WC here and have to resort
2988          * to an uncached mapping instead. The WC issue is easily caught by the
2989          * readback check when writing GTT PTE entries.
2990          */
2991         if (IS_BROXTON(dev))
2992                 ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size);
2993         else
2994                 ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size);
2995         if (!ggtt->gsm) {
2996                 DRM_ERROR("Failed to map the gtt page table\n");
2997                 return -ENOMEM;
2998         }
2999
3000         scratch_page = alloc_scratch_page(dev);
3001         if (IS_ERR(scratch_page)) {
3002                 DRM_ERROR("Scratch setup failed\n");
3003                 /* iounmap will also get called at remove, but meh */
3004                 iounmap(ggtt->gsm);
3005                 return PTR_ERR(scratch_page);
3006         }
3007
3008         ggtt->base.scratch_page = scratch_page;
3009
3010         return 0;
3011 }
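
/*
 * For illustration (the 16MB figure below is hypothetical, not taken from
 * any spec): since the GSM is assumed to occupy the upper half of BAR 0,
 * a 16MB BAR would leave the register space in [0, 8MB) and place the GGTT
 * PTEs at [8MB, 16MB), i.e. ggtt_phys_addr = BAR0 base + 8MB, which is then
 * ioremapped for gtt_size bytes above.
 */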
3012
3013 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3014  * bits. When using advanced contexts each context stores its own PAT, but
3015  * writing this data shouldn't be harmful even in those cases. */
3016 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
3017 {
3018         uint64_t pat;
3019
3020         pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
3021               GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
3022               GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
3023               GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
3024               GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
3025               GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
3026               GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
3027               GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3028
3029         if (!USES_PPGTT(dev_priv))
3030                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3031                  * so RTL will always use the value corresponding to
3032                  * pat_sel = 000".
3033                  * So let's disable cache for GGTT to avoid screen corruption.
3034                  * MOCS can still be used though.
3035                  * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3036                  * before this patch, i.e. the same uncached + snooping access
3037                  * as on gen6/7 seems to be in effect.
3038                  * - So this just fixes blitter/render access. Again it looks
3039                  * like it's not just uncached access, but uncached + snooping.
3040                  * So we can still hold onto all our assumptions wrt cpu
3041                  * clflushing on LLC machines.
3042                  */
3043                 pat = GEN8_PPAT(0, GEN8_PPAT_UC);
3044
3045         /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
3046          * write would work. */
3047         I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3048         I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3049 }
3050
3051 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3052 {
3053         uint64_t pat;
3054
3055         /*
3056          * Map WB on BDW to snooped on CHV.
3057          *
3058          * Only the snoop bit has meaning for CHV, the rest is
3059          * ignored.
3060          *
3061          * The hardware will never snoop for certain types of accesses:
3062          * - CPU GTT (GMADR->GGTT->no snoop->memory)
3063          * - PPGTT page tables
3064          * - some other special cycles
3065          *
3066          * As with BDW, we also need to consider the following for GT accesses:
3067          * "For GGTT, there is NO pat_sel[2:0] from the entry,
3068          * so RTL will always use the value corresponding to
3069          * pat_sel = 000".
3070          * Which means we must set the snoop bit in PAT entry 0
3071          * in order to keep the global status page working.
3072          */
3073         pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3074               GEN8_PPAT(1, 0) |
3075               GEN8_PPAT(2, 0) |
3076               GEN8_PPAT(3, 0) |
3077               GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3078               GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3079               GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3080               GEN8_PPAT(7, CHV_PPAT_SNOOP);
3081
3082         I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3083         I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3084 }
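
/*
 * A note for clarity (describing the helpers above, not new behaviour):
 * GEN8_PPAT(i, x) packs entry i into the i-th 8-bit lane of the 64-bit PAT
 * value, so entries 0-3 end up in the low 32 bits written to
 * GEN8_PRIVATE_PAT_LO and entries 4-7 in the high 32 bits written to
 * GEN8_PRIVATE_PAT_HI.
 */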
3085
3086 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3087 {
3088         struct drm_device *dev = ggtt->base.dev;
3089         struct drm_i915_private *dev_priv = to_i915(dev);
3090         u16 snb_gmch_ctl;
3091         int ret;
3092
3093         /* TODO: We're not aware of mappable constraints on gen8 yet */
3094         ggtt->mappable_base = pci_resource_start(dev->pdev, 2);
3095         ggtt->mappable_end = pci_resource_len(dev->pdev, 2);
3096
3097         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
3098                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
3099
3100         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3101
3102         if (INTEL_INFO(dev)->gen >= 9) {
3103                 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
3104                 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl);
3105         } else if (IS_CHERRYVIEW(dev)) {
3106                 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
3107                 ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl);
3108         } else {
3109                 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
3110                 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl);
3111         }
3112
3113         ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3114
3115         if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3116                 chv_setup_private_ppat(dev_priv);
3117         else
3118                 bdw_setup_private_ppat(dev_priv);
3119
3120         ret = ggtt_probe_common(dev, ggtt->size);
3121
3122         ggtt->base.bind_vma = ggtt_bind_vma;
3123         ggtt->base.unbind_vma = ggtt_unbind_vma;
3124         ggtt->base.insert_page = gen8_ggtt_insert_page;
3125         ggtt->base.clear_range = nop_clear_range;
3126         if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3127                 ggtt->base.clear_range = gen8_ggtt_clear_range;
3128
3129         ggtt->base.insert_entries = gen8_ggtt_insert_entries;
3130         if (IS_CHERRYVIEW(dev_priv))
3131                 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
3132
3133         return ret;
3134 }
3135
3136 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3137 {
3138         struct drm_device *dev = ggtt->base.dev;
3139         u16 snb_gmch_ctl;
3140         int ret;
3141
3142         ggtt->mappable_base = pci_resource_start(dev->pdev, 2);
3143         ggtt->mappable_end = pci_resource_len(dev->pdev, 2);
3144
3145         /* 64/512MB is the current min/max we actually know of, but this is just
3146          * a coarse sanity check.
3147          */
3148         if (ggtt->mappable_end < (64 << 20) || ggtt->mappable_end > (512 << 20)) {
3149                 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
3150                 return -ENXIO;
3151         }
3152
3153         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
3154                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
3155         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3156
3157         ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
3158         ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl);
3159         ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3160
3161         ret = ggtt_probe_common(dev, ggtt->size);
3162
3163         ggtt->base.clear_range = gen6_ggtt_clear_range;
3164         ggtt->base.insert_page = gen6_ggtt_insert_page;
3165         ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3166         ggtt->base.bind_vma = ggtt_bind_vma;
3167         ggtt->base.unbind_vma = ggtt_unbind_vma;
3168
3169         return ret;
3170 }
3171
3172 static void gen6_gmch_remove(struct i915_address_space *vm)
3173 {
3174         struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base);
3175
3176         iounmap(ggtt->gsm);
3177         free_scratch_page(vm->dev, vm->scratch_page);
3178 }
3179
3180 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3181 {
3182         struct drm_device *dev = ggtt->base.dev;
3183         struct drm_i915_private *dev_priv = to_i915(dev);
3184         int ret;
3185
3186         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3187         if (!ret) {
3188                 DRM_ERROR("failed to set up gmch\n");
3189                 return -EIO;
3190         }
3191
3192         intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size,
3193                       &ggtt->mappable_base, &ggtt->mappable_end);
3194
3195         ggtt->do_idle_maps = needs_idle_maps(&dev_priv->drm);
3196         ggtt->base.insert_page = i915_ggtt_insert_page;
3197         ggtt->base.insert_entries = i915_ggtt_insert_entries;
3198         ggtt->base.clear_range = i915_ggtt_clear_range;
3199         ggtt->base.bind_vma = ggtt_bind_vma;
3200         ggtt->base.unbind_vma = ggtt_unbind_vma;
3201
3202         if (unlikely(ggtt->do_idle_maps))
3203                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3204
3205         return 0;
3206 }
3207
3208 static void i915_gmch_remove(struct i915_address_space *vm)
3209 {
3210         intel_gmch_remove();
3211 }
3212
3213 /**
3214  * i915_ggtt_init_hw - Initialize GGTT hardware
3215  * @dev: DRM device
3216  */
3217 int i915_ggtt_init_hw(struct drm_device *dev)
3218 {
3219         struct drm_i915_private *dev_priv = to_i915(dev);
3220         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3221         int ret;
3222
3223         if (INTEL_INFO(dev)->gen <= 5) {
3224                 ggtt->probe = i915_gmch_probe;
3225                 ggtt->base.cleanup = i915_gmch_remove;
3226         } else if (INTEL_INFO(dev)->gen < 8) {
3227                 ggtt->probe = gen6_gmch_probe;
3228                 ggtt->base.cleanup = gen6_gmch_remove;
3229
3230                 if (HAS_EDRAM(dev))
3231                         ggtt->base.pte_encode = iris_pte_encode;
3232                 else if (IS_HASWELL(dev))
3233                         ggtt->base.pte_encode = hsw_pte_encode;
3234                 else if (IS_VALLEYVIEW(dev))
3235                         ggtt->base.pte_encode = byt_pte_encode;
3236                 else if (INTEL_INFO(dev)->gen >= 7)
3237                         ggtt->base.pte_encode = ivb_pte_encode;
3238                 else
3239                         ggtt->base.pte_encode = snb_pte_encode;
3240         } else {
3241                 ggtt->probe = gen8_gmch_probe;
3242                 ggtt->base.cleanup = gen6_gmch_remove;
3243         }
3244
3245         ggtt->base.dev = dev;
3246         ggtt->base.is_ggtt = true;
3247
3248         ret = ggtt->probe(ggtt);
3249         if (ret)
3250                 return ret;
3251
3252         if ((ggtt->base.total - 1) >> 32) {
3253                 DRM_ERROR("We never expected a Global GTT with more than 32 bits "
3254                           "of address space! Found %lldM!\n",
3255                           ggtt->base.total >> 20);
3256                 ggtt->base.total = 1ULL << 32;
3257                 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3258         }
3259
3260         /*
3261          * Initialise stolen early so that we may reserve preallocated
3262          * objects for the BIOS to KMS transition.
3263          */
3264         ret = i915_gem_init_stolen(dev);
3265         if (ret)
3266                 goto out_gtt_cleanup;
3267
3268         /* GMADR is the PCI mmio aperture into the global GTT. */
3269         DRM_INFO("Memory usable by graphics device = %lluM\n",
3270                  ggtt->base.total >> 20);
3271         DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
3272         DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20);
3273 #ifdef CONFIG_INTEL_IOMMU
3274         if (intel_iommu_gfx_mapped)
3275                 DRM_INFO("VT-d active for gfx access\n");
3276 #endif
3277
3278         return 0;
3279
3280 out_gtt_cleanup:
3281         ggtt->base.cleanup(&ggtt->base);
3282
3283         return ret;
3284 }
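
/*
 * A sketch of how these GGTT entry points are typically ordered during
 * driver load (illustrative only; the real call sites live elsewhere in the
 * driver and interleave additional setup):
 *
 *	ret = i915_ggtt_init_hw(dev);		probe size, map GSM, init stolen
 *	if (!ret)
 *		ret = i915_ggtt_enable_hw(dev);	only does work on pre-gen6 GMCH
 *	...
 *	i915_gem_init_ggtt(dev);		set up the usable GGTT range
 *	...
 *	i915_ggtt_cleanup_hw(dev);		teardown on error or unload
 */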
3285
3286 int i915_ggtt_enable_hw(struct drm_device *dev)
3287 {
3288         if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
3289                 return -EIO;
3290
3291         return 0;
3292 }
3293
3294 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3295 {
3296         struct drm_i915_private *dev_priv = to_i915(dev);
3297         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3298         struct drm_i915_gem_object *obj;
3299         struct i915_vma *vma;
3300
3301         i915_check_and_clear_faults(dev_priv);
3302
3303         /* First fill our portion of the GTT with scratch pages */
3304         ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
3305                                true);
3306
3307         /* Cache flush objects bound into GGTT and rebind them. */
3308         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3309                 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3310                         if (vma->vm != &ggtt->base)
3311                                 continue;
3312
3313                         WARN_ON(i915_vma_bind(vma, obj->cache_level,
3314                                               PIN_UPDATE));
3315                 }
3316
3317                 if (obj->pin_display)
3318                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3319         }
3320
3321         if (INTEL_INFO(dev)->gen >= 8) {
3322                 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3323                         chv_setup_private_ppat(dev_priv);
3324                 else
3325                         bdw_setup_private_ppat(dev_priv);
3326
3327                 return;
3328         }
3329
3330         if (USES_PPGTT(dev)) {
3331                 struct i915_address_space *vm;
3332
3333                 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3334                         /* TODO: Perhaps it shouldn't be gen6 specific */
3335
3336                         struct i915_hw_ppgtt *ppgtt;
3337
3338                         if (vm->is_ggtt)
3339                                 ppgtt = dev_priv->mm.aliasing_ppgtt;
3340                         else
3341                                 ppgtt = i915_vm_to_ppgtt(vm);
3342
3343                         gen6_write_page_range(dev_priv, &ppgtt->pd,
3344                                               0, ppgtt->base.total);
3345                 }
3346         }
3347
3348         i915_ggtt_flush(dev_priv);
3349 }
3350
3351 static struct i915_vma *
3352 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
3353                       struct i915_address_space *vm,
3354                       const struct i915_ggtt_view *ggtt_view)
3355 {
3356         struct i915_vma *vma;
3357
3358         if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3359                 return ERR_PTR(-EINVAL);
3360
3361         vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3362         if (vma == NULL)
3363                 return ERR_PTR(-ENOMEM);
3364
3365         INIT_LIST_HEAD(&vma->vm_link);
3366         INIT_LIST_HEAD(&vma->obj_link);
3367         INIT_LIST_HEAD(&vma->exec_list);
3368         vma->vm = vm;
3369         vma->obj = obj;
3370         vma->is_ggtt = i915_is_ggtt(vm);
3371
3372         if (i915_is_ggtt(vm))
3373                 vma->ggtt_view = *ggtt_view;
3374         else
3375                 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3376
3377         list_add_tail(&vma->obj_link, &obj->vma_list);
3378
3379         return vma;
3380 }
3381
3382 struct i915_vma *
3383 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3384                                   struct i915_address_space *vm)
3385 {
3386         struct i915_vma *vma;
3387
3388         vma = i915_gem_obj_to_vma(obj, vm);
3389         if (!vma)
3390                 vma = __i915_gem_vma_create(obj, vm,
3391                                             i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
3392
3393         return vma;
3394 }
3395
3396 struct i915_vma *
3397 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3398                                        const struct i915_ggtt_view *view)
3399 {
3400         struct drm_device *dev = obj->base.dev;
3401         struct drm_i915_private *dev_priv = to_i915(dev);
3402         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3403         struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
3404
3405         if (!vma)
3406                 vma = __i915_gem_vma_create(obj, &ggtt->base, view);
3407
3408         return vma;
3409
3410 }
3411
3412 static struct scatterlist *
3413 rotate_pages(const dma_addr_t *in, unsigned int offset,
3414              unsigned int width, unsigned int height,
3415              unsigned int stride,
3416              struct sg_table *st, struct scatterlist *sg)
3417 {
3418         unsigned int column, row;
3419         unsigned int src_idx;
3420
3421         for (column = 0; column < width; column++) {
3422                 src_idx = stride * (height - 1) + column;
3423                 for (row = 0; row < height; row++) {
3424                         st->nents++;
3425                         /* We don't need the pages, but need to initialize
3426                          * the entries so the sg list can be happily traversed.
3427                          * The only things we need are the DMA addresses.
3428                          */
3429                         sg_set_page(sg, NULL, PAGE_SIZE, 0);
3430                         sg_dma_address(sg) = in[offset + src_idx];
3431                         sg_dma_len(sg) = PAGE_SIZE;
3432                         sg = sg_next(sg);
3433                         src_idx -= stride;
3434                 }
3435         }
3436
3437         return sg;
3438 }
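
/*
 * Worked example (illustrative only): with width = height = stride = 2 and
 * offset = 0, the loops above emit the source pages in the order 2, 0, 3, 1,
 * i.e. each source column is written out bottom-to-top, one column after
 * another, which is how the rotated GGTT view lays out pages for scanout.
 */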
3439
3440 static struct sg_table *
3441 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
3442                           struct drm_i915_gem_object *obj)
3443 {
3444         const size_t n_pages = obj->base.size / PAGE_SIZE;
3445         unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
3446         unsigned int size_pages_uv;
3447         struct sgt_iter sgt_iter;
3448         dma_addr_t dma_addr;
3449         unsigned long i;
3450         dma_addr_t *page_addr_list;
3451         struct sg_table *st;
3452         unsigned int uv_start_page;
3453         struct scatterlist *sg;
3454         int ret = -ENOMEM;
3455
3456         /* Allocate a temporary list of source pages for random access. */
3457         page_addr_list = drm_malloc_gfp(n_pages,
3458                                         sizeof(dma_addr_t),
3459                                         GFP_TEMPORARY);
3460         if (!page_addr_list)
3461                 return ERR_PTR(ret);
3462
3463         /* Account for UV plane with NV12. */
3464         if (rot_info->pixel_format == DRM_FORMAT_NV12)
3465                 size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height;
3466         else
3467                 size_pages_uv = 0;
3468
3469         /* Allocate target SG list. */
3470         st = kmalloc(sizeof(*st), GFP_KERNEL);
3471         if (!st)
3472                 goto err_st_alloc;
3473
3474         ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3475         if (ret)
3476                 goto err_sg_alloc;
3477
3478         /* Populate source page list from the object. */
3479         i = 0;
3480         for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
3481                 page_addr_list[i++] = dma_addr;
3482
3483         GEM_BUG_ON(i != n_pages);
3484         st->nents = 0;
3485         sg = st->sgl;
3486
3487         /* Rotate the pages. */
3488         sg = rotate_pages(page_addr_list, 0,
3489                           rot_info->plane[0].width, rot_info->plane[0].height,
3490                           rot_info->plane[0].width,
3491                           st, sg);
3492
3493         /* Append the UV plane if NV12. */
3494         if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3495                 uv_start_page = size_pages;
3496
3497                 /* Check for tile-row misalignment of the UV offset. */
3498                 if (offset_in_page(rot_info->uv_offset))
3499                         uv_start_page--;
3500
3501                 rot_info->uv_start_page = uv_start_page;
3502
3503                 sg = rotate_pages(page_addr_list, rot_info->uv_start_page,
3504                                   rot_info->plane[1].width, rot_info->plane[1].height,
3505                                   rot_info->plane[1].width,
3506                                   st, sg);
3507         }
3508
3509         DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n",
3510                       obj->base.size, rot_info->plane[0].width,
3511                       rot_info->plane[0].height, size_pages + size_pages_uv,
3512                       size_pages);
3513
3514         drm_free_large(page_addr_list);
3515
3516         return st;
3517
3518 err_sg_alloc:
3519         kfree(st);
3520 err_st_alloc:
3521         drm_free_large(page_addr_list);
3522
3523         DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n",
3524                       obj->base.size, ret, rot_info->plane[0].width,
3525                       rot_info->plane[0].height, size_pages + size_pages_uv,
3526                       size_pages);
3527         return ERR_PTR(ret);
3528 }
3529
3530 static struct sg_table *
3531 intel_partial_pages(const struct i915_ggtt_view *view,
3532                     struct drm_i915_gem_object *obj)
3533 {
3534         struct sg_table *st;
3535         struct scatterlist *sg;
3536         struct sg_page_iter obj_sg_iter;
3537         int ret = -ENOMEM;
3538
3539         st = kmalloc(sizeof(*st), GFP_KERNEL);
3540         if (!st)
3541                 goto err_st_alloc;
3542
3543         ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3544         if (ret)
3545                 goto err_sg_alloc;
3546
3547         sg = st->sgl;
3548         st->nents = 0;
3549         for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3550                 view->params.partial.offset)
3551         {
3552                 if (st->nents >= view->params.partial.size)
3553                         break;
3554
3555                 sg_set_page(sg, NULL, PAGE_SIZE, 0);
3556                 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3557                 sg_dma_len(sg) = PAGE_SIZE;
3558
3559                 sg = sg_next(sg);
3560                 st->nents++;
3561         }
3562
3563         return st;
3564
3565 err_sg_alloc:
3566         kfree(st);
3567 err_st_alloc:
3568         return ERR_PTR(ret);
3569 }
3570
3571 static int
3572 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3573 {
3574         int ret = 0;
3575
3576         if (vma->ggtt_view.pages)
3577                 return 0;
3578
3579         if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3580                 vma->ggtt_view.pages = vma->obj->pages;
3581         else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3582                 vma->ggtt_view.pages =
3583                         intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
3584         else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3585                 vma->ggtt_view.pages =
3586                         intel_partial_pages(&vma->ggtt_view, vma->obj);
3587         else
3588                 WARN_ONCE(1, "GGTT view %u not implemented!\n",
3589                           vma->ggtt_view.type);
3590
3591         if (!vma->ggtt_view.pages) {
3592                 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3593                           vma->ggtt_view.type);
3594                 ret = -EINVAL;
3595         } else if (IS_ERR(vma->ggtt_view.pages)) {
3596                 ret = PTR_ERR(vma->ggtt_view.pages);
3597                 vma->ggtt_view.pages = NULL;
3598                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3599                           vma->ggtt_view.type, ret);
3600         }
3601
3602         return ret;
3603 }
3604
3605 /**
3606  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3607  * @vma: VMA to map
3608  * @cache_level: mapping cache level
3609  * @flags: flags like global or local mapping
3610  *
3611  * DMA addresses are taken from the scatter-gather table of this object (or of
3612  * this VMA in the case of non-default GGTT views) and the PTE entries are set up.
3613  * Note that DMA addresses are also the only part of the SG table we care about.
3614  */
3615 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3616                   u32 flags)
3617 {
3618         int ret;
3619         u32 bind_flags;
3620
3621         if (WARN_ON(flags == 0))
3622                 return -EINVAL;
3623
3624         bind_flags = 0;
3625         if (flags & PIN_GLOBAL)
3626                 bind_flags |= GLOBAL_BIND;
3627         if (flags & PIN_USER)
3628                 bind_flags |= LOCAL_BIND;
3629
3630         if (flags & PIN_UPDATE)
3631                 bind_flags |= vma->bound;
3632         else
3633                 bind_flags &= ~vma->bound;
3634
3635         if (bind_flags == 0)
3636                 return 0;
3637
3638         if (vma->bound == 0 && vma->vm->allocate_va_range) {
3639                 /* XXX: i915_vma_pin() will fix this +- hack */
3640                 vma->pin_count++;
3641                 trace_i915_va_alloc(vma);
3642                 ret = vma->vm->allocate_va_range(vma->vm,
3643                                                  vma->node.start,
3644                                                  vma->node.size);
3645                 vma->pin_count--;
3646                 if (ret)
3647                         return ret;
3648         }
3649
3650         ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3651         if (ret)
3652                 return ret;
3653
3654         vma->bound |= bind_flags;
3655
3656         return 0;
3657 }
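
/*
 * Example (illustrative only), mirroring the restore path earlier in this
 * file: an already bound GGTT vma only needs its PTEs rewritten, so
 * PIN_UPDATE is passed to reuse vma->bound as the bind mask:
 *
 *	i915_vma_bind(vma, obj->cache_level, PIN_UPDATE);
 *
 * A first-time bind would instead pass PIN_GLOBAL and/or PIN_USER, which
 * translate to GLOBAL_BIND/LOCAL_BIND above.
 */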
3658
3659 /**
3660  * i915_ggtt_view_size - Get the size of a GGTT view.
3661  * @obj: Object the view is of.
3662  * @view: The view in question.
3663  *
3664  * Return: The size of the GGTT view in bytes.
3665  */
3666 size_t
3667 i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3668                     const struct i915_ggtt_view *view)
3669 {
3670         if (view->type == I915_GGTT_VIEW_NORMAL) {
3671                 return obj->base.size;
3672         } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3673                 return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT;
3674         } else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3675                 return view->params.partial.size << PAGE_SHIFT;
3676         } else {
3677                 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3678                 return obj->base.size;
3679         }
3680 }
3681
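/**
 * i915_vma_pin_iomap - pin a GGTT vma and map it through the aperture
 * @vma: VMA to iomap
 *
 * Returns a WC iomapping of the vma's range in the mappable aperture,
 * creating and caching it in vma->iomap on first use, and increments the
 * vma's pin count.  The caller must hold struct_mutex, the object must be
 * map_and_fenceable and the vma must be bound in the GGTT.
 *
 * A rough usage sketch (assuming the matching i915_vma_unpin_iomap()
 * helper is used to drop the pin):
 *
 *	void __iomem *vaddr = i915_vma_pin_iomap(vma);
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *	writel(val, vaddr + offset);
 *	i915_vma_unpin_iomap(vma);
 */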
3682 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
3683 {
3684         void __iomem *ptr;
3685
3686         lockdep_assert_held(&vma->vm->dev->struct_mutex);
3687         if (WARN_ON(!vma->obj->map_and_fenceable))
3688                 return ERR_PTR(-ENODEV);
3689
3690         GEM_BUG_ON(!vma->is_ggtt);
3691         GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0);
3692
3693         ptr = vma->iomap;
3694         if (ptr == NULL) {
3695                 ptr = io_mapping_map_wc(i915_vm_to_ggtt(vma->vm)->mappable,
3696                                         vma->node.start,
3697                                         vma->node.size);
3698                 if (ptr == NULL)
3699                         return ERR_PTR(-ENOMEM);
3700
3701                 vma->iomap = ptr;
3702         }
3703
3704         vma->pin_count++;
3705         return ptr;
3706 }