KVM: drm/i915/gvt: Drop @vcpu from KVM's ->track_write() hook
drivers/gpu/drm/i915/gvt/kvmgt.c (platform/kernel/linux-starfive.git)
1 /*
2  * KVMGT - the implementation of Intel mediated pass-through framework for KVM
3  *
4  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Kevin Tian <kevin.tian@intel.com>
27  *    Jike Song <jike.song@intel.com>
28  *    Xiaoguang Chen <xiaoguang.chen@intel.com>
29  *    Eddie Dong <eddie.dong@intel.com>
30  *
31  * Contributors:
32  *    Niu Bing <bing.niu@intel.com>
33  *    Zhi Wang <zhi.a.wang@intel.com>
34  */
35
36 #include <linux/init.h>
37 #include <linux/mm.h>
38 #include <linux/kthread.h>
39 #include <linux/sched/mm.h>
40 #include <linux/types.h>
41 #include <linux/list.h>
42 #include <linux/rbtree.h>
43 #include <linux/spinlock.h>
44 #include <linux/eventfd.h>
45 #include <linux/mdev.h>
46 #include <linux/debugfs.h>
47
48 #include <linux/nospec.h>
49
50 #include <drm/drm_edid.h>
51
52 #include "i915_drv.h"
53 #include "intel_gvt.h"
54 #include "gvt.h"
55
56 MODULE_IMPORT_NS(DMA_BUF);
57 MODULE_IMPORT_NS(I915_GVT);
58
59 /* helper macros copied from vfio-pci */
60 #define VFIO_PCI_OFFSET_SHIFT   40
61 #define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
62 #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
63 #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
64
65 #define EDID_BLOB_OFFSET (PAGE_SIZE/2)
66
67 #define OPREGION_SIGNATURE "IntelGraphicsMem"
68
69 struct vfio_region;
70 struct intel_vgpu_regops {
71         size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
72                         size_t count, loff_t *ppos, bool iswrite);
73         void (*release)(struct intel_vgpu *vgpu,
74                         struct vfio_region *region);
75 };
76
77 struct vfio_region {
78         u32                             type;
79         u32                             subtype;
80         size_t                          size;
81         u32                             flags;
82         const struct intel_vgpu_regops  *ops;
83         void                            *data;
84 };
85
86 struct vfio_edid_region {
87         struct vfio_region_gfx_edid vfio_edid_regs;
88         void *edid_blob;
89 };
90
91 struct kvmgt_pgfn {
92         gfn_t gfn;
93         struct hlist_node hnode;
94 };
95
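/*
 * A pinned guest page range and its DMA mapping. Each entry is indexed
 * twice, by gfn in vgpu->gfn_cache and by dma_addr in
 * vgpu->dma_addr_cache, so it can be looked up from either key.
 */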
96 struct gvt_dma {
97         struct intel_vgpu *vgpu;
98         struct rb_node gfn_node;
99         struct rb_node dma_addr_node;
100         gfn_t gfn;
101         dma_addr_t dma_addr;
102         unsigned long size;
103         struct kref ref;
104 };
105
106 #define vfio_dev_to_vgpu(vfio_dev) \
107         container_of((vfio_dev), struct intel_vgpu, vfio_device)
108
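/*
 * KVM page-track notifier callbacks, registered against the vGPU's KVM
 * instance in intel_vgpu_open_device(). As of this change, the
 * ->track_write() hook no longer takes a @vcpu argument.
 */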
109 static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
110                                    struct kvm_page_track_notifier_node *node);
111 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
112                 struct kvm_memory_slot *slot,
113                 struct kvm_page_track_notifier_node *node);
114
115 static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf)
116 {
117         struct intel_vgpu_type *type =
118                 container_of(mtype, struct intel_vgpu_type, type);
119
120         return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
121                        "fence: %d\nresolution: %s\n"
122                        "weight: %d\n",
123                        BYTES_TO_MB(type->conf->low_mm),
124                        BYTES_TO_MB(type->conf->high_mm),
125                        type->conf->fence, vgpu_edid_str(type->conf->edid),
126                        type->conf->weight);
127 }
128
129 static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
130                 unsigned long size)
131 {
132         vfio_unpin_pages(&vgpu->vfio_device, gfn << PAGE_SHIFT,
133                          DIV_ROUND_UP(size, PAGE_SIZE));
134 }
135
136 /* Pin a normal or compound guest page for dma. */
137 static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
138                 unsigned long size, struct page **page)
139 {
140         int total_pages = DIV_ROUND_UP(size, PAGE_SIZE);
141         struct page *base_page = NULL;
142         int npage;
143         int ret;
144
145         /*
146          * We pin the pages one-by-one to avoid allocating a big array
147          * on the stack to hold pfns.
148          */
149         for (npage = 0; npage < total_pages; npage++) {
150                 dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT;
151                 struct page *cur_page;
152
153                 ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1,
154                                      IOMMU_READ | IOMMU_WRITE, &cur_page);
155                 if (ret != 1) {
156                         gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n",
157                                      &cur_iova, ret);
158                         goto err;
159                 }
160
161                 if (npage == 0)
162                         base_page = cur_page;
163                 else if (page_to_pfn(base_page) + npage != page_to_pfn(cur_page)) {
164                         ret = -EINVAL;
165                         npage++;
166                         goto err;
167                 }
168         }
169
170         *page = base_page;
171         return 0;
172 err:
173         if (npage)
174                 gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
175         return ret;
176 }
177
178 static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
179                 dma_addr_t *dma_addr, unsigned long size)
180 {
181         struct device *dev = vgpu->gvt->gt->i915->drm.dev;
182         struct page *page = NULL;
183         int ret;
184
185         ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
186         if (ret)
187                 return ret;
188
189         /* Setup DMA mapping. */
190         *dma_addr = dma_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL);
191         if (dma_mapping_error(dev, *dma_addr)) {
192                 gvt_vgpu_err("DMA mapping failed for pfn 0x%lx\n",
193                              page_to_pfn(page));
194                 gvt_unpin_guest_page(vgpu, gfn, size);
195                 return -ENOMEM;
196         }
197
198         return 0;
199 }
200
201 static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
202                 dma_addr_t dma_addr, unsigned long size)
203 {
204         struct device *dev = vgpu->gvt->gt->i915->drm.dev;
205
206         dma_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL);
207         gvt_unpin_guest_page(vgpu, gfn, size);
208 }
209
210 static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
211                 dma_addr_t dma_addr)
212 {
213         struct rb_node *node = vgpu->dma_addr_cache.rb_node;
214         struct gvt_dma *itr;
215
216         while (node) {
217                 itr = rb_entry(node, struct gvt_dma, dma_addr_node);
218
219                 if (dma_addr < itr->dma_addr)
220                         node = node->rb_left;
221                 else if (dma_addr > itr->dma_addr)
222                         node = node->rb_right;
223                 else
224                         return itr;
225         }
226         return NULL;
227 }
228
229 static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
230 {
231         struct rb_node *node = vgpu->gfn_cache.rb_node;
232         struct gvt_dma *itr;
233
234         while (node) {
235                 itr = rb_entry(node, struct gvt_dma, gfn_node);
236
237                 if (gfn < itr->gfn)
238                         node = node->rb_left;
239                 else if (gfn > itr->gfn)
240                         node = node->rb_right;
241                 else
242                         return itr;
243         }
244         return NULL;
245 }
246
247 static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
248                 dma_addr_t dma_addr, unsigned long size)
249 {
250         struct gvt_dma *new, *itr;
251         struct rb_node **link, *parent = NULL;
252
253         new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
254         if (!new)
255                 return -ENOMEM;
256
257         new->vgpu = vgpu;
258         new->gfn = gfn;
259         new->dma_addr = dma_addr;
260         new->size = size;
261         kref_init(&new->ref);
262
263         /* gfn_cache maps gfn to struct gvt_dma. */
264         link = &vgpu->gfn_cache.rb_node;
265         while (*link) {
266                 parent = *link;
267                 itr = rb_entry(parent, struct gvt_dma, gfn_node);
268
269                 if (gfn < itr->gfn)
270                         link = &parent->rb_left;
271                 else
272                         link = &parent->rb_right;
273         }
274         rb_link_node(&new->gfn_node, parent, link);
275         rb_insert_color(&new->gfn_node, &vgpu->gfn_cache);
276
277         /* dma_addr_cache maps dma addr to struct gvt_dma. */
278         parent = NULL;
279         link = &vgpu->dma_addr_cache.rb_node;
280         while (*link) {
281                 parent = *link;
282                 itr = rb_entry(parent, struct gvt_dma, dma_addr_node);
283
284                 if (dma_addr < itr->dma_addr)
285                         link = &parent->rb_left;
286                 else
287                         link = &parent->rb_right;
288         }
289         rb_link_node(&new->dma_addr_node, parent, link);
290         rb_insert_color(&new->dma_addr_node, &vgpu->dma_addr_cache);
291
292         vgpu->nr_cache_entries++;
293         return 0;
294 }
295
296 static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
297                                 struct gvt_dma *entry)
298 {
299         rb_erase(&entry->gfn_node, &vgpu->gfn_cache);
300         rb_erase(&entry->dma_addr_node, &vgpu->dma_addr_cache);
301         kfree(entry);
302         vgpu->nr_cache_entries--;
303 }
304
305 static void gvt_cache_destroy(struct intel_vgpu *vgpu)
306 {
307         struct gvt_dma *dma;
308         struct rb_node *node = NULL;
309
310         for (;;) {
311                 mutex_lock(&vgpu->cache_lock);
312                 node = rb_first(&vgpu->gfn_cache);
313                 if (!node) {
314                         mutex_unlock(&vgpu->cache_lock);
315                         break;
316                 }
317                 dma = rb_entry(node, struct gvt_dma, gfn_node);
318                 gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
319                 __gvt_cache_remove_entry(vgpu, dma);
320                 mutex_unlock(&vgpu->cache_lock);
321         }
322 }
323
324 static void gvt_cache_init(struct intel_vgpu *vgpu)
325 {
326         vgpu->gfn_cache = RB_ROOT;
327         vgpu->dma_addr_cache = RB_ROOT;
328         vgpu->nr_cache_entries = 0;
329         mutex_init(&vgpu->cache_lock);
330 }
331
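/*
 * The protect table is a hash of the gfns currently treated as
 * write-protected for page tracking; lookups assert vgpu->vgpu_lock.
 */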
332 static void kvmgt_protect_table_init(struct intel_vgpu *info)
333 {
334         hash_init(info->ptable);
335 }
336
337 static void kvmgt_protect_table_destroy(struct intel_vgpu *info)
338 {
339         struct kvmgt_pgfn *p;
340         struct hlist_node *tmp;
341         int i;
342
343         hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
344                 hash_del(&p->hnode);
345                 kfree(p);
346         }
347 }
348
349 static struct kvmgt_pgfn *
350 __kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn)
351 {
352         struct kvmgt_pgfn *p, *res = NULL;
353
354         lockdep_assert_held(&info->vgpu_lock);
355
356         hash_for_each_possible(info->ptable, p, hnode, gfn) {
357                 if (gfn == p->gfn) {
358                         res = p;
359                         break;
360                 }
361         }
362
363         return res;
364 }
365
366 static bool kvmgt_gfn_is_write_protected(struct intel_vgpu *info, gfn_t gfn)
367 {
368         struct kvmgt_pgfn *p;
369
370         p = __kvmgt_protect_table_find(info, gfn);
371         return !!p;
372 }
373
374 static void kvmgt_protect_table_add(struct intel_vgpu *info, gfn_t gfn)
375 {
376         struct kvmgt_pgfn *p;
377
378         if (kvmgt_gfn_is_write_protected(info, gfn))
379                 return;
380
381         p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
382         if (WARN(!p, "gfn: 0x%llx\n", gfn))
383                 return;
384
385         p->gfn = gfn;
386         hash_add(info->ptable, &p->hnode, gfn);
387 }
388
389 static void kvmgt_protect_table_del(struct intel_vgpu *info, gfn_t gfn)
390 {
391         struct kvmgt_pgfn *p;
392
393         p = __kvmgt_protect_table_find(info, gfn);
394         if (p) {
395                 hash_del(&p->hnode);
396                 kfree(p);
397         }
398 }
399
400 static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
401                 size_t count, loff_t *ppos, bool iswrite)
402 {
403         unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
404                         VFIO_PCI_NUM_REGIONS;
405         void *base = vgpu->region[i].data;
406         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
407
408
409         if (pos >= vgpu->region[i].size || iswrite) {
410                 gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
411                 return -EINVAL;
412         }
413         count = min(count, (size_t)(vgpu->region[i].size - pos));
414         memcpy(buf, base + pos, count);
415
416         return count;
417 }
418
419 static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
420                 struct vfio_region *region)
421 {
422 }
423
424 static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
425         .rw = intel_vgpu_reg_rw_opregion,
426         .release = intel_vgpu_reg_release_opregion,
427 };
428
429 static int handle_edid_regs(struct intel_vgpu *vgpu,
430                         struct vfio_edid_region *region, char *buf,
431                         size_t count, u16 offset, bool is_write)
432 {
433         struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
434         unsigned int data;
435
436         if (offset + count > sizeof(*regs))
437                 return -EINVAL;
438
439         if (count != 4)
440                 return -EINVAL;
441
442         if (is_write) {
443                 data = *((unsigned int *)buf);
444                 switch (offset) {
445                 case offsetof(struct vfio_region_gfx_edid, link_state):
446                         if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
447                                 if (!drm_edid_block_valid(
448                                         (u8 *)region->edid_blob,
449                                         0,
450                                         true,
451                                         NULL)) {
452                                         gvt_vgpu_err("invalid EDID blob\n");
453                                         return -EINVAL;
454                                 }
455                                 intel_vgpu_emulate_hotplug(vgpu, true);
456                         } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
457                                 intel_vgpu_emulate_hotplug(vgpu, false);
458                         else {
459                                 gvt_vgpu_err("invalid EDID link state %d\n",
460                                         data);
461                                 return -EINVAL;
462                         }
463                         regs->link_state = data;
464                         break;
465                 case offsetof(struct vfio_region_gfx_edid, edid_size):
466                         if (data > regs->edid_max_size) {
467                                 gvt_vgpu_err("EDID size is bigger than %d!\n",
468                                         regs->edid_max_size);
469                                 return -EINVAL;
470                         }
471                         regs->edid_size = data;
472                         break;
473                 default:
474                         /* read-only regs */
475                         gvt_vgpu_err("write read-only EDID region at offset %d\n",
476                                 offset);
477                         return -EPERM;
478                 }
479         } else {
480                 memcpy(buf, (char *)regs + offset, count);
481         }
482
483         return count;
484 }
485
486 static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
487                         size_t count, u16 offset, bool is_write)
488 {
489         if (offset + count > region->vfio_edid_regs.edid_size)
490                 return -EINVAL;
491
492         if (is_write)
493                 memcpy(region->edid_blob + offset, buf, count);
494         else
495                 memcpy(buf, region->edid_blob + offset, count);
496
497         return count;
498 }
499
500 static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
501                 size_t count, loff_t *ppos, bool iswrite)
502 {
503         int ret;
504         unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
505                         VFIO_PCI_NUM_REGIONS;
506         struct vfio_edid_region *region = vgpu->region[i].data;
507         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
508
509         if (pos < region->vfio_edid_regs.edid_offset) {
510                 ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
511         } else {
512                 pos -= EDID_BLOB_OFFSET;
513                 ret = handle_edid_blob(region, buf, count, pos, iswrite);
514         }
515
516         if (ret < 0)
517                 gvt_vgpu_err("failed to access EDID region\n");
518
519         return ret;
520 }
521
522 static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
523                                         struct vfio_region *region)
524 {
525         kfree(region->data);
526 }
527
528 static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
529         .rw = intel_vgpu_reg_rw_edid,
530         .release = intel_vgpu_reg_release_edid,
531 };
532
533 static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
534                 unsigned int type, unsigned int subtype,
535                 const struct intel_vgpu_regops *ops,
536                 size_t size, u32 flags, void *data)
537 {
538         struct vfio_region *region;
539
540         region = krealloc(vgpu->region,
541                         (vgpu->num_regions + 1) * sizeof(*region),
542                         GFP_KERNEL);
543         if (!region)
544                 return -ENOMEM;
545
546         vgpu->region = region;
547         vgpu->region[vgpu->num_regions].type = type;
548         vgpu->region[vgpu->num_regions].subtype = subtype;
549         vgpu->region[vgpu->num_regions].ops = ops;
550         vgpu->region[vgpu->num_regions].size = size;
551         vgpu->region[vgpu->num_regions].flags = flags;
552         vgpu->region[vgpu->num_regions].data = data;
553         vgpu->num_regions++;
554         return 0;
555 }
556
557 int intel_gvt_set_opregion(struct intel_vgpu *vgpu)
558 {
559         void *base;
560         int ret;
561
562         /* Each vgpu has its own opregion, although VFIO would create another
563          * one later. This one is used to expose the opregion to VFIO; the
564          * one VFIO creates later is what the guest actually uses.
565          */
566         base = vgpu_opregion(vgpu)->va;
567         if (!base)
568                 return -ENOMEM;
569
570         if (memcmp(base, OPREGION_SIGNATURE, 16)) {
571                 memunmap(base);
572                 return -EINVAL;
573         }
574
575         ret = intel_vgpu_register_reg(vgpu,
576                         PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
577                         VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
578                         &intel_vgpu_regops_opregion, OPREGION_SIZE,
579                         VFIO_REGION_INFO_FLAG_READ, base);
580
581         return ret;
582 }
583
584 int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
585 {
586         struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
587         struct vfio_edid_region *base;
588         int ret;
589
590         base = kzalloc(sizeof(*base), GFP_KERNEL);
591         if (!base)
592                 return -ENOMEM;
593
594         /* TODO: Add multi-port and EDID extension block support */
595         base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
596         base->vfio_edid_regs.edid_max_size = EDID_SIZE;
597         base->vfio_edid_regs.edid_size = EDID_SIZE;
598         base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
599         base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
600         base->edid_blob = port->edid->edid_block;
601
602         ret = intel_vgpu_register_reg(vgpu,
603                         VFIO_REGION_TYPE_GFX,
604                         VFIO_REGION_SUBTYPE_GFX_EDID,
605                         &intel_vgpu_regops_edid, EDID_SIZE,
606                         VFIO_REGION_INFO_FLAG_READ |
607                         VFIO_REGION_INFO_FLAG_WRITE |
608                         VFIO_REGION_INFO_FLAG_CAPS, base);
609
610         return ret;
611 }
612
613 static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
614                                  u64 length)
615 {
616         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
617         struct gvt_dma *entry;
618         u64 iov_pfn = iova >> PAGE_SHIFT;
619         u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
620
621         mutex_lock(&vgpu->cache_lock);
622         for (; iov_pfn < end_iov_pfn; iov_pfn++) {
623                 entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
624                 if (!entry)
625                         continue;
626
627                 gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
628                                    entry->size);
629                 __gvt_cache_remove_entry(vgpu, entry);
630         }
631         mutex_unlock(&vgpu->cache_lock);
632 }
633
634 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
635 {
636         struct intel_vgpu *itr;
637         int id;
638         bool ret = false;
639
640         mutex_lock(&vgpu->gvt->lock);
641         for_each_active_vgpu(vgpu->gvt, itr, id) {
642                 if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, itr->status))
643                         continue;
644
645                 if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
646                         ret = true;
647                         goto out;
648                 }
649         }
650 out:
651         mutex_unlock(&vgpu->gvt->lock);
652         return ret;
653 }
654
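/*
 * Opening the device requires a KVM instance owned by the caller's mm;
 * attaching registers the page-track notifier and exposes the DMA
 * cache size (nr_cache_entries) via debugfs.
 */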
655 static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
656 {
657         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
658
659         if (!vgpu->vfio_device.kvm ||
660             vgpu->vfio_device.kvm->mm != current->mm) {
661                 gvt_vgpu_err("KVM is required to use Intel vGPU\n");
662                 return -ESRCH;
663         }
664
665         if (__kvmgt_vgpu_exist(vgpu))
666                 return -EEXIST;
667
668         vgpu->track_node.track_write = kvmgt_page_track_write;
669         vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
670         kvm_get_kvm(vgpu->vfio_device.kvm);
671         kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
672                                          &vgpu->track_node);
673
674         set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
675
676         debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
677                              &vgpu->nr_cache_entries);
678
679         intel_gvt_activate_vgpu(vgpu);
680
681         return 0;
682 }
683
684 static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
685 {
686         struct eventfd_ctx *trigger;
687
688         trigger = vgpu->msi_trigger;
689         if (trigger) {
690                 eventfd_ctx_put(trigger);
691                 vgpu->msi_trigger = NULL;
692         }
693 }
694
695 static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
696 {
697         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
698
699         intel_gvt_release_vgpu(vgpu);
700
701         clear_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
702
703         debugfs_lookup_and_remove(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs);
704
705         kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
706                                            &vgpu->track_node);
707         kvm_put_kvm(vgpu->vfio_device.kvm);
708
709         kvmgt_protect_table_destroy(vgpu);
710         gvt_cache_destroy(vgpu);
711
712         WARN_ON(vgpu->nr_cache_entries);
713
714         vgpu->gfn_cache = RB_ROOT;
715         vgpu->dma_addr_cache = RB_ROOT;
716
717         intel_vgpu_release_msi_eventfd_ctx(vgpu);
718 }
719
720 static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
721 {
722         u32 start_lo, start_hi;
723         u32 mem_type;
724
725         start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
726                         PCI_BASE_ADDRESS_MEM_MASK;
727         mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
728                         PCI_BASE_ADDRESS_MEM_TYPE_MASK;
729
730         switch (mem_type) {
731         case PCI_BASE_ADDRESS_MEM_TYPE_64:
732                 start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
733                                                 + bar + 4));
734                 break;
735         case PCI_BASE_ADDRESS_MEM_TYPE_32:
736         case PCI_BASE_ADDRESS_MEM_TYPE_1M:
737                 /* 1M mem BAR treated as 32-bit BAR */
738         default:
739                 /* mem unknown type treated as 32-bit BAR */
740                 start_hi = 0;
741                 break;
742         }
743
744         return ((u64)start_hi << 32) | start_lo;
745 }
746
747 static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
748                              void *buf, unsigned int count, bool is_write)
749 {
750         u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
751         int ret;
752
753         if (is_write)
754                 ret = intel_vgpu_emulate_mmio_write(vgpu,
755                                         bar_start + off, buf, count);
756         else
757                 ret = intel_vgpu_emulate_mmio_read(vgpu,
758                                         bar_start + off, buf, count);
759         return ret;
760 }
761
762 static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
763 {
764         return off >= vgpu_aperture_offset(vgpu) &&
765                off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
766 }
767
768 static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
769                 void *buf, unsigned long count, bool is_write)
770 {
771         void __iomem *aperture_va;
772
773         if (!intel_vgpu_in_aperture(vgpu, off) ||
774             !intel_vgpu_in_aperture(vgpu, off + count)) {
775                 gvt_vgpu_err("Invalid aperture offset %llu\n", off);
776                 return -EINVAL;
777         }
778
779         aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap,
780                                         ALIGN_DOWN(off, PAGE_SIZE),
781                                         count + offset_in_page(off));
782         if (!aperture_va)
783                 return -EIO;
784
785         if (is_write)
786                 memcpy_toio(aperture_va + offset_in_page(off), buf, count);
787         else
788                 memcpy_fromio(buf, aperture_va + offset_in_page(off), count);
789
790         io_mapping_unmap(aperture_va);
791
792         return 0;
793 }
794
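/*
 * Dispatch a read or write to the proper emulation path based on the
 * VFIO region index encoded in *ppos: config space, BAR0 MMIO, the
 * BAR2 aperture, or a device-specific region (opregion/EDID).
 */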
795 static ssize_t intel_vgpu_rw(struct intel_vgpu *vgpu, char *buf,
796                         size_t count, loff_t *ppos, bool is_write)
797 {
798         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
799         u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
800         int ret = -EINVAL;
801
802
803         if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) {
804                 gvt_vgpu_err("invalid index: %u\n", index);
805                 return -EINVAL;
806         }
807
808         switch (index) {
809         case VFIO_PCI_CONFIG_REGION_INDEX:
810                 if (is_write)
811                         ret = intel_vgpu_emulate_cfg_write(vgpu, pos,
812                                                 buf, count);
813                 else
814                         ret = intel_vgpu_emulate_cfg_read(vgpu, pos,
815                                                 buf, count);
816                 break;
817         case VFIO_PCI_BAR0_REGION_INDEX:
818                 ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
819                                         buf, count, is_write);
820                 break;
821         case VFIO_PCI_BAR2_REGION_INDEX:
822                 ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
823                 break;
824         case VFIO_PCI_BAR1_REGION_INDEX:
825         case VFIO_PCI_BAR3_REGION_INDEX:
826         case VFIO_PCI_BAR4_REGION_INDEX:
827         case VFIO_PCI_BAR5_REGION_INDEX:
828         case VFIO_PCI_VGA_REGION_INDEX:
829         case VFIO_PCI_ROM_REGION_INDEX:
830                 break;
831         default:
832                 if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions)
833                         return -EINVAL;
834
835                 index -= VFIO_PCI_NUM_REGIONS;
836                 return vgpu->region[index].ops->rw(vgpu, buf, count,
837                                 ppos, is_write);
838         }
839
840         return ret == 0 ? count : ret;
841 }
842
843 static bool gtt_entry(struct intel_vgpu *vgpu, loff_t *ppos)
844 {
845         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
846         struct intel_gvt *gvt = vgpu->gvt;
847         int offset;
848
849         /* Only allow MMIO GGTT entry access */
850         if (index != PCI_BASE_ADDRESS_0)
851                 return false;
852
853         offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
854                 intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
855
856         return (offset >= gvt->device_info.gtt_start_offset &&
857                 offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
858                         true : false;
859 }
860
861 static ssize_t intel_vgpu_read(struct vfio_device *vfio_dev, char __user *buf,
862                         size_t count, loff_t *ppos)
863 {
864         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
865         unsigned int done = 0;
866         int ret;
867
868         while (count) {
869                 size_t filled;
870
871                 /* Only 8-byte GGTT entry reads are supported */
872                 if (count >= 8 && !(*ppos % 8) &&
873                         gtt_entry(vgpu, ppos)) {
874                         u64 val;
875
876                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
877                                         ppos, false);
878                         if (ret <= 0)
879                                 goto read_err;
880
881                         if (copy_to_user(buf, &val, sizeof(val)))
882                                 goto read_err;
883
884                         filled = 8;
885                 } else if (count >= 4 && !(*ppos % 4)) {
886                         u32 val;
887
888                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
889                                         ppos, false);
890                         if (ret <= 0)
891                                 goto read_err;
892
893                         if (copy_to_user(buf, &val, sizeof(val)))
894                                 goto read_err;
895
896                         filled = 4;
897                 } else if (count >= 2 && !(*ppos % 2)) {
898                         u16 val;
899
900                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
901                                         ppos, false);
902                         if (ret <= 0)
903                                 goto read_err;
904
905                         if (copy_to_user(buf, &val, sizeof(val)))
906                                 goto read_err;
907
908                         filled = 2;
909                 } else {
910                         u8 val;
911
912                         ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos,
913                                         false);
914                         if (ret <= 0)
915                                 goto read_err;
916
917                         if (copy_to_user(buf, &val, sizeof(val)))
918                                 goto read_err;
919
920                         filled = 1;
921                 }
922
923                 count -= filled;
924                 done += filled;
925                 *ppos += filled;
926                 buf += filled;
927         }
928
929         return done;
930
931 read_err:
932         return -EFAULT;
933 }
934
935 static ssize_t intel_vgpu_write(struct vfio_device *vfio_dev,
936                                 const char __user *buf,
937                                 size_t count, loff_t *ppos)
938 {
939         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
940         unsigned int done = 0;
941         int ret;
942
943         while (count) {
944                 size_t filled;
945
946                 /* Only 8-byte GGTT entry writes are supported */
947                 if (count >= 8 && !(*ppos % 8) &&
948                         gtt_entry(vgpu, ppos)) {
949                         u64 val;
950
951                         if (copy_from_user(&val, buf, sizeof(val)))
952                                 goto write_err;
953
954                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
955                                         ppos, true);
956                         if (ret <= 0)
957                                 goto write_err;
958
959                         filled = 8;
960                 } else if (count >= 4 && !(*ppos % 4)) {
961                         u32 val;
962
963                         if (copy_from_user(&val, buf, sizeof(val)))
964                                 goto write_err;
965
966                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
967                                         ppos, true);
968                         if (ret <= 0)
969                                 goto write_err;
970
971                         filled = 4;
972                 } else if (count >= 2 && !(*ppos % 2)) {
973                         u16 val;
974
975                         if (copy_from_user(&val, buf, sizeof(val)))
976                                 goto write_err;
977
978                         ret = intel_vgpu_rw(vgpu, (char *)&val,
979                                         sizeof(val), ppos, true);
980                         if (ret <= 0)
981                                 goto write_err;
982
983                         filled = 2;
984                 } else {
985                         u8 val;
986
987                         if (copy_from_user(&val, buf, sizeof(val)))
988                                 goto write_err;
989
990                         ret = intel_vgpu_rw(vgpu, &val, sizeof(val),
991                                         ppos, true);
992                         if (ret <= 0)
993                                 goto write_err;
994
995                         filled = 1;
996                 }
997
998                 count -= filled;
999                 done += filled;
1000                 *ppos += filled;
1001                 buf += filled;
1002         }
1003
1004         return done;
1005 write_err:
1006         return -EFAULT;
1007 }
1008
1009 static int intel_vgpu_mmap(struct vfio_device *vfio_dev,
1010                 struct vm_area_struct *vma)
1011 {
1012         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1013         unsigned int index;
1014         u64 virtaddr;
1015         unsigned long req_size, pgoff, req_start;
1016         pgprot_t pg_prot;
1017
1018         index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
1019         if (index >= VFIO_PCI_ROM_REGION_INDEX)
1020                 return -EINVAL;
1021
1022         if (vma->vm_end < vma->vm_start)
1023                 return -EINVAL;
1024         if ((vma->vm_flags & VM_SHARED) == 0)
1025                 return -EINVAL;
1026         if (index != VFIO_PCI_BAR2_REGION_INDEX)
1027                 return -EINVAL;
1028
1029         pg_prot = vma->vm_page_prot;
1030         virtaddr = vma->vm_start;
1031         req_size = vma->vm_end - vma->vm_start;
1032         pgoff = vma->vm_pgoff &
1033                 ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
1034         req_start = pgoff << PAGE_SHIFT;
1035
1036         if (!intel_vgpu_in_aperture(vgpu, req_start))
1037                 return -EINVAL;
1038         if (req_start + req_size >
1039             vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
1040                 return -EINVAL;
1041
1042         pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;
1043
1044         return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
1045 }
1046
1047 static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
1048 {
1049         if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
1050                 return 1;
1051
1052         return 0;
1053 }
1054
1055 static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
1056                         unsigned int index, unsigned int start,
1057                         unsigned int count, u32 flags,
1058                         void *data)
1059 {
1060         return 0;
1061 }
1062
1063 static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
1064                         unsigned int index, unsigned int start,
1065                         unsigned int count, u32 flags, void *data)
1066 {
1067         return 0;
1068 }
1069
1070 static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
1071                 unsigned int index, unsigned int start, unsigned int count,
1072                 u32 flags, void *data)
1073 {
1074         return 0;
1075 }
1076
1077 static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
1078                 unsigned int index, unsigned int start, unsigned int count,
1079                 u32 flags, void *data)
1080 {
1081         struct eventfd_ctx *trigger;
1082
1083         if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
1084                 int fd = *(int *)data;
1085
1086                 trigger = eventfd_ctx_fdget(fd);
1087                 if (IS_ERR(trigger)) {
1088                         gvt_vgpu_err("eventfd_ctx_fdget failed\n");
1089                         return PTR_ERR(trigger);
1090                 }
1091                 vgpu->msi_trigger = trigger;
1092         } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
1093                 intel_vgpu_release_msi_eventfd_ctx(vgpu);
1094
1095         return 0;
1096 }
1097
1098 static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
1099                 unsigned int index, unsigned int start, unsigned int count,
1100                 void *data)
1101 {
1102         int (*func)(struct intel_vgpu *vgpu, unsigned int index,
1103                         unsigned int start, unsigned int count, u32 flags,
1104                         void *data) = NULL;
1105
1106         switch (index) {
1107         case VFIO_PCI_INTX_IRQ_INDEX:
1108                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1109                 case VFIO_IRQ_SET_ACTION_MASK:
1110                         func = intel_vgpu_set_intx_mask;
1111                         break;
1112                 case VFIO_IRQ_SET_ACTION_UNMASK:
1113                         func = intel_vgpu_set_intx_unmask;
1114                         break;
1115                 case VFIO_IRQ_SET_ACTION_TRIGGER:
1116                         func = intel_vgpu_set_intx_trigger;
1117                         break;
1118                 }
1119                 break;
1120         case VFIO_PCI_MSI_IRQ_INDEX:
1121                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1122                 case VFIO_IRQ_SET_ACTION_MASK:
1123                 case VFIO_IRQ_SET_ACTION_UNMASK:
1124                         /* XXX Need masking support exported */
1125                         break;
1126                 case VFIO_IRQ_SET_ACTION_TRIGGER:
1127                         func = intel_vgpu_set_msi_trigger;
1128                         break;
1129                 }
1130                 break;
1131         }
1132
1133         if (!func)
1134                 return -ENOTTY;
1135
1136         return func(vgpu, index, start, count, flags, data);
1137 }
1138
1139 static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
1140                              unsigned long arg)
1141 {
1142         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1143         unsigned long minsz;
1144
1145         gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);
1146
1147         if (cmd == VFIO_DEVICE_GET_INFO) {
1148                 struct vfio_device_info info;
1149
1150                 minsz = offsetofend(struct vfio_device_info, num_irqs);
1151
1152                 if (copy_from_user(&info, (void __user *)arg, minsz))
1153                         return -EFAULT;
1154
1155                 if (info.argsz < minsz)
1156                         return -EINVAL;
1157
1158                 info.flags = VFIO_DEVICE_FLAGS_PCI;
1159                 info.flags |= VFIO_DEVICE_FLAGS_RESET;
1160                 info.num_regions = VFIO_PCI_NUM_REGIONS +
1161                                 vgpu->num_regions;
1162                 info.num_irqs = VFIO_PCI_NUM_IRQS;
1163
1164                 return copy_to_user((void __user *)arg, &info, minsz) ?
1165                         -EFAULT : 0;
1166
1167         } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
1168                 struct vfio_region_info info;
1169                 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
1170                 unsigned int i;
1171                 int ret;
1172                 struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
1173                 int nr_areas = 1;
1174                 int cap_type_id;
1175
1176                 minsz = offsetofend(struct vfio_region_info, offset);
1177
1178                 if (copy_from_user(&info, (void __user *)arg, minsz))
1179                         return -EFAULT;
1180
1181                 if (info.argsz < minsz)
1182                         return -EINVAL;
1183
1184                 switch (info.index) {
1185                 case VFIO_PCI_CONFIG_REGION_INDEX:
1186                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1187                         info.size = vgpu->gvt->device_info.cfg_space_size;
1188                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1189                                      VFIO_REGION_INFO_FLAG_WRITE;
1190                         break;
1191                 case VFIO_PCI_BAR0_REGION_INDEX:
1192                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1193                         info.size = vgpu->cfg_space.bar[info.index].size;
1194                         if (!info.size) {
1195                                 info.flags = 0;
1196                                 break;
1197                         }
1198
1199                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1200                                      VFIO_REGION_INFO_FLAG_WRITE;
1201                         break;
1202                 case VFIO_PCI_BAR1_REGION_INDEX:
1203                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1204                         info.size = 0;
1205                         info.flags = 0;
1206                         break;
1207                 case VFIO_PCI_BAR2_REGION_INDEX:
1208                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1209                         info.flags = VFIO_REGION_INFO_FLAG_CAPS |
1210                                         VFIO_REGION_INFO_FLAG_MMAP |
1211                                         VFIO_REGION_INFO_FLAG_READ |
1212                                         VFIO_REGION_INFO_FLAG_WRITE;
1213                         info.size = gvt_aperture_sz(vgpu->gvt);
1214
1215                         sparse = kzalloc(struct_size(sparse, areas, nr_areas),
1216                                          GFP_KERNEL);
1217                         if (!sparse)
1218                                 return -ENOMEM;
1219
1220                         sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1221                         sparse->header.version = 1;
1222                         sparse->nr_areas = nr_areas;
1223                         cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1224                         sparse->areas[0].offset =
1225                                         PAGE_ALIGN(vgpu_aperture_offset(vgpu));
1226                         sparse->areas[0].size = vgpu_aperture_sz(vgpu);
1227                         break;
1228
1229                 case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
1230                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1231                         info.size = 0;
1232                         info.flags = 0;
1233
1234                         gvt_dbg_core("get region info bar:%d\n", info.index);
1235                         break;
1236
1237                 case VFIO_PCI_ROM_REGION_INDEX:
1238                 case VFIO_PCI_VGA_REGION_INDEX:
1239                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1240                         info.size = 0;
1241                         info.flags = 0;
1242
1243                         gvt_dbg_core("get region info index:%d\n", info.index);
1244                         break;
1245                 default:
1246                         {
1247                                 struct vfio_region_info_cap_type cap_type = {
1248                                         .header.id = VFIO_REGION_INFO_CAP_TYPE,
1249                                         .header.version = 1 };
1250
1251                                 if (info.index >= VFIO_PCI_NUM_REGIONS +
1252                                                 vgpu->num_regions)
1253                                         return -EINVAL;
1254                                 info.index =
1255                                         array_index_nospec(info.index,
1256                                                         VFIO_PCI_NUM_REGIONS +
1257                                                         vgpu->num_regions);
1258
1259                                 i = info.index - VFIO_PCI_NUM_REGIONS;
1260
1261                                 info.offset =
1262                                         VFIO_PCI_INDEX_TO_OFFSET(info.index);
1263                                 info.size = vgpu->region[i].size;
1264                                 info.flags = vgpu->region[i].flags;
1265
1266                                 cap_type.type = vgpu->region[i].type;
1267                                 cap_type.subtype = vgpu->region[i].subtype;
1268
1269                                 ret = vfio_info_add_capability(&caps,
1270                                                         &cap_type.header,
1271                                                         sizeof(cap_type));
1272                                 if (ret)
1273                                         return ret;
1274                         }
1275                 }
1276
1277                 if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
1278                         switch (cap_type_id) {
1279                         case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
1280                                 ret = vfio_info_add_capability(&caps,
1281                                         &sparse->header,
1282                                         struct_size(sparse, areas,
1283                                                     sparse->nr_areas));
1284                                 if (ret) {
1285                                         kfree(sparse);
1286                                         return ret;
1287                                 }
1288                                 break;
1289                         default:
1290                                 kfree(sparse);
1291                                 return -EINVAL;
1292                         }
1293                 }
1294
1295                 if (caps.size) {
1296                         info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
1297                         if (info.argsz < sizeof(info) + caps.size) {
1298                                 info.argsz = sizeof(info) + caps.size;
1299                                 info.cap_offset = 0;
1300                         } else {
1301                                 vfio_info_cap_shift(&caps, sizeof(info));
1302                                 if (copy_to_user((void __user *)arg +
1303                                                   sizeof(info), caps.buf,
1304                                                   caps.size)) {
1305                                         kfree(caps.buf);
1306                                         kfree(sparse);
1307                                         return -EFAULT;
1308                                 }
1309                                 info.cap_offset = sizeof(info);
1310                         }
1311
1312                         kfree(caps.buf);
1313                 }
1314
1315                 kfree(sparse);
1316                 return copy_to_user((void __user *)arg, &info, minsz) ?
1317                         -EFAULT : 0;
1318         } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
1319                 struct vfio_irq_info info;
1320
1321                 minsz = offsetofend(struct vfio_irq_info, count);
1322
1323                 if (copy_from_user(&info, (void __user *)arg, minsz))
1324                         return -EFAULT;
1325
1326                 if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
1327                         return -EINVAL;
1328
1329                 switch (info.index) {
1330                 case VFIO_PCI_INTX_IRQ_INDEX:
1331                 case VFIO_PCI_MSI_IRQ_INDEX:
1332                         break;
1333                 default:
1334                         return -EINVAL;
1335                 }
1336
1337                 info.flags = VFIO_IRQ_INFO_EVENTFD;
1338
1339                 info.count = intel_vgpu_get_irq_count(vgpu, info.index);
1340
1341                 if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
1342                         info.flags |= (VFIO_IRQ_INFO_MASKABLE |
1343                                        VFIO_IRQ_INFO_AUTOMASKED);
1344                 else
1345                         info.flags |= VFIO_IRQ_INFO_NORESIZE;
1346
1347                 return copy_to_user((void __user *)arg, &info, minsz) ?
1348                         -EFAULT : 0;
1349         } else if (cmd == VFIO_DEVICE_SET_IRQS) {
1350                 struct vfio_irq_set hdr;
1351                 u8 *data = NULL;
1352                 int ret = 0;
1353                 size_t data_size = 0;
1354
1355                 minsz = offsetofend(struct vfio_irq_set, count);
1356
1357                 if (copy_from_user(&hdr, (void __user *)arg, minsz))
1358                         return -EFAULT;
1359
1360                 if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
1361                         int max = intel_vgpu_get_irq_count(vgpu, hdr.index);
1362
1363                         ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
1364                                                 VFIO_PCI_NUM_IRQS, &data_size);
1365                         if (ret) {
1366                                 gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
1367                                 return -EINVAL;
1368                         }
1369                         if (data_size) {
1370                                 data = memdup_user((void __user *)(arg + minsz),
1371                                                    data_size);
1372                                 if (IS_ERR(data))
1373                                         return PTR_ERR(data);
1374                         }
1375                 }
1376
1377                 ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
1378                                         hdr.start, hdr.count, data);
1379                 kfree(data);
1380
1381                 return ret;
1382         } else if (cmd == VFIO_DEVICE_RESET) {
1383                 intel_gvt_reset_vgpu(vgpu);
1384                 return 0;
1385         } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
1386                 struct vfio_device_gfx_plane_info dmabuf;
1387                 int ret = 0;
1388
1389                 minsz = offsetofend(struct vfio_device_gfx_plane_info,
1390                                     dmabuf_id);
1391                 if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
1392                         return -EFAULT;
1393                 if (dmabuf.argsz < minsz)
1394                         return -EINVAL;
1395
1396                 ret = intel_vgpu_query_plane(vgpu, &dmabuf);
1397                 if (ret != 0)
1398                         return ret;
1399
1400                 return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
1401                                                                 -EFAULT : 0;
1402         } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
1403                 __u32 dmabuf_id;
1404
1405                 if (get_user(dmabuf_id, (__u32 __user *)arg))
1406                         return -EFAULT;
1407                 return intel_vgpu_get_dmabuf(vgpu, dmabuf_id);
1408         }
1409
1410         return -ENOTTY;
1411 }
1412
1413 static ssize_t
1414 vgpu_id_show(struct device *dev, struct device_attribute *attr,
1415              char *buf)
1416 {
1417         struct intel_vgpu *vgpu = dev_get_drvdata(dev);
1418
1419         return sprintf(buf, "%d\n", vgpu->id);
1420 }
1421
1422 static DEVICE_ATTR_RO(vgpu_id);
1423
1424 static struct attribute *intel_vgpu_attrs[] = {
1425         &dev_attr_vgpu_id.attr,
1426         NULL
1427 };
1428
1429 static const struct attribute_group intel_vgpu_group = {
1430         .name = "intel_vgpu",
1431         .attrs = intel_vgpu_attrs,
1432 };
1433
1434 static const struct attribute_group *intel_vgpu_groups[] = {
1435         &intel_vgpu_group,
1436         NULL,
1437 };
1438
1439 static int intel_vgpu_init_dev(struct vfio_device *vfio_dev)
1440 {
1441         struct mdev_device *mdev = to_mdev_device(vfio_dev->dev);
1442         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1443         struct intel_vgpu_type *type =
1444                 container_of(mdev->type, struct intel_vgpu_type, type);
1445         int ret;
1446
1447         vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt;
1448         ret = intel_gvt_create_vgpu(vgpu, type->conf);
1449         if (ret)
1450                 return ret;
1451
1452         kvmgt_protect_table_init(vgpu);
1453         gvt_cache_init(vgpu);
1454
1455         return 0;
1456 }
1457
1458 static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
1459 {
1460         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1461
1462         intel_gvt_destroy_vgpu(vgpu);
1463 }
1464
1465 static const struct vfio_device_ops intel_vgpu_dev_ops = {
1466         .init           = intel_vgpu_init_dev,
1467         .release        = intel_vgpu_release_dev,
1468         .open_device    = intel_vgpu_open_device,
1469         .close_device   = intel_vgpu_close_device,
1470         .read           = intel_vgpu_read,
1471         .write          = intel_vgpu_write,
1472         .mmap           = intel_vgpu_mmap,
1473         .ioctl          = intel_vgpu_ioctl,
1474         .dma_unmap      = intel_vgpu_dma_unmap,
1475         .bind_iommufd   = vfio_iommufd_emulated_bind,
1476         .unbind_iommufd = vfio_iommufd_emulated_unbind,
1477         .attach_ioas    = vfio_iommufd_emulated_attach_ioas,
1478 };
1479
1480 static int intel_vgpu_probe(struct mdev_device *mdev)
1481 {
1482         struct intel_vgpu *vgpu;
1483         int ret;
1484
1485         vgpu = vfio_alloc_device(intel_vgpu, vfio_device, &mdev->dev,
1486                                  &intel_vgpu_dev_ops);
1487         if (IS_ERR(vgpu)) {
1488                 gvt_err("failed to create intel vgpu: %ld\n", PTR_ERR(vgpu));
1489                 return PTR_ERR(vgpu);
1490         }
1491
1492         dev_set_drvdata(&mdev->dev, vgpu);
1493         ret = vfio_register_emulated_iommu_dev(&vgpu->vfio_device);
1494         if (ret)
1495                 goto out_put_vdev;
1496
1497         gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
1498                      dev_name(mdev_dev(mdev)));
1499         return 0;
1500
1501 out_put_vdev:
1502         vfio_put_device(&vgpu->vfio_device);
1503         return ret;
1504 }
1505
1506 static void intel_vgpu_remove(struct mdev_device *mdev)
1507 {
1508         struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev);
1509
1510         vfio_unregister_group_dev(&vgpu->vfio_device);
1511         vfio_put_device(&vgpu->vfio_device);
1512 }
1513
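/*
 * Report how many more vGPUs of @mtype can still be created: the minimum of
 * the remaining low GM, high GM and fence registers (after the host
 * reservation and what is already allocated to vGPUs) divided by the
 * per-instance requirements of the type.
 */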
1514 static unsigned int intel_vgpu_get_available(struct mdev_type *mtype)
1515 {
1516         struct intel_vgpu_type *type =
1517                 container_of(mtype, struct intel_vgpu_type, type);
1518         struct intel_gvt *gvt = kdev_to_i915(mtype->parent->dev)->gvt;
1519         unsigned int low_gm_avail, high_gm_avail, fence_avail;
1520
1521         mutex_lock(&gvt->lock);
1522         low_gm_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE -
1523                 gvt->gm.vgpu_allocated_low_gm_size;
1524         high_gm_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE -
1525                 gvt->gm.vgpu_allocated_high_gm_size;
1526         fence_avail = gvt_fence_sz(gvt) - HOST_FENCE -
1527                 gvt->fence.vgpu_allocated_fence_num;
1528         mutex_unlock(&gvt->lock);
1529
1530         return min3(low_gm_avail / type->conf->low_mm,
1531                     high_gm_avail / type->conf->high_mm,
1532                     fence_avail / type->conf->fence);
1533 }
1534
1535 static struct mdev_driver intel_vgpu_mdev_driver = {
1536         .device_api     = VFIO_DEVICE_API_PCI_STRING,
1537         .driver = {
1538                 .name           = "intel_vgpu_mdev",
1539                 .owner          = THIS_MODULE,
1540                 .dev_groups     = intel_vgpu_groups,
1541         },
1542         .probe                  = intel_vgpu_probe,
1543         .remove                 = intel_vgpu_remove,
1544         .get_available          = intel_vgpu_get_available,
1545         .show_description       = intel_vgpu_show_description,
1546 };
1547
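/**
 * intel_gvt_page_track_add - write-protect a guest page for tracking
 * @info: the vGPU that owns the page
 * @gfn: guest frame number of the page to protect
 *
 * Write-protect @gfn through KVM's page-track facility so that guest writes
 * to it are forwarded to kvmgt_page_track_write(). Pages that are already
 * protected are left as-is.
 *
 * Returns:
 * Zero on success, -ESRCH if the vGPU is not attached, -EINVAL if @gfn has
 * no memslot.
 */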
1548 int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
1549 {
1550         struct kvm *kvm = info->vfio_device.kvm;
1551         struct kvm_memory_slot *slot;
1552         int idx;
1553
1554         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
1555                 return -ESRCH;
1556
1557         if (kvmgt_gfn_is_write_protected(info, gfn))
1558                 return 0;
1559
1560         idx = srcu_read_lock(&kvm->srcu);
1561         slot = gfn_to_memslot(kvm, gfn);
1562         if (!slot) {
1563                 srcu_read_unlock(&kvm->srcu, idx);
1564                 return -EINVAL;
1565         }
1566
1567         write_lock(&kvm->mmu_lock);
1568         kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1569         write_unlock(&kvm->mmu_lock);
1570
1571         srcu_read_unlock(&kvm->srcu, idx);
1572
1573         kvmgt_protect_table_add(info, gfn);
1574         return 0;
1575 }
1576
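/**
 * intel_gvt_page_track_remove - stop write-protecting a guest page
 * @info: the vGPU that owns the page
 * @gfn: guest frame number of the page to release
 *
 * Undo intel_gvt_page_track_add(): remove the KVM write-protection for @gfn
 * and drop it from the per-vGPU protect table.
 *
 * Returns:
 * Zero on success, -ESRCH if the vGPU is not attached, -EINVAL if @gfn has
 * no memslot.
 */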
1577 int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
1578 {
1579         struct kvm *kvm = info->vfio_device.kvm;
1580         struct kvm_memory_slot *slot;
1581         int idx;
1582
1583         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
1584                 return -ESRCH;
1585
1586         if (!kvmgt_gfn_is_write_protected(info, gfn))
1587                 return 0;
1588
1589         idx = srcu_read_lock(&kvm->srcu);
1590         slot = gfn_to_memslot(kvm, gfn);
1591         if (!slot) {
1592                 srcu_read_unlock(&kvm->srcu, idx);
1593                 return -EINVAL;
1594         }
1595
1596         write_lock(&kvm->mmu_lock);
1597         kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1598         write_unlock(&kvm->mmu_lock);
1599         srcu_read_unlock(&kvm->srcu, idx);
1600
1601         kvmgt_protect_table_del(info, gfn);
1602         return 0;
1603 }
1604
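/*
 * KVM ->track_write() notifier: called after the guest has written @len
 * bytes at @gpa to a tracked page. The write is replayed through the vGPU's
 * page-track handler if the page is still protected; the hook no longer
 * takes a @vcpu, as the gpa/data/len triple is all kvmgt needs.
 */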
1605 static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
1606                                    struct kvm_page_track_notifier_node *node)
1607 {
1608         struct intel_vgpu *info =
1609                 container_of(node, struct intel_vgpu, track_node);
1610
1611         mutex_lock(&info->vgpu_lock);
1612
1613         if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
1614                 intel_vgpu_page_track_handler(info, gpa,
1615                                                      (void *)val, len);
1616
1617         mutex_unlock(&info->vgpu_lock);
1618 }
1619
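/*
 * KVM ->track_flush_slot() notifier: the memslot is going away, so drop the
 * write-protection and protect-table entry for every tracked gfn it
 * contains.
 */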
1620 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
1621                 struct kvm_memory_slot *slot,
1622                 struct kvm_page_track_notifier_node *node)
1623 {
1624         unsigned long i;
1625         gfn_t gfn;
1626         struct intel_vgpu *info =
1627                 container_of(node, struct intel_vgpu, track_node);
1628
1629         mutex_lock(&info->vgpu_lock);
1630
1631         for (i = 0; i < slot->npages; i++) {
1632                 gfn = slot->base_gfn + i;
1633                 if (kvmgt_gfn_is_write_protected(info, gfn)) {
1634                         write_lock(&kvm->mmu_lock);
1635                         kvm_slot_page_track_remove_page(kvm, slot, gfn,
1636                                                 KVM_PAGE_TRACK_WRITE);
1637                         write_unlock(&kvm->mmu_lock);
1638
1639                         kvmgt_protect_table_del(info, gfn);
1640                 }
1641         }
1642         mutex_unlock(&info->vgpu_lock);
1643 }
1644
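/*
 * Release every device-specific VFIO region attached to @vgpu, calling each
 * region's ->release() hook, then free the region array itself.
 */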
1645 void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)
1646 {
1647         int i;
1648
1649         if (!vgpu->region)
1650                 return;
1651
1652         for (i = 0; i < vgpu->num_regions; i++)
1653                 if (vgpu->region[i].ops->release)
1654                         vgpu->region[i].ops->release(vgpu,
1655                                         &vgpu->region[i]);
1656         vgpu->num_regions = 0;
1657         kfree(vgpu->region);
1658         vgpu->region = NULL;
1659 }
1660
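/**
 * intel_gvt_dma_map_guest_page - map a guest page for DMA
 * @vgpu: the vGPU that owns the page
 * @gfn: guest frame number of the first page to map
 * @size: size of the mapping in bytes
 * @dma_addr: output, the DMA address of the mapping
 *
 * Look @gfn up in the per-vGPU cache: a hit of the same size only takes an
 * extra reference, a hit of a different size is unmapped and re-mapped at
 * @size, and a miss creates and caches a new mapping. Callers are expected
 * to balance this with intel_gvt_dma_unmap_guest_page() on the returned
 * address.
 */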
1661 int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
1662                 unsigned long size, dma_addr_t *dma_addr)
1663 {
1664         struct gvt_dma *entry;
1665         int ret;
1666
1667         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1668                 return -EINVAL;
1669
1670         mutex_lock(&vgpu->cache_lock);
1671
1672         entry = __gvt_cache_find_gfn(vgpu, gfn);
1673         if (!entry) {
1674                 ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1675                 if (ret)
1676                         goto err_unlock;
1677
1678                 ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
1679                 if (ret)
1680                         goto err_unmap;
1681         } else if (entry->size != size) {
1682                 /* the same gfn with different size: unmap and re-map */
1683                 gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
1684                 __gvt_cache_remove_entry(vgpu, entry);
1685
1686                 ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1687                 if (ret)
1688                         goto err_unlock;
1689
1690                 ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
1691                 if (ret)
1692                         goto err_unmap;
1693         } else {
1694                 kref_get(&entry->ref);
1695                 *dma_addr = entry->dma_addr;
1696         }
1697
1698         mutex_unlock(&vgpu->cache_lock);
1699         return 0;
1700
1701 err_unmap:
1702         gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
1703 err_unlock:
1704         mutex_unlock(&vgpu->cache_lock);
1705         return ret;
1706 }
1707
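/**
 * intel_gvt_dma_pin_guest_page - grab a reference on an existing mapping
 * @vgpu: the vGPU that owns the mapping
 * @dma_addr: DMA address of the mapping to pin
 *
 * Take an additional reference on a mapping previously created by
 * intel_gvt_dma_map_guest_page(). Fails with -ENOMEM if no mapping for
 * @dma_addr is cached, or -EINVAL if the vGPU is not attached.
 */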
1708 int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr)
1709 {
1710         struct gvt_dma *entry;
1711         int ret = 0;
1712
1713         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1714                 return -EINVAL;
1715
1716         mutex_lock(&vgpu->cache_lock);
1717         entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
1718         if (entry)
1719                 kref_get(&entry->ref);
1720         else
1721                 ret = -ENOMEM;
1722         mutex_unlock(&vgpu->cache_lock);
1723
1724         return ret;
1725 }
1726
1727 static void __gvt_dma_release(struct kref *ref)
1728 {
1729         struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
1730
1731         gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
1732                            entry->size);
1733         __gvt_cache_remove_entry(entry->vgpu, entry);
1734 }
1735
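/**
 * intel_gvt_dma_unmap_guest_page - drop a reference on a DMA mapping
 * @vgpu: the vGPU that owns the mapping
 * @dma_addr: DMA address of the mapping to release
 *
 * Put one reference on the cached mapping; when the last reference is
 * dropped, __gvt_dma_release() unmaps the page and removes the cache entry.
 */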
1736 void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu,
1737                 dma_addr_t dma_addr)
1738 {
1739         struct gvt_dma *entry;
1740
1741         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1742                 return;
1743
1744         mutex_lock(&vgpu->cache_lock);
1745         entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
1746         if (entry)
1747                 kref_put(&entry->ref, __gvt_dma_release);
1748         mutex_unlock(&vgpu->cache_lock);
1749 }
1750
1751 static void init_device_info(struct intel_gvt *gvt)
1752 {
1753         struct intel_gvt_device_info *info = &gvt->device_info;
1754         struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);
1755
1756         info->max_support_vgpus = 8;
1757         info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE;
1758         info->mmio_size = 2 * 1024 * 1024;
1759         info->mmio_bar = 0;
1760         info->gtt_start_offset = 8 * 1024 * 1024;
1761         info->gtt_entry_size = 8;
1762         info->gtt_entry_size_shift = 3;
1763         info->gmadr_bytes_in_cmd = 8;
1764         info->max_surface_size = 36 * 1024 * 1024;
1765         info->msi_cap_offset = pdev->msi_cap;
1766 }
1767
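/*
 * Walk all vGPUs and emulate a vblank on each one whose EMULATE_VBLANK
 * request bit is set and which is currently active.
 */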
1768 static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt)
1769 {
1770         struct intel_vgpu *vgpu;
1771         int id;
1772
1773         mutex_lock(&gvt->lock);
1774         idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) {
1775                 if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id,
1776                                        (void *)&gvt->service_request)) {
1777                         if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status))
1778                                 intel_vgpu_emulate_vblank(vgpu);
1779                 }
1780         }
1781         mutex_unlock(&gvt->lock);
1782 }
1783
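/*
 * Per-device service thread: sleeps until a service request is posted, then
 * handles vblank emulation and scheduling requests.
 */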
1784 static int gvt_service_thread(void *data)
1785 {
1786         struct intel_gvt *gvt = (struct intel_gvt *)data;
1787         int ret;
1788
1789         gvt_dbg_core("service thread start\n");
1790
1791         while (!kthread_should_stop()) {
1792                 ret = wait_event_interruptible(gvt->service_thread_wq,
1793                                 kthread_should_stop() || gvt->service_request);
1794
1795                 if (kthread_should_stop())
1796                         break;
1797
1798                 if (WARN_ONCE(ret, "service thread was woken up by a signal.\n"))
1799                         continue;
1800
1801                 intel_gvt_test_and_emulate_vblank(gvt);
1802
1803                 if (test_bit(INTEL_GVT_REQUEST_SCHED,
1804                                 (void *)&gvt->service_request) ||
1805                         test_bit(INTEL_GVT_REQUEST_EVENT_SCHED,
1806                                         (void *)&gvt->service_request)) {
1807                         intel_gvt_schedule(gvt);
1808                 }
1809         }
1810
1811         return 0;
1812 }
1813
1814 static void clean_service_thread(struct intel_gvt *gvt)
1815 {
1816         kthread_stop(gvt->service_thread);
1817 }
1818
1819 static int init_service_thread(struct intel_gvt *gvt)
1820 {
1821         init_waitqueue_head(&gvt->service_thread_wq);
1822
1823         gvt->service_thread = kthread_run(gvt_service_thread,
1824                         gvt, "gvt_service_thread");
1825         if (IS_ERR(gvt->service_thread)) {
1826                 gvt_err("failed to start service thread.\n");
1827                 return PTR_ERR(gvt->service_thread);
1828         }
1829         return 0;
1830 }
1831
1832 /**
1833  * intel_gvt_clean_device - clean a GVT device
1834  * @i915: i915 private
1835  *
1836  * This function is called at the driver unloading stage, to free the
1837  * resources owned by a GVT device.
1838  *
1839  */
1840 static void intel_gvt_clean_device(struct drm_i915_private *i915)
1841 {
1842         struct intel_gvt *gvt = fetch_and_zero(&i915->gvt);
1843
1844         if (drm_WARN_ON(&i915->drm, !gvt))
1845                 return;
1846
1847         mdev_unregister_parent(&gvt->parent);
1848         intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
1849         intel_gvt_clean_vgpu_types(gvt);
1850
1851         intel_gvt_debugfs_clean(gvt);
1852         clean_service_thread(gvt);
1853         intel_gvt_clean_cmd_parser(gvt);
1854         intel_gvt_clean_sched_policy(gvt);
1855         intel_gvt_clean_workload_scheduler(gvt);
1856         intel_gvt_clean_gtt(gvt);
1857         intel_gvt_free_firmware(gvt);
1858         intel_gvt_clean_mmio_info(gvt);
1859         idr_destroy(&gvt->vgpu_idr);
1860
1861         kfree(i915->gvt);
1862 }
1863
1864 /**
1865  * intel_gvt_init_device - initialize a GVT device
1866  * @i915: drm i915 private data
1867  *
1868  * This function is called at the initialization stage, to initialize
1869  * necessary GVT components.
1870  *
1871  * Returns:
1872  * Zero on success, negative error code on failure.
1873  *
1874  */
1875 static int intel_gvt_init_device(struct drm_i915_private *i915)
1876 {
1877         struct intel_gvt *gvt;
1878         struct intel_vgpu *vgpu;
1879         int ret;
1880
1881         if (drm_WARN_ON(&i915->drm, i915->gvt))
1882                 return -EEXIST;
1883
1884         gvt = kzalloc(sizeof(struct intel_gvt), GFP_KERNEL);
1885         if (!gvt)
1886                 return -ENOMEM;
1887
1888         gvt_dbg_core("init gvt device\n");
1889
1890         idr_init_base(&gvt->vgpu_idr, 1);
1891         spin_lock_init(&gvt->scheduler.mmio_context_lock);
1892         mutex_init(&gvt->lock);
1893         mutex_init(&gvt->sched_lock);
1894         gvt->gt = to_gt(i915);
1895         i915->gvt = gvt;
1896
1897         init_device_info(gvt);
1898
1899         ret = intel_gvt_setup_mmio_info(gvt);
1900         if (ret)
1901                 goto out_clean_idr;
1902
1903         intel_gvt_init_engine_mmio_context(gvt);
1904
1905         ret = intel_gvt_load_firmware(gvt);
1906         if (ret)
1907                 goto out_clean_mmio_info;
1908
1909         ret = intel_gvt_init_irq(gvt);
1910         if (ret)
1911                 goto out_free_firmware;
1912
1913         ret = intel_gvt_init_gtt(gvt);
1914         if (ret)
1915                 goto out_free_firmware;
1916
1917         ret = intel_gvt_init_workload_scheduler(gvt);
1918         if (ret)
1919                 goto out_clean_gtt;
1920
1921         ret = intel_gvt_init_sched_policy(gvt);
1922         if (ret)
1923                 goto out_clean_workload_scheduler;
1924
1925         ret = intel_gvt_init_cmd_parser(gvt);
1926         if (ret)
1927                 goto out_clean_sched_policy;
1928
1929         ret = init_service_thread(gvt);
1930         if (ret)
1931                 goto out_clean_cmd_parser;
1932
1933         ret = intel_gvt_init_vgpu_types(gvt);
1934         if (ret)
1935                 goto out_clean_thread;
1936
1937         vgpu = intel_gvt_create_idle_vgpu(gvt);
1938         if (IS_ERR(vgpu)) {
1939                 ret = PTR_ERR(vgpu);
1940                 gvt_err("failed to create idle vgpu\n");
1941                 goto out_clean_types;
1942         }
1943         gvt->idle_vgpu = vgpu;
1944
1945         intel_gvt_debugfs_init(gvt);
1946
1947         ret = mdev_register_parent(&gvt->parent, i915->drm.dev,
1948                                    &intel_vgpu_mdev_driver,
1949                                    gvt->mdev_types, gvt->num_types);
1950         if (ret)
1951                 goto out_destroy_idle_vgpu;
1952
1953         gvt_dbg_core("gvt device initialization is done\n");
1954         return 0;
1955
1956 out_destroy_idle_vgpu:
1957         intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
1958         intel_gvt_debugfs_clean(gvt);
1959 out_clean_types:
1960         intel_gvt_clean_vgpu_types(gvt);
1961 out_clean_thread:
1962         clean_service_thread(gvt);
1963 out_clean_cmd_parser:
1964         intel_gvt_clean_cmd_parser(gvt);
1965 out_clean_sched_policy:
1966         intel_gvt_clean_sched_policy(gvt);
1967 out_clean_workload_scheduler:
1968         intel_gvt_clean_workload_scheduler(gvt);
1969 out_clean_gtt:
1970         intel_gvt_clean_gtt(gvt);
1971 out_free_firmware:
1972         intel_gvt_free_firmware(gvt);
1973 out_clean_mmio_info:
1974         intel_gvt_clean_mmio_info(gvt);
1975 out_clean_idr:
1976         idr_destroy(&gvt->vgpu_idr);
1977         kfree(gvt);
1978         i915->gvt = NULL;
1979         return ret;
1980 }
1981
1982 static void intel_gvt_pm_resume(struct drm_i915_private *i915)
1983 {
1984         struct intel_gvt *gvt = i915->gvt;
1985
1986         intel_gvt_restore_fence(gvt);
1987         intel_gvt_restore_mmio(gvt);
1988         intel_gvt_restore_ggtt(gvt);
1989 }
1990
1991 static const struct intel_vgpu_ops intel_gvt_vgpu_ops = {
1992         .init_device    = intel_gvt_init_device,
1993         .clean_device   = intel_gvt_clean_device,
1994         .pm_resume      = intel_gvt_pm_resume,
1995 };
1996
1997 static int __init kvmgt_init(void)
1998 {
1999         int ret;
2000
2001         ret = intel_gvt_set_ops(&intel_gvt_vgpu_ops);
2002         if (ret)
2003                 return ret;
2004
2005         ret = mdev_register_driver(&intel_vgpu_mdev_driver);
2006         if (ret)
2007                 intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
2008         return ret;
2009 }
2010
2011 static void __exit kvmgt_exit(void)
2012 {
2013         mdev_unregister_driver(&intel_vgpu_mdev_driver);
2014         intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
2015 }
2016
2017 module_init(kvmgt_init);
2018 module_exit(kvmgt_exit);
2019
2020 MODULE_LICENSE("GPL and additional rights");
2021 MODULE_AUTHOR("Intel Corporation");