drivers/iommu/intel/svm.c (platform/kernel/linux-rpi.git, commit 8f6d68006ab6a9dae13b2d63b1b2ca8602de4f60)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2015 Intel Corporation.
4  *
5  * Authors: David Woodhouse <dwmw2@infradead.org>
6  */
7
8 #include <linux/mmu_notifier.h>
9 #include <linux/sched.h>
10 #include <linux/sched/mm.h>
11 #include <linux/slab.h>
12 #include <linux/rculist.h>
13 #include <linux/pci.h>
14 #include <linux/pci-ats.h>
15 #include <linux/dmar.h>
16 #include <linux/interrupt.h>
17 #include <linux/mm_types.h>
18 #include <linux/xarray.h>
19 #include <asm/page.h>
20 #include <asm/fpu/api.h>
21
22 #include "iommu.h"
23 #include "pasid.h"
24 #include "perf.h"
25 #include "../iommu-sva.h"
26 #include "trace.h"
27
28 static irqreturn_t prq_event_thread(int irq, void *d);
29 static void intel_svm_drain_prq(struct device *dev, u32 pasid);
30 #define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
31
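/*
 * pasid_private_array maps a PASID value to the intel_svm that owns it, so
 * that page requests and unbind operations can find the per-mm SVM state.
 * xa_alloc() with XA_LIMIT(pasid, pasid) stores @priv at exactly @pasid.
 */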
32 static DEFINE_XARRAY_ALLOC(pasid_private_array);
33 static int pasid_private_add(ioasid_t pasid, void *priv)
34 {
35         return xa_alloc(&pasid_private_array, &pasid, priv,
36                         XA_LIMIT(pasid, pasid), GFP_ATOMIC);
37 }
38
39 static void pasid_private_remove(ioasid_t pasid)
40 {
41         xa_erase(&pasid_private_array, pasid);
42 }
43
44 static void *pasid_private_find(ioasid_t pasid)
45 {
46         return xa_load(&pasid_private_array, pasid);
47 }
48
49 static struct intel_svm_dev *
50 svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
51 {
52         struct intel_svm_dev *sdev = NULL, *t;
53
54         rcu_read_lock();
55         list_for_each_entry_rcu(t, &svm->devs, list) {
56                 if (t->dev == dev) {
57                         sdev = t;
58                         break;
59                 }
60         }
61         rcu_read_unlock();
62
63         return sdev;
64 }
65
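/*
 * Allocate and enable the page request queue (PRQ) for @iommu: allocate the
 * queue pages, a hardware IRQ and an IOPF queue, register the threaded IRQ
 * handler, and program the PQH/PQT/PQA registers to point at the new queue.
 */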
66 int intel_svm_enable_prq(struct intel_iommu *iommu)
67 {
68         struct iopf_queue *iopfq;
69         struct page *pages;
70         int irq, ret;
71
72         pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
73         if (!pages) {
74                 pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
75                         iommu->name);
76                 return -ENOMEM;
77         }
78         iommu->prq = page_address(pages);
79
80         irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
81         if (irq <= 0) {
82                 pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
83                        iommu->name);
84                 ret = -EINVAL;
85                 goto free_prq;
86         }
87         iommu->pr_irq = irq;
88
89         snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
90                  "dmar%d-iopfq", iommu->seq_id);
91         iopfq = iopf_queue_alloc(iommu->iopfq_name);
92         if (!iopfq) {
93                 pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
94                 ret = -ENOMEM;
95                 goto free_hwirq;
96         }
97         iommu->iopf_queue = iopfq;
98
99         snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
100
101         ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
102                                    iommu->prq_name, iommu);
103         if (ret) {
104                 pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
105                        iommu->name);
106                 goto free_iopfq;
107         }
108         dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
109         dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
110         dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
111
112         init_completion(&iommu->prq_complete);
113
114         return 0;
115
116 free_iopfq:
117         iopf_queue_free(iommu->iopf_queue);
118         iommu->iopf_queue = NULL;
119 free_hwirq:
120         dmar_free_hwirq(irq);
121         iommu->pr_irq = 0;
122 free_prq:
123         free_pages((unsigned long)iommu->prq, PRQ_ORDER);
124         iommu->prq = NULL;
125
126         return ret;
127 }
128
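/*
 * Tear down the page request queue: clear the PRQ registers, then release
 * the IRQ, the IOPF queue and the queue pages set up by intel_svm_enable_prq().
 */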
129 int intel_svm_finish_prq(struct intel_iommu *iommu)
130 {
131         dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
132         dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
133         dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
134
135         if (iommu->pr_irq) {
136                 free_irq(iommu->pr_irq, iommu);
137                 dmar_free_hwirq(iommu->pr_irq);
138                 iommu->pr_irq = 0;
139         }
140
141         if (iommu->iopf_queue) {
142                 iopf_queue_free(iommu->iopf_queue);
143                 iommu->iopf_queue = NULL;
144         }
145
146         free_pages((unsigned long)iommu->prq, PRQ_ORDER);
147         iommu->prq = NULL;
148
149         return 0;
150 }
151
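/*
 * Mark @iommu as SVM capable only if its first-level translation support
 * matches the CPU: 1GB pages when the CPU has gbpages, and 5-level paging
 * when the CPU runs with LA57.
 */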
152 void intel_svm_check(struct intel_iommu *iommu)
153 {
154         if (!pasid_supported(iommu))
155                 return;
156
157         if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
158             !cap_fl1gp_support(iommu->cap)) {
159                 pr_err("%s SVM disabled, incompatible 1GB page capability\n",
160                        iommu->name);
161                 return;
162         }
163
164         if (cpu_feature_enabled(X86_FEATURE_LA57) &&
165             !cap_fl5lp_support(iommu->cap)) {
166                 pr_err("%s SVM disabled, incompatible paging mode\n",
167                        iommu->name);
168                 return;
169         }
170
171         iommu->flags |= VTD_FLAG_SVM_CAPABLE;
172 }
173
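/*
 * Flush the PASID-granular IOTLB for one bound device and, if ATS is
 * enabled, the device TLB as well. intel_flush_svm_range_dev() below splits
 * an arbitrary range into naturally aligned power-of-two chunks before
 * calling this helper.
 */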
174 static void __flush_svm_range_dev(struct intel_svm *svm,
175                                   struct intel_svm_dev *sdev,
176                                   unsigned long address,
177                                   unsigned long pages, int ih)
178 {
179         struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);
180
181         if (WARN_ON(!pages))
182                 return;
183
184         qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
185         if (info->ats_enabled) {
186                 qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
187                                          svm->pasid, sdev->qdep, address,
188                                          order_base_2(pages));
189                 quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
190                                           svm->pasid, sdev->qdep);
191         }
192 }
193
194 static void intel_flush_svm_range_dev(struct intel_svm *svm,
195                                       struct intel_svm_dev *sdev,
196                                       unsigned long address,
197                                       unsigned long pages, int ih)
198 {
199         unsigned long shift = ilog2(__roundup_pow_of_two(pages));
200         unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
201         unsigned long start = ALIGN_DOWN(address, align);
202         unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);
203
204         while (start < end) {
205                 __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
206                 start += align;
207         }
208 }
209
210 static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
211                                 unsigned long pages, int ih)
212 {
213         struct intel_svm_dev *sdev;
214
215         rcu_read_lock();
216         list_for_each_entry_rcu(sdev, &svm->devs, list)
217                 intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
218         rcu_read_unlock();
219 }
220
221 /* Pages have been freed at this point */
222 static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
223                                         struct mm_struct *mm,
224                                         unsigned long start, unsigned long end)
225 {
226         struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
227
228         intel_flush_svm_range(svm, start,
229                               (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
230 }
231
232 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
233 {
234         struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
235         struct intel_svm_dev *sdev;
236
237         /* This might end up being called from exit_mmap(), *before* the page
238          * tables are cleared. And __mmu_notifier_release() will delete us from
239          * the list of notifiers so that our arch_invalidate_secondary_tlbs()
240          * callback doesn't get called when the page tables are cleared. So we
241          * need to protect against hardware accessing those page tables.
242          *
243          * We do it by clearing the entry in the PASID table and then flushing
244          * the IOTLB and the PASID table caches. This might upset hardware;
245          * perhaps we'll want to point the PASID to a dummy PGD (like the zero
246          * page) so that we end up taking a fault that the hardware really
247          * *has* to handle gracefully without affecting other processes.
248          */
249         rcu_read_lock();
250         list_for_each_entry_rcu(sdev, &svm->devs, list)
251                 intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
252                                             svm->pasid, true);
253         rcu_read_unlock();
254
255 }
256
257 static const struct mmu_notifier_ops intel_mmuops = {
258         .release = intel_mm_release,
259         .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
260 };
261
262 static DEFINE_MUTEX(pasid_mutex);
263
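/*
 * Look up the intel_svm bound to @pasid and the intel_svm_dev for @dev.
 * Returns 0 with *rsvm and/or *rsdev set to NULL when no such binding
 * exists; a non-zero return indicates an invalid PASID or internal error.
 */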
264 static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
265                              struct intel_svm **rsvm,
266                              struct intel_svm_dev **rsdev)
267 {
268         struct intel_svm_dev *sdev = NULL;
269         struct intel_svm *svm;
270
271         /* The caller should hold the pasid_mutex lock */
272         if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
273                 return -EINVAL;
274
275         if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
276                 return -EINVAL;
277
278         svm = pasid_private_find(pasid);
279         if (IS_ERR(svm))
280                 return PTR_ERR(svm);
281
282         if (!svm)
283                 goto out;
284
285         /*
286          * If we found an svm for the PASID, there must be at least one
287          * device bond.
288          */
289         if (WARN_ON(list_empty(&svm->devs)))
290                 return -EINVAL;
291         sdev = svm_lookup_device_by_dev(svm, dev);
292
293 out:
294         *rsvm = svm;
295         *rsdev = sdev;
296
297         return 0;
298 }
299
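/*
 * Bind @mm to @dev: find or create the intel_svm for mm->pasid (registering
 * an MMU notifier on first use), allocate a per-device intel_svm_dev, and
 * install a first-level PASID table entry that points at mm->pgd.
 */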
300 static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
301                              struct mm_struct *mm)
302 {
303         struct device_domain_info *info = dev_iommu_priv_get(dev);
304         struct intel_svm_dev *sdev;
305         struct intel_svm *svm;
306         unsigned long sflags;
307         int ret = 0;
308
309         svm = pasid_private_find(mm->pasid);
310         if (!svm) {
311                 svm = kzalloc(sizeof(*svm), GFP_KERNEL);
312                 if (!svm)
313                         return -ENOMEM;
314
315                 svm->pasid = mm->pasid;
316                 svm->mm = mm;
317                 INIT_LIST_HEAD_RCU(&svm->devs);
318
319                 svm->notifier.ops = &intel_mmuops;
320                 ret = mmu_notifier_register(&svm->notifier, mm);
321                 if (ret) {
322                         kfree(svm);
323                         return ret;
324                 }
325
326                 ret = pasid_private_add(svm->pasid, svm);
327                 if (ret) {
328                         mmu_notifier_unregister(&svm->notifier, mm);
329                         kfree(svm);
330                         return ret;
331                 }
332         }
333
334         sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
335         if (!sdev) {
336                 ret = -ENOMEM;
337                 goto free_svm;
338         }
339
340         sdev->dev = dev;
341         sdev->iommu = iommu;
342         sdev->did = FLPT_DEFAULT_DID;
343         sdev->sid = PCI_DEVID(info->bus, info->devfn);
344         init_rcu_head(&sdev->rcu);
345         if (info->ats_enabled) {
346                 sdev->qdep = info->ats_qdep;
347                 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
348                         sdev->qdep = 0;
349         }
350
351         /* Set up the PASID table: */
352         sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
353         ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
354                                             FLPT_DEFAULT_DID, sflags);
355         if (ret)
356                 goto free_sdev;
357
358         list_add_rcu(&sdev->list, &svm->devs);
359
360         return 0;
361
362 free_sdev:
363         kfree(sdev);
364 free_svm:
365         if (list_empty(&svm->devs)) {
366                 mmu_notifier_unregister(&svm->notifier, mm);
367                 pasid_private_remove(mm->pasid);
368                 kfree(svm);
369         }
370
371         return ret;
372 }
373
374 /* Caller must hold pasid_mutex */
375 static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
376 {
377         struct intel_svm_dev *sdev;
378         struct intel_iommu *iommu;
379         struct intel_svm *svm;
380         struct mm_struct *mm;
381         int ret = -EINVAL;
382
383         iommu = device_to_iommu(dev, NULL, NULL);
384         if (!iommu)
385                 goto out;
386
387         ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
388         if (ret)
389                 goto out;
390         mm = svm->mm;
391
392         if (sdev) {
393                 list_del_rcu(&sdev->list);
394                 /*
395                  * Flush the PASID cache and IOTLB for this device.
396                  * Note that we do depend on the hardware *not* using
397                  * the PASID any more. Just as we depend on other
398                  * devices never using PASIDs that they have no right
399                  * to use. We have a *shared* PASID table, because it's
400                  * large and has to be physically contiguous. So it's
401                  * hard to be as defensive as we might like.
402                  */
403                 intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false);
404                 intel_svm_drain_prq(dev, svm->pasid);
405                 kfree_rcu(sdev, rcu);
406
407                 if (list_empty(&svm->devs)) {
408                         if (svm->notifier.ops)
409                                 mmu_notifier_unregister(&svm->notifier, mm);
410                         pasid_private_remove(svm->pasid);
411                         /*
412                          * We mandate that no page faults may be outstanding
413                          * for the PASID when intel_svm_unbind_mm() is called.
414                          * If that is not obeyed, subtle errors will happen.
415                          * Let's make them less subtle...
416                          */
417                         memset(svm, 0x6b, sizeof(*svm));
418                         kfree(svm);
419                 }
420         }
421 out:
422         return ret;
423 }
424
425 /* Page request queue descriptor */
426 struct page_req_dsc {
427         union {
428                 struct {
429                         u64 type:8;
430                         u64 pasid_present:1;
431                         u64 priv_data_present:1;
432                         u64 rsvd:6;
433                         u64 rid:16;
434                         u64 pasid:20;
435                         u64 exe_req:1;
436                         u64 pm_req:1;
437                         u64 rsvd2:10;
438                 };
439                 u64 qw_0;
440         };
441         union {
442                 struct {
443                         u64 rd_req:1;
444                         u64 wr_req:1;
445                         u64 lpig:1;
446                         u64 prg_index:9;
447                         u64 addr:52;
448                 };
449                 u64 qw_1;
450         };
451         u64 priv_data[2];
452 };
453
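/*
 * A page request address must be canonical: bits above __VIRTUAL_MASK_SHIFT
 * must be a sign extension of the top implemented bit.
 */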
454 static bool is_canonical_address(u64 addr)
455 {
456         int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
457         long saddr = (long) addr;
458
459         return (((saddr << shift) >> shift) == saddr);
460 }
461
462 /**
463  * intel_svm_drain_prq - Drain page requests and responses for a pasid
464  * @dev: target device
465  * @pasid: pasid for draining
466  *
467  * Drain all pending page requests and responses related to @pasid in both
468  * software and hardware. This is supposed to be called after the device
469  * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
470  * and DevTLB have been invalidated.
471  *
472  * It waits until all pending page requests for @pasid in the page fault
473  * queue are completed by the prq handling thread. It then follows the
474  * steps described in VT-d spec CH7.10 to drain all page requests and
475  * page responses pending in the hardware.
476  */
477 static void intel_svm_drain_prq(struct device *dev, u32 pasid)
478 {
479         struct device_domain_info *info;
480         struct dmar_domain *domain;
481         struct intel_iommu *iommu;
482         struct qi_desc desc[3];
483         struct pci_dev *pdev;
484         int head, tail;
485         u16 sid, did;
486         int qdep;
487
488         info = dev_iommu_priv_get(dev);
489         if (WARN_ON(!info || !dev_is_pci(dev)))
490                 return;
491
492         if (!info->pri_enabled)
493                 return;
494
495         iommu = info->iommu;
496         domain = info->domain;
497         pdev = to_pci_dev(dev);
498         sid = PCI_DEVID(info->bus, info->devfn);
499         did = domain_id_iommu(domain, iommu);
500         qdep = pci_ats_queue_depth(pdev);
501
502         /*
503          * Check and wait until all pending page requests in the queue are
504          * handled by the prq handling thread.
505          */
506 prq_retry:
507         reinit_completion(&iommu->prq_complete);
508         tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
509         head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
510         while (head != tail) {
511                 struct page_req_dsc *req;
512
513                 req = &iommu->prq[head / sizeof(*req)];
514                 if (!req->pasid_present || req->pasid != pasid) {
515                         head = (head + sizeof(*req)) & PRQ_RING_MASK;
516                         continue;
517                 }
518
519                 wait_for_completion(&iommu->prq_complete);
520                 goto prq_retry;
521         }
522
523         /*
524          * A work item in the IO page fault workqueue may try to take
525          * pasid_mutex now. Holding pasid_mutex while iopf_queue_flush_dev()
526          * waits for all work items in the workqueue to finish may deadlock.
527          *
528          * Holding pasid_mutex across iopf_queue_flush_dev() is unnecessary,
529          * so unlock it to let the work items run while we wait for them
530          * to finish.
531          */
532         lockdep_assert_held(&pasid_mutex);
533         mutex_unlock(&pasid_mutex);
534         iopf_queue_flush_dev(dev);
535         mutex_lock(&pasid_mutex);
536
537         /*
538          * Perform steps described in VT-d spec CH7.10 to drain page
539          * requests and responses in hardware.
540          */
541         memset(desc, 0, sizeof(desc));
542         desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
543                         QI_IWD_FENCE |
544                         QI_IWD_TYPE;
545         desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
546                         QI_EIOTLB_DID(did) |
547                         QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
548                         QI_EIOTLB_TYPE;
549         desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
550                         QI_DEV_EIOTLB_SID(sid) |
551                         QI_DEV_EIOTLB_QDEP(qdep) |
552                         QI_DEIOTLB_TYPE |
553                         QI_DEV_IOTLB_PFSID(info->pfsid);
554 qi_retry:
555         reinit_completion(&iommu->prq_complete);
556         qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
557         if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
558                 wait_for_completion(&iommu->prq_complete);
559                 goto qi_retry;
560         }
561 }
562
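/* Translate the rd/wr/exe/pm bits of a page request into IOMMU_FAULT_PERM_* flags. */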
563 static int prq_to_iommu_prot(struct page_req_dsc *req)
564 {
565         int prot = 0;
566
567         if (req->rd_req)
568                 prot |= IOMMU_FAULT_PERM_READ;
569         if (req->wr_req)
570                 prot |= IOMMU_FAULT_PERM_WRITE;
571         if (req->exe_req)
572                 prot |= IOMMU_FAULT_PERM_EXEC;
573         if (req->pm_req)
574                 prot |= IOMMU_FAULT_PERM_PRIV;
575
576         return prot;
577 }
578
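/*
 * Package a page request descriptor into an iommu_fault_event and report it
 * with iommu_report_device_fault() so the registered I/O page fault handler
 * can service it in process context.
 */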
579 static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
580                                 struct page_req_dsc *desc)
581 {
582         struct iommu_fault_event event;
583
584         if (!dev || !dev_is_pci(dev))
585                 return -ENODEV;
586
587         /* Fill in event data for device-specific processing */
588         memset(&event, 0, sizeof(struct iommu_fault_event));
589         event.fault.type = IOMMU_FAULT_PAGE_REQ;
590         event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
591         event.fault.prm.pasid = desc->pasid;
592         event.fault.prm.grpid = desc->prg_index;
593         event.fault.prm.perm = prq_to_iommu_prot(desc);
594
595         if (desc->lpig)
596                 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
597         if (desc->pasid_present) {
598                 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
599                 event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
600         }
601         if (desc->priv_data_present) {
602                 /*
603                  * If private data is present, a page response is required
604                  * just as it is for LPIG. iommu_report_device_fault()
605                  * doesn't understand this vendor-specific requirement, so
606                  * we set last_page as a workaround.
607                  */
608                 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
609                 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
610                 event.fault.prm.private_data[0] = desc->priv_data[0];
611                 event.fault.prm.private_data[1] = desc->priv_data[1];
612         } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
613                 /*
614                  * If the private data fields are not used by hardware, use
615                  * them to monitor the prq handling latency.
616                  */
617                 event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
618         }
619
620         return iommu_report_device_fault(dev, &event);
621 }
622
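/*
 * A malformed page request cannot be handled; log it and, when the
 * descriptor demands a response (LPIG or private data present), send a
 * page group response carrying @result back to the device.
 */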
623 static void handle_bad_prq_event(struct intel_iommu *iommu,
624                                  struct page_req_dsc *req, int result)
625 {
626         struct qi_desc desc;
627
628         pr_err("%s: Invalid page request: %08llx %08llx\n",
629                iommu->name, ((unsigned long long *)req)[0],
630                ((unsigned long long *)req)[1]);
631
632         /*
633          * Per VT-d spec. v3.0 ch7.7, system software must
634          * respond with page group response if private data
635          * is present (PDP) or last page in group (LPIG) bit
636          * is set. This is an additional VT-d requirement beyond
637          * the PCI ATS spec.
638          */
639         if (!req->lpig && !req->priv_data_present)
640                 return;
641
642         desc.qw0 = QI_PGRP_PASID(req->pasid) |
643                         QI_PGRP_DID(req->rid) |
644                         QI_PGRP_PASID_P(req->pasid_present) |
645                         QI_PGRP_PDP(req->priv_data_present) |
646                         QI_PGRP_RESP_CODE(result) |
647                         QI_PGRP_RESP_TYPE;
648         desc.qw1 = QI_PGRP_IDX(req->prg_index) |
649                         QI_PGRP_LPIG(req->lpig);
650
651         if (req->priv_data_present) {
652                 desc.qw2 = req->priv_data[0];
653                 desc.qw3 = req->priv_data[1];
654         } else {
655                 desc.qw2 = 0;
656                 desc.qw3 = 0;
657         }
658
659         qi_submit_sync(iommu, &desc, 1, 0);
660 }
661
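/*
 * Threaded handler for the page request queue interrupt. Clear the pending
 * bit, walk the descriptors between head and tail, validate each request,
 * report it via intel_svm_prq_report(), advance the head register, and
 * finally recover from a queue overflow if one is flagged.
 */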
662 static irqreturn_t prq_event_thread(int irq, void *d)
663 {
664         struct intel_iommu *iommu = d;
665         struct page_req_dsc *req;
666         int head, tail, handled;
667         struct pci_dev *pdev;
668         u64 address;
669
670         /*
671          * Clear PPR bit before reading head/tail registers, to ensure that
672          * we get a new interrupt if needed.
673          */
674         writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
675
676         tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
677         head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
678         handled = (head != tail);
679         while (head != tail) {
680                 req = &iommu->prq[head / sizeof(*req)];
681                 address = (u64)req->addr << VTD_PAGE_SHIFT;
682
683                 if (unlikely(!req->pasid_present)) {
684                         pr_err("IOMMU: %s: Page request without PASID\n",
685                                iommu->name);
686 bad_req:
687                         handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
688                         goto prq_advance;
689                 }
690
691                 if (unlikely(!is_canonical_address(address))) {
692                         pr_err("IOMMU: %s: Address is not canonical\n",
693                                iommu->name);
694                         goto bad_req;
695                 }
696
697                 if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
698                         pr_err("IOMMU: %s: Page request in Privilege Mode\n",
699                                iommu->name);
700                         goto bad_req;
701                 }
702
703                 if (unlikely(req->exe_req && req->rd_req)) {
704                         pr_err("IOMMU: %s: Execution request not supported\n",
705                                iommu->name);
706                         goto bad_req;
707                 }
708
709                 /* Drop Stop Marker message. No need for a response. */
710                 if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
711                         goto prq_advance;
712
713                 pdev = pci_get_domain_bus_and_slot(iommu->segment,
714                                                    PCI_BUS_NUM(req->rid),
715                                                    req->rid & 0xff);
716                 /*
717                  * If the prq is to be handled outside the iommu driver via a
718                  * receiver of the fault notifiers, we skip the page response here.
719                  */
720                 if (!pdev)
721                         goto bad_req;
722
723                 if (intel_svm_prq_report(iommu, &pdev->dev, req))
724                         handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
725                 else
726                         trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
727                                          req->priv_data[0], req->priv_data[1],
728                                          iommu->prq_seq_number++);
729                 pci_dev_put(pdev);
730 prq_advance:
731                 head = (head + sizeof(*req)) & PRQ_RING_MASK;
732         }
733
734         dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
735
736         /*
737          * Clear the page request overflow bit and wake up all threads that
738          * are waiting for the completion of this handling.
739          */
740         if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
741                 pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
742                                     iommu->name);
743                 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
744                 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
745                 if (head == tail) {
746                         iopf_queue_discard_partial(iommu->iopf_queue);
747                         writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
748                         pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared\n",
749                                             iommu->name);
750                 }
751         }
752
753         if (!completion_done(&iommu->prq_complete))
754                 complete(&iommu->prq_complete);
755
756         return IRQ_RETVAL(handled);
757 }
758
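/*
 * Send a page group response for a previously reported page request. Per
 * VT-d a response is only sent when the request had LPIG set or carried
 * private data; otherwise this is a no-op that still returns 0.
 */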
759 int intel_svm_page_response(struct device *dev,
760                             struct iommu_fault_event *evt,
761                             struct iommu_page_response *msg)
762 {
763         struct iommu_fault_page_request *prm;
764         struct intel_iommu *iommu;
765         bool private_present;
766         bool pasid_present;
767         bool last_page;
768         u8 bus, devfn;
769         int ret = 0;
770         u16 sid;
771
772         if (!dev || !dev_is_pci(dev))
773                 return -ENODEV;
774
775         iommu = device_to_iommu(dev, &bus, &devfn);
776         if (!iommu)
777                 return -ENODEV;
778
779         if (!msg || !evt)
780                 return -EINVAL;
781
782         prm = &evt->fault.prm;
783         sid = PCI_DEVID(bus, devfn);
784         pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
785         private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
786         last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
787
788         if (!pasid_present) {
789                 ret = -EINVAL;
790                 goto out;
791         }
792
793         if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
794                 ret = -EINVAL;
795                 goto out;
796         }
797
798         /*
799          * Per VT-d spec. v3.0 ch7.7, system software must respond
800          * with page group response if private data is present (PDP)
801          * or last page in group (LPIG) bit is set. This is an
802          * additional VT-d requirement beyond PCI ATS spec.
803          */
804         if (last_page || private_present) {
805                 struct qi_desc desc;
806
807                 desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
808                                 QI_PGRP_PASID_P(pasid_present) |
809                                 QI_PGRP_PDP(private_present) |
810                                 QI_PGRP_RESP_CODE(msg->code) |
811                                 QI_PGRP_RESP_TYPE;
812                 desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
813                 desc.qw2 = 0;
814                 desc.qw3 = 0;
815
816                 if (private_present) {
817                         desc.qw2 = prm->private_data[0];
818                         desc.qw3 = prm->private_data[1];
819                 } else if (prm->private_data[0]) {
820                         dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
821                                 ktime_to_ns(ktime_get()) - prm->private_data[0]);
822                 }
823
824                 qi_submit_sync(iommu, &desc, 1, 0);
825         }
826 out:
827         return ret;
828 }
829
830 void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
831 {
832         mutex_lock(&pasid_mutex);
833         intel_svm_unbind_mm(dev, pasid);
834         mutex_unlock(&pasid_mutex);
835 }
836
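/*
 * iommu_domain op: attach the SVA domain's mm to @dev. The binding uses the
 * mm's PASID, so @pasid is effectively fixed by the mm in this
 * implementation.
 */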
837 static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
838                                    struct device *dev, ioasid_t pasid)
839 {
840         struct device_domain_info *info = dev_iommu_priv_get(dev);
841         struct intel_iommu *iommu = info->iommu;
842         struct mm_struct *mm = domain->mm;
843         int ret;
844
845         mutex_lock(&pasid_mutex);
846         ret = intel_svm_bind_mm(iommu, dev, mm);
847         mutex_unlock(&pasid_mutex);
848
849         return ret;
850 }
851
852 static void intel_svm_domain_free(struct iommu_domain *domain)
853 {
854         kfree(to_dmar_domain(domain));
855 }
856
857 static const struct iommu_domain_ops intel_svm_domain_ops = {
858         .set_dev_pasid          = intel_svm_set_dev_pasid,
859         .free                   = intel_svm_domain_free
860 };
861
862 struct iommu_domain *intel_svm_domain_alloc(void)
863 {
864         struct dmar_domain *domain;
865
866         domain = kzalloc(sizeof(*domain), GFP_KERNEL);
867         if (!domain)
868                 return NULL;
869         domain->domain.ops = &intel_svm_domain_ops;
870
871         return &domain->domain;
872 }
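
/*
 * Usage sketch (not part of this file): a device driver that wants shared
 * virtual addressing typically enables the SVA feature and then binds the
 * current mm through the generic SVA API, which reaches
 * intel_svm_set_dev_pasid() above. Roughly:
 *
 *	struct iommu_sva *handle;
 *	int ret;
 *
 *	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
 *	if (ret)
 *		return ret;
 *	handle = iommu_sva_bind_device(dev, current->mm);
 *	if (IS_ERR(handle))
 *		goto disable_sva;
 *	// program the device with iommu_sva_get_pasid(handle), do DMA ...
 *	iommu_sva_unbind_device(handle);
 * disable_sva:
 *	iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
 */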