platform/kernel/linux-starfive.git — drivers/iommu/intel/svm.c (e7b9bedebcc077cf9a48e8b51033908cf2ee694b)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "../iommu-sva.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)

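/*
 * pasid_private_array maps a PASID value to its per-mm SVM state
 * (struct intel_svm). xa_alloc() with XA_LIMIT(pasid, pasid) stores the
 * entry at exactly index @pasid and fails if that slot is already in use.
 */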
static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
        return xa_alloc(&pasid_private_array, &pasid, priv,
                        XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
        xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
        return xa_load(&pasid_private_array, pasid);
}

static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
        struct intel_svm_dev *sdev = NULL, *t;

        rcu_read_lock();
        list_for_each_entry_rcu(t, &svm->devs, list) {
                if (t->dev == dev) {
                        sdev = t;
                        break;
                }
        }
        rcu_read_unlock();

        return sdev;
}

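/*
 * Allocate and enable the page request queue (PRQ) for @iommu: allocate
 * the queue pages, set up a dedicated interrupt and an IOPF queue for
 * deferred fault handling, then program the queue head, tail and address
 * registers so that hardware can start posting page requests.
 */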
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
        struct iopf_queue *iopfq;
        struct page *pages;
        int irq, ret;

        pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
        if (!pages) {
                pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
                        iommu->name);
                return -ENOMEM;
        }
        iommu->prq = page_address(pages);

        irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
        if (irq <= 0) {
                pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
                       iommu->name);
                ret = -EINVAL;
                goto free_prq;
        }
        iommu->pr_irq = irq;

        snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
                 "dmar%d-iopfq", iommu->seq_id);
        iopfq = iopf_queue_alloc(iommu->iopfq_name);
        if (!iopfq) {
                pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
                ret = -ENOMEM;
                goto free_hwirq;
        }
        iommu->iopf_queue = iopfq;

        snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

        ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
                                   iommu->prq_name, iommu);
        if (ret) {
                pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
                       iommu->name);
                goto free_iopfq;
        }
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

        init_completion(&iommu->prq_complete);

        return 0;

free_iopfq:
        iopf_queue_free(iommu->iopf_queue);
        iommu->iopf_queue = NULL;
free_hwirq:
        dmar_free_hwirq(irq);
        iommu->pr_irq = 0;
free_prq:
        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;

        return ret;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

        if (iommu->pr_irq) {
                free_irq(iommu->pr_irq, iommu);
                dmar_free_hwirq(iommu->pr_irq);
                iommu->pr_irq = 0;
        }

        if (iommu->iopf_queue) {
                iopf_queue_free(iommu->iopf_queue);
                iommu->iopf_queue = NULL;
        }

        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;

        return 0;
}

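/*
 * Mark @iommu as SVM-capable only if its first-level translation
 * capabilities match the CPU: 1GB pages require first-level 1GB page
 * support, and 5-level paging (LA57) requires first-level 5-level
 * paging support.
 */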
void intel_svm_check(struct intel_iommu *iommu)
{
        if (!pasid_supported(iommu))
                return;

        if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
            !cap_fl1gp_support(iommu->cap)) {
                pr_err("%s SVM disabled, incompatible 1GB page capability\n",
                       iommu->name);
                return;
        }

        if (cpu_feature_enabled(X86_FEATURE_LA57) &&
            !cap_fl5lp_support(iommu->cap)) {
                pr_err("%s SVM disabled, incompatible paging mode\n",
                       iommu->name);
                return;
        }

        iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

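/*
 * Flush the PASID-granular IOTLB for @svm on @sdev and, if ATS is
 * enabled, the device TLB (plus any quirk-mandated extra flush) for the
 * same range. The page count is encoded as an order for the device TLB
 * flush, so callers pass power-of-two sized, aligned chunks.
 */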
static void __flush_svm_range_dev(struct intel_svm *svm,
                                  struct intel_svm_dev *sdev,
                                  unsigned long address,
                                  unsigned long pages, int ih)
{
        struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

        if (WARN_ON(!pages))
                return;

        qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
        if (info->ats_enabled) {
                qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
                                         svm->pasid, sdev->qdep, address,
                                         order_base_2(pages));
                quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
                                          svm->pasid, sdev->qdep);
        }
}

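/*
 * Split an arbitrary [address, address + pages) range into naturally
 * aligned, power-of-two sized chunks and flush each one, since the
 * invalidation descriptors express the flush size as an order.
 */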
static void intel_flush_svm_range_dev(struct intel_svm *svm,
                                      struct intel_svm_dev *sdev,
                                      unsigned long address,
                                      unsigned long pages, int ih)
{
        unsigned long shift = ilog2(__roundup_pow_of_two(pages));
        unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
        unsigned long start = ALIGN_DOWN(address, align);
        unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

        while (start < end) {
                __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
                start += align;
        }
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
                                unsigned long pages, int ih)
{
        struct intel_svm_dev *sdev;

        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
        rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
                                   struct mm_struct *mm,
                                   unsigned long start, unsigned long end)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

        intel_flush_svm_range(svm, start,
                              (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
        struct intel_svm_dev *sdev;

        /* This might end up being called from exit_mmap(), *before* the page
         * tables are cleared. And __mmu_notifier_release() will delete us from
         * the list of notifiers so that our invalidate_range() callback doesn't
         * get called when the page tables are cleared. So we need to protect
         * against hardware accessing those page tables.
         *
         * We do it by clearing the entry in the PASID table and then flushing
         * the IOTLB and the PASID table caches. This might upset hardware;
         * perhaps we'll want to point the PASID to a dummy PGD (like the zero
         * page) so that we end up taking a fault that the hardware really
         * *has* to handle gracefully without affecting other processes.
         */
        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
                                            svm->pasid, true);
        rcu_read_unlock();

}

static const struct mmu_notifier_ops intel_mmuops = {
        .release = intel_mm_release,
        .invalidate_range = intel_invalidate_range,
};

static DEFINE_MUTEX(pasid_mutex);

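/*
 * Look up the SVM state and the device binding for @pasid. On success,
 * *rsvm is the struct intel_svm for the PASID (or NULL if none exists)
 * and *rsdev is the binding of @dev to it (or NULL if @dev is not bound).
 */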
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
                             struct intel_svm **rsvm,
                             struct intel_svm_dev **rsdev)
{
        struct intel_svm_dev *sdev = NULL;
        struct intel_svm *svm;

        /* The caller should hold the pasid_mutex lock */
        if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
                return -EINVAL;

        if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
                return -EINVAL;

        svm = pasid_private_find(pasid);
        if (IS_ERR(svm))
                return PTR_ERR(svm);

        if (!svm)
                goto out;

        /*
         * If we found svm for the PASID, there must be at least one device
         * bond.
         */
        if (WARN_ON(list_empty(&svm->devs)))
                return -EINVAL;
        sdev = svm_lookup_device_by_dev(svm, dev);

out:
        *rsvm = svm;
        *rsdev = sdev;

        return 0;
}

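/*
 * Bind @mm to @dev for shared virtual addressing: find or create the
 * per-mm SVM state (registering an MMU notifier so that CPU TLB
 * invalidations are mirrored to the IOMMU), then set up a first-level
 * PASID entry pointing at mm->pgd and add the device to the SVM's
 * device list. Called with pasid_mutex held.
 */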
static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
                             struct mm_struct *mm)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_svm_dev *sdev;
        struct intel_svm *svm;
        unsigned long sflags;
        int ret = 0;

        svm = pasid_private_find(mm->pasid);
        if (!svm) {
                svm = kzalloc(sizeof(*svm), GFP_KERNEL);
                if (!svm)
                        return -ENOMEM;

                svm->pasid = mm->pasid;
                svm->mm = mm;
                INIT_LIST_HEAD_RCU(&svm->devs);

                svm->notifier.ops = &intel_mmuops;
                ret = mmu_notifier_register(&svm->notifier, mm);
                if (ret) {
                        kfree(svm);
                        return ret;
                }

                ret = pasid_private_add(svm->pasid, svm);
                if (ret) {
                        mmu_notifier_unregister(&svm->notifier, mm);
                        kfree(svm);
                        return ret;
                }
        }

        sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
        if (!sdev) {
                ret = -ENOMEM;
                goto free_svm;
        }

        sdev->dev = dev;
        sdev->iommu = iommu;
        sdev->did = FLPT_DEFAULT_DID;
        sdev->sid = PCI_DEVID(info->bus, info->devfn);
        init_rcu_head(&sdev->rcu);
        if (info->ats_enabled) {
                sdev->qdep = info->ats_qdep;
                if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
                        sdev->qdep = 0;
        }

        /* Setup the pasid table: */
        sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
        ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
                                            FLPT_DEFAULT_DID, sflags);
        if (ret)
                goto free_sdev;

        list_add_rcu(&sdev->list, &svm->devs);

        return 0;

free_sdev:
        kfree(sdev);
free_svm:
        if (list_empty(&svm->devs)) {
                mmu_notifier_unregister(&svm->notifier, mm);
                pasid_private_remove(mm->pasid);
                kfree(svm);
        }

        return ret;
}

/* Caller must hold pasid_mutex */
static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
{
        struct intel_svm_dev *sdev;
        struct intel_iommu *iommu;
        struct intel_svm *svm;
        struct mm_struct *mm;
        int ret = -EINVAL;

        iommu = device_to_iommu(dev, NULL, NULL);
        if (!iommu)
                goto out;

        ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
        if (ret)
                goto out;
        mm = svm->mm;

        if (sdev) {
                list_del_rcu(&sdev->list);
                /*
                 * Flush the PASID cache and IOTLB for this device.
                 * Note that we do depend on the hardware *not* using
                 * the PASID any more. Just as we depend on other
                 * devices never using PASIDs that they have no right
                 * to use. We have a *shared* PASID table, because it's
                 * large and has to be physically contiguous. So it's
                 * hard to be as defensive as we might like.
                 */
                intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false);
                intel_svm_drain_prq(dev, svm->pasid);
                kfree_rcu(sdev, rcu);

                if (list_empty(&svm->devs)) {
                        if (svm->notifier.ops)
                                mmu_notifier_unregister(&svm->notifier, mm);
                        pasid_private_remove(svm->pasid);
                        /*
                         * We mandate that no page faults may be outstanding
                         * for the PASID when intel_svm_unbind_mm() is called.
                         * If that is not obeyed, subtle errors will happen.
                         * Let's make them less subtle...
                         */
                        memset(svm, 0x6b, sizeof(*svm));
                        kfree(svm);
                }
        }
out:
        return ret;
}

/* Page request queue descriptor */
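/*
 * The layout follows the VT-d page request descriptor: qw_0 carries the
 * request type, requester ID, PASID and the present/privilege/execute
 * bits; qw_1 carries the faulting address, read/write bits, LPIG and the
 * page request group index; the last two quadwords hold optional private
 * data that is echoed back in the page group response.
 */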
struct page_req_dsc {
        union {
                struct {
                        u64 type:8;
                        u64 pasid_present:1;
                        u64 priv_data_present:1;
                        u64 rsvd:6;
                        u64 rid:16;
                        u64 pasid:20;
                        u64 exe_req:1;
                        u64 pm_req:1;
                        u64 rsvd2:10;
                };
                u64 qw_0;
        };
        union {
                struct {
                        u64 rd_req:1;
                        u64 wr_req:1;
                        u64 lpig:1;
                        u64 prg_index:9;
                        u64 addr:52;
                };
                u64 qw_1;
        };
        u64 priv_data[2];
};

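/*
 * A virtual address is canonical when its upper bits are a sign
 * extension of bit __VIRTUAL_MASK_SHIFT; check that by shifting the
 * unused bits out and back in arithmetically.
 */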
static bool is_canonical_address(u64 addr)
{
        int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
        long saddr = (long) addr;

        return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_svm_drain_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then it follows the
 * steps described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
static void intel_svm_drain_prq(struct device *dev, u32 pasid)
{
        struct device_domain_info *info;
        struct dmar_domain *domain;
        struct intel_iommu *iommu;
        struct qi_desc desc[3];
        struct pci_dev *pdev;
        int head, tail;
        u16 sid, did;
        int qdep;

        info = dev_iommu_priv_get(dev);
        if (WARN_ON(!info || !dev_is_pci(dev)))
                return;

        if (!info->pri_enabled)
                return;

        iommu = info->iommu;
        domain = info->domain;
        pdev = to_pci_dev(dev);
        sid = PCI_DEVID(info->bus, info->devfn);
        did = domain_id_iommu(domain, iommu);
        qdep = pci_ats_queue_depth(pdev);

        /*
         * Check and wait until all pending page requests in the queue are
         * handled by the prq handling thread.
         */
prq_retry:
        reinit_completion(&iommu->prq_complete);
        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        while (head != tail) {
                struct page_req_dsc *req;

                req = &iommu->prq[head / sizeof(*req)];
                if (!req->pasid_present || req->pasid != pasid) {
                        head = (head + sizeof(*req)) & PRQ_RING_MASK;
                        continue;
                }

                wait_for_completion(&iommu->prq_complete);
                goto prq_retry;
        }

        /*
         * A work item in the IO page fault workqueue may try to lock
         * pasid_mutex now. Holding pasid_mutex while waiting in
         * iopf_queue_flush_dev() for all work items in the workqueue to
         * finish may cause a deadlock.
         *
         * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
         * Unlock it to allow the work items to be handled while waiting
         * for them to finish.
         */
        lockdep_assert_held(&pasid_mutex);
        mutex_unlock(&pasid_mutex);
        iopf_queue_flush_dev(dev);
        mutex_lock(&pasid_mutex);

        /*
         * Perform steps described in VT-d spec CH7.10 to drain page
         * requests and responses in hardware.
         */
        memset(desc, 0, sizeof(desc));
        desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
                        QI_IWD_FENCE |
                        QI_IWD_TYPE;
        desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
                        QI_EIOTLB_DID(did) |
                        QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
                        QI_EIOTLB_TYPE;
        desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
                        QI_DEV_EIOTLB_SID(sid) |
                        QI_DEV_EIOTLB_QDEP(qdep) |
                        QI_DEIOTLB_TYPE |
                        QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
        reinit_completion(&iommu->prq_complete);
        qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
        if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
                wait_for_completion(&iommu->prq_complete);
                goto qi_retry;
        }
}

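/* Translate the access bits of a page request into IOMMU_FAULT_PERM_* flags. */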
static int prq_to_iommu_prot(struct page_req_dsc *req)
{
        int prot = 0;

        if (req->rd_req)
                prot |= IOMMU_FAULT_PERM_READ;
        if (req->wr_req)
                prot |= IOMMU_FAULT_PERM_WRITE;
        if (req->exe_req)
                prot |= IOMMU_FAULT_PERM_EXEC;
        if (req->pm_req)
                prot |= IOMMU_FAULT_PERM_PRIV;

        return prot;
}

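/*
 * Convert a page request descriptor into an iommu_fault_event and hand it
 * to the generic fault reporting path via iommu_report_device_fault().
 */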
static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
                                struct page_req_dsc *desc)
{
        struct iommu_fault_event event;

        if (!dev || !dev_is_pci(dev))
                return -ENODEV;

        /* Fill in event data for device specific processing */
        memset(&event, 0, sizeof(struct iommu_fault_event));
        event.fault.type = IOMMU_FAULT_PAGE_REQ;
        event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
        event.fault.prm.pasid = desc->pasid;
        event.fault.prm.grpid = desc->prg_index;
        event.fault.prm.perm = prq_to_iommu_prot(desc);

        if (desc->lpig)
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
        if (desc->pasid_present) {
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
        }
        if (desc->priv_data_present) {
                /*
                 * Set the last page in group bit if private data is present;
                 * a page response is then required, just as it is for LPIG.
                 * iommu_report_device_fault() doesn't understand this vendor-
                 * specific requirement, so we set last_page as a workaround.
                 */
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
                event.fault.prm.private_data[0] = desc->priv_data[0];
                event.fault.prm.private_data[1] = desc->priv_data[1];
        } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
                /*
                 * If the private data fields are not used by hardware, use
                 * them to monitor the prq handling latency.
                 */
                event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
        }

        return iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
                                 struct page_req_dsc *req, int result)
{
        struct qi_desc desc;

        pr_err("%s: Invalid page request: %08llx %08llx\n",
               iommu->name, ((unsigned long long *)req)[0],
               ((unsigned long long *)req)[1]);

        /*
         * Per VT-d spec. v3.0 ch7.7, system software must
         * respond with page group response if private data
         * is present (PDP) or last page in group (LPIG) bit
         * is set. This is an additional VT-d feature beyond
         * PCI ATS spec.
         */
        if (!req->lpig && !req->priv_data_present)
                return;

        desc.qw0 = QI_PGRP_PASID(req->pasid) |
                        QI_PGRP_DID(req->rid) |
                        QI_PGRP_PASID_P(req->pasid_present) |
                        QI_PGRP_PDP(req->priv_data_present) |
                        QI_PGRP_RESP_CODE(result) |
                        QI_PGRP_RESP_TYPE;
        desc.qw1 = QI_PGRP_IDX(req->prg_index) |
                        QI_PGRP_LPIG(req->lpig);

        if (req->priv_data_present) {
                desc.qw2 = req->priv_data[0];
                desc.qw3 = req->priv_data[1];
        } else {
                desc.qw2 = 0;
                desc.qw3 = 0;
        }

        qi_submit_sync(iommu, &desc, 1, 0);
}

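/*
 * Threaded handler for the page request queue interrupt: walk the queue
 * from head to tail, sanity-check each descriptor, report valid requests
 * through the IOPF framework and respond directly to malformed ones, then
 * update the head register and handle a possible queue overflow.
 */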
static irqreturn_t prq_event_thread(int irq, void *d)
{
        struct intel_iommu *iommu = d;
        struct page_req_dsc *req;
        int head, tail, handled;
        struct pci_dev *pdev;
        u64 address;

        /*
         * Clear PPR bit before reading head/tail registers, to ensure that
         * we get a new interrupt if needed.
         */
        writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        handled = (head != tail);
        while (head != tail) {
                req = &iommu->prq[head / sizeof(*req)];
                address = (u64)req->addr << VTD_PAGE_SHIFT;

                if (unlikely(!req->pasid_present)) {
                        pr_err("IOMMU: %s: Page request without PASID\n",
                               iommu->name);
bad_req:
                        handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
                        goto prq_advance;
                }

                if (unlikely(!is_canonical_address(address))) {
                        pr_err("IOMMU: %s: Address is not canonical\n",
                               iommu->name);
                        goto bad_req;
                }

                if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
                        pr_err("IOMMU: %s: Page request in Privilege Mode\n",
                               iommu->name);
                        goto bad_req;
                }

                if (unlikely(req->exe_req && req->rd_req)) {
                        pr_err("IOMMU: %s: Execution request not supported\n",
                               iommu->name);
                        goto bad_req;
                }

                /* Drop Stop Marker message. No need for a response. */
                if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
                        goto prq_advance;

                pdev = pci_get_domain_bus_and_slot(iommu->segment,
                                                   PCI_BUS_NUM(req->rid),
                                                   req->rid & 0xff);
                /*
                 * If the prq is to be handled outside the iommu driver via a
                 * receiver of the fault notifiers, we skip the page response
                 * here.
                 */
                if (!pdev)
                        goto bad_req;

                if (intel_svm_prq_report(iommu, &pdev->dev, req))
                        handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
                else
                        trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
                                         req->priv_data[0], req->priv_data[1],
                                         iommu->prq_seq_number++);
                pci_dev_put(pdev);
prq_advance:
                head = (head + sizeof(*req)) & PRQ_RING_MASK;
        }

        dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

        /*
         * Clear the page request overflow bit and wake up all threads that
         * are waiting for the completion of this handling.
         */
        if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
                pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
                                    iommu->name);
                head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
                tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
                if (head == tail) {
                        iopf_queue_discard_partial(iommu->iopf_queue);
                        writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
                        pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared\n",
                                            iommu->name);
                }
        }

        if (!completion_done(&iommu->prq_complete))
                complete(&iommu->prq_complete);

        return IRQ_RETVAL(handled);
}

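/*
 * Send a page group response for a previously reported page request.
 * A response descriptor is only submitted when the request had the LPIG
 * bit set or carried private data, as required by the VT-d spec.
 */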
int intel_svm_page_response(struct device *dev,
                            struct iommu_fault_event *evt,
                            struct iommu_page_response *msg)
{
        struct iommu_fault_page_request *prm;
        struct intel_iommu *iommu;
        bool private_present;
        bool pasid_present;
        bool last_page;
        u8 bus, devfn;
        int ret = 0;
        u16 sid;

        if (!dev || !dev_is_pci(dev))
                return -ENODEV;

        iommu = device_to_iommu(dev, &bus, &devfn);
        if (!iommu)
                return -ENODEV;

        if (!msg || !evt)
                return -EINVAL;

        prm = &evt->fault.prm;
        sid = PCI_DEVID(bus, devfn);
        pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
        private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
        last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

        if (!pasid_present) {
                ret = -EINVAL;
                goto out;
        }

        if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
                ret = -EINVAL;
                goto out;
        }

        /*
         * Per VT-d spec. v3.0 ch7.7, system software must respond
         * with page group response if private data is present (PDP)
         * or last page in group (LPIG) bit is set. This is an
         * additional VT-d requirement beyond PCI ATS spec.
         */
        if (last_page || private_present) {
                struct qi_desc desc;

                desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
                                QI_PGRP_PASID_P(pasid_present) |
                                QI_PGRP_PDP(private_present) |
                                QI_PGRP_RESP_CODE(msg->code) |
                                QI_PGRP_RESP_TYPE;
                desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
                desc.qw2 = 0;
                desc.qw3 = 0;

                if (private_present) {
                        desc.qw2 = prm->private_data[0];
                        desc.qw3 = prm->private_data[1];
                } else if (prm->private_data[0]) {
                        dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
                                ktime_to_ns(ktime_get()) - prm->private_data[0]);
                }

                qi_submit_sync(iommu, &desc, 1, 0);
        }
out:
        return ret;
}

void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
{
        mutex_lock(&pasid_mutex);
        intel_svm_unbind_mm(dev, pasid);
        mutex_unlock(&pasid_mutex);
}

static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
                                   struct device *dev, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;
        struct mm_struct *mm = domain->mm;
        int ret;

        mutex_lock(&pasid_mutex);
        ret = intel_svm_bind_mm(iommu, dev, mm);
        mutex_unlock(&pasid_mutex);

        return ret;
}

static void intel_svm_domain_free(struct iommu_domain *domain)
{
        kfree(to_dmar_domain(domain));
}

static const struct iommu_domain_ops intel_svm_domain_ops = {
        .set_dev_pasid          = intel_svm_set_dev_pasid,
        .free                   = intel_svm_domain_free
};

struct iommu_domain *intel_svm_domain_alloc(void)
{
        struct dmar_domain *domain;

        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;
        domain->domain.ops = &intel_svm_domain_ops;

        return &domain->domain;
}