platform/kernel/linux-starfive.git — drivers/iommu/intel/svm.c (e7b9bedebcc077cf9a48e8b51033908cf2ee694b)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "../iommu-sva.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)

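/*
 * pasid_private_array maps a PASID value to its per-mm SVM state
 * (struct intel_svm). xa_alloc() with XA_LIMIT(pasid, pasid) stores the
 * entry at exactly index @pasid and fails if that slot is already in use.
 */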
static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
        return xa_alloc(&pasid_private_array, &pasid, priv,
                        XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
        xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
        return xa_load(&pasid_private_array, pasid);
}

static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
        struct intel_svm_dev *sdev = NULL, *t;

        rcu_read_lock();
        list_for_each_entry_rcu(t, &svm->devs, list) {
                if (t->dev == dev) {
                        sdev = t;
                        break;
                }
        }
        rcu_read_unlock();

        return sdev;
}

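/*
 * Allocate and enable the page request queue (PRQ) for @iommu: allocate
 * the queue pages, set up a dedicated interrupt and an IOPF queue for
 * deferred fault handling, then program the queue head, tail and address
 * registers so that hardware can start posting page requests.
 */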
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
        struct iopf_queue *iopfq;
        struct page *pages;
        int irq, ret;

        pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
        if (!pages) {
                pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
                        iommu->name);
                return -ENOMEM;
        }
        iommu->prq = page_address(pages);

        irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
        if (irq <= 0) {
                pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
                       iommu->name);
                ret = -EINVAL;
                goto free_prq;
        }
        iommu->pr_irq = irq;

        snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
                 "dmar%d-iopfq", iommu->seq_id);
        iopfq = iopf_queue_alloc(iommu->iopfq_name);
        if (!iopfq) {
                pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
                ret = -ENOMEM;
                goto free_hwirq;
        }
        iommu->iopf_queue = iopfq;

        snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

        ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
                                   iommu->prq_name, iommu);
        if (ret) {
                pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
                       iommu->name);
                goto free_iopfq;
        }
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

        init_completion(&iommu->prq_complete);

        return 0;

free_iopfq:
        iopf_queue_free(iommu->iopf_queue);
        iommu->iopf_queue = NULL;
free_hwirq:
        dmar_free_hwirq(irq);
        iommu->pr_irq = 0;
free_prq:
        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;

        return ret;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

        if (iommu->pr_irq) {
                free_irq(iommu->pr_irq, iommu);
                dmar_free_hwirq(iommu->pr_irq);
                iommu->pr_irq = 0;
        }

        if (iommu->iopf_queue) {
                iopf_queue_free(iommu->iopf_queue);
                iommu->iopf_queue = NULL;
        }

        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;

        return 0;
}

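/*
 * Mark @iommu as SVM-capable only if its first-level translation
 * capabilities match the CPU: 1GB pages require first-level 1GB page
 * support, and 5-level paging (LA57) requires first-level 5-level
 * paging support.
 */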
void intel_svm_check(struct intel_iommu *iommu)
{
        if (!pasid_supported(iommu))
                return;

        if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
            !cap_fl1gp_support(iommu->cap)) {
                pr_err("%s SVM disabled, incompatible 1GB page capability\n",
                       iommu->name);
                return;
        }

        if (cpu_feature_enabled(X86_FEATURE_LA57) &&
            !cap_fl5lp_support(iommu->cap)) {
                pr_err("%s SVM disabled, incompatible paging mode\n",
                       iommu->name);
                return;
        }

        iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

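/*
 * Flush the PASID-granular IOTLB for @svm on @sdev and, if ATS is
 * enabled, the device TLB (plus any quirk-mandated extra flush) for the
 * same range. The page count is encoded as an order for the device TLB
 * flush, so callers pass power-of-two sized, aligned chunks.
 */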
static void __flush_svm_range_dev(struct intel_svm *svm,
                                  struct intel_svm_dev *sdev,
                                  unsigned long address,
                                  unsigned long pages, int ih)
{
        struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

        if (WARN_ON(!pages))
                return;

        qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
        if (info->ats_enabled) {
                qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
                                         svm->pasid, sdev->qdep, address,
                                         order_base_2(pages));
                quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
                                          svm->pasid, sdev->qdep);
        }
}

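/*
 * Split an arbitrary [address, address + pages) range into naturally
 * aligned, power-of-two sized chunks and flush each one, since the
 * invalidation descriptors express the flush size as an order.
 */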
static void intel_flush_svm_range_dev(struct intel_svm *svm,
                                      struct intel_svm_dev *sdev,
                                      unsigned long address,
                                      unsigned long pages, int ih)
{
        unsigned long shift = ilog2(__roundup_pow_of_two(pages));
        unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
        unsigned long start = ALIGN_DOWN(address, align);
        unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

        while (start < end) {
                __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
                start += align;
        }
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
                                unsigned long pages, int ih)
{
        struct intel_svm_dev *sdev;

        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
        rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
                                   struct mm_struct *mm,
                                   unsigned long start, unsigned long end)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

        intel_flush_svm_range(svm, start,
                              (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
        struct intel_svm_dev *sdev;

        /* This might end up being called from exit_mmap(), *before* the page
         * tables are cleared. And __mmu_notifier_release() will delete us from
         * the list of notifiers so that our invalidate_range() callback doesn't
         * get called when the page tables are cleared. So we need to protect
         * against hardware accessing those page tables.
         *
         * We do it by clearing the entry in the PASID table and then flushing
         * the IOTLB and the PASID table caches. This might upset hardware;
         * perhaps we'll want to point the PASID to a dummy PGD (like the zero
         * page) so that we end up taking a fault that the hardware really
         * *has* to handle gracefully without affecting other processes.
         */
        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
                                            svm->pasid, true);
        rcu_read_unlock();

}

static const struct mmu_notifier_ops intel_mmuops = {
        .release = intel_mm_release,
        .invalidate_range = intel_invalidate_range,
};

static DEFINE_MUTEX(pasid_mutex);

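/*
 * Look up the SVM state and the device binding for @pasid. On success,
 * *rsvm is the struct intel_svm for the PASID (or NULL if none exists)
 * and *rsdev is the binding of @dev to it (or NULL if @dev is not bound).
 */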
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
                             struct intel_svm **rsvm,
                             struct intel_svm_dev **rsdev)
{
        struct intel_svm_dev *sdev = NULL;
        struct intel_svm *svm;

        /* The caller should hold the pasid_mutex lock */
        if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
                return -EINVAL;

        if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
                return -EINVAL;

        svm = pasid_private_find(pasid);
        if (IS_ERR(svm))
                return PTR_ERR(svm);

        if (!svm)
                goto out;

        /*
         * If we found svm for the PASID, there must be at least one device
         * bond.
         */
        if (WARN_ON(list_empty(&svm->devs)))
                return -EINVAL;
        sdev = svm_lookup_device_by_dev(svm, dev);

out:
        *rsvm = svm;
        *rsdev = sdev;

        return 0;
}

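/*
 * Bind @mm to @dev for shared virtual addressing: find or create the
 * per-mm SVM state (registering an MMU notifier so that CPU TLB
 * invalidations are mirrored to the IOMMU), then set up a first-level
 * PASID entry pointing at mm->pgd and add the device to the SVM's
 * device list. Called with pasid_mutex held.
 */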
static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
                             struct mm_struct *mm)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_svm_dev *sdev;
        struct intel_svm *svm;
        unsigned long sflags;
        int ret = 0;

        svm = pasid_private_find(mm->pasid);
        if (!svm) {
                svm = kzalloc(sizeof(*svm), GFP_KERNEL);
                if (!svm)
                        return -ENOMEM;

                svm->pasid = mm->pasid;
                svm->mm = mm;
                INIT_LIST_HEAD_RCU(&svm->devs);

                svm->notifier.ops = &intel_mmuops;
                ret = mmu_notifier_register(&svm->notifier, mm);
                if (ret) {
                        kfree(svm);
                        return ret;
                }

                ret = pasid_private_add(svm->pasid, svm);
                if (ret) {
                        mmu_notifier_unregister(&svm->notifier, mm);
                        kfree(svm);
                        return ret;
                }
        }

        sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
        if (!sdev) {
                ret = -ENOMEM;
                goto free_svm;
        }

        sdev->dev = dev;
        sdev->iommu = iommu;
        sdev->did = FLPT_DEFAULT_DID;
        sdev->sid = PCI_DEVID(info->bus, info->devfn);
        init_rcu_head(&sdev->rcu);
        if (info->ats_enabled) {
                sdev->qdep = info->ats_qdep;
                if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
                        sdev->qdep = 0;
        }

        /* Setup the pasid table: */
        sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
        ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
                                            FLPT_DEFAULT_DID, sflags);
        if (ret)
                goto free_sdev;

        list_add_rcu(&sdev->list, &svm->devs);

        return 0;

free_sdev:
        kfree(sdev);
free_svm:
        if (list_empty(&svm->devs)) {
                mmu_notifier_unregister(&svm->notifier, mm);
                pasid_private_remove(mm->pasid);
                kfree(svm);
        }

        return ret;
}

/* Caller must hold pasid_mutex */
static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
{
        struct intel_svm_dev *sdev;
        struct intel_iommu *iommu;
        struct intel_svm *svm;
        struct mm_struct *mm;
        int ret = -EINVAL;

        iommu = device_to_iommu(dev, NULL, NULL);
        if (!iommu)
                goto out;

        ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
        if (ret)
                goto out;
        mm = svm->mm;

        if (sdev) {
                list_del_rcu(&sdev->list);
                /*
                 * Flush the PASID cache and IOTLB for this device.
                 * Note that we do depend on the hardware *not* using
                 * the PASID any more. Just as we depend on other
                 * devices never using PASIDs that they have no right
                 * to use. We have a *shared* PASID table, because it's
                 * large and has to be physically contiguous. So it's
                 * hard to be as defensive as we might like.
                 */
                intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false);
                intel_svm_drain_prq(dev, svm->pasid);
                kfree_rcu(sdev, rcu);

                if (list_empty(&svm->devs)) {
                        if (svm->notifier.ops)
                                mmu_notifier_unregister(&svm->notifier, mm);
                        pasid_private_remove(svm->pasid);
                        /*
                         * We mandate that no page faults may be outstanding
                         * for the PASID when intel_svm_unbind_mm() is called.
                         * If that is not obeyed, subtle errors will happen.
                         * Let's make them less subtle...
                         */
                        memset(svm, 0x6b, sizeof(*svm));
                        kfree(svm);
                }
        }
out:
        return ret;
}

/* Page request queue descriptor */
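/*
 * The layout follows the VT-d page request descriptor: qw_0 carries the
 * request type, requester ID, PASID and the present/privilege/execute
 * bits; qw_1 carries the faulting address, read/write bits, LPIG and the
 * page request group index; the last two quadwords hold optional private
 * data that is echoed back in the page group response.
 */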
struct page_req_dsc {
        union {
                struct {
                        u64 type:8;
                        u64 pasid_present:1;
                        u64 priv_data_present:1;
                        u64 rsvd:6;
                        u64 rid:16;
                        u64 pasid:20;
                        u64 exe_req:1;
                        u64 pm_req:1;
                        u64 rsvd2:10;
                };
                u64 qw_0;
        };
        union {
                struct {
                        u64 rd_req:1;
                        u64 wr_req:1;
                        u64 lpig:1;
                        u64 prg_index:9;
                        u64 addr:52;
                };
                u64 qw_1;
        };
        u64 priv_data[2];
};

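/*
 * A virtual address is canonical when its upper bits are a sign
 * extension of bit __VIRTUAL_MASK_SHIFT; check that by shifting the
 * unused bits out and back in arithmetically.
 */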
static bool is_canonical_address(u64 addr)
{
        int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
        long saddr = (long) addr;

        return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_svm_drain_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then it follows the
 * steps described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
static void intel_svm_drain_prq(struct device *dev, u32 pasid)
{
        struct device_domain_info *info;
        struct dmar_domain *domain;
        struct intel_iommu *iommu;
        struct qi_desc desc[3];
        struct pci_dev *pdev;
        int head, tail;
        u16 sid, did;
        int qdep;

        info = dev_iommu_priv_get(dev);
        if (WARN_ON(!info || !dev_is_pci(dev)))
                return;

        if (!info->pri_enabled)
                return;

        iommu = info->iommu;
        domain = info->domain;
        pdev = to_pci_dev(dev);
        sid = PCI_DEVID(info->bus, info->devfn);
        did = domain_id_iommu(domain, iommu);
        qdep = pci_ats_queue_depth(pdev);

        /*
         * Check and wait until all pending page requests in the queue are
         * handled by the prq handling thread.
         */
prq_retry:
        reinit_completion(&iommu->prq_complete);
        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        while (head != tail) {
                struct page_req_dsc *req;

                req = &iommu->prq[head / sizeof(*req)];
                if (!req->pasid_present || req->pasid != pasid) {
                        head = (head + sizeof(*req)) & PRQ_RING_MASK;
                        continue;
                }

                wait_for_completion(&iommu->prq_complete);
                goto prq_retry;
        }

        /*
         * A work item in the IO page fault workqueue may try to lock
         * pasid_mutex now. Holding pasid_mutex while waiting in
         * iopf_queue_flush_dev() for all work items in the workqueue to
         * finish may cause a deadlock.
         *
         * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
         * Unlock it to allow the work items to be handled while waiting
         * for them to finish.
         */
        lockdep_assert_held(&pasid_mutex);
        mutex_unlock(&pasid_mutex);
        iopf_queue_flush_dev(dev);
        mutex_lock(&pasid_mutex);

        /*
         * Perform steps described in VT-d spec CH7.10 to drain page
         * requests and responses in hardware.
         */
        memset(desc, 0, sizeof(desc));
        desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
                        QI_IWD_FENCE |
                        QI_IWD_TYPE;
        desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
                        QI_EIOTLB_DID(did) |
                        QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
                        QI_EIOTLB_TYPE;
        desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
                        QI_DEV_EIOTLB_SID(sid) |
                        QI_DEV_EIOTLB_QDEP(qdep) |
                        QI_DEIOTLB_TYPE |
                        QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
        reinit_completion(&iommu->prq_complete);
        qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
        if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
                wait_for_completion(&iommu->prq_complete);
                goto qi_retry;
        }
}

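/* Translate the access bits of a page request into IOMMU_FAULT_PERM_* flags. */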
static int prq_to_iommu_prot(struct page_req_dsc *req)
{
        int prot = 0;

        if (req->rd_req)
                prot |= IOMMU_FAULT_PERM_READ;
        if (req->wr_req)
                prot |= IOMMU_FAULT_PERM_WRITE;
        if (req->exe_req)
                prot |= IOMMU_FAULT_PERM_EXEC;
        if (req->pm_req)
                prot |= IOMMU_FAULT_PERM_PRIV;

        return prot;
}

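/*
 * Convert a page request descriptor into an iommu_fault_event and hand it
 * to the generic fault reporting path via iommu_report_device_fault().
 */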
static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
                                struct page_req_dsc *desc)
{
        struct iommu_fault_event event;

        if (!dev || !dev_is_pci(dev))
                return -ENODEV;

        /* Fill in event data for device specific processing */
        memset(&event, 0, sizeof(struct iommu_fault_event));
        event.fault.type = IOMMU_FAULT_PAGE_REQ;
        event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
        event.fault.prm.pasid = desc->pasid;
        event.fault.prm.grpid = desc->prg_index;
        event.fault.prm.perm = prq_to_iommu_prot(desc);

        if (desc->lpig)
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
        if (desc->pasid_present) {
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
        }
        if (desc->priv_data_present) {
                /*
                 * Set the last page in group bit if private data is present;
                 * a page response is then required, just as it is for LPIG.
                 * iommu_report_device_fault() doesn't understand this vendor-
                 * specific requirement, so we set last_page as a workaround.
                 */
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
                event.fault.prm.private_data[0] = desc->priv_data[0];
                event.fault.prm.private_data[1] = desc->priv_data[1];
        } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
                /*
                 * If the private data fields are not used by hardware, use
                 * them to monitor the prq handling latency.
                 */
                event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
        }

        return iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
                                 struct page_req_dsc *req, int result)
{
        struct qi_desc desc;

        pr_err("%s: Invalid page request: %08llx %08llx\n",
               iommu->name, ((unsigned long long *)req)[0],
               ((unsigned long long *)req)[1]);

        /*
         * Per VT-d spec. v3.0 ch7.7, system software must
         * respond with page group response if private data
         * is present (PDP) or last page in group (LPIG) bit
         * is set. This is an additional VT-d feature beyond
         * PCI ATS spec.
         */
        if (!req->lpig && !req->priv_data_present)
                return;

        desc.qw0 = QI_PGRP_PASID(req->pasid) |
                        QI_PGRP_DID(req->rid) |
                        QI_PGRP_PASID_P(req->pasid_present) |
                        QI_PGRP_PDP(req->priv_data_present) |
                        QI_PGRP_RESP_CODE(result) |
                        QI_PGRP_RESP_TYPE;
        desc.qw1 = QI_PGRP_IDX(req->prg_index) |
                        QI_PGRP_LPIG(req->lpig);

        if (req->priv_data_present) {
                desc.qw2 = req->priv_data[0];
                desc.qw3 = req->priv_data[1];
        } else {
                desc.qw2 = 0;
                desc.qw3 = 0;
        }

        qi_submit_sync(iommu, &desc, 1, 0);
}

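/*
 * Threaded handler for the page request queue interrupt: walk the queue
 * from head to tail, sanity-check each descriptor, report valid requests
 * through the IOPF framework and respond directly to malformed ones, then
 * update the head register and handle a possible queue overflow.
 */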
static irqreturn_t prq_event_thread(int irq, void *d)
{
        struct intel_iommu *iommu = d;
        struct page_req_dsc *req;
        int head, tail, handled;
        struct pci_dev *pdev;
        u64 address;

        /*
         * Clear PPR bit before reading head/tail registers, to ensure that
         * we get a new interrupt if needed.
         */
        writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        handled = (head != tail);
        while (head != tail) {
                req = &iommu->prq[head / sizeof(*req)];
                address = (u64)req->addr << VTD_PAGE_SHIFT;

                if (unlikely(!req->pasid_present)) {
                        pr_err("IOMMU: %s: Page request without PASID\n",
                               iommu->name);
bad_req:
                        handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
                        goto prq_advance;
                }

                if (unlikely(!is_canonical_address(address))) {
                        pr_err("IOMMU: %s: Address is not canonical\n",
                               iommu->name);
                        goto bad_req;
                }

                if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
                        pr_err("IOMMU: %s: Page request in Privilege Mode\n",
                               iommu->name);
                        goto bad_req;
                }

                if (unlikely(req->exe_req && req->rd_req)) {
                        pr_err("IOMMU: %s: Execution request not supported\n",
                               iommu->name);
                        goto bad_req;
                }

                /* Drop Stop Marker message. No need for a response. */
                if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
                        goto prq_advance;

                pdev = pci_get_domain_bus_and_slot(iommu->segment,
                                                   PCI_BUS_NUM(req->rid),
                                                   req->rid & 0xff);
                /*
                 * If the prq is to be handled outside the iommu driver via a
                 * receiver of the fault notifiers, we skip the page response
                 * here.
                 */
                if (!pdev)
                        goto bad_req;

                if (intel_svm_prq_report(iommu, &pdev->dev, req))
                        handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
                else
                        trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
                                         req->priv_data[0], req->priv_data[1],
                                         iommu->prq_seq_number++);
                pci_dev_put(pdev);
prq_advance:
                head = (head + sizeof(*req)) & PRQ_RING_MASK;
        }

        dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

        /*
         * Clear the page request overflow bit and wake up all threads that
         * are waiting for the completion of this handling.
         */
        if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
                pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
                                    iommu->name);
                head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
                tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
                if (head == tail) {
                        iopf_queue_discard_partial(iommu->iopf_queue);
                        writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
                        pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared\n",
                                            iommu->name);
                }
        }

        if (!completion_done(&iommu->prq_complete))
                complete(&iommu->prq_complete);

        return IRQ_RETVAL(handled);
}

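/*
 * Send a page group response for a previously reported page request.
 * A response descriptor is only submitted when the request had the LPIG
 * bit set or carried private data, as required by the VT-d spec.
 */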
int intel_svm_page_response(struct device *dev,
                            struct iommu_fault_event *evt,
                            struct iommu_page_response *msg)
{
        struct iommu_fault_page_request *prm;
        struct intel_iommu *iommu;
        bool private_present;
        bool pasid_present;
        bool last_page;
        u8 bus, devfn;
        int ret = 0;
        u16 sid;

        if (!dev || !dev_is_pci(dev))
                return -ENODEV;

        iommu = device_to_iommu(dev, &bus, &devfn);
        if (!iommu)
                return -ENODEV;

        if (!msg || !evt)
                return -EINVAL;

        prm = &evt->fault.prm;
        sid = PCI_DEVID(bus, devfn);
        pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
        private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
        last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

        if (!pasid_present) {
                ret = -EINVAL;
                goto out;
        }

        if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
                ret = -EINVAL;
                goto out;
        }

        /*
         * Per VT-d spec. v3.0 ch7.7, system software must respond
         * with page group response if private data is present (PDP)
         * or last page in group (LPIG) bit is set. This is an
         * additional VT-d requirement beyond PCI ATS spec.
         */
        if (last_page || private_present) {
                struct qi_desc desc;

                desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
                                QI_PGRP_PASID_P(pasid_present) |
                                QI_PGRP_PDP(private_present) |
                                QI_PGRP_RESP_CODE(msg->code) |
                                QI_PGRP_RESP_TYPE;
                desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
                desc.qw2 = 0;
                desc.qw3 = 0;

                if (private_present) {
                        desc.qw2 = prm->private_data[0];
                        desc.qw3 = prm->private_data[1];
                } else if (prm->private_data[0]) {
                        dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
                                ktime_to_ns(ktime_get()) - prm->private_data[0]);
                }

                qi_submit_sync(iommu, &desc, 1, 0);
        }
out:
        return ret;
}

void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
{
        mutex_lock(&pasid_mutex);
        intel_svm_unbind_mm(dev, pasid);
        mutex_unlock(&pasid_mutex);
}

static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
                                   struct device *dev, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;
        struct mm_struct *mm = domain->mm;
        int ret;

        mutex_lock(&pasid_mutex);
        ret = intel_svm_bind_mm(iommu, dev, mm);
        mutex_unlock(&pasid_mutex);

        return ret;
}

static void intel_svm_domain_free(struct iommu_domain *domain)
{
        kfree(to_dmar_domain(domain));
}

static const struct iommu_domain_ops intel_svm_domain_ops = {
        .set_dev_pasid          = intel_svm_set_dev_pasid,
        .free                   = intel_svm_domain_free
};

struct iommu_domain *intel_svm_domain_alloc(void)
{
        struct dmar_domain *domain;

        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;
        domain->domain.ops = &intel_svm_domain_ops;

        return &domain->domain;
}