powerpc/powernv/pci: Enable 64-bit devices to access >4GB DMA space

author Russell Currey <ruscur@russell.cc>

Wed, 21 Jun 2017 07:18:04 +0000 (17:18 +1000)

committer Michael Ellerman <mpe@ellerman.id.au>

Tue, 27 Jun 2017 02:14:28 +0000 (12:14 +1000)
author Russell Currey <ruscur@russell.cc>
Wed, 21 Jun 2017 07:18:04 +0000 (17:18 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Tue, 27 Jun 2017 02:14:28 +0000 (12:14 +1000)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c

index a642412..4376135 100644 (file)
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1743,6 +1743,75 @@ static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe)
         return true;
  }
  
+/*
+ * Reconfigure TVE#0 to be usable as 64-bit DMA space.
+ *
+ * The first 4GB of virtual memory for a PE is reserved for 32-bit accesses.
+ * Devices can only access more than that if bit 59 of the PCI address is set
+ * by hardware, which indicates TVE#1 should be used instead of TVE#0.
+ * Many PCI devices are not capable of addressing that many bits, and as a
+ * result are limited to the 4GB of virtual memory made available to 32-bit
+ * devices in TVE#0.
+ *
+ * In order to work around this, reconfigure TVE#0 to be suitable for 64-bit
+ * devices: build a TCE table whose first 4GB is left unmapped (zeroed
+ * entries) and which maps memory from address 0 up to memory_hotplug_max()
+ * linearly at a 4GB offset.  This should only be used by devices that want
+ * more than 4GB, and only on PEs that have no 32-bit devices.
+ *
+ * Currently this will only work on PHB3 (POWER8).
+ *
+ * Returns 0 on success.  Returns -EIO if the table cannot be allocated or
+ * if OPAL does not report success for the window mapping; on those failure
+ * paths any table pages that were allocated are freed again.
+ */
+static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
+{
+       u64 window_size, table_size, tce_count, addr;
+       /* Read once so table sizing and table filling use the same limit */
+       u64 max_mem = memory_hotplug_max();
+       struct page *table_pages;
+       u64 tce_order = 28; /* 256MB TCEs */
+       unsigned int table_order;
+       __be64 *tces;
+       s64 rc;
+
+       /*
+        * Window size needs to be a power of two, but needs to account for
+        * shifting memory by the 4GB offset required to skip 32bit space.
+        */
+       window_size = roundup_pow_of_two(max_mem + (1ULL << 32));
+       tce_count = window_size >> tce_order;
+       table_size = tce_count << 3; /* 8 bytes per TCE */
+
+       if (table_size < PAGE_SIZE)
+               table_size = PAGE_SIZE;
+
+       table_order = get_order(table_size);
+       table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL,
+                                      table_order);
+       if (!table_pages)
+               goto err;
+
+       tces = page_address(table_pages);
+       if (!tces)
+               goto err_free;
+
+       /* Zeroed entries carry no read/write permission bits */
+       memset(tces, 0, table_size);
+
+       /* Entry for (addr + 4GB) points at addr: a linear map offset by 4GB */
+       for (addr = 0; addr < max_mem; addr += (1ULL << tce_order)) {
+               tces[(addr + (1ULL << 32)) >> tce_order] =
+                       cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE);
+       }
+
+       rc = opal_pci_map_pe_dma_window(pe->phb->opal_id,
+                                       pe->pe_number,
+                                       /* reconfigure window 0 */
+                                       (pe->pe_number << 1) + 0,
+                                       1,
+                                       __pa(tces),
+                                       table_size,
+                                       1ULL << tce_order);
+       if (rc == OPAL_SUCCESS) {
+               pe_info(pe, "Using 64-bit DMA iommu bypass (through TVE#0)\n");
+               return 0;
+       }
+
+       /* The window was not installed, so the table is not in use */
+err_free:
+       __free_pages(table_pages, table_order);
+err:
+       pe_err(pe, "Error configuring 64-bit DMA bypass\n");
+       return -EIO;
+}
+
  static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
  {
         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -1751,6 +1820,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
         struct pnv_ioda_pe *pe;
         uint64_t top;
         bool bypass = false;
+       s64 rc;
  
         if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
                 return -ENODEV;;
@@ -1765,8 +1835,27 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
                 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
                 set_dma_ops(&pdev->dev, &dma_direct_ops);
         } else {
-               dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
-               set_dma_ops(&pdev->dev, &dma_iommu_ops);
+               /*
+                * If the device can't set the TCE bypass bit but still wants
+                * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
+                * bypass the 32-bit region and be usable for 64-bit DMAs.
+                * The device needs to be able to address all of this space.
+                */
+               if (dma_mask >> 32 &&
+                   dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
+                   pnv_pci_ioda_pe_single_vendor(pe) &&
+                   phb->model == PNV_PHB_MODEL_PHB3) {
+                       /* Configure the bypass mode */
+                       rc = pnv_pci_ioda_dma_64bit_bypass(pe);
+                       if (rc)
+                               return rc;
+                       /* 4GB offset bypasses 32-bit space */
+                       set_dma_offset(&pdev->dev, (1ULL << 32));
+                       set_dma_ops(&pdev->dev, &dma_direct_ops);
+               } else {
+                       dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
+                       set_dma_ops(&pdev->dev, &dma_iommu_ops);
+               }
         }
         *pdev->dev.dma_mask = dma_mask;
author	Russell Currey <ruscur@russell.cc>
	Wed, 21 Jun 2017 07:18:04 +0000 (17:18 +1000)
committer	Michael Ellerman <mpe@ellerman.id.au>
	Tue, 27 Jun 2017 02:14:28 +0000 (12:14 +1000)