Merge tag 'dma-mapping-6.2-2022-12-13' of git://git.infradead.org/users/hch/dma-mapping
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 13 Dec 2022 17:05:19 +0000 (09:05 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 13 Dec 2022 17:05:19 +0000 (09:05 -0800)
Pull dma-mapping updates from Christoph Hellwig:

 - reduce the swiotlb buffer size on allocation failure (Alexey
   Kardashevskiy)

 - clean up passing of bogus GFP flags to the dma-coherent allocator
   (Christoph Hellwig)

* tag 'dma-mapping-6.2-2022-12-13' of git://git.infradead.org/users/hch/dma-mapping:
  dma-mapping: reject __GFP_COMP in dma_alloc_attrs
  ALSA: memalloc: don't pass bogus GFP_ flags to dma_alloc_*
  s390/ism: don't pass bogus GFP_ flags to dma_alloc_coherent
  cnic: don't pass bogus GFP_ flags to dma_alloc_coherent
  RDMA/qib: don't pass bogus GFP_ flags to dma_alloc_coherent
  RDMA/hfi1: don't pass bogus GFP_ flags to dma_alloc_coherent
  media: videobuf-dma-contig: use dma_mmap_coherent
  swiotlb: reduce the swiotlb buffer size on allocation failure
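
For reference, the driver-side half of the GFP cleanup is mechanical. A
minimal sketch of the resulting pattern (the function and variable names
here are illustrative, not taken from the patches; the __GFP_COMP
rejection itself is in the kernel/dma/mapping.c hunk below):

	#include <linux/dma-mapping.h>
	#include <linux/gfp.h>

	static void *example_alloc_ring(struct device *dev, size_t size,
					dma_addr_t *dma_handle)
	{
		/*
		 * Previously: GFP_KERNEL | __GFP_COMP.  dma_alloc_attrs()
		 * (which dma_alloc_coherent() wraps) now WARNs once and
		 * returns NULL when __GFP_COMP is passed, because a coherent
		 * DMA allocation can never be turned back into a page
		 * pointer, so compound pages make no sense here.
		 */
		return dma_alloc_coherent(dev, size, dma_handle, GFP_KERNEL);
	}

On the swiotlb side, swiotlb_init_remap() now retries the boot-time
memblock allocation with a halved slab count (aligned to IO_TLB_SEGSIZE)
until it either succeeds or the count reaches IO_TLB_MIN_SLABS, and logs
the reduced size; with the stock defaults (a 64 MB buffer of 2 KB slots,
i.e. 32768 slabs) a failed allocation is retried at roughly 16384, 8192,
and so on.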

arch/arm/mm/dma-mapping.c
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/qib/qib_iba6120.c
drivers/infiniband/hw/qib/qib_init.c
drivers/iommu/dma-iommu.c
drivers/media/v4l2-core/videobuf-dma-contig.c
drivers/net/ethernet/broadcom/cnic.c
drivers/s390/net/ism_drv.c
kernel/dma/mapping.c
kernel/dma/swiotlb.c
sound/core/memalloc.c

arch/arm/mm/dma-mapping.c
index d790909..c135f6e 100644
@@ -564,14 +564,6 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
        if (mask < 0xffffffffULL)
                gfp |= GFP_DMA;
 
-       /*
-        * Following is a work-around (a.k.a. hack) to prevent pages
-        * with __GFP_COMP being passed to split_page() which cannot
-        * handle them.  The real problem is that this flag probably
-        * should be 0 on ARM as it is not supported on this
-        * platform; see CONFIG_HUGETLBFS.
-        */
-       gfp &= ~(__GFP_COMP);
        args.gfp = gfp;
 
        *handle = DMA_MAPPING_ERROR;
@@ -1093,15 +1085,6 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
                return __iommu_alloc_simple(dev, size, gfp, handle,
                                            coherent_flag, attrs);
 
-       /*
-        * Following is a work-around (a.k.a. hack) to prevent pages
-        * with __GFP_COMP being passed to split_page() which cannot
-        * handle them.  The real problem is that this flag probably
-        * should be 0 on ARM as it is not supported on this
-        * platform; see CONFIG_HUGETLBFS.
-        */
-       gfp &= ~(__GFP_COMP);
-
        pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag);
        if (!pages)
                return NULL;
drivers/infiniband/hw/hfi1/init.c
index 436372b..24c0f0d 100644
@@ -1761,17 +1761,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
        unsigned amt;
 
        if (!rcd->rcvhdrq) {
-               gfp_t gfp_flags;
-
                amt = rcvhdrq_size(rcd);
 
-               if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
-                       gfp_flags = GFP_KERNEL;
-               else
-                       gfp_flags = GFP_USER;
                rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
                                                  &rcd->rcvhdrq_dma,
-                                                 gfp_flags | __GFP_COMP);
+                                                 GFP_KERNEL);
 
                if (!rcd->rcvhdrq) {
                        dd_dev_err(dd,
@@ -1785,7 +1779,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
                        rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
                                                                    PAGE_SIZE,
                                                                    &rcd->rcvhdrqtailaddr_dma,
-                                                                   gfp_flags);
+                                                                   GFP_KERNEL);
                        if (!rcd->rcvhdrtail_kvaddr)
                                goto bail_free;
                }
@@ -1821,20 +1815,11 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 {
        struct hfi1_devdata *dd = rcd->dd;
        u32 max_entries, egrtop, alloced_bytes = 0;
-       gfp_t gfp_flags;
        u16 order, idx = 0;
        int ret = 0;
        u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
 
        /*
-        * GFP_USER, but without GFP_FS, so buffer cache can be
-        * coalesced (we hope); otherwise, even at order 4,
-        * heavy filesystem activity makes these fail, and we can
-        * use compound pages.
-        */
-       gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
-       /*
         * The minimum size of the eager buffers is a groups of MTU-sized
         * buffers.
         * The global eager_buffer_size parameter is checked against the
@@ -1864,7 +1849,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
                        dma_alloc_coherent(&dd->pcidev->dev,
                                           rcd->egrbufs.rcvtid_size,
                                           &rcd->egrbufs.buffers[idx].dma,
-                                          gfp_flags);
+                                          GFP_KERNEL);
                if (rcd->egrbufs.buffers[idx].addr) {
                        rcd->egrbufs.buffers[idx].len =
                                rcd->egrbufs.rcvtid_size;
drivers/infiniband/hw/qib/qib_iba6120.c
index aea5719..0738611 100644
@@ -2075,7 +2075,7 @@ static void alloc_dummy_hdrq(struct qib_devdata *dd)
        dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev,
                                        dd->rcd[0]->rcvhdrq_size,
                                        &dd->cspec->dummy_hdrq_phys,
-                                       GFP_ATOMIC | __GFP_COMP);
+                                       GFP_ATOMIC);
        if (!dd->cspec->dummy_hdrq) {
                qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n");
                /* fallback to just 0'ing */
drivers/infiniband/hw/qib/qib_init.c
index 4521100..33667be 100644
@@ -1546,18 +1546,14 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
 
        if (!rcd->rcvhdrq) {
                dma_addr_t phys_hdrqtail;
-               gfp_t gfp_flags;
 
                amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
                            sizeof(u32), PAGE_SIZE);
-               gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
-                       GFP_USER : GFP_KERNEL;
 
                old_node_id = dev_to_node(&dd->pcidev->dev);
                set_dev_node(&dd->pcidev->dev, rcd->node_id);
-               rcd->rcvhdrq = dma_alloc_coherent(
-                       &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
-                       gfp_flags | __GFP_COMP);
+               rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
+                               &rcd->rcvhdrq_phys, GFP_KERNEL);
                set_dev_node(&dd->pcidev->dev, old_node_id);
 
                if (!rcd->rcvhdrq) {
@@ -1577,7 +1573,7 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
                        set_dev_node(&dd->pcidev->dev, rcd->node_id);
                        rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
                                &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
-                               gfp_flags);
+                               GFP_KERNEL);
                        set_dev_node(&dd->pcidev->dev, old_node_id);
                        if (!rcd->rcvhdrtail_kvaddr)
                                goto bail_free;
@@ -1621,17 +1617,8 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
        struct qib_devdata *dd = rcd->dd;
        unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
        size_t size;
-       gfp_t gfp_flags;
        int old_node_id;
 
-       /*
-        * GFP_USER, but without GFP_FS, so buffer cache can be
-        * coalesced (we hope); otherwise, even at order 4,
-        * heavy filesystem activity makes these fail, and we can
-        * use compound pages.
-        */
-       gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
        egrcnt = rcd->rcvegrcnt;
        egroff = rcd->rcvegr_tid_base;
        egrsize = dd->rcvegrbufsize;
@@ -1663,7 +1650,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
                rcd->rcvegrbuf[e] =
                        dma_alloc_coherent(&dd->pcidev->dev, size,
                                           &rcd->rcvegrbuf_phys[e],
-                                          gfp_flags);
+                                          GFP_KERNEL);
                set_dev_node(&dd->pcidev->dev, old_node_id);
                if (!rcd->rcvegrbuf[e])
                        goto bail_rcvegrbuf_phys;
drivers/iommu/dma-iommu.c
index 9297b74..f798c44 100644
@@ -744,9 +744,6 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
        /* IOMMU can map any pages, so himem can also be used here */
        gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
 
-       /* It makes no sense to muck about with huge pages */
-       gfp &= ~__GFP_COMP;
-
        while (count) {
                struct page *page = NULL;
                unsigned int order_size;
drivers/media/v4l2-core/videobuf-dma-contig.c
index 52312ce..f2c4393 100644
@@ -36,12 +36,11 @@ struct videobuf_dma_contig_memory {
 
 static int __videobuf_dc_alloc(struct device *dev,
                               struct videobuf_dma_contig_memory *mem,
-                              unsigned long size, gfp_t flags)
+                              unsigned long size)
 {
        mem->size = size;
-       mem->vaddr = dma_alloc_coherent(dev, mem->size,
-                                       &mem->dma_handle, flags);
-
+       mem->vaddr = dma_alloc_coherent(dev, mem->size, &mem->dma_handle,
+                                       GFP_KERNEL);
        if (!mem->vaddr) {
                dev_err(dev, "memory alloc size %ld failed\n", mem->size);
                return -ENOMEM;
@@ -258,8 +257,7 @@ static int __videobuf_iolock(struct videobuf_queue *q,
                        return videobuf_dma_contig_user_get(mem, vb);
 
                /* allocate memory for the read() method */
-               if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size),
-                                       GFP_KERNEL))
+               if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size)))
                        return -ENOMEM;
                break;
        case V4L2_MEMORY_OVERLAY:
@@ -295,22 +293,18 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
        BUG_ON(!mem);
        MAGIC_CHECK(mem->magic, MAGIC_DC_MEM);
 
-       if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize),
-                               GFP_KERNEL | __GFP_COMP))
+       if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize)))
                goto error;
 
-       /* Try to remap memory */
-       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
        /* the "vm_pgoff" is just used in v4l2 to find the
         * corresponding buffer data structure which is allocated
         * earlier and it does not mean the offset from the physical
         * buffer start address as usual. So set it to 0 to pass
-        * the sanity check in vm_iomap_memory().
+        * the sanity check in dma_mmap_coherent().
         */
        vma->vm_pgoff = 0;
-
-       retval = vm_iomap_memory(vma, mem->dma_handle, mem->size);
+       retval = dma_mmap_coherent(q->dev, vma, mem->vaddr, mem->dma_handle,
+                                  mem->size);
        if (retval) {
                dev_err(q->dev, "mmap: remap failed with error %d. ",
                        retval);
drivers/net/ethernet/broadcom/cnic.c
index 74bc053..7926aae 100644
@@ -1027,16 +1027,14 @@ static int __cnic_alloc_uio_rings(struct cnic_uio_dev *udev, int pages)
 
        udev->l2_ring_size = pages * CNIC_PAGE_SIZE;
        udev->l2_ring = dma_alloc_coherent(&udev->pdev->dev, udev->l2_ring_size,
-                                          &udev->l2_ring_map,
-                                          GFP_KERNEL | __GFP_COMP);
+                                          &udev->l2_ring_map, GFP_KERNEL);
        if (!udev->l2_ring)
                return -ENOMEM;
 
        udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
        udev->l2_buf_size = CNIC_PAGE_ALIGN(udev->l2_buf_size);
        udev->l2_buf = dma_alloc_coherent(&udev->pdev->dev, udev->l2_buf_size,
-                                         &udev->l2_buf_map,
-                                         GFP_KERNEL | __GFP_COMP);
+                                         &udev->l2_buf_map, GFP_KERNEL);
        if (!udev->l2_buf) {
                __cnic_free_uio_rings(udev);
                return -ENOMEM;
drivers/s390/net/ism_drv.c
index d34bb6e..dfd401d 100644
@@ -243,7 +243,8 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct smcd_dmb *dmb)
 
        dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
                                           &dmb->dma_addr,
-                                          GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC | __GFP_COMP | __GFP_NORETRY);
+                                          GFP_KERNEL | __GFP_NOWARN |
+                                          __GFP_NOMEMALLOC | __GFP_NORETRY);
        if (!dmb->cpu_addr)
                clear_bit(dmb->sba_idx, ism->sba_bitmap);
 
kernel/dma/mapping.c
index 33437d6..c026a5a 100644
@@ -498,6 +498,14 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
        WARN_ON_ONCE(!dev->coherent_dma_mask);
 
+       /*
+        * DMA allocations can never be turned back into a page pointer, so
+        * requesting compound pages doesn't make sense (and can't even be
+        * supported at all by various backends).
+        */
+       if (WARN_ON_ONCE(flag & __GFP_COMP))
+               return NULL;
+
        if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
                return cpu_addr;
 
kernel/dma/swiotlb.c
index 339a990..a34c38b 100644
@@ -300,6 +300,37 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
        return;
 }
 
+static void *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags,
+               int (*remap)(void *tlb, unsigned long nslabs))
+{
+       size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
+       void *tlb;
+
+       /*
+        * By default allocate the bounce buffer memory from low memory, but
+        * allow to pick a location everywhere for hypervisors with guest
+        * memory encryption.
+        */
+       if (flags & SWIOTLB_ANY)
+               tlb = memblock_alloc(bytes, PAGE_SIZE);
+       else
+               tlb = memblock_alloc_low(bytes, PAGE_SIZE);
+
+       if (!tlb) {
+               pr_warn("%s: Failed to allocate %zu bytes tlb structure\n",
+                       __func__, bytes);
+               return NULL;
+       }
+
+       if (remap && remap(tlb, nslabs) < 0) {
+               memblock_free(tlb, PAGE_ALIGN(bytes));
+               pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
+               return NULL;
+       }
+
+       return tlb;
+}
+
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
@@ -310,7 +341,6 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
        struct io_tlb_mem *mem = &io_tlb_default_mem;
        unsigned long nslabs;
        size_t alloc_size;
-       size_t bytes;
        void *tlb;
 
        if (!addressing_limit && !swiotlb_force_bounce)
@@ -326,31 +356,16 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
                swiotlb_adjust_nareas(num_possible_cpus());
 
        nslabs = default_nslabs;
-       /*
-        * By default allocate the bounce buffer memory from low memory, but
-        * allow to pick a location everywhere for hypervisors with guest
-        * memory encryption.
-        */
-retry:
-       bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
-       if (flags & SWIOTLB_ANY)
-               tlb = memblock_alloc(bytes, PAGE_SIZE);
-       else
-               tlb = memblock_alloc_low(bytes, PAGE_SIZE);
-       if (!tlb) {
-               pr_warn("%s: failed to allocate tlb structure\n", __func__);
-               return;
-       }
-
-       if (remap && remap(tlb, nslabs) < 0) {
-               memblock_free(tlb, PAGE_ALIGN(bytes));
-
+       while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) {
+               if (nslabs <= IO_TLB_MIN_SLABS)
+                       return;
                nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
-               if (nslabs >= IO_TLB_MIN_SLABS)
-                       goto retry;
+       }
 
-               pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
-               return;
+       if (default_nslabs != nslabs) {
+               pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs",
+                       default_nslabs, nslabs);
+               default_nslabs = nslabs;
        }
 
        alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
sound/core/memalloc.c
index ba09555..34250e6 100644
@@ -21,7 +21,6 @@
 
 #define DEFAULT_GFP \
        (GFP_KERNEL | \
-        __GFP_COMP |    /* compound page lets parts be mapped */ \
         __GFP_RETRY_MAYFAIL | /* don't trigger OOM-killer */ \
         __GFP_NOWARN)   /* no stack trace print - this call is non-critical */
 
@@ -543,7 +542,7 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size)
        void *p;
 
        sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir,
-                                     DEFAULT_GFP, 0);
+                                     DEFAULT_GFP | __GFP_COMP, 0);
 #ifdef CONFIG_SND_DMA_SGBUF
        if (!sgt && !get_dma_ops(dmab->dev.dev)) {
                if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
@@ -811,7 +810,7 @@ static void *snd_dma_noncoherent_alloc(struct snd_dma_buffer *dmab, size_t size)
        void *p;
 
        p = dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr,
-                                 dmab->dev.dir, DEFAULT_GFP);
+                                 dmab->dev.dir, DEFAULT_GFP | __GFP_COMP);
        if (p)
                dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->addr);
        return p;