Ported pcie-brcmstb bounce buffer implementation to ARM64. (#3144)
author		yaroslavros <yaroslavros@gmail.com>
		Wed, 14 Aug 2019 14:22:55 +0000 (15:22 +0100)
committer	Phil Elwell <pelwell@users.noreply.github.com>
		Wed, 14 Aug 2019 14:22:55 +0000 (15:22 +0100)
Ported pcie-brcmstb bounce buffer implementation to ARM64.
This enables full 4G RAM usage on Raspberry Pi 4 in 64-bit mode: streaming DMA
buffers that land above the PCIe block's addressable threshold are now copied
through a preallocated bounce pool instead of being handed to the device directly.

Signed-off-by: Yaroslav Rosomakho <yaroslavros@gmail.com>
arch/arm64/include/asm/dma-mapping.h
arch/arm64/mm/dma-mapping.c
drivers/pci/controller/Makefile
drivers/pci/controller/pcie-brcmstb-bounce.h
drivers/pci/controller/pcie-brcmstb-bounce64.c [new file with mode: 0644]
drivers/pci/controller/pcie-brcmstb.c

index b7847eb..9195e52 100644 (file)
 #include <xen/xen.h>
 #include <asm/xen/hypervisor.h>
 
+extern void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+                          gfp_t gfp, unsigned long attrs);
+extern void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
+                        dma_addr_t handle, unsigned long attrs);
+extern int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+                       void *cpu_addr, dma_addr_t dma_addr, size_t size,
+                       unsigned long attrs);
+extern int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+               void *cpu_addr, dma_addr_t dma_addr, size_t size,
+               unsigned long attrs);
+extern int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
+               enum dma_data_direction dir, unsigned long attrs);
+extern void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
+               enum dma_data_direction dir, unsigned long attrs);
+extern void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
+               enum dma_data_direction dir);
+extern void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
+               enum dma_data_direction dir);
+
 extern const struct dma_map_ops dummy_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
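
These thin wrappers exist because the arch's SWIOTLB-backed helpers are
static; giving them non-static entry points lets a dma_map_ops built outside
this file (the dmabounce_ops further down in this patch) delegate to them.
A sketch of how the new ops are reached in practice — 'ep', 'skb' and 'len'
are illustrative, only the DMA API calls themselves are real:

	/* After brcm_pcie_bounce_register_dev() has installed
	 * dmabounce_ops on an endpoint, the generic DMA API dispatches
	 * through dev->dma_ops, so unmodified endpoint drivers land in
	 * the code added by this patch. */
	dma_addr_t h = dma_map_single(&ep->dev, skb->data, len, DMA_TO_DEVICE);
	if (dma_mapping_error(&ep->dev, h))	/* -> dmabounce_mapping_error() */
		return -ENOMEM;
	/* h is the bounce ("safe") address when skb->data sits above the
	 * threshold, the original address otherwise. */
	dma_unmap_single(&ep->dev, h, len, DMA_TO_DEVICE); /* -> dmabounce_unmap_page() */
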
index d3a5bb1..b1f6d4e 100644 (file)
@@ -138,6 +138,12 @@ no_mem:
        return NULL;
 }
 
+void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+                           gfp_t gfp, unsigned long attrs)
+{
+	return __dma_alloc(dev, size, handle, gfp, attrs);
+}
+
 static void __dma_free(struct device *dev, size_t size,
                       void *vaddr, dma_addr_t dma_handle,
                       unsigned long attrs)
@@ -154,6 +160,12 @@ static void __dma_free(struct device *dev, size_t size,
        swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
 }
 
+void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
+                         dma_addr_t handle, unsigned long attrs)
+{
+	__dma_free(dev, size, cpu_addr, handle, attrs);
+}
+
 static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction dir,
@@ -197,6 +209,12 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
        return ret;
 }
 
+int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
+                enum dma_data_direction dir, unsigned long attrs)
+{
+       return __swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
+}
+
 static void __swiotlb_unmap_sg_attrs(struct device *dev,
                                     struct scatterlist *sgl, int nelems,
                                     enum dma_data_direction dir,
@@ -213,6 +231,12 @@ static void __swiotlb_unmap_sg_attrs(struct device *dev,
        swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
 }
 
+void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
+                enum dma_data_direction dir, unsigned long attrs)
+{
+       __swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+}
+
 static void __swiotlb_sync_single_for_cpu(struct device *dev,
                                          dma_addr_t dev_addr, size_t size,
                                          enum dma_data_direction dir)
@@ -245,6 +269,12 @@ static void __swiotlb_sync_sg_for_cpu(struct device *dev,
        swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
 }
 
+void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
+                enum dma_data_direction dir)
+{
+       __swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+}
+
 static void __swiotlb_sync_sg_for_device(struct device *dev,
                                         struct scatterlist *sgl, int nelems,
                                         enum dma_data_direction dir)
@@ -259,6 +289,12 @@ static void __swiotlb_sync_sg_for_device(struct device *dev,
                                       sg->length, dir);
 }
 
+void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
+                enum dma_data_direction dir)
+{
+       __swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
+}
+
 static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
                              unsigned long pfn, size_t size)
 {
@@ -294,6 +330,13 @@ static int __swiotlb_mmap(struct device *dev,
        return __swiotlb_mmap_pfn(vma, pfn, size);
 }
 
+int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+                        void *cpu_addr, dma_addr_t dma_addr, size_t size,
+                        unsigned long attrs)
+{
+       return __swiotlb_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
 static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
                                      struct page *page, size_t size)
 {
@@ -314,6 +357,13 @@ static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
        return __swiotlb_get_sgtable_page(sgt, page, size);
 }
 
+int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+                void *cpu_addr, dma_addr_t dma_addr, size_t size,
+                unsigned long attrs)
+{
+       return __swiotlb_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
+}
+
 static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
        if (swiotlb)
index c40b7b9..71be2a0 100644 (file)
@@ -32,6 +32,9 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o
 ifdef CONFIG_ARM
 obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
 endif
+ifdef CONFIG_ARM64
+obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce64.o
+endif
 
 obj-$(CONFIG_VMD) += vmd.o
 # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
index 2fe20a1..7caa078 100644 (file)
@@ -6,7 +6,7 @@
 #ifndef _PCIE_BRCMSTB_BOUNCE_H
 #define _PCIE_BRCMSTB_BOUNCE_H
 
-#ifdef CONFIG_ARM
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
 
 int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
                          dma_addr_t threshold);
diff --git a/drivers/pci/controller/pcie-brcmstb-bounce64.c b/drivers/pci/controller/pcie-brcmstb-bounce64.c
new file mode 100644 (file)
index 0000000..d9f1a46
--- /dev/null
@@ -0,0 +1,576 @@
+/*
+ *  This code started out as a version of arch/arm/common/dmabounce.c,
+ *  modified to cope with highmem pages. Now it has been changed heavily -
+ *  it now preallocates a large block (currently 4MB) and carves it up
+ *  sequentially in ring fashion, and DMA is used to copy the data - to the
+ *  point where very little of the original remains.
+ *
+ *  Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
+ *
+ *  Original version by Brad Parker (brad@heeltoe.com)
+ *  Re-written by Christopher Hoover <ch@murgatroid.com>
+ *  Made generic by Deepak Saxena <dsaxena@plexity.net>
+ *
+ *  Copyright (C) 2002 Hewlett Packard Company.
+ *  Copyright (C) 2004 MontaVista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/page-flags.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
+#include <linux/dmapool.h>
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+#include <linux/bitmap.h>
+#include <linux/swiotlb.h>
+
+#include <asm/cacheflush.h>
+
+#define STATS
+
+#ifdef STATS
+#define DO_STATS(X) do { X ; } while (0)
+#else
+#define DO_STATS(X) do { } while (0)
+#endif
+
+/* ************************************************** */
+
+struct safe_buffer {
+       struct list_head node;
+
+       /* original request */
+       size_t          size;
+       int             direction;
+
+       struct dmabounce_pool *pool;
+       void            *safe;
+       dma_addr_t      unsafe_dma_addr;
+       dma_addr_t      safe_dma_addr;
+};
+
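+/*
+ * The bounce pool: a single coherent allocation of 'pages' pages,
+ * handed out page-by-page via alloc_map; alloc_pos makes each search
+ * resume where the previous allocation ended, ring-fashion.
+ */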
+struct dmabounce_pool {
+       unsigned long   pages;
+       void            *virt_addr;
+       dma_addr_t      dma_addr;
+       unsigned long   *alloc_map;
+       unsigned long   alloc_pos;
+       spinlock_t      lock;
+       struct device   *dev;
+       unsigned long   num_pages;
+#ifdef STATS
+       size_t          max_size;
+       unsigned long   num_bufs;
+       unsigned long   max_bufs;
+       unsigned long   max_pages;
+#endif
+};
+
+struct dmabounce_device_info {
+       struct device *dev;
+       dma_addr_t threshold;
+       struct list_head safe_buffers;
+       struct dmabounce_pool pool;
+       rwlock_t lock;
+#ifdef STATS
+       unsigned long map_count;
+       unsigned long unmap_count;
+       unsigned long sync_dev_count;
+       unsigned long sync_cpu_count;
+       unsigned long fail_count;
+       int attr_res;
+#endif
+};
+
+static struct dmabounce_device_info *g_dmabounce_device_info;
+
+extern int bcm2838_dma40_memcpy_init(void);
+extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
+
+#ifdef STATS
+static ssize_t
+bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct dmabounce_device_info *device_info = g_dmabounce_device_info;
+       return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
+               device_info->map_count,
+               device_info->unmap_count,
+               device_info->sync_dev_count,
+               device_info->sync_cpu_count,
+               device_info->fail_count,
+               device_info->pool.max_size,
+               device_info->pool.num_bufs,
+               device_info->pool.max_bufs,
+               device_info->pool.num_pages * PAGE_SIZE,
+               device_info->pool.max_pages * PAGE_SIZE);
+}
+
+static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
+#endif
+
+static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
+                        unsigned long buffer_size)
+{
+       int ret = -ENOMEM;
+       pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE;
+       pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
+       if (!pool->alloc_map)
+               goto err_bitmap;
+       pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
+                                            &pool->dma_addr, GFP_KERNEL);
+       if (!pool->virt_addr)
+               goto err_dmabuf;
+
+       pool->alloc_pos = 0;
+       spin_lock_init(&pool->lock);
+       pool->dev = dev;
+       pool->num_pages = 0;
+
+       DO_STATS(pool->max_size = 0);
+       DO_STATS(pool->num_bufs = 0);
+       DO_STATS(pool->max_bufs = 0);
+       DO_STATS(pool->max_pages = 0);
+
+       return 0;
+
+err_dmabuf:
+       bitmap_free(pool->alloc_map);
+err_bitmap:
+       return ret;
+}
+
+static void bounce_destroy(struct dmabounce_pool *pool)
+{
+       dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
+                         pool->dma_addr);
+
+       bitmap_free(pool->alloc_map);
+}
+
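+/*
+ * Carve a whole-page run big enough for 'size' bytes out of the pool,
+ * scanning from alloc_pos and wrapping to the start once if needed.
+ * Returns the kernel virtual address of the run, or NULL if the pool
+ * is too full or fragmented.
+ */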
+static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
+                         dma_addr_t *dmaaddrp)
+{
+       unsigned long pages;
+       unsigned long flags;
+       unsigned long pos;
+
+       pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
+
+       DO_STATS(pool->max_size = max(size, pool->max_size));
+
+       spin_lock_irqsave(&pool->lock, flags);
+       pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
+                                        pool->alloc_pos, pages, 0);
+       /* If not found, try from the start */
+       if (pos >= pool->pages && pool->alloc_pos)
+               pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
+                                                0, pages, 0);
+
+       if (pos >= pool->pages) {
+               spin_unlock_irqrestore(&pool->lock, flags);
+               return NULL;
+       }
+
+       bitmap_set(pool->alloc_map, pos, pages);
+       pool->alloc_pos = (pos + pages) % pool->pages;
+       pool->num_pages += pages;
+
+       DO_STATS(pool->num_bufs++);
+       DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
+       DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
+
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       *dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;
+
+       return pool->virt_addr + pos * PAGE_SIZE;
+}
+
+static void
+bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
+{
+       unsigned long pages;
+       unsigned long flags;
+       unsigned long pos;
+
+       pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
+       pos = (buf - pool->virt_addr)/PAGE_SIZE;
+
+       BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1));
+
+       spin_lock_irqsave(&pool->lock, flags);
+       bitmap_clear(pool->alloc_map, pos, pages);
+       pool->num_pages -= pages;
+       if (pool->num_pages == 0)
+               pool->alloc_pos = 0;
+       DO_STATS(pool->num_bufs--);
+       spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+/* allocate a 'safe' buffer and keep track of it */
+static struct safe_buffer *
+alloc_safe_buffer(struct dmabounce_device_info *device_info,
+                 dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
+{
+       struct safe_buffer *buf;
+       struct dmabounce_pool *pool = &device_info->pool;
+       struct device *dev = device_info->dev;
+       unsigned long flags;
+
+       /*
+        * Although one might expect this to be called in thread context,
+        * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
+        * was previously used to select the appropriate allocation mode,
+        * but this is unsafe.
+        */
+       buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
+       if (!buf) {
+               dev_warn(dev, "%s: kmalloc failed\n", __func__);
+               return NULL;
+       }
+
+       buf->unsafe_dma_addr = dma_addr;
+       buf->size = size;
+       buf->direction = dir;
+       buf->pool = pool;
+
+       buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);
+
+       if (!buf->safe) {
+               dev_warn(dev,
+                        "%s: could not alloc dma memory (size=%zu)\n",
+                        __func__, size);
+               kfree(buf);
+               return NULL;
+       }
+
+       write_lock_irqsave(&device_info->lock, flags);
+       list_add(&buf->node, &device_info->safe_buffers);
+       write_unlock_irqrestore(&device_info->lock, flags);
+
+       return buf;
+}
+
+/* determine if a buffer is from our "safe" pool */
+static struct safe_buffer *
+find_safe_buffer(struct dmabounce_device_info *device_info,
+                dma_addr_t safe_dma_addr)
+{
+       struct safe_buffer *b, *rb = NULL;
+       unsigned long flags;
+
+       read_lock_irqsave(&device_info->lock, flags);
+
+       list_for_each_entry(b, &device_info->safe_buffers, node)
+               if (b->safe_dma_addr <= safe_dma_addr &&
+                   b->safe_dma_addr + b->size > safe_dma_addr) {
+                       rb = b;
+                       break;
+               }
+
+       read_unlock_irqrestore(&device_info->lock, flags);
+       return rb;
+}
+
+static void
+free_safe_buffer(struct dmabounce_device_info *device_info,
+                struct safe_buffer *buf)
+{
+       unsigned long flags;
+
+       write_lock_irqsave(&device_info->lock, flags);
+       list_del(&buf->node);
+       write_unlock_irqrestore(&device_info->lock, flags);
+
+       bounce_free(buf->pool, buf->safe, buf->size);
+
+       kfree(buf);
+}
+
+/* ************************************************** */
+
+static struct safe_buffer *
+find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
+{
+       if (!dev || !g_dmabounce_device_info)
+               return NULL;
+       if (dma_mapping_error(dev, dma_addr)) {
+               dev_err(dev, "Trying to %s invalid mapping\n", where);
+               return NULL;
+       }
+       return find_safe_buffer(g_dmabounce_device_info, dma_addr);
+}
+
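+/* Device-bound data: copy the payload into the bounce buffer with the
+ * DMA40 memcpy engine and hand out the "safe" address for the device. */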
+static dma_addr_t
+map_single(struct device *dev, struct safe_buffer *buf, size_t size,
+          enum dma_data_direction dir, unsigned long attrs)
+{
+       BUG_ON(buf->size != size);
+       BUG_ON(buf->direction != dir);
+
+       dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
+               (u64)buf->safe_dma_addr);
+
+       if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
+           !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+               bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
+                                    size);
+
+       return buf->safe_dma_addr;
+}
+
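+/* CPU-bound data: copy the payload back out of the bounce buffer and
+ * return the original ("unsafe") address, which the callers use for
+ * their CPU-side cache maintenance. */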
+static dma_addr_t
+unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
+            enum dma_data_direction dir, unsigned long attrs)
+{
+       BUG_ON(buf->size != size);
+       BUG_ON(buf->direction != dir);
+
+       if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
+           !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+               dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
+                       (u64)buf->unsafe_dma_addr);
+
+               bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
+                                    size);
+       }
+       return buf->unsafe_dma_addr;
+}
+
+/* ************************************************** */
+
+/*
+ * see if a buffer address is in an 'unsafe' range.  if it is
+ * allocate a 'safe' buffer and copy the unsafe buffer into it.
+ * substitute the safe buffer for the unsafe one.
+ * (basically move the buffer from an unsafe area to a safe one)
+ */
+static dma_addr_t
+dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
+                  size_t size, enum dma_data_direction dir,
+                  unsigned long attrs)
+{
+       struct dmabounce_device_info *device_info = g_dmabounce_device_info;
+       dma_addr_t dma_addr;
+
+       dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
+
+       swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
+       if (!is_device_dma_coherent(dev))
+               __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+
+       if (device_info && (dma_addr + size) > device_info->threshold) {
+               struct safe_buffer *buf;
+
+               buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
+               if (!buf) {
+                       DO_STATS(device_info->fail_count++);
+                       return (~(dma_addr_t)0x0);
+               }
+
+               DO_STATS(device_info->map_count++);
+
+               dma_addr = map_single(dev, buf, size, dir, attrs);
+       }
+       return dma_addr;
+}
+
+/*
+ * see if a mapped address was really a "safe" buffer and if so, copy
+ * the data from the safe buffer back to the unsafe buffer and free up
+ * the safe buffer.  (basically return things back to the way they
+ * should be)
+ */
+static void
+dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
+                    enum dma_data_direction dir, unsigned long attrs)
+{
+       struct safe_buffer *buf;
+
+       buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+       if (buf) {
+               DO_STATS(g_dmabounce_device_info->unmap_count++);
+               dma_addr = unmap_single(dev, buf, size, dir, attrs);
+               free_safe_buffer(g_dmabounce_device_info, buf);
+       }
+
+       if (!is_device_dma_coherent(dev))
+               __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+       swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
+}
+
+/*
+ * A version of dmabounce_map_page that assumes the mapping has already
+ * been created - intended for streaming operation.
+ */
+static void
+dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
+                         enum dma_data_direction dir)
+{
+       struct safe_buffer *buf;
+
+       swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
+       if (!is_device_dma_coherent(dev))
+               __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+
+       buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+       if (buf) {
+               DO_STATS(g_dmabounce_device_info->sync_dev_count++);
+               map_single(dev, buf, size, dir, 0);
+       }
+}
+
+/*
+ * A version of dmabounce_unmap_page that doesn't destroy the mapping -
+ * intended for streaming operation.
+ */
+static void
+dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
+                      size_t size, enum dma_data_direction dir)
+{
+       struct safe_buffer *buf;
+
+       buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+       if (buf) {
+               DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
+               dma_addr = unmap_single(dev, buf, size, dir, 0);
+       }
+
+       if (!is_device_dma_coherent(dev))
+               __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+       swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
+}
+
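+/* Returning 0 while bouncing is active vetoes dma_set_mask(), keeping
+ * clients on the mask they already have; otherwise defer to SWIOTLB. */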
+static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
+{
+       if (g_dmabounce_device_info)
+               return 0;
+
+       return swiotlb_dma_supported(dev, dma_mask);
+}
+
+static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+       return swiotlb_dma_mapping_error(dev, dma_addr);
+}
+
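+/* Only map_page/unmap_page and the single-buffer syncs are intercepted
+ * for bouncing; everything else delegates to the arm64 SWIOTLB wrappers
+ * exported at the top of this patch. */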
+static const struct dma_map_ops dmabounce_ops = {
+       .alloc                  = arm64_dma_alloc,
+       .free                   = arm64_dma_free,
+       .mmap                   = arm64_dma_mmap,
+       .get_sgtable            = arm64_dma_get_sgtable,
+       .map_page               = dmabounce_map_page,
+       .unmap_page             = dmabounce_unmap_page,
+       .sync_single_for_cpu    = dmabounce_sync_for_cpu,
+       .sync_single_for_device = dmabounce_sync_for_device,
+       .map_sg                 = arm64_dma_map_sg,
+       .unmap_sg               = arm64_dma_unmap_sg,
+       .sync_sg_for_cpu        = arm64_dma_sync_sg_for_cpu,
+       .sync_sg_for_device     = arm64_dma_sync_sg_for_device,
+       .dma_supported          = dmabounce_dma_supported,
+       .mapping_error          = dmabounce_mapping_error,
+};
+
+int brcm_pcie_bounce_init(struct device *dev,
+                         unsigned long buffer_size,
+                         dma_addr_t threshold)
+{
+       struct dmabounce_device_info *device_info;
+       int ret;
+
+       /* Only support a single client */
+       if (g_dmabounce_device_info)
+               return -EBUSY;
+
+       ret = bcm2838_dma40_memcpy_init();
+       if (ret)
+               return ret;
+
+       device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
+       if (!device_info) {
+               dev_err(dev,
+                       "Could not allocate dmabounce_device_info\n");
+               return -ENOMEM;
+       }
+
+       ret = bounce_create(&device_info->pool, dev, buffer_size);
+       if (ret) {
+               dev_err(dev,
+                       "dmabounce: could not allocate %lu byte DMA pool\n",
+                       buffer_size);
+               goto err_bounce;
+       }
+
+       device_info->dev = dev;
+       device_info->threshold = threshold;
+       INIT_LIST_HEAD(&device_info->safe_buffers);
+       rwlock_init(&device_info->lock);
+
+       DO_STATS(device_info->map_count = 0);
+       DO_STATS(device_info->unmap_count = 0);
+       DO_STATS(device_info->sync_dev_count = 0);
+       DO_STATS(device_info->sync_cpu_count = 0);
+       DO_STATS(device_info->fail_count = 0);
+       DO_STATS(device_info->attr_res =
+                device_create_file(dev, &dev_attr_dmabounce_stats));
+
+       g_dmabounce_device_info = device_info;
+
+       dev_info(dev, "dmabounce: initialised - %lu kB, threshold %pad\n",
+                buffer_size / 1024, &threshold);
+
+       return 0;
+
+ err_bounce:
+       kfree(device_info);
+       return ret;
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_init);
+
+void brcm_pcie_bounce_uninit(struct device *dev)
+{
+       struct dmabounce_device_info *device_info = g_dmabounce_device_info;
+
+       g_dmabounce_device_info = NULL;
+
+       if (!device_info) {
+               dev_warn(dev,
+                        "Never registered with dmabounce but attempting "
+                        "to unregister!\n");
+               return;
+       }
+
+       if (!list_empty(&device_info->safe_buffers)) {
+               dev_err(dev,
+                       "Removing from dmabounce with pending buffers!\n");
+               BUG();
+       }
+
+       bounce_destroy(&device_info->pool);
+
+       DO_STATS(if (device_info->attr_res == 0)
+                        device_remove_file(dev, &dev_attr_dmabounce_stats));
+
+       kfree(device_info);
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_uninit);
+
+int brcm_pcie_bounce_register_dev(struct device *dev)
+{
+       set_dma_ops(dev, &dmabounce_ops);
+
+       return 0;
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
+
+MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
+MODULE_DESCRIPTION("Dedicated DMA bounce support for pcie-brcmstb");
+MODULE_LICENSE("GPL");
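
For orientation, the intended client lifecycle (pcie-brcmstb.c is the only
client) — a sketch: the 4 MB pool size comes from the comment at the top of
the file, while the threshold value shown is purely illustrative:

	/* RC driver probe: reserve the pool and set the cut-off above
	 * which streaming DMA gets bounced. */
	ret = brcm_pcie_bounce_init(dev, SZ_4M, (dma_addr_t)0xc0000000);

	/* Bus notifier, as each endpoint appears: switch it to the
	 * bouncing dma_map_ops. */
	ret = brcm_pcie_bounce_register_dev(ep_dev);

	/* Teardown, once no safe buffers remain outstanding: */
	brcm_pcie_bounce_uninit(dev);
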
index f99ab05..f279ed6 100644 (file)
@@ -617,28 +617,6 @@ static const struct dma_map_ops brcm_dma_ops = {
 
 static void brcm_set_dma_ops(struct device *dev)
 {
-       int ret;
-
-       if (IS_ENABLED(CONFIG_ARM64)) {
-               /*
-                * We are going to invoke get_dma_ops().  That
-                * function, at this point in time, invokes
-                * get_arch_dma_ops(), and for ARM64 that function
-                * returns a pointer to dummy_dma_ops.  So then we'd
-                * like to call arch_setup_dma_ops(), but that isn't
-                * exported.  Instead, we call of_dma_configure(),
-                * which is exported, and this calls
-                * arch_setup_dma_ops().  Once we do this the call to
-                * get_dma_ops() will work properly because
-                * dev->dma_ops will be set.
-                */
-               ret = of_dma_configure(dev, dev->of_node, true);
-               if (ret) {
-                       dev_err(dev, "of_dma_configure() failed: %d\n", ret);
-                       return;
-               }
-       }
-
        arch_dma_ops = get_dma_ops(dev);
        if (!arch_dma_ops) {
                dev_err(dev, "failed to get arch_dma_ops\n");
@@ -657,12 +635,12 @@ static int brcmstb_platform_notifier(struct notifier_block *nb,
        extern unsigned long max_pfn;
        struct device *dev = __dev;
        const char *rc_name = "0000:00:00.0";
+       int ret;
 
        switch (event) {
        case BUS_NOTIFY_ADD_DEVICE:
                if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
                    strcmp(dev->kobj.name, rc_name)) {
-                       int ret;
 
                        ret = brcm_pcie_bounce_register_dev(dev);
                        if (ret) {
@@ -671,6 +649,12 @@ static int brcmstb_platform_notifier(struct notifier_block *nb,
                                        ret);
                                return ret;
                        }
+               } else if (IS_ENABLED(CONFIG_ARM64)) {
+                       ret = of_dma_configure(dev, dev->of_node, true);
+                       if (ret) {
+                               dev_err(dev, "of_dma_configure() failed: %d\n", ret);
+                               return ret;
+                       }
                }
                brcm_set_dma_ops(dev);
                return NOTIFY_OK;
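
A worked instance of the gating check above, assuming 4 KB pages and a
3 GB bounce_threshold (the actual value is configured elsewhere in the
driver):

	/* 4 GB of RAM  -> max_pfn                      = 0x100000
	 * 3 GB cut-off -> bounce_threshold / PAGE_SIZE =  0xc0000
	 * 0x100000 > 0xc0000, so every non-root-complex device is given
	 * the bounce ops; with 1 GB of RAM (max_pfn = 0x40000) the
	 * ARM64 branch would run of_dma_configure() instead. */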