staging: vchiq_arm: Add 36-bit address support
author: Phil Elwell <phil@raspberrypi.com>
Thu, 1 Nov 2018 17:31:37 +0000 (17:31 +0000)
committer: Dom Cobley <popcornmix@gmail.com>
Mon, 19 Feb 2024 11:33:11 +0000 (11:33 +0000)
Conditional on a new compatible string, change the pagelist encoding
such that the top 24 bits are the pfn, leaving the low 8 bits for the
run length in pages minus one, giving a 36-bit address range.

Manage the split between addresses for the VPU and addresses for the
40-bit DMA controller with a dedicated DMA device pointer that on non-
BCM2711 platforms is the same as the main VCHIQ device. This allows
the VCHIQ node to stay in the usual place in the DT.

Signed-off-by: Phil Elwell <phil@raspberrypi.com>
drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c

index d5dfd75..52ca088 100644 (file)
@@ -73,6 +73,7 @@ static struct platform_device *bcm2835_isp;
 
 struct vchiq_drvdata {
        const unsigned int cache_line_size;
+       const bool use_36bit_addrs;
        struct rpi_firmware *fw;
 };
 
@@ -118,6 +119,11 @@ struct vchiq_arm_state {
        int first_connect;
 };
 
+static struct vchiq_drvdata bcm2711_drvdata = {
+       .cache_line_size = 64,
+       .use_36bit_addrs = true,
+};
+
 struct vchiq_2835_state {
        int inited;
        struct vchiq_arm_state arm_state;
@@ -147,10 +153,12 @@ static void __iomem *g_regs;
  * of 32.
  */
 static unsigned int g_cache_line_size = 32;
+static unsigned int g_use_36bit_addrs = 0;
 static unsigned int g_fragments_size;
 static char *g_fragments_base;
 static char *g_free_fragments;
 static struct semaphore g_free_fragments_sema;
+static struct device *g_dma_dev;
 
 static DEFINE_SEMAPHORE(g_free_fragments_mutex, 1);
 
@@ -180,7 +188,7 @@ static void
 cleanup_pagelistinfo(struct vchiq_instance *instance, struct vchiq_pagelist_info *pagelistinfo)
 {
        if (pagelistinfo->scatterlist_mapped) {
-               dma_unmap_sg(instance->state->dev, pagelistinfo->scatterlist,
+               dma_unmap_sg(g_dma_dev, pagelistinfo->scatterlist,
                             pagelistinfo->num_pages, pagelistinfo->dma_dir);
        }
 
@@ -340,7 +348,7 @@ create_pagelist(struct vchiq_instance *instance, char *buf, char __user *ubuf,
                count -= len;
        }
 
-       dma_buffers = dma_map_sg(instance->state->dev,
+       dma_buffers = dma_map_sg(g_dma_dev,
                                 scatterlist,
                                 num_pages,
                                 pagelistinfo->dma_dir);
@@ -354,22 +362,61 @@ create_pagelist(struct vchiq_instance *instance, char *buf, char __user *ubuf,
 
        /* Combine adjacent blocks for performance */
        k = 0;
-       for_each_sg(scatterlist, sg, dma_buffers, i) {
-               u32 len = sg_dma_len(sg);
-               u32 addr = sg_dma_address(sg);
-
-               /* Note: addrs is the address + page_count - 1
-                * The firmware expects blocks after the first to be page-
-                * aligned and a multiple of the page size
-                */
-               WARN_ON(len == 0);
-               WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
-               WARN_ON(i && (addr & ~PAGE_MASK));
-               if (is_adjacent_block(addrs, addr, k))
-                       addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT);
-               else
-                       addrs[k++] = (addr & PAGE_MASK) |
-                               (((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1);
+       if (g_use_36bit_addrs) {
+               for_each_sg(scatterlist, sg, dma_buffers, i) {
+                       u32 len = sg_dma_len(sg);
+                       u64 addr = sg_dma_address(sg);
+                       u32 page_id = (u32)((addr >> 4) & ~0xff);
+                       u32 sg_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+                       /* Note: addrs is the address + page_count - 1
+                        * The firmware expects blocks after the first to be page-
+                        * aligned and a multiple of the page size
+                        */
+                       WARN_ON(len == 0);
+                       WARN_ON(i &&
+                               (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
+                       WARN_ON(i && (addr & ~PAGE_MASK));
+                       WARN_ON(upper_32_bits(addr) > 0xf);
+
+                       if (k > 0 &&
+                           ((addrs[k - 1] & ~0xff) +
+                            (((addrs[k - 1] & 0xff) + 1) << 8)
+                            == page_id)) {
+                               u32 inc_pages = min(sg_pages,
+                                                   0xff - (addrs[k - 1] & 0xff));
+                               addrs[k - 1] += inc_pages;
+                               page_id += inc_pages << 8;
+                               sg_pages -= inc_pages;
+                       }
+                       while (sg_pages) {
+                               u32 inc_pages = min(sg_pages, 0x100u);
+                               addrs[k++] = page_id | (inc_pages - 1);
+                               page_id += inc_pages << 8;
+                               sg_pages -= inc_pages;
+                       }
+               }
+       } else {
+               for_each_sg(scatterlist, sg, dma_buffers, i) {
+                       u32 len = sg_dma_len(sg);
+                       u32 addr = sg_dma_address(sg);
+                       u32 new_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+                       /* Note: addrs is the address + page_count - 1
+                        * The firmware expects blocks after the first to be page-
+                        * aligned and a multiple of the page size
+                        */
+                       WARN_ON(len == 0);
+                       WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
+                       WARN_ON(i && (addr & ~PAGE_MASK));
+                       if (k > 0 &&
+                           ((addrs[k - 1] & PAGE_MASK) +
+                            (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT))
+                           == (addr & PAGE_MASK))
+                               addrs[k - 1] += new_pages;
+                       else
+                               addrs[k++] = (addr & PAGE_MASK) | (new_pages - 1);
+               }
        }
 
        /* Partial cache lines (fragments) require special measures */
@@ -413,7 +460,7 @@ free_pagelist(struct vchiq_instance *instance, struct vchiq_pagelist_info *pagel
         * NOTE: dma_unmap_sg must be called before the
         * cpu can touch any of the data/pages.
         */
-       dma_unmap_sg(instance->state->dev, pagelistinfo->scatterlist,
+       dma_unmap_sg(g_dma_dev, pagelistinfo->scatterlist,
                     pagelistinfo->num_pages, pagelistinfo->dma_dir);
        pagelistinfo->scatterlist_mapped = 0;
 
@@ -468,6 +515,7 @@ free_pagelist(struct vchiq_instance *instance, struct vchiq_pagelist_info *pagel
 static int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state *state)
 {
        struct device *dev = &pdev->dev;
+       struct device *dma_dev = NULL;
        struct vchiq_drvdata *drvdata = platform_get_drvdata(pdev);
        struct rpi_firmware *fw = drvdata->fw;
        struct vchiq_slot_zero *vchiq_slot_zero;
@@ -489,6 +537,24 @@ static int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state
        g_cache_line_size = drvdata->cache_line_size;
        g_fragments_size = 2 * g_cache_line_size;
 
+       if (drvdata->use_36bit_addrs) {
+               struct device_node *dma_node =
+                       of_find_compatible_node(NULL, NULL, "brcm,bcm2711-dma");
+
+               if (dma_node) {
+                       struct platform_device *pdev;
+
+                       pdev = of_find_device_by_node(dma_node);
+                       if (pdev)
+                               dma_dev = &pdev->dev;
+                       of_node_put(dma_node);
+                       g_use_36bit_addrs = true;
+               } else {
+                       dev_err(dev, "40-bit DMA controller not found\n");
+                       return -EINVAL;
+               }
+       }
+
        /* Allocate space for the channels in coherent memory */
        slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
        frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
@@ -501,13 +567,14 @@ static int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state
        }
 
        WARN_ON(((unsigned long)slot_mem & (PAGE_SIZE - 1)) != 0);
+       channelbase = slot_phys;
 
        vchiq_slot_zero = vchiq_init_slots(slot_mem, slot_mem_size);
        if (!vchiq_slot_zero)
                return -ENOMEM;
 
        vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] =
-               (int)slot_phys + slot_mem_size;
+               channelbase + slot_mem_size;
        vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
                MAX_FRAGMENTS;
 
@@ -541,7 +608,6 @@ static int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state
        }
 
        /* Send the base address of the slots to VideoCore */
-       channelbase = slot_phys;
        err = rpi_firmware_property(fw, RPI_FIRMWARE_VCHIQ_INIT,
                                    &channelbase, sizeof(channelbase));
        if (err) {
@@ -555,6 +621,8 @@ static int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state
                return -ENXIO;
        }
 
+       g_dma_dev = dma_dev ?: dev;
+
        vchiq_log_info(vchiq_arm_log_level, "vchiq_init - done (slots %pK, phys %pad)",
                       vchiq_slot_zero, &slot_phys);
 
@@ -1768,6 +1836,7 @@ void vchiq_platform_conn_state_changed(struct vchiq_state *state,
 static const struct of_device_id vchiq_of_match[] = {
        { .compatible = "brcm,bcm2835-vchiq", .data = &bcm2835_drvdata },
        { .compatible = "brcm,bcm2836-vchiq", .data = &bcm2836_drvdata },
+       { .compatible = "brcm,bcm2711-vchiq", .data = &bcm2711_drvdata },
        {},
 };
 MODULE_DEVICE_TABLE(of, vchiq_of_match);
@@ -1800,22 +1869,8 @@ vchiq_register_child(struct platform_device *pdev, const char *name)
 
        child->dev.of_node = np;
 
-       /*
-        * We want the dma-ranges etc to be copied from a device with the
-        * correct dma-ranges for the VPU.
-        * VCHIQ on Pi4 is now under scb which doesn't get those dma-ranges.
-        * Take the "dma" node as going to be suitable as it sees the world
-        * through the same eyes as the VPU.
-        */
-       np = of_find_node_by_path("dma");
-       if (!np)
-               np = pdev->dev.of_node;
-
        of_dma_configure(&child->dev, np, true);
 
-       if (np != pdev->dev.of_node)
-               of_node_put(np);
-
        return child;
 }