1 // SPDX-License-Identifier: GPL-2.0-only
/*
 * IOMMU driver for BCM2712
 *
 * Copyright (c) 2023 Raspberry Pi Ltd.
 */
8 #include "bcm2712-iommu.h"
10 #include <linux/dma-mapping.h>
11 #include <linux/err.h>
12 #include <linux/iommu.h>
13 #include <linux/of_platform.h>
14 #include <linux/platform_device.h>
15 #include <linux/spinlock.h>
/*
 * Register accessors: both expect a local variable "mmu"
 * (struct bcm2712_iommu *) to be in scope at the point of use.
 */
#define MMU_WR(off, val) writel(val, mmu->reg_base + (off))
#define MMU_RD(off)      readl(mmu->reg_base + (off))

/* Recover the owning IOMMU instance from a generic iommu_domain */
#define domain_to_mmu(d) (container_of(d, struct bcm2712_iommu_domain, base)->mmu)

/* Control register: sticky fault flags, per-fault response enables, global enables */
#define MMMU_CTRL_OFFSET                       0x00
#define MMMU_CTRL_CAP_EXCEEDED                 BIT(27)
#define MMMU_CTRL_CAP_EXCEEDED_ABORT_EN        BIT(26)
#define MMMU_CTRL_CAP_EXCEEDED_INT_EN          BIT(25)
#define MMMU_CTRL_CAP_EXCEEDED_EXCEPTION_EN    BIT(24)
#define MMMU_CTRL_PT_INVALID                   BIT(20)
#define MMMU_CTRL_PT_INVALID_ABORT_EN          BIT(19)
#define MMMU_CTRL_PT_INVALID_EXCEPTION_EN      BIT(18)
#define MMMU_CTRL_PT_INVALID_EN                BIT(17)
#define MMMU_CTRL_WRITE_VIOLATION              BIT(12)
#define MMMU_CTRL_WRITE_VIOLATION_ABORT_EN     BIT(11)
#define MMMU_CTRL_WRITE_VIOLATION_INT_EN       BIT(10)
#define MMMU_CTRL_WRITE_VIOLATION_EXCEPTION_EN BIT(9)
#define MMMU_CTRL_BYPASS                       BIT(8)
#define MMMU_CTRL_TLB_CLEARING                 BIT(7)
#define MMMU_CTRL_STATS_CLEAR                  BIT(3)
#define MMMU_CTRL_TLB_CLEAR                    BIT(2)
#define MMMU_CTRL_STATS_ENABLE                 BIT(1)
#define MMMU_CTRL_ENABLE                       BIT(0)

/* Physical address of the top-level (L1) page table, in MMU pages */
#define MMMU_PT_PA_BASE_OFFSET                 0x04

/* Statistics counters (enabled by MMMU_CTRL_STATS_ENABLE) */
#define MMMU_HIT_OFFSET                        0x08
#define MMMU_MISS_OFFSET                       0x0C
#define MMMU_STALL_OFFSET                      0x10

#define MMMU_ADDR_CAP_OFFSET                   0x14
#define MMMU_ADDR_CAP_ENABLE                   BIT(31)
#define ADDR_CAP_SHIFT 28 /* ADDR_CAP is defined to be in 256 MByte units */

/* TLB shoot-down (invalidate-by-address) register */
#define MMMU_SHOOT_DOWN_OFFSET                 0x18
#define MMMU_SHOOT_DOWN_SHOOTING               BIT(31)
#define MMMU_SHOOT_DOWN_SHOOT                  BIT(30)

/* Pass-through ("bypass") address window */
#define MMMU_BYPASS_START_OFFSET               0x1C
#define MMMU_BYPASS_START_ENABLE               BIT(31)
#define MMMU_BYPASS_START_INVERT               BIT(30)

#define MMMU_BYPASS_END_OFFSET                 0x20
#define MMMU_BYPASS_END_ENABLE                 BIT(31)

/* Misc: selects 1-level (single table) vs 2-level translation */
#define MMMU_MISC_OFFSET                       0x24
#define MMMU_MISC_SINGLE_TABLE                 BIT(31)

/* Default ("illegal address") page used for unmapped accesses */
#define MMMU_ILLEGAL_ADR_OFFSET                0x30
#define MMMU_ILLEGAL_ADR_ENABLE                BIT(31)

/* Read-only hardware configuration/version register */
#define MMMU_DEBUG_INFO_OFFSET                 0x38
#define MMMU_DEBUG_INFO_VERSION_MASK           0x0000000Fu
#define MMMU_DEBUG_INFO_VA_WIDTH_MASK          0x000000F0u
#define MMMU_DEBUG_INFO_PA_WIDTH_MASK          0x00000F00u
#define MMMU_DEBUG_INFO_BIGPAGE_WIDTH_MASK     0x000FF000u
#define MMMU_DEBUG_INFO_SUPERPAGE_WIDTH_MASK   0x0FF00000u
#define MMMU_DEBUG_INFO_BYPASS_4M              BIT(28)
#define MMMU_DEBUG_INFO_BYPASS                 BIT(29)

/* Page-table entry layout: page-size field, write-enable, valid bit */
#define MMMU_PTE_PAGESIZE_MASK                 0xC0000000u
#define MMMU_PTE_WRITEABLE                     BIT(29)
#define MMMU_PTE_VALID                         BIT(28)

/*
 * BCM2712 IOMMU is organized around 4Kbyte pages (MMU_PAGE_SIZE).
 * Linux PAGE_SIZE must not be smaller but may be larger (e.g. 4K, 16K).
 *
 * Unlike many larger MMUs, this one uses a 4-byte word size, allowing
 * 1024 entries within each 4K table page, and two-level translation.
 *
 * Let's allocate enough table space for 2GB of translated memory (IOVA).
 * This requires 512 4K pages (2MB) of level-2 tables, one page of
 * top-level table (only half-filled in this particular configuration),
 * plus one "default" page to catch illegal requests.
 *
 * The translated virtual address region is between 40GB and 42GB;
 * addresses below this range pass straight through to the SDRAM.
 *
 * Currently we assume a 1:1:1 correspondence of IOMMU, group and domain.
 */
#define MMU_PAGE_SHIFT    12
#define MMU_PAGE_SIZE     BIT(MMU_PAGE_SHIFT)

#define PAGEWORDS_SHIFT   (MMU_PAGE_SHIFT - 2)                 /* 1024 PTEs per table page */
#define HUGEPAGE_SHIFT    (MMU_PAGE_SHIFT + PAGEWORDS_SHIFT)   /* 4M covered by one L2 page */
#define L1_CHUNK_SHIFT    (MMU_PAGE_SHIFT + 2 * PAGEWORDS_SHIFT)

#define APERTURE_BASE     (40ul << 30)                         /* translated region: 40GB.. */
#define APERTURE_SIZE     (2ul << 30)                          /* ..spanning 2GB */
#define APERTURE_TOP      (APERTURE_BASE + APERTURE_SIZE)
#define TRANSLATED_PAGES  (APERTURE_SIZE >> MMU_PAGE_SHIFT)
#define L2_PAGES          (TRANSLATED_PAGES >> PAGEWORDS_SHIFT)
#define TABLES_ALLOC_SIZE (L2_PAGES * MMU_PAGE_SIZE + 2 * PAGE_SIZE)
/*
 * Bring the hardware up with a fresh, empty set of translation tables:
 * sanity-check the hardware version, disable the MMU and clear sticky
 * fault flags, program the aperture/bypass windows, zero and rebuild the
 * page tables, then flush caches and re-enable translation.
 * Called from probe after the table memory has been allocated and mapped.
 */
static void bcm2712_iommu_init(struct bcm2712_iommu *mmu)
{
	unsigned int i, bypass_shift;
	struct sg_dma_page_iter it;
	u32 u = MMU_RD(MMMU_DEBUG_INFO_OFFSET);

	/*
	 * Check IOMMU version and hardware configuration.
	 * This driver is for VC IOMMU version >= 4 (with 2-level tables)
	 * and assumes at least 36 bits of virtual and physical address space.
	 * Bigpage and superpage sizes are typically 64K and 1M, but may vary
	 * (hugepage size is fixed at 4M, the range covered by an L2 page).
	 */
	dev_info(mmu->dev, "%s: DEBUG_INFO = 0x%08x\n", __func__, u);
	WARN_ON(FIELD_GET(MMMU_DEBUG_INFO_VERSION_MASK, u) < 4 ||
		FIELD_GET(MMMU_DEBUG_INFO_VA_WIDTH_MASK, u) < 6 ||
		FIELD_GET(MMMU_DEBUG_INFO_PA_WIDTH_MASK, u) < 6 ||
		!(u & MMMU_DEBUG_INFO_BYPASS));

	/* Alignment masks for the optional "bigpage"/"superpage" PTE sizes */
	mmu->bigpage_mask =
		((1u << FIELD_GET(MMMU_DEBUG_INFO_BIGPAGE_WIDTH_MASK, u)) - 1u) << MMU_PAGE_SHIFT;
	mmu->superpage_mask =
		((1u << FIELD_GET(MMMU_DEBUG_INFO_SUPERPAGE_WIDTH_MASK, u)) - 1u) << MMU_PAGE_SHIFT;
	bypass_shift = (u & MMMU_DEBUG_INFO_BYPASS_4M) ?
		HUGEPAGE_SHIFT : ADDR_CAP_SHIFT;

	/* Disable MMU and clear sticky flags; meanwhile flush the TLB */
	MMU_WR(MMMU_CTRL_OFFSET,
	       MMMU_CTRL_CAP_EXCEEDED |
	       MMMU_CTRL_PT_INVALID |
	       MMMU_CTRL_WRITE_VIOLATION |
	       MMMU_CTRL_STATS_CLEAR |
	       MMMU_CTRL_TLB_CLEAR);

	/*
	 * Put MMU into 2-level mode; set address cap and "bypass" range
	 * (note that some of these registers have unintuitive off-by-ones).
	 * Addresses below APERTURE_BASE are passed unchanged: this is
	 * useful for blocks which share an IOMMU with other blocks
	 * whose drivers are not IOMMU-aware.
	 */
	MMU_WR(MMMU_MISC_OFFSET,
	       MMU_RD(MMMU_MISC_OFFSET) & ~MMMU_MISC_SINGLE_TABLE);
	MMU_WR(MMMU_ADDR_CAP_OFFSET,
	       MMMU_ADDR_CAP_ENABLE +
	       (APERTURE_TOP >> ADDR_CAP_SHIFT) - 1);
	if (APERTURE_BASE > 0) {
		MMU_WR(MMMU_BYPASS_START_OFFSET,
		       MMMU_BYPASS_START_ENABLE + MMMU_BYPASS_START_INVERT +
		       (APERTURE_BASE >> bypass_shift) - 1);
		MMU_WR(MMMU_BYPASS_END_OFFSET,
		       MMMU_BYPASS_END_ENABLE +
		       (APERTURE_TOP >> bypass_shift));
	} else {
		MMU_WR(MMMU_BYPASS_START_OFFSET, 0);
		MMU_WR(MMMU_BYPASS_END_OFFSET, 0);
	}

	/* Ensure tables are zeroed (which marks all pages as invalid) */
	dma_sync_sgtable_for_cpu(mmu->dev, mmu->sgt, DMA_TO_DEVICE);
	memset(mmu->tables, 0, TABLES_ALLOC_SIZE);
	mmu->nmapped_pages = 0;

	/* Initialize the high-level table to point to the low-level pages */
	__sg_page_iter_start(&it.base, mmu->sgt->sgl, mmu->sgt->nents, 0);
	for (i = 0; i < L2_PAGES; i++) {
		/*
		 * Advance to the next Linux page of the scatterlist when we
		 * cross a PAGE_SIZE boundary; within a Linux page, successive
		 * 4K MMU pages are physically consecutive, so just increment.
		 */
		if (!(i % (PAGE_SIZE / MMU_PAGE_SIZE))) {
			__sg_page_iter_dma_next(&it);
			u = (sg_page_iter_dma_address(&it) >> MMU_PAGE_SHIFT);
		} else {
			u++;
		}
		mmu->tables[TRANSLATED_PAGES + i] = MMMU_PTE_VALID + u;
	}

	/*
	 * Configure the addresses of the top-level table (offset because
	 * the aperture does not start from zero), and of the default page.
	 * For simplicity, both these regions are whole Linux pages.
	 */
	__sg_page_iter_dma_next(&it);
	u = (sg_page_iter_dma_address(&it) >> MMU_PAGE_SHIFT);
	MMU_WR(MMMU_PT_PA_BASE_OFFSET, u - (APERTURE_BASE >> L1_CHUNK_SHIFT));
	__sg_page_iter_dma_next(&it);
	u = (sg_page_iter_dma_address(&it) >> MMU_PAGE_SHIFT);
	MMU_WR(MMMU_ILLEGAL_ADR_OFFSET, MMMU_ILLEGAL_ADR_ENABLE + u);
	dma_sync_sgtable_for_device(mmu->dev, mmu->sgt, DMA_TO_DEVICE);
	mmu->dirty = false;

	/* Flush (and enable) the shared TLB cache; enable this MMU. */
	if (mmu->cache)
		bcm2712_iommu_cache_flush(mmu->cache);
	MMU_WR(MMMU_CTRL_OFFSET,
	       MMMU_CTRL_CAP_EXCEEDED_ABORT_EN |
	       MMMU_CTRL_PT_INVALID_ABORT_EN |
	       MMMU_CTRL_WRITE_VIOLATION_ABORT_EN |
	       MMMU_CTRL_STATS_ENABLE |
	       MMMU_CTRL_ENABLE);
}
214 static int bcm2712_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
216 struct bcm2712_iommu *mmu = dev ? dev_iommu_priv_get(dev) : 0;
217 struct bcm2712_iommu_domain *mydomain =
218 container_of(domain, struct bcm2712_iommu_domain, base);
220 dev_info(dev, "%s: MMU %s\n",
221 __func__, mmu ? dev_name(mmu->dev) : "");
225 mmu->domain = mydomain;
227 if (mmu->dma_iova_offset) {
228 domain->geometry.aperture_start =
229 mmu->dma_iova_offset + APERTURE_BASE;
230 domain->geometry.aperture_end =
231 mmu->dma_iova_offset + APERTURE_TOP - 1ul;
/*
 * Map a physically-contiguous range [pa, pa + bytes) at the given IOVA.
 *
 * Only IOVAs fully inside the translated aperture are entered into the
 * tables; 1:1 addresses below the aperture are silently accepted (they
 * bypass translation in hardware). Anything else is rejected.
 * Table updates only become visible to the device after iotlb_sync.
 *
 * Returns 0 on success or -EINVAL for an out-of-range request.
 */
static int bcm2712_iommu_map(struct iommu_domain *domain, unsigned long iova,
			     phys_addr_t pa, size_t bytes, int prot, gfp_t gfp)
{
	struct bcm2712_iommu *mmu = domain_to_mmu(domain);

	(void)gfp;	/* tables were pre-allocated in probe; no allocation here */
	iova -= mmu->dma_iova_offset;
	if (iova >= APERTURE_BASE && iova + bytes <= APERTURE_TOP) {
		unsigned int p;
		u32 entry = MMMU_PTE_VALID | (pa >> MMU_PAGE_SHIFT);
		u32 align = (u32)(iova | pa | bytes);

		/* large page and write enable flags */
		if (!(align & ((1 << HUGEPAGE_SHIFT) - 1)))
			entry |= FIELD_PREP(MMMU_PTE_PAGESIZE_MASK, 3);
		else if (!(align & mmu->superpage_mask) && mmu->superpage_mask)
			entry |= FIELD_PREP(MMMU_PTE_PAGESIZE_MASK, 2);
		else if (!(align & mmu->bigpage_mask) && mmu->bigpage_mask)
			entry |= FIELD_PREP(MMMU_PTE_PAGESIZE_MASK, 1);
		if (prot & IOMMU_WRITE)
			entry |= MMMU_PTE_WRITEABLE;

		/* Ensure tables are cache-coherent with CPU */
		if (!mmu->dirty) {
			dma_sync_sgtable_for_cpu(mmu->dev, mmu->sgt, DMA_TO_DEVICE);
			mmu->dirty = true;
		}

		/* Fill in one PTE per 4K MMU page covered by the request */
		iova -= APERTURE_BASE;
		for (p = iova >> MMU_PAGE_SHIFT;
		     p < (iova + bytes) >> MMU_PAGE_SHIFT; p++) {
			/* count only pages going invalid -> valid */
			mmu->nmapped_pages += !(mmu->tables[p]);
			mmu->tables[p] = entry++;
		}
	} else if (iova + bytes > APERTURE_BASE || iova != pa) {
		dev_warn(mmu->dev, "%s: iova=0x%lx pa=0x%llx size=0x%llx OUT OF RANGE!\n",
			 __func__, iova,
			 (unsigned long long)pa, (unsigned long long)bytes);
		return -EINVAL;
	}

	return 0;
}
/*
 * Unmap [iova, iova + bytes). Entries inside the aperture are cleared
 * (zero PTE == invalid); the affected span is merged into "gather" so a
 * later iotlb_sync can invalidate the TLB in one pass. Out-of-aperture
 * (pass-through) ranges are ignored. Always reports the full size as
 * unmapped, as the IOMMU core expects.
 */
static size_t bcm2712_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
				  size_t bytes, struct iommu_iotlb_gather *gather)
{
	struct bcm2712_iommu *mmu = domain_to_mmu(domain);

	if (iova >= mmu->dma_iova_offset + APERTURE_BASE &&
	    iova + bytes <= mmu->dma_iova_offset + APERTURE_TOP) {
		unsigned int p;

		/* Record just the lower and upper bounds in "gather" */
		if (gather) {
			/* end <= start means the gather hasn't been used yet */
			bool empty = (gather->end <= gather->start);

			if (empty || gather->start < iova)
				gather->start = iova;
			if (empty || gather->end < iova + bytes)
				gather->end = iova + bytes;
		}

		/* Ensure tables are cache-coherent with CPU */
		if (!mmu->dirty) {
			dma_sync_sgtable_for_cpu(mmu->dev, mmu->sgt, DMA_TO_DEVICE);
			mmu->dirty = true;
		}

		/* Clear table entries, this marks the addresses as illegal */
		iova -= (mmu->dma_iova_offset + APERTURE_BASE);
		for (p = iova >> MMU_PAGE_SHIFT;
		     p < (iova + bytes) >> MMU_PAGE_SHIFT;
		     p++) {
			/* count only pages going valid -> invalid */
			mmu->nmapped_pages -= !!(mmu->tables[p]);
			mmu->tables[p] = 0;
		}
	}

	return bytes;
}
/*
 * Make pending table updates visible to the device: clean the tables out
 * of the CPU cache, flush the shared TLB cache (if any), then invalidate
 * this MMU's TLB over the given IOVA range. No-op when nothing is dirty.
 */
static void bcm2712_iommu_sync_range(struct iommu_domain *domain,
				     unsigned long iova, size_t size)
{
	struct bcm2712_iommu *mmu = domain_to_mmu(domain);
	unsigned long iova_end;
	unsigned int i, p4;

	if (!mmu || !mmu->dirty)
		return;

	/* Ensure tables are cleaned from CPU cache or write-buffer */
	dma_sync_sgtable_for_device(mmu->dev, mmu->sgt, DMA_TO_DEVICE);
	mmu->dirty = false;

	/* Flush the shared TLB cache */
	if (mmu->cache)
		bcm2712_iommu_cache_flush(mmu->cache);

	/*
	 * When flushing a large range or when nothing needs to be kept,
	 * it's quicker to use the "TLB_CLEAR" flag. Otherwise, invalidate
	 * TLB entries in lines of 4 words each. Each flush/clear operation
	 * should complete almost instantaneously.
	 */
	iova -= mmu->dma_iova_offset;
	iova_end = min(APERTURE_TOP, iova + size);
	iova = max(APERTURE_BASE, iova);
	if (mmu->nmapped_pages == 0 || iova_end - iova >= APERTURE_SIZE / 8) {
		/* Whole-TLB clear, then poll briefly until it completes */
		MMU_WR(MMMU_CTRL_OFFSET,
		       MMMU_CTRL_CAP_EXCEEDED_ABORT_EN |
		       MMMU_CTRL_PT_INVALID_ABORT_EN |
		       MMMU_CTRL_WRITE_VIOLATION_ABORT_EN |
		       MMMU_CTRL_TLB_CLEAR |
		       MMMU_CTRL_STATS_ENABLE |
		       MMMU_CTRL_ENABLE);
		for (i = 0; i < 1024; i++) {
			if (!(MMMU_CTRL_TLB_CLEARING & MMU_RD(MMMU_CTRL_OFFSET)))
				break;
		}
	} else {
		/* Shoot down one 4-word TLB line (16K of IOVA) at a time */
		for (p4 = iova >> (MMU_PAGE_SHIFT + 2);
		     p4 < (iova_end + 3 * MMU_PAGE_SIZE) >> (MMU_PAGE_SHIFT + 2);
		     p4++) {
			MMU_WR(MMMU_SHOOT_DOWN_OFFSET,
			       MMMU_SHOOT_DOWN_SHOOT + (p4 << 2));
			for (i = 0; i < 1024; i++) {
				if (!(MMMU_SHOOT_DOWN_SHOOTING & MMU_RD(MMMU_SHOOT_DOWN_OFFSET)))
					break;
			}
		}
	}
}
376 static void bcm2712_iommu_sync(struct iommu_domain *domain,
377 struct iommu_iotlb_gather *gather)
379 bcm2712_iommu_sync_range(domain, gather->start,
380 gather->end - gather->start);
/* flush_iotlb_all callback: flush the entire translated aperture */
static void bcm2712_iommu_sync_all(struct iommu_domain *domain)
{
	bcm2712_iommu_sync_range(domain, APERTURE_BASE, APERTURE_SIZE);
}
388 static phys_addr_t bcm2712_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
390 struct bcm2712_iommu *mmu = domain_to_mmu(domain);
393 iova -= mmu->dma_iova_offset;
394 if (iova >= APERTURE_BASE && iova < APERTURE_TOP) {
395 p = (iova - APERTURE_BASE) >> MMU_PAGE_SHIFT;
396 p = mmu->tables[p] & 0x0FFFFFFFu;
397 return (((phys_addr_t)p) << MMU_PAGE_SHIFT) + (iova & (MMU_PAGE_SIZE - 1u));
398 } else if (iova < APERTURE_BASE) {
399 return (phys_addr_t)iova;
401 return (phys_addr_t)-EINVAL;
/* Release a domain allocated by bcm2712_iommu_domain_alloc() */
static void bcm2712_iommu_domain_free(struct iommu_domain *domain)
{
	struct bcm2712_iommu_domain *mydomain =
		container_of(domain, struct bcm2712_iommu_domain, base);

	kfree(mydomain);
}
/* Per-domain operations (also installed as default_domain_ops below) */
static const struct iommu_domain_ops bcm2712_iommu_domain_ops = {
	.attach_dev	 = bcm2712_iommu_attach_dev,
	.map		 = bcm2712_iommu_map,
	.unmap		 = bcm2712_iommu_unmap,
	.iotlb_sync	 = bcm2712_iommu_sync,
	.iotlb_sync_map	 = bcm2712_iommu_sync_range,
	.flush_iotlb_all = bcm2712_iommu_sync_all,
	.iova_to_phys	 = bcm2712_iommu_iova_to_phys,
	.free		 = bcm2712_iommu_domain_free,
};
/*
 * Allocate a translation domain. Only unmanaged and DMA domains are
 * supported. The default aperture geometry is overridden in attach_dev
 * when a bus DMA offset applies.
 */
static struct iommu_domain *bcm2712_iommu_domain_alloc(unsigned int type)
{
	struct bcm2712_iommu_domain *domain;

	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
		return NULL;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;

	domain->base.type = type;
	domain->base.ops  = &bcm2712_iommu_domain_ops;
	domain->base.geometry.aperture_start = APERTURE_BASE;
	domain->base.geometry.aperture_end   = APERTURE_TOP - 1ul;
	domain->base.geometry.force_aperture = true;
	return &domain->base;
}
/*
 * Associate a client device with its IOMMU instance.
 *
 * First try the per-device private data set by of_xlate; if absent, walk
 * the device's single "iommus" phandle to the IOMMU platform device and
 * use its drvdata. Returns the embedded iommu_device or an ERR_PTR.
 */
static struct iommu_device *bcm2712_iommu_probe_device(struct device *dev)
{
	struct bcm2712_iommu *mmu;

	/*
	 * For reasons I don't fully understand, we need to try both
	 * cases (dev_iommu_priv_get() and platform_get_drvdata())
	 * in order to get both GPU and ISP-BE to probe successfully.
	 */
	mmu = dev_iommu_priv_get(dev);
	if (!mmu) {
		struct device_node *np;
		struct platform_device *pdev;

		/* Ignore devices that don't have an "iommus" property with exactly one phandle */
		if (!dev->of_node ||
		    of_property_count_elems_of_size(dev->of_node, "iommus", sizeof(phandle)) != 1)
			return ERR_PTR(-ENODEV);

		np = of_parse_phandle(dev->of_node, "iommus", 0);
		if (!np)
			return ERR_PTR(-EINVAL);

		pdev = of_find_device_by_node(np);
		of_node_put(np);
		if (pdev)
			mmu = platform_get_drvdata(pdev);

		if (!mmu)
			return ERR_PTR(-ENODEV);
	}

	dev_info(dev, "%s: MMU %s\n", __func__, dev_name(mmu->dev));
	dev_iommu_priv_set(dev, mmu);
	return &mmu->iommu;
}
/* Undo probe_device: forget the device's IOMMU association */
static void bcm2712_iommu_release_device(struct device *dev)
{
	dev_iommu_priv_set(dev, NULL);
}
485 static struct iommu_group *bcm2712_iommu_device_group(struct device *dev)
487 struct bcm2712_iommu *mmu = dev_iommu_priv_get(dev);
489 if (!mmu || !mmu->group)
490 return ERR_PTR(-EINVAL);
492 dev_info(dev, "%s: MMU %s\n", __func__, dev_name(mmu->dev));
493 return iommu_group_ref_get(mmu->group);
496 static int bcm2712_iommu_of_xlate(struct device *dev,
497 struct of_phandle_args *args)
499 struct platform_device *iommu_dev;
500 struct bcm2712_iommu *mmu;
502 iommu_dev = of_find_device_by_node(args->np);
503 mmu = platform_get_drvdata(iommu_dev);
504 dev_iommu_priv_set(dev, mmu);
505 dev_info(dev, "%s: MMU %s\n", __func__, dev_name(mmu->dev));
510 static bool bcm2712_iommu_capable(struct device *dev, enum iommu_cap cap)
/* Bus-level IOMMU operations registered with the IOMMU core */
static const struct iommu_ops bcm2712_iommu_ops = {
	.capable	= bcm2712_iommu_capable,
	.domain_alloc	= bcm2712_iommu_domain_alloc,
	.probe_device	= bcm2712_iommu_probe_device,
	.release_device	= bcm2712_iommu_release_device,
	.device_group	= bcm2712_iommu_device_group,
	/* Advertise native page sizes as well as 2M, 16K which Linux may prefer */
	.pgsize_bitmap	= (SZ_4M | SZ_2M | SZ_1M | SZ_64K | SZ_16K | SZ_4K),
	.default_domain_ops = &bcm2712_iommu_domain_ops,
	.of_xlate	= bcm2712_iommu_of_xlate,
};
/*
 * Probe: optionally bind to a shared TLB cache, allocate the (possibly
 * non-contiguous) table memory, map registers, create the IOMMU group,
 * initialize the hardware and register with the IOMMU core.
 */
static int bcm2712_iommu_probe(struct platform_device *pdev)
{
	struct bcm2712_iommu *mmu;
	struct bcm2712_iommu_cache *cache = NULL;
	int ret;

	/* First of all, check for an IOMMU shared cache */
	if (pdev->dev.of_node) {
		struct device_node *cache_np;
		struct platform_device *cache_pdev;

		cache_np = of_parse_phandle(pdev->dev.of_node, "cache", 0);
		if (cache_np) {
			cache_pdev = of_find_device_by_node(cache_np);
			of_node_put(cache_np);
			if (cache_pdev && !IS_ERR(cache_pdev))
				cache = platform_get_drvdata(cache_pdev);
			/* Cache node present but its driver not bound yet */
			if (!cache)
				return -EPROBE_DEFER;
		}
	}

	/* Allocate private data */
	mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL);
	if (!mmu)
		return -ENOMEM;

	mmu->name = dev_name(&pdev->dev);
	mmu->dev = &pdev->dev;
	mmu->cache = cache;
	platform_set_drvdata(pdev, mmu);
	spin_lock_init(&mmu->hw_lock);

	/*
	 * XXX When an IOMMU is downstream of a PCIe RC or some other chip/bus
	 * and serves some of the masters thereon (others using pass-through),
	 * we seem to fumble and lose the "dma-ranges" address offset for
	 * masters using IOMMU. This property restores it, where needed.
	 */
	if (!pdev->dev.of_node ||
	    of_property_read_u64(pdev->dev.of_node, "dma-iova-offset",
				 &mmu->dma_iova_offset))
		mmu->dma_iova_offset = 0;

	/*
	 * The IOMMU is itself a device that allocates DMA-able memory
	 * to hold its translation tables. Provided the IOVA aperture
	 * is no larger than 4 GBytes (so that the L1 table fits within
	 * a single 4K page), we don't need the tables to be contiguous.
	 * Assume we can address at least 36 bits (64 GB).
	 */
	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36));
	WARN_ON(ret);
	mmu->sgt = dma_alloc_noncontiguous(&pdev->dev, TABLES_ALLOC_SIZE,
					   DMA_TO_DEVICE, GFP_KERNEL,
					   DMA_ATTR_ALLOC_SINGLE_PAGES);
	if (!mmu->sgt) {
		ret = -ENOMEM;
		goto done_err;
	}
	mmu->tables = dma_vmap_noncontiguous(&pdev->dev, TABLES_ALLOC_SIZE,
					     mmu->sgt);
	if (!mmu->tables) {
		ret = -ENOMEM;
		goto done_free_pages;
	}

	/* Get IOMMU registers */
	mmu->reg_base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(mmu->reg_base)) {
		dev_err(&pdev->dev, "Failed to get IOMMU registers address\n");
		ret = PTR_ERR(mmu->reg_base);
		goto done_unmap;
	}

	/* Stuff */
	mmu->group = iommu_group_alloc();
	if (IS_ERR(mmu->group)) {
		ret = PTR_ERR(mmu->group);
		mmu->group = NULL;
		goto done_unmap;
	}
	ret = iommu_device_sysfs_add(&mmu->iommu, mmu->dev, NULL, mmu->name);
	if (ret)
		goto done_unmap;

	/* Initialize table and hardware */
	bcm2712_iommu_init(mmu);
	ret = iommu_device_register(&mmu->iommu, &bcm2712_iommu_ops, &pdev->dev);
	if (!ret) {
		dev_info(&pdev->dev, "%s: Success\n", __func__);
		return 0;
	}

done_unmap:
	dev_info(&pdev->dev, "%s: Failure %d\n", __func__, ret);
	iommu_device_sysfs_remove(&mmu->iommu);
	if (mmu->group)
		iommu_group_put(mmu->group);
	dma_vunmap_noncontiguous(&pdev->dev,
				 (void *)(mmu->tables));

done_free_pages:
	dma_free_noncontiguous(&pdev->dev, TABLES_ALLOC_SIZE,
			       mmu->sgt, DMA_TO_DEVICE);

done_err:
	return ret;
}
/* Remove: quiesce the hardware and release the table memory */
static int bcm2712_iommu_remove(struct platform_device *pdev)
{
	struct bcm2712_iommu *mmu = platform_get_drvdata(pdev);

	if (mmu->reg_base)
		MMU_WR(MMMU_CTRL_OFFSET, 0); /* disable the MMU */
	if (mmu->sgt)
		dma_free_noncontiguous(&pdev->dev, TABLES_ALLOC_SIZE,
				       mmu->sgt, DMA_TO_DEVICE);

	return 0;
}
649 static const struct of_device_id bcm2712_iommu_of_match[] = {
651 . compatible = "brcm,bcm2712-iommu"
/* Platform driver glue; built-in only (no module unload path) */
static struct platform_driver bcm2712_iommu_driver = {
	.probe = bcm2712_iommu_probe,
	.remove = bcm2712_iommu_remove,
	.driver = {
		.name = "bcm2712-iommu",
		.of_match_table = bcm2712_iommu_of_match
	},
};

builtin_platform_driver(bcm2712_iommu_driver);