iommu/io-pgtable-arm: Convert to IOMMU API TLB sync
[platform/kernel/linux-rpi.git] / drivers / iommu / arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  *      - Extended Stream ID (16 bit)
28  */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
58
59 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
60
61 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
62 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
63
64 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
65 #define TLB_SPIN_COUNT                  10
66
67 /* Maximum number of context banks per SMMU */
68 #define ARM_SMMU_MAX_CBS                128
69
70 /* SMMU global address space */
71 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
72 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
73
74 /*
75  * SMMU global address space with conditional offset to access secure
76  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
77  * nsGFSYNR0: 0x450)
78  */
79 #define ARM_SMMU_GR0_NS(smmu)                                           \
80         ((smmu)->base +                                                 \
81                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
82                         ? 0x400 : 0))
83
84 /*
85  * Some 64-bit registers only make sense to write atomically, but in such
86  * cases all the data relevant to AArch32 formats lies within the lower word,
87  * therefore this actually makes more sense than it might first appear.
88  */
89 #ifdef CONFIG_64BIT
90 #define smmu_write_atomic_lq            writeq_relaxed
91 #else
92 #define smmu_write_atomic_lq            writel_relaxed
93 #endif
94
95 /* Translation context bank */
96 #define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
97
98 #define MSI_IOVA_BASE                   0x8000000
99 #define MSI_IOVA_LENGTH                 0x100000
100
101 static int force_stage;
102 module_param(force_stage, int, S_IRUGO);
103 MODULE_PARM_DESC(force_stage,
104         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
105 static bool disable_bypass;
106 module_param(disable_bypass, bool, S_IRUGO);
107 MODULE_PARM_DESC(disable_bypass,
108         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
109
110 enum arm_smmu_arch_version {
111         ARM_SMMU_V1,
112         ARM_SMMU_V1_64K,
113         ARM_SMMU_V2,
114 };
115
116 enum arm_smmu_implementation {
117         GENERIC_SMMU,
118         ARM_MMU500,
119         CAVIUM_SMMUV2,
120 };
121
122 /* Until ACPICA headers cover IORT rev. C */
123 #ifndef ACPI_IORT_SMMU_CORELINK_MMU401
124 #define ACPI_IORT_SMMU_CORELINK_MMU401  0x4
125 #endif
126 #ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
127 #define ACPI_IORT_SMMU_CAVIUM_THUNDERX  0x5
128 #endif
129
130 struct arm_smmu_s2cr {
131         struct iommu_group              *group;
132         int                             count;
133         enum arm_smmu_s2cr_type         type;
134         enum arm_smmu_s2cr_privcfg      privcfg;
135         u8                              cbndx;
136 };
137
138 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
139         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
140 }
141
142 struct arm_smmu_smr {
143         u16                             mask;
144         u16                             id;
145         bool                            valid;
146 };
147
148 struct arm_smmu_cb {
149         u64                             ttbr[2];
150         u32                             tcr[2];
151         u32                             mair[2];
152         struct arm_smmu_cfg             *cfg;
153 };
154
155 struct arm_smmu_master_cfg {
156         struct arm_smmu_device          *smmu;
157         s16                             smendx[];
158 };
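/*
 * Per-master state lives in dev->iommu_fwspec->iommu_priv: the owning SMMU
 * and one stream-map entry index (smendx) per stream ID in the fwspec.
 * The helpers below extract those, with for_each_cfg_sme() walking every
 * (fwspec ID, SME index) pair for a master.
 */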
159 #define INVALID_SMENDX                  -1
160 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
161 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
162 #define fwspec_smendx(fw, i) \
163         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
164 #define for_each_cfg_sme(fw, i, idx) \
165         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
166
167 struct arm_smmu_device {
168         struct device                   *dev;
169
170         void __iomem                    *base;
171         void __iomem                    *cb_base;
172         unsigned long                   pgshift;
173
174 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
175 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
176 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
177 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
178 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
179 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
180 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
181 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
182 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
183 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
184 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
185 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
186 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
187         u32                             features;
188
189 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
190         u32                             options;
191         enum arm_smmu_arch_version      version;
192         enum arm_smmu_implementation    model;
193
194         u32                             num_context_banks;
195         u32                             num_s2_context_banks;
196         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
197         struct arm_smmu_cb              *cbs;
198         atomic_t                        irptndx;
199
200         u32                             num_mapping_groups;
201         u16                             streamid_mask;
202         u16                             smr_mask_mask;
203         struct arm_smmu_smr             *smrs;
204         struct arm_smmu_s2cr            *s2crs;
205         struct mutex                    stream_map_mutex;
206
207         unsigned long                   va_size;
208         unsigned long                   ipa_size;
209         unsigned long                   pa_size;
210         unsigned long                   pgsize_bitmap;
211
212         u32                             num_global_irqs;
213         u32                             num_context_irqs;
214         unsigned int                    *irqs;
215
216         u32                             cavium_id_base; /* Specific to Cavium */
217
218         spinlock_t                      global_sync_lock;
219
220         /* IOMMU core code handle */
221         struct iommu_device             iommu;
222 };
223
224 enum arm_smmu_context_fmt {
225         ARM_SMMU_CTX_FMT_NONE,
226         ARM_SMMU_CTX_FMT_AARCH64,
227         ARM_SMMU_CTX_FMT_AARCH32_L,
228         ARM_SMMU_CTX_FMT_AARCH32_S,
229 };
230
231 struct arm_smmu_cfg {
232         u8                              cbndx;
233         u8                              irptndx;
234         union {
235                 u16                     asid;
236                 u16                     vmid;
237         };
238         u32                             cbar;
239         enum arm_smmu_context_fmt       fmt;
240 };
241 #define INVALID_IRPTNDX                 0xff
242
243 enum arm_smmu_domain_stage {
244         ARM_SMMU_DOMAIN_S1 = 0,
245         ARM_SMMU_DOMAIN_S2,
246         ARM_SMMU_DOMAIN_NESTED,
247         ARM_SMMU_DOMAIN_BYPASS,
248 };
249
250 struct arm_smmu_domain {
251         struct arm_smmu_device          *smmu;
252         struct io_pgtable_ops           *pgtbl_ops;
253         const struct iommu_gather_ops   *tlb_ops;
254         struct arm_smmu_cfg             cfg;
255         enum arm_smmu_domain_stage      stage;
256         struct mutex                    init_mutex; /* Protects smmu pointer */
257         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
258         struct iommu_domain             domain;
259 };
260
261 struct arm_smmu_option_prop {
262         u32 opt;
263         const char *prop;
264 };
265
266 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
267
268 static bool using_legacy_binding, using_generic_binding;
269
270 static struct arm_smmu_option_prop arm_smmu_options[] = {
271         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
272         { 0, NULL},
273 };
274
275 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
276 {
277         return container_of(dom, struct arm_smmu_domain, domain);
278 }
279
280 static void parse_driver_options(struct arm_smmu_device *smmu)
281 {
282         int i = 0;
283
284         do {
285                 if (of_property_read_bool(smmu->dev->of_node,
286                                                 arm_smmu_options[i].prop)) {
287                         smmu->options |= arm_smmu_options[i].opt;
288                         dev_notice(smmu->dev, "option %s\n",
289                                 arm_smmu_options[i].prop);
290                 }
291         } while (arm_smmu_options[++i].opt);
292 }
293
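/*
 * For the legacy "mmu-masters" binding, the node referenced by the SMMU is
 * the PCI host controller rather than the endpoint, so for PCI masters walk
 * up to the root bus and use its bridge's parent node; platform devices
 * simply use their own OF node.
 */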
294 static struct device_node *dev_get_dev_node(struct device *dev)
295 {
296         if (dev_is_pci(dev)) {
297                 struct pci_bus *bus = to_pci_dev(dev)->bus;
298
299                 while (!pci_is_root_bus(bus))
300                         bus = bus->parent;
301                 return of_node_get(bus->bridge->parent->of_node);
302         }
303
304         return of_node_get(dev->of_node);
305 }
306
307 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
308 {
309         *((__be32 *)data) = cpu_to_be32(alias);
310         return 0; /* Continue walking */
311 }
312
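/*
 * driver_for_each_device() callback: scan one SMMU's "mmu-masters" list for
 * a phandle to the master node stashed in the iterator, and return 1 (with
 * *data pointing at the matching SMMU device) so the walk stops.
 */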
313 static int __find_legacy_master_phandle(struct device *dev, void *data)
314 {
315         struct of_phandle_iterator *it = *(void **)data;
316         struct device_node *np = it->node;
317         int err;
318
319         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
320                             "#stream-id-cells", 0)
321                 if (it->node == np) {
322                         *(void **)data = dev;
323                         return 1;
324                 }
325         it->node = np;
326         return err == -ENOENT ? 0 : err;
327 }
328
329 static struct platform_driver arm_smmu_driver;
330 static struct iommu_ops arm_smmu_ops;
331
332 static int arm_smmu_register_legacy_master(struct device *dev,
333                                            struct arm_smmu_device **smmu)
334 {
335         struct device *smmu_dev;
336         struct device_node *np;
337         struct of_phandle_iterator it;
338         void *data = &it;
339         u32 *sids;
340         __be32 pci_sid;
341         int err;
342
343         np = dev_get_dev_node(dev);
344         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
345                 of_node_put(np);
346                 return -ENODEV;
347         }
348
349         it.node = np;
350         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
351                                      __find_legacy_master_phandle);
352         smmu_dev = data;
353         of_node_put(np);
354         if (err == 0)
355                 return -ENODEV;
356         if (err < 0)
357                 return err;
358
359         if (dev_is_pci(dev)) {
360                 /* "mmu-masters" assumes Stream ID == Requester ID */
361                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
362                                        &pci_sid);
363                 it.cur = &pci_sid;
364                 it.cur_count = 1;
365         }
366
367         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
368                                 &arm_smmu_ops);
369         if (err)
370                 return err;
371
372         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
373         if (!sids)
374                 return -ENOMEM;
375
376         *smmu = dev_get_drvdata(smmu_dev);
377         of_phandle_iterator_args(&it, sids, it.cur_count);
378         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
379         kfree(sids);
380         return err;
381 }
382
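/*
 * Grab a free index from @map in the range [start, end). test_and_set_bit()
 * makes this safe against concurrent allocators: if we lose the race for a
 * bit, we simply go round and look again.
 */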
383 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
384 {
385         int idx;
386
387         do {
388                 idx = find_next_zero_bit(map, end, start);
389                 if (idx == end)
390                         return -ENOSPC;
391         } while (test_and_set_bit(idx, map));
392
393         return idx;
394 }
395
396 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
397 {
398         clear_bit(idx, map);
399 }
400
401 /* Wait for any pending TLB invalidations to complete */
402 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
403                                 void __iomem *sync, void __iomem *status)
404 {
405         unsigned int spin_cnt, delay;
406
407         writel_relaxed(0, sync);
408         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
409                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
410                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
411                                 return;
412                         cpu_relax();
413                 }
414                 udelay(delay);
415         }
416         dev_err_ratelimited(smmu->dev,
417                             "TLB sync timed out -- SMMU may be deadlocked\n");
418 }
419
420 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
421 {
422         void __iomem *base = ARM_SMMU_GR0(smmu);
423         unsigned long flags;
424
425         spin_lock_irqsave(&smmu->global_sync_lock, flags);
426         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
427                             base + ARM_SMMU_GR0_sTLBGSTATUS);
428         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
429 }
430
431 static void arm_smmu_tlb_sync_context(void *cookie)
432 {
433         struct arm_smmu_domain *smmu_domain = cookie;
434         struct arm_smmu_device *smmu = smmu_domain->smmu;
435         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
436         unsigned long flags;
437
438         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
439         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
440                             base + ARM_SMMU_CB_TLBSTATUS);
441         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
442 }
443
444 static void arm_smmu_tlb_sync_vmid(void *cookie)
445 {
446         struct arm_smmu_domain *smmu_domain = cookie;
447
448         arm_smmu_tlb_sync_global(smmu_domain->smmu);
449 }
450
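/*
 * Flush the whole TLB for a domain: by ASID for stage 1 contexts, or by
 * VMID via the global register space for stage 2 (see the _s2 variant
 * below), then wait for the invalidation to complete.
 */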
451 static void arm_smmu_tlb_inv_context_s1(void *cookie)
452 {
453         struct arm_smmu_domain *smmu_domain = cookie;
454         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
455         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
456
457         writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
458         arm_smmu_tlb_sync_context(cookie);
459 }
460
461 static void arm_smmu_tlb_inv_context_s2(void *cookie)
462 {
463         struct arm_smmu_domain *smmu_domain = cookie;
464         struct arm_smmu_device *smmu = smmu_domain->smmu;
465         void __iomem *base = ARM_SMMU_GR0(smmu);
466
467         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
468         arm_smmu_tlb_sync_global(smmu);
469 }
470
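/*
 * Post TLB invalidations for an IOVA range without waiting for completion;
 * the sync is issued separately later. Stage 1 invalidates by VA (AArch32
 * encodes the ASID in the low bits, AArch64 takes VA >> 12 with the ASID in
 * bits [63:48]); stage 2 invalidates by IPA >> 12, one write per granule.
 */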
471 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
472                                           size_t granule, bool leaf, void *cookie)
473 {
474         struct arm_smmu_domain *smmu_domain = cookie;
475         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
476         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
477         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
478
479         if (stage1) {
480                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
481
482                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
483                         iova &= ~0xfffUL;
484                         iova |= cfg->asid;
485                         do {
486                                 writel_relaxed(iova, reg);
487                                 iova += granule;
488                         } while (size -= granule);
489                 } else {
490                         iova >>= 12;
491                         iova |= (u64)cfg->asid << 48;
492                         do {
493                                 writeq_relaxed(iova, reg);
494                                 iova += granule >> 12;
495                         } while (size -= granule);
496                 }
497         } else {
498                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
499                               ARM_SMMU_CB_S2_TLBIIPAS2;
500                 iova >>= 12;
501                 do {
502                         smmu_write_atomic_lq(iova, reg);
503                         iova += granule >> 12;
504                 } while (size -= granule);
505         }
506 }
507
508 /*
509  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
510  * almost negligible, but the benefit of getting the first one in as far ahead
511  * of the sync as possible is significant, hence we don't just make this a
512  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
513  */
514 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
515                                          size_t granule, bool leaf, void *cookie)
516 {
517         struct arm_smmu_domain *smmu_domain = cookie;
518         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
519
520         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
521 }
522
523 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
524         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
525         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
526         .tlb_sync       = arm_smmu_tlb_sync_context,
527 };
528
529 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
530         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
531         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
532         .tlb_sync       = arm_smmu_tlb_sync_context,
533 };
534
535 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
536         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
537         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
538         .tlb_sync       = arm_smmu_tlb_sync_vmid,
539 };
540
541 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
542 {
543         u32 fsr, fsynr;
544         unsigned long iova;
545         struct iommu_domain *domain = dev;
546         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
547         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
548         struct arm_smmu_device *smmu = smmu_domain->smmu;
549         void __iomem *cb_base;
550
551         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
552         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
553
554         if (!(fsr & FSR_FAULT))
555                 return IRQ_NONE;
556
557         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
558         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
559
560         dev_err_ratelimited(smmu->dev,
561         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
562                             fsr, iova, fsynr, cfg->cbndx);
563
564         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
565         return IRQ_HANDLED;
566 }
567
568 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
569 {
570         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
571         struct arm_smmu_device *smmu = dev;
572         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
573
574         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
575         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
576         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
577         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
578
579         if (!gfsr)
580                 return IRQ_NONE;
581
582         dev_err_ratelimited(smmu->dev,
583                 "Unexpected global fault, this could be serious\n");
584         dev_err_ratelimited(smmu->dev,
585                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
586                 gfsr, gfsynr0, gfsynr1, gfsynr2);
587
588         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
589         return IRQ_HANDLED;
590 }
591
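/*
 * Capture the io-pgtable configuration (TCR, TTBRs, MAIRs) in the software
 * shadow for this context bank; nothing is written to hardware until
 * arm_smmu_write_context_bank().
 */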
592 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
593                                        struct io_pgtable_cfg *pgtbl_cfg)
594 {
595         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
596         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
597         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
598
599         cb->cfg = cfg;
600
601         /* TTBCR */
602         if (stage1) {
603                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
604                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
605                 } else {
606                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
607                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
608                         cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
609                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
610                                 cb->tcr[1] |= TTBCR2_AS;
611                 }
612         } else {
613                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
614         }
615
616         /* TTBRs */
617         if (stage1) {
618                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
619                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
620                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
621                 } else {
622                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
623                         cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
624                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
625                         cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
626                 }
627         } else {
628                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
629         }
630
631         /* MAIRs (stage-1 only) */
632         if (stage1) {
633                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
634                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
635                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
636                 } else {
637                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
638                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
639                 }
640         }
641 }
642
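/*
 * Commit a context bank's shadow state to the hardware registers. Banks
 * with no configuration are simply disabled by clearing SCTLR; otherwise
 * CBA2R/CBAR, TTBCR, the TTBRs and MAIRs are programmed before the bank is
 * enabled via SCTLR.
 */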
643 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
644 {
645         u32 reg;
646         bool stage1;
647         struct arm_smmu_cb *cb = &smmu->cbs[idx];
648         struct arm_smmu_cfg *cfg = cb->cfg;
649         void __iomem *cb_base, *gr1_base;
650
651         cb_base = ARM_SMMU_CB(smmu, idx);
652
653         /* Unassigned context banks only need disabling */
654         if (!cfg) {
655                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
656                 return;
657         }
658
659         gr1_base = ARM_SMMU_GR1(smmu);
660         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
661
662         /* CBA2R */
663         if (smmu->version > ARM_SMMU_V1) {
664                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
665                         reg = CBA2R_RW64_64BIT;
666                 else
667                         reg = CBA2R_RW64_32BIT;
668                 /* 16-bit VMIDs live in CBA2R */
669                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
670                         reg |= cfg->vmid << CBA2R_VMID_SHIFT;
671
672                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
673         }
674
675         /* CBAR */
676         reg = cfg->cbar;
677         if (smmu->version < ARM_SMMU_V2)
678                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
679
680         /*
681          * Use the weakest shareability/memory types, so they are
682          * overridden by the ttbcr/pte.
683          */
684         if (stage1) {
685                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
686                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
687         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
688                 /* 8-bit VMIDs live in CBAR */
689                 reg |= cfg->vmid << CBAR_VMID_SHIFT;
690         }
691         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
692
693         /*
694          * TTBCR
695          * We must write this before the TTBRs, since it determines the
696          * access behaviour of some fields (in particular, ASID[15:8]).
697          */
698         if (stage1 && smmu->version > ARM_SMMU_V1)
699                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
700         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
701
702         /* TTBRs */
703         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
704                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
705                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
706                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
707         } else {
708                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
709                 if (stage1)
710                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
711         }
712
713         /* MAIRs (stage-1 only) */
714         if (stage1) {
715                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
716                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
717         }
718
719         /* SCTLR */
720         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
721         if (stage1)
722                 reg |= SCTLR_S1_ASIDPNE;
723         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
724                 reg |= SCTLR_E;
725
726         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
727 }
728
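/*
 * Finalise a domain against a particular SMMU instance: resolve the
 * translation stage and context format, allocate a context bank and
 * ASID/VMID, build the io-pgtable, program the bank and hook up its context
 * fault interrupt. Serialised against concurrent attaches by init_mutex.
 */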
729 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
730                                         struct arm_smmu_device *smmu)
731 {
732         int irq, start, ret = 0;
733         unsigned long ias, oas;
734         struct io_pgtable_ops *pgtbl_ops;
735         struct io_pgtable_cfg pgtbl_cfg;
736         enum io_pgtable_fmt fmt;
737         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
738         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
739
740         mutex_lock(&smmu_domain->init_mutex);
741         if (smmu_domain->smmu)
742                 goto out_unlock;
743
744         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
745                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
746                 smmu_domain->smmu = smmu;
747                 goto out_unlock;
748         }
749
750         /*
751          * Mapping the requested stage onto what we support is surprisingly
752          * complicated, mainly because the spec allows S1+S2 SMMUs without
753          * support for nested translation. That means we end up with the
754          * following table:
755          *
756          * Requested        Supported        Actual
757          *     S1               N              S1
758          *     S1             S1+S2            S1
759          *     S1               S2             S2
760          *     S1               S1             S1
761          *     N                N              N
762          *     N              S1+S2            S2
763          *     N                S2             S2
764          *     N                S1             S1
765          *
766          * Note that you can't actually request stage-2 mappings.
767          */
768         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
769                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
770         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
771                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
772
773         /*
774          * Choosing a suitable context format is even more fiddly. Until we
775          * grow some way for the caller to express a preference, and/or move
776          * the decision into the io-pgtable code where it arguably belongs,
777          * just aim for the closest thing to the rest of the system, and hope
778          * that the hardware isn't esoteric enough that we can't assume AArch64
779          * support to be a superset of AArch32 support...
780          */
781         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
782                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
783         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
784             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
785             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
786             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
787                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
788         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
789             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
790                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
791                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
792                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
793
794         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
795                 ret = -EINVAL;
796                 goto out_unlock;
797         }
798
799         switch (smmu_domain->stage) {
800         case ARM_SMMU_DOMAIN_S1:
801                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
802                 start = smmu->num_s2_context_banks;
803                 ias = smmu->va_size;
804                 oas = smmu->ipa_size;
805                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
806                         fmt = ARM_64_LPAE_S1;
807                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
808                         fmt = ARM_32_LPAE_S1;
809                         ias = min(ias, 32UL);
810                         oas = min(oas, 40UL);
811                 } else {
812                         fmt = ARM_V7S;
813                         ias = min(ias, 32UL);
814                         oas = min(oas, 32UL);
815                 }
816                 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
817                 break;
818         case ARM_SMMU_DOMAIN_NESTED:
819                 /*
820                  * We will likely want to change this if/when KVM gets
821                  * involved.
822                  */
823         case ARM_SMMU_DOMAIN_S2:
824                 cfg->cbar = CBAR_TYPE_S2_TRANS;
825                 start = 0;
826                 ias = smmu->ipa_size;
827                 oas = smmu->pa_size;
828                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
829                         fmt = ARM_64_LPAE_S2;
830                 } else {
831                         fmt = ARM_32_LPAE_S2;
832                         ias = min(ias, 40UL);
833                         oas = min(oas, 40UL);
834                 }
835                 if (smmu->version == ARM_SMMU_V2)
836                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
837                 else
838                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
839                 break;
840         default:
841                 ret = -EINVAL;
842                 goto out_unlock;
843         }
844         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
845                                       smmu->num_context_banks);
846         if (ret < 0)
847                 goto out_unlock;
848
849         cfg->cbndx = ret;
850         if (smmu->version < ARM_SMMU_V2) {
851                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
852                 cfg->irptndx %= smmu->num_context_irqs;
853         } else {
854                 cfg->irptndx = cfg->cbndx;
855         }
856
857         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
858                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
859         else
860                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
861
862         pgtbl_cfg = (struct io_pgtable_cfg) {
863                 .pgsize_bitmap  = smmu->pgsize_bitmap,
864                 .ias            = ias,
865                 .oas            = oas,
866                 .tlb            = smmu_domain->tlb_ops,
867                 .iommu_dev      = smmu->dev,
868         };
869
870         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
871                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
872
873         smmu_domain->smmu = smmu;
874         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
875         if (!pgtbl_ops) {
876                 ret = -ENOMEM;
877                 goto out_clear_smmu;
878         }
879
880         /* Update the domain's page sizes to reflect the page table format */
881         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
882         domain->geometry.aperture_end = (1UL << ias) - 1;
883         domain->geometry.force_aperture = true;
884
885         /* Initialise the context bank with our page table cfg */
886         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
887         arm_smmu_write_context_bank(smmu, cfg->cbndx);
888
889         /*
890          * Request context fault interrupt. Do this last to avoid the
891          * handler seeing a half-initialised domain state.
892          */
893         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
894         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
895                                IRQF_SHARED, "arm-smmu-context-fault", domain);
896         if (ret < 0) {
897                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
898                         cfg->irptndx, irq);
899                 cfg->irptndx = INVALID_IRPTNDX;
900         }
901
902         mutex_unlock(&smmu_domain->init_mutex);
903
904         /* Publish page table ops for map/unmap */
905         smmu_domain->pgtbl_ops = pgtbl_ops;
906         return 0;
907
908 out_clear_smmu:
909         smmu_domain->smmu = NULL;
910 out_unlock:
911         mutex_unlock(&smmu_domain->init_mutex);
912         return ret;
913 }
914
915 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
916 {
917         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
918         struct arm_smmu_device *smmu = smmu_domain->smmu;
919         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
920         int irq;
921
922         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
923                 return;
924
925         /*
926          * Disable the context bank and free the page tables before freeing
927          * it.
928          */
929         smmu->cbs[cfg->cbndx].cfg = NULL;
930         arm_smmu_write_context_bank(smmu, cfg->cbndx);
931
932         if (cfg->irptndx != INVALID_IRPTNDX) {
933                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
934                 devm_free_irq(smmu->dev, irq, domain);
935         }
936
937         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
938         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
939 }
940
941 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
942 {
943         struct arm_smmu_domain *smmu_domain;
944
945         if (type != IOMMU_DOMAIN_UNMANAGED &&
946             type != IOMMU_DOMAIN_DMA &&
947             type != IOMMU_DOMAIN_IDENTITY)
948                 return NULL;
949         /*
950          * Allocate the domain and initialise some of its data structures.
951          * We can't really do anything meaningful until we've added a
952          * master.
953          */
954         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
955         if (!smmu_domain)
956                 return NULL;
957
958         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
959             iommu_get_dma_cookie(&smmu_domain->domain))) {
960                 kfree(smmu_domain);
961                 return NULL;
962         }
963
964         mutex_init(&smmu_domain->init_mutex);
965         spin_lock_init(&smmu_domain->cb_lock);
966
967         return &smmu_domain->domain;
968 }
969
970 static void arm_smmu_domain_free(struct iommu_domain *domain)
971 {
972         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
973
974         /*
975          * Free the domain resources. We assume that all devices have
976          * already been detached.
977          */
978         iommu_put_dma_cookie(domain);
979         arm_smmu_destroy_domain_context(domain);
980         kfree(smmu_domain);
981 }
982
983 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
984 {
985         struct arm_smmu_smr *smr = smmu->smrs + idx;
986         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
987
988         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
989                 reg |= SMR_VALID;
990         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
991 }
992
993 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
994 {
995         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
996         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
997                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
998                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
999
1000         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1001             smmu->smrs[idx].valid)
1002                 reg |= S2CR_EXIDVALID;
1003         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1004 }
1005
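/*
 * Write both halves of a stream map entry, S2CR first so that the routing
 * is in place before the SMR (if any) can start matching transactions.
 */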
1006 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1007 {
1008         arm_smmu_write_s2cr(smmu, idx);
1009         if (smmu->smrs)
1010                 arm_smmu_write_smr(smmu, idx);
1011 }
1012
1013 /*
1014  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1015  * should be called after sCR0 is written.
1016  */
1017 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1018 {
1019         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1020         u32 smr;
1021
1022         if (!smmu->smrs)
1023                 return;
1024
1025         /*
1026          * SMR.ID bits may not be preserved if the corresponding MASK
1027          * bits are set, so check each one separately. We can reject
1028          * masters later if they try to claim IDs outside these masks.
1029          */
1030         smr = smmu->streamid_mask << SMR_ID_SHIFT;
1031         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1032         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1033         smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1034
1035         smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1036         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1037         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1038         smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1039 }
1040
1041 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1042 {
1043         struct arm_smmu_smr *smrs = smmu->smrs;
1044         int i, free_idx = -ENOSPC;
1045
1046         /* Stream indexing is blissfully easy */
1047         if (!smrs)
1048                 return id;
1049
1050         /* Validating SMRs is... less so */
1051         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1052                 if (!smrs[i].valid) {
1053                         /*
1054                          * Note the first free entry we come across, which
1055                          * we'll claim in the end if nothing else matches.
1056                          */
1057                         if (free_idx < 0)
1058                                 free_idx = i;
1059                         continue;
1060                 }
1061                 /*
1062                  * If the new entry is _entirely_ matched by an existing entry,
1063                  * then reuse that, with the guarantee that there also cannot
1064                  * be any subsequent conflicting entries. In normal use we'd
1065                  * expect simply identical entries for this case, but there's
1066                  * no harm in accommodating the generalisation.
1067                  */
1068                 if ((mask & smrs[i].mask) == mask &&
1069                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1070                         return i;
1071                 /*
1072                  * If the new entry has any other overlap with an existing one,
1073                  * though, then there always exists at least one stream ID
1074                  * which would cause a conflict, and we can't allow that risk.
1075                  */
1076                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1077                         return -EINVAL;
1078         }
1079
1080         return free_idx;
1081 }
1082
1083 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1084 {
1085         if (--smmu->s2crs[idx].count)
1086                 return false;
1087
1088         smmu->s2crs[idx] = s2cr_init_val;
1089         if (smmu->smrs)
1090                 smmu->smrs[idx].valid = false;
1091
1092         return true;
1093 }
1094
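/*
 * Allocate (or reuse) a stream map entry for each of the master's stream
 * IDs, take a reference on it, then program the hardware once the device
 * has been placed in an IOMMU group. On failure, every entry claimed so far
 * is released again.
 */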
1095 static int arm_smmu_master_alloc_smes(struct device *dev)
1096 {
1097         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1098         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1099         struct arm_smmu_device *smmu = cfg->smmu;
1100         struct arm_smmu_smr *smrs = smmu->smrs;
1101         struct iommu_group *group;
1102         int i, idx, ret;
1103
1104         mutex_lock(&smmu->stream_map_mutex);
1105         /* Figure out a viable stream map entry allocation */
1106         for_each_cfg_sme(fwspec, i, idx) {
1107                 u16 sid = fwspec->ids[i];
1108                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1109
1110                 if (idx != INVALID_SMENDX) {
1111                         ret = -EEXIST;
1112                         goto out_err;
1113                 }
1114
1115                 ret = arm_smmu_find_sme(smmu, sid, mask);
1116                 if (ret < 0)
1117                         goto out_err;
1118
1119                 idx = ret;
1120                 if (smrs && smmu->s2crs[idx].count == 0) {
1121                         smrs[idx].id = sid;
1122                         smrs[idx].mask = mask;
1123                         smrs[idx].valid = true;
1124                 }
1125                 smmu->s2crs[idx].count++;
1126                 cfg->smendx[i] = (s16)idx;
1127         }
1128
1129         group = iommu_group_get_for_dev(dev);
1130         if (!group)
1131                 group = ERR_PTR(-ENOMEM);
1132         if (IS_ERR(group)) {
1133                 ret = PTR_ERR(group);
1134                 goto out_err;
1135         }
1136         iommu_group_put(group);
1137
1138         /* It worked! Now, poke the actual hardware */
1139         for_each_cfg_sme(fwspec, i, idx) {
1140                 arm_smmu_write_sme(smmu, idx);
1141                 smmu->s2crs[idx].group = group;
1142         }
1143
1144         mutex_unlock(&smmu->stream_map_mutex);
1145         return 0;
1146
1147 out_err:
1148         while (i--) {
1149                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1150                 cfg->smendx[i] = INVALID_SMENDX;
1151         }
1152         mutex_unlock(&smmu->stream_map_mutex);
1153         return ret;
1154 }
1155
1156 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1157 {
1158         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1159         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1160         int i, idx;
1161
1162         mutex_lock(&smmu->stream_map_mutex);
1163         for_each_cfg_sme(fwspec, i, idx) {
1164                 if (arm_smmu_free_sme(smmu, idx))
1165                         arm_smmu_write_sme(smmu, idx);
1166                 cfg->smendx[i] = INVALID_SMENDX;
1167         }
1168         mutex_unlock(&smmu->stream_map_mutex);
1169 }
1170
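/*
 * Route all of the master's stream map entries to the domain's context bank
 * (or to bypass, for identity domains) by rewriting the relevant S2CRs;
 * entries already pointing at the right place are left alone.
 */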
1171 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1172                                       struct iommu_fwspec *fwspec)
1173 {
1174         struct arm_smmu_device *smmu = smmu_domain->smmu;
1175         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1176         u8 cbndx = smmu_domain->cfg.cbndx;
1177         enum arm_smmu_s2cr_type type;
1178         int i, idx;
1179
1180         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1181                 type = S2CR_TYPE_BYPASS;
1182         else
1183                 type = S2CR_TYPE_TRANS;
1184
1185         for_each_cfg_sme(fwspec, i, idx) {
1186                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1187                         continue;
1188
1189                 s2cr[idx].type = type;
1190                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1191                 s2cr[idx].cbndx = cbndx;
1192                 arm_smmu_write_s2cr(smmu, idx);
1193         }
1194         return 0;
1195 }
1196
1197 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1198 {
1199         int ret;
1200         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1201         struct arm_smmu_device *smmu;
1202         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1203
1204         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1205                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1206                 return -ENXIO;
1207         }
1208
1209         /*
1210          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1211          * domains between of_xlate() and add_device() - we have no way to cope
1212          * with that, so until ARM gets converted to rely on groups and default
1213          * domains, just say no (but more politely than by dereferencing NULL).
1214          * This should be at least a WARN_ON once that's sorted.
1215          */
1216         if (!fwspec->iommu_priv)
1217                 return -ENODEV;
1218
1219         smmu = fwspec_smmu(fwspec);
1220         /* Ensure that the domain is finalised */
1221         ret = arm_smmu_init_domain_context(domain, smmu);
1222         if (ret < 0)
1223                 return ret;
1224
1225         /*
1226          * Sanity check the domain. We don't support domains across
1227          * different SMMUs.
1228          */
1229         if (smmu_domain->smmu != smmu) {
1230                 dev_err(dev,
1231                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1232                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1233                 return -EINVAL;
1234         }
1235
1236         /* Looks ok, so add the device to the domain */
1237         return arm_smmu_domain_add_master(smmu_domain, fwspec);
1238 }
1239
1240 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1241                         phys_addr_t paddr, size_t size, int prot)
1242 {
1243         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1244
1245         if (!ops)
1246                 return -ENODEV;
1247
1248         return ops->map(ops, iova, paddr, size, prot);
1249 }
1250
1251 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1252                              size_t size)
1253 {
1254         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1255
1256         if (!ops)
1257                 return 0;
1258
1259         return ops->unmap(ops, iova, size);
1260 }
1261
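/*
 * IOMMU API ->iotlb_sync callback: unmap only queues TLB invalidations, so
 * this is where we wait for them to complete.
 */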
1262 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1263 {
1264         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1265
1266         if (smmu_domain->tlb_ops)
1267                 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1268 }
1269
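/*
 * Translate an IOVA using the hardware's ATS1PR operation: write the
 * page-aligned address to ATS1PR, poll ATSR until the walk finishes and
 * read the result from PAR. If the hardware doesn't respond in time, fall
 * back to walking the page tables in software.
 */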
1270 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1271                                               dma_addr_t iova)
1272 {
1273         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1274         struct arm_smmu_device *smmu = smmu_domain->smmu;
1275         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1276         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1277         struct device *dev = smmu->dev;
1278         void __iomem *cb_base;
1279         u32 tmp;
1280         u64 phys;
1281         unsigned long va, flags;
1282
1283         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1284
1285         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1286         /* ATS1 registers can only be written atomically */
1287         va = iova & ~0xfffUL;
1288         if (smmu->version == ARM_SMMU_V2)
1289                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1290         else /* Register is only 32-bit in v1 */
1291                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1292
1293         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1294                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1295                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1296                 dev_err(dev,
1297                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1298                         &iova);
1299                 return ops->iova_to_phys(ops, iova);
1300         }
1301
1302         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1303         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1304         if (phys & CB_PAR_F) {
1305                 dev_err(dev, "translation fault!\n");
1306                 dev_err(dev, "PAR = 0x%llx\n", phys);
1307                 return 0;
1308         }
1309
1310         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1311 }
1312
1313 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1314                                         dma_addr_t iova)
1315 {
1316         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1317         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1318
1319         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1320                 return iova;
1321
1322         if (!ops)
1323                 return 0;
1324
1325         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1326                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1327                 return arm_smmu_iova_to_phys_hard(domain, iova);
1328
1329         return ops->iova_to_phys(ops, iova);
1330 }
1331
1332 static bool arm_smmu_capable(enum iommu_cap cap)
1333 {
1334         switch (cap) {
1335         case IOMMU_CAP_CACHE_COHERENCY:
1336                 /*
1337                  * Return true here as the SMMU can always send out coherent
1338                  * requests.
1339                  */
1340                 return true;
1341         case IOMMU_CAP_NOEXEC:
1342                 return true;
1343         default:
1344                 return false;
1345         }
1346 }
1347
1348 static int arm_smmu_match_node(struct device *dev, void *data)
1349 {
1350         return dev->fwnode == data;
1351 }
1352
1353 static
1354 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1355 {
1356         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1357                                                 fwnode, arm_smmu_match_node);
1358         put_device(dev);
1359         return dev ? dev_get_drvdata(dev) : NULL;
1360 }
1361
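/*
 * Hook a master up to its SMMU: resolve the SMMU instance (via the legacy
 * binding or the firmware node), check that every stream ID and mask fits
 * the hardware's SMR fields, then allocate the per-master config and its
 * stream map entries.
 */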
1362 static int arm_smmu_add_device(struct device *dev)
1363 {
1364         struct arm_smmu_device *smmu;
1365         struct arm_smmu_master_cfg *cfg;
1366         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1367         int i, ret;
1368
1369         if (using_legacy_binding) {
1370                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1371
1372                 /*
1373                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1374                  * will allocate/initialise a new one. Thus we need to update fwspec for
1375                  * later use.
1376                  */
1377                 fwspec = dev->iommu_fwspec;
1378                 if (ret)
1379                         goto out_free;
1380         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1381                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1382         } else {
1383                 return -ENODEV;
1384         }
1385
1386         ret = -EINVAL;
1387         for (i = 0; i < fwspec->num_ids; i++) {
1388                 u16 sid = fwspec->ids[i];
1389                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1390
1391                 if (sid & ~smmu->streamid_mask) {
1392                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1393                                 sid, smmu->streamid_mask);
1394                         goto out_free;
1395                 }
1396                 if (mask & ~smmu->smr_mask_mask) {
1397                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1398                                 mask, smmu->smr_mask_mask);
1399                         goto out_free;
1400                 }
1401         }
1402
1403         ret = -ENOMEM;
1404         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1405                       GFP_KERNEL);
1406         if (!cfg)
1407                 goto out_free;
1408
1409         cfg->smmu = smmu;
1410         fwspec->iommu_priv = cfg;
1411         while (i--)
1412                 cfg->smendx[i] = INVALID_SMENDX;
1413
1414         ret = arm_smmu_master_alloc_smes(dev);
1415         if (ret)
1416                 goto out_cfg_free;
1417
1418         iommu_device_link(&smmu->iommu, dev);
1419
1420         return 0;
1421
1422 out_cfg_free:
1423         kfree(cfg);
1424 out_free:
1425         iommu_fwspec_free(dev);
1426         return ret;
1427 }
1428
1429 static void arm_smmu_remove_device(struct device *dev)
1430 {
1431         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1432         struct arm_smmu_master_cfg *cfg;
1433         struct arm_smmu_device *smmu;
1434
1436         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1437                 return;
1438
1439         cfg  = fwspec->iommu_priv;
1440         smmu = cfg->smmu;
1441
1442         iommu_device_unlink(&smmu->iommu, dev);
1443         arm_smmu_master_free_smes(fwspec);
1444         iommu_group_remove_device(dev);
1445         kfree(fwspec->iommu_priv);
1446         iommu_fwspec_free(dev);
1447 }
1448
1449 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1450 {
1451         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1452         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1453         struct iommu_group *group = NULL;
1454         int i, idx;
1455
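             /*
              * Stream IDs that alias onto the same stream map entries must
              * share an IOMMU group: reuse any group already assigned to
              * this device's S2CRs, but reject configurations that would
              * straddle two different groups.
              */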
1456         for_each_cfg_sme(fwspec, i, idx) {
1457                 if (group && smmu->s2crs[idx].group &&
1458                     group != smmu->s2crs[idx].group)
1459                         return ERR_PTR(-EINVAL);
1460
1461                 group = smmu->s2crs[idx].group;
1462         }
1463
1464         if (group)
1465                 return iommu_group_ref_get(group);
1466
1467         if (dev_is_pci(dev))
1468                 group = pci_device_group(dev);
1469         else
1470                 group = generic_device_group(dev);
1471
1472         return group;
1473 }
1474
1475 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1476                                     enum iommu_attr attr, void *data)
1477 {
1478         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1479
1480         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1481                 return -EINVAL;
1482
1483         switch (attr) {
1484         case DOMAIN_ATTR_NESTING:
1485                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1486                 return 0;
1487         default:
1488                 return -ENODEV;
1489         }
1490 }
1491
1492 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1493                                     enum iommu_attr attr, void *data)
1494 {
1495         int ret = 0;
1496         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1497
1498         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1499                 return -EINVAL;
1500
1501         mutex_lock(&smmu_domain->init_mutex);
1502
1503         switch (attr) {
1504         case DOMAIN_ATTR_NESTING:
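                     /*
                      * The stage can only be selected while the domain is
                      * not yet finalised against a particular SMMU, i.e.
                      * before the first attach.
                      */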
1505                 if (smmu_domain->smmu) {
1506                         ret = -EPERM;
1507                         goto out_unlock;
1508                 }
1509
1510                 if (*(int *)data)
1511                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1512                 else
1513                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1514
1515                 break;
1516         default:
1517                 ret = -ENODEV;
1518         }
1519
1520 out_unlock:
1521         mutex_unlock(&smmu_domain->init_mutex);
1522         return ret;
1523 }
1524
1525 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1526 {
1527         u32 mask, fwid = 0;
1528
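             /*
              * Encode the stream ID in the low 16 bits of the fwspec ID and
              * the SMR mask (from a second cell or the "stream-match-mask"
              * property) in the bits above SMR_MASK_SHIFT.
              */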
1529         if (args->args_count > 0)
1530                 fwid |= (u16)args->args[0];
1531
1532         if (args->args_count > 1)
1533                 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1534         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1535                 fwid |= (u16)mask << SMR_MASK_SHIFT;
1536
1537         return iommu_fwspec_add_ids(dev, &fwid, 1);
1538 }
1539
1540 static void arm_smmu_get_resv_regions(struct device *dev,
1541                                       struct list_head *head)
1542 {
1543         struct iommu_resv_region *region;
1544         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1545
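             /*
              * Reserve an IOVA window for software-managed MSIs, within
              * which the DMA layer will map MSI doorbell pages.
              */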
1546         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1547                                          prot, IOMMU_RESV_SW_MSI);
1548         if (!region)
1549                 return;
1550
1551         list_add_tail(&region->list, head);
1552
1553         iommu_dma_get_resv_regions(dev, head);
1554 }
1555
1556 static void arm_smmu_put_resv_regions(struct device *dev,
1557                                       struct list_head *head)
1558 {
1559         struct iommu_resv_region *entry, *next;
1560
1561         list_for_each_entry_safe(entry, next, head, list)
1562                 kfree(entry);
1563 }
1564
1565 static struct iommu_ops arm_smmu_ops = {
1566         .capable                = arm_smmu_capable,
1567         .domain_alloc           = arm_smmu_domain_alloc,
1568         .domain_free            = arm_smmu_domain_free,
1569         .attach_dev             = arm_smmu_attach_dev,
1570         .map                    = arm_smmu_map,
1571         .unmap                  = arm_smmu_unmap,
1572         .map_sg                 = default_iommu_map_sg,
1573         .flush_iotlb_all        = arm_smmu_iotlb_sync,
1574         .iotlb_sync             = arm_smmu_iotlb_sync,
1575         .iova_to_phys           = arm_smmu_iova_to_phys,
1576         .add_device             = arm_smmu_add_device,
1577         .remove_device          = arm_smmu_remove_device,
1578         .device_group           = arm_smmu_device_group,
1579         .domain_get_attr        = arm_smmu_domain_get_attr,
1580         .domain_set_attr        = arm_smmu_domain_set_attr,
1581         .of_xlate               = arm_smmu_of_xlate,
1582         .get_resv_regions       = arm_smmu_get_resv_regions,
1583         .put_resv_regions       = arm_smmu_put_resv_regions,
1584         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1585 };
1586
1587 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1588 {
1589         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1590         int i;
1591         u32 reg, major;
1592
1593         /* clear global FSR */
1594         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1595         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1596
1597         /*
1598          * Reset stream mapping groups: Initial values mark all SMRn as
1599          * invalid and all S2CRn as bypass unless overridden.
1600          */
1601         for (i = 0; i < smmu->num_mapping_groups; ++i)
1602                 arm_smmu_write_sme(smmu, i);
1603
1604         if (smmu->model == ARM_MMU500) {
1605                 /*
1606                  * Before clearing ARM_MMU500_ACTLR_CPRE, we must first
1607                  * clear the CACHE_LOCK bit of ACR. Note that CACHE_LOCK
1608                  * is only present in MMU-500 r2 onwards.
1609                  */
1610                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1611                 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1612                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1613                 if (major >= 2)
1614                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1615                 /*
1616                  * Allow unmatched Stream IDs to allocate bypass
1617                  * TLB entries for reduced latency.
1618                  */
1619                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
1620                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1621         }
1622
1623         /* Make sure all context banks are disabled and clear CB_FSR  */
1624         for (i = 0; i < smmu->num_context_banks; ++i) {
1625                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1626
1627                 arm_smmu_write_context_bank(smmu, i);
1628                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1629                 /*
1630                  * Disable MMU-500's not-particularly-beneficial next-page
1631                  * prefetcher for the sake of errata #841119 and #826419.
1632                  */
1633                 if (smmu->model == ARM_MMU500) {
1634                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1635                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1636                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1637                 }
1638         }
1639
1640         /* Invalidate the TLB, just in case */
1641         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1642         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1643
1644         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1645
1646         /* Enable fault reporting */
1647         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1648
1649         /* Disable TLB broadcasting. */
1650         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1651
1652         /* Enable client access, handling unmatched streams as appropriate */
1653         reg &= ~sCR0_CLIENTPD;
1654         if (disable_bypass)
1655                 reg |= sCR0_USFCFG;
1656         else
1657                 reg &= ~sCR0_USFCFG;
1658
1659         /* Disable forced broadcasting */
1660         reg &= ~sCR0_FB;
1661
1662         /* Don't upgrade barriers */
1663         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1664
1665         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1666                 reg |= sCR0_VMID16EN;
1667
1668         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1669                 reg |= sCR0_EXIDENABLE;
1670
1671         /* Push the button: sync the TLB invalidation, then enable the SMMU */
1672         arm_smmu_tlb_sync_global(smmu);
1673         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1674 }
1675
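     /* Decode the address size encoding used by the IAS/OAS/UBS ID fields */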
1676 static int arm_smmu_id_size_to_bits(int size)
1677 {
1678         switch (size) {
1679         case 0:
1680                 return 32;
1681         case 1:
1682                 return 36;
1683         case 2:
1684                 return 40;
1685         case 3:
1686                 return 42;
1687         case 4:
1688                 return 44;
1689         case 5:
1690         default:
1691                 return 48;
1692         }
1693 }
1694
1695 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1696 {
1697         unsigned long size;
1698         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1699         u32 id;
1700         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1701         int i;
1702
1703         dev_notice(smmu->dev, "probing hardware configuration...\n");
1704         dev_notice(smmu->dev, "SMMUv%d with:\n",
1705                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1706
1707         /* ID0 */
1708         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1709
1710         /* Restrict available stages based on module parameter */
1711         if (force_stage == 1)
1712                 id &= ~(ID0_S2TS | ID0_NTS);
1713         else if (force_stage == 2)
1714                 id &= ~(ID0_S1TS | ID0_NTS);
1715
1716         if (id & ID0_S1TS) {
1717                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1718                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1719         }
1720
1721         if (id & ID0_S2TS) {
1722                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1723                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1724         }
1725
1726         if (id & ID0_NTS) {
1727                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1728                 dev_notice(smmu->dev, "\tnested translation\n");
1729         }
1730
1731         if (!(smmu->features &
1732                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1733                 dev_err(smmu->dev, "\tno translation support!\n");
1734                 return -ENODEV;
1735         }
1736
1737         if ((id & ID0_S1TS) &&
1738                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1739                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1740                 dev_notice(smmu->dev, "\taddress translation ops\n");
1741         }
1742
1743         /*
1744          * In order for DMA API calls to work properly, we must defer to what
1745          * the FW says about coherency, regardless of what the hardware claims.
1746          * Fortunately, this also opens up a workaround for systems where the
1747          * ID register value has ended up configured incorrectly.
1748          */
1749         cttw_reg = !!(id & ID0_CTTW);
1750         if (cttw_fw || cttw_reg)
1751                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1752                            cttw_fw ? "" : "non-");
1753         if (cttw_fw != cttw_reg)
1754                 dev_notice(smmu->dev,
1755                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1756
1757         /* Max. number of entries we have for stream matching/indexing */
1758         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1759                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1760                 size = 1 << 16;
1761         } else {
1762                 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1763         }
1764         smmu->streamid_mask = size - 1;
1765         if (id & ID0_SMS) {
1766                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1767                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1768                 if (size == 0) {
1769                         dev_err(smmu->dev,
1770                                 "stream-matching supported, but no SMRs present!\n");
1771                         return -ENODEV;
1772                 }
1773
1774                 /* Zero-initialised to mark as invalid */
1775                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1776                                           GFP_KERNEL);
1777                 if (!smmu->smrs)
1778                         return -ENOMEM;
1779
1780                 dev_notice(smmu->dev,
1781                            "\tstream matching with %lu register groups\n", size);
1782         }
1783         /* s2cr->type == 0 means translation, so initialise explicitly */
1784         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1785                                          GFP_KERNEL);
1786         if (!smmu->s2crs)
1787                 return -ENOMEM;
1788         for (i = 0; i < size; i++)
1789                 smmu->s2crs[i] = s2cr_init_val;
1790
1791         smmu->num_mapping_groups = size;
1792         mutex_init(&smmu->stream_map_mutex);
1793         spin_lock_init(&smmu->global_sync_lock);
1794
1795         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1796                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1797                 if (!(id & ID0_PTFS_NO_AARCH32S))
1798                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1799         }
1800
1801         /* ID1 */
1802         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1803         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1804
1805         /* Check for size mismatch of SMMU address space from mapped region */
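             /*
              * NUMPAGENDXB gives the size of the global address space in
              * pages (2^(NUMPAGENDXB + 1)); the translation context banks
              * occupy an identically-sized region immediately above it.
              */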
1806         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1807         size <<= smmu->pgshift;
1808         if (smmu->cb_base != gr0_base + size)
1809                 dev_warn(smmu->dev,
1810                         "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1811                         size * 2, (smmu->cb_base - gr0_base) * 2);
1812
1813         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1814         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1815         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1816                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1817                 return -ENODEV;
1818         }
1819         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1820                    smmu->num_context_banks, smmu->num_s2_context_banks);
1821         /*
1822          * Cavium CN88xx erratum #27704.
1823          * Ensure ASID and VMID allocation is unique across all SMMUs in
1824          * the system.
1825          */
1826         if (smmu->model == CAVIUM_SMMUV2) {
1827                 smmu->cavium_id_base =
1828                         atomic_add_return(smmu->num_context_banks,
1829                                           &cavium_smmu_context_count);
1830                 smmu->cavium_id_base -= smmu->num_context_banks;
1831                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1832         }
1833         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1834                                  sizeof(*smmu->cbs), GFP_KERNEL);
1835         if (!smmu->cbs)
1836                 return -ENOMEM;
1837
1838         /* ID2 */
1839         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1840         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1841         smmu->ipa_size = size;
1842
1843         /* The output mask is also applied for bypass */
1844         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1845         smmu->pa_size = size;
1846
1847         if (id & ID2_VMID16)
1848                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1849
1850         /*
1851          * What the page table walker can address actually depends on which
1852          * descriptor format is in use, but since a) we don't know that yet,
1853          * and b) it can vary per context bank, this will have to do...
1854          */
1855         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1856                 dev_warn(smmu->dev,
1857                          "failed to set DMA mask for table walker\n");
1858
1859         if (smmu->version < ARM_SMMU_V2) {
1860                 smmu->va_size = smmu->ipa_size;
1861                 if (smmu->version == ARM_SMMU_V1_64K)
1862                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1863         } else {
1864                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1865                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1866                 if (id & ID2_PTFS_4K)
1867                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1868                 if (id & ID2_PTFS_16K)
1869                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1870                 if (id & ID2_PTFS_64K)
1871                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1872         }
1873
1874         /* Now we've corralled the various formats, what'll it do? */
1875         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1876                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1877         if (smmu->features &
1878             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1879                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1880         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1881                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1882         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1883                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1884
1885         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1886                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1887         else
1888                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1889         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1890                    smmu->pgsize_bitmap);
1891
1893         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1894                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1895                            smmu->va_size, smmu->ipa_size);
1896
1897         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1898                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1899                            smmu->ipa_size, smmu->pa_size);
1900
1901         return 0;
1902 }
1903
1904 struct arm_smmu_match_data {
1905         enum arm_smmu_arch_version version;
1906         enum arm_smmu_implementation model;
1907 };
1908
1909 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1910 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1911
1912 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1913 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1914 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1915 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1916 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1917
1918 static const struct of_device_id arm_smmu_of_match[] = {
1919         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1920         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1921         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1922         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1923         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1924         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1925         { },
1926 };
1927 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1928
1929 #ifdef CONFIG_ACPI
1930 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1931 {
1932         int ret = 0;
1933
1934         switch (model) {
1935         case ACPI_IORT_SMMU_V1:
1936         case ACPI_IORT_SMMU_CORELINK_MMU400:
1937                 smmu->version = ARM_SMMU_V1;
1938                 smmu->model = GENERIC_SMMU;
1939                 break;
1940         case ACPI_IORT_SMMU_CORELINK_MMU401:
1941                 smmu->version = ARM_SMMU_V1_64K;
1942                 smmu->model = GENERIC_SMMU;
1943                 break;
1944         case ACPI_IORT_SMMU_V2:
1945                 smmu->version = ARM_SMMU_V2;
1946                 smmu->model = GENERIC_SMMU;
1947                 break;
1948         case ACPI_IORT_SMMU_CORELINK_MMU500:
1949                 smmu->version = ARM_SMMU_V2;
1950                 smmu->model = ARM_MMU500;
1951                 break;
1952         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1953                 smmu->version = ARM_SMMU_V2;
1954                 smmu->model = CAVIUM_SMMUV2;
1955                 break;
1956         default:
1957                 ret = -ENODEV;
1958         }
1959
1960         return ret;
1961 }
1962
1963 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1964                                       struct arm_smmu_device *smmu)
1965 {
1966         struct device *dev = smmu->dev;
1967         struct acpi_iort_node *node =
1968                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1969         struct acpi_iort_smmu *iort_smmu;
1970         int ret;
1971
1972         /* Retrieve SMMU1/2 specific data */
1973         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1974
1975         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1976         if (ret < 0)
1977                 return ret;
1978
1979         /* Ignore the configuration access interrupt */
1980         smmu->num_global_irqs = 1;
1981
1982         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1983                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1984
1985         return 0;
1986 }
1987 #else
1988 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1989                                              struct arm_smmu_device *smmu)
1990 {
1991         return -ENODEV;
1992 }
1993 #endif
1994
1995 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1996                                     struct arm_smmu_device *smmu)
1997 {
1998         const struct arm_smmu_match_data *data;
1999         struct device *dev = &pdev->dev;
2000         bool legacy_binding;
2001
2002         if (of_property_read_u32(dev->of_node, "#global-interrupts",
2003                                  &smmu->num_global_irqs)) {
2004                 dev_err(dev, "missing #global-interrupts property\n");
2005                 return -ENODEV;
2006         }
2007
2008         data = of_device_get_match_data(dev);
2009         smmu->version = data->version;
2010         smmu->model = data->model;
2011
2012         parse_driver_options(smmu);
2013
2014         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2015         if (legacy_binding && !using_generic_binding) {
2016                 if (!using_legacy_binding)
2017                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2018                 using_legacy_binding = true;
2019         } else if (!legacy_binding && !using_legacy_binding) {
2020                 using_generic_binding = true;
2021         } else {
2022                 dev_err(dev, "not probing due to mismatched DT properties\n");
2023                 return -ENODEV;
2024         }
2025
2026         if (of_dma_is_coherent(dev->of_node))
2027                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2028
2029         return 0;
2030 }
2031
2032 static void arm_smmu_bus_init(void)
2033 {
2034         /* Oh, for a proper bus abstraction */
2035         if (!iommu_present(&platform_bus_type))
2036                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2037 #ifdef CONFIG_ARM_AMBA
2038         if (!iommu_present(&amba_bustype))
2039                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2040 #endif
2041 #ifdef CONFIG_PCI
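             /*
              * Request PCI ACS so that devices behind the SMMU can be
              * isolated into separate IOMMU groups where the topology
              * allows it.
              */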
2042         if (!iommu_present(&pci_bus_type)) {
2043                 pci_request_acs();
2044                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2045         }
2046 #endif
2047 }
2048
2049 static int arm_smmu_device_probe(struct platform_device *pdev)
2050 {
2051         struct resource *res;
2052         resource_size_t ioaddr;
2053         struct arm_smmu_device *smmu;
2054         struct device *dev = &pdev->dev;
2055         int num_irqs, i, err;
2056
2057         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2058         if (!smmu) {
2059                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2060                 return -ENOMEM;
2061         }
2062         smmu->dev = dev;
2063
2064         if (dev->of_node)
2065                 err = arm_smmu_device_dt_probe(pdev, smmu);
2066         else
2067                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2068
2069         if (err)
2070                 return err;
2071
2072         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2073         smmu->base = devm_ioremap_resource(dev, res);
2074         if (IS_ERR(smmu->base))
2075                 return PTR_ERR(smmu->base);
2076         ioaddr = res->start;
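             /* Context banks live in the upper half of the SMMU address space */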
2077         smmu->cb_base = smmu->base + resource_size(res) / 2;
2078
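             /*
              * Count the IRQ resources: the first num_global_irqs are global
              * fault/configuration interrupts, and anything beyond that is
              * treated as a context bank interrupt.
              */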
2079         num_irqs = 0;
2080         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2081                 num_irqs++;
2082                 if (num_irqs > smmu->num_global_irqs)
2083                         smmu->num_context_irqs++;
2084         }
2085
2086         if (!smmu->num_context_irqs) {
2087                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2088                         num_irqs, smmu->num_global_irqs + 1);
2089                 return -ENODEV;
2090         }
2091
2092         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2093                                   GFP_KERNEL);
2094         if (!smmu->irqs) {
2095                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2096                 return -ENOMEM;
2097         }
2098
2099         for (i = 0; i < num_irqs; ++i) {
2100                 int irq = platform_get_irq(pdev, i);
2101
2102                 if (irq < 0) {
2103                         dev_err(dev, "failed to get irq index %d\n", i);
2104                         return -ENODEV;
2105                 }
2106                 smmu->irqs[i] = irq;
2107         }
2108
2109         err = arm_smmu_device_cfg_probe(smmu);
2110         if (err)
2111                 return err;
2112
2113         if (smmu->version == ARM_SMMU_V2 &&
2114             smmu->num_context_banks != smmu->num_context_irqs) {
2115                 dev_err(dev,
2116                         "found only %d context interrupt(s) but %d required\n",
2117                         smmu->num_context_irqs, smmu->num_context_banks);
2118                 return -ENODEV;
2119         }
2120
2121         for (i = 0; i < smmu->num_global_irqs; ++i) {
2122                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2123                                        arm_smmu_global_fault,
2124                                        IRQF_SHARED,
2125                                        "arm-smmu global fault",
2126                                        smmu);
2127                 if (err) {
2128                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2129                                 i, smmu->irqs[i]);
2130                         return err;
2131                 }
2132         }
2133
2134         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2135                                      "smmu.%pa", &ioaddr);
2136         if (err) {
2137                 dev_err(dev, "Failed to register iommu in sysfs\n");
2138                 return err;
2139         }
2140
2141         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2142         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2143
2144         err = iommu_device_register(&smmu->iommu);
2145         if (err) {
2146                 dev_err(dev, "Failed to register iommu\n");
2147                 return err;
2148         }
2149
2150         platform_set_drvdata(pdev, smmu);
2151         arm_smmu_device_reset(smmu);
2152         arm_smmu_test_smr_masks(smmu);
2153
2154         /*
2155          * For ACPI and generic DT bindings, an SMMU will be probed before
2156          * any device which might need it, so we want the bus ops in place
2157          * ready to handle default domain setup as soon as any SMMU exists.
2158          */
2159         if (!using_legacy_binding)
2160                 arm_smmu_bus_init();
2161
2162         return 0;
2163 }
2164
2165 /*
2166  * With the legacy DT binding in play, though, we have no guarantees about
2167  * probe order, but then we're also not doing default domains, so we can
2168  * delay setting bus ops until we're sure every possible SMMU is ready,
2169  * and that way ensure that no add_device() calls get missed.
2170  */
2171 static int arm_smmu_legacy_bus_init(void)
2172 {
2173         if (using_legacy_binding)
2174                 arm_smmu_bus_init();
2175         return 0;
2176 }
2177 device_initcall_sync(arm_smmu_legacy_bus_init);
2178
2179 static int arm_smmu_device_remove(struct platform_device *pdev)
2180 {
2181         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2182
2183         if (!smmu)
2184                 return -ENODEV;
2185
2186         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2187                 dev_err(&pdev->dev, "removing device with active domains!\n");
2188
2189         /* Turn the thing off */
2190         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2191         return 0;
2192 }
2193
2194 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2195 {
2196         arm_smmu_device_remove(pdev);
2197 }
2198
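     /*
      * No suspend handler is provided; on resume we simply re-run the reset
      * sequence to restore any register state that may have been lost.
      */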
2199 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2200 {
2201         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2202
2203         arm_smmu_device_reset(smmu);
2204         return 0;
2205 }
2206
2207 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2208
2209 static struct platform_driver arm_smmu_driver = {
2210         .driver = {
2211                 .name           = "arm-smmu",
2212                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2213                 .pm             = &arm_smmu_pm_ops,
2214         },
2215         .probe  = arm_smmu_device_probe,
2216         .remove = arm_smmu_device_remove,
2217         .shutdown = arm_smmu_device_shutdown,
2218 };
2219 module_platform_driver(arm_smmu_driver);
2220
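     /*
      * Advertise the supported compatibles to the OF IOMMU code so that
      * master probes can be deferred until this driver is ready for them.
      */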
2221 IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL);
2222 IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL);
2223 IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL);
2224 IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL);
2225 IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL);
2226 IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL);
2227
2228 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2229 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2230 MODULE_LICENSE("GPL v2");