Merge branch 'tda998x-fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-cubox into...
[platform/adaptation/renesas_rcar/renesas_kernel.git] / drivers / iommu / arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - 4k and 64k pages, with contiguous pte hints.
27  *      - Up to 42-bit addressing (dependent on VA_BITS)
28  *      - Context fault reporting
29  */
30
31 #define pr_fmt(fmt) "arm-smmu: " fmt
32
33 #include <linux/delay.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/err.h>
36 #include <linux/interrupt.h>
37 #include <linux/io.h>
38 #include <linux/iommu.h>
39 #include <linux/mm.h>
40 #include <linux/module.h>
41 #include <linux/of.h>
42 #include <linux/platform_device.h>
43 #include <linux/slab.h>
44 #include <linux/spinlock.h>
45
46 #include <linux/amba/bus.h>
47
48 #include <asm/pgalloc.h>
49
50 /* Maximum number of stream IDs assigned to a single device */
51 #define MAX_MASTER_STREAMIDS            8
52
53 /* Maximum number of context banks per SMMU */
54 #define ARM_SMMU_MAX_CBS                128
55
56 /* Maximum number of mapping groups per SMMU */
57 #define ARM_SMMU_MAX_SMRS               128
58
59 /* SMMU global address space */
60 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
61 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (smmu)->pagesize)
62
63 /* Page table bits */
64 #define ARM_SMMU_PTE_XN                 (((pteval_t)3) << 53)
65 #define ARM_SMMU_PTE_CONT               (((pteval_t)1) << 52)
66 #define ARM_SMMU_PTE_AF                 (((pteval_t)1) << 10)
67 #define ARM_SMMU_PTE_SH_NS              (((pteval_t)0) << 8)
68 #define ARM_SMMU_PTE_SH_OS              (((pteval_t)2) << 8)
69 #define ARM_SMMU_PTE_SH_IS              (((pteval_t)3) << 8)
70 #define ARM_SMMU_PTE_PAGE               (((pteval_t)3) << 0)
71
72 #if PAGE_SIZE == SZ_4K
73 #define ARM_SMMU_PTE_CONT_ENTRIES       16
74 #elif PAGE_SIZE == SZ_64K
75 #define ARM_SMMU_PTE_CONT_ENTRIES       32
76 #else
77 #define ARM_SMMU_PTE_CONT_ENTRIES       1
78 #endif
79
80 #define ARM_SMMU_PTE_CONT_SIZE          (PAGE_SIZE * ARM_SMMU_PTE_CONT_ENTRIES)
81 #define ARM_SMMU_PTE_CONT_MASK          (~(ARM_SMMU_PTE_CONT_SIZE - 1))
82 #define ARM_SMMU_PTE_HWTABLE_SIZE       (PTRS_PER_PTE * sizeof(pte_t))
83
84 /* Stage-1 PTE */
85 #define ARM_SMMU_PTE_AP_UNPRIV          (((pteval_t)1) << 6)
86 #define ARM_SMMU_PTE_AP_RDONLY          (((pteval_t)2) << 6)
87 #define ARM_SMMU_PTE_ATTRINDX_SHIFT     2
88 #define ARM_SMMU_PTE_nG                 (((pteval_t)1) << 11)
89
90 /* Stage-2 PTE */
91 #define ARM_SMMU_PTE_HAP_FAULT          (((pteval_t)0) << 6)
92 #define ARM_SMMU_PTE_HAP_READ           (((pteval_t)1) << 6)
93 #define ARM_SMMU_PTE_HAP_WRITE          (((pteval_t)2) << 6)
94 #define ARM_SMMU_PTE_MEMATTR_OIWB       (((pteval_t)0xf) << 2)
95 #define ARM_SMMU_PTE_MEMATTR_NC         (((pteval_t)0x5) << 2)
96 #define ARM_SMMU_PTE_MEMATTR_DEV        (((pteval_t)0x1) << 2)
97
98 /* Configuration registers */
99 #define ARM_SMMU_GR0_sCR0               0x0
100 #define sCR0_CLIENTPD                   (1 << 0)
101 #define sCR0_GFRE                       (1 << 1)
102 #define sCR0_GFIE                       (1 << 2)
103 #define sCR0_GCFGFRE                    (1 << 4)
104 #define sCR0_GCFGFIE                    (1 << 5)
105 #define sCR0_USFCFG                     (1 << 10)
106 #define sCR0_VMIDPNE                    (1 << 11)
107 #define sCR0_PTM                        (1 << 12)
108 #define sCR0_FB                         (1 << 13)
109 #define sCR0_BSU_SHIFT                  14
110 #define sCR0_BSU_MASK                   0x3
111
112 /* Identification registers */
113 #define ARM_SMMU_GR0_ID0                0x20
114 #define ARM_SMMU_GR0_ID1                0x24
115 #define ARM_SMMU_GR0_ID2                0x28
116 #define ARM_SMMU_GR0_ID3                0x2c
117 #define ARM_SMMU_GR0_ID4                0x30
118 #define ARM_SMMU_GR0_ID5                0x34
119 #define ARM_SMMU_GR0_ID6                0x38
120 #define ARM_SMMU_GR0_ID7                0x3c
121 #define ARM_SMMU_GR0_sGFSR              0x48
122 #define ARM_SMMU_GR0_sGFSYNR0           0x50
123 #define ARM_SMMU_GR0_sGFSYNR1           0x54
124 #define ARM_SMMU_GR0_sGFSYNR2           0x58
125 #define ARM_SMMU_GR0_PIDR0              0xfe0
126 #define ARM_SMMU_GR0_PIDR1              0xfe4
127 #define ARM_SMMU_GR0_PIDR2              0xfe8
128
129 #define ID0_S1TS                        (1 << 30)
130 #define ID0_S2TS                        (1 << 29)
131 #define ID0_NTS                         (1 << 28)
132 #define ID0_SMS                         (1 << 27)
133 #define ID0_PTFS_SHIFT                  24
134 #define ID0_PTFS_MASK                   0x2
135 #define ID0_PTFS_V8_ONLY                0x2
136 #define ID0_CTTW                        (1 << 14)
137 #define ID0_NUMIRPT_SHIFT               16
138 #define ID0_NUMIRPT_MASK                0xff
139 #define ID0_NUMSMRG_SHIFT               0
140 #define ID0_NUMSMRG_MASK                0xff
141
142 #define ID1_PAGESIZE                    (1 << 31)
143 #define ID1_NUMPAGENDXB_SHIFT           28
144 #define ID1_NUMPAGENDXB_MASK            7
145 #define ID1_NUMS2CB_SHIFT               16
146 #define ID1_NUMS2CB_MASK                0xff
147 #define ID1_NUMCB_SHIFT                 0
148 #define ID1_NUMCB_MASK                  0xff
149
150 #define ID2_OAS_SHIFT                   4
151 #define ID2_OAS_MASK                    0xf
152 #define ID2_IAS_SHIFT                   0
153 #define ID2_IAS_MASK                    0xf
154 #define ID2_UBS_SHIFT                   8
155 #define ID2_UBS_MASK                    0xf
156 #define ID2_PTFS_4K                     (1 << 12)
157 #define ID2_PTFS_16K                    (1 << 13)
158 #define ID2_PTFS_64K                    (1 << 14)
159
160 #define PIDR2_ARCH_SHIFT                4
161 #define PIDR2_ARCH_MASK                 0xf
162
163 /* Global TLB invalidation */
164 #define ARM_SMMU_GR0_STLBIALL           0x60
165 #define ARM_SMMU_GR0_TLBIVMID           0x64
166 #define ARM_SMMU_GR0_TLBIALLNSNH        0x68
167 #define ARM_SMMU_GR0_TLBIALLH           0x6c
168 #define ARM_SMMU_GR0_sTLBGSYNC          0x70
169 #define ARM_SMMU_GR0_sTLBGSTATUS        0x74
170 #define sTLBGSTATUS_GSACTIVE            (1 << 0)
171 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
172
173 /* Stream mapping registers */
174 #define ARM_SMMU_GR0_SMR(n)             (0x800 + ((n) << 2))
175 #define SMR_VALID                       (1 << 31)
176 #define SMR_MASK_SHIFT                  16
177 #define SMR_MASK_MASK                   0x7fff
178 #define SMR_ID_SHIFT                    0
179 #define SMR_ID_MASK                     0x7fff
180
181 #define ARM_SMMU_GR0_S2CR(n)            (0xc00 + ((n) << 2))
182 #define S2CR_CBNDX_SHIFT                0
183 #define S2CR_CBNDX_MASK                 0xff
184 #define S2CR_TYPE_SHIFT                 16
185 #define S2CR_TYPE_MASK                  0x3
186 #define S2CR_TYPE_TRANS                 (0 << S2CR_TYPE_SHIFT)
187 #define S2CR_TYPE_BYPASS                (1 << S2CR_TYPE_SHIFT)
188 #define S2CR_TYPE_FAULT                 (2 << S2CR_TYPE_SHIFT)
189
190 /* Context bank attribute registers */
191 #define ARM_SMMU_GR1_CBAR(n)            (0x0 + ((n) << 2))
192 #define CBAR_VMID_SHIFT                 0
193 #define CBAR_VMID_MASK                  0xff
194 #define CBAR_S1_MEMATTR_SHIFT           12
195 #define CBAR_S1_MEMATTR_MASK            0xf
196 #define CBAR_S1_MEMATTR_WB              0xf
197 #define CBAR_TYPE_SHIFT                 16
198 #define CBAR_TYPE_MASK                  0x3
199 #define CBAR_TYPE_S2_TRANS              (0 << CBAR_TYPE_SHIFT)
200 #define CBAR_TYPE_S1_TRANS_S2_BYPASS    (1 << CBAR_TYPE_SHIFT)
201 #define CBAR_TYPE_S1_TRANS_S2_FAULT     (2 << CBAR_TYPE_SHIFT)
202 #define CBAR_TYPE_S1_TRANS_S2_TRANS     (3 << CBAR_TYPE_SHIFT)
203 #define CBAR_IRPTNDX_SHIFT              24
204 #define CBAR_IRPTNDX_MASK               0xff
205
206 #define ARM_SMMU_GR1_CBA2R(n)           (0x800 + ((n) << 2))
207 #define CBA2R_RW64_32BIT                (0 << 0)
208 #define CBA2R_RW64_64BIT                (1 << 0)
209
210 /* Translation context bank */
211 #define ARM_SMMU_CB_BASE(smmu)          ((smmu)->base + ((smmu)->size >> 1))
212 #define ARM_SMMU_CB(smmu, n)            ((n) * (smmu)->pagesize)
213
214 #define ARM_SMMU_CB_SCTLR               0x0
215 #define ARM_SMMU_CB_RESUME              0x8
216 #define ARM_SMMU_CB_TTBCR2              0x10
217 #define ARM_SMMU_CB_TTBR0_LO            0x20
218 #define ARM_SMMU_CB_TTBR0_HI            0x24
219 #define ARM_SMMU_CB_TTBCR               0x30
220 #define ARM_SMMU_CB_S1_MAIR0            0x38
221 #define ARM_SMMU_CB_FSR                 0x58
222 #define ARM_SMMU_CB_FAR_LO              0x60
223 #define ARM_SMMU_CB_FAR_HI              0x64
224 #define ARM_SMMU_CB_FSYNR0              0x68
225 #define ARM_SMMU_CB_S1_TLBIASID         0x610
226
227 #define SCTLR_S1_ASIDPNE                (1 << 12)
228 #define SCTLR_CFCFG                     (1 << 7)
229 #define SCTLR_CFIE                      (1 << 6)
230 #define SCTLR_CFRE                      (1 << 5)
231 #define SCTLR_E                         (1 << 4)
232 #define SCTLR_AFE                       (1 << 2)
233 #define SCTLR_TRE                       (1 << 1)
234 #define SCTLR_M                         (1 << 0)
235 #define SCTLR_EAE_SBOP                  (SCTLR_AFE | SCTLR_TRE)
236
237 #define RESUME_RETRY                    (0 << 0)
238 #define RESUME_TERMINATE                (1 << 0)
239
240 #define TTBCR_EAE                       (1 << 31)
241
242 #define TTBCR_PASIZE_SHIFT              16
243 #define TTBCR_PASIZE_MASK               0x7
244
245 #define TTBCR_TG0_4K                    (0 << 14)
246 #define TTBCR_TG0_64K                   (1 << 14)
247
248 #define TTBCR_SH0_SHIFT                 12
249 #define TTBCR_SH0_MASK                  0x3
250 #define TTBCR_SH_NS                     0
251 #define TTBCR_SH_OS                     2
252 #define TTBCR_SH_IS                     3
253
254 #define TTBCR_ORGN0_SHIFT               10
255 #define TTBCR_IRGN0_SHIFT               8
256 #define TTBCR_RGN_MASK                  0x3
257 #define TTBCR_RGN_NC                    0
258 #define TTBCR_RGN_WBWA                  1
259 #define TTBCR_RGN_WT                    2
260 #define TTBCR_RGN_WB                    3
261
262 #define TTBCR_SL0_SHIFT                 6
263 #define TTBCR_SL0_MASK                  0x3
264 #define TTBCR_SL0_LVL_2                 0
265 #define TTBCR_SL0_LVL_1                 1
266
267 #define TTBCR_T1SZ_SHIFT                16
268 #define TTBCR_T0SZ_SHIFT                0
269 #define TTBCR_SZ_MASK                   0xf
270
271 #define TTBCR2_SEP_SHIFT                15
272 #define TTBCR2_SEP_MASK                 0x7
273
274 #define TTBCR2_PASIZE_SHIFT             0
275 #define TTBCR2_PASIZE_MASK              0x7
276
277 /* Common definitions for PASize and SEP fields */
278 #define TTBCR2_ADDR_32                  0
279 #define TTBCR2_ADDR_36                  1
280 #define TTBCR2_ADDR_40                  2
281 #define TTBCR2_ADDR_42                  3
282 #define TTBCR2_ADDR_44                  4
283 #define TTBCR2_ADDR_48                  5
284
285 #define TTBRn_HI_ASID_SHIFT             16
286
287 #define MAIR_ATTR_SHIFT(n)              ((n) << 3)
288 #define MAIR_ATTR_MASK                  0xff
289 #define MAIR_ATTR_DEVICE                0x04
290 #define MAIR_ATTR_NC                    0x44
291 #define MAIR_ATTR_WBRWA                 0xff
292 #define MAIR_ATTR_IDX_NC                0
293 #define MAIR_ATTR_IDX_CACHE             1
294 #define MAIR_ATTR_IDX_DEV               2
295
296 #define FSR_MULTI                       (1 << 31)
297 #define FSR_SS                          (1 << 30)
298 #define FSR_UUT                         (1 << 8)
299 #define FSR_ASF                         (1 << 7)
300 #define FSR_TLBLKF                      (1 << 6)
301 #define FSR_TLBMCF                      (1 << 5)
302 #define FSR_EF                          (1 << 4)
303 #define FSR_PF                          (1 << 3)
304 #define FSR_AFF                         (1 << 2)
305 #define FSR_TF                          (1 << 1)
306
307 #define FSR_IGN                         (FSR_AFF | FSR_ASF | FSR_TLBMCF |       \
308                                          FSR_TLBLKF)
309 #define FSR_FAULT                       (FSR_MULTI | FSR_SS | FSR_UUT |         \
310                                          FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
311
312 #define FSYNR0_WNR                      (1 << 4)
313
314 struct arm_smmu_smr {
315         u8                              idx;
316         u16                             mask;
317         u16                             id;
318 };
319
320 struct arm_smmu_master {
321         struct device_node              *of_node;
322
323         /*
324          * The following is specific to the master's position in the
325          * SMMU chain.
326          */
327         struct rb_node                  node;
328         int                             num_streamids;
329         u16                             streamids[MAX_MASTER_STREAMIDS];
330
331         /*
332          * We only need to allocate these on the root SMMU, as we
333          * configure unmatched streams to bypass translation.
334          */
335         struct arm_smmu_smr             *smrs;
336 };
337
338 struct arm_smmu_device {
339         struct device                   *dev;
340         struct device_node              *parent_of_node;
341
342         void __iomem                    *base;
343         unsigned long                   size;
344         unsigned long                   pagesize;
345
346 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
347 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
348 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
349 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
350 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
351         u32                             features;
352         int                             version;
353
354         u32                             num_context_banks;
355         u32                             num_s2_context_banks;
356         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
357         atomic_t                        irptndx;
358
359         u32                             num_mapping_groups;
360         DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS);
361
362         unsigned long                   input_size;
363         unsigned long                   s1_output_size;
364         unsigned long                   s2_output_size;
365
366         u32                             num_global_irqs;
367         u32                             num_context_irqs;
368         unsigned int                    *irqs;
369
370         struct list_head                list;
371         struct rb_root                  masters;
372 };
373
374 struct arm_smmu_cfg {
375         struct arm_smmu_device          *smmu;
376         u8                              cbndx;
377         u8                              irptndx;
378         u32                             cbar;
379         pgd_t                           *pgd;
380 };
381 #define INVALID_IRPTNDX                 0xff
382
383 #define ARM_SMMU_CB_ASID(cfg)           ((cfg)->cbndx)
384 #define ARM_SMMU_CB_VMID(cfg)           ((cfg)->cbndx + 1)
385
386 struct arm_smmu_domain {
387         /*
388          * A domain can span across multiple, chained SMMUs and requires
389          * all devices within the domain to follow the same translation
390          * path.
391          */
392         struct arm_smmu_device          *leaf_smmu;
393         struct arm_smmu_cfg             root_cfg;
394         phys_addr_t                     output_mask;
395
396         struct mutex                    lock;
397 };
398
399 static DEFINE_SPINLOCK(arm_smmu_devices_lock);
400 static LIST_HEAD(arm_smmu_devices);
401
402 static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,
403                                                 struct device_node *dev_node)
404 {
405         struct rb_node *node = smmu->masters.rb_node;
406
407         while (node) {
408                 struct arm_smmu_master *master;
409                 master = container_of(node, struct arm_smmu_master, node);
410
411                 if (dev_node < master->of_node)
412                         node = node->rb_left;
413                 else if (dev_node > master->of_node)
414                         node = node->rb_right;
415                 else
416                         return master;
417         }
418
419         return NULL;
420 }
421
422 static int insert_smmu_master(struct arm_smmu_device *smmu,
423                               struct arm_smmu_master *master)
424 {
425         struct rb_node **new, *parent;
426
427         new = &smmu->masters.rb_node;
428         parent = NULL;
429         while (*new) {
430                 struct arm_smmu_master *this;
431                 this = container_of(*new, struct arm_smmu_master, node);
432
433                 parent = *new;
434                 if (master->of_node < this->of_node)
435                         new = &((*new)->rb_left);
436                 else if (master->of_node > this->of_node)
437                         new = &((*new)->rb_right);
438                 else
439                         return -EEXIST;
440         }
441
442         rb_link_node(&master->node, parent, new);
443         rb_insert_color(&master->node, &smmu->masters);
444         return 0;
445 }
446
447 static int register_smmu_master(struct arm_smmu_device *smmu,
448                                 struct device *dev,
449                                 struct of_phandle_args *masterspec)
450 {
451         int i;
452         struct arm_smmu_master *master;
453
454         master = find_smmu_master(smmu, masterspec->np);
455         if (master) {
456                 dev_err(dev,
457                         "rejecting multiple registrations for master device %s\n",
458                         masterspec->np->name);
459                 return -EBUSY;
460         }
461
462         if (masterspec->args_count > MAX_MASTER_STREAMIDS) {
463                 dev_err(dev,
464                         "reached maximum number (%d) of stream IDs for master device %s\n",
465                         MAX_MASTER_STREAMIDS, masterspec->np->name);
466                 return -ENOSPC;
467         }
468
469         master = devm_kzalloc(dev, sizeof(*master), GFP_KERNEL);
470         if (!master)
471                 return -ENOMEM;
472
473         master->of_node         = masterspec->np;
474         master->num_streamids   = masterspec->args_count;
475
476         for (i = 0; i < master->num_streamids; ++i)
477                 master->streamids[i] = masterspec->args[i];
478
479         return insert_smmu_master(smmu, master);
480 }
481
482 static struct arm_smmu_device *find_parent_smmu(struct arm_smmu_device *smmu)
483 {
484         struct arm_smmu_device *parent;
485
486         if (!smmu->parent_of_node)
487                 return NULL;
488
489         spin_lock(&arm_smmu_devices_lock);
490         list_for_each_entry(parent, &arm_smmu_devices, list)
491                 if (parent->dev->of_node == smmu->parent_of_node)
492                         goto out_unlock;
493
494         parent = NULL;
495         dev_warn(smmu->dev,
496                  "Failed to find SMMU parent despite parent in DT\n");
497 out_unlock:
498         spin_unlock(&arm_smmu_devices_lock);
499         return parent;
500 }
501
502 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
503 {
504         int idx;
505
506         do {
507                 idx = find_next_zero_bit(map, end, start);
508                 if (idx == end)
509                         return -ENOSPC;
510         } while (test_and_set_bit(idx, map));
511
512         return idx;
513 }
514
/* Give bit @idx of @map back so __arm_smmu_alloc_bitmap() can hand it out again */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
519
520 /* Wait for any pending TLB invalidations to complete */
521 static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
522 {
523         int count = 0;
524         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
525
526         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC);
527         while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS)
528                & sTLBGSTATUS_GSACTIVE) {
529                 cpu_relax();
530                 if (++count == TLB_LOOP_TIMEOUT) {
531                         dev_err_ratelimited(smmu->dev,
532                         "TLB sync timed out -- SMMU may be deadlocked\n");
533                         return;
534                 }
535                 udelay(1);
536         }
537 }
538
539 static void arm_smmu_tlb_inv_context(struct arm_smmu_cfg *cfg)
540 {
541         struct arm_smmu_device *smmu = cfg->smmu;
542         void __iomem *base = ARM_SMMU_GR0(smmu);
543         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
544
545         if (stage1) {
546                 base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
547                 writel_relaxed(ARM_SMMU_CB_ASID(cfg),
548                                base + ARM_SMMU_CB_S1_TLBIASID);
549         } else {
550                 base = ARM_SMMU_GR0(smmu);
551                 writel_relaxed(ARM_SMMU_CB_VMID(cfg),
552                                base + ARM_SMMU_GR0_TLBIVMID);
553         }
554
555         arm_smmu_tlb_sync(smmu);
556 }
557
558 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
559 {
560         int flags, ret;
561         u32 fsr, far, fsynr, resume;
562         unsigned long iova;
563         struct iommu_domain *domain = dev;
564         struct arm_smmu_domain *smmu_domain = domain->priv;
565         struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;
566         struct arm_smmu_device *smmu = root_cfg->smmu;
567         void __iomem *cb_base;
568
569         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx);
570         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
571
572         if (!(fsr & FSR_FAULT))
573                 return IRQ_NONE;
574
575         if (fsr & FSR_IGN)
576                 dev_err_ratelimited(smmu->dev,
577                                     "Unexpected context fault (fsr 0x%u)\n",
578                                     fsr);
579
580         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
581         flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
582
583         far = readl_relaxed(cb_base + ARM_SMMU_CB_FAR_LO);
584         iova = far;
585 #ifdef CONFIG_64BIT
586         far = readl_relaxed(cb_base + ARM_SMMU_CB_FAR_HI);
587         iova |= ((unsigned long)far << 32);
588 #endif
589
590         if (!report_iommu_fault(domain, smmu->dev, iova, flags)) {
591                 ret = IRQ_HANDLED;
592                 resume = RESUME_RETRY;
593         } else {
594                 dev_err_ratelimited(smmu->dev,
595                     "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n",
596                     iova, fsynr, root_cfg->cbndx);
597                 ret = IRQ_NONE;
598                 resume = RESUME_TERMINATE;
599         }
600
601         /* Clear the faulting FSR */
602         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
603
604         /* Retry or terminate any stalled transactions */
605         if (fsr & FSR_SS)
606                 writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME);
607
608         return ret;
609 }
610
611 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
612 {
613         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
614         struct arm_smmu_device *smmu = dev;
615         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
616
617         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
618         if (!gfsr)
619                 return IRQ_NONE;
620
621         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
622         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
623         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
624
625         dev_err_ratelimited(smmu->dev,
626                 "Unexpected global fault, this could be serious\n");
627         dev_err_ratelimited(smmu->dev,
628                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
629                 gfsr, gfsynr0, gfsynr1, gfsynr2);
630
631         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
632         return IRQ_HANDLED;
633 }
634
635 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
636 {
637         u32 reg;
638         bool stage1;
639         struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;
640         struct arm_smmu_device *smmu = root_cfg->smmu;
641         void __iomem *cb_base, *gr0_base, *gr1_base;
642
643         gr0_base = ARM_SMMU_GR0(smmu);
644         gr1_base = ARM_SMMU_GR1(smmu);
645         stage1 = root_cfg->cbar != CBAR_TYPE_S2_TRANS;
646         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx);
647
648         /* CBAR */
649         reg = root_cfg->cbar;
650         if (smmu->version == 1)
651               reg |= root_cfg->irptndx << CBAR_IRPTNDX_SHIFT;
652
653         /* Use the weakest memory type, so it is overridden by the pte */
654         if (stage1)
655                 reg |= (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
656         else
657                 reg |= ARM_SMMU_CB_VMID(root_cfg) << CBAR_VMID_SHIFT;
658         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(root_cfg->cbndx));
659
660         if (smmu->version > 1) {
661                 /* CBA2R */
662 #ifdef CONFIG_64BIT
663                 reg = CBA2R_RW64_64BIT;
664 #else
665                 reg = CBA2R_RW64_32BIT;
666 #endif
667                 writel_relaxed(reg,
668                                gr1_base + ARM_SMMU_GR1_CBA2R(root_cfg->cbndx));
669
670                 /* TTBCR2 */
671                 switch (smmu->input_size) {
672                 case 32:
673                         reg = (TTBCR2_ADDR_32 << TTBCR2_SEP_SHIFT);
674                         break;
675                 case 36:
676                         reg = (TTBCR2_ADDR_36 << TTBCR2_SEP_SHIFT);
677                         break;
678                 case 39:
679                         reg = (TTBCR2_ADDR_40 << TTBCR2_SEP_SHIFT);
680                         break;
681                 case 42:
682                         reg = (TTBCR2_ADDR_42 << TTBCR2_SEP_SHIFT);
683                         break;
684                 case 44:
685                         reg = (TTBCR2_ADDR_44 << TTBCR2_SEP_SHIFT);
686                         break;
687                 case 48:
688                         reg = (TTBCR2_ADDR_48 << TTBCR2_SEP_SHIFT);
689                         break;
690                 }
691
692                 switch (smmu->s1_output_size) {
693                 case 32:
694                         reg |= (TTBCR2_ADDR_32 << TTBCR2_PASIZE_SHIFT);
695                         break;
696                 case 36:
697                         reg |= (TTBCR2_ADDR_36 << TTBCR2_PASIZE_SHIFT);
698                         break;
699                 case 39:
700                         reg |= (TTBCR2_ADDR_40 << TTBCR2_PASIZE_SHIFT);
701                         break;
702                 case 42:
703                         reg |= (TTBCR2_ADDR_42 << TTBCR2_PASIZE_SHIFT);
704                         break;
705                 case 44:
706                         reg |= (TTBCR2_ADDR_44 << TTBCR2_PASIZE_SHIFT);
707                         break;
708                 case 48:
709                         reg |= (TTBCR2_ADDR_48 << TTBCR2_PASIZE_SHIFT);
710                         break;
711                 }
712
713                 if (stage1)
714                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2);
715         }
716
717         /* TTBR0 */
718         reg = __pa(root_cfg->pgd);
719         writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
720         reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32;
721         if (stage1)
722                 reg |= ARM_SMMU_CB_ASID(root_cfg) << TTBRn_HI_ASID_SHIFT;
723         writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
724
725         /*
726          * TTBCR
727          * We use long descriptor, with inner-shareable WBWA tables in TTBR0.
728          */
729         if (smmu->version > 1) {
730                 if (PAGE_SIZE == SZ_4K)
731                         reg = TTBCR_TG0_4K;
732                 else
733                         reg = TTBCR_TG0_64K;
734
735                 if (!stage1) {
736                         switch (smmu->s2_output_size) {
737                         case 32:
738                                 reg |= (TTBCR2_ADDR_32 << TTBCR_PASIZE_SHIFT);
739                                 break;
740                         case 36:
741                                 reg |= (TTBCR2_ADDR_36 << TTBCR_PASIZE_SHIFT);
742                                 break;
743                         case 40:
744                                 reg |= (TTBCR2_ADDR_40 << TTBCR_PASIZE_SHIFT);
745                                 break;
746                         case 42:
747                                 reg |= (TTBCR2_ADDR_42 << TTBCR_PASIZE_SHIFT);
748                                 break;
749                         case 44:
750                                 reg |= (TTBCR2_ADDR_44 << TTBCR_PASIZE_SHIFT);
751                                 break;
752                         case 48:
753                                 reg |= (TTBCR2_ADDR_48 << TTBCR_PASIZE_SHIFT);
754                                 break;
755                         }
756                 } else {
757                         reg |= (64 - smmu->s1_output_size) << TTBCR_T0SZ_SHIFT;
758                 }
759         } else {
760                 reg = 0;
761         }
762
763         reg |= TTBCR_EAE |
764               (TTBCR_SH_IS << TTBCR_SH0_SHIFT) |
765               (TTBCR_RGN_WBWA << TTBCR_ORGN0_SHIFT) |
766               (TTBCR_RGN_WBWA << TTBCR_IRGN0_SHIFT) |
767               (TTBCR_SL0_LVL_1 << TTBCR_SL0_SHIFT);
768         writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
769
770         /* MAIR0 (stage-1 only) */
771         if (stage1) {
772                 reg = (MAIR_ATTR_NC << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_NC)) |
773                       (MAIR_ATTR_WBRWA << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_CACHE)) |
774                       (MAIR_ATTR_DEVICE << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_DEV));
775                 writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
776         }
777
778         /* SCTLR */
779         reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
780         if (stage1)
781                 reg |= SCTLR_S1_ASIDPNE;
782 #ifdef __BIG_ENDIAN
783         reg |= SCTLR_E;
784 #endif
785         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
786 }
787
788 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
789                                         struct device *dev)
790 {
791         int irq, ret, start;
792         struct arm_smmu_domain *smmu_domain = domain->priv;
793         struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;
794         struct arm_smmu_device *smmu, *parent;
795
796         /*
797          * Walk the SMMU chain to find the root device for this chain.
798          * We assume that no masters have translations which terminate
799          * early, and therefore check that the root SMMU does indeed have
800          * a StreamID for the master in question.
801          */
802         parent = dev->archdata.iommu;
803         smmu_domain->output_mask = -1;
804         do {
805                 smmu = parent;
806                 smmu_domain->output_mask &= (1ULL << smmu->s2_output_size) - 1;
807         } while ((parent = find_parent_smmu(smmu)));
808
809         if (!find_smmu_master(smmu, dev->of_node)) {
810                 dev_err(dev, "unable to find root SMMU for device\n");
811                 return -ENODEV;
812         }
813
814         if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) {
815                 /*
816                  * We will likely want to change this if/when KVM gets
817                  * involved.
818                  */
819                 root_cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
820                 start = smmu->num_s2_context_banks;
821         } else if (smmu->features & ARM_SMMU_FEAT_TRANS_S2) {
822                 root_cfg->cbar = CBAR_TYPE_S2_TRANS;
823                 start = 0;
824         } else {
825                 root_cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
826                 start = smmu->num_s2_context_banks;
827         }
828
829         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
830                                       smmu->num_context_banks);
831         if (IS_ERR_VALUE(ret))
832                 return ret;
833
834         root_cfg->cbndx = ret;
835         if (smmu->version == 1) {
836                 root_cfg->irptndx = atomic_inc_return(&smmu->irptndx);
837                 root_cfg->irptndx %= smmu->num_context_irqs;
838         } else {
839                 root_cfg->irptndx = root_cfg->cbndx;
840         }
841
842         irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx];
843         ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED,
844                           "arm-smmu-context-fault", domain);
845         if (IS_ERR_VALUE(ret)) {
846                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
847                         root_cfg->irptndx, irq);
848                 root_cfg->irptndx = INVALID_IRPTNDX;
849                 goto out_free_context;
850         }
851
852         root_cfg->smmu = smmu;
853         arm_smmu_init_context_bank(smmu_domain);
854         return ret;
855
856 out_free_context:
857         __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx);
858         return ret;
859 }
860
861 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
862 {
863         struct arm_smmu_domain *smmu_domain = domain->priv;
864         struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;
865         struct arm_smmu_device *smmu = root_cfg->smmu;
866         void __iomem *cb_base;
867         int irq;
868
869         if (!smmu)
870                 return;
871
872         /* Disable the context bank and nuke the TLB before freeing it. */
873         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx);
874         writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
875         arm_smmu_tlb_inv_context(root_cfg);
876
877         if (root_cfg->irptndx != INVALID_IRPTNDX) {
878                 irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx];
879                 free_irq(irq, domain);
880         }
881
882         __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx);
883 }
884
885 static int arm_smmu_domain_init(struct iommu_domain *domain)
886 {
887         struct arm_smmu_domain *smmu_domain;
888         pgd_t *pgd;
889
890         /*
891          * Allocate the domain and initialise some of its data structures.
892          * We can't really do anything meaningful until we've added a
893          * master.
894          */
895         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
896         if (!smmu_domain)
897                 return -ENOMEM;
898
899         pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
900         if (!pgd)
901                 goto out_free_domain;
902         smmu_domain->root_cfg.pgd = pgd;
903
904         mutex_init(&smmu_domain->lock);
905         domain->priv = smmu_domain;
906         return 0;
907
908 out_free_domain:
909         kfree(smmu_domain);
910         return -ENOMEM;
911 }
912
913 static void arm_smmu_free_ptes(pmd_t *pmd)
914 {
915         pgtable_t table = pmd_pgtable(*pmd);
916         pgtable_page_dtor(table);
917         __free_page(table);
918 }
919
920 static void arm_smmu_free_pmds(pud_t *pud)
921 {
922         int i;
923         pmd_t *pmd, *pmd_base = pmd_offset(pud, 0);
924
925         pmd = pmd_base;
926         for (i = 0; i < PTRS_PER_PMD; ++i) {
927                 if (pmd_none(*pmd))
928                         continue;
929
930                 arm_smmu_free_ptes(pmd);
931                 pmd++;
932         }
933
934         pmd_free(NULL, pmd_base);
935 }
936
937 static void arm_smmu_free_puds(pgd_t *pgd)
938 {
939         int i;
940         pud_t *pud, *pud_base = pud_offset(pgd, 0);
941
942         pud = pud_base;
943         for (i = 0; i < PTRS_PER_PUD; ++i) {
944                 if (pud_none(*pud))
945                         continue;
946
947                 arm_smmu_free_pmds(pud);
948                 pud++;
949         }
950
951         pud_free(NULL, pud_base);
952 }
953
954 static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain)
955 {
956         int i;
957         struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;
958         pgd_t *pgd, *pgd_base = root_cfg->pgd;
959
960         /*
961          * Recursively free the page tables for this domain. We don't
962          * care about speculative TLB filling, because the TLB will be
963          * nuked next time this context bank is re-allocated and no devices
964          * currently map to these tables.
965          */
966         pgd = pgd_base;
967         for (i = 0; i < PTRS_PER_PGD; ++i) {
968                 if (pgd_none(*pgd))
969                         continue;
970                 arm_smmu_free_puds(pgd);
971                 pgd++;
972         }
973
974         kfree(pgd_base);
975 }
976
977 static void arm_smmu_domain_destroy(struct iommu_domain *domain)
978 {
979         struct arm_smmu_domain *smmu_domain = domain->priv;
980
981         /*
982          * Free the domain resources. We assume that all devices have
983          * already been detached.
984          */
985         arm_smmu_destroy_domain_context(domain);
986         arm_smmu_free_pgtables(smmu_domain);
987         kfree(smmu_domain);
988 }
989
990 static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
991                                           struct arm_smmu_master *master)
992 {
993         int i;
994         struct arm_smmu_smr *smrs;
995         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
996
997         if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH))
998                 return 0;
999
1000         if (master->smrs)
1001                 return -EEXIST;
1002
1003         smrs = kmalloc(sizeof(*smrs) * master->num_streamids, GFP_KERNEL);
1004         if (!smrs) {
1005                 dev_err(smmu->dev, "failed to allocate %d SMRs for master %s\n",
1006                         master->num_streamids, master->of_node->name);
1007                 return -ENOMEM;
1008         }
1009
1010         /* Allocate the SMRs on the root SMMU */
1011         for (i = 0; i < master->num_streamids; ++i) {
1012                 int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
1013                                                   smmu->num_mapping_groups);
1014                 if (IS_ERR_VALUE(idx)) {
1015                         dev_err(smmu->dev, "failed to allocate free SMR\n");
1016                         goto err_free_smrs;
1017                 }
1018
1019                 smrs[i] = (struct arm_smmu_smr) {
1020                         .idx    = idx,
1021                         .mask   = 0, /* We don't currently share SMRs */
1022                         .id     = master->streamids[i],
1023                 };
1024         }
1025
1026         /* It worked! Now, poke the actual hardware */
1027         for (i = 0; i < master->num_streamids; ++i) {
1028                 u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
1029                           smrs[i].mask << SMR_MASK_SHIFT;
1030                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx));
1031         }
1032
1033         master->smrs = smrs;
1034         return 0;
1035
1036 err_free_smrs:
1037         while (--i >= 0)
1038                 __arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx);
1039         kfree(smrs);
1040         return -ENOSPC;
1041 }
1042
1043 static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
1044                                       struct arm_smmu_master *master)
1045 {
1046         int i;
1047         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1048         struct arm_smmu_smr *smrs = master->smrs;
1049
1050         /* Invalidate the SMRs before freeing back to the allocator */
1051         for (i = 0; i < master->num_streamids; ++i) {
1052                 u8 idx = smrs[i].idx;
1053                 writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
1054                 __arm_smmu_free_bitmap(smmu->smr_map, idx);
1055         }
1056
1057         master->smrs = NULL;
1058         kfree(smrs);
1059 }
1060
1061 static void arm_smmu_bypass_stream_mapping(struct arm_smmu_device *smmu,
1062                                            struct arm_smmu_master *master)
1063 {
1064         int i;
1065         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1066
1067         for (i = 0; i < master->num_streamids; ++i) {
1068                 u16 sid = master->streamids[i];
1069                 writel_relaxed(S2CR_TYPE_BYPASS,
1070                                gr0_base + ARM_SMMU_GR0_S2CR(sid));
1071         }
1072 }
1073
1074 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1075                                       struct arm_smmu_master *master)
1076 {
1077         int i, ret;
1078         struct arm_smmu_device *parent, *smmu = smmu_domain->root_cfg.smmu;
1079         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1080
1081         ret = arm_smmu_master_configure_smrs(smmu, master);
1082         if (ret)
1083                 return ret;
1084
1085         /* Bypass the leaves */
1086         smmu = smmu_domain->leaf_smmu;
1087         while ((parent = find_parent_smmu(smmu))) {
1088                 /*
1089                  * We won't have a StreamID match for anything but the root
1090                  * smmu, so we only need to worry about StreamID indexing,
1091                  * where we must install bypass entries in the S2CRs.
1092                  */
1093                 if (smmu->features & ARM_SMMU_FEAT_STREAM_MATCH)
1094                         continue;
1095
1096                 arm_smmu_bypass_stream_mapping(smmu, master);
1097                 smmu = parent;
1098         }
1099
1100         /* Now we're at the root, time to point at our context bank */
1101         for (i = 0; i < master->num_streamids; ++i) {
1102                 u32 idx, s2cr;
1103                 idx = master->smrs ? master->smrs[i].idx : master->streamids[i];
1104                 s2cr = (S2CR_TYPE_TRANS << S2CR_TYPE_SHIFT) |
1105                        (smmu_domain->root_cfg.cbndx << S2CR_CBNDX_SHIFT);
1106                 writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
1107         }
1108
1109         return 0;
1110 }
1111
1112 static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
1113                                           struct arm_smmu_master *master)
1114 {
1115         struct arm_smmu_device *smmu = smmu_domain->root_cfg.smmu;
1116
1117         /*
1118          * We *must* clear the S2CR first, because freeing the SMR means
1119          * that it can be re-allocated immediately.
1120          */
1121         arm_smmu_bypass_stream_mapping(smmu, master);
1122         arm_smmu_master_free_smrs(smmu, master);
1123 }
1124
1125 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1126 {
1127         int ret = -EINVAL;
1128         struct arm_smmu_domain *smmu_domain = domain->priv;
1129         struct arm_smmu_device *device_smmu = dev->archdata.iommu;
1130         struct arm_smmu_master *master;
1131
1132         if (!device_smmu) {
1133                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1134                 return -ENXIO;
1135         }
1136
1137         /*
1138          * Sanity check the domain. We don't currently support domains
1139          * that cross between different SMMU chains.
1140          */
1141         mutex_lock(&smmu_domain->lock);
1142         if (!smmu_domain->leaf_smmu) {
1143                 /* Now that we have a master, we can finalise the domain */
1144                 ret = arm_smmu_init_domain_context(domain, dev);
1145                 if (IS_ERR_VALUE(ret))
1146                         goto err_unlock;
1147
1148                 smmu_domain->leaf_smmu = device_smmu;
1149         } else if (smmu_domain->leaf_smmu != device_smmu) {
1150                 dev_err(dev,
1151                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1152                         dev_name(smmu_domain->leaf_smmu->dev),
1153                         dev_name(device_smmu->dev));
1154                 goto err_unlock;
1155         }
1156         mutex_unlock(&smmu_domain->lock);
1157
1158         /* Looks ok, so add the device to the domain */
1159         master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node);
1160         if (!master)
1161                 return -ENODEV;
1162
1163         return arm_smmu_domain_add_master(smmu_domain, master);
1164
1165 err_unlock:
1166         mutex_unlock(&smmu_domain->lock);
1167         return ret;
1168 }
1169
1170 static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
1171 {
1172         struct arm_smmu_domain *smmu_domain = domain->priv;
1173         struct arm_smmu_master *master;
1174
1175         master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node);
1176         if (master)
1177                 arm_smmu_domain_remove_master(smmu_domain, master);
1178 }
1179
1180 static void arm_smmu_flush_pgtable(struct arm_smmu_device *smmu, void *addr,
1181                                    size_t size)
1182 {
1183         unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
1184
1185         /*
1186          * If the SMMU can't walk tables in the CPU caches, treat them
1187          * like non-coherent DMA since we need to flush the new entries
1188          * all the way out to memory. There's no possibility of recursion
1189          * here as the SMMU table walker will not be wired through another
1190          * SMMU.
1191          */
1192         if (!(smmu->features & ARM_SMMU_FEAT_COHERENT_WALK))
1193                 dma_map_page(smmu->dev, virt_to_page(addr), offset, size,
1194                              DMA_TO_DEVICE);
1195 }
1196
1197 static bool arm_smmu_pte_is_contiguous_range(unsigned long addr,
1198                                              unsigned long end)
1199 {
1200         return !(addr & ~ARM_SMMU_PTE_CONT_MASK) &&
1201                 (addr + ARM_SMMU_PTE_CONT_SIZE <= end);
1202 }
1203
1204 static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd,
1205                                    unsigned long addr, unsigned long end,
1206                                    unsigned long pfn, int flags, int stage)
1207 {
1208         pte_t *pte, *start;
1209         pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF | ARM_SMMU_PTE_XN;
1210
1211         if (pmd_none(*pmd)) {
1212                 /* Allocate a new set of tables */
1213                 pgtable_t table = alloc_page(PGALLOC_GFP);
1214                 if (!table)
1215                         return -ENOMEM;
1216
1217                 arm_smmu_flush_pgtable(smmu, page_address(table),
1218                                        ARM_SMMU_PTE_HWTABLE_SIZE);
1219                 if (!pgtable_page_ctor(table)) {
1220                         __free_page(table);
1221                         return -ENOMEM;
1222                 }
1223                 pmd_populate(NULL, pmd, table);
1224                 arm_smmu_flush_pgtable(smmu, pmd, sizeof(*pmd));
1225         }
1226
1227         if (stage == 1) {
1228                 pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG;
1229                 if (!(flags & IOMMU_WRITE) && (flags & IOMMU_READ))
1230                         pteval |= ARM_SMMU_PTE_AP_RDONLY;
1231
1232                 if (flags & IOMMU_CACHE)
1233                         pteval |= (MAIR_ATTR_IDX_CACHE <<
1234                                    ARM_SMMU_PTE_ATTRINDX_SHIFT);
1235         } else {
1236                 pteval |= ARM_SMMU_PTE_HAP_FAULT;
1237                 if (flags & IOMMU_READ)
1238                         pteval |= ARM_SMMU_PTE_HAP_READ;
1239                 if (flags & IOMMU_WRITE)
1240                         pteval |= ARM_SMMU_PTE_HAP_WRITE;
1241                 if (flags & IOMMU_CACHE)
1242                         pteval |= ARM_SMMU_PTE_MEMATTR_OIWB;
1243                 else
1244                         pteval |= ARM_SMMU_PTE_MEMATTR_NC;
1245         }
1246
1247         /* If no access, create a faulting entry to avoid TLB fills */
1248         if (flags & IOMMU_EXEC)
1249                 pteval &= ~ARM_SMMU_PTE_XN;
1250         else if (!(flags & (IOMMU_READ | IOMMU_WRITE)))
1251                 pteval &= ~ARM_SMMU_PTE_PAGE;
1252
1253         pteval |= ARM_SMMU_PTE_SH_IS;
1254         start = pmd_page_vaddr(*pmd) + pte_index(addr);
1255         pte = start;
1256
1257         /*
1258          * Install the page table entries. This is fairly complicated
1259          * since we attempt to make use of the contiguous hint in the
1260          * ptes where possible. The contiguous hint indicates a series
1261          * of ARM_SMMU_PTE_CONT_ENTRIES ptes mapping a physically
1262          * contiguous region with the following constraints:
1263          *
1264          *   - The region start is aligned to ARM_SMMU_PTE_CONT_SIZE
1265          *   - Each pte in the region has the contiguous hint bit set
1266          *
1267          * This complicates unmapping (also handled by this code, when
1268          * neither IOMMU_READ or IOMMU_WRITE are set) because it is
1269          * possible, yet highly unlikely, that a client may unmap only
1270          * part of a contiguous range. This requires clearing of the
1271          * contiguous hint bits in the range before installing the new
1272          * faulting entries.
1273          *
1274          * Note that re-mapping an address range without first unmapping
1275          * it is not supported, so TLB invalidation is not required here
1276          * and is instead performed at unmap and domain-init time.
1277          */
1278         do {
1279                 int i = 1;
1280                 pteval &= ~ARM_SMMU_PTE_CONT;
1281
1282                 if (arm_smmu_pte_is_contiguous_range(addr, end)) {
1283                         i = ARM_SMMU_PTE_CONT_ENTRIES;
1284                         pteval |= ARM_SMMU_PTE_CONT;
1285                 } else if (pte_val(*pte) &
1286                            (ARM_SMMU_PTE_CONT | ARM_SMMU_PTE_PAGE)) {
1287                         int j;
1288                         pte_t *cont_start;
1289                         unsigned long idx = pte_index(addr);
1290
1291                         idx &= ~(ARM_SMMU_PTE_CONT_ENTRIES - 1);
1292                         cont_start = pmd_page_vaddr(*pmd) + idx;
1293                         for (j = 0; j < ARM_SMMU_PTE_CONT_ENTRIES; ++j)
1294                                 pte_val(*(cont_start + j)) &= ~ARM_SMMU_PTE_CONT;
1295
1296                         arm_smmu_flush_pgtable(smmu, cont_start,
1297                                                sizeof(*pte) *
1298                                                ARM_SMMU_PTE_CONT_ENTRIES);
1299                 }
1300
1301                 do {
1302                         *pte = pfn_pte(pfn, __pgprot(pteval));
1303                 } while (pte++, pfn++, addr += PAGE_SIZE, --i);
1304         } while (addr != end);
1305
1306         arm_smmu_flush_pgtable(smmu, start, sizeof(*pte) * (pte - start));
1307         return 0;
1308 }
1309
1310 static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud,
1311                                    unsigned long addr, unsigned long end,
1312                                    phys_addr_t phys, int flags, int stage)
1313 {
1314         int ret;
1315         pmd_t *pmd;
1316         unsigned long next, pfn = __phys_to_pfn(phys);
1317
1318 #ifndef __PAGETABLE_PMD_FOLDED
1319         if (pud_none(*pud)) {
1320                 pmd = pmd_alloc_one(NULL, addr);
1321                 if (!pmd)
1322                         return -ENOMEM;
1323         } else
1324 #endif
1325                 pmd = pmd_offset(pud, addr);
1326
1327         do {
1328                 next = pmd_addr_end(addr, end);
1329                 ret = arm_smmu_alloc_init_pte(smmu, pmd, addr, end, pfn,
1330                                               flags, stage);
1331                 pud_populate(NULL, pud, pmd);
1332                 arm_smmu_flush_pgtable(smmu, pud, sizeof(*pud));
1333                 phys += next - addr;
1334         } while (pmd++, addr = next, addr < end);
1335
1336         return ret;
1337 }
1338
1339 static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd,
1340                                    unsigned long addr, unsigned long end,
1341                                    phys_addr_t phys, int flags, int stage)
1342 {
1343         int ret = 0;
1344         pud_t *pud;
1345         unsigned long next;
1346
1347 #ifndef __PAGETABLE_PUD_FOLDED
1348         if (pgd_none(*pgd)) {
1349                 pud = pud_alloc_one(NULL, addr);
1350                 if (!pud)
1351                         return -ENOMEM;
1352         } else
1353 #endif
1354                 pud = pud_offset(pgd, addr);
1355
1356         do {
1357                 next = pud_addr_end(addr, end);
1358                 ret = arm_smmu_alloc_init_pmd(smmu, pud, addr, next, phys,
1359                                               flags, stage);
1360                 pgd_populate(NULL, pud, pgd);
1361                 arm_smmu_flush_pgtable(smmu, pgd, sizeof(*pgd));
1362                 phys += next - addr;
1363         } while (pud++, addr = next, addr < end);
1364
1365         return ret;
1366 }
1367
1368 static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain,
1369                                    unsigned long iova, phys_addr_t paddr,
1370                                    size_t size, int flags)
1371 {
1372         int ret, stage;
1373         unsigned long end;
1374         phys_addr_t input_mask, output_mask;
1375         struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;
1376         pgd_t *pgd = root_cfg->pgd;
1377         struct arm_smmu_device *smmu = root_cfg->smmu;
1378
1379         if (root_cfg->cbar == CBAR_TYPE_S2_TRANS) {
1380                 stage = 2;
1381                 output_mask = (1ULL << smmu->s2_output_size) - 1;
1382         } else {
1383                 stage = 1;
1384                 output_mask = (1ULL << smmu->s1_output_size) - 1;
1385         }
1386
1387         if (!pgd)
1388                 return -EINVAL;
1389
1390         if (size & ~PAGE_MASK)
1391                 return -EINVAL;
1392
1393         input_mask = (1ULL << smmu->input_size) - 1;
1394         if ((phys_addr_t)iova & ~input_mask)
1395                 return -ERANGE;
1396
1397         if (paddr & ~output_mask)
1398                 return -ERANGE;
1399
1400         mutex_lock(&smmu_domain->lock);
1401         pgd += pgd_index(iova);
1402         end = iova + size;
1403         do {
1404                 unsigned long next = pgd_addr_end(iova, end);
1405
1406                 ret = arm_smmu_alloc_init_pud(smmu, pgd, iova, next, paddr,
1407                                               flags, stage);
1408                 if (ret)
1409                         goto out_unlock;
1410
1411                 paddr += next - iova;
1412                 iova = next;
1413         } while (pgd++, iova != end);
1414
1415 out_unlock:
1416         mutex_unlock(&smmu_domain->lock);
1417
1418         /* Ensure new page tables are visible to the hardware walker */
1419         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
1420                 dsb();
1421
1422         return ret;
1423 }
1424
1425 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1426                         phys_addr_t paddr, size_t size, int flags)
1427 {
1428         struct arm_smmu_domain *smmu_domain = domain->priv;
1429
1430         if (!smmu_domain)
1431                 return -ENODEV;
1432
1433         /* Check for silent address truncation up the SMMU chain. */
1434         if ((phys_addr_t)iova & ~smmu_domain->output_mask)
1435                 return -ERANGE;
1436
1437         return arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, flags);
1438 }
1439
1440 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1441                              size_t size)
1442 {
1443         int ret;
1444         struct arm_smmu_domain *smmu_domain = domain->priv;
1445
1446         ret = arm_smmu_handle_mapping(smmu_domain, iova, 0, size, 0);
1447         arm_smmu_tlb_inv_context(&smmu_domain->root_cfg);
1448         return ret ? ret : size;
1449 }
1450
1451 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1452                                          dma_addr_t iova)
1453 {
1454         pgd_t *pgdp, pgd;
1455         pud_t pud;
1456         pmd_t pmd;
1457         pte_t pte;
1458         struct arm_smmu_domain *smmu_domain = domain->priv;
1459         struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;
1460
1461         pgdp = root_cfg->pgd;
1462         if (!pgdp)
1463                 return 0;
1464
1465         pgd = *(pgdp + pgd_index(iova));
1466         if (pgd_none(pgd))
1467                 return 0;
1468
1469         pud = *pud_offset(&pgd, iova);
1470         if (pud_none(pud))
1471                 return 0;
1472
1473         pmd = *pmd_offset(&pud, iova);
1474         if (pmd_none(pmd))
1475                 return 0;
1476
1477         pte = *(pmd_page_vaddr(pmd) + pte_index(iova));
1478         if (pte_none(pte))
1479                 return 0;
1480
1481         return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
1482 }
1483
1484 static int arm_smmu_domain_has_cap(struct iommu_domain *domain,
1485                                    unsigned long cap)
1486 {
1487         unsigned long caps = 0;
1488         struct arm_smmu_domain *smmu_domain = domain->priv;
1489
1490         if (smmu_domain->root_cfg.smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
1491                 caps |= IOMMU_CAP_CACHE_COHERENCY;
1492
1493         return !!(cap & caps);
1494 }
1495
1496 static int arm_smmu_add_device(struct device *dev)
1497 {
1498         struct arm_smmu_device *child, *parent, *smmu;
1499         struct arm_smmu_master *master = NULL;
1500         struct iommu_group *group;
1501         int ret;
1502
1503         if (dev->archdata.iommu) {
1504                 dev_warn(dev, "IOMMU driver already assigned to device\n");
1505                 return -EINVAL;
1506         }
1507
1508         spin_lock(&arm_smmu_devices_lock);
1509         list_for_each_entry(parent, &arm_smmu_devices, list) {
1510                 smmu = parent;
1511
1512                 /* Try to find a child of the current SMMU. */
1513                 list_for_each_entry(child, &arm_smmu_devices, list) {
1514                         if (child->parent_of_node == parent->dev->of_node) {
1515                                 /* Does the child sit above our master? */
1516                                 master = find_smmu_master(child, dev->of_node);
1517                                 if (master) {
1518                                         smmu = NULL;
1519                                         break;
1520                                 }
1521                         }
1522                 }
1523
1524                 /* We found some children, so keep searching. */
1525                 if (!smmu) {
1526                         master = NULL;
1527                         continue;
1528                 }
1529
1530                 master = find_smmu_master(smmu, dev->of_node);
1531                 if (master)
1532                         break;
1533         }
1534         spin_unlock(&arm_smmu_devices_lock);
1535
1536         if (!master)
1537                 return -ENODEV;
1538
1539         group = iommu_group_alloc();
1540         if (IS_ERR(group)) {
1541                 dev_err(dev, "Failed to allocate IOMMU group\n");
1542                 return PTR_ERR(group);
1543         }
1544
1545         ret = iommu_group_add_device(group, dev);
1546         iommu_group_put(group);
1547         dev->archdata.iommu = smmu;
1548
1549         return ret;
1550 }
1551
1552 static void arm_smmu_remove_device(struct device *dev)
1553 {
1554         dev->archdata.iommu = NULL;
1555         iommu_group_remove_device(dev);
1556 }
1557
/*
 * IOMMU API callbacks implemented by this driver.
 *
 * pgsize_bitmap advertises three sizes: SECTION_SIZE, the contiguous-hint
 * region size (ARM_SMMU_PTE_CONT_SIZE) and PAGE_SIZE.
 * NOTE(review): the mapping code in this file only ever writes page-level
 * ptes (with the contiguous hint where possible); confirm that
 * advertising SECTION_SIZE is intended purely as a chunking size.
 */
static struct iommu_ops arm_smmu_ops = {
        .domain_init    = arm_smmu_domain_init,
        .domain_destroy = arm_smmu_domain_destroy,
        .attach_dev     = arm_smmu_attach_dev,
        .detach_dev     = arm_smmu_detach_dev,
        .map            = arm_smmu_map,
        .unmap          = arm_smmu_unmap,
        .iova_to_phys   = arm_smmu_iova_to_phys,
        .domain_has_cap = arm_smmu_domain_has_cap,
        .add_device     = arm_smmu_add_device,
        .remove_device  = arm_smmu_remove_device,
        .pgsize_bitmap  = (SECTION_SIZE |
                           ARM_SMMU_PTE_CONT_SIZE |
                           PAGE_SIZE),
};
1573
1574 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1575 {
1576         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1577         void __iomem *cb_base;
1578         int i = 0;
1579         u32 reg;
1580
1581         /* Clear Global FSR */
1582         reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
1583         writel(reg, gr0_base + ARM_SMMU_GR0_sGFSR);
1584
1585         /* Mark all SMRn as invalid and all S2CRn as bypass */
1586         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1587                 writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(i));
1588                 writel_relaxed(S2CR_TYPE_BYPASS, gr0_base + ARM_SMMU_GR0_S2CR(i));
1589         }
1590
1591         /* Make sure all context banks are disabled and clear CB_FSR  */
1592         for (i = 0; i < smmu->num_context_banks; ++i) {
1593                 cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i);
1594                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
1595                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1596         }
1597
1598         /* Invalidate the TLB, just in case */
1599         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL);
1600         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1601         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1602
1603         reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sCR0);
1604
1605         /* Enable fault reporting */
1606         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1607
1608         /* Disable TLB broadcasting. */
1609         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1610
1611         /* Enable client access, but bypass when no mapping is found */
1612         reg &= ~(sCR0_CLIENTPD | sCR0_USFCFG);
1613
1614         /* Disable forced broadcasting */
1615         reg &= ~sCR0_FB;
1616
1617         /* Don't upgrade barriers */
1618         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1619
1620         /* Push the button */
1621         arm_smmu_tlb_sync(smmu);
1622         writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sCR0);
1623 }
1624
/*
 * Translate an address-size encoding taken from an SMMU ID register field
 * into a width in bits.
 *
 * Encodings 0..4 map to 32, 36, 40, 42 and 44 bits; encoding 5 and every
 * other value (including negative ones) yield 48.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int bits_by_encoding[] = { 32, 36, 40, 42, 44 };
	const int nr_known = (int)(sizeof(bits_by_encoding) /
				   sizeof(bits_by_encoding[0]));

	if (size < 0 || size >= nr_known)
		return 48;

	return bits_by_encoding[size];
}
1643
1644 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1645 {
1646         unsigned long size;
1647         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1648         u32 id;
1649
1650         dev_notice(smmu->dev, "probing hardware configuration...\n");
1651
1652         /* Primecell ID */
1653         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_PIDR2);
1654         smmu->version = ((id >> PIDR2_ARCH_SHIFT) & PIDR2_ARCH_MASK) + 1;
1655         dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version);
1656
1657         /* ID0 */
1658         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1659 #ifndef CONFIG_64BIT
1660         if (((id >> ID0_PTFS_SHIFT) & ID0_PTFS_MASK) == ID0_PTFS_V8_ONLY) {
1661                 dev_err(smmu->dev, "\tno v7 descriptor support!\n");
1662                 return -ENODEV;
1663         }
1664 #endif
1665         if (id & ID0_S1TS) {
1666                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1667                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1668         }
1669
1670         if (id & ID0_S2TS) {
1671                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1672                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1673         }
1674
1675         if (id & ID0_NTS) {
1676                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1677                 dev_notice(smmu->dev, "\tnested translation\n");
1678         }
1679
1680         if (!(smmu->features &
1681                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2 |
1682                  ARM_SMMU_FEAT_TRANS_NESTED))) {
1683                 dev_err(smmu->dev, "\tno translation support!\n");
1684                 return -ENODEV;
1685         }
1686
1687         if (id & ID0_CTTW) {
1688                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1689                 dev_notice(smmu->dev, "\tcoherent table walk\n");
1690         }
1691
1692         if (id & ID0_SMS) {
1693                 u32 smr, sid, mask;
1694
1695                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1696                 smmu->num_mapping_groups = (id >> ID0_NUMSMRG_SHIFT) &
1697                                            ID0_NUMSMRG_MASK;
1698                 if (smmu->num_mapping_groups == 0) {
1699                         dev_err(smmu->dev,
1700                                 "stream-matching supported, but no SMRs present!\n");
1701                         return -ENODEV;
1702                 }
1703
1704                 smr = SMR_MASK_MASK << SMR_MASK_SHIFT;
1705                 smr |= (SMR_ID_MASK << SMR_ID_SHIFT);
1706                 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1707                 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1708
1709                 mask = (smr >> SMR_MASK_SHIFT) & SMR_MASK_MASK;
1710                 sid = (smr >> SMR_ID_SHIFT) & SMR_ID_MASK;
1711                 if ((mask & sid) != sid) {
1712                         dev_err(smmu->dev,
1713                                 "SMR mask bits (0x%x) insufficient for ID field (0x%x)\n",
1714                                 mask, sid);
1715                         return -ENODEV;
1716                 }
1717
1718                 dev_notice(smmu->dev,
1719                            "\tstream matching with %u register groups, mask 0x%x",
1720                            smmu->num_mapping_groups, mask);
1721         }
1722
1723         /* ID1 */
1724         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1725         smmu->pagesize = (id & ID1_PAGESIZE) ? SZ_64K : SZ_4K;
1726
1727         /* Check for size mismatch of SMMU address space from mapped region */
1728         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1729         size *= (smmu->pagesize << 1);
1730         if (smmu->size != size)
1731                 dev_warn(smmu->dev, "SMMU address space size (0x%lx) differs "
1732                         "from mapped region size (0x%lx)!\n", size, smmu->size);
1733
1734         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) &
1735                                       ID1_NUMS2CB_MASK;
1736         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1737         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1738                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1739                 return -ENODEV;
1740         }
1741         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1742                    smmu->num_context_banks, smmu->num_s2_context_banks);
1743
1744         /* ID2 */
1745         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1746         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1747
1748         /*
1749          * Stage-1 output limited by stage-2 input size due to pgd
1750          * allocation (PTRS_PER_PGD).
1751          */
1752 #ifdef CONFIG_64BIT
1753         smmu->s1_output_size = min(39UL, size);
1754 #else
1755         smmu->s1_output_size = min(32UL, size);
1756 #endif
1757
1758         /* The stage-2 output mask is also applied for bypass */
1759         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1760         smmu->s2_output_size = min((unsigned long)PHYS_MASK_SHIFT, size);
1761
1762         if (smmu->version == 1) {
1763                 smmu->input_size = 32;
1764         } else {
1765 #ifdef CONFIG_64BIT
1766                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1767                 size = min(VA_BITS, arm_smmu_id_size_to_bits(size));
1768 #else
1769                 size = 32;
1770 #endif
1771                 smmu->input_size = size;
1772
1773                 if ((PAGE_SIZE == SZ_4K && !(id & ID2_PTFS_4K)) ||
1774                     (PAGE_SIZE == SZ_64K && !(id & ID2_PTFS_64K)) ||
1775                     (PAGE_SIZE != SZ_4K && PAGE_SIZE != SZ_64K)) {
1776                         dev_err(smmu->dev, "CPU page size 0x%lx unsupported\n",
1777                                 PAGE_SIZE);
1778                         return -ENODEV;
1779                 }
1780         }
1781
1782         dev_notice(smmu->dev,
1783                    "\t%lu-bit VA, %lu-bit IPA, %lu-bit PA\n",
1784                    smmu->input_size, smmu->s1_output_size, smmu->s2_output_size);
1785         return 0;
1786 }
1787
1788 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
1789 {
1790         struct resource *res;
1791         struct arm_smmu_device *smmu;
1792         struct device_node *dev_node;
1793         struct device *dev = &pdev->dev;
1794         struct rb_node *node;
1795         struct of_phandle_args masterspec;
1796         int num_irqs, i, err;
1797
1798         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
1799         if (!smmu) {
1800                 dev_err(dev, "failed to allocate arm_smmu_device\n");
1801                 return -ENOMEM;
1802         }
1803         smmu->dev = dev;
1804
1805         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1806         smmu->base = devm_ioremap_resource(dev, res);
1807         if (IS_ERR(smmu->base))
1808                 return PTR_ERR(smmu->base);
1809         smmu->size = resource_size(res);
1810
1811         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1812                                  &smmu->num_global_irqs)) {
1813                 dev_err(dev, "missing #global-interrupts property\n");
1814                 return -ENODEV;
1815         }
1816
1817         num_irqs = 0;
1818         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
1819                 num_irqs++;
1820                 if (num_irqs > smmu->num_global_irqs)
1821                         smmu->num_context_irqs++;
1822         }
1823
1824         if (!smmu->num_context_irqs) {
1825                 dev_err(dev, "found %d interrupts but expected at least %d\n",
1826                         num_irqs, smmu->num_global_irqs + 1);
1827                 return -ENODEV;
1828         }
1829
1830         smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
1831                                   GFP_KERNEL);
1832         if (!smmu->irqs) {
1833                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
1834                 return -ENOMEM;
1835         }
1836
1837         for (i = 0; i < num_irqs; ++i) {
1838                 int irq = platform_get_irq(pdev, i);
1839                 if (irq < 0) {
1840                         dev_err(dev, "failed to get irq index %d\n", i);
1841                         return -ENODEV;
1842                 }
1843                 smmu->irqs[i] = irq;
1844         }
1845
1846         i = 0;
1847         smmu->masters = RB_ROOT;
1848         while (!of_parse_phandle_with_args(dev->of_node, "mmu-masters",
1849                                            "#stream-id-cells", i,
1850                                            &masterspec)) {
1851                 err = register_smmu_master(smmu, dev, &masterspec);
1852                 if (err) {
1853                         dev_err(dev, "failed to add master %s\n",
1854                                 masterspec.np->name);
1855                         goto out_put_masters;
1856                 }
1857
1858                 i++;
1859         }
1860         dev_notice(dev, "registered %d master devices\n", i);
1861
1862         if ((dev_node = of_parse_phandle(dev->of_node, "smmu-parent", 0)))
1863                 smmu->parent_of_node = dev_node;
1864
1865         err = arm_smmu_device_cfg_probe(smmu);
1866         if (err)
1867                 goto out_put_parent;
1868
1869         if (smmu->version > 1 &&
1870             smmu->num_context_banks != smmu->num_context_irqs) {
1871                 dev_err(dev,
1872                         "found only %d context interrupt(s) but %d required\n",
1873                         smmu->num_context_irqs, smmu->num_context_banks);
1874                 err = -ENODEV;
1875                 goto out_put_parent;
1876         }
1877
1878         for (i = 0; i < smmu->num_global_irqs; ++i) {
1879                 err = request_irq(smmu->irqs[i],
1880                                   arm_smmu_global_fault,
1881                                   IRQF_SHARED,
1882                                   "arm-smmu global fault",
1883                                   smmu);
1884                 if (err) {
1885                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
1886                                 i, smmu->irqs[i]);
1887                         goto out_free_irqs;
1888                 }
1889         }
1890
1891         INIT_LIST_HEAD(&smmu->list);
1892         spin_lock(&arm_smmu_devices_lock);
1893         list_add(&smmu->list, &arm_smmu_devices);
1894         spin_unlock(&arm_smmu_devices_lock);
1895
1896         arm_smmu_device_reset(smmu);
1897         return 0;
1898
1899 out_free_irqs:
1900         while (i--)
1901                 free_irq(smmu->irqs[i], smmu);
1902
1903 out_put_parent:
1904         if (smmu->parent_of_node)
1905                 of_node_put(smmu->parent_of_node);
1906
1907 out_put_masters:
1908         for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
1909                 struct arm_smmu_master *master;
1910                 master = container_of(node, struct arm_smmu_master, node);
1911                 of_node_put(master->of_node);
1912         }
1913
1914         return err;
1915 }
1916
1917 static int arm_smmu_device_remove(struct platform_device *pdev)
1918 {
1919         int i;
1920         struct device *dev = &pdev->dev;
1921         struct arm_smmu_device *curr, *smmu = NULL;
1922         struct rb_node *node;
1923
1924         spin_lock(&arm_smmu_devices_lock);
1925         list_for_each_entry(curr, &arm_smmu_devices, list) {
1926                 if (curr->dev == dev) {
1927                         smmu = curr;
1928                         list_del(&smmu->list);
1929                         break;
1930                 }
1931         }
1932         spin_unlock(&arm_smmu_devices_lock);
1933
1934         if (!smmu)
1935                 return -ENODEV;
1936
1937         if (smmu->parent_of_node)
1938                 of_node_put(smmu->parent_of_node);
1939
1940         for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
1941                 struct arm_smmu_master *master;
1942                 master = container_of(node, struct arm_smmu_master, node);
1943                 of_node_put(master->of_node);
1944         }
1945
1946         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
1947                 dev_err(dev, "removing device with active domains!\n");
1948
1949         for (i = 0; i < smmu->num_global_irqs; ++i)
1950                 free_irq(smmu->irqs[i], smmu);
1951
1952         /* Turn the thing off */
1953         writel_relaxed(sCR0_CLIENTPD, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_sCR0);
1954         return 0;
1955 }
1956
#ifdef CONFIG_OF
/*
 * Device-tree "compatible" strings handled by this driver. The table is
 * only ever read, so it is declared const.
 */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", },
	{ .compatible = "arm,smmu-v2", },
	{ .compatible = "arm,mmu-400", },
	{ .compatible = "arm,mmu-500", },
	{ },	/* sentinel */
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
#endif
1967
1968 static struct platform_driver arm_smmu_driver = {
1969         .driver = {
1970                 .owner          = THIS_MODULE,
1971                 .name           = "arm-smmu",
1972                 .of_match_table = of_match_ptr(arm_smmu_of_match),
1973         },
1974         .probe  = arm_smmu_device_dt_probe,
1975         .remove = arm_smmu_device_remove,
1976 };
1977
1978 static int __init arm_smmu_init(void)
1979 {
1980         int ret;
1981
1982         ret = platform_driver_register(&arm_smmu_driver);
1983         if (ret)
1984                 return ret;
1985
1986         /* Oh, for a proper bus abstraction */
1987         if (!iommu_present(&platform_bus_type))
1988                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
1989
1990         if (!iommu_present(&amba_bustype))
1991                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
1992
1993         return 0;
1994 }
1995
1996 static void __exit arm_smmu_exit(void)
1997 {
1998         return platform_driver_unregister(&arm_smmu_driver);
1999 }
2000
2001 subsys_initcall(arm_smmu_init);
2002 module_exit(arm_smmu_exit);
2003
2004 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2005 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2006 MODULE_LICENSE("GPL v2");