m68k: Migrate exception table users off module.h and onto extable.h
[platform/kernel/linux-exynos.git] / drivers / iommu / arm-smmu-v3.c
1 /*
2  * IOMMU API for ARM architected SMMUv3 implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright (C) 2015 ARM Limited
17  *
18  * Author: Will Deacon <will.deacon@arm.com>
19  *
20  * This driver is powered by bad coffee and bombay mix.
21  */
22
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/iommu.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/msi.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_platform.h>
34 #include <linux/pci.h>
35 #include <linux/platform_device.h>
36
37 #include "io-pgtable.h"
38
39 /* MMIO registers */
40 #define ARM_SMMU_IDR0                   0x0
41 #define IDR0_ST_LVL_SHIFT               27
42 #define IDR0_ST_LVL_MASK                0x3
43 #define IDR0_ST_LVL_2LVL                (1 << IDR0_ST_LVL_SHIFT)
44 #define IDR0_STALL_MODEL_SHIFT          24
45 #define IDR0_STALL_MODEL_MASK           0x3
46 #define IDR0_STALL_MODEL_STALL          (0 << IDR0_STALL_MODEL_SHIFT)
47 #define IDR0_STALL_MODEL_FORCE          (2 << IDR0_STALL_MODEL_SHIFT)
48 #define IDR0_TTENDIAN_SHIFT             21
49 #define IDR0_TTENDIAN_MASK              0x3
50 #define IDR0_TTENDIAN_LE                (2 << IDR0_TTENDIAN_SHIFT)
51 #define IDR0_TTENDIAN_BE                (3 << IDR0_TTENDIAN_SHIFT)
52 #define IDR0_TTENDIAN_MIXED             (0 << IDR0_TTENDIAN_SHIFT)
53 #define IDR0_CD2L                       (1 << 19)
54 #define IDR0_VMID16                     (1 << 18)
55 #define IDR0_PRI                        (1 << 16)
56 #define IDR0_SEV                        (1 << 14)
57 #define IDR0_MSI                        (1 << 13)
58 #define IDR0_ASID16                     (1 << 12)
59 #define IDR0_ATS                        (1 << 10)
60 #define IDR0_HYP                        (1 << 9)
61 #define IDR0_COHACC                     (1 << 4)
62 #define IDR0_TTF_SHIFT                  2
63 #define IDR0_TTF_MASK                   0x3
64 #define IDR0_TTF_AARCH64                (2 << IDR0_TTF_SHIFT)
65 #define IDR0_TTF_AARCH32_64             (3 << IDR0_TTF_SHIFT)
66 #define IDR0_S1P                        (1 << 1)
67 #define IDR0_S2P                        (1 << 0)
68
69 #define ARM_SMMU_IDR1                   0x4
70 #define IDR1_TABLES_PRESET              (1 << 30)
71 #define IDR1_QUEUES_PRESET              (1 << 29)
72 #define IDR1_REL                        (1 << 28)
73 #define IDR1_CMDQ_SHIFT                 21
74 #define IDR1_CMDQ_MASK                  0x1f
75 #define IDR1_EVTQ_SHIFT                 16
76 #define IDR1_EVTQ_MASK                  0x1f
77 #define IDR1_PRIQ_SHIFT                 11
78 #define IDR1_PRIQ_MASK                  0x1f
79 #define IDR1_SSID_SHIFT                 6
80 #define IDR1_SSID_MASK                  0x1f
81 #define IDR1_SID_SHIFT                  0
82 #define IDR1_SID_MASK                   0x3f
83
84 #define ARM_SMMU_IDR5                   0x14
85 #define IDR5_STALL_MAX_SHIFT            16
86 #define IDR5_STALL_MAX_MASK             0xffff
87 #define IDR5_GRAN64K                    (1 << 6)
88 #define IDR5_GRAN16K                    (1 << 5)
89 #define IDR5_GRAN4K                     (1 << 4)
90 #define IDR5_OAS_SHIFT                  0
91 #define IDR5_OAS_MASK                   0x7
92 #define IDR5_OAS_32_BIT                 (0 << IDR5_OAS_SHIFT)
93 #define IDR5_OAS_36_BIT                 (1 << IDR5_OAS_SHIFT)
94 #define IDR5_OAS_40_BIT                 (2 << IDR5_OAS_SHIFT)
95 #define IDR5_OAS_42_BIT                 (3 << IDR5_OAS_SHIFT)
96 #define IDR5_OAS_44_BIT                 (4 << IDR5_OAS_SHIFT)
97 #define IDR5_OAS_48_BIT                 (5 << IDR5_OAS_SHIFT)
98
99 #define ARM_SMMU_CR0                    0x20
100 #define CR0_CMDQEN                      (1 << 3)
101 #define CR0_EVTQEN                      (1 << 2)
102 #define CR0_PRIQEN                      (1 << 1)
103 #define CR0_SMMUEN                      (1 << 0)
104
105 #define ARM_SMMU_CR0ACK                 0x24
106
107 #define ARM_SMMU_CR1                    0x28
108 #define CR1_SH_NSH                      0
109 #define CR1_SH_OSH                      2
110 #define CR1_SH_ISH                      3
111 #define CR1_CACHE_NC                    0
112 #define CR1_CACHE_WB                    1
113 #define CR1_CACHE_WT                    2
114 #define CR1_TABLE_SH_SHIFT              10
115 #define CR1_TABLE_OC_SHIFT              8
116 #define CR1_TABLE_IC_SHIFT              6
117 #define CR1_QUEUE_SH_SHIFT              4
118 #define CR1_QUEUE_OC_SHIFT              2
119 #define CR1_QUEUE_IC_SHIFT              0
120
121 #define ARM_SMMU_CR2                    0x2c
122 #define CR2_PTM                         (1 << 2)
123 #define CR2_RECINVSID                   (1 << 1)
124 #define CR2_E2H                         (1 << 0)
125
126 #define ARM_SMMU_IRQ_CTRL               0x50
127 #define IRQ_CTRL_EVTQ_IRQEN             (1 << 2)
128 #define IRQ_CTRL_PRIQ_IRQEN             (1 << 1)
129 #define IRQ_CTRL_GERROR_IRQEN           (1 << 0)
130
131 #define ARM_SMMU_IRQ_CTRLACK            0x54
132
133 #define ARM_SMMU_GERROR                 0x60
134 #define GERROR_SFM_ERR                  (1 << 8)
135 #define GERROR_MSI_GERROR_ABT_ERR       (1 << 7)
136 #define GERROR_MSI_PRIQ_ABT_ERR         (1 << 6)
137 #define GERROR_MSI_EVTQ_ABT_ERR         (1 << 5)
138 #define GERROR_MSI_CMDQ_ABT_ERR         (1 << 4)
139 #define GERROR_PRIQ_ABT_ERR             (1 << 3)
140 #define GERROR_EVTQ_ABT_ERR             (1 << 2)
141 #define GERROR_CMDQ_ERR                 (1 << 0)
142 #define GERROR_ERR_MASK                 0xfd
143
144 #define ARM_SMMU_GERRORN                0x64
145
146 #define ARM_SMMU_GERROR_IRQ_CFG0        0x68
147 #define ARM_SMMU_GERROR_IRQ_CFG1        0x70
148 #define ARM_SMMU_GERROR_IRQ_CFG2        0x74
149
150 #define ARM_SMMU_STRTAB_BASE            0x80
151 #define STRTAB_BASE_RA                  (1UL << 62)
152 #define STRTAB_BASE_ADDR_SHIFT          6
153 #define STRTAB_BASE_ADDR_MASK           0x3ffffffffffUL
154
155 #define ARM_SMMU_STRTAB_BASE_CFG        0x88
156 #define STRTAB_BASE_CFG_LOG2SIZE_SHIFT  0
157 #define STRTAB_BASE_CFG_LOG2SIZE_MASK   0x3f
158 #define STRTAB_BASE_CFG_SPLIT_SHIFT     6
159 #define STRTAB_BASE_CFG_SPLIT_MASK      0x1f
160 #define STRTAB_BASE_CFG_FMT_SHIFT       16
161 #define STRTAB_BASE_CFG_FMT_MASK        0x3
162 #define STRTAB_BASE_CFG_FMT_LINEAR      (0 << STRTAB_BASE_CFG_FMT_SHIFT)
163 #define STRTAB_BASE_CFG_FMT_2LVL        (1 << STRTAB_BASE_CFG_FMT_SHIFT)
164
165 #define ARM_SMMU_CMDQ_BASE              0x90
166 #define ARM_SMMU_CMDQ_PROD              0x98
167 #define ARM_SMMU_CMDQ_CONS              0x9c
168
169 #define ARM_SMMU_EVTQ_BASE              0xa0
170 #define ARM_SMMU_EVTQ_PROD              0x100a8
171 #define ARM_SMMU_EVTQ_CONS              0x100ac
172 #define ARM_SMMU_EVTQ_IRQ_CFG0          0xb0
173 #define ARM_SMMU_EVTQ_IRQ_CFG1          0xb8
174 #define ARM_SMMU_EVTQ_IRQ_CFG2          0xbc
175
176 #define ARM_SMMU_PRIQ_BASE              0xc0
177 #define ARM_SMMU_PRIQ_PROD              0x100c8
178 #define ARM_SMMU_PRIQ_CONS              0x100cc
179 #define ARM_SMMU_PRIQ_IRQ_CFG0          0xd0
180 #define ARM_SMMU_PRIQ_IRQ_CFG1          0xd8
181 #define ARM_SMMU_PRIQ_IRQ_CFG2          0xdc
182
183 /* Common MSI config fields */
184 #define MSI_CFG0_ADDR_SHIFT             2
185 #define MSI_CFG0_ADDR_MASK              0x3fffffffffffUL
186 #define MSI_CFG2_SH_SHIFT               4
187 #define MSI_CFG2_SH_NSH                 (0UL << MSI_CFG2_SH_SHIFT)
188 #define MSI_CFG2_SH_OSH                 (2UL << MSI_CFG2_SH_SHIFT)
189 #define MSI_CFG2_SH_ISH                 (3UL << MSI_CFG2_SH_SHIFT)
190 #define MSI_CFG2_MEMATTR_SHIFT          0
191 #define MSI_CFG2_MEMATTR_DEVICE_nGnRE   (0x1 << MSI_CFG2_MEMATTR_SHIFT)
192
/*
 * Queue pointer helpers.
 *
 * A producer/consumer value 'p' packs three fields: the entry index in the
 * low (q)->max_n_shift bits, a wrap bit immediately above the index, and an
 * overflow flag in bit 31.
 *
 * All constants are unsigned: building the bit-31 flag with a signed 1 would
 * shift into the sign bit of an int, which is undefined behaviour in C.
 */
#define Q_IDX(q, p)                     ((p) & ((1U << (q)->max_n_shift) - 1))
#define Q_WRP(q, p)                     ((p) & (1U << (q)->max_n_shift))
#define Q_OVERFLOW_FLAG                 (1U << 31)
#define Q_OVF(q, p)                     ((p) & Q_OVERFLOW_FLAG)
/* Address of the entry selected by 'p'; each entry is (q)->ent_dwords words */
#define Q_ENT(q, p)                     ((q)->base +                    \
                                         Q_IDX(q, p) * (q)->ent_dwords)
199
200 #define Q_BASE_RWA                      (1UL << 62)
201 #define Q_BASE_ADDR_SHIFT               5
202 #define Q_BASE_ADDR_MASK                0xfffffffffffUL
203 #define Q_BASE_LOG2SIZE_SHIFT           0
204 #define Q_BASE_LOG2SIZE_MASK            0x1fUL
205
206 /*
207  * Stream table.
208  *
209  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
210  * 2lvl: 128k L1 entries,
211  *       256 lazy entries per table (each table covers a PCI bus)
212  */
213 #define STRTAB_L1_SZ_SHIFT              20
214 #define STRTAB_SPLIT                    8
215
216 #define STRTAB_L1_DESC_DWORDS           1
217 #define STRTAB_L1_DESC_SPAN_SHIFT       0
218 #define STRTAB_L1_DESC_SPAN_MASK        0x1fUL
219 #define STRTAB_L1_DESC_L2PTR_SHIFT      6
220 #define STRTAB_L1_DESC_L2PTR_MASK       0x3ffffffffffUL
221
222 #define STRTAB_STE_DWORDS               8
223 #define STRTAB_STE_0_V                  (1UL << 0)
224 #define STRTAB_STE_0_CFG_SHIFT          1
225 #define STRTAB_STE_0_CFG_MASK           0x7UL
226 #define STRTAB_STE_0_CFG_ABORT          (0UL << STRTAB_STE_0_CFG_SHIFT)
227 #define STRTAB_STE_0_CFG_BYPASS         (4UL << STRTAB_STE_0_CFG_SHIFT)
228 #define STRTAB_STE_0_CFG_S1_TRANS       (5UL << STRTAB_STE_0_CFG_SHIFT)
229 #define STRTAB_STE_0_CFG_S2_TRANS       (6UL << STRTAB_STE_0_CFG_SHIFT)
230
231 #define STRTAB_STE_0_S1FMT_SHIFT        4
232 #define STRTAB_STE_0_S1FMT_LINEAR       (0UL << STRTAB_STE_0_S1FMT_SHIFT)
233 #define STRTAB_STE_0_S1CTXPTR_SHIFT     6
234 #define STRTAB_STE_0_S1CTXPTR_MASK      0x3ffffffffffUL
235 #define STRTAB_STE_0_S1CDMAX_SHIFT      59
236 #define STRTAB_STE_0_S1CDMAX_MASK       0x1fUL
237
238 #define STRTAB_STE_1_S1C_CACHE_NC       0UL
239 #define STRTAB_STE_1_S1C_CACHE_WBRA     1UL
240 #define STRTAB_STE_1_S1C_CACHE_WT       2UL
241 #define STRTAB_STE_1_S1C_CACHE_WB       3UL
242 #define STRTAB_STE_1_S1C_SH_NSH         0UL
243 #define STRTAB_STE_1_S1C_SH_OSH         2UL
244 #define STRTAB_STE_1_S1C_SH_ISH         3UL
245 #define STRTAB_STE_1_S1CIR_SHIFT        2
246 #define STRTAB_STE_1_S1COR_SHIFT        4
247 #define STRTAB_STE_1_S1CSH_SHIFT        6
248
249 #define STRTAB_STE_1_S1STALLD           (1UL << 27)
250
251 #define STRTAB_STE_1_EATS_ABT           0UL
252 #define STRTAB_STE_1_EATS_TRANS         1UL
253 #define STRTAB_STE_1_EATS_S1CHK         2UL
254 #define STRTAB_STE_1_EATS_SHIFT         28
255
256 #define STRTAB_STE_1_STRW_NSEL1         0UL
257 #define STRTAB_STE_1_STRW_EL2           2UL
258 #define STRTAB_STE_1_STRW_SHIFT         30
259
260 #define STRTAB_STE_1_SHCFG_INCOMING     1UL
261 #define STRTAB_STE_1_SHCFG_SHIFT        44
262
263 #define STRTAB_STE_2_S2VMID_SHIFT       0
264 #define STRTAB_STE_2_S2VMID_MASK        0xffffUL
265 #define STRTAB_STE_2_VTCR_SHIFT         32
266 #define STRTAB_STE_2_VTCR_MASK          0x7ffffUL
267 #define STRTAB_STE_2_S2AA64             (1UL << 51)
268 #define STRTAB_STE_2_S2ENDI             (1UL << 52)
269 #define STRTAB_STE_2_S2PTW              (1UL << 54)
270 #define STRTAB_STE_2_S2R                (1UL << 58)
271
272 #define STRTAB_STE_3_S2TTB_SHIFT        4
273 #define STRTAB_STE_3_S2TTB_MASK         0xfffffffffffUL
274
275 /* Context descriptor (stage-1 only) */
276 #define CTXDESC_CD_DWORDS               8
277 #define CTXDESC_CD_0_TCR_T0SZ_SHIFT     0
278 #define ARM64_TCR_T0SZ_SHIFT            0
279 #define ARM64_TCR_T0SZ_MASK             0x1fUL
280 #define CTXDESC_CD_0_TCR_TG0_SHIFT      6
281 #define ARM64_TCR_TG0_SHIFT             14
282 #define ARM64_TCR_TG0_MASK              0x3UL
283 #define CTXDESC_CD_0_TCR_IRGN0_SHIFT    8
284 #define ARM64_TCR_IRGN0_SHIFT           8
285 #define ARM64_TCR_IRGN0_MASK            0x3UL
286 #define CTXDESC_CD_0_TCR_ORGN0_SHIFT    10
287 #define ARM64_TCR_ORGN0_SHIFT           10
288 #define ARM64_TCR_ORGN0_MASK            0x3UL
289 #define CTXDESC_CD_0_TCR_SH0_SHIFT      12
290 #define ARM64_TCR_SH0_SHIFT             12
291 #define ARM64_TCR_SH0_MASK              0x3UL
292 #define CTXDESC_CD_0_TCR_EPD0_SHIFT     14
293 #define ARM64_TCR_EPD0_SHIFT            7
294 #define ARM64_TCR_EPD0_MASK             0x1UL
295 #define CTXDESC_CD_0_TCR_EPD1_SHIFT     30
296 #define ARM64_TCR_EPD1_SHIFT            23
297 #define ARM64_TCR_EPD1_MASK             0x1UL
298
299 #define CTXDESC_CD_0_ENDI               (1UL << 15)
300 #define CTXDESC_CD_0_V                  (1UL << 31)
301
302 #define CTXDESC_CD_0_TCR_IPS_SHIFT      32
303 #define ARM64_TCR_IPS_SHIFT             32
304 #define ARM64_TCR_IPS_MASK              0x7UL
305 #define CTXDESC_CD_0_TCR_TBI0_SHIFT     38
306 #define ARM64_TCR_TBI0_SHIFT            37
307 #define ARM64_TCR_TBI0_MASK             0x1UL
308
309 #define CTXDESC_CD_0_AA64               (1UL << 41)
310 #define CTXDESC_CD_0_R                  (1UL << 45)
311 #define CTXDESC_CD_0_A                  (1UL << 46)
312 #define CTXDESC_CD_0_ASET_SHIFT         47
313 #define CTXDESC_CD_0_ASET_SHARED        (0UL << CTXDESC_CD_0_ASET_SHIFT)
314 #define CTXDESC_CD_0_ASET_PRIVATE       (1UL << CTXDESC_CD_0_ASET_SHIFT)
315 #define CTXDESC_CD_0_ASID_SHIFT         48
316 #define CTXDESC_CD_0_ASID_MASK          0xffffUL
317
318 #define CTXDESC_CD_1_TTB0_SHIFT         4
319 #define CTXDESC_CD_1_TTB0_MASK          0xfffffffffffUL
320
321 #define CTXDESC_CD_3_MAIR_SHIFT         0
322
323 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
324 #define ARM_SMMU_TCR2CD(tcr, fld)                                       \
325         (((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)    \
326          << CTXDESC_CD_0_TCR_##fld##_SHIFT)
327
328 /* Command queue */
329 #define CMDQ_ENT_DWORDS                 2
330 #define CMDQ_MAX_SZ_SHIFT               8
331
332 #define CMDQ_ERR_SHIFT                  24
333 #define CMDQ_ERR_MASK                   0x7f
334 #define CMDQ_ERR_CERROR_NONE_IDX        0
335 #define CMDQ_ERR_CERROR_ILL_IDX         1
336 #define CMDQ_ERR_CERROR_ABT_IDX         2
337
338 #define CMDQ_0_OP_SHIFT                 0
339 #define CMDQ_0_OP_MASK                  0xffUL
340 #define CMDQ_0_SSV                      (1UL << 11)
341
342 #define CMDQ_PREFETCH_0_SID_SHIFT       32
343 #define CMDQ_PREFETCH_1_SIZE_SHIFT      0
344 #define CMDQ_PREFETCH_1_ADDR_MASK       ~0xfffUL
345
346 #define CMDQ_CFGI_0_SID_SHIFT           32
347 #define CMDQ_CFGI_0_SID_MASK            0xffffffffUL
348 #define CMDQ_CFGI_1_LEAF                (1UL << 0)
349 #define CMDQ_CFGI_1_RANGE_SHIFT         0
350 #define CMDQ_CFGI_1_RANGE_MASK          0x1fUL
351
352 #define CMDQ_TLBI_0_VMID_SHIFT          32
353 #define CMDQ_TLBI_0_ASID_SHIFT          48
354 #define CMDQ_TLBI_1_LEAF                (1UL << 0)
355 #define CMDQ_TLBI_1_VA_MASK             ~0xfffUL
356 #define CMDQ_TLBI_1_IPA_MASK            0xfffffffff000UL
357
358 #define CMDQ_PRI_0_SSID_SHIFT           12
359 #define CMDQ_PRI_0_SSID_MASK            0xfffffUL
360 #define CMDQ_PRI_0_SID_SHIFT            32
361 #define CMDQ_PRI_0_SID_MASK             0xffffffffUL
362 #define CMDQ_PRI_1_GRPID_SHIFT          0
363 #define CMDQ_PRI_1_GRPID_MASK           0x1ffUL
364 #define CMDQ_PRI_1_RESP_SHIFT           12
365 #define CMDQ_PRI_1_RESP_DENY            (0UL << CMDQ_PRI_1_RESP_SHIFT)
366 #define CMDQ_PRI_1_RESP_FAIL            (1UL << CMDQ_PRI_1_RESP_SHIFT)
367 #define CMDQ_PRI_1_RESP_SUCC            (2UL << CMDQ_PRI_1_RESP_SHIFT)
368
369 #define CMDQ_SYNC_0_CS_SHIFT            12
370 #define CMDQ_SYNC_0_CS_NONE             (0UL << CMDQ_SYNC_0_CS_SHIFT)
371 #define CMDQ_SYNC_0_CS_SEV              (2UL << CMDQ_SYNC_0_CS_SHIFT)
372
373 /* Event queue */
374 #define EVTQ_ENT_DWORDS                 4
375 #define EVTQ_MAX_SZ_SHIFT               7
376
377 #define EVTQ_0_ID_SHIFT                 0
378 #define EVTQ_0_ID_MASK                  0xffUL
379
380 /* PRI queue */
381 #define PRIQ_ENT_DWORDS                 2
382 #define PRIQ_MAX_SZ_SHIFT               8
383
384 #define PRIQ_0_SID_SHIFT                0
385 #define PRIQ_0_SID_MASK                 0xffffffffUL
386 #define PRIQ_0_SSID_SHIFT               32
387 #define PRIQ_0_SSID_MASK                0xfffffUL
388 #define PRIQ_0_PERM_PRIV                (1UL << 58)
389 #define PRIQ_0_PERM_EXEC                (1UL << 59)
390 #define PRIQ_0_PERM_READ                (1UL << 60)
391 #define PRIQ_0_PERM_WRITE               (1UL << 61)
392 #define PRIQ_0_PRG_LAST                 (1UL << 62)
393 #define PRIQ_0_SSID_V                   (1UL << 63)
394
395 #define PRIQ_1_PRG_IDX_SHIFT            0
396 #define PRIQ_1_PRG_IDX_MASK             0x1ffUL
397 #define PRIQ_1_ADDR_SHIFT               12
398 #define PRIQ_1_ADDR_MASK                0xfffffffffffffUL
399
400 /* High-level queue structures */
401 #define ARM_SMMU_POLL_TIMEOUT_US        100
402
403 static bool disable_bypass;
404 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
405 MODULE_PARM_DESC(disable_bypass,
406         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
407
/* Response kinds that can be placed in the 'pri.resp' field of a command */
enum pri_resp {
        PRI_RESP_DENY = 0,
        PRI_RESP_FAIL = 1,
        PRI_RESP_SUCC = 2,
};
413
/*
 * Index of each MSI source. ARM_SMMU_MAX_MSIS is not a source itself; it is
 * the number of sources and is used to size per-MSI tables.
 */
enum arm_smmu_msi_index {
        EVTQ_MSI_INDEX          = 0,
        GERROR_MSI_INDEX        = 1,
        PRIQ_MSI_INDEX          = 2,
        ARM_SMMU_MAX_MSIS       = 3,
};
420
421 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
422         [EVTQ_MSI_INDEX] = {
423                 ARM_SMMU_EVTQ_IRQ_CFG0,
424                 ARM_SMMU_EVTQ_IRQ_CFG1,
425                 ARM_SMMU_EVTQ_IRQ_CFG2,
426         },
427         [GERROR_MSI_INDEX] = {
428                 ARM_SMMU_GERROR_IRQ_CFG0,
429                 ARM_SMMU_GERROR_IRQ_CFG1,
430                 ARM_SMMU_GERROR_IRQ_CFG2,
431         },
432         [PRIQ_MSI_INDEX] = {
433                 ARM_SMMU_PRIQ_IRQ_CFG0,
434                 ARM_SMMU_PRIQ_IRQ_CFG1,
435                 ARM_SMMU_PRIQ_IRQ_CFG2,
436         },
437 };
438
439 struct arm_smmu_cmdq_ent {
440         /* Common fields */
441         u8                              opcode;
442         bool                            substream_valid;
443
444         /* Command-specific fields */
445         union {
446                 #define CMDQ_OP_PREFETCH_CFG    0x1
447                 struct {
448                         u32                     sid;
449                         u8                      size;
450                         u64                     addr;
451                 } prefetch;
452
453                 #define CMDQ_OP_CFGI_STE        0x3
454                 #define CMDQ_OP_CFGI_ALL        0x4
455                 struct {
456                         u32                     sid;
457                         union {
458                                 bool            leaf;
459                                 u8              span;
460                         };
461                 } cfgi;
462
463                 #define CMDQ_OP_TLBI_NH_ASID    0x11
464                 #define CMDQ_OP_TLBI_NH_VA      0x12
465                 #define CMDQ_OP_TLBI_EL2_ALL    0x20
466                 #define CMDQ_OP_TLBI_S12_VMALL  0x28
467                 #define CMDQ_OP_TLBI_S2_IPA     0x2a
468                 #define CMDQ_OP_TLBI_NSNH_ALL   0x30
469                 struct {
470                         u16                     asid;
471                         u16                     vmid;
472                         bool                    leaf;
473                         u64                     addr;
474                 } tlbi;
475
476                 #define CMDQ_OP_PRI_RESP        0x41
477                 struct {
478                         u32                     sid;
479                         u32                     ssid;
480                         u16                     grpid;
481                         enum pri_resp           resp;
482                 } pri;
483
484                 #define CMDQ_OP_CMD_SYNC        0x46
485         };
486 };
487
488 struct arm_smmu_queue {
489         int                             irq; /* Wired interrupt */
490
491         __le64                          *base;
492         dma_addr_t                      base_dma;
493         u64                             q_base;
494
495         size_t                          ent_dwords;
496         u32                             max_n_shift;
497         u32                             prod;
498         u32                             cons;
499
500         u32 __iomem                     *prod_reg;
501         u32 __iomem                     *cons_reg;
502 };
503
504 struct arm_smmu_cmdq {
505         struct arm_smmu_queue           q;
506         spinlock_t                      lock;
507 };
508
509 struct arm_smmu_evtq {
510         struct arm_smmu_queue           q;
511         u32                             max_stalls;
512 };
513
514 struct arm_smmu_priq {
515         struct arm_smmu_queue           q;
516 };
517
518 /* High-level stream table and context descriptor structures */
519 struct arm_smmu_strtab_l1_desc {
520         u8                              span;
521
522         __le64                          *l2ptr;
523         dma_addr_t                      l2ptr_dma;
524 };
525
526 struct arm_smmu_s1_cfg {
527         __le64                          *cdptr;
528         dma_addr_t                      cdptr_dma;
529
530         struct arm_smmu_ctx_desc {
531                 u16     asid;
532                 u64     ttbr;
533                 u64     tcr;
534                 u64     mair;
535         }                               cd;
536 };
537
538 struct arm_smmu_s2_cfg {
539         u16                             vmid;
540         u64                             vttbr;
541         u64                             vtcr;
542 };
543
544 struct arm_smmu_strtab_ent {
545         bool                            valid;
546
547         bool                            bypass; /* Overrides s1/s2 config */
548         struct arm_smmu_s1_cfg          *s1_cfg;
549         struct arm_smmu_s2_cfg          *s2_cfg;
550 };
551
552 struct arm_smmu_strtab_cfg {
553         __le64                          *strtab;
554         dma_addr_t                      strtab_dma;
555         struct arm_smmu_strtab_l1_desc  *l1_desc;
556         unsigned int                    num_l1_ents;
557
558         u64                             strtab_base;
559         u32                             strtab_base_cfg;
560 };
561
562 /* An SMMUv3 instance */
563 struct arm_smmu_device {
564         struct device                   *dev;
565         void __iomem                    *base;
566
567 #define ARM_SMMU_FEAT_2_LVL_STRTAB      (1 << 0)
568 #define ARM_SMMU_FEAT_2_LVL_CDTAB       (1 << 1)
569 #define ARM_SMMU_FEAT_TT_LE             (1 << 2)
570 #define ARM_SMMU_FEAT_TT_BE             (1 << 3)
571 #define ARM_SMMU_FEAT_PRI               (1 << 4)
572 #define ARM_SMMU_FEAT_ATS               (1 << 5)
573 #define ARM_SMMU_FEAT_SEV               (1 << 6)
574 #define ARM_SMMU_FEAT_MSI               (1 << 7)
575 #define ARM_SMMU_FEAT_COHERENCY         (1 << 8)
576 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 9)
577 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 10)
578 #define ARM_SMMU_FEAT_STALLS            (1 << 11)
579 #define ARM_SMMU_FEAT_HYP               (1 << 12)
580         u32                             features;
581
582 #define ARM_SMMU_OPT_SKIP_PREFETCH      (1 << 0)
583         u32                             options;
584
585         struct arm_smmu_cmdq            cmdq;
586         struct arm_smmu_evtq            evtq;
587         struct arm_smmu_priq            priq;
588
589         int                             gerr_irq;
590
591         unsigned long                   ias; /* IPA */
592         unsigned long                   oas; /* PA */
593         unsigned long                   pgsize_bitmap;
594
595 #define ARM_SMMU_MAX_ASIDS              (1 << 16)
596         unsigned int                    asid_bits;
597         DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
598
599 #define ARM_SMMU_MAX_VMIDS              (1 << 16)
600         unsigned int                    vmid_bits;
601         DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
602
603         unsigned int                    ssid_bits;
604         unsigned int                    sid_bits;
605
606         struct arm_smmu_strtab_cfg      strtab_cfg;
607 };
608
609 /* SMMU private data for an IOMMU group */
610 struct arm_smmu_group {
611         struct arm_smmu_device          *smmu;
612         struct arm_smmu_domain          *domain;
613         int                             num_sids;
614         u32                             *sids;
615         struct arm_smmu_strtab_ent      ste;
616 };
617
618 /* SMMU private data for an IOMMU domain */
/* Translation stage selected for a domain */
enum arm_smmu_domain_stage {
        ARM_SMMU_DOMAIN_S1      = 0,
        ARM_SMMU_DOMAIN_S2      = 1,
        ARM_SMMU_DOMAIN_NESTED  = 2,
};
624
625 struct arm_smmu_domain {
626         struct arm_smmu_device          *smmu;
627         struct mutex                    init_mutex; /* Protects smmu pointer */
628
629         struct io_pgtable_ops           *pgtbl_ops;
630         spinlock_t                      pgtbl_lock;
631
632         enum arm_smmu_domain_stage      stage;
633         union {
634                 struct arm_smmu_s1_cfg  s1_cfg;
635                 struct arm_smmu_s2_cfg  s2_cfg;
636         };
637
638         struct iommu_domain             domain;
639 };
640
641 struct arm_smmu_option_prop {
642         u32 opt;
643         const char *prop;
644 };
645
646 static struct arm_smmu_option_prop arm_smmu_options[] = {
647         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
648         { 0, NULL},
649 };
650
651 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
652 {
653         return container_of(dom, struct arm_smmu_domain, domain);
654 }
655
656 static void parse_driver_options(struct arm_smmu_device *smmu)
657 {
658         int i = 0;
659
660         do {
661                 if (of_property_read_bool(smmu->dev->of_node,
662                                                 arm_smmu_options[i].prop)) {
663                         smmu->options |= arm_smmu_options[i].opt;
664                         dev_notice(smmu->dev, "option %s\n",
665                                 arm_smmu_options[i].prop);
666                 }
667         } while (arm_smmu_options[++i].opt);
668 }
669
670 /* Low-level queue manipulation functions */
671 static bool queue_full(struct arm_smmu_queue *q)
672 {
673         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
674                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
675 }
676
677 static bool queue_empty(struct arm_smmu_queue *q)
678 {
679         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
680                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
681 }
682
683 static void queue_sync_cons(struct arm_smmu_queue *q)
684 {
685         q->cons = readl_relaxed(q->cons_reg);
686 }
687
688 static void queue_inc_cons(struct arm_smmu_queue *q)
689 {
690         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
691
692         q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
693         writel(q->cons, q->cons_reg);
694 }
695
696 static int queue_sync_prod(struct arm_smmu_queue *q)
697 {
698         int ret = 0;
699         u32 prod = readl_relaxed(q->prod_reg);
700
701         if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
702                 ret = -EOVERFLOW;
703
704         q->prod = prod;
705         return ret;
706 }
707
708 static void queue_inc_prod(struct arm_smmu_queue *q)
709 {
710         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
711
712         q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
713         writel(q->prod, q->prod_reg);
714 }
715
716 static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
717 {
718         if (Q_WRP(q, q->cons) == Q_WRP(q, until))
719                 return Q_IDX(q, q->cons) < Q_IDX(q, until);
720
721         return Q_IDX(q, q->cons) >= Q_IDX(q, until);
722 }
723
724 static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
725 {
726         ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
727
728         while (queue_sync_cons(q), __queue_cons_before(q, until)) {
729                 if (ktime_compare(ktime_get(), timeout) > 0)
730                         return -ETIMEDOUT;
731
732                 if (wfe) {
733                         wfe();
734                 } else {
735                         cpu_relax();
736                         udelay(1);
737                 }
738         }
739
740         return 0;
741 }
742
743 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
744 {
745         int i;
746
747         for (i = 0; i < n_dwords; ++i)
748                 *dst++ = cpu_to_le64(*src++);
749 }
750
751 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
752 {
753         if (queue_full(q))
754                 return -ENOSPC;
755
756         queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
757         queue_inc_prod(q);
758         return 0;
759 }
760
761 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
762 {
763         int i;
764
765         for (i = 0; i < n_dwords; ++i)
766                 *dst++ = le64_to_cpu(*src++);
767 }
768
769 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
770 {
771         if (queue_empty(q))
772                 return -EAGAIN;
773
774         queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
775         queue_inc_cons(q);
776         return 0;
777 }
778
779 /* High-level queue accessors */
780 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
781 {
782         memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
783         cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
784
785         switch (ent->opcode) {
786         case CMDQ_OP_TLBI_EL2_ALL:
787         case CMDQ_OP_TLBI_NSNH_ALL:
788                 break;
789         case CMDQ_OP_PREFETCH_CFG:
790                 cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
791                 cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
792                 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
793                 break;
794         case CMDQ_OP_CFGI_STE:
795                 cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
796                 cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
797                 break;
798         case CMDQ_OP_CFGI_ALL:
799                 /* Cover the entire SID range */
800                 cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
801                 break;
802         case CMDQ_OP_TLBI_NH_VA:
803                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
804                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
805                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
806                 break;
807         case CMDQ_OP_TLBI_S2_IPA:
808                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
809                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
810                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
811                 break;
812         case CMDQ_OP_TLBI_NH_ASID:
813                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
814                 /* Fallthrough */
815         case CMDQ_OP_TLBI_S12_VMALL:
816                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
817                 break;
818         case CMDQ_OP_PRI_RESP:
819                 cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
820                 cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
821                 cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
822                 cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
823                 switch (ent->pri.resp) {
824                 case PRI_RESP_DENY:
825                         cmd[1] |= CMDQ_PRI_1_RESP_DENY;
826                         break;
827                 case PRI_RESP_FAIL:
828                         cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
829                         break;
830                 case PRI_RESP_SUCC:
831                         cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
832                         break;
833                 default:
834                         return -EINVAL;
835                 }
836                 break;
837         case CMDQ_OP_CMD_SYNC:
838                 cmd[0] |= CMDQ_SYNC_0_CS_SEV;
839                 break;
840         default:
841                 return -ENOENT;
842         }
843
844         return 0;
845 }
846
847 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
848 {
849         static const char *cerror_str[] = {
850                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
851                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
852                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
853         };
854
855         int i;
856         u64 cmd[CMDQ_ENT_DWORDS];
857         struct arm_smmu_queue *q = &smmu->cmdq.q;
858         u32 cons = readl_relaxed(q->cons_reg);
859         u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
860         struct arm_smmu_cmdq_ent cmd_sync = {
861                 .opcode = CMDQ_OP_CMD_SYNC,
862         };
863
864         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
865                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
866
867         switch (idx) {
868         case CMDQ_ERR_CERROR_ABT_IDX:
869                 dev_err(smmu->dev, "retrying command fetch\n");
870         case CMDQ_ERR_CERROR_NONE_IDX:
871                 return;
872         case CMDQ_ERR_CERROR_ILL_IDX:
873                 /* Fallthrough */
874         default:
875                 break;
876         }
877
878         /*
879          * We may have concurrent producers, so we need to be careful
880          * not to touch any of the shadow cmdq state.
881          */
882         queue_read(cmd, Q_ENT(q, idx), q->ent_dwords);
883         dev_err(smmu->dev, "skipping command in error state:\n");
884         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
885                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
886
887         /* Convert the erroneous command into a CMD_SYNC */
888         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
889                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
890                 return;
891         }
892
893         queue_write(cmd, Q_ENT(q, idx), q->ent_dwords);
894 }
895
896 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
897                                     struct arm_smmu_cmdq_ent *ent)
898 {
899         u32 until;
900         u64 cmd[CMDQ_ENT_DWORDS];
901         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
902         struct arm_smmu_queue *q = &smmu->cmdq.q;
903
904         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
905                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
906                          ent->opcode);
907                 return;
908         }
909
910         spin_lock(&smmu->cmdq.lock);
911         while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
912                 /*
913                  * Keep the queue locked, otherwise the producer could wrap
914                  * twice and we could see a future consumer pointer that looks
915                  * like it's behind us.
916                  */
917                 if (queue_poll_cons(q, until, wfe))
918                         dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
919         }
920
921         if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
922                 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
923         spin_unlock(&smmu->cmdq.lock);
924 }
925
926 /* Context descriptor manipulation functions */
927 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
928 {
929         u64 val = 0;
930
931         /* Repack the TCR. Just care about TTBR0 for now */
932         val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
933         val |= ARM_SMMU_TCR2CD(tcr, TG0);
934         val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
935         val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
936         val |= ARM_SMMU_TCR2CD(tcr, SH0);
937         val |= ARM_SMMU_TCR2CD(tcr, EPD0);
938         val |= ARM_SMMU_TCR2CD(tcr, EPD1);
939         val |= ARM_SMMU_TCR2CD(tcr, IPS);
940         val |= ARM_SMMU_TCR2CD(tcr, TBI0);
941
942         return val;
943 }
944
945 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
946                                     struct arm_smmu_s1_cfg *cfg)
947 {
948         u64 val;
949
950         /*
951          * We don't need to issue any invalidation here, as we'll invalidate
952          * the STE when installing the new entry anyway.
953          */
954         val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
955 #ifdef __BIG_ENDIAN
956               CTXDESC_CD_0_ENDI |
957 #endif
958               CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
959               CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
960               CTXDESC_CD_0_V;
961         cfg->cdptr[0] = cpu_to_le64(val);
962
963         val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
964         cfg->cdptr[1] = cpu_to_le64(val);
965
966         cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
967 }
968
969 /* Stream table manipulation functions */
970 static void
971 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
972 {
973         u64 val = 0;
974
975         val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
976                 << STRTAB_L1_DESC_SPAN_SHIFT;
977         val |= desc->l2ptr_dma &
978                STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
979
980         *dst = cpu_to_le64(val);
981 }
982
983 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
984 {
985         struct arm_smmu_cmdq_ent cmd = {
986                 .opcode = CMDQ_OP_CFGI_STE,
987                 .cfgi   = {
988                         .sid    = sid,
989                         .leaf   = true,
990                 },
991         };
992
993         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
994         cmd.opcode = CMDQ_OP_CMD_SYNC;
995         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
996 }
997
998 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
999                                       __le64 *dst, struct arm_smmu_strtab_ent *ste)
1000 {
1001         /*
1002          * This is hideously complicated, but we only really care about
1003          * three cases at the moment:
1004          *
1005          * 1. Invalid (all zero) -> bypass  (init)
1006          * 2. Bypass -> translation (attach)
1007          * 3. Translation -> bypass (detach)
1008          *
1009          * Given that we can't update the STE atomically and the SMMU
1010          * doesn't read the thing in a defined order, that leaves us
1011          * with the following maintenance requirements:
1012          *
1013          * 1. Update Config, return (init time STEs aren't live)
1014          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1015          * 3. Update Config, sync
1016          */
1017         u64 val = le64_to_cpu(dst[0]);
1018         bool ste_live = false;
1019         struct arm_smmu_cmdq_ent prefetch_cmd = {
1020                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1021                 .prefetch       = {
1022                         .sid    = sid,
1023                 },
1024         };
1025
1026         if (val & STRTAB_STE_0_V) {
1027                 u64 cfg;
1028
1029                 cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
1030                 switch (cfg) {
1031                 case STRTAB_STE_0_CFG_BYPASS:
1032                         break;
1033                 case STRTAB_STE_0_CFG_S1_TRANS:
1034                 case STRTAB_STE_0_CFG_S2_TRANS:
1035                         ste_live = true;
1036                         break;
1037                 default:
1038                         BUG(); /* STE corruption */
1039                 }
1040         }
1041
1042         /* Nuke the existing Config, as we're going to rewrite it */
1043         val &= ~(STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT);
1044
1045         if (ste->valid)
1046                 val |= STRTAB_STE_0_V;
1047         else
1048                 val &= ~STRTAB_STE_0_V;
1049
1050         if (ste->bypass) {
1051                 val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
1052                                       : STRTAB_STE_0_CFG_BYPASS;
1053                 dst[0] = cpu_to_le64(val);
1054                 dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
1055                          << STRTAB_STE_1_SHCFG_SHIFT);
1056                 dst[2] = 0; /* Nuke the VMID */
1057                 if (ste_live)
1058                         arm_smmu_sync_ste_for_sid(smmu, sid);
1059                 return;
1060         }
1061
1062         if (ste->s1_cfg) {
1063                 BUG_ON(ste_live);
1064                 dst[1] = cpu_to_le64(
1065                          STRTAB_STE_1_S1C_CACHE_WBRA
1066                          << STRTAB_STE_1_S1CIR_SHIFT |
1067                          STRTAB_STE_1_S1C_CACHE_WBRA
1068                          << STRTAB_STE_1_S1COR_SHIFT |
1069                          STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
1070 #ifdef CONFIG_PCI_ATS
1071                          STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
1072 #endif
1073                          STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
1074
1075                 if (smmu->features & ARM_SMMU_FEAT_STALLS)
1076                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1077
1078                 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
1079                         << STRTAB_STE_0_S1CTXPTR_SHIFT) |
1080                         STRTAB_STE_0_CFG_S1_TRANS;
1081
1082         }
1083
1084         if (ste->s2_cfg) {
1085                 BUG_ON(ste_live);
1086                 dst[2] = cpu_to_le64(
1087                          ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
1088                          (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
1089                           << STRTAB_STE_2_VTCR_SHIFT |
1090 #ifdef __BIG_ENDIAN
1091                          STRTAB_STE_2_S2ENDI |
1092 #endif
1093                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1094                          STRTAB_STE_2_S2R);
1095
1096                 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
1097                          STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
1098
1099                 val |= STRTAB_STE_0_CFG_S2_TRANS;
1100         }
1101
1102         arm_smmu_sync_ste_for_sid(smmu, sid);
1103         dst[0] = cpu_to_le64(val);
1104         arm_smmu_sync_ste_for_sid(smmu, sid);
1105
1106         /* It's likely that we'll want to use the new STE soon */
1107         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1108                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1109 }
1110
1111 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1112 {
1113         unsigned int i;
1114         struct arm_smmu_strtab_ent ste = {
1115                 .valid  = true,
1116                 .bypass = true,
1117         };
1118
1119         for (i = 0; i < nent; ++i) {
1120                 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1121                 strtab += STRTAB_STE_DWORDS;
1122         }
1123 }
1124
1125 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1126 {
1127         size_t size;
1128         void *strtab;
1129         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1130         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1131
1132         if (desc->l2ptr)
1133                 return 0;
1134
1135         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1136         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1137
1138         desc->span = STRTAB_SPLIT + 1;
1139         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1140                                           GFP_KERNEL | __GFP_ZERO);
1141         if (!desc->l2ptr) {
1142                 dev_err(smmu->dev,
1143                         "failed to allocate l2 stream table for SID %u\n",
1144                         sid);
1145                 return -ENOMEM;
1146         }
1147
1148         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1149         arm_smmu_write_strtab_l1_desc(strtab, desc);
1150         return 0;
1151 }
1152
1153 /* IRQ and event handlers */
1154 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1155 {
1156         int i;
1157         struct arm_smmu_device *smmu = dev;
1158         struct arm_smmu_queue *q = &smmu->evtq.q;
1159         u64 evt[EVTQ_ENT_DWORDS];
1160
1161         while (!queue_remove_raw(q, evt)) {
1162                 u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
1163
1164                 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1165                 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1166                         dev_info(smmu->dev, "\t0x%016llx\n",
1167                                  (unsigned long long)evt[i]);
1168         }
1169
1170         /* Sync our overflow flag, as we believe we're up to speed */
1171         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1172         return IRQ_HANDLED;
1173 }
1174
1175 static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
1176 {
1177         irqreturn_t ret = IRQ_WAKE_THREAD;
1178         struct arm_smmu_device *smmu = dev;
1179         struct arm_smmu_queue *q = &smmu->evtq.q;
1180
1181         /*
1182          * Not much we can do on overflow, so scream and pretend we're
1183          * trying harder.
1184          */
1185         if (queue_sync_prod(q) == -EOVERFLOW)
1186                 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1187         else if (queue_empty(q))
1188                 ret = IRQ_NONE;
1189
1190         return ret;
1191 }
1192
1193 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1194 {
1195         struct arm_smmu_device *smmu = dev;
1196         struct arm_smmu_queue *q = &smmu->priq.q;
1197         u64 evt[PRIQ_ENT_DWORDS];
1198
1199         while (!queue_remove_raw(q, evt)) {
1200                 u32 sid, ssid;
1201                 u16 grpid;
1202                 bool ssv, last;
1203
1204                 sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
1205                 ssv = evt[0] & PRIQ_0_SSID_V;
1206                 ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
1207                 last = evt[0] & PRIQ_0_PRG_LAST;
1208                 grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
1209
1210                 dev_info(smmu->dev, "unexpected PRI request received:\n");
1211                 dev_info(smmu->dev,
1212                          "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1213                          sid, ssid, grpid, last ? "L" : "",
1214                          evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1215                          evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1216                          evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1217                          evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1218                          evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
1219
1220                 if (last) {
1221                         struct arm_smmu_cmdq_ent cmd = {
1222                                 .opcode                 = CMDQ_OP_PRI_RESP,
1223                                 .substream_valid        = ssv,
1224                                 .pri                    = {
1225                                         .sid    = sid,
1226                                         .ssid   = ssid,
1227                                         .grpid  = grpid,
1228                                         .resp   = PRI_RESP_DENY,
1229                                 },
1230                         };
1231
1232                         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1233                 }
1234         }
1235
1236         /* Sync our overflow flag, as we believe we're up to speed */
1237         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1238         return IRQ_HANDLED;
1239 }
1240
1241 static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
1242 {
1243         irqreturn_t ret = IRQ_WAKE_THREAD;
1244         struct arm_smmu_device *smmu = dev;
1245         struct arm_smmu_queue *q = &smmu->priq.q;
1246
1247         /* PRIQ overflow indicates a programming error */
1248         if (queue_sync_prod(q) == -EOVERFLOW)
1249                 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1250         else if (queue_empty(q))
1251                 ret = IRQ_NONE;
1252
1253         return ret;
1254 }
1255
1256 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
1257 {
1258         /* We don't actually use CMD_SYNC interrupts for anything */
1259         return IRQ_HANDLED;
1260 }
1261
1262 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1263
1264 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1265 {
1266         u32 gerror, gerrorn, active;
1267         struct arm_smmu_device *smmu = dev;
1268
1269         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1270         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1271
1272         active = gerror ^ gerrorn;
1273         if (!(active & GERROR_ERR_MASK))
1274                 return IRQ_NONE; /* No errors pending */
1275
1276         dev_warn(smmu->dev,
1277                  "unexpected global error reported (0x%08x), this could be serious\n",
1278                  active);
1279
1280         if (active & GERROR_SFM_ERR) {
1281                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1282                 arm_smmu_device_disable(smmu);
1283         }
1284
1285         if (active & GERROR_MSI_GERROR_ABT_ERR)
1286                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1287
1288         if (active & GERROR_MSI_PRIQ_ABT_ERR) {
1289                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1290                 arm_smmu_priq_handler(irq, smmu->dev);
1291         }
1292
1293         if (active & GERROR_MSI_EVTQ_ABT_ERR) {
1294                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1295                 arm_smmu_evtq_handler(irq, smmu->dev);
1296         }
1297
1298         if (active & GERROR_MSI_CMDQ_ABT_ERR) {
1299                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1300                 arm_smmu_cmdq_sync_handler(irq, smmu->dev);
1301         }
1302
1303         if (active & GERROR_PRIQ_ABT_ERR)
1304                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1305
1306         if (active & GERROR_EVTQ_ABT_ERR)
1307                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1308
1309         if (active & GERROR_CMDQ_ERR)
1310                 arm_smmu_cmdq_skip_err(smmu);
1311
1312         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1313         return IRQ_HANDLED;
1314 }
1315
1316 /* IO_PGTABLE API */
1317 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1318 {
1319         struct arm_smmu_cmdq_ent cmd;
1320
1321         cmd.opcode = CMDQ_OP_CMD_SYNC;
1322         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1323 }
1324
1325 static void arm_smmu_tlb_sync(void *cookie)
1326 {
1327         struct arm_smmu_domain *smmu_domain = cookie;
1328         __arm_smmu_tlb_sync(smmu_domain->smmu);
1329 }
1330
1331 static void arm_smmu_tlb_inv_context(void *cookie)
1332 {
1333         struct arm_smmu_domain *smmu_domain = cookie;
1334         struct arm_smmu_device *smmu = smmu_domain->smmu;
1335         struct arm_smmu_cmdq_ent cmd;
1336
1337         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1338                 cmd.opcode      = CMDQ_OP_TLBI_NH_ASID;
1339                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1340                 cmd.tlbi.vmid   = 0;
1341         } else {
1342                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1343                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1344         }
1345
1346         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1347         __arm_smmu_tlb_sync(smmu);
1348 }
1349
1350 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1351                                           size_t granule, bool leaf, void *cookie)
1352 {
1353         struct arm_smmu_domain *smmu_domain = cookie;
1354         struct arm_smmu_device *smmu = smmu_domain->smmu;
1355         struct arm_smmu_cmdq_ent cmd = {
1356                 .tlbi = {
1357                         .leaf   = leaf,
1358                         .addr   = iova,
1359                 },
1360         };
1361
1362         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1363                 cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1364                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1365         } else {
1366                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1367                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1368         }
1369
1370         do {
1371                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1372                 cmd.tlbi.addr += granule;
1373         } while (size -= granule);
1374 }
1375
1376 static struct iommu_gather_ops arm_smmu_gather_ops = {
1377         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1378         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
1379         .tlb_sync       = arm_smmu_tlb_sync,
1380 };
1381
1382 /* IOMMU API */
1383 static bool arm_smmu_capable(enum iommu_cap cap)
1384 {
1385         switch (cap) {
1386         case IOMMU_CAP_CACHE_COHERENCY:
1387                 return true;
1388         case IOMMU_CAP_INTR_REMAP:
1389                 return true; /* MSIs are just memory writes */
1390         case IOMMU_CAP_NOEXEC:
1391                 return true;
1392         default:
1393                 return false;
1394         }
1395 }
1396
1397 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1398 {
1399         struct arm_smmu_domain *smmu_domain;
1400
1401         if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
1402                 return NULL;
1403
1404         /*
1405          * Allocate the domain and initialise some of its data structures.
1406          * We can't really do anything meaningful until we've added a
1407          * master.
1408          */
1409         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1410         if (!smmu_domain)
1411                 return NULL;
1412
1413         if (type == IOMMU_DOMAIN_DMA &&
1414             iommu_get_dma_cookie(&smmu_domain->domain)) {
1415                 kfree(smmu_domain);
1416                 return NULL;
1417         }
1418
1419         mutex_init(&smmu_domain->init_mutex);
1420         spin_lock_init(&smmu_domain->pgtbl_lock);
1421         return &smmu_domain->domain;
1422 }
1423
1424 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1425 {
1426         int idx, size = 1 << span;
1427
1428         do {
1429                 idx = find_first_zero_bit(map, size);
1430                 if (idx == size)
1431                         return -ENOSPC;
1432         } while (test_and_set_bit(idx, map));
1433
1434         return idx;
1435 }
1436
/* Release bit @idx of @map so that it can be handed out again. */
static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
1441
1442 static void arm_smmu_domain_free(struct iommu_domain *domain)
1443 {
1444         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1445         struct arm_smmu_device *smmu = smmu_domain->smmu;
1446
1447         iommu_put_dma_cookie(domain);
1448         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1449
1450         /* Free the CD and ASID, if we allocated them */
1451         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1452                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1453
1454                 if (cfg->cdptr) {
1455                         dmam_free_coherent(smmu_domain->smmu->dev,
1456                                            CTXDESC_CD_DWORDS << 3,
1457                                            cfg->cdptr,
1458                                            cfg->cdptr_dma);
1459
1460                         arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1461                 }
1462         } else {
1463                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1464                 if (cfg->vmid)
1465                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1466         }
1467
1468         kfree(smmu_domain);
1469 }
1470
1471 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1472                                        struct io_pgtable_cfg *pgtbl_cfg)
1473 {
1474         int ret;
1475         int asid;
1476         struct arm_smmu_device *smmu = smmu_domain->smmu;
1477         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1478
1479         asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1480         if (asid < 0)
1481                 return asid;
1482
1483         cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1484                                          &cfg->cdptr_dma,
1485                                          GFP_KERNEL | __GFP_ZERO);
1486         if (!cfg->cdptr) {
1487                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1488                 ret = -ENOMEM;
1489                 goto out_free_asid;
1490         }
1491
1492         cfg->cd.asid    = (u16)asid;
1493         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1494         cfg->cd.tcr     = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1495         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1496         return 0;
1497
1498 out_free_asid:
1499         arm_smmu_bitmap_free(smmu->asid_map, asid);
1500         return ret;
1501 }
1502
1503 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1504                                        struct io_pgtable_cfg *pgtbl_cfg)
1505 {
1506         int vmid;
1507         struct arm_smmu_device *smmu = smmu_domain->smmu;
1508         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1509
1510         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1511         if (vmid < 0)
1512                 return vmid;
1513
1514         cfg->vmid       = (u16)vmid;
1515         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1516         cfg->vtcr       = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1517         return 0;
1518 }
1519
1520 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1521 {
1522         int ret;
1523         unsigned long ias, oas;
1524         enum io_pgtable_fmt fmt;
1525         struct io_pgtable_cfg pgtbl_cfg;
1526         struct io_pgtable_ops *pgtbl_ops;
1527         int (*finalise_stage_fn)(struct arm_smmu_domain *,
1528                                  struct io_pgtable_cfg *);
1529         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1530         struct arm_smmu_device *smmu = smmu_domain->smmu;
1531
1532         /* Restrict the stage to what we can actually support */
1533         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1534                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1535         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1536                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1537
1538         switch (smmu_domain->stage) {
1539         case ARM_SMMU_DOMAIN_S1:
1540                 ias = VA_BITS;
1541                 oas = smmu->ias;
1542                 fmt = ARM_64_LPAE_S1;
1543                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1544                 break;
1545         case ARM_SMMU_DOMAIN_NESTED:
1546         case ARM_SMMU_DOMAIN_S2:
1547                 ias = smmu->ias;
1548                 oas = smmu->oas;
1549                 fmt = ARM_64_LPAE_S2;
1550                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1551                 break;
1552         default:
1553                 return -EINVAL;
1554         }
1555
1556         pgtbl_cfg = (struct io_pgtable_cfg) {
1557                 .pgsize_bitmap  = smmu->pgsize_bitmap,
1558                 .ias            = ias,
1559                 .oas            = oas,
1560                 .tlb            = &arm_smmu_gather_ops,
1561                 .iommu_dev      = smmu->dev,
1562         };
1563
1564         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1565         if (!pgtbl_ops)
1566                 return -ENOMEM;
1567
1568         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1569         smmu_domain->pgtbl_ops = pgtbl_ops;
1570
1571         ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1572         if (ret < 0)
1573                 free_io_pgtable_ops(pgtbl_ops);
1574
1575         return ret;
1576 }
1577
1578 static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
1579 {
1580         struct iommu_group *group;
1581         struct arm_smmu_group *smmu_group;
1582
1583         group = iommu_group_get(dev);
1584         if (!group)
1585                 return NULL;
1586
1587         smmu_group = iommu_group_get_iommudata(group);
1588         iommu_group_put(group);
1589         return smmu_group;
1590 }
1591
1592 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1593 {
1594         __le64 *step;
1595         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1596
1597         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1598                 struct arm_smmu_strtab_l1_desc *l1_desc;
1599                 int idx;
1600
1601                 /* Two-level walk */
1602                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1603                 l1_desc = &cfg->l1_desc[idx];
1604                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1605                 step = &l1_desc->l2ptr[idx];
1606         } else {
1607                 /* Simple linear lookup */
1608                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1609         }
1610
1611         return step;
1612 }
1613
1614 static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
1615 {
1616         int i;
1617         struct arm_smmu_domain *smmu_domain = smmu_group->domain;
1618         struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
1619         struct arm_smmu_device *smmu = smmu_group->smmu;
1620
1621         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1622                 ste->s1_cfg = &smmu_domain->s1_cfg;
1623                 ste->s2_cfg = NULL;
1624                 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1625         } else {
1626                 ste->s1_cfg = NULL;
1627                 ste->s2_cfg = &smmu_domain->s2_cfg;
1628         }
1629
1630         for (i = 0; i < smmu_group->num_sids; ++i) {
1631                 u32 sid = smmu_group->sids[i];
1632                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1633
1634                 arm_smmu_write_strtab_ent(smmu, sid, step, ste);
1635         }
1636
1637         return 0;
1638 }
1639
1640 static void arm_smmu_detach_dev(struct device *dev)
1641 {
1642         struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1643
1644         smmu_group->ste.bypass = true;
1645         if (arm_smmu_install_ste_for_group(smmu_group) < 0)
1646                 dev_warn(dev, "failed to install bypass STE\n");
1647
1648         smmu_group->domain = NULL;
1649 }
1650
1651 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1652 {
1653         int ret = 0;
1654         struct arm_smmu_device *smmu;
1655         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1656         struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1657
1658         if (!smmu_group)
1659                 return -ENOENT;
1660
1661         /* Already attached to a different domain? */
1662         if (smmu_group->domain && smmu_group->domain != smmu_domain)
1663                 arm_smmu_detach_dev(dev);
1664
1665         smmu = smmu_group->smmu;
1666         mutex_lock(&smmu_domain->init_mutex);
1667
1668         if (!smmu_domain->smmu) {
1669                 smmu_domain->smmu = smmu;
1670                 ret = arm_smmu_domain_finalise(domain);
1671                 if (ret) {
1672                         smmu_domain->smmu = NULL;
1673                         goto out_unlock;
1674                 }
1675         } else if (smmu_domain->smmu != smmu) {
1676                 dev_err(dev,
1677                         "cannot attach to SMMU %s (upstream of %s)\n",
1678                         dev_name(smmu_domain->smmu->dev),
1679                         dev_name(smmu->dev));
1680                 ret = -ENXIO;
1681                 goto out_unlock;
1682         }
1683
1684         /* Group already attached to this domain? */
1685         if (smmu_group->domain)
1686                 goto out_unlock;
1687
1688         smmu_group->domain      = smmu_domain;
1689
1690         /*
1691          * FIXME: This should always be "false" once we have IOMMU-backed
1692          * DMA ops for all devices behind the SMMU.
1693          */
1694         smmu_group->ste.bypass  = domain->type == IOMMU_DOMAIN_DMA;
1695
1696         ret = arm_smmu_install_ste_for_group(smmu_group);
1697         if (ret < 0)
1698                 smmu_group->domain = NULL;
1699
1700 out_unlock:
1701         mutex_unlock(&smmu_domain->init_mutex);
1702         return ret;
1703 }
1704
1705 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1706                         phys_addr_t paddr, size_t size, int prot)
1707 {
1708         int ret;
1709         unsigned long flags;
1710         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1711         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1712
1713         if (!ops)
1714                 return -ENODEV;
1715
1716         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1717         ret = ops->map(ops, iova, paddr, size, prot);
1718         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1719         return ret;
1720 }
1721
1722 static size_t
1723 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1724 {
1725         size_t ret;
1726         unsigned long flags;
1727         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1728         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1729
1730         if (!ops)
1731                 return 0;
1732
1733         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1734         ret = ops->unmap(ops, iova, size);
1735         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1736         return ret;
1737 }
1738
1739 static phys_addr_t
1740 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1741 {
1742         phys_addr_t ret;
1743         unsigned long flags;
1744         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1745         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1746
1747         if (!ops)
1748                 return 0;
1749
1750         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1751         ret = ops->iova_to_phys(ops, iova);
1752         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1753
1754         return ret;
1755 }
1756
1757 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
1758 {
1759         *(u32 *)sidp = alias;
1760         return 0; /* Continue walking */
1761 }
1762
1763 static void __arm_smmu_release_pci_iommudata(void *data)
1764 {
1765         kfree(data);
1766 }
1767
1768 static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
1769 {
1770         struct device_node *of_node;
1771         struct platform_device *smmu_pdev;
1772         struct arm_smmu_device *smmu = NULL;
1773         struct pci_bus *bus = pdev->bus;
1774
1775         /* Walk up to the root bus */
1776         while (!pci_is_root_bus(bus))
1777                 bus = bus->parent;
1778
1779         /* Follow the "iommus" phandle from the host controller */
1780         of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
1781         if (!of_node)
1782                 return NULL;
1783
1784         /* See if we can find an SMMU corresponding to the phandle */
1785         smmu_pdev = of_find_device_by_node(of_node);
1786         if (smmu_pdev)
1787                 smmu = platform_get_drvdata(smmu_pdev);
1788
1789         of_node_put(of_node);
1790         return smmu;
1791 }
1792
1793 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1794 {
1795         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1796
1797         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1798                 limit *= 1UL << STRTAB_SPLIT;
1799
1800         return sid < limit;
1801 }
1802
1803 static int arm_smmu_add_device(struct device *dev)
1804 {
1805         int i, ret;
1806         u32 sid, *sids;
1807         struct pci_dev *pdev;
1808         struct iommu_group *group;
1809         struct arm_smmu_group *smmu_group;
1810         struct arm_smmu_device *smmu;
1811
1812         /* We only support PCI, for now */
1813         if (!dev_is_pci(dev))
1814                 return -ENODEV;
1815
1816         pdev = to_pci_dev(dev);
1817         group = iommu_group_get_for_dev(dev);
1818         if (IS_ERR(group))
1819                 return PTR_ERR(group);
1820
1821         smmu_group = iommu_group_get_iommudata(group);
1822         if (!smmu_group) {
1823                 smmu = arm_smmu_get_for_pci_dev(pdev);
1824                 if (!smmu) {
1825                         ret = -ENOENT;
1826                         goto out_remove_dev;
1827                 }
1828
1829                 smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
1830                 if (!smmu_group) {
1831                         ret = -ENOMEM;
1832                         goto out_remove_dev;
1833                 }
1834
1835                 smmu_group->ste.valid   = true;
1836                 smmu_group->smmu        = smmu;
1837                 iommu_group_set_iommudata(group, smmu_group,
1838                                           __arm_smmu_release_pci_iommudata);
1839         } else {
1840                 smmu = smmu_group->smmu;
1841         }
1842
1843         /* Assume SID == RID until firmware tells us otherwise */
1844         pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1845         for (i = 0; i < smmu_group->num_sids; ++i) {
1846                 /* If we already know about this SID, then we're done */
1847                 if (smmu_group->sids[i] == sid)
1848                         goto out_put_group;
1849         }
1850
1851         /* Check the SID is in range of the SMMU and our stream table */
1852         if (!arm_smmu_sid_in_range(smmu, sid)) {
1853                 ret = -ERANGE;
1854                 goto out_remove_dev;
1855         }
1856
1857         /* Ensure l2 strtab is initialised */
1858         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1859                 ret = arm_smmu_init_l2_strtab(smmu, sid);
1860                 if (ret)
1861                         goto out_remove_dev;
1862         }
1863
1864         /* Resize the SID array for the group */
1865         smmu_group->num_sids++;
1866         sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
1867                         GFP_KERNEL);
1868         if (!sids) {
1869                 smmu_group->num_sids--;
1870                 ret = -ENOMEM;
1871                 goto out_remove_dev;
1872         }
1873
1874         /* Add the new SID */
1875         sids[smmu_group->num_sids - 1] = sid;
1876         smmu_group->sids = sids;
1877
1878 out_put_group:
1879         iommu_group_put(group);
1880         return 0;
1881
1882 out_remove_dev:
1883         iommu_group_remove_device(dev);
1884         iommu_group_put(group);
1885         return ret;
1886 }
1887
/*
 * IOMMU core callback invoked when @dev goes away: simply take the device
 * out of its IOMMU group.
 */
static void arm_smmu_remove_device(struct device *dev)
{
        iommu_group_remove_device(dev);
}
1892
1893 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1894                                     enum iommu_attr attr, void *data)
1895 {
1896         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1897
1898         switch (attr) {
1899         case DOMAIN_ATTR_NESTING:
1900                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1901                 return 0;
1902         default:
1903                 return -ENODEV;
1904         }
1905 }
1906
1907 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1908                                     enum iommu_attr attr, void *data)
1909 {
1910         int ret = 0;
1911         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1912
1913         mutex_lock(&smmu_domain->init_mutex);
1914
1915         switch (attr) {
1916         case DOMAIN_ATTR_NESTING:
1917                 if (smmu_domain->smmu) {
1918                         ret = -EPERM;
1919                         goto out_unlock;
1920                 }
1921
1922                 if (*(int *)data)
1923                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1924                 else
1925                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1926
1927                 break;
1928         default:
1929                 ret = -ENODEV;
1930         }
1931
1932 out_unlock:
1933         mutex_unlock(&smmu_domain->init_mutex);
1934         return ret;
1935 }
1936
/*
 * IOMMU operations table for this driver. Most entries are the
 * arm_smmu_* callbacks defined in this file; map_sg and device_group use
 * helpers that are not defined in this part of the file
 * (default_iommu_map_sg and pci_device_group).
 */
static struct iommu_ops arm_smmu_ops = {
        .capable                = arm_smmu_capable,
        .domain_alloc           = arm_smmu_domain_alloc,
        .domain_free            = arm_smmu_domain_free,
        .attach_dev             = arm_smmu_attach_dev,
        .map                    = arm_smmu_map,
        .unmap                  = arm_smmu_unmap,
        .map_sg                 = default_iommu_map_sg,
        .iova_to_phys           = arm_smmu_iova_to_phys,
        .add_device             = arm_smmu_add_device,
        .remove_device          = arm_smmu_remove_device,
        /* arm_smmu_add_device() only accepts PCI devices */
        .device_group           = pci_device_group,
        .domain_get_attr        = arm_smmu_domain_get_attr,
        .domain_set_attr        = arm_smmu_domain_set_attr,
        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
};
1953
1954 /* Probing and initialisation functions */
1955 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1956                                    struct arm_smmu_queue *q,
1957                                    unsigned long prod_off,
1958                                    unsigned long cons_off,
1959                                    size_t dwords)
1960 {
1961         size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
1962
1963         q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
1964         if (!q->base) {
1965                 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
1966                         qsz);
1967                 return -ENOMEM;
1968         }
1969
1970         q->prod_reg     = smmu->base + prod_off;
1971         q->cons_reg     = smmu->base + cons_off;
1972         q->ent_dwords   = dwords;
1973
1974         q->q_base  = Q_BASE_RWA;
1975         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
1976         q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
1977                      << Q_BASE_LOG2SIZE_SHIFT;
1978
1979         q->prod = q->cons = 0;
1980         return 0;
1981 }
1982
1983 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
1984 {
1985         int ret;
1986
1987         /* cmdq */
1988         spin_lock_init(&smmu->cmdq.lock);
1989         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
1990                                       ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
1991         if (ret)
1992                 return ret;
1993
1994         /* evtq */
1995         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
1996                                       ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
1997         if (ret)
1998                 return ret;
1999
2000         /* priq */
2001         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2002                 return 0;
2003
2004         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2005                                        ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2006 }
2007
2008 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2009 {
2010         unsigned int i;
2011         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2012         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2013         void *strtab = smmu->strtab_cfg.strtab;
2014
2015         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2016         if (!cfg->l1_desc) {
2017                 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2018                 return -ENOMEM;
2019         }
2020
2021         for (i = 0; i < cfg->num_l1_ents; ++i) {
2022                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2023                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2024         }
2025
2026         return 0;
2027 }
2028
2029 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2030 {
2031         void *strtab;
2032         u64 reg;
2033         u32 size, l1size;
2034         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2035
2036         /*
2037          * If we can resolve everything with a single L2 table, then we
2038          * just need a single L1 descriptor. Otherwise, calculate the L1
2039          * size, capped to the SIDSIZE.
2040          */
2041         if (smmu->sid_bits < STRTAB_SPLIT) {
2042                 size = 0;
2043         } else {
2044                 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2045                 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2046         }
2047         cfg->num_l1_ents = 1 << size;
2048
2049         size += STRTAB_SPLIT;
2050         if (size < smmu->sid_bits)
2051                 dev_warn(smmu->dev,
2052                          "2-level strtab only covers %u/%u bits of SID\n",
2053                          size, smmu->sid_bits);
2054
2055         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2056         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2057                                      GFP_KERNEL | __GFP_ZERO);
2058         if (!strtab) {
2059                 dev_err(smmu->dev,
2060                         "failed to allocate l1 stream table (%u bytes)\n",
2061                         size);
2062                 return -ENOMEM;
2063         }
2064         cfg->strtab = strtab;
2065
2066         /* Configure strtab_base_cfg for 2 levels */
2067         reg  = STRTAB_BASE_CFG_FMT_2LVL;
2068         reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2069                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2070         reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
2071                 << STRTAB_BASE_CFG_SPLIT_SHIFT;
2072         cfg->strtab_base_cfg = reg;
2073
2074         return arm_smmu_init_l1_strtab(smmu);
2075 }
2076
2077 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2078 {
2079         void *strtab;
2080         u64 reg;
2081         u32 size;
2082         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2083
2084         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2085         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2086                                      GFP_KERNEL | __GFP_ZERO);
2087         if (!strtab) {
2088                 dev_err(smmu->dev,
2089                         "failed to allocate linear stream table (%u bytes)\n",
2090                         size);
2091                 return -ENOMEM;
2092         }
2093         cfg->strtab = strtab;
2094         cfg->num_l1_ents = 1 << smmu->sid_bits;
2095
2096         /* Configure strtab_base_cfg for a linear table covering all SIDs */
2097         reg  = STRTAB_BASE_CFG_FMT_LINEAR;
2098         reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2099                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2100         cfg->strtab_base_cfg = reg;
2101
2102         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2103         return 0;
2104 }
2105
2106 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2107 {
2108         u64 reg;
2109         int ret;
2110
2111         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2112                 ret = arm_smmu_init_strtab_2lvl(smmu);
2113         else
2114                 ret = arm_smmu_init_strtab_linear(smmu);
2115
2116         if (ret)
2117                 return ret;
2118
2119         /* Set the strtab base address */
2120         reg  = smmu->strtab_cfg.strtab_dma &
2121                STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
2122         reg |= STRTAB_BASE_RA;
2123         smmu->strtab_cfg.strtab_base = reg;
2124
2125         /* Allocate the first VMID for stage-2 bypass STEs */
2126         set_bit(0, smmu->vmid_map);
2127         return 0;
2128 }
2129
/*
 * Set up the in-memory structures used by the SMMU: the queues first,
 * then the stream table. Returns 0 on success or the first error.
 */
static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
        int err = arm_smmu_init_queues(smmu);

        return err ? err : arm_smmu_init_strtab(smmu);
}
2140
2141 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2142                                    unsigned int reg_off, unsigned int ack_off)
2143 {
2144         u32 reg;
2145
2146         writel_relaxed(val, smmu->base + reg_off);
2147         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2148                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2149 }
2150
/*
 * devm action registered by arm_smmu_setup_msis(): @data is the SMMU's
 * struct device, whose platform MSIs are released here.
 */
static void arm_smmu_free_msis(void *data)
{
        struct device *smmu_dev = data;

        platform_msi_domain_free_irqs(smmu_dev);
}
2156
2157 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2158 {
2159         phys_addr_t doorbell;
2160         struct device *dev = msi_desc_to_dev(desc);
2161         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2162         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2163
2164         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2165         doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
2166
2167         writeq_relaxed(doorbell, smmu->base + cfg[0]);
2168         writel_relaxed(msg->data, smmu->base + cfg[1]);
2169         writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2170 }
2171
2172 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2173 {
2174         struct msi_desc *desc;
2175         int ret, nvec = ARM_SMMU_MAX_MSIS;
2176         struct device *dev = smmu->dev;
2177
2178         /* Clear the MSI address regs */
2179         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2180         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2181
2182         if (smmu->features & ARM_SMMU_FEAT_PRI)
2183                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2184         else
2185                 nvec--;
2186
2187         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2188                 return;
2189
2190         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2191         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2192         if (ret) {
2193                 dev_warn(dev, "failed to allocate MSIs\n");
2194                 return;
2195         }
2196
2197         for_each_msi_entry(desc, dev) {
2198                 switch (desc->platform.msi_index) {
2199                 case EVTQ_MSI_INDEX:
2200                         smmu->evtq.q.irq = desc->irq;
2201                         break;
2202                 case GERROR_MSI_INDEX:
2203                         smmu->gerr_irq = desc->irq;
2204                         break;
2205                 case PRIQ_MSI_INDEX:
2206                         smmu->priq.q.irq = desc->irq;
2207                         break;
2208                 default:        /* Unknown */
2209                         continue;
2210                 }
2211         }
2212
2213         /* Add callback to free MSIs on teardown */
2214         devm_add_action(dev, arm_smmu_free_msis, dev);
2215 }
2216
2217 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2218 {
2219         int ret, irq;
2220         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2221
2222         /* Disable IRQs first */
2223         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2224                                       ARM_SMMU_IRQ_CTRLACK);
2225         if (ret) {
2226                 dev_err(smmu->dev, "failed to disable irqs\n");
2227                 return ret;
2228         }
2229
2230         arm_smmu_setup_msis(smmu);
2231
2232         /* Request interrupt lines */
2233         irq = smmu->evtq.q.irq;
2234         if (irq) {
2235                 ret = devm_request_threaded_irq(smmu->dev, irq,
2236                                                 arm_smmu_evtq_handler,
2237                                                 arm_smmu_evtq_thread,
2238                                                 0, "arm-smmu-v3-evtq", smmu);
2239                 if (ret < 0)
2240                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
2241         }
2242
2243         irq = smmu->cmdq.q.irq;
2244         if (irq) {
2245                 ret = devm_request_irq(smmu->dev, irq,
2246                                        arm_smmu_cmdq_sync_handler, 0,
2247                                        "arm-smmu-v3-cmdq-sync", smmu);
2248                 if (ret < 0)
2249                         dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
2250         }
2251
2252         irq = smmu->gerr_irq;
2253         if (irq) {
2254                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2255                                        0, "arm-smmu-v3-gerror", smmu);
2256                 if (ret < 0)
2257                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
2258         }
2259
2260         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2261                 irq = smmu->priq.q.irq;
2262                 if (irq) {
2263                         ret = devm_request_threaded_irq(smmu->dev, irq,
2264                                                         arm_smmu_priq_handler,
2265                                                         arm_smmu_priq_thread,
2266                                                         0, "arm-smmu-v3-priq",
2267                                                         smmu);
2268                         if (ret < 0)
2269                                 dev_warn(smmu->dev,
2270                                          "failed to enable priq irq\n");
2271                         else
2272                                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2273                 }
2274         }
2275
2276         /* Enable interrupt generation on the SMMU */
2277         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2278                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2279         if (ret)
2280                 dev_warn(smmu->dev, "failed to enable irqs\n");
2281
2282         return 0;
2283 }
2284
2285 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2286 {
2287         int ret;
2288
2289         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2290         if (ret)
2291                 dev_err(smmu->dev, "failed to clear cr0\n");
2292
2293         return ret;
2294 }
2295
2296 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
2297 {
2298         int ret;
2299         u32 reg, enables;
2300         struct arm_smmu_cmdq_ent cmd;
2301
2302         /* Clear CR0 and sync (disables SMMU and queue processing) */
2303         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2304         if (reg & CR0_SMMUEN)
2305                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2306
2307         ret = arm_smmu_device_disable(smmu);
2308         if (ret)
2309                 return ret;
2310
2311         /* CR1 (table and queue memory attributes) */
2312         reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
2313               (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
2314               (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
2315               (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
2316               (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
2317               (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
2318         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2319
2320         /* CR2 (random crap) */
2321         reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2322         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2323
2324         /* Stream table */
2325         writeq_relaxed(smmu->strtab_cfg.strtab_base,
2326                        smmu->base + ARM_SMMU_STRTAB_BASE);
2327         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2328                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2329
2330         /* Command queue */
2331         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2332         writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2333         writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2334
2335         enables = CR0_CMDQEN;
2336         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2337                                       ARM_SMMU_CR0ACK);
2338         if (ret) {
2339                 dev_err(smmu->dev, "failed to enable command queue\n");
2340                 return ret;
2341         }
2342
2343         /* Invalidate any cached configuration */
2344         cmd.opcode = CMDQ_OP_CFGI_ALL;
2345         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2346         cmd.opcode = CMDQ_OP_CMD_SYNC;
2347         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2348
2349         /* Invalidate any stale TLB entries */
2350         if (smmu->features & ARM_SMMU_FEAT_HYP) {
2351                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2352                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2353         }
2354
2355         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2356         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2357         cmd.opcode = CMDQ_OP_CMD_SYNC;
2358         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2359
2360         /* Event queue */
2361         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2362         writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
2363         writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
2364
2365         enables |= CR0_EVTQEN;
2366         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2367                                       ARM_SMMU_CR0ACK);
2368         if (ret) {
2369                 dev_err(smmu->dev, "failed to enable event queue\n");
2370                 return ret;
2371         }
2372
2373         /* PRI queue */
2374         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2375                 writeq_relaxed(smmu->priq.q.q_base,
2376                                smmu->base + ARM_SMMU_PRIQ_BASE);
2377                 writel_relaxed(smmu->priq.q.prod,
2378                                smmu->base + ARM_SMMU_PRIQ_PROD);
2379                 writel_relaxed(smmu->priq.q.cons,
2380                                smmu->base + ARM_SMMU_PRIQ_CONS);
2381
2382                 enables |= CR0_PRIQEN;
2383                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2384                                               ARM_SMMU_CR0ACK);
2385                 if (ret) {
2386                         dev_err(smmu->dev, "failed to enable PRI queue\n");
2387                         return ret;
2388                 }
2389         }
2390
2391         ret = arm_smmu_setup_irqs(smmu);
2392         if (ret) {
2393                 dev_err(smmu->dev, "failed to setup irqs\n");
2394                 return ret;
2395         }
2396
2397         /* Enable the SMMU interface */
2398         enables |= CR0_SMMUEN;
2399         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2400                                       ARM_SMMU_CR0ACK);
2401         if (ret) {
2402                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2403                 return ret;
2404         }
2405
2406         return 0;
2407 }
2408
2409 static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
2410 {
2411         u32 reg;
2412         bool coherent;
2413
2414         /* IDR0 */
2415         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2416
2417         /* 2-level structures */
2418         if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
2419                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2420
2421         if (reg & IDR0_CD2L)
2422                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2423
2424         /*
2425          * Translation table endianness.
2426          * We currently require the same endianness as the CPU, but this
2427          * could be changed later by adding a new IO_PGTABLE_QUIRK.
2428          */
2429         switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
2430         case IDR0_TTENDIAN_MIXED:
2431                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2432                 break;
2433 #ifdef __BIG_ENDIAN
2434         case IDR0_TTENDIAN_BE:
2435                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2436                 break;
2437 #else
2438         case IDR0_TTENDIAN_LE:
2439                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2440                 break;
2441 #endif
2442         default:
2443                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2444                 return -ENXIO;
2445         }
2446
2447         /* Boolean feature flags */
2448         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2449                 smmu->features |= ARM_SMMU_FEAT_PRI;
2450
2451         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2452                 smmu->features |= ARM_SMMU_FEAT_ATS;
2453
2454         if (reg & IDR0_SEV)
2455                 smmu->features |= ARM_SMMU_FEAT_SEV;
2456
2457         if (reg & IDR0_MSI)
2458                 smmu->features |= ARM_SMMU_FEAT_MSI;
2459
2460         if (reg & IDR0_HYP)
2461                 smmu->features |= ARM_SMMU_FEAT_HYP;
2462
2463         /*
2464          * The dma-coherent property is used in preference to the ID
2465          * register, but warn on mismatch.
2466          */
2467         coherent = of_dma_is_coherent(smmu->dev->of_node);
2468         if (coherent)
2469                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2470
2471         if (!!(reg & IDR0_COHACC) != coherent)
2472                 dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
2473                          coherent ? "true" : "false");
2474
2475         switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
2476         case IDR0_STALL_MODEL_STALL:
2477                 /* Fallthrough */
2478         case IDR0_STALL_MODEL_FORCE:
2479                 smmu->features |= ARM_SMMU_FEAT_STALLS;
2480         }
2481
2482         if (reg & IDR0_S1P)
2483                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2484
2485         if (reg & IDR0_S2P)
2486                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2487
2488         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2489                 dev_err(smmu->dev, "no translation support!\n");
2490                 return -ENXIO;
2491         }
2492
2493         /* We only support the AArch64 table format at present */
2494         switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
2495         case IDR0_TTF_AARCH32_64:
2496                 smmu->ias = 40;
2497                 /* Fallthrough */
2498         case IDR0_TTF_AARCH64:
2499                 break;
2500         default:
2501                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2502                 return -ENXIO;
2503         }
2504
2505         /* ASID/VMID sizes */
2506         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2507         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2508
2509         /* IDR1 */
2510         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2511         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2512                 dev_err(smmu->dev, "embedded implementation not supported\n");
2513                 return -ENXIO;
2514         }
2515
2516         /* Queue sizes, capped at 4k */
2517         smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
2518                                        reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
2519         if (!smmu->cmdq.q.max_n_shift) {
2520                 /* Odd alignment restrictions on the base, so ignore for now */
2521                 dev_err(smmu->dev, "unit-length command queue not supported\n");
2522                 return -ENXIO;
2523         }
2524
2525         smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
2526                                        reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
2527         smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
2528                                        reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
2529
2530         /* SID/SSID sizes */
2531         smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
2532         smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
2533
2534         /* IDR5 */
2535         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2536
2537         /* Maximum number of outstanding stalls */
2538         smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
2539                                 & IDR5_STALL_MAX_MASK;
2540
2541         /* Page sizes */
2542         if (reg & IDR5_GRAN64K)
2543                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2544         if (reg & IDR5_GRAN16K)
2545                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2546         if (reg & IDR5_GRAN4K)
2547                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2548
2549         if (arm_smmu_ops.pgsize_bitmap == -1UL)
2550                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2551         else
2552                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2553
2554         /* Output address size */
2555         switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
2556         case IDR5_OAS_32_BIT:
2557                 smmu->oas = 32;
2558                 break;
2559         case IDR5_OAS_36_BIT:
2560                 smmu->oas = 36;
2561                 break;
2562         case IDR5_OAS_40_BIT:
2563                 smmu->oas = 40;
2564                 break;
2565         case IDR5_OAS_42_BIT:
2566                 smmu->oas = 42;
2567                 break;
2568         case IDR5_OAS_44_BIT:
2569                 smmu->oas = 44;
2570                 break;
2571         default:
2572                 dev_info(smmu->dev,
2573                         "unknown output address size. Truncating to 48-bit\n");
2574                 /* Fallthrough */
2575         case IDR5_OAS_48_BIT:
2576                 smmu->oas = 48;
2577         }
2578
2579         /* Set the DMA mask for our table walker */
2580         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2581                 dev_warn(smmu->dev,
2582                          "failed to set DMA mask for table walker\n");
2583
2584         smmu->ias = max(smmu->ias, smmu->oas);
2585
2586         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2587                  smmu->ias, smmu->oas, smmu->features);
2588         return 0;
2589 }
2590
2591 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
2592 {
2593         int irq, ret;
2594         struct resource *res;
2595         struct arm_smmu_device *smmu;
2596         struct device *dev = &pdev->dev;
2597
2598         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2599         if (!smmu) {
2600                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2601                 return -ENOMEM;
2602         }
2603         smmu->dev = dev;
2604
2605         /* Base address */
2606         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2607         if (resource_size(res) + 1 < SZ_128K) {
2608                 dev_err(dev, "MMIO region too small (%pr)\n", res);
2609                 return -EINVAL;
2610         }
2611
2612         smmu->base = devm_ioremap_resource(dev, res);
2613         if (IS_ERR(smmu->base))
2614                 return PTR_ERR(smmu->base);
2615
2616         /* Interrupt lines */
2617         irq = platform_get_irq_byname(pdev, "eventq");
2618         if (irq > 0)
2619                 smmu->evtq.q.irq = irq;
2620
2621         irq = platform_get_irq_byname(pdev, "priq");
2622         if (irq > 0)
2623                 smmu->priq.q.irq = irq;
2624
2625         irq = platform_get_irq_byname(pdev, "cmdq-sync");
2626         if (irq > 0)
2627                 smmu->cmdq.q.irq = irq;
2628
2629         irq = platform_get_irq_byname(pdev, "gerror");
2630         if (irq > 0)
2631                 smmu->gerr_irq = irq;
2632
2633         parse_driver_options(smmu);
2634
2635         /* Probe the h/w */
2636         ret = arm_smmu_device_probe(smmu);
2637         if (ret)
2638                 return ret;
2639
2640         /* Initialise in-memory data structures */
2641         ret = arm_smmu_init_structures(smmu);
2642         if (ret)
2643                 return ret;
2644
2645         /* Record our private device structure */
2646         platform_set_drvdata(pdev, smmu);
2647
2648         /* Reset the device */
2649         return arm_smmu_device_reset(smmu);
2650 }
2651
/*
 * Platform-bus remove callback: fetch the driver data recorded at probe
 * time and pass it to arm_smmu_device_disable(). Always returns 0.
 */
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	arm_smmu_device_disable(platform_get_drvdata(pdev));

	return 0;
}
2659
2660 static struct of_device_id arm_smmu_of_match[] = {
2661         { .compatible = "arm,smmu-v3", },
2662         { },
2663 };
2664 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2665
2666 static struct platform_driver arm_smmu_driver = {
2667         .driver = {
2668                 .name           = "arm-smmu-v3",
2669                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2670         },
2671         .probe  = arm_smmu_device_dt_probe,
2672         .remove = arm_smmu_device_remove,
2673 };
2674
2675 static int __init arm_smmu_init(void)
2676 {
2677         struct device_node *np;
2678         int ret;
2679
2680         np = of_find_matching_node(NULL, arm_smmu_of_match);
2681         if (!np)
2682                 return 0;
2683
2684         of_node_put(np);
2685
2686         ret = platform_driver_register(&arm_smmu_driver);
2687         if (ret)
2688                 return ret;
2689
2690         pci_request_acs();
2691
2692         return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2693 }
2694
2695 static void __exit arm_smmu_exit(void)
2696 {
2697         return platform_driver_unregister(&arm_smmu_driver);
2698 }
2699
2700 subsys_initcall(arm_smmu_init);
2701 module_exit(arm_smmu_exit);
2702
2703 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2704 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2705 MODULE_LICENSE("GPL v2");