1 // SPDX-License-Identifier: GPL-2.0
3 * IOMMU API for ARM architected SMMUv3 implementations.
5 * Copyright (C) 2015 ARM Limited
7 * Author: Will Deacon <will.deacon@arm.com>
9 * This driver is powered by bad coffee and bombay mix.
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
25 #include <linux/of_address.h>
26 #include <linux/of_platform.h>
27 #include <linux/pci.h>
28 #include <linux/pci-ats.h>
29 #include <linux/platform_device.h>
31 #include <linux/amba/bus.h>
33 #include "arm-smmu-v3.h"
34 #include "../../iommu-sva-lib.h"
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 "Disable MSI-based polling for CMD_SYNC completion.");
46 enum arm_smmu_msi_index {
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
55 ARM_SMMU_EVTQ_IRQ_CFG0,
56 ARM_SMMU_EVTQ_IRQ_CFG1,
57 ARM_SMMU_EVTQ_IRQ_CFG2,
59 [GERROR_MSI_INDEX] = {
60 ARM_SMMU_GERROR_IRQ_CFG0,
61 ARM_SMMU_GERROR_IRQ_CFG1,
62 ARM_SMMU_GERROR_IRQ_CFG2,
65 ARM_SMMU_PRIQ_IRQ_CFG0,
66 ARM_SMMU_PRIQ_IRQ_CFG1,
67 ARM_SMMU_PRIQ_IRQ_CFG2,
71 struct arm_smmu_option_prop {
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
80 * Special value used by SVA when a process dies, to quiesce a CD without disabling it.
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
91 static void parse_driver_options(struct arm_smmu_device *smmu)
96 if (of_property_read_bool(smmu->dev->of_node,
97 arm_smmu_options[i].prop)) {
98 smmu->options |= arm_smmu_options[i].opt;
99 dev_notice(smmu->dev, "option %s\n",
100 arm_smmu_options[i].prop);
102 } while (arm_smmu_options[++i].opt);
105 /* Low-level queue manipulation functions */
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
108 u32 space, prod, cons;
110 prod = Q_IDX(q, q->prod);
111 cons = Q_IDX(q, q->cons);
113 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114 space = (1 << q->max_n_shift) - (prod - cons);
121 static bool queue_full(struct arm_smmu_ll_queue *q)
123 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
129 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
135 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
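/*
 * As an illustration: with a hypothetical max_n_shift of 3 the queue
 * holds 8 entries, Q_IDX() is the low 3 bits of prod/cons and Q_WRP() is
 * bit 3.  Once the producer has written 8 entries that the consumer has
 * not consumed, prod == 0b1000 and cons == 0b0000: equal indices with
 * different wrap bits, so queue_full() is true.  queue_empty() is the
 * opposite case, prod == cons with matching wrap bits (e.g. both 0b1010).
 */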
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
144 * Ensure that all CPU accesses (reads and writes) to the queue
145 * are complete before we update the cons pointer.
148 writel_relaxed(q->llq.cons, q->cons_reg);
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
153 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
163 * We can't use the _relaxed() variant here, as we must prevent
164 * speculative reads of the queue before we have determined that
165 * prod has indeed moved.
167 prod = readl(q->prod_reg);
169 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
178 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183 struct arm_smmu_queue_poll *qp)
187 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
193 if (ktime_compare(ktime_get(), qp->timeout) > 0)
198 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
213 for (i = 0; i < n_dwords; ++i)
214 *dst++ = cpu_to_le64(*src++);
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
221 for (i = 0; i < n_dwords; ++i)
222 *dst++ = le64_to_cpu(*src++);
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
227 if (queue_empty(&q->llq))
230 queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231 queue_inc_cons(&q->llq);
232 queue_sync_cons_out(q);
236 /* High-level queue accessors */
237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
239 memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
240 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
242 switch (ent->opcode) {
243 case CMDQ_OP_TLBI_EL2_ALL:
244 case CMDQ_OP_TLBI_NSNH_ALL:
246 case CMDQ_OP_PREFETCH_CFG:
247 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
249 case CMDQ_OP_CFGI_CD:
250 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
252 case CMDQ_OP_CFGI_STE:
253 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
254 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
256 case CMDQ_OP_CFGI_CD_ALL:
257 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
259 case CMDQ_OP_CFGI_ALL:
260 /* Cover the entire SID range */
261 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
263 case CMDQ_OP_TLBI_NH_VA:
264 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
266 case CMDQ_OP_TLBI_EL2_VA:
267 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
268 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
269 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
270 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
271 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
272 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
273 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
275 case CMDQ_OP_TLBI_S2_IPA:
276 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
279 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
284 case CMDQ_OP_TLBI_NH_ASID:
285 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
287 case CMDQ_OP_TLBI_S12_VMALL:
288 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
290 case CMDQ_OP_TLBI_EL2_ASID:
291 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
293 case CMDQ_OP_ATC_INV:
294 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
295 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
296 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
297 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
298 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
299 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
301 case CMDQ_OP_PRI_RESP:
302 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
303 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
304 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
305 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
306 switch (ent->pri.resp) {
314 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
317 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
318 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
319 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
321 case CMDQ_OP_CMD_SYNC:
322 if (ent->sync.msiaddr) {
323 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
324 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
326 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
328 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
329 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
338 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
341 struct arm_smmu_queue *q = &smmu->cmdq.q;
342 struct arm_smmu_cmdq_ent ent = {
343 .opcode = CMDQ_OP_CMD_SYNC,
347 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
348 * payload, so the write will zero the entire command on that platform.
350 if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
351 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
355 arm_smmu_cmdq_build_cmd(cmd, &ent);
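/*
 * Note that, when MSI polling is in use, sync.msiaddr above is the DMA
 * address of this CMD_SYNC's own slot, so the SMMU signals completion by
 * zeroing the first word of the command in place;
 * __arm_smmu_cmdq_poll_until_msi() then just spins until that word reads
 * back as zero.
 */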
358 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
360 static const char * const cerror_str[] = {
361 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
362 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
363 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
364 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
368 u64 cmd[CMDQ_ENT_DWORDS];
369 struct arm_smmu_queue *q = &smmu->cmdq.q;
370 u32 cons = readl_relaxed(q->cons_reg);
371 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
372 struct arm_smmu_cmdq_ent cmd_sync = {
373 .opcode = CMDQ_OP_CMD_SYNC,
376 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
377 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
380 case CMDQ_ERR_CERROR_ABT_IDX:
381 dev_err(smmu->dev, "retrying command fetch\n");
382 case CMDQ_ERR_CERROR_NONE_IDX:
384 case CMDQ_ERR_CERROR_ATC_INV_IDX:
386 * ATC Invalidation Completion timeout. CONS is still pointing
387 * at the CMD_SYNC. Attempt to complete other pending commands
388 * by repeating the CMD_SYNC, though we might well end up back
389 * here since the ATC invalidation may still be pending.
392 case CMDQ_ERR_CERROR_ILL_IDX:
398 * We may have concurrent producers, so we need to be careful
399 * not to touch any of the shadow cmdq state.
401 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
402 dev_err(smmu->dev, "skipping command in error state:\n");
403 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
404 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
406 /* Convert the erroneous command into a CMD_SYNC */
407 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
408 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
412 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
416 * Command queue locking.
417 * This is a form of bastardised rwlock with the following major changes:
419 * - The only LOCK routines are exclusive_trylock() and shared_lock().
420 * Neither have barrier semantics, and instead provide only a control
423 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
424 * fails if the caller appears to be the last lock holder (yes, this is
425 * racy). All successful UNLOCK routines have RELEASE semantics.
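/*
 * Roughly, the lock word below encodes:
 *
 *	0		unlocked
 *	n > 0		held shared by n CPUs
 *	INT_MIN + k	held exclusive; k pending shared_lock() attempts
 *			have incremented it, but it stays negative until
 *			the exclusive holder writes 0 on unlock.
 */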
427 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
432 * We can try to avoid the cmpxchg() loop by simply incrementing the
433 * lock counter. When held in exclusive state, the lock counter is set
434 * to INT_MIN so these increments won't hurt as the value will remain negative.
437 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
441 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
442 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
445 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
447 (void)atomic_dec_return_release(&cmdq->lock);
450 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
452 if (atomic_read(&cmdq->lock) == 1)
455 arm_smmu_cmdq_shared_unlock(cmdq);
459 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
462 local_irq_save(flags); \
463 __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
465 local_irq_restore(flags); \
469 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
471 atomic_set_release(&cmdq->lock, 0); \
472 local_irq_restore(flags); \
477 * Command queue insertion.
478 * This is made fiddly by our attempts to achieve some sort of scalability
479 * since there is one queue shared amongst all of the CPUs in the system. If
480 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
481 * then you'll *love* this monstrosity.
483 * The basic idea is to split the queue up into ranges of commands that are
484 * owned by a given CPU; the owner may not have written all of the commands
485 * itself, but is responsible for advancing the hardware prod pointer when
486 * the time comes. The algorithm is roughly:
488 * 1. Allocate some space in the queue. At this point we also discover
489 * whether the head of the queue is currently owned by another CPU,
490 * or whether we are the owner.
492 * 2. Write our commands into our allocated slots in the queue.
494 * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
496 * 4. If we are an owner:
497 * a. Wait for the previous owner to finish.
498 * b. Mark the queue head as unowned, which tells us the range
499 * that we are responsible for publishing.
500 * c. Wait for all commands in our owned range to become valid.
501 * d. Advance the hardware prod pointer.
502 * e. Tell the next owner we've finished.
504 * 5. If we are inserting a CMD_SYNC (we may or may not have been an
505 * owner), then we need to stick around until it has completed:
506 * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
507 * to clear the first 4 bytes.
508 * b. Otherwise, we spin waiting for the hardware cons pointer to
509 * advance past our command.
511 * The devil is in the details, particularly the use of locking for handling
512 * SYNC completion and freeing up space in the queue before we think that it is full.
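/*
 * A rough walk-through of the above: CPU A's cmpxchg() finds
 * CMDQ_PROD_OWNED_FLAG clear, so it becomes the owner and sets the flag
 * while allocating its slots; CPU B allocates the following slots, sees
 * the flag already set and therefore only writes and validates its own
 * commands.  CPU A then waits for the previous owner, clears the owned
 * flag (capturing the end of the range it must publish, which includes
 * CPU B's slots), polls the valid map for that whole range, advances the
 * hardware prod pointer and finally hands over via owner_prod.
 */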
515 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
516 u32 sprod, u32 eprod, bool set)
518 u32 swidx, sbidx, ewidx, ebidx;
519 struct arm_smmu_ll_queue llq = {
520 .max_n_shift = cmdq->q.llq.max_n_shift,
524 ewidx = BIT_WORD(Q_IDX(&llq, eprod));
525 ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
527 while (llq.prod != eprod) {
530 u32 limit = BITS_PER_LONG;
532 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
533 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
535 ptr = &cmdq->valid_map[swidx];
537 if ((swidx == ewidx) && (sbidx < ebidx))
540 mask = GENMASK(limit - 1, sbidx);
543 * The valid bit is the inverse of the wrap bit. This means
544 * that a zero-initialised queue is invalid and, after marking
545 * all entries as valid, they become invalid again when we wrap.
549 atomic_long_xor(mask, ptr);
553 valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
554 atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
557 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
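/*
 * Concretely: on a pass with the wrap bit clear, a producer publishes an
 * entry by XOR-flipping its bit from 0 to 1 and a waiter polls for 1s
 * (valid == mask); on the next pass the flip goes from 1 back to 0 and
 * the waiter polls for 0s, which is what the (ULONG_MAX + !!Q_WRP(...))
 * expression above evaluates to.
 */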
561 /* Mark all entries in the range [sprod, eprod) as valid */
562 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
563 u32 sprod, u32 eprod)
565 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
568 /* Wait for all entries in the range [sprod, eprod) to become valid */
569 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
570 u32 sprod, u32 eprod)
572 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
575 /* Wait for the command queue to become non-full */
576 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
577 struct arm_smmu_ll_queue *llq)
580 struct arm_smmu_queue_poll qp;
581 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
585 * Try to update our copy of cons by grabbing exclusive cmdq access. If
586 * that fails, spin until somebody else updates it for us.
588 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
589 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
590 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
591 llq->val = READ_ONCE(cmdq->q.llq.val);
595 queue_poll_init(smmu, &qp);
597 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
598 if (!queue_full(llq))
601 ret = queue_poll(&qp);
608 * Wait until the SMMU signals a CMD_SYNC completion MSI.
609 * Must be called with the cmdq lock held in some capacity.
611 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
612 struct arm_smmu_ll_queue *llq)
615 struct arm_smmu_queue_poll qp;
616 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
617 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
619 queue_poll_init(smmu, &qp);
622 * The MSI won't generate an event, since it's being written back
623 * into the command queue.
626 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
627 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
632 * Wait until the SMMU cons index passes llq->prod.
633 * Must be called with the cmdq lock held in some capacity.
635 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
636 struct arm_smmu_ll_queue *llq)
638 struct arm_smmu_queue_poll qp;
639 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
640 u32 prod = llq->prod;
643 queue_poll_init(smmu, &qp);
644 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
646 if (queue_consumed(llq, prod))
649 ret = queue_poll(&qp);
652 * This needs to be a readl() so that our subsequent call
653 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
655 * Specifically, we need to ensure that we observe all
656 * shared_lock()s by other CMD_SYNCs that share our owner,
657 * so that a failing call to tryunlock() means that we're
658 * the last one out and therefore we can safely advance
659 * cmdq->q.llq.cons. Roughly speaking:
 * (ASCII timing diagram condensed:) CPU 0 takes shared_lock() and then
 * publishes its slots in the valid map; CPU 1, the owner, polls the
 * valid map (a control dependency) before writing the prod register;
 * the SMMU only advances cons after that write, so a readl() of the
 * cons register that sees our command consumed also requires us to see
 * CPU 0's shared_lock() acquisition.
679 llq->cons = readl(cmdq->q.cons_reg);
685 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
686 struct arm_smmu_ll_queue *llq)
688 if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
689 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
691 return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
694 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
698 struct arm_smmu_ll_queue llq = {
699 .max_n_shift = cmdq->q.llq.max_n_shift,
703 for (i = 0; i < n; ++i) {
704 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
706 prod = queue_inc_prod_n(&llq, i);
707 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
712 * This is the actual insertion function, and provides the following
713 * ordering guarantees to callers:
715 * - There is a dma_wmb() before publishing any commands to the queue.
716 * This can be relied upon to order prior writes to data structures
717 * in memory (such as a CD or an STE) before the command.
719 * - On completion of a CMD_SYNC, there is a control dependency.
720 * This can be relied upon to order subsequent writes to memory (e.g.
721 * freeing an IOVA) after completion of the CMD_SYNC.
723 * - Command insertion is totally ordered, so if two CPUs each race to
724 * insert their own list of commands then all of the commands from one
725 * CPU will appear before any of the commands from the other CPU.
727 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
728 u64 *cmds, int n, bool sync)
730 u64 cmd_sync[CMDQ_ENT_DWORDS];
734 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
735 struct arm_smmu_ll_queue llq = {
736 .max_n_shift = cmdq->q.llq.max_n_shift,
740 /* 1. Allocate some space in the queue */
741 local_irq_save(flags);
742 llq.val = READ_ONCE(cmdq->q.llq.val);
746 while (!queue_has_space(&llq, n + sync)) {
747 local_irq_restore(flags);
748 if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
749 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
750 local_irq_save(flags);
753 head.cons = llq.cons;
754 head.prod = queue_inc_prod_n(&llq, n + sync) |
755 CMDQ_PROD_OWNED_FLAG;
757 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
763 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
764 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
765 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
768 * 2. Write our commands into the queue
769 * Dependency ordering from the cmpxchg() loop above.
771 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
773 prod = queue_inc_prod_n(&llq, n);
774 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
775 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
778 * In order to determine completion of our CMD_SYNC, we must
779 * ensure that the queue can't wrap twice without us noticing.
780 * We achieve that by taking the cmdq lock as shared before
781 * marking our slot as valid.
783 arm_smmu_cmdq_shared_lock(cmdq);
786 /* 3. Mark our slots as valid, ensuring commands are visible first */
788 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
790 /* 4. If we are the owner, take control of the SMMU hardware */
792 /* a. Wait for previous owner to finish */
793 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
795 /* b. Stop gathering work by clearing the owned flag */
796 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
797 &cmdq->q.llq.atomic.prod);
798 prod &= ~CMDQ_PROD_OWNED_FLAG;
801 * c. Wait for any gathered work to be written to the queue.
802 * Note that we read our own entries so that we have the control
803 * dependency required by (d).
805 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
808 * d. Advance the hardware prod pointer
809 * Control dependency ordering from the entries becoming valid.
811 writel_relaxed(prod, cmdq->q.prod_reg);
814 * e. Tell the next owner we're done
815 * Make sure we've updated the hardware first, so that we don't
816 * race to update prod and potentially move it backwards.
818 atomic_set_release(&cmdq->owner_prod, prod);
821 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
823 llq.prod = queue_inc_prod_n(&llq, n);
824 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
826 dev_err_ratelimited(smmu->dev,
827 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
829 readl_relaxed(cmdq->q.prod_reg),
830 readl_relaxed(cmdq->q.cons_reg));
834 * Try to unlock the cmdq lock. This will fail if we're the last
835 * reader, in which case we can safely update cmdq->q.llq.cons
837 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
838 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
839 arm_smmu_cmdq_shared_unlock(cmdq);
843 local_irq_restore(flags);
847 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
848 struct arm_smmu_cmdq_ent *ent)
850 u64 cmd[CMDQ_ENT_DWORDS];
852 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
853 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
858 return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
861 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
863 return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
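/*
 * A typical caller pairs the two helpers above, for example:
 *
 *	struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_TLBI_NH_ASID };
 *
 *	cmd.tlbi.asid = asid;
 *	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 *	arm_smmu_cmdq_issue_sync(smmu);
 *
 * as arm_smmu_tlb_inv_asid() below does; the trailing sync makes the
 * invalidation observable before the caller carries on.
 */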
866 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
867 struct arm_smmu_cmdq_batch *cmds,
868 struct arm_smmu_cmdq_ent *cmd)
870 if (cmds->num == CMDQ_BATCH_ENTRIES) {
871 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
874 arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
878 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
879 struct arm_smmu_cmdq_batch *cmds)
881 return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
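/*
 * arm_smmu_cmdq_batch_add() flushes the buffer (without a sync) once
 * CMDQ_BATCH_ENTRIES commands have accumulated, and
 * arm_smmu_cmdq_batch_submit() pushes whatever remains with a trailing
 * CMD_SYNC.  arm_smmu_sync_cd() below is a typical user: one CFGI_CD per
 * stream ID, then a single submit.
 */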
884 static int arm_smmu_page_response(struct device *dev,
885 struct iommu_fault_event *unused,
886 struct iommu_page_response *resp)
888 struct arm_smmu_cmdq_ent cmd = {0};
889 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
890 int sid = master->streams[0].id;
892 if (master->stall_enabled) {
893 cmd.opcode = CMDQ_OP_RESUME;
894 cmd.resume.sid = sid;
895 cmd.resume.stag = resp->grpid;
896 switch (resp->code) {
897 case IOMMU_PAGE_RESP_INVALID:
898 case IOMMU_PAGE_RESP_FAILURE:
899 cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
901 case IOMMU_PAGE_RESP_SUCCESS:
902 cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
911 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
913 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
914 * RESUME consumption guarantees that the stalled transaction will be
915 * terminated... at some point in the future. PRI_RESP is fire and forget.
922 /* Context descriptor manipulation functions */
923 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
925 struct arm_smmu_cmdq_ent cmd = {
926 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
927 CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
931 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
932 arm_smmu_cmdq_issue_sync(smmu);
935 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
940 struct arm_smmu_master *master;
941 struct arm_smmu_cmdq_batch cmds = {};
942 struct arm_smmu_device *smmu = smmu_domain->smmu;
943 struct arm_smmu_cmdq_ent cmd = {
944 .opcode = CMDQ_OP_CFGI_CD,
951 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
952 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
953 for (i = 0; i < master->num_streams; i++) {
954 cmd.cfgi.sid = master->streams[i].id;
955 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
958 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
960 arm_smmu_cmdq_batch_submit(smmu, &cmds);
963 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
964 struct arm_smmu_l1_ctx_desc *l1_desc)
966 size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
968 l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
969 &l1_desc->l2ptr_dma, GFP_KERNEL);
970 if (!l1_desc->l2ptr) {
972 "failed to allocate context descriptor table\n");
978 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
979 struct arm_smmu_l1_ctx_desc *l1_desc)
981 u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
984 /* See comment in arm_smmu_write_ctx_desc() */
985 WRITE_ONCE(*dst, cpu_to_le64(val));
988 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
993 struct arm_smmu_l1_ctx_desc *l1_desc;
994 struct arm_smmu_device *smmu = smmu_domain->smmu;
995 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
997 if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
998 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1000 idx = ssid >> CTXDESC_SPLIT;
1001 l1_desc = &cdcfg->l1_desc[idx];
1002 if (!l1_desc->l2ptr) {
1003 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1006 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1007 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1008 /* An invalid L1CD can be cached */
1009 arm_smmu_sync_cd(smmu_domain, ssid, false);
1011 idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1012 return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
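/*
 * For example, assuming CTXDESC_SPLIT is 10 (1024-entry leaf tables, as
 * defined in arm-smmu-v3.h): ssid 0x1403 selects level-1 descriptor
 * 0x1403 >> 10 = 5 and CD slot 0x1403 & 0x3ff = 3 of that leaf, so the
 * returned pointer is l1_desc[5].l2ptr + 3 * CTXDESC_CD_DWORDS.
 */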
1015 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1016 struct arm_smmu_ctx_desc *cd)
1019 * This function handles the following cases:
1021 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1022 * (2) Install a secondary CD, for SID+SSID traffic.
1023 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1024 * CD, then invalidate the old entry and mappings.
1025 * (4) Quiesce the context without clearing the valid bit. Disable
1026 * translation, and ignore any translation fault.
1027 * (5) Remove a secondary CD.
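/*
 * Roughly, the cases above map onto the code below as:
 *
 *	cd == NULL		-> (5) clear the descriptor
 *	cd == &quiet_cd		-> (4) set TCR.EPD0, leave V set
 *	CD already valid	-> (3) swap in the new ASID only
 *	otherwise		-> (1)/(2) write a complete new CD
 */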
1033 if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1036 cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1040 val = le64_to_cpu(cdptr[0]);
1041 cd_live = !!(val & CTXDESC_CD_0_V);
1043 if (!cd) { /* (5) */
1045 } else if (cd == &quiet_cd) { /* (4) */
1046 val |= CTXDESC_CD_0_TCR_EPD0;
1047 } else if (cd_live) { /* (3) */
1048 val &= ~CTXDESC_CD_0_ASID;
1049 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1051 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1052 * this substream's traffic
1054 } else { /* (1) and (2) */
1055 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1057 cdptr[3] = cpu_to_le64(cd->mair);
1060 * STE is live, and the SMMU might read dwords of this CD in any
1061 * order. Ensure that it observes valid values before reading V=1.
1064 arm_smmu_sync_cd(smmu_domain, ssid, true);
1070 CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1071 (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1073 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1076 if (smmu_domain->stall_enabled)
1077 val |= CTXDESC_CD_0_S;
1081 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1082 * "Configuration structures and configuration invalidation completion"
1084 * The size of single-copy atomic reads made by the SMMU is
1085 * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1086 * field within an aligned 64-bit span of a structure can be altered
1087 * without first making the structure invalid.
1089 WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1090 arm_smmu_sync_cd(smmu_domain, ssid, true);
1094 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1098 size_t max_contexts;
1099 struct arm_smmu_device *smmu = smmu_domain->smmu;
1100 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1101 struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1103 max_contexts = 1 << cfg->s1cdmax;
1105 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1106 max_contexts <= CTXDESC_L2_ENTRIES) {
1107 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1108 cdcfg->num_l1_ents = max_contexts;
1110 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1112 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1113 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1114 CTXDESC_L2_ENTRIES);
1116 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1117 sizeof(*cdcfg->l1_desc),
1119 if (!cdcfg->l1_desc)
1122 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1125 cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1127 if (!cdcfg->cdtab) {
1128 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1136 if (cdcfg->l1_desc) {
1137 devm_kfree(smmu->dev, cdcfg->l1_desc);
1138 cdcfg->l1_desc = NULL;
1143 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1146 size_t size, l1size;
1147 struct arm_smmu_device *smmu = smmu_domain->smmu;
1148 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1150 if (cdcfg->l1_desc) {
1151 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1153 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1154 if (!cdcfg->l1_desc[i].l2ptr)
1157 dmam_free_coherent(smmu->dev, size,
1158 cdcfg->l1_desc[i].l2ptr,
1159 cdcfg->l1_desc[i].l2ptr_dma);
1161 devm_kfree(smmu->dev, cdcfg->l1_desc);
1162 cdcfg->l1_desc = NULL;
1164 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1166 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1169 dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1170 cdcfg->cdtab_dma = 0;
1171 cdcfg->cdtab = NULL;
1174 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1177 struct arm_smmu_ctx_desc *old_cd;
1182 free = refcount_dec_and_test(&cd->refs);
1184 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1185 WARN_ON(old_cd != cd);
1190 /* Stream table manipulation functions */
1192 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1196 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1197 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1199 /* See comment in arm_smmu_write_ctx_desc() */
1200 WRITE_ONCE(*dst, cpu_to_le64(val));
1203 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1205 struct arm_smmu_cmdq_ent cmd = {
1206 .opcode = CMDQ_OP_CFGI_STE,
1213 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1214 arm_smmu_cmdq_issue_sync(smmu);
1217 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1221 * This is hideously complicated, but we only really care about
1222 * three cases at the moment:
1224 * 1. Invalid (all zero) -> bypass/fault (init)
1225 * 2. Bypass/fault -> translation/bypass (attach)
1226 * 3. Translation/bypass -> bypass/fault (detach)
1228 * Given that we can't update the STE atomically and the SMMU
1229 * doesn't read the thing in a defined order, that leaves us
1230 * with the following maintenance requirements:
1232 * 1. Update Config, return (init time STEs aren't live)
1233 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1234 * 3. Update Config, sync
1236 u64 val = le64_to_cpu(dst[0]);
1237 bool ste_live = false;
1238 struct arm_smmu_device *smmu = NULL;
1239 struct arm_smmu_s1_cfg *s1_cfg = NULL;
1240 struct arm_smmu_s2_cfg *s2_cfg = NULL;
1241 struct arm_smmu_domain *smmu_domain = NULL;
1242 struct arm_smmu_cmdq_ent prefetch_cmd = {
1243 .opcode = CMDQ_OP_PREFETCH_CFG,
1250 smmu_domain = master->domain;
1251 smmu = master->smmu;
1255 switch (smmu_domain->stage) {
1256 case ARM_SMMU_DOMAIN_S1:
1257 s1_cfg = &smmu_domain->s1_cfg;
1259 case ARM_SMMU_DOMAIN_S2:
1260 case ARM_SMMU_DOMAIN_NESTED:
1261 s2_cfg = &smmu_domain->s2_cfg;
1268 if (val & STRTAB_STE_0_V) {
1269 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1270 case STRTAB_STE_0_CFG_BYPASS:
1272 case STRTAB_STE_0_CFG_S1_TRANS:
1273 case STRTAB_STE_0_CFG_S2_TRANS:
1276 case STRTAB_STE_0_CFG_ABORT:
1277 BUG_ON(!disable_bypass);
1280 BUG(); /* STE corruption */
1284 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1285 val = STRTAB_STE_0_V;
1288 if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1289 if (!smmu_domain && disable_bypass)
1290 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1292 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1294 dst[0] = cpu_to_le64(val);
1295 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1296 STRTAB_STE_1_SHCFG_INCOMING));
1297 dst[2] = 0; /* Nuke the VMID */
1299 * The SMMU can perform negative caching, so we must sync
1300 * the STE regardless of whether the old value was live.
1303 arm_smmu_sync_ste_for_sid(smmu, sid);
1308 u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1309 STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1312 dst[1] = cpu_to_le64(
1313 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1314 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1315 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1316 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1317 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1319 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1320 !master->stall_enabled)
1321 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1323 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1324 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1325 FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1326 FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1331 dst[2] = cpu_to_le64(
1332 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1333 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1335 STRTAB_STE_2_S2ENDI |
1337 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1340 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1342 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1345 if (master->ats_enabled)
1346 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1347 STRTAB_STE_1_EATS_TRANS));
1349 arm_smmu_sync_ste_for_sid(smmu, sid);
1350 /* See comment in arm_smmu_write_ctx_desc() */
1351 WRITE_ONCE(dst[0], cpu_to_le64(val));
1352 arm_smmu_sync_ste_for_sid(smmu, sid);
1354 /* It's likely that we'll want to use the new STE soon */
1355 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1356 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1359 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1363 for (i = 0; i < nent; ++i) {
1364 arm_smmu_write_strtab_ent(NULL, -1, strtab);
1365 strtab += STRTAB_STE_DWORDS;
1369 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1373 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1374 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1379 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1380 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1382 desc->span = STRTAB_SPLIT + 1;
1383 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1387 "failed to allocate l2 stream table for SID %u\n",
1392 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1393 arm_smmu_write_strtab_l1_desc(strtab, desc);
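/*
 * Assuming STRTAB_SPLIT is 8 and STRTAB_STE_DWORDS is 8 (as defined in
 * arm-smmu-v3.h), the size computed above is 1 << (8 + 3 + 3) = 16KiB:
 * one 64-byte STE for each of the 256 stream IDs covered by this
 * level-2 table.
 */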
1397 static struct arm_smmu_master *
1398 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1400 struct rb_node *node;
1401 struct arm_smmu_stream *stream;
1403 lockdep_assert_held(&smmu->streams_mutex);
1405 node = smmu->streams.rb_node;
1407 stream = rb_entry(node, struct arm_smmu_stream, node);
1408 if (stream->id < sid)
1409 node = node->rb_right;
1410 else if (stream->id > sid)
1411 node = node->rb_left;
1413 return stream->master;
1419 /* IRQ and event handlers */
1420 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1425 struct arm_smmu_master *master;
1426 bool ssid_valid = evt[0] & EVTQ_0_SSV;
1427 u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1428 struct iommu_fault_event fault_evt = { };
1429 struct iommu_fault *flt = &fault_evt.fault;
1431 switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1432 case EVT_ID_TRANSLATION_FAULT:
1433 reason = IOMMU_FAULT_REASON_PTE_FETCH;
1435 case EVT_ID_ADDR_SIZE_FAULT:
1436 reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1438 case EVT_ID_ACCESS_FAULT:
1439 reason = IOMMU_FAULT_REASON_ACCESS;
1441 case EVT_ID_PERMISSION_FAULT:
1442 reason = IOMMU_FAULT_REASON_PERMISSION;
1448 /* Stage-2 is always pinned at the moment */
1449 if (evt[1] & EVTQ_1_S2)
1452 if (evt[1] & EVTQ_1_RnW)
1453 perm |= IOMMU_FAULT_PERM_READ;
1455 perm |= IOMMU_FAULT_PERM_WRITE;
1457 if (evt[1] & EVTQ_1_InD)
1458 perm |= IOMMU_FAULT_PERM_EXEC;
1460 if (evt[1] & EVTQ_1_PnU)
1461 perm |= IOMMU_FAULT_PERM_PRIV;
1463 if (evt[1] & EVTQ_1_STALL) {
1464 flt->type = IOMMU_FAULT_PAGE_REQ;
1465 flt->prm = (struct iommu_fault_page_request) {
1466 .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1467 .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1469 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1473 flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1474 flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1477 flt->type = IOMMU_FAULT_DMA_UNRECOV;
1478 flt->event = (struct iommu_fault_unrecoverable) {
1480 .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1482 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1486 flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1487 flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1491 mutex_lock(&smmu->streams_mutex);
1492 master = arm_smmu_find_master(smmu, sid);
1498 ret = iommu_report_device_fault(master->dev, &fault_evt);
1499 if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1500 /* Nobody cared, abort the access */
1501 struct iommu_page_response resp = {
1502 .pasid = flt->prm.pasid,
1503 .grpid = flt->prm.grpid,
1504 .code = IOMMU_PAGE_RESP_FAILURE,
1506 arm_smmu_page_response(master->dev, &fault_evt, &resp);
1510 mutex_unlock(&smmu->streams_mutex);
1514 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1517 struct arm_smmu_device *smmu = dev;
1518 struct arm_smmu_queue *q = &smmu->evtq.q;
1519 struct arm_smmu_ll_queue *llq = &q->llq;
1520 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1521 DEFAULT_RATELIMIT_BURST);
1522 u64 evt[EVTQ_ENT_DWORDS];
1525 while (!queue_remove_raw(q, evt)) {
1526 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1528 ret = arm_smmu_handle_evt(smmu, evt);
1529 if (!ret || !__ratelimit(&rs))
1532 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1533 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1534 dev_info(smmu->dev, "\t0x%016llx\n",
1535 (unsigned long long)evt[i]);
1540 * Not much we can do on overflow, so scream and pretend we're
1543 if (queue_sync_prod_in(q) == -EOVERFLOW)
1544 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1545 } while (!queue_empty(llq));
1547 /* Sync our overflow flag, as we believe we're up to speed */
1548 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1549 Q_IDX(llq, llq->cons);
1553 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1559 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1560 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1561 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1562 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1563 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1565 dev_info(smmu->dev, "unexpected PRI request received:\n");
1567 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1568 sid, ssid, grpid, last ? "L" : "",
1569 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1570 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1571 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1572 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1573 evt[1] & PRIQ_1_ADDR_MASK);
1576 struct arm_smmu_cmdq_ent cmd = {
1577 .opcode = CMDQ_OP_PRI_RESP,
1578 .substream_valid = ssv,
1583 .resp = PRI_RESP_DENY,
1587 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1591 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1593 struct arm_smmu_device *smmu = dev;
1594 struct arm_smmu_queue *q = &smmu->priq.q;
1595 struct arm_smmu_ll_queue *llq = &q->llq;
1596 u64 evt[PRIQ_ENT_DWORDS];
1599 while (!queue_remove_raw(q, evt))
1600 arm_smmu_handle_ppr(smmu, evt);
1602 if (queue_sync_prod_in(q) == -EOVERFLOW)
1603 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1604 } while (!queue_empty(llq));
1606 /* Sync our overflow flag, as we believe we're up to speed */
1607 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1608 Q_IDX(llq, llq->cons);
1609 queue_sync_cons_out(q);
1613 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1615 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1617 u32 gerror, gerrorn, active;
1618 struct arm_smmu_device *smmu = dev;
1620 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1621 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1623 active = gerror ^ gerrorn;
1624 if (!(active & GERROR_ERR_MASK))
1625 return IRQ_NONE; /* No errors pending */
1628 "unexpected global error reported (0x%08x), this could be serious\n",
1631 if (active & GERROR_SFM_ERR) {
1632 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1633 arm_smmu_device_disable(smmu);
1636 if (active & GERROR_MSI_GERROR_ABT_ERR)
1637 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1639 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1640 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1642 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1643 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1645 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1646 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1648 if (active & GERROR_PRIQ_ABT_ERR)
1649 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1651 if (active & GERROR_EVTQ_ABT_ERR)
1652 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1654 if (active & GERROR_CMDQ_ERR)
1655 arm_smmu_cmdq_skip_err(smmu);
1657 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1661 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1663 struct arm_smmu_device *smmu = dev;
1665 arm_smmu_evtq_thread(irq, dev);
1666 if (smmu->features & ARM_SMMU_FEAT_PRI)
1667 arm_smmu_priq_thread(irq, dev);
1672 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1674 arm_smmu_gerror_handler(irq, dev);
1675 return IRQ_WAKE_THREAD;
1679 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1680 struct arm_smmu_cmdq_ent *cmd)
1684 /* ATC invalidates are always on 4096-bytes pages */
1685 size_t inval_grain_shift = 12;
1686 unsigned long page_start, page_end;
1691 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1692 * prefix. In that case all ATC entries within the address range are
1693 * invalidated, including those that were requested with a PASID! There
1694 * is no way to invalidate only entries without PASID.
1696 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1697 * traffic), translation requests without PASID create ATC entries
1698 * without PASID, which must be invalidated with substream_valid clear.
1699 * This has the unpleasant side-effect of invalidating all PASID-tagged
1700 * ATC entries within the address range.
1702 *cmd = (struct arm_smmu_cmdq_ent) {
1703 .opcode = CMDQ_OP_ATC_INV,
1704 .substream_valid = !!ssid,
1709 cmd->atc.size = ATC_INV_SIZE_ALL;
1713 page_start = iova >> inval_grain_shift;
1714 page_end = (iova + size - 1) >> inval_grain_shift;
1717 * In an ATS Invalidate Request, the address must be aligned on the
1718 * range size, which must be a power of two number of page sizes. We
1719 * thus have to choose between grossly over-invalidating the region, or
1720 * splitting the invalidation into multiple commands. For simplicity
1721 * we'll go with the first solution, but should refine it in the future
1722 * if multiple commands are shown to be more efficient.
1724 * Find the smallest power of two that covers the range. The most
1725 * significant differing bit between the start and end addresses,
1726 * fls(start ^ end), indicates the required span. For example:
1728 * We want to invalidate pages [8; 11]. This is already the ideal range:
1729 * x = 0b1000 ^ 0b1011 = 0b11
1730 * span = 1 << fls(x) = 4
1732 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1733 * x = 0b0111 ^ 0b1010 = 0b1101
1734 * span = 1 << fls(x) = 16
1736 log2_span = fls_long(page_start ^ page_end);
1737 span_mask = (1ULL << log2_span) - 1;
1739 page_start &= ~span_mask;
1741 cmd->atc.addr = page_start << inval_grain_shift;
1742 cmd->atc.size = log2_span;
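/*
 * Continuing the example above: for pages [7; 10], log2_span = 4 and
 * span_mask = 0xf, so page_start is rounded down to 0 and the command
 * carries atc.addr == 0 with atc.size == 4, i.e. invalidate 2^4 pages
 * starting at address 0.
 */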
1745 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1748 struct arm_smmu_cmdq_ent cmd;
1750 arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1752 for (i = 0; i < master->num_streams; i++) {
1753 cmd.atc.sid = master->streams[i].id;
1754 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1757 return arm_smmu_cmdq_issue_sync(master->smmu);
1760 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1761 unsigned long iova, size_t size)
1764 unsigned long flags;
1765 struct arm_smmu_cmdq_ent cmd;
1766 struct arm_smmu_master *master;
1767 struct arm_smmu_cmdq_batch cmds = {};
1769 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1773 * Ensure that we've completed prior invalidation of the main TLBs
1774 * before we read 'nr_ats_masters' in case of a concurrent call to
1775 * arm_smmu_enable_ats():
1777 * // unmap() // arm_smmu_enable_ats()
1778 * TLBI+SYNC atomic_inc(&nr_ats_masters);
1780 * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
1782 * Ensures that we always see the incremented 'nr_ats_masters' count if
1783 * ATS was enabled at the PCI device before completion of the TLBI.
1786 if (!atomic_read(&smmu_domain->nr_ats_masters))
1789 arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1791 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1792 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1793 if (!master->ats_enabled)
1796 for (i = 0; i < master->num_streams; i++) {
1797 cmd.atc.sid = master->streams[i].id;
1798 arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1801 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1803 return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1806 /* IO_PGTABLE API */
1807 static void arm_smmu_tlb_inv_context(void *cookie)
1809 struct arm_smmu_domain *smmu_domain = cookie;
1810 struct arm_smmu_device *smmu = smmu_domain->smmu;
1811 struct arm_smmu_cmdq_ent cmd;
1814 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1815 * PTEs previously cleared by unmaps on the current CPU not yet visible
1816 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1817 * insertion to guarantee those are observed before the TLBI. Do be careful, 007.
1820 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1821 arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1823 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1824 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1825 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1826 arm_smmu_cmdq_issue_sync(smmu);
1828 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1831 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1832 unsigned long iova, size_t size,
1834 struct arm_smmu_domain *smmu_domain)
1836 struct arm_smmu_device *smmu = smmu_domain->smmu;
1837 unsigned long end = iova + size, num_pages = 0, tg = 0;
1838 size_t inv_range = granule;
1839 struct arm_smmu_cmdq_batch cmds = {};
1844 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1845 /* Get the leaf page size */
1846 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1848 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1849 cmd->tlbi.tg = (tg - 10) / 2;
1851 /* Determine what level the granule is at */
1852 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1854 num_pages = size >> tg;
1857 while (iova < end) {
1858 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1860 * On each iteration of the loop, the range is 5 bits
1861 * worth of the aligned size remaining.
1862 * The range in pages is:
1864 * range = (num_pages & (0x1f << __ffs(num_pages)))
1866 unsigned long scale, num;
1868 /* Determine the power of 2 multiple number of pages */
1869 scale = __ffs(num_pages);
1870 cmd->tlbi.scale = scale;
1872 /* Determine how many chunks of 2^scale size we have */
1873 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1874 cmd->tlbi.num = num - 1;
1876 /* range is num * 2^scale * pgsize */
1877 inv_range = num << (scale + tg);
1879 /* Clear out the lower order bits for the next iteration */
1880 num_pages -= num << scale;
1883 cmd->tlbi.addr = iova;
1884 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1887 arm_smmu_cmdq_batch_submit(smmu, &cmds);
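/*
 * A worked example, assuming a 4KiB leaf page size (tg = 12, so
 * cmd->tlbi.tg = 1): invalidating 33 pages on a range-capable SMMU emits
 * two commands.  First scale = __ffs(33) = 0 and num = 1, covering a
 * single page; then scale = __ffs(32) = 5 and num = 1, covering the
 * remaining 32 pages as one 128KiB range.
 */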
1890 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1891 size_t granule, bool leaf,
1892 struct arm_smmu_domain *smmu_domain)
1894 struct arm_smmu_cmdq_ent cmd = {
1900 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1901 cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1902 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1903 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1905 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1906 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1908 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1911 * Unfortunately, this can't be leaf-only since we may have
1912 * zapped an entire table.
1914 arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1917 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1918 size_t granule, bool leaf,
1919 struct arm_smmu_domain *smmu_domain)
1921 struct arm_smmu_cmdq_ent cmd = {
1922 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1923 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1930 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1933 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1934 unsigned long iova, size_t granule,
1937 struct arm_smmu_domain *smmu_domain = cookie;
1938 struct iommu_domain *domain = &smmu_domain->domain;
1940 iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1943 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1944 size_t granule, void *cookie)
1946 arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1949 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1950 .tlb_flush_all = arm_smmu_tlb_inv_context,
1951 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1952 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
1956 static bool arm_smmu_capable(enum iommu_cap cap)
1959 case IOMMU_CAP_CACHE_COHERENCY:
1961 case IOMMU_CAP_NOEXEC:
1968 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1970 struct arm_smmu_domain *smmu_domain;
1972 if (type != IOMMU_DOMAIN_UNMANAGED &&
1973 type != IOMMU_DOMAIN_DMA &&
1974 type != IOMMU_DOMAIN_IDENTITY)
1978 * Allocate the domain and initialise some of its data structures.
1979 * We can't really do anything meaningful until we've added a master.
1982 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1986 if (type == IOMMU_DOMAIN_DMA &&
1987 iommu_get_dma_cookie(&smmu_domain->domain)) {
1992 mutex_init(&smmu_domain->init_mutex);
1993 INIT_LIST_HEAD(&smmu_domain->devices);
1994 spin_lock_init(&smmu_domain->devices_lock);
1995 INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
1997 return &smmu_domain->domain;
2000 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2002 int idx, size = 1 << span;
2005 idx = find_first_zero_bit(map, size);
2008 } while (test_and_set_bit(idx, map));
2013 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2015 clear_bit(idx, map);
2018 static void arm_smmu_domain_free(struct iommu_domain *domain)
2020 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2021 struct arm_smmu_device *smmu = smmu_domain->smmu;
2023 iommu_put_dma_cookie(domain);
2024 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2026 /* Free the CD and ASID, if we allocated them */
2027 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2028 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2030 /* Prevent SVA from touching the CD while we're freeing it */
2031 mutex_lock(&arm_smmu_asid_lock);
2032 if (cfg->cdcfg.cdtab)
2033 arm_smmu_free_cd_tables(smmu_domain);
2034 arm_smmu_free_asid(&cfg->cd);
2035 mutex_unlock(&arm_smmu_asid_lock);
2037 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2039 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2045 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2046 struct arm_smmu_master *master,
2047 struct io_pgtable_cfg *pgtbl_cfg)
2051 struct arm_smmu_device *smmu = smmu_domain->smmu;
2052 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2053 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2055 refcount_set(&cfg->cd.refs, 1);
2057 /* Prevent SVA from modifying the ASID until it is written to the CD */
2058 mutex_lock(&arm_smmu_asid_lock);
2059 ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2060 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2064 cfg->s1cdmax = master->ssid_bits;
2066 smmu_domain->stall_enabled = master->stall_enabled;
2068 ret = arm_smmu_alloc_cd_tables(smmu_domain);
2072 cfg->cd.asid = (u16)asid;
2073 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2074 cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2075 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2076 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2077 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2078 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2079 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2080 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2081 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2084 * Note that this will end up calling arm_smmu_sync_cd() before
2085 * the master has been added to the devices list for this domain.
2086 * This isn't an issue because the STE hasn't been installed yet.
2088 ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2090 goto out_free_cd_tables;
2092 mutex_unlock(&arm_smmu_asid_lock);
2096 arm_smmu_free_cd_tables(smmu_domain);
2098 arm_smmu_free_asid(&cfg->cd);
2100 mutex_unlock(&arm_smmu_asid_lock);
2104 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2105 struct arm_smmu_master *master,
2106 struct io_pgtable_cfg *pgtbl_cfg)
2109 struct arm_smmu_device *smmu = smmu_domain->smmu;
2110 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2111 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2113 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2117 vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2118 cfg->vmid = (u16)vmid;
2119 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2120 cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2121 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2122 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2123 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2124 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2125 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2126 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2130 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2131 struct arm_smmu_master *master)
2134 unsigned long ias, oas;
2135 enum io_pgtable_fmt fmt;
2136 struct io_pgtable_cfg pgtbl_cfg;
2137 struct io_pgtable_ops *pgtbl_ops;
2138 int (*finalise_stage_fn)(struct arm_smmu_domain *,
2139 struct arm_smmu_master *,
2140 struct io_pgtable_cfg *);
2141 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2142 struct arm_smmu_device *smmu = smmu_domain->smmu;
2144 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2145 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2149 /* Restrict the stage to what we can actually support */
2150 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2151 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2152 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2153 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2155 switch (smmu_domain->stage) {
2156 case ARM_SMMU_DOMAIN_S1:
2157 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2158 ias = min_t(unsigned long, ias, VA_BITS);
2160 fmt = ARM_64_LPAE_S1;
2161 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2163 case ARM_SMMU_DOMAIN_NESTED:
2164 case ARM_SMMU_DOMAIN_S2:
2167 fmt = ARM_64_LPAE_S2;
2168 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2174 pgtbl_cfg = (struct io_pgtable_cfg) {
2175 .pgsize_bitmap = smmu->pgsize_bitmap,
2178 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2179 .tlb = &arm_smmu_flush_ops,
2180 .iommu_dev = smmu->dev,
2183 if (!iommu_get_dma_strict(domain))
2184 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2186 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2190 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2191 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2192 domain->geometry.force_aperture = true;
2194 ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2196 free_io_pgtable_ops(pgtbl_ops);
2200 smmu_domain->pgtbl_ops = pgtbl_ops;
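/*
 * Return a pointer to the STE for @sid: either an entry in the linear stream
 * table, or the slot within the level-2 table referenced by the relevant
 * level-1 descriptor when a 2-level stream table is in use.
 */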
2204 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2207 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2209 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2210 struct arm_smmu_strtab_l1_desc *l1_desc;
2213 /* Two-level walk */
2214 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2215 l1_desc = &cfg->l1_desc[idx];
2216 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2217 step = &l1_desc->l2ptr[idx];
2219 /* Simple linear lookup */
2220 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
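/*
 * (Re)write the STE for every StreamID owned by @master, skipping any
 * duplicates that can arise from PCI aliasing behind bridges.
 */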
2226 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2229 struct arm_smmu_device *smmu = master->smmu;
2231 for (i = 0; i < master->num_streams; ++i) {
2232 u32 sid = master->streams[i].id;
2233 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2235 /* Bridged PCI devices may end up with duplicated IDs */
2236 for (j = 0; j < i; j++)
2237 if (master->streams[j].id == sid)
2242 arm_smmu_write_strtab_ent(master, sid, step);
2246 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2248 struct device *dev = master->dev;
2249 struct arm_smmu_device *smmu = master->smmu;
2250 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2252 if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2255 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2258 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2261 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2264 struct pci_dev *pdev;
2265 struct arm_smmu_device *smmu = master->smmu;
2266 struct arm_smmu_domain *smmu_domain = master->domain;
2268 /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2269 if (!master->ats_enabled)
2272 /* Smallest Translation Unit: log2 of the smallest supported granule */
2273 stu = __ffs(smmu->pgsize_bitmap);
2274 pdev = to_pci_dev(master->dev);
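/*
 * Bump nr_ats_masters and flush the ATC before enabling ATS at the endpoint,
 * so that the unmap path starts issuing ATC invalidations for this master
 * before the device can cache any translations.
 */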
2276 atomic_inc(&smmu_domain->nr_ats_masters);
2277 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2278 if (pci_enable_ats(pdev, stu))
2279 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2282 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2284 struct arm_smmu_domain *smmu_domain = master->domain;
2286 if (!master->ats_enabled)
2289 pci_disable_ats(to_pci_dev(master->dev));
2291 * Ensure ATS is disabled at the endpoint before we issue the
2292 * ATC invalidation via the SMMU.
2295 arm_smmu_atc_inv_master(master);
2296 atomic_dec(&smmu_domain->nr_ats_masters);
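/*
 * Probe and enable the endpoint's PCI PASID capability, clamping the
 * master's SSID width to what both the device and the SMMU support.
 */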
2299 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2304 struct pci_dev *pdev;
2306 if (!dev_is_pci(master->dev))
2309 pdev = to_pci_dev(master->dev);
2311 features = pci_pasid_features(pdev);
2315 num_pasids = pci_max_pasids(pdev);
2316 if (num_pasids <= 0)
2319 ret = pci_enable_pasid(pdev, features);
2321 dev_err(&pdev->dev, "Failed to enable PASID\n");
2325 master->ssid_bits = min_t(u8, ilog2(num_pasids),
2326 master->smmu->ssid_bits);
2330 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2332 struct pci_dev *pdev;
2334 if (!dev_is_pci(master->dev))
2337 pdev = to_pci_dev(master->dev);
2339 if (!pdev->pasid_enabled)
2342 master->ssid_bits = 0;
2343 pci_disable_pasid(pdev);
2346 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2348 unsigned long flags;
2349 struct arm_smmu_domain *smmu_domain = master->domain;
2354 arm_smmu_disable_ats(master);
2356 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2357 list_del(&master->domain_head);
2358 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2360 master->domain = NULL;
2361 master->ats_enabled = false;
2362 arm_smmu_install_ste_for_dev(master);
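/*
 * Attach a device to a domain: detach it from any previous domain, finalise
 * the domain against this SMMU on first use, reject incompatible SMMU/SSID/
 * stall configurations, then install the STEs and (if possible) enable ATS.
 */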
2365 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2368 unsigned long flags;
2369 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2370 struct arm_smmu_device *smmu;
2371 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2372 struct arm_smmu_master *master;
2377 master = dev_iommu_priv_get(dev);
2378 smmu = master->smmu;
2381 * Checking that SVA is disabled ensures that this device isn't bound to
2382 * any mm, and can be safely detached from its old domain. Bonds cannot
2383 * be removed concurrently since we're holding the group mutex.
2385 if (arm_smmu_master_sva_enabled(master)) {
2386 dev_err(dev, "cannot attach - SVA enabled\n");
2390 arm_smmu_detach_dev(master);
2392 mutex_lock(&smmu_domain->init_mutex);
2394 if (!smmu_domain->smmu) {
2395 smmu_domain->smmu = smmu;
2396 ret = arm_smmu_domain_finalise(domain, master);
2398 smmu_domain->smmu = NULL;
2401 } else if (smmu_domain->smmu != smmu) {
2403 "cannot attach to SMMU %s (upstream of %s)\n",
2404 dev_name(smmu_domain->smmu->dev),
2405 dev_name(smmu->dev));
2408 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2409 master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2411 "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2412 smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2415 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2416 smmu_domain->stall_enabled != master->stall_enabled) {
2417 dev_err(dev, "cannot attach to stall-%s domain\n",
2418 smmu_domain->stall_enabled ? "enabled" : "disabled");
2423 master->domain = smmu_domain;
2425 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2426 master->ats_enabled = arm_smmu_ats_supported(master);
2428 arm_smmu_install_ste_for_dev(master);
2430 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2431 list_add(&master->domain_head, &smmu_domain->devices);
2432 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2434 arm_smmu_enable_ats(master);
2437 mutex_unlock(&smmu_domain->init_mutex);
2441 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2442 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2444 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2449 return ops->map(ops, iova, paddr, size, prot, gfp);
2452 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2453 size_t size, struct iommu_iotlb_gather *gather)
2455 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2456 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2461 return ops->unmap(ops, iova, size, gather);
2464 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2466 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2468 if (smmu_domain->smmu)
2469 arm_smmu_tlb_inv_context(smmu_domain);
2472 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2473 struct iommu_iotlb_gather *gather)
2475 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2477 if (!gather->pgsize)
2480 arm_smmu_tlb_inv_range_domain(gather->start,
2481 gather->end - gather->start + 1,
2482 gather->pgsize, true, smmu_domain);
2486 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2488 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2490 if (domain->type == IOMMU_DOMAIN_IDENTITY)
2496 return ops->iova_to_phys(ops, iova);
2499 static struct platform_driver arm_smmu_driver;
2502 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2504 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2507 return dev ? dev_get_drvdata(dev) : NULL;
2510 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2512 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2514 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2515 limit *= 1UL << STRTAB_SPLIT;
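/*
 * Register a master's StreamIDs: validate each SID against the stream table
 * size, make sure the relevant level-2 strtab leaf exists, and insert the
 * streams into the per-SMMU rb-tree so that event and PRI handling can map
 * a SID back to its master.
 */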
2520 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2521 struct arm_smmu_master *master)
2525 struct arm_smmu_stream *new_stream, *cur_stream;
2526 struct rb_node **new_node, *parent_node = NULL;
2527 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2529 master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2531 if (!master->streams)
2533 master->num_streams = fwspec->num_ids;
2535 mutex_lock(&smmu->streams_mutex);
2536 for (i = 0; i < fwspec->num_ids; i++) {
2537 u32 sid = fwspec->ids[i];
2539 new_stream = &master->streams[i];
2540 new_stream->id = sid;
2541 new_stream->master = master;
2544 * Check the SIDs are in range of the SMMU and our stream table
2546 if (!arm_smmu_sid_in_range(smmu, sid)) {
2551 /* Ensure l2 strtab is initialised */
2552 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2553 ret = arm_smmu_init_l2_strtab(smmu, sid);
2558 /* Insert into SID tree */
2559 new_node = &(smmu->streams.rb_node);
2561 cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2563 parent_node = *new_node;
2564 if (cur_stream->id > new_stream->id) {
2565 new_node = &((*new_node)->rb_left);
2566 } else if (cur_stream->id < new_stream->id) {
2567 new_node = &((*new_node)->rb_right);
2569 dev_warn(master->dev,
2570 "stream %u already in tree\n",
2579 rb_link_node(&new_stream->node, parent_node, new_node);
2580 rb_insert_color(&new_stream->node, &smmu->streams);
2584 for (i--; i >= 0; i--)
2585 rb_erase(&master->streams[i].node, &smmu->streams);
2586 kfree(master->streams);
2588 mutex_unlock(&smmu->streams_mutex);
2593 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2596 struct arm_smmu_device *smmu = master->smmu;
2597 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2599 if (!smmu || !master->streams)
2602 mutex_lock(&smmu->streams_mutex);
2603 for (i = 0; i < fwspec->num_ids; i++)
2604 rb_erase(&master->streams[i].node, &smmu->streams);
2605 mutex_unlock(&smmu->streams_mutex);
2607 kfree(master->streams);
2610 static struct iommu_ops arm_smmu_ops;
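/*
 * Called when a new device is added to the IOMMU bus: look up its SMMU via
 * the firmware node, allocate the per-device master structure, register its
 * StreamIDs and work out how many SSID (PASID) bits it can use.
 */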
2612 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2615 struct arm_smmu_device *smmu;
2616 struct arm_smmu_master *master;
2617 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2619 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2620 return ERR_PTR(-ENODEV);
2622 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2623 return ERR_PTR(-EBUSY);
2625 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2627 return ERR_PTR(-ENODEV);
2629 master = kzalloc(sizeof(*master), GFP_KERNEL);
2631 return ERR_PTR(-ENOMEM);
2634 master->smmu = smmu;
2635 INIT_LIST_HEAD(&master->bonds);
2636 dev_iommu_priv_set(dev, master);
2638 ret = arm_smmu_insert_master(smmu, master);
2640 goto err_free_master;
2642 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2643 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2646 * Note that PASID must be enabled before, and disabled after ATS:
2647 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2649 * Behavior is undefined if this bit is Set and the value of the PASID
2650 * Enable, Execute Requested Enable, or Privileged Mode Requested bits are changed.
2653 arm_smmu_enable_pasid(master);
2655 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2656 master->ssid_bits = min_t(u8, master->ssid_bits,
2657 CTXDESC_LINEAR_CDMAX);
2659 if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2660 device_property_read_bool(dev, "dma-can-stall")) ||
2661 smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2662 master->stall_enabled = true;
2664 return &smmu->iommu;
2668 dev_iommu_priv_set(dev, NULL);
2669 return ERR_PTR(ret);
2672 static void arm_smmu_release_device(struct device *dev)
2674 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2675 struct arm_smmu_master *master;
2677 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2680 master = dev_iommu_priv_get(dev);
2681 if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2682 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2683 arm_smmu_detach_dev(master);
2684 arm_smmu_disable_pasid(master);
2685 arm_smmu_remove_master(master);
2687 iommu_fwspec_free(dev);
2690 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2692 struct iommu_group *group;
2695 * We don't support devices sharing stream IDs other than PCI RID
2696 * aliases, since the necessary ID-to-device lookup becomes rather
2697 * impractical given a potential sparse 32-bit stream ID space.
2699 if (dev_is_pci(dev))
2700 group = pci_device_group(dev);
2702 group = generic_device_group(dev);
2707 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2709 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2712 mutex_lock(&smmu_domain->init_mutex);
2713 if (smmu_domain->smmu)
2716 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2717 mutex_unlock(&smmu_domain->init_mutex);
2722 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2724 return iommu_fwspec_add_ids(dev, args->args, 1);
2727 static void arm_smmu_get_resv_regions(struct device *dev,
2728 struct list_head *head)
2730 struct iommu_resv_region *region;
2731 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2733 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2734 prot, IOMMU_RESV_SW_MSI);
2738 list_add_tail(&region->list, head);
2740 iommu_dma_get_resv_regions(dev, head);
2743 static bool arm_smmu_dev_has_feature(struct device *dev,
2744 enum iommu_dev_features feat)
2746 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2752 case IOMMU_DEV_FEAT_IOPF:
2753 return arm_smmu_master_iopf_supported(master);
2754 case IOMMU_DEV_FEAT_SVA:
2755 return arm_smmu_master_sva_supported(master);
2761 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2762 enum iommu_dev_features feat)
2764 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2770 case IOMMU_DEV_FEAT_IOPF:
2771 return master->iopf_enabled;
2772 case IOMMU_DEV_FEAT_SVA:
2773 return arm_smmu_master_sva_enabled(master);
2779 static int arm_smmu_dev_enable_feature(struct device *dev,
2780 enum iommu_dev_features feat)
2782 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2784 if (!arm_smmu_dev_has_feature(dev, feat))
2787 if (arm_smmu_dev_feature_enabled(dev, feat))
2791 case IOMMU_DEV_FEAT_IOPF:
2792 master->iopf_enabled = true;
2794 case IOMMU_DEV_FEAT_SVA:
2795 return arm_smmu_master_enable_sva(master);
2801 static int arm_smmu_dev_disable_feature(struct device *dev,
2802 enum iommu_dev_features feat)
2804 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2806 if (!arm_smmu_dev_feature_enabled(dev, feat))
2810 case IOMMU_DEV_FEAT_IOPF:
2811 if (master->sva_enabled)
2813 master->iopf_enabled = false;
2815 case IOMMU_DEV_FEAT_SVA:
2816 return arm_smmu_master_disable_sva(master);
2822 static struct iommu_ops arm_smmu_ops = {
2823 .capable = arm_smmu_capable,
2824 .domain_alloc = arm_smmu_domain_alloc,
2825 .domain_free = arm_smmu_domain_free,
2826 .attach_dev = arm_smmu_attach_dev,
2827 .map = arm_smmu_map,
2828 .unmap = arm_smmu_unmap,
2829 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2830 .iotlb_sync = arm_smmu_iotlb_sync,
2831 .iova_to_phys = arm_smmu_iova_to_phys,
2832 .probe_device = arm_smmu_probe_device,
2833 .release_device = arm_smmu_release_device,
2834 .device_group = arm_smmu_device_group,
2835 .enable_nesting = arm_smmu_enable_nesting,
2836 .of_xlate = arm_smmu_of_xlate,
2837 .get_resv_regions = arm_smmu_get_resv_regions,
2838 .put_resv_regions = generic_iommu_put_resv_regions,
2839 .dev_has_feat = arm_smmu_dev_has_feature,
2840 .dev_feat_enabled = arm_smmu_dev_feature_enabled,
2841 .dev_enable_feat = arm_smmu_dev_enable_feature,
2842 .dev_disable_feat = arm_smmu_dev_disable_feature,
2843 .sva_bind = arm_smmu_sva_bind,
2844 .sva_unbind = arm_smmu_sva_unbind,
2845 .sva_get_pasid = arm_smmu_sva_get_pasid,
2846 .page_response = arm_smmu_page_response,
2847 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2848 .owner = THIS_MODULE,
2851 /* Probing and initialisation functions */
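/*
 * Allocate one hardware queue: try the advertised maximum size first and
 * halve it until the DMA allocation succeeds (or we drop below a page),
 * then record the prod/cons registers and encode the Q_BASE value.
 */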
2852 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2853 struct arm_smmu_queue *q,
2855 unsigned long prod_off,
2856 unsigned long cons_off,
2857 size_t dwords, const char *name)
2862 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2863 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2865 if (q->base || qsz < PAGE_SIZE)
2868 q->llq.max_n_shift--;
2873 "failed to allocate queue (0x%zx bytes) for %s\n",
2878 if (!WARN_ON(q->base_dma & (qsz - 1))) {
2879 dev_info(smmu->dev, "allocated %u entries for %s\n",
2880 1 << q->llq.max_n_shift, name);
2883 q->prod_reg = page + prod_off;
2884 q->cons_reg = page + cons_off;
2885 q->ent_dwords = dwords;
2887 q->q_base = Q_BASE_RWA;
2888 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2889 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2891 q->llq.prod = q->llq.cons = 0;
2895 static void arm_smmu_cmdq_free_bitmap(void *data)
2897 unsigned long *bitmap = data;
2898 bitmap_free(bitmap);
2901 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2904 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2905 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2906 atomic_long_t *bitmap;
2908 atomic_set(&cmdq->owner_prod, 0);
2909 atomic_set(&cmdq->lock, 0);
2911 bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2913 dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2916 cmdq->valid_map = bitmap;
2917 devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2923 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2928 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2929 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2930 CMDQ_ENT_DWORDS, "cmdq");
2934 ret = arm_smmu_cmdq_init(smmu);
2939 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2940 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2941 EVTQ_ENT_DWORDS, "evtq");
2945 if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2946 (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2947 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2948 if (!smmu->evtq.iopf)
2953 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2956 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2957 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2958 PRIQ_ENT_DWORDS, "priq");
2961 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2964 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2965 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2966 void *strtab = smmu->strtab_cfg.strtab;
2968 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2972 for (i = 0; i < cfg->num_l1_ents; ++i) {
2973 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2974 strtab += STRTAB_L1_DESC_DWORDS << 3;
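/*
 * Set up a 2-level stream table: size the level-1 table from SIDSIZE (capped
 * by STRTAB_L1_SZ_SHIFT), allocate it, and leave the level-2 leaves to be
 * allocated lazily per StreamID as masters are added.
 */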
2980 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2985 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2987 /* Calculate the L1 size, capped to the SIDSIZE. */
2988 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2989 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2990 cfg->num_l1_ents = 1 << size;
2992 size += STRTAB_SPLIT;
2993 if (size < smmu->sid_bits)
2995 "2-level strtab only covers %u/%u bits of SID\n",
2996 size, smmu->sid_bits);
2998 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2999 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3003 "failed to allocate l1 stream table (%u bytes)\n",
3007 cfg->strtab = strtab;
3009 /* Configure strtab_base_cfg for 2 levels */
3010 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3011 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3012 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3013 cfg->strtab_base_cfg = reg;
3015 return arm_smmu_init_l1_strtab(smmu);
3018 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3023 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3025 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3026 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3030 "failed to allocate linear stream table (%u bytes)\n",
3034 cfg->strtab = strtab;
3035 cfg->num_l1_ents = 1 << smmu->sid_bits;
3037 /* Configure strtab_base_cfg for a linear table covering all SIDs */
3038 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3039 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3040 cfg->strtab_base_cfg = reg;
3042 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3046 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3051 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3052 ret = arm_smmu_init_strtab_2lvl(smmu);
3054 ret = arm_smmu_init_strtab_linear(smmu);
3059 /* Set the strtab base address */
3060 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3061 reg |= STRTAB_BASE_RA;
3062 smmu->strtab_cfg.strtab_base = reg;
3064 /* Allocate the first VMID for stage-2 bypass STEs */
3065 set_bit(0, smmu->vmid_map);
3069 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3073 mutex_init(&smmu->streams_mutex);
3074 smmu->streams = RB_ROOT;
3076 ret = arm_smmu_init_queues(smmu);
3080 return arm_smmu_init_strtab(smmu);
3083 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3084 unsigned int reg_off, unsigned int ack_off)
3088 writel_relaxed(val, smmu->base + reg_off);
3089 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3090 1, ARM_SMMU_POLL_TIMEOUT_US);
3093 /* GBPA is "special" */
3094 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3097 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3099 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3100 1, ARM_SMMU_POLL_TIMEOUT_US);
3106 writel_relaxed(reg | GBPA_UPDATE, gbpa);
3107 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3108 1, ARM_SMMU_POLL_TIMEOUT_US);
3111 dev_err(smmu->dev, "GBPA not responding to update\n");
3115 static void arm_smmu_free_msis(void *data)
3117 struct device *dev = data;
3118 platform_msi_domain_free_irqs(dev);
3121 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3123 phys_addr_t doorbell;
3124 struct device *dev = msi_desc_to_dev(desc);
3125 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3126 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3128 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3129 doorbell &= MSI_CFG0_ADDR_MASK;
3131 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3132 writel_relaxed(msg->data, smmu->base + cfg[1]);
3133 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
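/*
 * Wire up MSIs for the event queue, GERROR and (optionally) the PRI queue:
 * clear the doorbell registers, allocate platform MSIs and let
 * arm_smmu_write_msi_msg() program each doorbell, falling back to wired
 * interrupts when MSIs are unavailable.
 */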
3136 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3138 struct msi_desc *desc;
3139 int ret, nvec = ARM_SMMU_MAX_MSIS;
3140 struct device *dev = smmu->dev;
3142 /* Clear the MSI address regs */
3143 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3144 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3146 if (smmu->features & ARM_SMMU_FEAT_PRI)
3147 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3151 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3154 if (!dev->msi_domain) {
3155 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3159 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3160 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3162 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3166 for_each_msi_entry(desc, dev) {
3167 switch (desc->platform.msi_index) {
3168 case EVTQ_MSI_INDEX:
3169 smmu->evtq.q.irq = desc->irq;
3171 case GERROR_MSI_INDEX:
3172 smmu->gerr_irq = desc->irq;
3174 case PRIQ_MSI_INDEX:
3175 smmu->priq.q.irq = desc->irq;
3177 default: /* Unknown */
3182 /* Add callback to free MSIs on teardown */
3183 devm_add_action(dev, arm_smmu_free_msis, dev);
3186 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3190 arm_smmu_setup_msis(smmu);
3192 /* Request interrupt lines */
3193 irq = smmu->evtq.q.irq;
3195 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3196 arm_smmu_evtq_thread,
3198 "arm-smmu-v3-evtq", smmu);
3200 dev_warn(smmu->dev, "failed to enable evtq irq\n");
3202 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3205 irq = smmu->gerr_irq;
3207 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3208 0, "arm-smmu-v3-gerror", smmu);
3210 dev_warn(smmu->dev, "failed to enable gerror irq\n");
3212 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3215 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3216 irq = smmu->priq.q.irq;
3218 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3219 arm_smmu_priq_thread,
3225 "failed to enable priq irq\n");
3227 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3232 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3235 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3237 /* Disable IRQs first */
3238 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3239 ARM_SMMU_IRQ_CTRLACK);
3241 dev_err(smmu->dev, "failed to disable irqs\n");
3245 irq = smmu->combined_irq;
3248 * Cavium ThunderX2 implementation doesn't support unique irq
3249 * lines. Use a single irq line for all the SMMUv3 interrupts.
3251 ret = devm_request_threaded_irq(smmu->dev, irq,
3252 arm_smmu_combined_irq_handler,
3253 arm_smmu_combined_irq_thread,
3255 "arm-smmu-v3-combined-irq", smmu);
3257 dev_warn(smmu->dev, "failed to enable combined irq\n");
3259 arm_smmu_setup_unique_irqs(smmu);
3261 if (smmu->features & ARM_SMMU_FEAT_PRI)
3262 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3264 /* Enable interrupt generation on the SMMU */
3265 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3266 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3268 dev_warn(smmu->dev, "failed to enable irqs\n");
3273 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3277 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3279 dev_err(smmu->dev, "failed to clear cr0\n");
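/*
 * Bring the SMMU up from scratch: disable it, program the table/queue
 * attributes and base registers, invalidate cached configuration and TLBs,
 * enable the queues, ATS checking and interrupts, and finally either enable
 * translation or set up global bypass/abort depending on @bypass.
 */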
3284 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3288 struct arm_smmu_cmdq_ent cmd;
3290 /* Clear CR0 and sync (disables SMMU and queue processing) */
3291 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3292 if (reg & CR0_SMMUEN) {
3293 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3294 WARN_ON(is_kdump_kernel() && !disable_bypass);
3295 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3298 ret = arm_smmu_device_disable(smmu);
3302 /* CR1 (table and queue memory attributes) */
3303 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3304 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3305 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3306 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3307 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3308 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3309 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3311 /* CR2 (random crap) */
3312 reg = CR2_PTM | CR2_RECINVSID;
3314 if (smmu->features & ARM_SMMU_FEAT_E2H)
3317 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3320 writeq_relaxed(smmu->strtab_cfg.strtab_base,
3321 smmu->base + ARM_SMMU_STRTAB_BASE);
3322 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3323 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3326 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3327 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3328 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3330 enables = CR0_CMDQEN;
3331 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3334 dev_err(smmu->dev, "failed to enable command queue\n");
3338 /* Invalidate any cached configuration */
3339 cmd.opcode = CMDQ_OP_CFGI_ALL;
3340 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3341 arm_smmu_cmdq_issue_sync(smmu);
3343 /* Invalidate any stale TLB entries */
3344 if (smmu->features & ARM_SMMU_FEAT_HYP) {
3345 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3346 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3349 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3350 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3351 arm_smmu_cmdq_issue_sync(smmu);
3354 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3355 writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3356 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3358 enables |= CR0_EVTQEN;
3359 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3362 dev_err(smmu->dev, "failed to enable event queue\n");
3367 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3368 writeq_relaxed(smmu->priq.q.q_base,
3369 smmu->base + ARM_SMMU_PRIQ_BASE);
3370 writel_relaxed(smmu->priq.q.llq.prod,
3371 smmu->page1 + ARM_SMMU_PRIQ_PROD);
3372 writel_relaxed(smmu->priq.q.llq.cons,
3373 smmu->page1 + ARM_SMMU_PRIQ_CONS);
3375 enables |= CR0_PRIQEN;
3376 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3379 dev_err(smmu->dev, "failed to enable PRI queue\n");
3384 if (smmu->features & ARM_SMMU_FEAT_ATS) {
3385 enables |= CR0_ATSCHK;
3386 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3389 dev_err(smmu->dev, "failed to enable ATS check\n");
3394 ret = arm_smmu_setup_irqs(smmu);
3396 dev_err(smmu->dev, "failed to setup irqs\n");
3400 if (is_kdump_kernel())
3401 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3403 /* Enable the SMMU interface, or ensure bypass */
3404 if (!bypass || disable_bypass) {
3405 enables |= CR0_SMMUEN;
3407 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3411 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3414 dev_err(smmu->dev, "failed to enable SMMU interface\n");
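/*
 * Discover the hardware's capabilities from the ID registers: features and
 * table endianness from IDR0, queue and SID/SSID sizes from IDR1, range
 * invalidation from IDR3, and page sizes plus address sizes from IDR5.
 */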
3421 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3424 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3427 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3429 /* 2-level structures */
3430 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3431 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3433 if (reg & IDR0_CD2L)
3434 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3437 * Translation table endianness.
3438 * We currently require the same endianness as the CPU, but this
3439 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3441 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3442 case IDR0_TTENDIAN_MIXED:
3443 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3446 case IDR0_TTENDIAN_BE:
3447 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3450 case IDR0_TTENDIAN_LE:
3451 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3455 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3459 /* Boolean feature flags */
3460 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3461 smmu->features |= ARM_SMMU_FEAT_PRI;
3463 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3464 smmu->features |= ARM_SMMU_FEAT_ATS;
3467 smmu->features |= ARM_SMMU_FEAT_SEV;
3469 if (reg & IDR0_MSI) {
3470 smmu->features |= ARM_SMMU_FEAT_MSI;
3471 if (coherent && !disable_msipolling)
3472 smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3475 if (reg & IDR0_HYP) {
3476 smmu->features |= ARM_SMMU_FEAT_HYP;
3477 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3478 smmu->features |= ARM_SMMU_FEAT_E2H;
3482 * The coherency feature as set by FW is used in preference to the ID
3483 * register, but warn on mismatch.
3485 if (!!(reg & IDR0_COHACC) != coherent)
3486 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3487 coherent ? "true" : "false");
3489 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3490 case IDR0_STALL_MODEL_FORCE:
3491 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3493 case IDR0_STALL_MODEL_STALL:
3494 smmu->features |= ARM_SMMU_FEAT_STALLS;
3498 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3501 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3503 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3504 dev_err(smmu->dev, "no translation support!\n");
3508 /* We only support the AArch64 table format at present */
3509 switch (FIELD_GET(IDR0_TTF, reg)) {
3510 case IDR0_TTF_AARCH32_64:
3513 case IDR0_TTF_AARCH64:
3516 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3520 /* ASID/VMID sizes */
3521 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3522 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3525 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3526 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3527 dev_err(smmu->dev, "embedded implementation not supported\n");
3531 /* Queue sizes, capped to ensure natural alignment */
3532 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3533 FIELD_GET(IDR1_CMDQS, reg));
3534 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3536 * We don't support splitting up batches, so one batch of
3537 * commands plus an extra sync needs to fit inside the command
3538 * queue. There's also no way we can handle the weird alignment
3539 * restrictions on the base pointer for a unit-length queue.
3541 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3542 CMDQ_BATCH_ENTRIES);
3546 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3547 FIELD_GET(IDR1_EVTQS, reg));
3548 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3549 FIELD_GET(IDR1_PRIQS, reg));
3551 /* SID/SSID sizes */
3552 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3553 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3556 * If the SMMU supports fewer bits than would fill a single L2 stream
3557 * table, use a linear table instead.
3559 if (smmu->sid_bits <= STRTAB_SPLIT)
3560 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3563 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3564 if (FIELD_GET(IDR3_RIL, reg))
3565 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3568 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3570 /* Maximum number of outstanding stalls */
3571 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3574 if (reg & IDR5_GRAN64K)
3575 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3576 if (reg & IDR5_GRAN16K)
3577 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3578 if (reg & IDR5_GRAN4K)
3579 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3581 /* Input address size */
3582 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3583 smmu->features |= ARM_SMMU_FEAT_VAX;
3585 /* Output address size */
3586 switch (FIELD_GET(IDR5_OAS, reg)) {
3587 case IDR5_OAS_32_BIT:
3590 case IDR5_OAS_36_BIT:
3593 case IDR5_OAS_40_BIT:
3596 case IDR5_OAS_42_BIT:
3599 case IDR5_OAS_44_BIT:
3602 case IDR5_OAS_52_BIT:
3604 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3608 "unknown output address size. Truncating to 48-bit\n");
3610 case IDR5_OAS_48_BIT:
3614 if (arm_smmu_ops.pgsize_bitmap == -1UL)
3615 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3617 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3619 /* Set the DMA mask for our table walker */
3620 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3622 "failed to set DMA mask for table walker\n");
3624 smmu->ias = max(smmu->ias, smmu->oas);
3626 if (arm_smmu_sva_supported(smmu))
3627 smmu->features |= ARM_SMMU_FEAT_SVA;
3629 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3630 smmu->ias, smmu->oas, smmu->features);
3635 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3638 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3639 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3641 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3642 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3646 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3649 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3650 struct arm_smmu_device *smmu)
3652 struct acpi_iort_smmu_v3 *iort_smmu;
3653 struct device *dev = smmu->dev;
3654 struct acpi_iort_node *node;
3656 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3658 /* Retrieve SMMUv3 specific data */
3659 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3661 acpi_smmu_get_options(iort_smmu->model, smmu);
3663 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3664 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3669 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3670 struct arm_smmu_device *smmu)
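/*
 * An illustrative (not taken from any particular platform) device-tree
 * fragment matching the single-cell binding checked below, where the one
 * "iommus"/"iommu-map" cell carries the StreamID:
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		#iommu-cells = <1>;
 *	};
 *
 *	pcie@40000000 {
 *		...
 *		iommu-map = <0 &smmu 0 0x10000>;
 *	};
 */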
3676 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3677 struct arm_smmu_device *smmu)
3679 struct device *dev = &pdev->dev;
3683 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3684 dev_err(dev, "missing #iommu-cells property\n");
3685 else if (cells != 1)
3686 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3690 parse_driver_options(smmu);
3692 if (of_dma_is_coherent(dev->of_node))
3693 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3698 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3700 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3706 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3711 if (pci_bus_type.iommu_ops != ops) {
3712 err = bus_set_iommu(&pci_bus_type, ops);
3717 #ifdef CONFIG_ARM_AMBA
3718 if (amba_bustype.iommu_ops != ops) {
3719 err = bus_set_iommu(&amba_bustype, ops);
3721 goto err_reset_pci_ops;
3724 if (platform_bus_type.iommu_ops != ops) {
3725 err = bus_set_iommu(&platform_bus_type, ops);
3727 goto err_reset_amba_ops;
3733 #ifdef CONFIG_ARM_AMBA
3734 bus_set_iommu(&amba_bustype, NULL);
3736 err_reset_pci_ops: __maybe_unused;
3738 bus_set_iommu(&pci_bus_type, NULL);
3743 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3744 resource_size_t size)
3746 struct resource res = DEFINE_RES_MEM(start, size);
3748 return devm_ioremap_resource(dev, &res);
3751 static int arm_smmu_device_probe(struct platform_device *pdev)
3754 struct resource *res;
3755 resource_size_t ioaddr;
3756 struct arm_smmu_device *smmu;
3757 struct device *dev = &pdev->dev;
3760 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3766 ret = arm_smmu_device_dt_probe(pdev, smmu);
3768 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3773 /* Set bypass mode according to firmware probing result */
3777 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3778 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3779 dev_err(dev, "MMIO region too small (%pr)\n", res);
3782 ioaddr = res->start;
3785 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3786 * the PMCG registers which are reserved by the PMU driver.
3788 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3789 if (IS_ERR(smmu->base))
3790 return PTR_ERR(smmu->base);
3792 if (arm_smmu_resource_size(smmu) > SZ_64K) {
3793 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3795 if (IS_ERR(smmu->page1))
3796 return PTR_ERR(smmu->page1);
3798 smmu->page1 = smmu->base;
3801 /* Interrupt lines */
3803 irq = platform_get_irq_byname_optional(pdev, "combined");
3805 smmu->combined_irq = irq;
3807 irq = platform_get_irq_byname_optional(pdev, "eventq");
3809 smmu->evtq.q.irq = irq;
3811 irq = platform_get_irq_byname_optional(pdev, "priq");
3813 smmu->priq.q.irq = irq;
3815 irq = platform_get_irq_byname_optional(pdev, "gerror");
3817 smmu->gerr_irq = irq;
3820 ret = arm_smmu_device_hw_probe(smmu);
3824 /* Initialise in-memory data structures */
3825 ret = arm_smmu_init_structures(smmu);
3829 /* Record our private device structure */
3830 platform_set_drvdata(pdev, smmu);
3832 /* Reset the device */
3833 ret = arm_smmu_device_reset(smmu, bypass);
3837 /* And we're up. Go go go! */
3838 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3839 "smmu3.%pa", &ioaddr);
3843 ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3845 dev_err(dev, "Failed to register iommu\n");
3846 goto err_sysfs_remove;
3849 ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3851 goto err_unregister_device;
3855 err_unregister_device:
3856 iommu_device_unregister(&smmu->iommu);
3858 iommu_device_sysfs_remove(&smmu->iommu);
3862 static int arm_smmu_device_remove(struct platform_device *pdev)
3864 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3866 arm_smmu_set_bus_ops(NULL);
3867 iommu_device_unregister(&smmu->iommu);
3868 iommu_device_sysfs_remove(&smmu->iommu);
3869 arm_smmu_device_disable(smmu);
3870 iopf_queue_free(smmu->evtq.iopf);
3875 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3877 arm_smmu_device_remove(pdev);
3880 static const struct of_device_id arm_smmu_of_match[] = {
3881 { .compatible = "arm,smmu-v3", },
3884 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3886 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3888 arm_smmu_sva_notifier_synchronize();
3889 platform_driver_unregister(drv);
3892 static struct platform_driver arm_smmu_driver = {
3894 .name = "arm-smmu-v3",
3895 .of_match_table = arm_smmu_of_match,
3896 .suppress_bind_attrs = true,
3898 .probe = arm_smmu_device_probe,
3899 .remove = arm_smmu_device_remove,
3900 .shutdown = arm_smmu_device_shutdown,
3902 module_driver(arm_smmu_driver, platform_driver_register,
3903 arm_smmu_driver_unregister);
3905 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3906 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3907 MODULE_ALIAS("platform:arm-smmu-v3");
3908 MODULE_LICENSE("GPL v2");