From: Joerg Roedel
Date: Fri, 20 Aug 2021 15:14:35 +0000 (+0200)
Subject: Merge branches 'apple/dart', 'arm/smmu', 'iommu/fixes', 'x86/amd', 'x86/vt-d' and...
X-Git-Tag: v5.15~301^2~5
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d8768d7eb9c21ef928adb93402d9348bcc4a6915;p=platform%2Fkernel%2Flinux-starfive.git

Merge branches 'apple/dart', 'arm/smmu', 'iommu/fixes', 'x86/amd', 'x86/vt-d' and 'core' into next
---

d8768d7eb9c21ef928adb93402d9348bcc4a6915
diff --cc drivers/iommu/Kconfig
index 07b7c25,e908b82,07b7c25,07b7c25,c84da82,6e06f87,6e06f87..f14f2e4
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@@@@@@@ -79,16 -79,57 -79,16 -79,16 -79,57 -79,57 -79,57 +79,57 @@@@@@@@ config IOMMU_DEBUGF
          debug/iommu directory, and then populate a subdirectory with
          entries as required.

----- config IOMMU_DEFAULT_PASSTHROUGH
-----   bool "IOMMU passthrough by default"
+++++ choice
+++++   prompt "IOMMU default domain type"
        depends on IOMMU_API
+++++   default IOMMU_DEFAULT_DMA_LAZY if AMD_IOMMU || INTEL_IOMMU
+++++   default IOMMU_DEFAULT_DMA_STRICT
        help
-----     Enable passthrough by default, removing the need to pass in
-----     iommu.passthrough=on or iommu=pt through command line. If this
-----     is enabled, you can still disable with iommu.passthrough=off
-----     or iommu=nopt depending on the architecture.
- -       If unsure, say N here.
+++++     Choose the type of IOMMU domain used to manage DMA API usage by
+++++     device drivers. The options here typically represent different
+++++     levels of tradeoff between robustness/security and performance,
+++++     depending on the IOMMU driver. Not all IOMMUs support all options.
+++++     This choice can be overridden at boot via the command line, and for
+++++     some devices also at runtime via sysfs.
----      If unsure, say N here.
-
- -   choice
- -     prompt "IOMMU default DMA IOTLB invalidation mode"
- -     depends on IOMMU_DMA
+++++     If unsure, keep the default.
+ ++
- -     default IOMMU_DEFAULT_LAZY if (AMD_IOMMU || INTEL_IOMMU)
- -     default IOMMU_DEFAULT_STRICT
+++++ config IOMMU_DEFAULT_DMA_STRICT
+++++   bool "Translated - Strict"
+ ++    help
- -       This option allows an IOMMU DMA IOTLB invalidation mode to be
- -       chosen at build time, to override the default mode of each ARCH,
- -       removing the need to pass in kernel parameters through command line.
- -       It is still possible to provide common boot params to override this
- -       config.
+++++     Trusted devices use translation to restrict their access to only
+++++     DMA-mapped pages, with strict TLB invalidation on unmap. Equivalent
+++++     to passing "iommu.passthrough=0 iommu.strict=1" on the command line.
+ ++
- -       If unsure, keep the default.
+++++     Untrusted devices always use this mode, with an additional layer of
+++++     bounce-buffering such that they cannot gain access to any unrelated
+++++     data within a mapped page.
+ ++
- -   config IOMMU_DEFAULT_STRICT
- -     bool "strict"
+++++ config IOMMU_DEFAULT_DMA_LAZY
+++++   bool "Translated - Lazy"
+ ++    help
- -       For every IOMMU DMA unmap operation, the flush operation of IOTLB and
- -       the free operation of IOVA are guaranteed to be done in the unmap
- -       function.
+++++     Trusted devices use translation to restrict their access to only
+++++     DMA-mapped pages, but with "lazy" batched TLB invalidation. This
+++++     mode allows higher performance with some IOMMUs due to reduced TLB
+++++     flushing, but at the cost of reduced isolation since devices may be
+++++     able to access memory for some time after it has been unmapped.
+++++     Equivalent to passing "iommu.passthrough=0 iommu.strict=0" on the
+++++     command line.
+++++
+++++     If this mode is not supported by the IOMMU driver, the effective
+++++     runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT.
+ ++
- -   config IOMMU_DEFAULT_LAZY
- -     bool "lazy"
+++++ config IOMMU_DEFAULT_PASSTHROUGH
+++++   bool "Passthrough"
+ ++    help
- -       Support lazy mode, where for every IOMMU DMA unmap operation, the
- -       flush operation of IOTLB and the free operation of IOVA are deferred.
- -       They are only guaranteed to be done before the related IOVA will be
- -       reused.
- -
- -       The isolation provided in this mode is not as secure as STRICT mode,
- -       such that a vulnerable time window may be created between the DMA
- -       unmap and the mappings cached in the IOMMU IOTLB or device TLB
- -       finally being invalidated, where the device could still access the
- -       memory which has already been unmapped by the device driver.
- -       However this mode may provide better performance in high throughput
- -       scenarios, and is still considerably more secure than passthrough
- -       mode or no IOMMU.
+++++     Trusted devices are identity-mapped, giving them unrestricted access
+++++     to memory with minimal performance overhead. Equivalent to passing
+++++     "iommu.passthrough=1" (historically "iommu=pt") on the command line.
+++++
+++++     If this mode is not supported by the IOMMU driver, the effective
+++++     runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT.
+ ++
+ ++   endchoice

       config OF_IOMMU
        def_bool y
diff --cc drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 235f9bd,35d5491,3216e74,235f9bd,6346f21,f29dbe9,f29dbe9..a388e31
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@@@@@@@ -856,12 -856,12 -866,19 -856,12 -856,12 -856,12 -856,12 +866,19 @@@@@@@@ static int __arm_smmu_cmdq_issue_cmd(st
                return -EINVAL;
        }

-- ----         return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
++ ++++         return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
+ + ++}
+ + ++
- -    static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
++ ++++static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
++ ++++                                    struct arm_smmu_cmdq_ent *ent)
+ + ++{
- -             return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
++ ++++         return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
+ +    }
+ +
- - --static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
++ ++++static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
++ ++++                                              struct arm_smmu_cmdq_ent *ent)
+ +    {
- - --          return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
++ ++++         return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
       }

       static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
diff --cc drivers/iommu/arm/arm-smmu/arm-smmu.c
index f22dbeb,ac21170,45b4aed,f22dbeb,ac21170,1d013b1,1d013b1..4bc75c4
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@@@@@@@ -2289,10 -2288,10 -2313,19 -2289,10 -2288,10 -2279,10 -2279,10 +2303,19 @@@@@@@@ static int __maybe_unused arm_smmu_pm_r
       static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
       {
++ ++++        int ret = 0;
++ ++++        struct arm_smmu_device *smmu = dev_get_drvdata(dev);
++ ++++
               if (pm_runtime_suspended(dev))
-- ----                return 0;
++ ++++                goto clk_unprepare;
++ ++
--             return arm_smmu_runtime_suspend(dev);
++ ++++        ret = arm_smmu_runtime_suspend(dev);
++ ++++        if (ret)
++ ++++                return ret;
++
-- --          return arm_smmu_runtime_suspend(dev);
++ ++++clk_unprepare:
++ ++++        clk_bulk_unprepare(smmu->num_clks, smmu->clks);
++ ++++        return ret;
       }

       static const struct dev_pm_ops arm_smmu_pm_ops = {
diff --cc drivers/iommu/io-pgtable-arm.c
index 87def58,0779eb9,87def58,87def58,053df40,48a5bd8,9697721..dd9e471
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@@@@@@@ -585,26 -635,37 -585,26 -585,26 -623,37 -623,30 -623,30 +635,30 @@@@@@@@ static size_t __arm_lpae_unmap(struct a
        /* If the size matches this level, we're in the right place */
        if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
- --            __arm_lpae_set_pte(ptep, 0, &iop->cfg);
- --
- --            if (!iopte_leaf(pte, lvl, iop->fmt)) {
- --                    /* Also flush any partial walks */
- --                    io_pgtable_tlb_flush_walk(iop, iova, size,
- --                                              ARM_LPAE_GRANULE(data));
- --                    ptep = iopte_deref(pte, data);
- --                    __arm_lpae_free_pgtable(data, lvl + 1, ptep);
- --            } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
- --                    /*
- --                     * Order the PTE update against queueing the IOVA, to
- --                     * guarantee that a flush callback from a different CPU
- --                     * has observed it before the TLBIALL can be issued.
- --                     */
- --                    smp_wmb();
- --            } else {
- --                    io_pgtable_tlb_add_page(iop, gather, iova, size);
+ ++            max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
+ ++            num_entries = min_t(int, pgcount, max_entries);
+ ++
+ ++            while (i < num_entries) {
+ ++                    pte = READ_ONCE(*ptep);
+ ++                    if (WARN_ON(!pte))
+ ++                            break;
+ ++
+ ++                    __arm_lpae_clear_pte(ptep, &iop->cfg);
+ ++
+ ++                    if (!iopte_leaf(pte, lvl, iop->fmt)) {
+ ++                            /* Also flush any partial walks */
+ ++                            io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
+ ++                                                      ARM_LPAE_GRANULE(data));
+ ++                            __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
- -                     } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
- -                             /*
- -                              * Order the PTE update against queueing the IOVA, to
- -                              * guarantee that a flush callback from a different CPU
- -                              * has observed it before the TLBIALL can be issued.
- -                              */
- -                             smp_wmb();
- -                     } else {
-                       } else if (!gather->queued) {
++++++                  } else if (!iommu_iotlb_gather_queued(gather)) {
+ ++                            io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
+ ++                    }
+ ++
+ ++                    ptep++;
+ ++                    i++;
                }

- --            return size;
+ ++            return i * size;
        } else if (iopte_leaf(pte, lvl, iop->fmt)) {
                /*
                 * Insert a table at the next level to map the old region,
diff --cc drivers/iommu/iommu.c
index 5419c4b,f2cda99,80c5a1c,63f0af1,f2cda99,feb66d9,feb66d9..b4499b1
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@@@@@@@ -7,7 -7,8 -7,7 -7,7 -7,8 -7,9 -7,9 +7,9 @@@@@@@@ #define pr_fmt(fmt) "iommu: " fmt
       #include
+++++ #include
       #include
+ ++  #include
       #include
       #include
       #include
@@@@@@@@ -29,7 -30,7 -29,7 -29,7 -30,7 -31,7 -31,7 +31,7 @@@@@@@@ static struct kset *iommu_group_kset
       static DEFINE_IDA(iommu_group_ida);

       static unsigned int iommu_def_domain_type __read_mostly;
- --   static bool iommu_dma_strict __read_mostly = true;
- -    static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_STRICT);
+++++ static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
       static u32 iommu_cmd_line __read_mostly;

       struct iommu_group {
@@@@@@@@ -138,6 -139,11 -138,6 -138,6 -139,11 -144,12 -144,12 +144,12 @@@@@@@@ static int __init iommu_subsys_init(voi
                       (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
                               "(set via kernel command line)" : "");

- -            pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
- -                    iommu_dma_strict ? "strict" : "lazy",
- -                    (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
- -                            "(set via kernel command line)" : "");
+++++          if (!iommu_default_passthrough())
+++++                  pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
+++++                          iommu_dma_strict ? "strict" : "lazy",
+++++                          (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
+++++                                  "(set via kernel command line)" : "");
+ ++
               return 0;
       }
       subsys_initcall(iommu_subsys_init);
@@@@@@@@ -344,21 -350,20 -346,21 -344,21 -350,20 -356,13 -356,13 +358,13 @@@@@@@@ static int __init iommu_dma_setup(char
       }
       early_param("iommu.strict", iommu_dma_setup);

- --   void iommu_set_dma_strict(bool strict)
- -    {
- -            if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
- -                    iommu_dma_strict = strict;
- -    }
- -
- -    bool iommu_get_dma_strict(struct iommu_domain *domain)
+ ++   void iommu_set_dma_strict(void)
       {
- -            /* only allow lazy flushing for DMA domains */
- -            if (domain->type == IOMMU_DOMAIN_DMA)
- -                    return iommu_dma_strict;
- -            return true;
-              if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
-                      iommu_dma_strict = strict;
+ ++           iommu_dma_strict = true;
+++++          if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
+++++                  iommu_def_domain_type = IOMMU_DOMAIN_DMA;
       }
- -    EXPORT_SYMBOL_GPL(iommu_get_dma_strict);
- --   bool iommu_get_dma_strict(struct iommu_domain *domain)
- --   {
- --           /* only allow lazy flushing for DMA domains */
- --           if (domain->type == IOMMU_DOMAIN_DMA)
- --                   return iommu_dma_strict;
- --           return true;
- --   }
- --   EXPORT_SYMBOL_GPL(iommu_get_dma_strict);
- --

       static ssize_t iommu_group_attr_show(struct kobject *kobj,
                                            struct attribute *__attr, char *buf)
       {
@@@@@@@@ -2374,35 -2382,57 -2376,35 -2377,35 -2382,57 -2390,57 -2390,57 +2395,57 @@@@@@@@ phys_addr_t iommu_iova_to_phys(struct i
       }
       EXPORT_SYMBOL_GPL(iommu_iova_to_phys);

- --   static size_t iommu_pgsize(struct iommu_domain *domain,
- --                              unsigned long addr_merge, size_t size)
+ ++   static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
+ ++                              phys_addr_t paddr, size_t size, size_t *count)
       {
- --           unsigned int pgsize_idx;
- --           size_t pgsize;
+ ++           unsigned int pgsize_idx, pgsize_idx_next;
+ ++           unsigned long pgsizes;
+ ++           size_t offset, pgsize, pgsize_next;
+ ++           unsigned long addr_merge = paddr | iova;

- --           /* Max page size that still fits into 'size' */
- --           pgsize_idx = __fls(size);
+ ++           /* Page sizes supported by the hardware and small enough for @size */
+ ++           pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);

- --           /* need to consider alignment requirements ? */
- --           if (likely(addr_merge)) {
- --                   /* Max page size allowed by address */
- --                   unsigned int align_pgsize_idx = __ffs(addr_merge);
- --                   pgsize_idx = min(pgsize_idx, align_pgsize_idx);
- --           }
+ ++           /* Constrain the page sizes further based on the maximum alignment */
+ ++           if (likely(addr_merge))
+ ++                   pgsizes &= GENMASK(__ffs(addr_merge), 0);
+ +
-             /* build a mask of acceptable page sizes */
-             pgsize = (1UL << (pgsize_idx + 1)) - 1;
+ ++           /* Make sure we have at least one suitable page size */
+ ++           BUG_ON(!pgsizes);
+
-             /* build a mask of acceptable page sizes */
-             pgsize = (1UL << (pgsize_idx + 1)) - 1;
-             /* throw away page sizes not supported by the hardware */
-             pgsize &= domain->pgsize_bitmap;
+ ++           /* Pick the biggest page size remaining */
+ ++           pgsize_idx = __fls(pgsizes);
+ ++           pgsize = BIT(pgsize_idx);
+ ++           if (!count)
+ ++                   return pgsize;

-             /* throw away page sizes not supported by the hardware */
-             pgsize &= domain->pgsize_bitmap;
-             /* build a mask of acceptable page sizes */
-             pgsize = (1UL << (pgsize_idx + 1)) - 1;
-             /* make sure we're still sane */
-             BUG_ON(!pgsize);
+ ++           /* Find the next biggest support page size, if it exists */
+ ++           pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+ ++           if (!pgsizes)
+ ++                   goto out_set_count;

-             /* make sure we're still sane */
-             BUG_ON(!pgsize);
-             /* throw away page sizes not supported by the hardware */
-             pgsize &= domain->pgsize_bitmap;
-             /* pick the biggest page */
-             pgsize_idx = __fls(pgsize);
-             pgsize = 1UL << pgsize_idx;
+ ++           pgsize_idx_next = __ffs(pgsizes);
+ ++           pgsize_next = BIT(pgsize_idx_next);
+
-             /* make sure we're still sane */
-             BUG_ON(!pgsize);
+ ++           /*
+ ++            * There's no point trying a bigger page size unless the virtual
+ ++            * and physical addresses are similarly offset within the larger page.
+ ++            */
+ ++           if ((iova ^ paddr) & (pgsize_next - 1))
+ ++                   goto out_set_count;
+
- -            /* pick the biggest page */
- -            pgsize_idx = __fls(pgsize);
- -            pgsize = 1UL << pgsize_idx;
+ ++           /* Calculate the offset to the next page size alignment boundary */
+ ++           offset = pgsize_next - (addr_merge & (pgsize_next - 1));
+
+ ++           /*
+ ++            * If size is big enough to accommodate the larger page, reduce
+ ++            * the number of smaller pages.
+ ++            */
+ ++           if (offset + pgsize_next <= size)
+ ++                   size = offset;
+ ++
+ ++   out_set_count:
+ ++           *count = size >> pgsize_idx;
               return pgsize;
       }
diff --cc include/linux/iommu.h
index 32d4480,4997c78,32d4480,32d4480,e552ecf,923a8d1,a23779c..6633040
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@@@@@@@ -476,8 -485,8 -476,8 -476,8 -485,8 -503,7 -503,7 +503,7 @@@@@@@@ int iommu_enable_nesting(struct iommu_d
       int iommu_set_pgtable_quirks(struct iommu_domain *domain,
                       unsigned long quirks);

- --   void iommu_set_dma_strict(bool val);
- --   bool iommu_get_dma_strict(struct iommu_domain *domain);
+ ++   void iommu_set_dma_strict(void);
- -    bool iommu_get_dma_strict(struct iommu_domain *domain);

       extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
                                     unsigned long iova, int flags);
@@@@@@@@ -508,20 -517,20 -508,20 -508,20 -569,14 -534,20 -534,25 +586,19 @@@@@@@@ static inline void iommu_iotlb_gather_a
        * a different granularity, then sync the TLB so that the gather
        * structure can be rewritten.
        */
---- --        if (gather->pgsize != size ||
---- --            end + 1 < gather->start || start > gather->end + 1) {
---- --                if (gather->pgsize)
---- --                        iommu_iotlb_sync(domain, gather);
---- --                gather->pgsize = size;
---- --        }
- -            if (gather->end < end)
-                      gather->end = end;
++++ ++        if ((gather->pgsize && gather->pgsize != size) ||
++++ ++            iommu_iotlb_gather_is_disjoint(gather, iova, size))
++++ ++                iommu_iotlb_sync(domain, gather);

---- -         if (gather->end < end)
---- -                 gather->end = end;
-              if (gather->start > start)
-                      gather->start = start;
++++ ++        gather->pgsize = size;
++++ ++        iommu_iotlb_gather_add_range(gather, iova, size);
++++ + }

---- -         if (gather->start > start)
---- -                 gather->start = start;
++++++ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather)
++++++ {
++++++         return gather && gather->queued;
+      }
+
       /* PCI device grouping function */
       extern struct iommu_group *pci_device_group(struct device *dev);
       /* Generic device grouping function */