From fec777c385b6376048fc4b08f039366545b335cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:15 +0100 Subject: [PATCH] x86/dma: Use DMA-direct (CONFIG_DMA_DIRECT_OPS=y) The generic DMA-direct (CONFIG_DMA_DIRECT_OPS=y) implementation is now functionally equivalent to the x86 nommu dma_map implementation, so switch over to using it. That includes switching from using x86_dma_supported in various IOMMU drivers to use dma_direct_supported instead, which provides the same functionality. Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-4-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/include/asm/dma-mapping.h | 8 ----- arch/x86/include/asm/iommu.h | 3 -- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/amd_gart_64.c | 7 ++-- arch/x86/kernel/pci-calgary_64.c | 3 +- arch/x86/kernel/pci-dma.c | 66 +------------------------------------- arch/x86/kernel/pci-swiotlb.c | 5 ++- arch/x86/pci/sta2x11-fixup.c | 2 +- drivers/iommu/amd_iommu.c | 7 ++-- drivers/iommu/intel-iommu.c | 3 +- 11 files changed, 17 insertions(+), 90 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 18233e4..7dc3472 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -83,6 +83,7 @@ config X86 select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CLOCKSOURCE_WATCHDOG select DCACHE_WORD_ACCESS + select DMA_DIRECT_OPS select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select GENERIC_CLOCKEVENTS diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 545bf372..df9816b 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -36,14 +36,6 @@ int arch_dma_supported(struct device *dev, u64 mask); bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); #define arch_dma_alloc_attrs arch_dma_alloc_attrs -extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs); - -extern void dma_generic_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs); - static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) { if (dev->coherent_dma_mask <= DMA_BIT_MASK(24)) diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 1e5d5d9..baedab8 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,13 +2,10 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern const struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; -int x86_dma_supported(struct device *dev, u64 mask); - /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 29786c8..2e8c8a0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o +obj-y += alternative.o i8253.o hw_breakpoint.o obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index ecd486c..52e3abc 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -501,8 +501,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, } __free_pages(page, get_order(size)); } else - return dma_generic_alloc_coherent(dev, size, dma_addr, flag, - attrs); + return dma_direct_alloc(dev, size, dma_addr, flag, attrs); return NULL; } @@ -513,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); - dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); + dma_direct_free(dev, size, vaddr, dma_addr, attrs); } static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -705,7 +704,7 @@ static const struct dma_map_ops gart_dma_ops = { .alloc = gart_alloc_coherent, .free = gart_free_coherent, .mapping_error = gart_mapping_error, - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, }; static void gart_iommu_shutdown(void) diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 35c461f..5647853 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -493,7 +494,7 @@ static const struct dma_map_ops calgary_dma_ops = { .map_page = calgary_map_page, .unmap_page = calgary_unmap_page, .mapping_error = calgary_mapping_error, - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, }; static inline void __iomem * busno_to_bbar(unsigned char num) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index b598208..db0b88e 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -18,7 +18,7 @@ static int forbid_dac __read_mostly; -const struct dma_map_ops *dma_ops = &nommu_dma_ops; +const struct dma_map_ops *dma_ops = &dma_direct_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -76,60 +76,6 @@ void __init pci_iommu_alloc(void) } } } -void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) -{ - struct page *page; - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - dma_addr_t addr; - -again: - page = NULL; - /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(flag)) { - page = dma_alloc_from_contiguous(dev, count, get_order(size), - flag); - if (page) { - addr = phys_to_dma(dev, page_to_phys(page)); - if (addr + size > dev->coherent_dma_mask) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - } - /* fallback */ - if (!page) - page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); - if (!page) - return NULL; - - addr = phys_to_dma(dev, page_to_phys(page)); - if (addr + size > dev->coherent_dma_mask) { - __free_pages(page, get_order(size)); - - if (dev->coherent_dma_mask < DMA_BIT_MASK(32) && - !(flag & GFP_DMA)) { - flag = (flag & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - return NULL; - } - memset(page_address(page), 0, size); - *dma_addr = addr; - return page_address(page); -} - -void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct page *page = virt_to_page(vaddr); - - if (!dma_release_from_contiguous(dev, page, count)) - free_pages((unsigned long)vaddr, get_order(size)); -} bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) { @@ -243,16 +189,6 @@ int arch_dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(arch_dma_supported); -int x86_dma_supported(struct device *dev, u64 mask) -{ - /* Copied from i386. Doesn't make much sense, because it will - only work for pci_alloc_coherent. - The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_BIT_MASK(24)) - return 0; - return 1; -} - static int __init pci_iommu_init(void) { struct iommu_table_entry *p; diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 0ee0f8f3..bcb6a9b 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -30,8 +30,7 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ flags |= __GFP_NOWARN; - vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags, - attrs); + vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); if (vaddr) return vaddr; @@ -45,7 +44,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size, if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr))) swiotlb_free_coherent(dev, size, vaddr, dma_addr); else - dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); + dma_direct_free(dev, size, vaddr, dma_addr, attrs); } static const struct dma_map_ops x86_swiotlb_dma_ops = { diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 75577c1..6c712fe 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -193,7 +193,7 @@ static const struct dma_map_ops sta2x11_dma_ops = { .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, .mapping_error = swiotlb_dma_mapping_error, - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, }; /* At setup time, we use our own ops if the device is a ConneXt one */ diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 74788fd..0bf1942 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -2193,7 +2194,7 @@ static int amd_iommu_add_device(struct device *dev) dev_name(dev)); iommu_ignore_device(dev); - dev->dma_ops = &nommu_dma_ops; + dev->dma_ops = &dma_direct_ops; goto out; } init_iommu_group(dev); @@ -2680,7 +2681,7 @@ free_mem: */ static int amd_iommu_dma_supported(struct device *dev, u64 mask) { - if (!x86_dma_supported(dev, mask)) + if (!dma_direct_supported(dev, mask)) return 0; return check_device(dev); } @@ -2794,7 +2795,7 @@ int __init amd_iommu_init_dma_ops(void) * continue to be SWIOTLB. */ if (!swiotlb) - dma_ops = &nommu_dma_ops; + dma_ops = &dma_direct_ops; if (amd_iommu_unmap_flush) pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n"); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 582fd01..fd899b2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -3871,7 +3872,7 @@ const struct dma_map_ops intel_dma_ops = { .unmap_page = intel_unmap_page, .mapping_error = intel_mapping_error, #ifdef CONFIG_X86 - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, #endif }; -- 2.7.4