From: Linus Torvalds Date: Thu, 19 Sep 2019 20:27:23 +0000 (-0700) Subject: Merge tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping X-Git-Tag: v5.4-rc1~113 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=671df189537883f36cf9c7d4f9495bfac0f86627;p=platform%2Fkernel%2Flinux-rpi.git Merge tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping Pull dma-mapping updates from Christoph Hellwig: - add dma-mapping and block layer helpers to take care of IOMMU merging for mmc plus subsequent fixups (Yoshihiro Shimoda) - rework handling of the pgprot bits for remapping (me) - take care of the dma direct infrastructure for swiotlb-xen (me) - improve the dma noncoherent remapping infrastructure (me) - better defaults for ->mmap, ->get_sgtable and ->get_required_mask (me) - cleanup mmapping of coherent DMA allocations (me) - various misc cleanups (Andy Shevchenko, me) * tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping: (41 commits) mmc: renesas_sdhi_internal_dmac: Add MMC_CAP2_MERGE_CAPABLE mmc: queue: Fix bigger segments usage arm64: use asm-generic/dma-mapping.h swiotlb-xen: merge xen_unmap_single into xen_swiotlb_unmap_page swiotlb-xen: simplify cache maintainance swiotlb-xen: use the same foreign page check everywhere swiotlb-xen: remove xen_swiotlb_dma_mmap and xen_swiotlb_dma_get_sgtable xen: remove the exports for xen_{create,destroy}_contiguous_region xen/arm: remove xen_dma_ops xen/arm: simplify dma_cache_maint xen/arm: use dev_is_dma_coherent xen/arm: consolidate page-coherent.h xen/arm: use dma-noncoherent.h calls for xen-swiotlb cache maintainance arm: remove wrappers for the generic dma remap helpers dma-mapping: introduce a dma_common_find_pages helper dma-mapping: always use VM_DMA_COHERENT for generic DMA remap vmalloc: lift the arm flag for coherent mappings to common code dma-mapping: provide a better default ->get_required_mask dma-mapping: remove the dma_declare_coherent_memory export 
remoteproc: don't allow modular build ... --- 671df189537883f36cf9c7d4f9495bfac0f86627 diff --cc arch/arc/mm/dma.c index 70a3fbe7,ff4a575..73a7e88 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@@ -101,12 -101,6 +101,6 @@@ void arch_setup_dma_ops(struct device * if (is_isa_arcv2() && ioc_enable && coherent) dev->dma_coherent = true; - dev_info(dev, "use %sncoherent DMA ops\n", + dev_info(dev, "use %scoherent DMA ops\n", dev->dma_coherent ? "" : "non"); } - - static int __init atomic_pool_init(void) - { - return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); - } - postcore_initcall(atomic_pool_init); diff --cc arch/ia64/hp/common/sba_iommu.c index a7eff5e,4c0ea6c..a806227 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@@ -2055,22 -2062,6 +2055,24 @@@ static int __init acpi_sba_ioc_init_acp /* This has to run before acpi_scan_init(). */ arch_initcall(acpi_sba_ioc_init_acpi); +static int sba_dma_supported (struct device *dev, u64 mask) +{ + /* make sure it's at least 32bit capable */ + return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL); +} + +static const struct dma_map_ops sba_dma_ops = { + .alloc = sba_alloc_coherent, + .free = sba_free_coherent, + .map_page = sba_map_page, + .unmap_page = sba_unmap_page, + .map_sg = sba_map_sg_attrs, + .unmap_sg = sba_unmap_sg_attrs, + .dma_supported = sba_dma_supported, ++ .mmap = dma_common_mmap, ++ .get_sgtable = dma_common_get_sgtable, +}; + static int __init sba_init(void) { diff --cc block/blk-settings.c index 6bd1e3b,c3632fc..5f6dcc7 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@@ -833,21 -834,27 +834,43 @@@ void blk_queue_write_cache(struct reque EXPORT_SYMBOL_GPL(blk_queue_write_cache); /** + * blk_queue_required_elevator_features - Set a queue required elevator features + * @q: the request queue for the target device + * @features: Required elevator features OR'ed together + * + * Tell the block layer that for the device controlled through @q, only the + * only 
elevators that can be used are those that implement at least the set of + * features specified by @features. + */ +void blk_queue_required_elevator_features(struct request_queue *q, + unsigned int features) +{ + q->required_elevator_features = features; +} +EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features); + ++/** + * blk_queue_can_use_dma_map_merging - configure queue for merging segments. + * @q: the request queue for the device + * @dev: the device pointer for dma + * + * Tell the block layer about merging the segments by dma map of @q. + */ + bool blk_queue_can_use_dma_map_merging(struct request_queue *q, + struct device *dev) + { + unsigned long boundary = dma_get_merge_boundary(dev); + + if (!boundary) + return false; + + /* No need to update max_segment_size. see blk_queue_virt_boundary() */ + blk_queue_virt_boundary(q, boundary); + + return true; + } + EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --cc drivers/iommu/intel-iommu.c index 87de0b9,dca1b06..3f97491 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@@ -3785,254 -3738,10 +3785,256 @@@ static const struct dma_map_ops intel_d .map_resource = intel_map_resource, .unmap_resource = intel_unmap_resource, .dma_supported = dma_direct_supported, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; +static void +bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, enum dma_sync_target target) +{ + struct dmar_domain *domain; + phys_addr_t tlb_addr; + + domain = find_domain(dev); + if (WARN_ON(!domain)) + return; + + tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr); + if (is_swiotlb_buffer(tlb_addr)) + swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target); +} + +static dma_addr_t +bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, + enum dma_data_direction dir, unsigned long attrs, + u64 
dma_mask) +{ + size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE); + struct dmar_domain *domain; + struct intel_iommu *iommu; + unsigned long iova_pfn; + unsigned long nrpages; + phys_addr_t tlb_addr; + int prot = 0; + int ret; + + domain = find_domain(dev); + if (WARN_ON(dir == DMA_NONE || !domain)) + return DMA_MAPPING_ERROR; + + iommu = domain_get_iommu(domain); + if (WARN_ON(!iommu)) + return DMA_MAPPING_ERROR; + + nrpages = aligned_nrpages(0, size); + iova_pfn = intel_alloc_iova(dev, domain, + dma_to_mm_pfn(nrpages), dma_mask); + if (!iova_pfn) + return DMA_MAPPING_ERROR; + + /* + * Check if DMAR supports zero-length reads on write only + * mappings.. + */ + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || + !cap_zlr(iommu->cap)) + prot |= DMA_PTE_READ; + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + prot |= DMA_PTE_WRITE; + + /* + * If both the physical buffer start address and size are + * page aligned, we don't need to use a bounce page. + */ + if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) { + tlb_addr = swiotlb_tbl_map_single(dev, + __phys_to_dma(dev, io_tlb_start), + paddr, size, aligned_size, dir, attrs); + if (tlb_addr == DMA_MAPPING_ERROR) { + goto swiotlb_error; + } else { + /* Cleanup the padding area. 
*/ + void *padding_start = phys_to_virt(tlb_addr); + size_t padding_size = aligned_size; + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_TO_DEVICE || + dir == DMA_BIDIRECTIONAL)) { + padding_start += size; + padding_size -= size; + } + + memset(padding_start, 0, padding_size); + } + } else { + tlb_addr = paddr; + } + + ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn), + tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot); + if (ret) + goto mapping_error; + + trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size); + + return (phys_addr_t)iova_pfn << PAGE_SHIFT; + +mapping_error: + if (is_swiotlb_buffer(tlb_addr)) + swiotlb_tbl_unmap_single(dev, tlb_addr, size, + aligned_size, dir, attrs); +swiotlb_error: + free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages)); + dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n", + size, (unsigned long long)paddr, dir); + + return DMA_MAPPING_ERROR; +} + +static void +bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE); + struct dmar_domain *domain; + phys_addr_t tlb_addr; + + domain = find_domain(dev); + if (WARN_ON(!domain)) + return; + + tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr); + if (WARN_ON(!tlb_addr)) + return; + + intel_unmap(dev, dev_addr, size); + if (is_swiotlb_buffer(tlb_addr)) + swiotlb_tbl_unmap_single(dev, tlb_addr, size, + aligned_size, dir, attrs); + + trace_bounce_unmap_single(dev, dev_addr, size); +} + +static dma_addr_t +bounce_map_page(struct device *dev, struct page *page, unsigned long offset, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return bounce_map_single(dev, page_to_phys(page) + offset, + size, dir, attrs, *dev->dma_mask); +} + +static dma_addr_t +bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + return 
bounce_map_single(dev, phys_addr, size, + dir, attrs, *dev->dma_mask); +} + +static void +bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + bounce_unmap_single(dev, dev_addr, size, dir, attrs); +} + +static void +bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + bounce_unmap_single(dev, dev_addr, size, dir, attrs); +} + +static void +bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, + enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sglist, sg, nelems, i) + bounce_unmap_page(dev, sg->dma_address, + sg_dma_len(sg), dir, attrs); +} + +static int +bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, + enum dma_data_direction dir, unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sglist, sg, nelems, i) { + sg->dma_address = bounce_map_page(dev, sg_page(sg), + sg->offset, sg->length, + dir, attrs); + if (sg->dma_address == DMA_MAPPING_ERROR) + goto out_unmap; + sg_dma_len(sg) = sg->length; + } + + return nelems; + +out_unmap: + bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + return 0; +} + +static void +bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU); +} + +static void +bounce_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE); +} + +static void +bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sglist, sg, nelems, i) + bounce_sync_single(dev, sg_dma_address(sg), + sg_dma_len(sg), dir, SYNC_FOR_CPU); +} + +static 
void +bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sglist, sg, nelems, i) + bounce_sync_single(dev, sg_dma_address(sg), + sg_dma_len(sg), dir, SYNC_FOR_DEVICE); +} + +static const struct dma_map_ops bounce_dma_ops = { + .alloc = intel_alloc_coherent, + .free = intel_free_coherent, + .map_sg = bounce_map_sg, + .unmap_sg = bounce_unmap_sg, + .map_page = bounce_map_page, + .unmap_page = bounce_unmap_page, + .sync_single_for_cpu = bounce_sync_single_for_cpu, + .sync_single_for_device = bounce_sync_single_for_device, + .sync_sg_for_cpu = bounce_sync_sg_for_cpu, + .sync_sg_for_device = bounce_sync_sg_for_device, + .map_resource = bounce_map_resource, + .unmap_resource = bounce_unmap_resource, + .dma_supported = dma_direct_supported, +}; + static inline int iommu_domain_cache_init(void) { int ret = 0; diff --cc drivers/xen/swiotlb-xen.c index adcabd9,1190934..58c9365 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@@ -433,16 -430,9 +430,9 @@@ static void xen_swiotlb_unmap_page(stru /* NOTE: We use dev_addr here, not paddr! 
*/ if (is_xen_swiotlb_buffer(dev_addr)) - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); + swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs); } - static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) - { - xen_unmap_single(hwdev, dev_addr, size, dir, attrs); - } - static void xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) diff --cc include/linux/blkdev.h index 3094f2d,f6d55e2..d9db32f --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@@ -1108,8 -1085,8 +1108,10 @@@ extern void blk_queue_dma_alignment(str extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); +extern void blk_queue_required_elevator_features(struct request_queue *q, + unsigned int features); + extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, + struct device *dev); /* * Number of physical segments as sent to the device.