From: Linus Torvalds Date: Mon, 19 Oct 2020 21:28:30 +0000 (-0700) Subject: Merge tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszered... X-Git-Tag: v5.10.7~1375 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=694565356c2e06224d94774a42709cc8dfab49ee;p=platform%2Fkernel%2Flinux-rpi.git Merge tag 'fuse-update-5.10' of git://git./linux/kernel/git/mszeredi/fuse Pull fuse updates from Miklos Szeredi: - Support directly accessing host page cache from virtiofs. This can improve I/O performance for various workloads, as well as reducing the memory requirement by eliminating double caching. Thanks to Vivek Goyal for doing most of the work on this. - Allow automatic submounting inside virtiofs. This allows unique st_dev/ st_ino values to be assigned inside the guest to files residing on different filesystems on the host. Thanks to Max Reitz for the patches. - Fix an old use after free bug found by Pradeep P V K. * tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (25 commits) virtiofs: calculate number of scatter-gather elements accurately fuse: connection remove fix fuse: implement crossmounts fuse: Allow fuse_fill_super_common() for submounts fuse: split fuse_mount off of fuse_conn fuse: drop fuse_conn parameter where possible fuse: store fuse_conn in fuse_req fuse: add submount support to fuse: fix page dereference after free virtiofs: add logic to free up a memory range virtiofs: maintain a list of busy elements virtiofs: serialize truncate/punch_hole and dax fault path virtiofs: define dax address space operations virtiofs: add DAX mmap support virtiofs: implement dax read/write operations virtiofs: introduce setupmapping/removemapping commands virtiofs: implement FUSE_INIT map_alignment field virtiofs: keep a list of free dax memory ranges virtiofs: add a mount option to enable dax virtiofs: set up virtio_fs dax_device ... --- 694565356c2e06224d94774a42709cc8dfab49ee diff --cc fs/fuse/file.c index 43c165e,53d4dd1..c03034e --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@@ -3120,17 -3156,10 +3146,17 @@@ fuse_direct_IO(struct kiocb *iocb, stru * By default, we want to optimize all I/Os with async request * submission to the client filesystem if supported. */ - io->async = ff->fc->async_dio; - io->async = async_dio; ++ io->async = ff->fm->fc->async_dio; io->iocb = iocb; io->blocking = is_sync_kiocb(iocb); + /* optimization for short read */ + if (io->async && !io->write && offset + count > i_size) { - iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset)); ++ iov_iter_truncate(iter, fuse_round_up(ff->fm->fc, i_size - offset)); + shortened = count - iov_iter_count(iter); + count -= shortened; + } + /* * We cannot asynchronously extend the size of a file. * In such case the aio will behave exactly like sync io. diff --cc fs/fuse/virtio_fs.c index 104f35d,f9c1aa1..21a9e53 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@@ -676,6 -733,130 +733,130 @@@ static void virtio_fs_cleanup_vqs(struc vdev->config->del_vqs(vdev); } + /* Map a window offset to a page frame number. The window offset will have + * been produced by .iomap_begin(), which maps a file offset to a window + * offset. + */ + static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn) + { + struct virtio_fs *fs = dax_get_private(dax_dev); + phys_addr_t offset = PFN_PHYS(pgoff); + size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff; + + if (kaddr) + *kaddr = fs->window_kaddr + offset; + if (pfn) + *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, + PFN_DEV | PFN_MAP); + return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; + } + + static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev, + pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i) + { + return copy_from_iter(addr, bytes, i); + } + + static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev, + pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i) + { + return copy_to_iter(addr, bytes, i); + } + + static int virtio_fs_zero_page_range(struct dax_device *dax_dev, + pgoff_t pgoff, size_t nr_pages) + { + long rc; + void *kaddr; + + rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); + if (rc < 0) + return rc; + memset(kaddr, 0, nr_pages << PAGE_SHIFT); + dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); + return 0; + } + + static const struct dax_operations virtio_fs_dax_ops = { + .direct_access = virtio_fs_direct_access, + .copy_from_iter = virtio_fs_copy_from_iter, + .copy_to_iter = virtio_fs_copy_to_iter, + .zero_page_range = virtio_fs_zero_page_range, + }; + + static void virtio_fs_cleanup_dax(void *data) + { + struct dax_device *dax_dev = data; + + kill_dax(dax_dev); + put_dax(dax_dev); + } + + static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) + { + struct virtio_shm_region cache_reg; + struct dev_pagemap *pgmap; + bool have_cache; + + if (!IS_ENABLED(CONFIG_FUSE_DAX)) + return 0; + + /* Get cache region */ + have_cache = virtio_get_shm_region(vdev, &cache_reg, + (u8)VIRTIO_FS_SHMCAP_ID_CACHE); + if (!have_cache) { + dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); + return 0; + } + + if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, + dev_name(&vdev->dev))) { + dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", + cache_reg.addr, cache_reg.len); + return -EBUSY; + } + + dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, + cache_reg.addr); + + pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); + if (!pgmap) + return -ENOMEM; + + pgmap->type = MEMORY_DEVICE_FS_DAX; + + /* Ideally we would directly use the PCI BAR resource but + * devm_memremap_pages() wants its own copy in pgmap. So + * initialize a struct resource from scratch (only the start + * and end fields will be used). + */ - pgmap->res = (struct resource){ - .name = "virtio-fs dax window", ++ pgmap->range = (struct range) { + .start = (phys_addr_t) cache_reg.addr, + .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, + }; ++ pgmap->nr_range = 1; + + fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); + if (IS_ERR(fs->window_kaddr)) + return PTR_ERR(fs->window_kaddr); + + fs->window_phys_addr = (phys_addr_t) cache_reg.addr; + fs->window_len = (phys_addr_t) cache_reg.len; + + dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", + __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); + + fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0); + if (IS_ERR(fs->dax_dev)) + return PTR_ERR(fs->dax_dev); + + return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, + fs->dax_dev); + } + static int virtio_fs_probe(struct virtio_device *vdev) { struct virtio_fs *fs;