bio_vec-backed iov_iter
authorAl Viro <viro@zeniv.linux.org.uk>
Sat, 5 Apr 2014 03:12:29 +0000 (23:12 -0400)
committerAl Viro <viro@zeniv.linux.org.uk>
Tue, 6 May 2014 21:39:45 +0000 (17:39 -0400)
New variant of iov_iter - ITER_BVEC in iter->type, backed with
bio_vec array instead of iovec one.  Primitives taught to deal
with such beasts, __swap_write() switched to using that kind
of iov_iter.

Note that bio_vec is just a <page, offset, length> triple - there's
nothing block-specific about it.  I've left the definition where it
was, but took it from under ifdef CONFIG_BLOCK.

Next target: ->splice_write()...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/fuse/file.c
include/linux/blk_types.h
include/linux/uio.h
mm/iov_iter.c
mm/page_io.c

index 7fbc803..b2dae9d 100644 (file)
@@ -1288,7 +1288,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
        size_t nbytes = 0;  /* # bytes already packed in req */
 
        /* Special case for kernel I/O: can copy directly into the buffer */
-       if (ii->type & REQ_KERNEL) {
+       if (ii->type & ITER_KVEC) {
                unsigned long user_addr = fuse_get_user_addr(ii);
                size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
 
index aa0eaa2..86df13b 100644 (file)
@@ -5,8 +5,6 @@
 #ifndef __LINUX_BLK_TYPES_H
 #define __LINUX_BLK_TYPES_H
 
-#ifdef CONFIG_BLOCK
-
 #include <linux/types.h>
 
 struct bio_set;
@@ -28,6 +26,8 @@ struct bio_vec {
        unsigned int    bv_offset;
 };
 
+#ifdef CONFIG_BLOCK
+
 struct bvec_iter {
        sector_t                bi_sector;      /* device address in 512 byte
                                                   sectors */
index e8a109a..e2231e4 100644 (file)
@@ -19,12 +19,21 @@ struct kvec {
        size_t iov_len;
 };
 
+enum {
+       ITER_IOVEC = 0,
+       ITER_KVEC = 2,
+       ITER_BVEC = 4,
+};
+
 struct iov_iter {
        int type;
-       const struct iovec *iov;
-       unsigned long nr_segs;
        size_t iov_offset;
        size_t count;
+       union {
+               const struct iovec *iov;
+               const struct bio_vec *bvec;
+       };
+       unsigned long nr_segs;
 };
 
 /*
@@ -54,6 +63,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 }
 
 #define iov_for_each(iov, iter, start)                         \
+       if (!((start).type & ITER_BVEC))                        \
        for (iter = (start);                                    \
             (iter).count &&                                    \
             ((iov = iov_iter_iovec(&(iter))), 1);              \
index fcdaaab..7b5dbd1 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
-size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
 {
        size_t skip, copy, left, wanted;
@@ -84,9 +84,8 @@ done:
        i->iov_offset = skip;
        return wanted - bytes;
 }
-EXPORT_SYMBOL(copy_page_to_iter);
 
-size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
 {
        size_t skip, copy, left, wanted;
@@ -166,7 +165,6 @@ done:
        i->iov_offset = skip;
        return wanted - bytes;
 }
-EXPORT_SYMBOL(copy_page_from_iter);
 
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
                        const struct iovec *iov, size_t base, size_t bytes)
@@ -195,7 +193,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr,
  * were successfully copied.  If a fault is encountered then return the number of
  * bytes which were copied.
  */
-size_t iov_iter_copy_from_user_atomic(struct page *page,
+static size_t copy_from_user_atomic_iovec(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
 {
        char *kaddr;
@@ -215,9 +213,8 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 
        return copied;
 }
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
-void iov_iter_advance(struct iov_iter *i, size_t bytes)
+static void advance_iovec(struct iov_iter *i, size_t bytes)
 {
        BUG_ON(i->count < bytes);
 
@@ -252,7 +249,6 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
                i->nr_segs = nr_segs;
        }
 }
-EXPORT_SYMBOL(iov_iter_advance);
 
 /*
  * Fault in the first iovec of the given iov_iter, to a maximum length
@@ -265,26 +261,16 @@ EXPORT_SYMBOL(iov_iter_advance);
  */
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
-       char __user *buf = i->iov->iov_base + i->iov_offset;
-       bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-       return fault_in_pages_readable(buf, bytes);
+       if (!(i->type & ITER_BVEC)) {
+               char __user *buf = i->iov->iov_base + i->iov_offset;
+               bytes = min(bytes, i->iov->iov_len - i->iov_offset);
+               return fault_in_pages_readable(buf, bytes);
+       }
+       return 0;
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
-/*
- * Return the count of just the current iov_iter segment.
- */
-size_t iov_iter_single_seg_count(const struct iov_iter *i)
-{
-       const struct iovec *iov = i->iov;
-       if (i->nr_segs == 1)
-               return i->count;
-       else
-               return min(i->count, iov->iov_len - i->iov_offset);
-}
-EXPORT_SYMBOL(iov_iter_single_seg_count);
-
-unsigned long iov_iter_alignment(const struct iov_iter *i)
+static unsigned long alignment_iovec(const struct iov_iter *i)
 {
        const struct iovec *iov = i->iov;
        unsigned long res;
@@ -307,7 +293,6 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
        res |= (unsigned long)iov->iov_base | size;
        return res;
 }
-EXPORT_SYMBOL(iov_iter_alignment);
 
 void iov_iter_init(struct iov_iter *i, int direction,
                        const struct iovec *iov, unsigned long nr_segs,
@@ -315,7 +300,7 @@ void iov_iter_init(struct iov_iter *i, int direction,
 {
        /* It will get better.  Eventually... */
        if (segment_eq(get_fs(), KERNEL_DS))
-               direction |= REQ_KERNEL;
+               direction |= ITER_KVEC;
        i->type = direction;
        i->iov = iov;
        i->nr_segs = nr_segs;
@@ -324,7 +309,7 @@ void iov_iter_init(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_init);
 
-ssize_t iov_iter_get_pages(struct iov_iter *i,
+static ssize_t get_pages_iovec(struct iov_iter *i,
                   struct page **pages, size_t maxsize,
                   size_t *start)
 {
@@ -349,9 +334,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
                return res;
        return (res == n ? len : res * PAGE_SIZE) - *start;
 }
-EXPORT_SYMBOL(iov_iter_get_pages);
 
-ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+static ssize_t get_pages_alloc_iovec(struct iov_iter *i,
                   struct page ***pages, size_t maxsize,
                   size_t *start)
 {
@@ -387,9 +371,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
        *pages = p;
        return (res == n ? len : res * PAGE_SIZE) - *start;
 }
-EXPORT_SYMBOL(iov_iter_get_pages_alloc);
 
-int iov_iter_npages(const struct iov_iter *i, int maxpages)
+static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages)
 {
        size_t offset = i->iov_offset;
        size_t size = i->count;
@@ -414,4 +397,347 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
        }
        return min(npages, maxpages);
 }
+
+static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
+{
+       char *from = kmap_atomic(page);
+       memcpy(to, from + offset, len);
+       kunmap_atomic(from);
+}
+
+static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
+{
+       char *to = kmap_atomic(page);
+       memcpy(to + offset, from, len);
+       kunmap_atomic(to);
+}
+
+static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       size_t skip, copy, wanted;
+       const struct bio_vec *bvec;
+       void *kaddr, *from;
+
+       if (unlikely(bytes > i->count))
+               bytes = i->count;
+
+       if (unlikely(!bytes))
+               return 0;
+
+       wanted = bytes;
+       bvec = i->bvec;
+       skip = i->iov_offset;
+       copy = min_t(size_t, bytes, bvec->bv_len - skip);
+
+       kaddr = kmap_atomic(page);
+       from = kaddr + offset;
+       memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy);
+       skip += copy;
+       from += copy;
+       bytes -= copy;
+       while (bytes) {
+               bvec++;
+               copy = min(bytes, (size_t)bvec->bv_len);
+               memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy);
+               skip = copy;
+               from += copy;
+               bytes -= copy;
+       }
+       kunmap_atomic(kaddr);
+       if (skip == bvec->bv_len) {
+               bvec++;
+               skip = 0;
+       }
+       i->count -= wanted - bytes;
+       i->nr_segs -= bvec - i->bvec;
+       i->bvec = bvec;
+       i->iov_offset = skip;
+       return wanted - bytes;
+}
+
+static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       size_t skip, copy, wanted;
+       const struct bio_vec *bvec;
+       void *kaddr, *to;
+
+       if (unlikely(bytes > i->count))
+               bytes = i->count;
+
+       if (unlikely(!bytes))
+               return 0;
+
+       wanted = bytes;
+       bvec = i->bvec;
+       skip = i->iov_offset;
+
+       kaddr = kmap_atomic(page);
+
+       to = kaddr + offset;
+
+       copy = min(bytes, bvec->bv_len - skip);
+
+       memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy);
+
+       to += copy;
+       skip += copy;
+       bytes -= copy;
+
+       while (bytes) {
+               bvec++;
+               copy = min(bytes, (size_t)bvec->bv_len);
+               memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy);
+               skip = copy;
+               to += copy;
+               bytes -= copy;
+       }
+       kunmap_atomic(kaddr);
+       if (skip == bvec->bv_len) {
+               bvec++;
+               skip = 0;
+       }
+       i->count -= wanted;
+       i->nr_segs -= bvec - i->bvec;
+       i->bvec = bvec;
+       i->iov_offset = skip;
+       return wanted;
+}
+
+static size_t copy_from_user_bvec(struct page *page,
+               struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+       char *kaddr;
+       size_t left;
+       const struct bio_vec *bvec;
+       size_t base = i->iov_offset;
+
+       kaddr = kmap_atomic(page);
+       for (left = bytes, bvec = i->bvec; left; bvec++, base = 0) {
+               size_t copy = min(left, bvec->bv_len - base);
+               if (!bvec->bv_len)
+                       continue;
+               memcpy_from_page(kaddr + offset, bvec->bv_page,
+                                bvec->bv_offset + base, copy);
+               offset += copy;
+               left -= copy;
+       }
+       kunmap_atomic(kaddr);
+       return bytes;
+}
+
+static void advance_bvec(struct iov_iter *i, size_t bytes)
+{
+       BUG_ON(i->count < bytes);
+
+       if (likely(i->nr_segs == 1)) {
+               i->iov_offset += bytes;
+               i->count -= bytes;
+       } else {
+               const struct bio_vec *bvec = i->bvec;
+               size_t base = i->iov_offset;
+               unsigned long nr_segs = i->nr_segs;
+
+               /*
+                * The !iov->iov_len check ensures we skip over unlikely
+                * zero-length segments (without overruning the iovec).
+                */
+               while (bytes || unlikely(i->count && !bvec->bv_len)) {
+                       int copy;
+
+                       copy = min(bytes, bvec->bv_len - base);
+                       BUG_ON(!i->count || i->count < copy);
+                       i->count -= copy;
+                       bytes -= copy;
+                       base += copy;
+                       if (bvec->bv_len == base) {
+                               bvec++;
+                               nr_segs--;
+                               base = 0;
+                       }
+               }
+               i->bvec = bvec;
+               i->iov_offset = base;
+               i->nr_segs = nr_segs;
+       }
+}
+
+static unsigned long alignment_bvec(const struct iov_iter *i)
+{
+       const struct bio_vec *bvec = i->bvec;
+       unsigned long res;
+       size_t size = i->count;
+       size_t n;
+
+       if (!size)
+               return 0;
+
+       res = bvec->bv_offset + i->iov_offset;
+       n = bvec->bv_len - i->iov_offset;
+       if (n >= size)
+               return res | size;
+       size -= n;
+       res |= n;
+       while (size > (++bvec)->bv_len) {
+               res |= bvec->bv_offset | bvec->bv_len;
+               size -= bvec->bv_len;
+       }
+       res |= bvec->bv_offset | size;
+       return res;
+}
+
+static ssize_t get_pages_bvec(struct iov_iter *i,
+                  struct page **pages, size_t maxsize,
+                  size_t *start)
+{
+       const struct bio_vec *bvec = i->bvec;
+       size_t len = bvec->bv_len - i->iov_offset;
+       if (len > i->count)
+               len = i->count;
+       if (len > maxsize)
+               len = maxsize;
+       *start = bvec->bv_offset + i->iov_offset;
+
+       get_page(*pages = bvec->bv_page);
+
+       return len;
+}
+
+static ssize_t get_pages_alloc_bvec(struct iov_iter *i,
+                  struct page ***pages, size_t maxsize,
+                  size_t *start)
+{
+       const struct bio_vec *bvec = i->bvec;
+       size_t len = bvec->bv_len - i->iov_offset;
+       if (len > i->count)
+               len = i->count;
+       if (len > maxsize)
+               len = maxsize;
+       *start = bvec->bv_offset + i->iov_offset;
+
+       *pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
+       if (!*pages)
+               return -ENOMEM;
+
+       get_page(**pages = bvec->bv_page);
+
+       return len;
+}
+
+static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages)
+{
+       size_t offset = i->iov_offset;
+       size_t size = i->count;
+       const struct bio_vec *bvec = i->bvec;
+       int npages = 0;
+       int n;
+
+       for (n = 0; size && n < i->nr_segs; n++, bvec++) {
+               size_t len = bvec->bv_len - offset;
+               offset = 0;
+               if (unlikely(!len))     /* empty segment */
+                       continue;
+               if (len > size)
+                       len = size;
+               npages++;
+               if (npages >= maxpages) /* don't bother going further */
+                       return maxpages;
+               size -= len;
+               offset = 0;
+       }
+       return min(npages, maxpages);
+}
+
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       if (i->type & ITER_BVEC)
+               return copy_page_to_iter_bvec(page, offset, bytes, i);
+       else
+               return copy_page_to_iter_iovec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_to_iter);
+
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       if (i->type & ITER_BVEC)
+               return copy_page_from_iter_bvec(page, offset, bytes, i);
+       else
+               return copy_page_from_iter_iovec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_from_iter);
+
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+               struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+       if (i->type & ITER_BVEC)
+               return copy_from_user_bvec(page, i, offset, bytes);
+       else
+               return copy_from_user_atomic_iovec(page, i, offset, bytes);
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+
+void iov_iter_advance(struct iov_iter *i, size_t size)
+{
+       if (i->type & ITER_BVEC)
+               advance_bvec(i, size);
+       else
+               advance_iovec(i, size);
+}
+EXPORT_SYMBOL(iov_iter_advance);
+
+/*
+ * Return the count of just the current iov_iter segment.
+ */
+size_t iov_iter_single_seg_count(const struct iov_iter *i)
+{
+       if (i->nr_segs == 1)
+               return i->count;
+       else if (i->type & ITER_BVEC)
+               return min(i->count, i->iov->iov_len - i->iov_offset);
+       else
+               return min(i->count, i->bvec->bv_len - i->iov_offset);
+}
+EXPORT_SYMBOL(iov_iter_single_seg_count);
+
+unsigned long iov_iter_alignment(const struct iov_iter *i)
+{
+       if (i->type & ITER_BVEC)
+               return alignment_bvec(i);
+       else
+               return alignment_iovec(i);
+}
+EXPORT_SYMBOL(iov_iter_alignment);
+
+ssize_t iov_iter_get_pages(struct iov_iter *i,
+                  struct page **pages, size_t maxsize,
+                  size_t *start)
+{
+       if (i->type & ITER_BVEC)
+               return get_pages_bvec(i, pages, maxsize, start);
+       else
+               return get_pages_iovec(i, pages, maxsize, start);
+}
+EXPORT_SYMBOL(iov_iter_get_pages);
+
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+                  struct page ***pages, size_t maxsize,
+                  size_t *start)
+{
+       if (i->type & ITER_BVEC)
+               return get_pages_alloc_bvec(i, pages, maxsize, start);
+       else
+               return get_pages_alloc_iovec(i, pages, maxsize, start);
+}
+EXPORT_SYMBOL(iov_iter_get_pages_alloc);
+
+int iov_iter_npages(const struct iov_iter *i, int maxpages)
+{
+       if (i->type & ITER_BVEC)
+               return iov_iter_npages_bvec(i, maxpages);
+       else
+               return iov_iter_npages_iovec(i, maxpages);
+}
 EXPORT_SYMBOL(iov_iter_npages);
index 313bfed..33bb38c 100644 (file)
@@ -259,23 +259,28 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
                struct kiocb kiocb;
                struct file *swap_file = sis->swap_file;
                struct address_space *mapping = swap_file->f_mapping;
-               struct iovec iov = {
-                       .iov_base = kmap(page),
-                       .iov_len  = PAGE_SIZE,
+               struct bio_vec bv = {
+                       .bv_page = page,
+                       .bv_len  = PAGE_SIZE,
+                       .bv_offset = 0
+               };
+               struct iov_iter from = {
+                       .type = ITER_BVEC | WRITE,
+                       .count = PAGE_SIZE,
+                       .iov_offset = 0,
+                       .nr_segs = 1,
+                       .bvec = &bv
                };
-               struct iov_iter from;
 
                init_sync_kiocb(&kiocb, swap_file);
                kiocb.ki_pos = page_file_offset(page);
                kiocb.ki_nbytes = PAGE_SIZE;
-               iov_iter_init(&from, KERNEL_WRITE, &iov, 1, PAGE_SIZE);
 
                set_page_writeback(page);
                unlock_page(page);
-               ret = mapping->a_ops->direct_IO(KERNEL_WRITE,
+               ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE,
                                                &kiocb, &from,
                                                kiocb.ki_pos);
-               kunmap(page);
                if (ret == PAGE_SIZE) {
                        count_vm_event(PSWPOUT);
                        ret = 0;