Merge branch 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Nov 2017 20:08:18 +0000 (12:08 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Nov 2017 20:08:18 +0000 (12:08 -0800)
Pull iov_iter updates from Al Viro:

 - bio_{map,copy}_user_iov() series; those are cleanups - fixes from the
   same pile went into mainline (and stable) in late September.

 - fs/iomap.c iov_iter-related fixes

 - new primitive - iov_iter_for_each_range(), which applies a function
   to kernel-mapped segments of an iov_iter.

   Usable for kvec and bvec ones, the latter does kmap()/kunmap() around
   the callback. _Not_ usable for iovec- or pipe-backed iov_iter; the
   latter is not hard to fix if the need ever appears, the former is by
   design.

   Another related primitive will have to wait for the next cycle - it
   passes page + offset + size instead of pointer + size, and that one
   will be usable for everything _except_ kvec. Unfortunately, that one
   didn't get exposure in -next yet, so...

 - a bit more lustre iov_iter work, including a use case for
   iov_iter_for_each_range() (checksum calculation)

 - vhost/scsi leak fix in failure exit

 - misc cleanups and detritectomy...

* 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (21 commits)
  iomap_dio_actor(): fix iov_iter bugs
  switch ksocknal_lib_recv_...() to use of iov_iter_for_each_range()
  lustre: switch struct ksock_conn to iov_iter
  vhost/scsi: switch to iov_iter_get_pages()
  fix a page leak in vhost_scsi_iov_to_sgl() error recovery
  new primitive: iov_iter_for_each_range()
  lnet_return_rx_credits_locked: don't abuse list_entry
  xen: don't open-code iov_iter_kvec()
  orangefs: remove detritus from struct orangefs_kiocb_s
  kill iov_shorten()
  bio_alloc_map_data(): do bmd->iter setup right there
  bio_copy_user_iov(): saner bio size calculation
  bio_map_user_iov(): get rid of copying iov_iter
  bio_copy_from_iter(): get rid of copying iov_iter
  move more stuff down into bio_copy_user_iov()
  blk_rq_map_user_iov(): move iov_iter_advance() down
  bio_map_user_iov(): get rid of the iov_for_each()
  bio_map_user_iov(): move alignment check into the main loop
  don't rely upon subsequent bio_add_pc_page() calls failing
  ... and with iov_iter_get_pages_alloc() it becomes even simpler
  ...

13 files changed:
1  2 
block/bio.c
block/blk-map.c
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
drivers/staging/lustre/lnet/lnet/lib-move.c
drivers/vhost/scsi.c
drivers/xen/pvcalls-back.c
fs/iomap.c
fs/orangefs/orangefs-kernel.h
fs/read_write.c
include/linux/bio.h

diff --combined block/bio.c
@@@ -400,7 -400,7 +400,7 @@@ static void punt_bios_to_rescuer(struc
  
  /**
   * bio_alloc_bioset - allocate a bio for I/O
 - * @gfp_mask:   the GFP_ mask given to the slab allocator
 + * @gfp_mask:   the GFP_* mask given to the slab allocator
   * @nr_iovecs:        number of iovecs to pre-allocate
   * @bs:               the bio_set to allocate from.
   *
@@@ -597,7 -597,6 +597,7 @@@ void __bio_clone_fast(struct bio *bio, 
         * so we don't set nor calculate new physical/hw segment counts here
         */
        bio->bi_disk = bio_src->bi_disk;
 +      bio->bi_partno = bio_src->bi_partno;
        bio_set_flag(bio, BIO_CLONED);
        bio->bi_opf = bio_src->bi_opf;
        bio->bi_write_hint = bio_src->bi_write_hint;
@@@ -918,9 -917,17 +918,9 @@@ int bio_iov_iter_get_pages(struct bio *
  }
  EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
  
 -struct submit_bio_ret {
 -      struct completion event;
 -      int error;
 -};
 -
  static void submit_bio_wait_endio(struct bio *bio)
  {
 -      struct submit_bio_ret *ret = bio->bi_private;
 -
 -      ret->error = blk_status_to_errno(bio->bi_status);
 -      complete(&ret->event);
 +      complete(bio->bi_private);
  }
  
  /**
   */
  int submit_bio_wait(struct bio *bio)
  {
 -      struct submit_bio_ret ret;
 +      DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
  
 -      init_completion(&ret.event);
 -      bio->bi_private = &ret;
 +      bio->bi_private = &done;
        bio->bi_end_io = submit_bio_wait_endio;
        bio->bi_opf |= REQ_SYNC;
        submit_bio(bio);
 -      wait_for_completion_io(&ret.event);
 +      wait_for_completion_io(&done);
  
 -      return ret.error;
 +      return blk_status_to_errno(bio->bi_status);
  }
  EXPORT_SYMBOL(submit_bio_wait);
  
@@@ -1062,14 -1070,21 +1062,21 @@@ struct bio_map_data 
        struct iovec iov[];
  };
  
- static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
+ static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
                                               gfp_t gfp_mask)
  {
-       if (iov_count > UIO_MAXIOV)
+       struct bio_map_data *bmd;
+       if (data->nr_segs > UIO_MAXIOV)
                return NULL;
  
-       return kmalloc(sizeof(struct bio_map_data) +
-                      sizeof(struct iovec) * iov_count, gfp_mask);
+       bmd = kmalloc(sizeof(struct bio_map_data) +
+                      sizeof(struct iovec) * data->nr_segs, gfp_mask);
+       if (!bmd)
+               return NULL;
+       memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
+       bmd->iter = *data;
+       bmd->iter.iov = bmd->iov;
+       return bmd;
  }
  
  /**
   * Copy all pages from iov_iter to bio.
   * Returns 0 on success, or error on failure.
   */
- static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
+ static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
  {
        int i;
        struct bio_vec *bvec;
                ret = copy_page_from_iter(bvec->bv_page,
                                          bvec->bv_offset,
                                          bvec->bv_len,
-                                         &iter);
+                                         iter);
  
-               if (!iov_iter_count(&iter))
+               if (!iov_iter_count(iter))
                        break;
  
                if (ret < bvec->bv_len)
@@@ -1187,40 -1202,18 +1194,18 @@@ int bio_uncopy_user(struct bio *bio
   */
  struct bio *bio_copy_user_iov(struct request_queue *q,
                              struct rq_map_data *map_data,
-                             const struct iov_iter *iter,
+                             struct iov_iter *iter,
                              gfp_t gfp_mask)
  {
        struct bio_map_data *bmd;
        struct page *page;
        struct bio *bio;
-       int i, ret;
-       int nr_pages = 0;
+       int i = 0, ret;
+       int nr_pages;
        unsigned int len = iter->count;
        unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
  
-       for (i = 0; i < iter->nr_segs; i++) {
-               unsigned long uaddr;
-               unsigned long end;
-               unsigned long start;
-               uaddr = (unsigned long) iter->iov[i].iov_base;
-               end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
-                       >> PAGE_SHIFT;
-               start = uaddr >> PAGE_SHIFT;
-               /*
-                * Overflow, abort
-                */
-               if (end < start)
-                       return ERR_PTR(-EINVAL);
-               nr_pages += end - start;
-       }
-       if (offset)
-               nr_pages++;
-       bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
+       bmd = bio_alloc_map_data(iter, gfp_mask);
        if (!bmd)
                return ERR_PTR(-ENOMEM);
  
         * shortlived one.
         */
        bmd->is_our_pages = map_data ? 0 : 1;
-       memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
-       bmd->iter = *iter;
-       bmd->iter.iov = bmd->iov;
+       nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+       if (nr_pages > BIO_MAX_PAGES)
+               nr_pages = BIO_MAX_PAGES;
  
        ret = -ENOMEM;
        bio = bio_kmalloc(gfp_mask, nr_pages);
        if (ret)
                goto cleanup;
  
+       if (map_data)
+               map_data->offset += bio->bi_iter.bi_size;
        /*
         * success
         */
        if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
            (map_data && map_data->from_user)) {
-               ret = bio_copy_from_iter(bio, *iter);
+               ret = bio_copy_from_iter(bio, iter);
                if (ret)
                        goto cleanup;
+       } else {
+               iov_iter_advance(iter, bio->bi_iter.bi_size);
        }
  
        bio->bi_private = bmd;
+       if (map_data && map_data->null_mapped)
+               bio_set_flag(bio, BIO_NULL_MAPPED);
        return bio;
  cleanup:
        if (!map_data)
@@@ -1312,111 -1313,74 +1305,74 @@@ out_bmd
   *    device. Returns an error pointer in case of error.
   */
  struct bio *bio_map_user_iov(struct request_queue *q,
-                            const struct iov_iter *iter,
+                            struct iov_iter *iter,
                             gfp_t gfp_mask)
  {
        int j;
-       int nr_pages = 0;
-       struct page **pages;
        struct bio *bio;
-       int cur_page = 0;
-       int ret, offset;
-       struct iov_iter i;
-       struct iovec iov;
+       int ret;
        struct bio_vec *bvec;
  
-       iov_for_each(iov, i, *iter) {
-               unsigned long uaddr = (unsigned long) iov.iov_base;
-               unsigned long len = iov.iov_len;
-               unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               unsigned long start = uaddr >> PAGE_SHIFT;
-               /*
-                * Overflow, abort
-                */
-               if (end < start)
-                       return ERR_PTR(-EINVAL);
-               nr_pages += end - start;
-               /*
-                * buffer must be aligned to at least logical block size for now
-                */
-               if (uaddr & queue_dma_alignment(q))
-                       return ERR_PTR(-EINVAL);
-       }
-       if (!nr_pages)
+       if (!iov_iter_count(iter))
                return ERR_PTR(-EINVAL);
  
-       bio = bio_kmalloc(gfp_mask, nr_pages);
+       bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
        if (!bio)
                return ERR_PTR(-ENOMEM);
  
-       ret = -ENOMEM;
-       pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
-       if (!pages)
-               goto out;
+       while (iov_iter_count(iter)) {
+               struct page **pages;
+               ssize_t bytes;
+               size_t offs, added = 0;
+               int npages;
  
-       iov_for_each(iov, i, *iter) {
-               unsigned long uaddr = (unsigned long) iov.iov_base;
-               unsigned long len = iov.iov_len;
-               unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               unsigned long start = uaddr >> PAGE_SHIFT;
-               const int local_nr_pages = end - start;
-               const int page_limit = cur_page + local_nr_pages;
-               ret = get_user_pages_fast(uaddr, local_nr_pages,
-                               (iter->type & WRITE) != WRITE,
-                               &pages[cur_page]);
-               if (unlikely(ret < local_nr_pages)) {
-                       for (j = cur_page; j < page_limit; j++) {
-                               if (!pages[j])
-                                       break;
-                               put_page(pages[j]);
-                       }
-                       ret = -EFAULT;
+               bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
+               if (unlikely(bytes <= 0)) {
+                       ret = bytes ? bytes : -EFAULT;
                        goto out_unmap;
                }
  
-               offset = offset_in_page(uaddr);
-               for (j = cur_page; j < page_limit; j++) {
-                       unsigned int bytes = PAGE_SIZE - offset;
-                       unsigned short prev_bi_vcnt = bio->bi_vcnt;
+               npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
  
-                       if (len <= 0)
-                               break;
-                       
-                       if (bytes > len)
-                               bytes = len;
-                       /*
-                        * sorry...
-                        */
-                       if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
-                                           bytes)
-                               break;
+               if (unlikely(offs & queue_dma_alignment(q))) {
+                       ret = -EINVAL;
+                       j = 0;
+               } else {
+                       for (j = 0; j < npages; j++) {
+                               struct page *page = pages[j];
+                               unsigned int n = PAGE_SIZE - offs;
+                               unsigned short prev_bi_vcnt = bio->bi_vcnt;
  
-                       /*
-                        * check if vector was merged with previous
-                        * drop page reference if needed
-                        */
-                       if (bio->bi_vcnt == prev_bi_vcnt)
-                               put_page(pages[j]);
+                               if (n > bytes)
+                                       n = bytes;
  
-                       len -= bytes;
-                       offset = 0;
-               }
+                               if (!bio_add_pc_page(q, bio, page, n, offs))
+                                       break;
  
-               cur_page = j;
+                               /*
+                                * check if vector was merged with previous
+                                * drop page reference if needed
+                                */
+                               if (bio->bi_vcnt == prev_bi_vcnt)
+                                       put_page(page);
+                               added += n;
+                               bytes -= n;
+                               offs = 0;
+                       }
+                       iov_iter_advance(iter, added);
+               }
                /*
                 * release the pages we didn't map into the bio, if any
                 */
-               while (j < page_limit)
+               while (j < npages)
                        put_page(pages[j++]);
+               kvfree(pages);
+               /* couldn't stuff something into bio? */
+               if (bytes)
+                       break;
        }
  
-       kfree(pages);
        bio_set_flag(bio, BIO_USER_MAPPED);
  
        /*
        bio_for_each_segment_all(bvec, bio, j) {
                put_page(bvec->bv_page);
        }
-  out:
-       kfree(pages);
        bio_put(bio);
        return ERR_PTR(ret);
  }
@@@ -1932,8 -1894,11 +1886,8 @@@ void bioset_free(struct bio_set *bs
        if (bs->rescue_workqueue)
                destroy_workqueue(bs->rescue_workqueue);
  
 -      if (bs->bio_pool)
 -              mempool_destroy(bs->bio_pool);
 -
 -      if (bs->bvec_pool)
 -              mempool_destroy(bs->bvec_pool);
 +      mempool_destroy(bs->bio_pool);
 +      mempool_destroy(bs->bvec_pool);
  
        bioset_integrity_free(bs);
        bio_put_slab(bs);
@@@ -2034,6 -1999,37 +1988,6 @@@ int bio_associate_blkcg(struct bio *bio
  EXPORT_SYMBOL_GPL(bio_associate_blkcg);
  
  /**
 - * bio_associate_current - associate a bio with %current
 - * @bio: target bio
 - *
 - * Associate @bio with %current if it hasn't been associated yet.  Block
 - * layer will treat @bio as if it were issued by %current no matter which
 - * task actually issues it.
 - *
 - * This function takes an extra reference of @task's io_context and blkcg
 - * which will be put when @bio is released.  The caller must own @bio,
 - * ensure %current->io_context exists, and is responsible for synchronizing
 - * calls to this function.
 - */
 -int bio_associate_current(struct bio *bio)
 -{
 -      struct io_context *ioc;
 -
 -      if (bio->bi_css)
 -              return -EBUSY;
 -
 -      ioc = current->io_context;
 -      if (!ioc)
 -              return -ENOENT;
 -
 -      get_io_context_active(ioc);
 -      bio->bi_ioc = ioc;
 -      bio->bi_css = task_get_css(current, io_cgrp_id);
 -      return 0;
 -}
 -EXPORT_SYMBOL_GPL(bio_associate_current);
 -
 -/**
   * bio_disassociate_task - undo bio_associate_current()
   * @bio: target bio
   */
diff --combined block/blk-map.c
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * Functions related to mapping data to requests
   */
@@@ -67,13 -66,6 +67,6 @@@ static int __blk_rq_map_user_iov(struc
        bio->bi_opf &= ~REQ_OP_MASK;
        bio->bi_opf |= req_op(rq);
  
-       if (map_data && map_data->null_mapped)
-               bio_set_flag(bio, BIO_NULL_MAPPED);
-       iov_iter_advance(iter, bio->bi_iter.bi_size);
-       if (map_data)
-               map_data->offset += bio->bi_iter.bi_size;
        orig_bio = bio;
  
        /*
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * GPL HEADER START
   *
@@@ -177,9 -176,12 +177,9 @@@ struct ksock_peer 
  ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id)
  {
        struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
 -      struct list_head *tmp;
        struct ksock_peer *peer;
  
 -      list_for_each(tmp, peer_list) {
 -              peer = list_entry(tmp, struct ksock_peer, ksnp_list);
 -
 +      list_for_each_entry(peer, peer_list, ksnp_list) {
                LASSERT(!peer->ksnp_closing);
  
                if (peer->ksnp_ni != ni)
@@@ -451,6 -453,7 +451,6 @@@ in
  ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
                  int port)
  {
 -      struct list_head *tmp;
        struct ksock_peer *peer;
        struct ksock_peer *peer2;
        struct ksock_route *route;
        }
  
        route2 = NULL;
 -      list_for_each(tmp, &peer->ksnp_routes) {
 -              route2 = list_entry(tmp, struct ksock_route, ksnr_list);
 -
 +      list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
                if (route2->ksnr_ipaddr == ipaddr)
                        break;
  
@@@ -1683,10 -1688,10 +1683,10 @@@ ksocknal_destroy_conn(struct ksock_con
        case SOCKNAL_RX_LNET_PAYLOAD:
                last_rcv = conn->ksnc_rx_deadline -
                           cfs_time_seconds(*ksocknal_tunables.ksnd_timeout);
-               CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %d, left: %d, last alive is %ld secs ago\n",
+               CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n",
                       libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
                       &conn->ksnc_ipaddr, conn->ksnc_port,
-                      conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
+                      iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left,
                       cfs_duration_sec(cfs_time_sub(cfs_time_current(),
                                                     last_rcv)));
                lnet_finalize(conn->ksnc_peer->ksnp_ni,
@@@ -1849,10 -1854,12 +1849,10 @@@ ksocknal_query(struct lnet_ni *ni, lnet
  
        peer = ksocknal_find_peer_locked(ni, id);
        if (peer) {
 -              struct list_head *tmp;
                struct ksock_conn *conn;
                int bufnob;
  
 -              list_for_each(tmp, &peer->ksnp_conns) {
 -                      conn = list_entry(tmp, struct ksock_conn, ksnc_list);
 +              list_for_each_entry(conn, &peer->ksnp_conns, ksnc_list) {
                        bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
  
                        if (bufnob < conn->ksnc_tx_bufnob) {
@@@ -2309,7 -2316,7 +2309,7 @@@ ksocknal_base_shutdown(void
        switch (ksocknal_data.ksnd_init) {
        default:
                LASSERT(0);
 -
 +              /* fall through */
        case SOCKNAL_INIT_ALL:
        case SOCKNAL_INIT_DATA:
                LASSERT(ksocknal_data.ksnd_peers);
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   *
@@@ -358,11 -357,7 +358,7 @@@ struct ksock_conn 
        __u8               ksnc_rx_scheduled; /* being progressed */
        __u8               ksnc_rx_state;     /* what is being read */
        int                ksnc_rx_nob_left;  /* # bytes to next hdr/body */
-       int                ksnc_rx_nob_wanted;/* bytes actually wanted */
-       int                ksnc_rx_niov;      /* # iovec frags */
-       struct kvec        *ksnc_rx_iov;      /* the iovec frags */
-       int                ksnc_rx_nkiov;     /* # page frags */
-       struct bio_vec          *ksnc_rx_kiov;  /* the page frags */
+       struct iov_iter    ksnc_rx_to;          /* copy destination */
        union ksock_rxiovspace ksnc_rx_iov_space; /* space for frag descriptors */
        __u32              ksnc_rx_csum;      /* partial checksum for incoming
                                               * data
@@@ -701,8 -696,7 +697,7 @@@ int ksocknal_lib_setup_sock(struct sock
  int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx);
  int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx);
  void ksocknal_lib_eager_ack(struct ksock_conn *conn);
- int ksocknal_lib_recv_iov(struct ksock_conn *conn);
- int ksocknal_lib_recv_kiov(struct ksock_conn *conn);
+ int ksocknal_lib_recv(struct ksock_conn *conn);
  int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
                                   int *rxmem, int *nagle);
  
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   *
@@@ -250,66 -249,16 +250,16 @@@ ksocknal_transmit(struct ksock_conn *co
  }
  
  static int
- ksocknal_recv_iov(struct ksock_conn *conn)
+ ksocknal_recv_iter(struct ksock_conn *conn)
  {
-       struct kvec *iov = conn->ksnc_rx_iov;
        int nob;
        int rc;
  
-       LASSERT(conn->ksnc_rx_niov > 0);
-       /*
-        * Never touch conn->ksnc_rx_iov or change connection
-        * status inside ksocknal_lib_recv_iov
-        */
-       rc = ksocknal_lib_recv_iov(conn);
-       if (rc <= 0)
-               return rc;
-       /* received something... */
-       nob = rc;
-       conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
-       conn->ksnc_rx_deadline =
-               cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
-       mb();                  /* order with setting rx_started */
-       conn->ksnc_rx_started = 1;
-       conn->ksnc_rx_nob_wanted -= nob;
-       conn->ksnc_rx_nob_left -= nob;
-       do {
-               LASSERT(conn->ksnc_rx_niov > 0);
-               if (nob < (int)iov->iov_len) {
-                       iov->iov_len -= nob;
-                       iov->iov_base += nob;
-                       return -EAGAIN;
-               }
-               nob -= iov->iov_len;
-               conn->ksnc_rx_iov = ++iov;
-               conn->ksnc_rx_niov--;
-       } while (nob);
-       return rc;
- }
- static int
- ksocknal_recv_kiov(struct ksock_conn *conn)
- {
-       struct bio_vec *kiov = conn->ksnc_rx_kiov;
-       int nob;
-       int rc;
-       LASSERT(conn->ksnc_rx_nkiov > 0);
        /*
-        * Never touch conn->ksnc_rx_kiov or change connection
-        * status inside ksocknal_lib_recv_iov
+        * Never touch conn->ksnc_rx_to or change connection
+        * status inside ksocknal_lib_recv
         */
-       rc = ksocknal_lib_recv_kiov(conn);
+       rc = ksocknal_lib_recv(conn);
  
        if (rc <= 0)
                return rc;
        mb();                  /* order with setting rx_started */
        conn->ksnc_rx_started = 1;
  
-       conn->ksnc_rx_nob_wanted -= nob;
        conn->ksnc_rx_nob_left -= nob;
  
-       do {
-               LASSERT(conn->ksnc_rx_nkiov > 0);
-               if (nob < (int)kiov->bv_len) {
-                       kiov->bv_offset += nob;
-                       kiov->bv_len -= nob;
-                       return -EAGAIN;
-               }
-               nob -= kiov->bv_len;
-               conn->ksnc_rx_kiov = ++kiov;
-               conn->ksnc_rx_nkiov--;
-       } while (nob);
+       iov_iter_advance(&conn->ksnc_rx_to, nob);
+       if (iov_iter_count(&conn->ksnc_rx_to))
+               return -EAGAIN;
  
        return 1;
  }
@@@ -348,7 -286,7 +287,7 @@@ ksocknal_receive(struct ksock_conn *con
  {
        /*
         * Return 1 on success, 0 on EOF, < 0 on error.
-        * Caller checks ksnc_rx_nob_wanted to determine
+        * Caller checks ksnc_rx_to to determine
         * progress/completion.
         */
        int rc;
        }
  
        for (;;) {
-               if (conn->ksnc_rx_niov)
-                       rc = ksocknal_recv_iov(conn);
-               else
-                       rc = ksocknal_recv_kiov(conn);
+               rc = ksocknal_recv_iter(conn);
                if (rc <= 0) {
                        /* error/EOF or partial receive */
                        if (rc == -EAGAIN) {
  
                /* Completed a fragment */
  
-               if (!conn->ksnc_rx_nob_wanted) {
+               if (!iov_iter_count(&conn->ksnc_rx_to)) {
                        rc = 1;
                        break;
                }
@@@ -1051,6 -985,7 +986,7 @@@ in
  ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip)
  {
        static char ksocknal_slop_buffer[4096];
+       struct kvec *kvec = (struct kvec *)&conn->ksnc_rx_iov_space;
  
        int nob;
        unsigned int niov;
                case  KSOCK_PROTO_V2:
                case  KSOCK_PROTO_V3:
                        conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
-                       conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space;
-                       conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg;
-                       conn->ksnc_rx_nob_wanted = offsetof(struct ksock_msg, ksm_u);
+                       kvec->iov_base = &conn->ksnc_msg;
+                       kvec->iov_len = offsetof(struct ksock_msg, ksm_u);
                        conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u);
-                       conn->ksnc_rx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u);
+                       iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
+                                       1, offsetof(struct ksock_msg, ksm_u));
                        break;
  
                case KSOCK_PROTO_V1:
                        /* Receiving bare struct lnet_hdr */
                        conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
-                       conn->ksnc_rx_nob_wanted = sizeof(struct lnet_hdr);
+                       kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
+                       kvec->iov_len = sizeof(struct lnet_hdr);
                        conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr);
-                       conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space;
-                       conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
-                       conn->ksnc_rx_iov[0].iov_len = sizeof(struct lnet_hdr);
+                       iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
+                                       1, sizeof(struct lnet_hdr));
                        break;
  
                default:
                        LBUG();
                }
-               conn->ksnc_rx_niov = 1;
-               conn->ksnc_rx_kiov = NULL;
-               conn->ksnc_rx_nkiov = 0;
                conn->ksnc_rx_csum = ~0;
                return 1;
        }
         */
        conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
        conn->ksnc_rx_nob_left = nob_to_skip;
-       conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space;
        skipped = 0;
        niov = 0;
  
        do {
                nob = min_t(int, nob_to_skip, sizeof(ksocknal_slop_buffer));
  
-               conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer;
-               conn->ksnc_rx_iov[niov].iov_len  = nob;
+               kvec[niov].iov_base = ksocknal_slop_buffer;
+               kvec[niov].iov_len  = nob;
                niov++;
                skipped += nob;
                nob_to_skip -= nob;
        } while (nob_to_skip &&    /* mustn't overflow conn's rx iov */
                 niov < sizeof(conn->ksnc_rx_iov_space) / sizeof(struct iovec));
  
-       conn->ksnc_rx_niov = niov;
-       conn->ksnc_rx_kiov = NULL;
-       conn->ksnc_rx_nkiov = 0;
-       conn->ksnc_rx_nob_wanted = skipped;
+       iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, niov, skipped);
        return 0;
  }
  
  static int
  ksocknal_process_receive(struct ksock_conn *conn)
  {
+       struct kvec *kvec = (struct kvec *)&conn->ksnc_rx_iov_space;
        struct lnet_hdr *lhdr;
        struct lnet_process_id *id;
        int rc;
                conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER ||
                conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
   again:
-       if (conn->ksnc_rx_nob_wanted) {
+       if (iov_iter_count(&conn->ksnc_rx_to)) {
                rc = ksocknal_receive(conn);
  
                if (rc <= 0) {
                        return (!rc ? -ESHUTDOWN : rc);
                }
  
-               if (conn->ksnc_rx_nob_wanted) {
+               if (iov_iter_count(&conn->ksnc_rx_to)) {
                        /* short read */
                        return -EAGAIN;
                }
                }
  
                conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
-               conn->ksnc_rx_nob_wanted = sizeof(struct ksock_lnet_msg);
                conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg);
  
-               conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space;
-               conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
-               conn->ksnc_rx_iov[0].iov_len = sizeof(struct ksock_lnet_msg);
+               kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
+               kvec->iov_len = sizeof(struct ksock_lnet_msg);
  
-               conn->ksnc_rx_niov = 1;
-               conn->ksnc_rx_kiov = NULL;
-               conn->ksnc_rx_nkiov = 0;
+               iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
+                               1, sizeof(struct ksock_lnet_msg));
  
                goto again;     /* read lnet header now */
  
@@@ -1345,26 -1268,9 +1269,9 @@@ ksocknal_recv(struct lnet_ni *ni, void 
        LASSERT(to->nr_segs <= LNET_MAX_IOV);
  
        conn->ksnc_cookie = msg;
-       conn->ksnc_rx_nob_wanted = iov_iter_count(to);
        conn->ksnc_rx_nob_left = rlen;
  
-       if (to->type & ITER_KVEC) {
-               conn->ksnc_rx_nkiov = 0;
-               conn->ksnc_rx_kiov = NULL;
-               conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
-               conn->ksnc_rx_niov =
-                       lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov,
-                                        to->nr_segs, to->kvec,
-                                        to->iov_offset, iov_iter_count(to));
-       } else {
-               conn->ksnc_rx_niov = 0;
-               conn->ksnc_rx_iov = NULL;
-               conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
-               conn->ksnc_rx_nkiov =
-                       lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov,
-                                        to->nr_segs, to->bvec,
-                                        to->iov_offset, iov_iter_count(to));
-       }
+       conn->ksnc_rx_to = *to;
  
        LASSERT(conn->ksnc_rx_scheduled);
  
@@@ -2329,12 -2235,12 +2236,12 @@@ ksocknal_find_timed_out_conn(struct kso
                                     conn->ksnc_rx_deadline)) {
                        /* Timed out incomplete incoming message */
                        ksocknal_conn_addref(conn);
-                       CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %d left %d\n",
+                       CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %zd left %d\n",
                                libcfs_id2str(peer->ksnp_id),
                                &conn->ksnc_ipaddr,
                                conn->ksnc_port,
                                conn->ksnc_rx_state,
-                               conn->ksnc_rx_nob_wanted,
+                               iov_iter_count(&conn->ksnc_rx_to),
                                conn->ksnc_rx_nob_left);
                        return conn;
                }
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * GPL HEADER START
   *
@@@ -162,94 -161,39 +162,39 @@@ ksocknal_lib_eager_ack(struct ksock_con
                          sizeof(opt));
  }
  
- int
- ksocknal_lib_recv_iov(struct ksock_conn *conn)
+ static int lustre_csum(struct kvec *v, void *context)
  {
-       unsigned int niov = conn->ksnc_rx_niov;
-       struct kvec *iov = conn->ksnc_rx_iov;
-       struct msghdr msg = {
-               .msg_flags = 0
-       };
-       int nob;
-       int i;
-       int rc;
-       int fragnob;
-       int sum;
-       __u32 saved_csum;
-       LASSERT(niov > 0);
-       for (nob = i = 0; i < niov; i++)
-               nob += iov[i].iov_len;
-       LASSERT(nob <= conn->ksnc_rx_nob_wanted);
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob);
-       rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
-       saved_csum = 0;
-       if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
-               saved_csum = conn->ksnc_msg.ksm_csum;
-               conn->ksnc_msg.ksm_csum = 0;
-       }
-       if (saved_csum) {
-               /* accumulate checksum */
-               for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
-                       LASSERT(i < niov);
-                       fragnob = iov[i].iov_len;
-                       if (fragnob > sum)
-                               fragnob = sum;
-                       conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum,
-                                                     iov[i].iov_base,
-                                                     fragnob);
-               }
-               conn->ksnc_msg.ksm_csum = saved_csum;
-       }
-       return rc;
+       struct ksock_conn *conn = context;
+       conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum,
+                                     v->iov_base, v->iov_len);
+       return 0;
  }
  
  int
- ksocknal_lib_recv_kiov(struct ksock_conn *conn)
+ ksocknal_lib_recv(struct ksock_conn *conn)
  {
-       unsigned int niov = conn->ksnc_rx_nkiov;
-       struct bio_vec *kiov = conn->ksnc_rx_kiov;
-       struct msghdr msg = {
-               .msg_flags = 0
-       };
-       int nob;
-       int i;
+       struct msghdr msg = { .msg_iter = conn->ksnc_rx_to };
+       __u32 saved_csum;
        int rc;
-       void *base;
-       int sum;
-       int fragnob;
  
-       for (nob = i = 0; i < niov; i++)
-               nob += kiov[i].bv_len;
-       LASSERT(nob <= conn->ksnc_rx_nob_wanted);
-       iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob);
        rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
+       if (rc <= 0)
+               return rc;
  
-       if (conn->ksnc_msg.ksm_csum) {
-               for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
-                       LASSERT(i < niov);
-                       base = kmap(kiov[i].bv_page) + kiov[i].bv_offset;
-                       fragnob = kiov[i].bv_len;
-                       if (fragnob > sum)
-                               fragnob = sum;
+       saved_csum = conn->ksnc_msg.ksm_csum;
+       if (!saved_csum)
+               return rc;
  
-                       conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum,
-                                                     base, fragnob);
+       /* header is included only in V2 - V3 checksums only the bulk data */
+       if (!(conn->ksnc_rx_to.type & ITER_BVEC) &&
+            conn->ksnc_proto != &ksocknal_protocol_v2x)
+               return rc;
+               
+       /* accumulate checksum */
+       conn->ksnc_msg.ksm_csum = 0;
+       iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn);
+       conn->ksnc_msg.ksm_csum = saved_csum;
  
-                       kunmap(kiov[i].bv_page);
-               }
-       }
        return rc;
  }
  
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * GPL HEADER START
   *
@@@ -890,7 -889,7 +890,7 @@@ lnet_return_rx_credits_locked(struct ln
                 */
                LASSERT(msg->msg_kiov);
  
-               rb = list_entry(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]);
+               rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]);
                rbp = rb->rb_pool;
  
                msg->msg_kiov = NULL;
diff --combined drivers/vhost/scsi.c
@@@ -210,12 -210,6 +210,6 @@@ static struct workqueue_struct *vhost_s
  static DEFINE_MUTEX(vhost_scsi_mutex);
  static LIST_HEAD(vhost_scsi_list);
  
- static int iov_num_pages(void __user *iov_base, size_t iov_len)
- {
-       return (PAGE_ALIGN((unsigned long)iov_base + iov_len) -
-              ((unsigned long)iov_base & PAGE_MASK)) >> PAGE_SHIFT;
- }
  static void vhost_scsi_done_inflight(struct kref *kref)
  {
        struct vhost_scsi_inflight *inflight;
@@@ -519,7 -513,7 +513,7 @@@ static void vhost_scsi_complete_cmd_wor
                                        vs_completion_work);
        DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ);
        struct virtio_scsi_cmd_resp v_rsp;
 -      struct vhost_scsi_cmd *cmd;
 +      struct vhost_scsi_cmd *cmd, *t;
        struct llist_node *llnode;
        struct se_cmd *se_cmd;
        struct iov_iter iov_iter;
  
        bitmap_zero(signal, VHOST_SCSI_MAX_VQ);
        llnode = llist_del_all(&vs->vs_completion_list);
 -      llist_for_each_entry(cmd, llnode, tvc_completion_list) {
 +      llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) {
                se_cmd = &cmd->tvc_se_cmd;
  
                pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__,
@@@ -618,48 -612,31 +612,31 @@@ vhost_scsi_get_tag(struct vhost_virtque
   */
  static int
  vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
-                     void __user *ptr,
-                     size_t len,
+                     struct iov_iter *iter,
                      struct scatterlist *sgl,
                      bool write)
  {
-       unsigned int npages = 0, offset, nbytes;
-       unsigned int pages_nr = iov_num_pages(ptr, len);
-       struct scatterlist *sg = sgl;
        struct page **pages = cmd->tvc_upages;
-       int ret, i;
-       if (pages_nr > VHOST_SCSI_PREALLOC_UPAGES) {
-               pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
-                      " preallocated VHOST_SCSI_PREALLOC_UPAGES: %u\n",
-                       pages_nr, VHOST_SCSI_PREALLOC_UPAGES);
-               return -ENOBUFS;
-       }
+       struct scatterlist *sg = sgl;
+       ssize_t bytes;
+       size_t offset;
+       unsigned int npages = 0;
  
-       ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages);
+       bytes = iov_iter_get_pages(iter, pages, LONG_MAX,
+                               VHOST_SCSI_PREALLOC_UPAGES, &offset);
        /* No pages were pinned */
-       if (ret < 0)
-               goto out;
-       /* Less pages pinned than wanted */
-       if (ret != pages_nr) {
-               for (i = 0; i < ret; i++)
-                       put_page(pages[i]);
-               ret = -EFAULT;
-               goto out;
-       }
+       if (bytes <= 0)
+               return bytes < 0 ? bytes : -EFAULT;
  
-       while (len > 0) {
-               offset = (uintptr_t)ptr & ~PAGE_MASK;
-               nbytes = min_t(unsigned int, PAGE_SIZE - offset, len);
-               sg_set_page(sg, pages[npages], nbytes, offset);
-               ptr += nbytes;
-               len -= nbytes;
-               sg++;
-               npages++;
-       }
+       iov_iter_advance(iter, bytes);
  
- out:
-       return ret;
+       while (bytes) {
+               unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes);
+               sg_set_page(sg++, pages[npages++], n, offset);
+               bytes -= n;
+               offset = 0;
+       }
+       return npages;
  }
  
  static int
@@@ -687,24 -664,20 +664,20 @@@ vhost_scsi_iov_to_sgl(struct vhost_scsi
                      struct iov_iter *iter,
                      struct scatterlist *sg, int sg_count)
  {
-       size_t off = iter->iov_offset;
-       int i, ret;
-       for (i = 0; i < iter->nr_segs; i++) {
-               void __user *base = iter->iov[i].iov_base + off;
-               size_t len = iter->iov[i].iov_len - off;
+       struct scatterlist *p = sg;
+       int ret;
  
-               ret = vhost_scsi_map_to_sgl(cmd, base, len, sg, write);
+       while (iov_iter_count(iter)) {
+               ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write);
                if (ret < 0) {
-                       for (i = 0; i < sg_count; i++) {
-                               struct page *page = sg_page(&sg[i]);
+                       while (p < sg) {
+                               struct page *page = sg_page(p++);
                                if (page)
                                        put_page(page);
                        }
                        return ret;
                }
                sg += ret;
-               off = 0;
        }
        return 0;
  }
@@@ -929,7 -902,7 +902,7 @@@ vhost_scsi_handle_vq(struct vhost_scsi 
                        continue;
                }
  
 -              tpg = ACCESS_ONCE(vs_tpg[*target]);
 +              tpg = READ_ONCE(vs_tpg[*target]);
                if (unlikely(!tpg)) {
                        /* Target does not exist, fail the request */
                        vhost_scsi_send_bad_target(vs, vq, head, out);
@@@ -134,20 -134,16 +134,16 @@@ static void pvcalls_conn_back_read(voi
        masked_cons = pvcalls_mask(cons, array_size);
  
        memset(&msg, 0, sizeof(msg));
-       msg.msg_iter.type = ITER_KVEC|WRITE;
-       msg.msg_iter.count = wanted;
        if (masked_prod < masked_cons) {
                vec[0].iov_base = data->in + masked_prod;
                vec[0].iov_len = wanted;
-               msg.msg_iter.kvec = vec;
-               msg.msg_iter.nr_segs = 1;
+               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted);
        } else {
                vec[0].iov_base = data->in + masked_prod;
                vec[0].iov_len = array_size - masked_prod;
                vec[1].iov_base = data->in;
                vec[1].iov_len = wanted - vec[0].iov_len;
-               msg.msg_iter.kvec = vec;
-               msg.msg_iter.nr_segs = 2;
+               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted);
        }
  
        atomic_set(&map->read, 0);
@@@ -196,20 -192,16 +192,16 @@@ static void pvcalls_conn_back_write(str
  
        memset(&msg, 0, sizeof(msg));
        msg.msg_flags |= MSG_DONTWAIT;
-       msg.msg_iter.type = ITER_KVEC|READ;
-       msg.msg_iter.count = size;
        if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
                vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
                vec[0].iov_len = size;
-               msg.msg_iter.kvec = vec;
-               msg.msg_iter.nr_segs = 1;
+               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size);
        } else {
                vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
                vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
                vec[1].iov_base = data->out;
                vec[1].iov_len = size - vec[0].iov_len;
-               msg.msg_iter.kvec = vec;
-               msg.msg_iter.nr_segs = 2;
+               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size);
        }
  
        atomic_set(&map->write, 0);
@@@ -1238,7 -1230,3 +1230,7 @@@ static void __exit pvcalls_back_fin(voi
  }
  
  module_exit(pvcalls_back_fin);
 +
 +MODULE_DESCRIPTION("Xen PV Calls backend driver");
 +MODULE_AUTHOR("Stefano Stabellini <sstabellini@kernel.org>");
 +MODULE_LICENSE("GPL");
diff --combined fs/iomap.c
@@@ -350,8 -350,8 +350,8 @@@ static int iomap_zero(struct inode *ino
  static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
                struct iomap *iomap)
  {
 -      sector_t sector = iomap->blkno +
 -              (((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9);
 +      sector_t sector = (iomap->addr +
 +                         (pos & PAGE_MASK) - iomap->offset) >> 9;
  
        return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector,
                        offset, bytes);
@@@ -510,12 -510,11 +510,12 @@@ static int iomap_to_fiemap(struct fiema
                flags |= FIEMAP_EXTENT_MERGED;
        if (iomap->flags & IOMAP_F_SHARED)
                flags |= FIEMAP_EXTENT_SHARED;
 +      if (iomap->flags & IOMAP_F_DATA_INLINE)
 +              flags |= FIEMAP_EXTENT_DATA_INLINE;
  
        return fiemap_fill_next_extent(fi, iomap->offset,
 -                      iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
 +                      iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0,
                        iomap->length, flags);
 -
  }
  
  static loff_t
@@@ -715,9 -714,23 +715,9 @@@ static ssize_t iomap_dio_complete(struc
  {
        struct kiocb *iocb = dio->iocb;
        struct inode *inode = file_inode(iocb->ki_filp);
 +      loff_t offset = iocb->ki_pos;
        ssize_t ret;
  
 -      /*
 -       * Try again to invalidate clean pages which might have been cached by
 -       * non-direct readahead, or faulted in by get_user_pages() if the source
 -       * of the write was an mmap'ed region of the file we're writing.  Either
 -       * one is a pretty crazy thing to do, so we don't support it 100%.  If
 -       * this invalidation fails, tough, the write still worked...
 -       */
 -      if (!dio->error &&
 -          (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
 -              ret = invalidate_inode_pages2_range(inode->i_mapping,
 -                              iocb->ki_pos >> PAGE_SHIFT,
 -                              (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
 -              WARN_ON_ONCE(ret);
 -      }
 -
        if (dio->end_io) {
                ret = dio->end_io(iocb,
                                dio->error ? dio->error : dio->size,
        if (likely(!ret)) {
                ret = dio->size;
                /* check for short read */
 -              if (iocb->ki_pos + ret > dio->i_size &&
 +              if (offset + ret > dio->i_size &&
                    !(dio->flags & IOMAP_DIO_WRITE))
 -                      ret = dio->i_size - iocb->ki_pos;
 +                      ret = dio->i_size - offset;
                iocb->ki_pos += ret;
        }
  
 +      /*
 +       * Try again to invalidate clean pages which might have been cached by
 +       * non-direct readahead, or faulted in by get_user_pages() if the source
 +       * of the write was an mmap'ed region of the file we're writing.  Either
 +       * one is a pretty crazy thing to do, so we don't support it 100%.  If
 +       * this invalidation fails, tough, the write still worked...
 +       *
 +       * And this page cache invalidation has to be after dio->end_io(), as
 +       * some filesystems convert unwritten extents to real allocations in
 +       * end_io() when necessary, otherwise a racing buffer read would cache
 +       * zeros from unwritten extents.
 +       */
 +      if (!dio->error &&
 +          (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
 +              int err;
 +              err = invalidate_inode_pages2_range(inode->i_mapping,
 +                              offset >> PAGE_SHIFT,
 +                              (offset + dio->size - 1) >> PAGE_SHIFT);
 +              WARN_ON_ONCE(err);
 +      }
 +
        inode_dio_end(file_inode(iocb->ki_filp));
        kfree(dio);
  
@@@ -831,7 -823,7 +831,7 @@@ iomap_dio_zero(struct iomap_dio *dio, s
        bio = bio_alloc(GFP_KERNEL, 1);
        bio_set_dev(bio, iomap->bdev);
        bio->bi_iter.bi_sector =
 -              iomap->blkno + ((pos - iomap->offset) >> 9);
 +              (iomap->addr + pos - iomap->offset) >> 9;
        bio->bi_private = dio;
        bio->bi_end_io = iomap_dio_bio_end_io;
  
@@@ -856,6 -848,7 +856,7 @@@ iomap_dio_actor(struct inode *inode, lo
        struct bio *bio;
        bool need_zeroout = false;
        int nr_pages, ret;
+       size_t copied = 0;
  
        if ((pos | length | align) & ((1 << blkbits) - 1))
                return -EINVAL;
                /*FALLTHRU*/
        case IOMAP_UNWRITTEN:
                if (!(dio->flags & IOMAP_DIO_WRITE)) {
-                       iov_iter_zero(length, dio->submit.iter);
+                       length = iov_iter_zero(length, dio->submit.iter);
                        dio->size += length;
                        return length;
                }
        }
  
        do {
-               if (dio->error)
+               size_t n;
+               if (dio->error) {
+                       iov_iter_revert(dio->submit.iter, copied);
                        return 0;
+               }
  
                bio = bio_alloc(GFP_KERNEL, nr_pages);
                bio_set_dev(bio, iomap->bdev);
                bio->bi_iter.bi_sector =
 -                      iomap->blkno + ((pos - iomap->offset) >> 9);
 +                      (iomap->addr + pos - iomap->offset) >> 9;
                bio->bi_write_hint = dio->iocb->ki_hint;
                bio->bi_private = dio;
                bio->bi_end_io = iomap_dio_bio_end_io;
                ret = bio_iov_iter_get_pages(bio, &iter);
                if (unlikely(ret)) {
                        bio_put(bio);
-                       return ret;
+                       return copied ? copied : ret;
                }
  
+               n = bio->bi_iter.bi_size;
                if (dio->flags & IOMAP_DIO_WRITE) {
                        bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
-                       task_io_account_write(bio->bi_iter.bi_size);
+                       task_io_account_write(n);
                } else {
                        bio_set_op_attrs(bio, REQ_OP_READ, 0);
                        if (dio->flags & IOMAP_DIO_DIRTY)
                                bio_set_pages_dirty(bio);
                }
  
-               dio->size += bio->bi_iter.bi_size;
-               pos += bio->bi_iter.bi_size;
+               iov_iter_advance(dio->submit.iter, n);
+               dio->size += n;
+               pos += n;
+               copied += n;
  
                nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
  
                if (pad)
                        iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
        }
-       iov_iter_advance(dio->submit.iter, length);
-       return length;
+       return copied;
  }
  
  ssize_t
@@@ -1057,7 -1055,7 +1063,7 @@@ iomap_dio_rw(struct kiocb *iocb, struc
  
                        if (!(iocb->ki_flags & IOCB_HIPRI) ||
                            !dio->submit.last_queue ||
 -                          !blk_mq_poll(dio->submit.last_queue,
 +                          !blk_poll(dio->submit.last_queue,
                                         dio->submit.cookie))
                                io_schedule();
                }
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * (C) 2001 Clemson University and The University of Chicago
   *
@@@ -275,12 -274,6 +275,6 @@@ struct orangefs_kiocb_s 
        /* orangefs kernel operation type */
        struct orangefs_kernel_op_s *op;
  
-       /* The user space buffers from/to which I/O is being staged */
-       struct iovec *iov;
-       /* number of elements in the iovector */
-       unsigned long nr_segs;
        /* set to indicate the type of the operation */
        int rw;
  
diff --combined fs/read_write.c
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   *  linux/fs/read_write.c
   *
@@@ -635,27 -634,6 +635,6 @@@ SYSCALL_DEFINE4(pwrite64, unsigned int
        return ret;
  }
  
- /*
-  * Reduce an iovec's length in-place.  Return the resulting number of segments
-  */
- unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
- {
-       unsigned long seg = 0;
-       size_t len = 0;
-       while (seg < nr_segs) {
-               seg++;
-               if (len + iov->iov_len >= to) {
-                       iov->iov_len = to - len;
-                       break;
-               }
-               len += iov->iov_len;
-               iov++;
-       }
-       return seg;
- }
- EXPORT_SYMBOL(iov_shorten);
  static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
                loff_t *ppos, int type, rwf_t flags)
  {
diff --combined include/linux/bio.h
@@@ -129,6 -129,18 +129,6 @@@ static inline void *bio_data(struct bi
  #define bvec_to_phys(bv)      (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
  
  /*
 - * queues that have highmem support enabled may still need to revert to
 - * PIO transfers occasionally and thus map high pages temporarily. For
 - * permanent PIO fall back, user is probably better off disabling highmem
 - * I/O completely on that queue (see ide-dma for example)
 - */
 -#define __bio_kmap_atomic(bio, iter)                          \
 -      (kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) +   \
 -              bio_iter_iovec((bio), (iter)).bv_offset)
 -
 -#define __bio_kunmap_atomic(addr)     kunmap_atomic(addr)
 -
 -/*
   * merge helpers etc
   */
  
@@@ -450,7 -462,7 +450,7 @@@ extern int bio_add_pc_page(struct reque
  int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
  struct rq_map_data;
  extern struct bio *bio_map_user_iov(struct request_queue *,
-                                   const struct iov_iter *, gfp_t);
+                                   struct iov_iter *, gfp_t);
  extern void bio_unmap_user(struct bio *);
  extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
                                gfp_t);
@@@ -482,7 -494,7 +482,7 @@@ extern void bio_free_pages(struct bio *
  
  extern struct bio *bio_copy_user_iov(struct request_queue *,
                                     struct rq_map_data *,
-                                    const struct iov_iter *,
+                                    struct iov_iter *,
                                     gfp_t);
  extern int bio_uncopy_user(struct bio *);
  void zero_fill_bio(struct bio *bio);
@@@ -510,11 -522,13 +510,11 @@@ do {                                            
  
  #ifdef CONFIG_BLK_CGROUP
  int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 -int bio_associate_current(struct bio *bio);
  void bio_disassociate_task(struct bio *bio);
  void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
  #else /* CONFIG_BLK_CGROUP */
  static inline int bio_associate_blkcg(struct bio *bio,
                        struct cgroup_subsys_state *blkcg_css) { return 0; }
 -static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
  static inline void bio_disassociate_task(struct bio *bio) { }
  static inline void bio_clone_blkcg_association(struct bio *dst,
                        struct bio *src) { }
@@@ -561,6 -575,17 +561,6 @@@ static inline void bvec_kunmap_irq(cha
  }
  #endif
  
 -static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter,
 -                                 unsigned long *flags)
 -{
 -      return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags);
 -}
 -#define __bio_kunmap_irq(buf, flags)  bvec_kunmap_irq(buf, flags)
 -
 -#define bio_kmap_irq(bio, flags) \
 -      __bio_kmap_irq((bio), (bio)->bi_iter, (flags))
 -#define bio_kunmap_irq(buf,flags)     __bio_kunmap_irq(buf, flags)
 -
  /*
   * BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
   *