Merge tag 'nfs-for-6.1-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 13 Oct 2022 16:58:42 +0000 (09:58 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 13 Oct 2022 16:58:42 +0000 (09:58 -0700)
Pull NFS client updates from Anna Schumaker:
 "New Features:
   - Add NFSv4.2 xattr tracepoints
   - Replace xprtiod WQ in rpcrdma
   - Flexfiles cancels I/O on layout recall or revoke

  Bugfixes and Cleanups:
   - Directly use ida_alloc() / ida_free()
   - Don't open-code max_t()
   - Prefer using strscpy over strlcpy
   - Remove unused forward declarations
   - Always return layout states on flexfiles layout return
   - Have LISTXATTR treat NFS4ERR_NOXATTR as an empty reply instead of
     error
   - Allow more xprtrdma memory allocations to fail without triggering a
     reclaim
   - Various other xprtrdma cleanups
   - Fix rpc_killall_tasks() races"

* tag 'nfs-for-6.1-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (27 commits)
  NFSv4/flexfiles: Cancel I/O if the layout is recalled or revoked
  SUNRPC: Add API to force the client to disconnect
  SUNRPC: Add a helper to allow pNFS drivers to selectively cancel RPC calls
  SUNRPC: Fix races with rpc_killall_tasks()
  xprtrdma: Fix uninitialized variable
  xprtrdma: Prevent memory allocations from driving a reclaim
  xprtrdma: Memory allocation should be allowed to fail during connect
  xprtrdma: MR-related memory allocation should be allowed to fail
  xprtrdma: Clean up synopsis of rpcrdma_regbuf_alloc()
  xprtrdma: Clean up synopsis of rpcrdma_req_create()
  svcrdma: Clean up RPCRDMA_DEF_GFP
  SUNRPC: Replace the use of the xprtiod WQ in rpcrdma
  NFSv4.2: Add a tracepoint for listxattr
  NFSv4.2: Add tracepoints for getxattr, setxattr, and removexattr
  NFSv4.2: Move TRACE_DEFINE_ENUM(NFS4_CONTENT_*) under CONFIG_NFS_V4_2
  NFSv4.2: Add special handling for LISTXATTR receiving NFS4ERR_NOXATTR
  nfs: remove nfs_wait_atomic_killable() and nfs_write_prepare() declaration
  NFSv4: remove nfs4_renewd_prepare_shutdown() declaration
  fs/nfs/pnfs_nfs.c: fix spelling typo and syntax error in comment
  NFSv4/pNFS: Always return layout stats on layout return for flexfiles
  ...

1  2 
fs/nfs/file.c
fs/nfs/inode.c
fs/nfs/nfs42proc.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/pnfs.c
include/linux/sunrpc/sched.h
net/sunrpc/sched.c

diff --combined fs/nfs/file.c
@@@ -567,8 -567,7 +567,8 @@@ static vm_fault_t nfs_vm_page_mkwrite(s
        }
  
        wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
 -                      nfs_wait_bit_killable, TASK_KILLABLE);
 +                         nfs_wait_bit_killable,
 +                         TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
  
        lock_page(page);
        mapping = page_file_mapping(page);
@@@ -656,9 -655,9 +656,9 @@@ ssize_t nfs_file_write(struct kiocb *io
                        goto out;
        }
        if (mntflags & NFS_MOUNT_WRITE_WAIT) {
-               result = filemap_fdatawait_range(file->f_mapping,
-                                                iocb->ki_pos - written,
-                                                iocb->ki_pos - 1);
+               filemap_fdatawait_range(file->f_mapping,
+                                       iocb->ki_pos - written,
+                                       iocb->ki_pos - 1);
        }
        result = generic_write_sync(iocb, written);
        if (result < 0)
diff --combined fs/nfs/inode.c
@@@ -72,13 -72,18 +72,13 @@@ nfs_fattr_to_ino_t(struct nfs_fattr *fa
        return nfs_fileid_to_ino_t(fattr->fileid);
  }
  
 -static int nfs_wait_killable(int mode)
 +int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
  {
 -      freezable_schedule_unsafe();
 +      schedule();
        if (signal_pending_state(mode, current))
                return -ERESTARTSYS;
        return 0;
  }
 -
 -int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
 -{
 -      return nfs_wait_killable(mode);
 -}
  EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
  
  /**
@@@ -313,7 -318,7 +313,7 @@@ struct nfs_find_desc 
  static int
  nfs_find_actor(struct inode *inode, void *opaque)
  {
-       struct nfs_find_desc    *desc = (struct nfs_find_desc *)opaque;
+       struct nfs_find_desc    *desc = opaque;
        struct nfs_fh           *fh = desc->fh;
        struct nfs_fattr        *fattr = desc->fattr;
  
  static int
  nfs_init_locked(struct inode *inode, void *opaque)
  {
-       struct nfs_find_desc    *desc = (struct nfs_find_desc *)opaque;
+       struct nfs_find_desc    *desc = opaque;
        struct nfs_fattr        *fattr = desc->fattr;
  
        set_nfs_fileid(inode, fattr->fileid);
@@@ -1327,8 -1332,7 +1327,8 @@@ int nfs_clear_invalid_mapping(struct ad
         */
        for (;;) {
                ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
 -                                       nfs_wait_bit_killable, TASK_KILLABLE);
 +                                       nfs_wait_bit_killable,
 +                                       TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
                if (ret)
                        goto out;
                spin_lock(&inode->i_lock);
@@@ -2267,7 -2271,7 +2267,7 @@@ static inline void nfs4_init_once(struc
  
  static void init_once(void *foo)
  {
-       struct nfs_inode *nfsi = (struct nfs_inode *) foo;
+       struct nfs_inode *nfsi = foo;
  
        inode_init_once(&nfsi->vfs_inode);
        INIT_LIST_HEAD(&nfsi->open_files);
diff --combined fs/nfs/nfs42proc.c
@@@ -341,7 -341,7 +341,7 @@@ static ssize_t _nfs42_proc_copy(struct 
                        return status;
                }
        }
 -      status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
 +      status = nfs_filemap_write_and_wait_range(src->f_mapping,
                        pos_src, pos_src + (loff_t)count - 1);
        if (status)
                return status;
@@@ -1175,6 -1175,7 +1175,7 @@@ static int _nfs42_proc_removexattr(stru
  
        ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
            &res.seq_res, 1);
+       trace_nfs4_removexattr(inode, name, ret);
        if (!ret)
                nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
  
@@@ -1214,6 -1215,7 +1215,7 @@@ static int _nfs42_proc_setxattr(struct 
  
        ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
            &res.seq_res, 1);
+       trace_nfs4_setxattr(inode, name, ret);
  
        for (; np > 0; np--)
                put_page(pages[np - 1]);
@@@ -1246,6 -1248,7 +1248,7 @@@ static ssize_t _nfs42_proc_getxattr(str
  
        ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
            &res.seq_res, 0);
+       trace_nfs4_getxattr(inode, name, ret);
        if (ret < 0)
                return ret;
  
@@@ -1317,6 -1320,7 +1320,7 @@@ static ssize_t _nfs42_proc_listxattrs(s
  
        ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
            &res.seq_res, 0);
+       trace_nfs4_listxattr(inode, ret);
  
        if (ret >= 0) {
                ret = res.copied;
diff --combined fs/nfs/nfs4proc.c
@@@ -416,8 -416,8 +416,8 @@@ static int nfs4_delay_killable(long *ti
  {
        might_sleep();
  
 -      freezable_schedule_timeout_killable_unsafe(
 -              nfs4_update_delay(timeout));
 +      __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
 +      schedule_timeout(nfs4_update_delay(timeout));
        if (!__fatal_signal_pending(current))
                return 0;
        return -EINTR;
@@@ -427,8 -427,7 +427,8 @@@ static int nfs4_delay_interruptible(lon
  {
        might_sleep();
  
 -      freezable_schedule_timeout_interruptible_unsafe(nfs4_update_delay(timeout));
 +      __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE);
 +      schedule_timeout(nfs4_update_delay(timeout));
        if (!signal_pending(current))
                return 0;
        return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS;
@@@ -6608,7 -6607,7 +6608,7 @@@ static void nfs4_delegreturn_prepare(st
        struct nfs4_delegreturndata *d_data;
        struct pnfs_layout_hdr *lo;
  
-       d_data = (struct nfs4_delegreturndata *)data;
+       d_data = data;
  
        if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) {
                nfs4_sequence_done(task, &d_data->res.seq_res);
@@@ -7407,8 -7406,7 +7407,8 @@@ nfs4_retry_setlk_simple(struct nfs4_sta
                status = nfs4_proc_setlk(state, cmd, request);
                if ((status != -EAGAIN) || IS_SETLK(cmd))
                        break;
 -              freezable_schedule_timeout_interruptible(timeout);
 +              __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
 +              schedule_timeout(timeout);
                timeout *= 2;
                timeout = min_t(unsigned long, NFS4_LOCK_MAXTIMEOUT, timeout);
                status = -ERESTARTSYS;
@@@ -7476,8 -7474,10 +7476,8 @@@ nfs4_retry_setlk(struct nfs4_state *sta
                        break;
  
                status = -ERESTARTSYS;
 -              freezer_do_not_count();
 -              wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
 +              wait_woken(&waiter.wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE,
                           NFS4_LOCK_MAXTIMEOUT);
 -              freezer_count();
        } while (!signalled());
  
        remove_wait_queue(q, &waiter.wait);
@@@ -8900,7 -8900,7 +8900,7 @@@ int nfs4_proc_exchange_id(struct nfs_cl
  void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
                            void *data)
  {
-       struct nfs4_add_xprt_data *adata = (struct nfs4_add_xprt_data *)data;
+       struct nfs4_add_xprt_data *adata = data;
        struct rpc_task *task;
        int status;
  
diff --combined fs/nfs/nfs4state.c
@@@ -497,8 -497,7 +497,7 @@@ nfs4_alloc_state_owner(struct nfs_serve
        sp = kzalloc(sizeof(*sp), gfp_flags);
        if (!sp)
                return NULL;
-       sp->so_seqid.owner_id = ida_simple_get(&server->openowner_id, 0, 0,
-                                               gfp_flags);
+       sp->so_seqid.owner_id = ida_alloc(&server->openowner_id, gfp_flags);
        if (sp->so_seqid.owner_id < 0) {
                kfree(sp);
                return NULL;
@@@ -534,7 -533,7 +533,7 @@@ static void nfs4_free_state_owner(struc
  {
        nfs4_destroy_seqid_counter(&sp->so_seqid);
        put_cred(sp->so_cred);
-       ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
+       ida_free(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
        kfree(sp);
  }
  
@@@ -877,8 -876,7 +876,7 @@@ static struct nfs4_lock_state *nfs4_all
        refcount_set(&lsp->ls_count, 1);
        lsp->ls_state = state;
        lsp->ls_owner = fl_owner;
-       lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id,
-                                               0, 0, GFP_KERNEL_ACCOUNT);
+       lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT);
        if (lsp->ls_seqid.owner_id < 0)
                goto out_free;
        INIT_LIST_HEAD(&lsp->ls_locks);
@@@ -890,7 -888,7 +888,7 @@@ out_free
  
  void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
  {
-       ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id);
+       ida_free(&server->lockowner_id, lsp->ls_seqid.owner_id);
        nfs4_destroy_seqid_counter(&lsp->ls_seqid);
        kfree(lsp);
  }
@@@ -1314,8 -1312,7 +1312,8 @@@ int nfs4_wait_clnt_recover(struct nfs_c
  
        refcount_inc(&clp->cl_count);
        res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
 -                               nfs_wait_bit_killable, TASK_KILLABLE);
 +                               nfs_wait_bit_killable,
 +                               TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
        if (res)
                goto out;
        if (clp->cl_cons_state < 0)
diff --combined fs/nfs/pnfs.c
@@@ -710,6 -710,7 +710,7 @@@ pnfs_mark_matching_lsegs_invalid(struc
                            u32 seq)
  {
        struct pnfs_layout_segment *lseg, *next;
+       struct nfs_server *server = NFS_SERVER(lo->plh_inode);
        int remaining = 0;
  
        dprintk("%s:Begin lo %p\n", __func__, lo);
                                "offset %llu length %llu\n", __func__,
                                lseg, lseg->pls_range.iomode, lseg->pls_seq,
                                lseg->pls_range.offset, lseg->pls_range.length);
-                       if (!mark_lseg_invalid(lseg, tmp_list))
-                               remaining++;
+                       if (mark_lseg_invalid(lseg, tmp_list))
+                               continue;
+                       remaining++;
+                       pnfs_lseg_cancel_io(server, lseg);
                }
        dprintk("%s:Return %i\n", __func__, remaining);
        return remaining;
@@@ -1908,7 -1911,7 +1911,7 @@@ static int pnfs_prepare_to_retry_layout
        pnfs_layoutcommit_inode(lo->plh_inode, false);
        return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
                                   nfs_wait_bit_killable,
 -                                 TASK_KILLABLE);
 +                                 TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
  }
  
  static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
@@@ -2485,6 -2488,7 +2488,7 @@@ pnfs_mark_matching_lsegs_return(struct 
                                u32 seq)
  {
        struct pnfs_layout_segment *lseg, *next;
+       struct nfs_server *server = NFS_SERVER(lo->plh_inode);
        int remaining = 0;
  
        dprintk("%s:Begin lo %p\n", __func__, lo);
                                continue;
                        remaining++;
                        set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
+                       pnfs_lseg_cancel_io(server, lseg);
                }
  
        if (remaining) {
@@@ -3192,7 -3197,7 +3197,7 @@@ pnfs_layoutcommit_inode(struct inode *i
                status = wait_on_bit_lock_action(&nfsi->flags,
                                NFS_INO_LAYOUTCOMMITTING,
                                nfs_wait_bit_killable,
 -                              TASK_KILLABLE);
 +                              TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
                if (status)
                        goto out;
        }
@@@ -209,11 -209,17 +209,17 @@@ struct rpc_task *rpc_run_task(const str
  struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
  void          rpc_put_task(struct rpc_task *);
  void          rpc_put_task_async(struct rpc_task *);
+ bool          rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status);
+ void          rpc_task_try_cancel(struct rpc_task *task, int error);
  void          rpc_signal_task(struct rpc_task *);
  void          rpc_exit_task(struct rpc_task *);
  void          rpc_exit(struct rpc_task *, int);
  void          rpc_release_calldata(const struct rpc_call_ops *, void *);
  void          rpc_killall_tasks(struct rpc_clnt *);
+ unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error,
+                                bool (*fnmatch)(const struct rpc_task *,
+                                                const void *),
+                                const void *data);
  void          rpc_execute(struct rpc_task *);
  void          rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
  void          rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
@@@ -252,7 -258,7 +258,7 @@@ int                rpc_malloc(struct rpc_task *)
  void          rpc_free(struct rpc_task *);
  int           rpciod_up(void);
  void          rpciod_down(void);
 -int           __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *);
 +int           rpc_wait_for_completion_task(struct rpc_task *task);
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  struct net;
  void          rpc_show_tasks(struct net *);
@@@ -264,6 -270,11 +270,6 @@@ extern struct workqueue_struct *xprtiod
  void          rpc_prepare_task(struct rpc_task *task);
  gfp_t         rpc_task_gfp_mask(void);
  
 -static inline int rpc_wait_for_completion_task(struct rpc_task *task)
 -{
 -      return __rpc_wait_for_completion_task(task, NULL);
 -}
 -
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
  static inline const char * rpc_qname(const struct rpc_wait_queue *q)
  {
diff --combined net/sunrpc/sched.c
@@@ -65,6 -65,13 +65,13 @@@ gfp_t rpc_task_gfp_mask(void
  }
  EXPORT_SYMBOL_GPL(rpc_task_gfp_mask);
  
+ bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status)
+ {
+       if (cmpxchg(&task->tk_rpc_status, 0, rpc_status) == 0)
+               return true;
+       return false;
+ }
  unsigned long
  rpc_task_timeout(const struct rpc_task *task)
  {
@@@ -269,7 -276,7 +276,7 @@@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queu
  
  static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
  {
 -      freezable_schedule_unsafe();
 +      schedule();
        if (signal_pending_state(mode, current))
                return -ERESTARTSYS;
        return 0;
@@@ -333,12 -340,14 +340,12 @@@ static int rpc_complete_task(struct rpc
   * to enforce taking of the wq->lock and hence avoid races with
   * rpc_complete_task().
   */
 -int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action)
 +int rpc_wait_for_completion_task(struct rpc_task *task)
  {
 -      if (action == NULL)
 -              action = rpc_wait_bit_killable;
        return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
 -                      action, TASK_KILLABLE);
 +                      rpc_wait_bit_killable, TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
  }
 -EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
 +EXPORT_SYMBOL_GPL(rpc_wait_for_completion_task);
  
  /*
   * Make an RPC task runnable.
@@@ -853,12 -862,25 +860,25 @@@ void rpc_signal_task(struct rpc_task *t
        if (!RPC_IS_ACTIVATED(task))
                return;
  
+       if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
+               return;
        trace_rpc_task_signalled(task, task->tk_action);
        set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
        smp_mb__after_atomic();
        queue = READ_ONCE(task->tk_waitqueue);
        if (queue)
-               rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
+               rpc_wake_up_queued_task(queue, task);
+ }
+ void rpc_task_try_cancel(struct rpc_task *task, int error)
+ {
+       struct rpc_wait_queue *queue;
+       if (!rpc_task_set_rpc_status(task, error))
+               return;
+       queue = READ_ONCE(task->tk_waitqueue);
+       if (queue)
+               rpc_wake_up_queued_task(queue, task);
  }
  
  void rpc_exit(struct rpc_task *task, int status)
@@@ -905,10 -927,16 +925,16 @@@ static void __rpc_execute(struct rpc_ta
                 * Perform the next FSM step or a pending callback.
                 *
                 * tk_action may be NULL if the task has been killed.
-                * In particular, note that rpc_killall_tasks may
-                * do this at any time, so beware when dereferencing.
                 */
                do_action = task->tk_action;
+               /* Tasks with an RPC error status should exit */
+               if (do_action != rpc_exit_task &&
+                   (status = READ_ONCE(task->tk_rpc_status)) != 0) {
+                       task->tk_status = status;
+                       if (do_action != NULL)
+                               do_action = rpc_exit_task;
+               }
+               /* Callbacks override all actions */
                if (task->tk_callback) {
                        do_action = task->tk_callback;
                        task->tk_callback = NULL;
                }
  
                /*
-                * Signalled tasks should exit rather than sleep.
-                */
-               if (RPC_SIGNALLED(task)) {
-                       task->tk_rpc_status = -ERESTARTSYS;
-                       rpc_exit(task, -ERESTARTSYS);
-               }
-               /*
                 * The queue->lock protects against races with
                 * rpc_make_runnable().
                 *
                        spin_unlock(&queue->lock);
                        continue;
                }
+               /* Wake up any task that has an exit status */
+               if (READ_ONCE(task->tk_rpc_status) != 0) {
+                       rpc_wake_up_task_queue_locked(queue, task);
+                       spin_unlock(&queue->lock);
+                       continue;
+               }
                rpc_clear_running(task);
                spin_unlock(&queue->lock);
                if (task_is_async)
                trace_rpc_task_sync_sleep(task, task->tk_action);
                status = out_of_line_wait_on_bit(&task->tk_runstate,
                                RPC_TASK_QUEUED, rpc_wait_bit_killable,
 -                              TASK_KILLABLE);
 +                              TASK_KILLABLE|TASK_FREEZABLE);
                if (status < 0) {
                        /*
                         * When a sync task receives a signal, it exits with
                         * clean up after sleeping on some queue, we don't
                         * break the loop here, but go around once more.
                         */
-                       trace_rpc_task_signalled(task, task->tk_action);
-                       set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
-                       task->tk_rpc_status = -ERESTARTSYS;
-                       rpc_exit(task, -ERESTARTSYS);
+                       rpc_signal_task(task);
                }
                trace_rpc_task_sync_wake(task, task->tk_action);
        }