NFSv4.1: pnfs filelayout driver write
author Fred Isaman <iisaman@netapp.com>
Thu, 3 Mar 2011 15:13:47 +0000 (15:13 +0000)
committer Trond Myklebust <Trond.Myklebust@netapp.com>
Fri, 11 Mar 2011 20:38:44 +0000 (15:38 -0500)
Allows the pnfs filelayout driver to write to the data servers.

Note that COMMIT to data servers will be implemented in a future
patch.  To avoid improper behavior, for the moment any WRITE to a data
server that would also require a COMMIT to the data server is sent
NFS_FILE_SYNC.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com>
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Mingyang Guo <guomingyang@nrchpc.ac.cn>
Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
fs/nfs/internal.h
fs/nfs/nfs4filelayout.c
fs/nfs/nfs4proc.c
fs/nfs/write.c
include/linux/nfs_xdr.h

index 1a3228e..d1ddc23 100644 (file)
@@ -276,6 +276,10 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 
 /* write.c */
+extern int nfs_initiate_write(struct nfs_write_data *data,
+                             struct rpc_clnt *clnt,
+                             const struct rpc_call_ops *call_ops,
+                             int how);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
@@ -291,6 +295,7 @@ extern int nfs4_init_client(struct nfs_client *clp,
                            const char *ip_addr,
                            rpc_authflavor_t authflavour,
                            int noresvport);
+extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
 extern int _nfs4_call_sync(struct nfs_server *server,
                           struct rpc_message *msg,
                           struct nfs4_sequence_args *args,
index 9d21bfe..7e1d457 100644 (file)
@@ -189,12 +189,69 @@ static void filelayout_read_release(void *data)
        rdata->mds_ops->rpc_release(data);
 }
 
+static int filelayout_write_done_cb(struct rpc_task *task,
+                               struct nfs_write_data *data)
+{
+       int reset = 0;
+       /*
+        * write_done_cb installed by filelayout_write_pagelist() for writes
+        * sent to a data server.  Returns 0 unless
+        * filelayout_async_handle_error() reports -EAGAIN, in which case the
+        * task is restarted and -EAGAIN is returned to the caller.
+        */
+       if (filelayout_async_handle_error(task, data->args.context->state,
+                                         data->ds_clp, &reset) == -EAGAIN) {
+               struct nfs_client *clp;
+               /*
+                * If the handler set 'reset', filelayout_set_lo_fail() and
+                * nfs4_reset_write() are called and the restart uses the
+                * nfs_client of the inode's server; otherwise the restart
+                * stays on ds_clp.  The dprintk must run before the reset,
+                * since nfs4_reset_write() sets data->ds_clp to NULL.
+                */
+               dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
+                       __func__, data->ds_clp, data->ds_clp->cl_session);
+               if (reset) {
+                       filelayout_set_lo_fail(data->lseg);
+                       nfs4_reset_write(task, data);
+                       clp = NFS_SERVER(data->inode)->nfs_client;
+               } else
+                       clp = data->ds_clp;
+               nfs_restart_rpc(task, clp);
+               return -EAGAIN;
+       }
+
+       return 0;
+}
+
+static void filelayout_write_prepare(struct rpc_task *task, void *data)
+{
+       struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+       /*
+        * rpc_call_prepare callback of filelayout_write_call_ops; 'data' is
+        * the nfs_write_data of the write.  Calls nfs41_setup_sequence() on
+        * the session of ds_clp and starts the call only when that returns 0;
+        * on a non-zero return the function just returns.
+        */
+       if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
+                               &wdata->args.seq_args, &wdata->res.seq_res,
+                               0, task))
+               return;
+
+       rpc_call_start(task);
+}
+
+static void filelayout_write_call_done(struct rpc_task *task, void *data)
+{
+       struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+       /*
+        * rpc_call_done callback of filelayout_write_call_ops: forwards to
+        * the rpc_call_done of the mds_ops stored in the nfs_write_data.
+        * Note this may cause RPC to be resent.
+        */
+       wdata->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_write_release(void *data)
+{
+       struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+       /* rpc_release callback: forwards to the rpc_release of mds_ops. */
+       wdata->mds_ops->rpc_release(data);
+}
+
 struct rpc_call_ops filelayout_read_call_ops = {
        .rpc_call_prepare = filelayout_read_prepare,
        .rpc_call_done = filelayout_read_call_done,
        .rpc_release = filelayout_read_release,
 };
 
+struct rpc_call_ops filelayout_write_call_ops = { /* callbacks for DS writes */
+       .rpc_call_prepare = filelayout_write_prepare,
+       .rpc_call_done = filelayout_write_call_done,
+       .rpc_release = filelayout_write_release,
+};
+
 static enum pnfs_try_status
 filelayout_read_pagelist(struct nfs_read_data *data)
 {
@@ -238,10 +295,52 @@ filelayout_read_pagelist(struct nfs_read_data *data)
        return PNFS_ATTEMPTED;
 }
 
+/*
+ * Perform async writes.
+ *
+ * Hands the write described by @data to the data server selected for
+ * data->args.offset.  Returns PNFS_NOT_ATTEMPTED when nfs4_fl_prepare_ds()
+ * yields no data server (after setting the IOMODE_RW and IOMODE_READ
+ * lo_fail bits in the layout's plh_flags); otherwise returns PNFS_ATTEMPTED
+ * once the write has been passed to nfs_initiate_write().  @sync is passed
+ * through as the 'how' argument of nfs_initiate_write().
+ */
 static enum pnfs_try_status
 filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 {
-       return PNFS_NOT_ATTEMPTED;
+       struct pnfs_layout_segment *lseg = data->lseg;
+       struct nfs4_pnfs_ds *ds;
+       loff_t offset = data->args.offset;
+       u32 j, idx;
+       struct nfs_fh *fh;
+       int status;
+
+       /*
+        * Retrieve the correct rpc_client for the byte range: the offset
+        * gives index j, j gives the data server index, and that index is
+        * handed to nfs4_fl_prepare_ds().
+        */
+       j = nfs4_fl_calc_j_index(lseg, offset);
+       idx = nfs4_fl_calc_ds_index(lseg, j);
+       ds = nfs4_fl_prepare_ds(lseg, idx);
+       if (!ds) {
+               printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+               set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+               set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+               return PNFS_NOT_ATTEMPTED;
+       }
+       dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+               data->inode->i_ino, sync, (size_t) data->args.count, offset,
+               ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+
+       /*
+        * We can't handle commit to ds yet, so unless the layout segment
+        * has commit_through_mds set the write is sent NFS_FILE_SYNC.
+        */
+       if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
+               data->args.stable = NFS_FILE_SYNC;
+
+       data->write_done_cb = filelayout_write_done_cb;
+       data->ds_clp = ds->ds_clp;
+       fh = nfs4_fl_select_ds_fh(lseg, j);
+       if (fh)
+               data->args.fh = fh;
+       /*
+        * Get the file offset on the dserver. Set the write offset to
+        * this offset and save the original offset in mds_offset, which
+        * nfs4_reset_write() copies back into args.offset.
+        */
+       data->args.offset = filelayout_get_dserver_offset(lseg, offset);
+       data->mds_offset = offset;
+
+       /*
+        * Perform an asynchronous write using the data server's rpc client.
+        * NOTE(review): a non-zero status from nfs_initiate_write() trips
+        * BUG_ON() here; there is no fallback path - confirm this is intended.
+        */
+       status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
+                                   &filelayout_write_call_ops, sync);
+       BUG_ON(status != 0);
+       return PNFS_ATTEMPTED;
 }
 
 /*
index da90212..7b4b9f3 100644 (file)
@@ -3145,6 +3145,23 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
        return data->write_done_cb(task, data);
 }
 
+/*
+ * Reset the nfs_write_data to send the write to the MDS.
+ *
+ * Calls put_lseg() on data->lseg and clears it, clears ds_clp, and restores
+ * nfs4_write_done_cb, the inode's file handle, the server's
+ * cache_consistency_bitmask, the offset saved in mds_offset and the fattr
+ * result pointer.  The task's ops are set back to mds_ops and the task is
+ * pointed at the inode's rpc client via rpc_task_reset_client().
+ */
+void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
+{
+       dprintk("%s Reset task for i/o through\n", __func__);
+       put_lseg(data->lseg);
+       data->lseg          = NULL;
+       data->ds_clp        = NULL;
+       data->write_done_cb = nfs4_write_done_cb;
+       data->args.fh       = NFS_FH(data->inode);
+       data->args.bitmask  = data->res.server->cache_consistency_bitmask;
+       data->args.offset   = data->mds_offset;
+       data->res.fattr     = &data->fattr;
+       task->tk_ops        = data->mds_ops;
+       rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+}
+EXPORT_SYMBOL_GPL(nfs4_reset_write);
+
 static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
        struct nfs_server *server = NFS_SERVER(data->inode);
index df99c5b..ee62ddf 100644 (file)
@@ -783,7 +783,7 @@ static int flush_task_priority(int how)
        return RPC_PRIORITY_NORMAL;
 }
 
-static int nfs_initiate_write(struct nfs_write_data *data,
+int nfs_initiate_write(struct nfs_write_data *data,
                       struct rpc_clnt *clnt,
                       const struct rpc_call_ops *call_ops,
                       int how)
@@ -833,6 +833,7 @@ static int nfs_initiate_write(struct nfs_write_data *data,
 out:
        return ret;
 }
+EXPORT_SYMBOL_GPL(nfs_initiate_write);
 
 /*
  * Set up the argument/result storage required for the RPC call.
@@ -1194,6 +1195,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
                 */
                static unsigned long    complain;
 
+               /* Note this will print the MDS for a DS write */
                if (time_before(complain, jiffies)) {
                        dprintk("NFS:       faulty NFS server %s:"
                                " (committed = %d) != (stable = %d)\n",
@@ -1214,6 +1216,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
                        /* Was this an NFSv2 write or an NFSv3 stable write? */
                        if (resp->verf->committed != NFS_UNSTABLE) {
                                /* Resend from where the server left off */
+                               data->mds_offset += resp->count;
                                argp->offset += resp->count;
                                argp->pgbase += resp->count;
                                argp->count -= resp->count;
index c82ad33..3440f5a 100644 (file)
@@ -1039,11 +1039,13 @@ struct nfs_write_data {
        struct nfs_writeargs    args;           /* argument struct */
        struct nfs_writeres     res;            /* result struct */
        struct pnfs_layout_segment *lseg;
+       struct nfs_client       *ds_clp;        /* pNFS data server */
        const struct rpc_call_ops *mds_ops;
        int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 #ifdef CONFIG_NFS_V4
        unsigned long           timestamp;      /* For lease renewal */
 #endif
+       __u64                   mds_offset;     /* Filelayout dense stripe */
        struct page             *page_array[NFS_PAGEVEC_SIZE];
 };