NFSD: Fix nfs4_verifier memory alignment
[platform/adaptation/renesas_rcar/renesas_kernel.git] / fs / nfsd / nfs4state.c
index 9ca16dc..e318964 100644 (file)
 time_t nfsd4_lease = 90;     /* default lease time */
 time_t nfsd4_grace = 90;
 static time_t boot_time;
-static stateid_t zerostateid;             /* bits all 0 */
-static stateid_t onestateid;              /* bits all 1 */
+
+#define all_ones {{~0,~0},~0}
+static const stateid_t one_stateid = {
+       .si_generation = ~0,
+       .si_opaque = all_ones,
+};
+static const stateid_t zero_stateid = {
+       /* all fields zero */
+};
+static const stateid_t currentstateid = {
+       .si_generation = 1,
+};
+
 static u64 current_sessionid = 1;
 
-#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
-#define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
+#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
+#define ONE_STATEID(stateid)  (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
+#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
 
 /* forward declarations */
 static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner);
@@ -83,6 +95,19 @@ nfs4_lock_state(void)
        mutex_lock(&client_mutex);
 }
 
+static void free_session(struct kref *);
+
+/* Must be called under the client_lock */
+static void nfsd4_put_session_locked(struct nfsd4_session *ses)
+{
+       kref_put(&ses->se_ref, free_session);
+}
+
+static void nfsd4_get_session(struct nfsd4_session *ses)
+{
+       kref_get(&ses->se_ref);
+}
+
 void
 nfs4_unlock_state(void)
 {
@@ -133,21 +158,21 @@ unsigned int max_delegations;
  * Open owner state (share locks)
  */
 
-/* hash tables for open owners */
-#define OPEN_OWNER_HASH_BITS              8
-#define OPEN_OWNER_HASH_SIZE             (1 << OPEN_OWNER_HASH_BITS)
-#define OPEN_OWNER_HASH_MASK             (OPEN_OWNER_HASH_SIZE - 1)
+/* hash tables for lock and open owners */
+#define OWNER_HASH_BITS              8
+#define OWNER_HASH_SIZE             (1 << OWNER_HASH_BITS)
+#define OWNER_HASH_MASK             (OWNER_HASH_SIZE - 1)
 
-static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
+static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
 {
        unsigned int ret;
 
        ret = opaque_hashval(ownername->data, ownername->len);
        ret += clientid;
-       return ret & OPEN_OWNER_HASH_MASK;
+       return ret & OWNER_HASH_MASK;
 }
 
-static struct list_head        open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE];
+static struct list_head        ownerstr_hashtbl[OWNER_HASH_SIZE];
 
 /* hash table for nfs4_file */
 #define FILE_HASH_BITS                   8
@@ -514,6 +539,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo)
 
        list_del(&lo->lo_owner.so_strhash);
        list_del(&lo->lo_perstateid);
+       list_del(&lo->lo_owner_ino_hash);
        while (!list_empty(&lo->lo_owner.so_stateids)) {
                stp = list_first_entry(&lo->lo_owner.so_stateids,
                                struct nfs4_ol_stateid, st_perstateowner);
@@ -596,12 +622,20 @@ hash_sessionid(struct nfs4_sessionid *sessionid)
        return sid->sequence % SESSION_HASH_SIZE;
 }
 
+#ifdef NFSD_DEBUG
 static inline void
 dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
 {
        u32 *ptr = (u32 *)(&sessionid->data[0]);
        dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
 }
+#else
+static inline void
+dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
+{
+}
+#endif
+
 
 static void
 gen_sessionid(struct nfsd4_session *ses)
@@ -823,11 +857,12 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
        spin_unlock(&clp->cl_lock);
 }
 
-void free_session(struct kref *kref)
+static void free_session(struct kref *kref)
 {
        struct nfsd4_session *ses;
        int mem;
 
+       BUG_ON(!spin_is_locked(&client_lock));
        ses = container_of(kref, struct nfsd4_session, se_ref);
        nfsd4_del_conns(ses);
        spin_lock(&nfsd_drc_lock);
@@ -838,6 +873,13 @@ void free_session(struct kref *kref)
        kfree(ses);
 }
 
+void nfsd4_put_session(struct nfsd4_session *ses)
+{
+       spin_lock(&client_lock);
+       nfsd4_put_session_locked(ses);
+       spin_unlock(&client_lock);
+}
+
 static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses)
 {
        struct nfsd4_session *new;
@@ -885,7 +927,9 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n
        status = nfsd4_new_conn_from_crses(rqstp, new);
        /* whoops: benny points out, status is ignored! (err, or bogus) */
        if (status) {
+               spin_lock(&client_lock);
                free_session(&new->se_ref);
+               spin_unlock(&client_lock);
                return NULL;
        }
        if (cses->flags & SESSION4_BACK_CHAN) {
@@ -985,12 +1029,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
        clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
        if (clp == NULL)
                return NULL;
-       clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
+       clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
        if (clp->cl_name.data == NULL) {
                kfree(clp);
                return NULL;
        }
-       memcpy(clp->cl_name.data, name.data, name.len);
        clp->cl_name.len = name.len;
        return clp;
 }
@@ -998,12 +1041,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 static inline void
 free_client(struct nfs4_client *clp)
 {
+       BUG_ON(!spin_is_locked(&client_lock));
        while (!list_empty(&clp->cl_sessions)) {
                struct nfsd4_session *ses;
                ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
                                se_perclnt);
                list_del(&ses->se_perclnt);
-               nfsd4_put_session(ses);
+               nfsd4_put_session_locked(ses);
        }
        if (clp->cl_cred.cr_group_info)
                put_group_info(clp->cl_cred.cr_group_info);
@@ -1058,7 +1102,6 @@ expire_client(struct nfs4_client *clp)
        spin_unlock(&recall_lock);
        while (!list_empty(&reaplist)) {
                dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
-               list_del_init(&dp->dl_recall_lru);
                unhash_delegation(dp);
        }
        while (!list_empty(&clp->cl_openowners)) {
@@ -1131,12 +1174,12 @@ static void gen_clid(struct nfs4_client *clp)
 
 static void gen_confirm(struct nfs4_client *clp)
 {
+       __be32 verf[2];
        static u32 i;
-       u32 *p;
 
-       p = (u32 *)clp->cl_confirm.data;
-       *p++ = get_seconds();
-       *p++ = i++;
+       verf[0] = (__be32)get_seconds();
+       verf[1] = (__be32)i++;
+       memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
 }
 
 static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
@@ -1173,7 +1216,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
        if (princ) {
                clp->cl_principal = kstrdup(princ, GFP_KERNEL);
                if (clp->cl_principal == NULL) {
+                       spin_lock(&client_lock);
                        free_client(clp);
+                       spin_unlock(&client_lock);
                        return NULL;
                }
        }
@@ -1340,6 +1385,7 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
        slot->sl_opcnt = resp->opcnt;
        slot->sl_status = resp->cstate.status;
 
+       slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
        if (nfsd4_not_cached(resp)) {
                slot->sl_datalen = 0;
                return;
@@ -1367,15 +1413,12 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
        struct nfsd4_op *op;
        struct nfsd4_slot *slot = resp->cstate.slot;
 
-       dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__,
-               resp->opcnt, resp->cstate.slot->sl_cachethis);
-
        /* Encode the replayed sequence operation */
        op = &args->ops[resp->opcnt - 1];
        nfsd4_encode_operation(resp, op);
 
        /* Return nfserr_retry_uncached_rep in next operation. */
-       if (args->opcnt > 1 && slot->sl_cachethis == 0) {
+       if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
                op = &args->ops[resp->opcnt++];
                op->status = nfserr_retry_uncached_rep;
                nfsd4_encode_operation(resp, op);
@@ -1568,16 +1611,11 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
                else
                        return nfserr_seq_misordered;
        }
-       /* Normal */
+       /* Note unsigned 32-bit arithmetic handles wraparound: */
        if (likely(seqid == slot_seqid + 1))
                return nfs_ok;
-       /* Replay */
        if (seqid == slot_seqid)
                return nfserr_replay_cache;
-       /* Wraparound */
-       if (seqid == 1 && (slot_seqid + 1) == 0)
-               return nfs_ok;
-       /* Misordered replay or misordered new request */
        return nfserr_seq_misordered;
 }
 
@@ -1808,9 +1846,10 @@ nfsd4_destroy_session(struct svc_rqst *r,
        nfsd4_probe_callback_sync(ses->se_client);
        nfs4_unlock_state();
 
+       spin_lock(&client_lock);
        nfsd4_del_conns(ses);
-
-       nfsd4_put_session(ses);
+       nfsd4_put_session_locked(ses);
+       spin_unlock(&client_lock);
        status = nfs_ok;
 out:
        dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1914,8 +1953,12 @@ nfsd4_sequence(struct svc_rqst *rqstp,
         * sr_highest_slotid and the sr_target_slot id to maxslots */
        seq->maxslots = session->se_fchannel.maxreqs;
 
-       status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse);
+       status = check_slot_seqid(seq->seqid, slot->sl_seqid,
+                                       slot->sl_flags & NFSD4_SLOT_INUSE);
        if (status == nfserr_replay_cache) {
+               status = nfserr_seq_misordered;
+               if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
+                       goto out;
                cstate->slot = slot;
                cstate->session = session;
                /* Return the cached reply status and set cstate->status
@@ -1931,9 +1974,12 @@ nfsd4_sequence(struct svc_rqst *rqstp,
        conn = NULL;
 
        /* Success! bump slot seqid */
-       slot->sl_inuse = true;
        slot->sl_seqid = seq->seqid;
-       slot->sl_cachethis = seq->cachethis;
+       slot->sl_flags |= NFSD4_SLOT_INUSE;
+       if (seq->cachethis)
+               slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
+       else
+               slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS;
 
        cstate->slot = slot;
        cstate->session = session;
@@ -2301,7 +2347,7 @@ nfsd4_free_slabs(void)
        nfsd4_free_slab(&deleg_slab);
 }
 
-static int
+int
 nfsd4_init_slabs(void)
 {
        openowner_slab = kmem_cache_create("nfsd4_openowners",
@@ -2373,7 +2419,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
 
 static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
 {
-       list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]);
+       list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
        list_add(&oo->oo_perclient, &clp->cl_openowners);
 }
 
@@ -2436,7 +2482,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
        struct nfs4_stateowner *so;
        struct nfs4_openowner *oo;
 
-       list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) {
+       list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+               if (!so->so_is_open_owner)
+                       continue;
                if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {
                        oo = openowner(so);
                        renew_client(oo->oo_owner.so_client);
@@ -2580,7 +2628,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
        if (open->op_file == NULL)
                return nfserr_jukebox;
 
-       strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner);
+       strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner);
        oo = find_openstateowner_str(strhashval, open);
        open->op_openowner = oo;
        if (!oo) {
@@ -2624,8 +2672,6 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
 
 static int share_access_to_flags(u32 share_access)
 {
-       share_access &= ~NFS4_SHARE_WANT_MASK;
-
        return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
 }
 
@@ -2767,10 +2813,15 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
 
 
 static void
-nfs4_set_claim_prev(struct nfsd4_open *open)
+nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
 {
        open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
-       open->op_openowner->oo_owner.so_client->cl_firststate = 1;
+       /*
+        * On a 4.1+ client, we don't create a state record for a client
+        * until it performs RECLAIM_COMPLETE:
+        */
+       if (!has_session)
+               open->op_openowner->oo_owner.so_client->cl_firststate = 1;
 }
 
 /* Should we give out recallable state?: */
@@ -2846,6 +2897,27 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
        return 0;
 }
 
+static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
+{
+       open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+       if (status == -EAGAIN)
+               open->op_why_no_deleg = WND4_CONTENTION;
+       else {
+               open->op_why_no_deleg = WND4_RESOURCE;
+               switch (open->op_deleg_want) {
+               case NFS4_SHARE_WANT_READ_DELEG:
+               case NFS4_SHARE_WANT_WRITE_DELEG:
+               case NFS4_SHARE_WANT_ANY_DELEG:
+                       break;
+               case NFS4_SHARE_WANT_CANCEL:
+                       open->op_why_no_deleg = WND4_CANCELLED;
+                       break;
+               case NFS4_SHARE_WANT_NO_DELEG:
+                       BUG();  /* not supposed to get here */
+               }
+       }
+}
+
 /*
  * Attempt to hand out a delegation.
  */
@@ -2855,7 +2927,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_
        struct nfs4_delegation *dp;
        struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner);
        int cb_up;
-       int status, flag = 0;
+       int status = 0, flag = 0;
 
        cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
        flag = NFS4_OPEN_DELEGATE_NONE;
@@ -2896,11 +2968,16 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_
        dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
                STATEID_VAL(&dp->dl_stid.sc_stateid));
 out:
-       if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
-                       && flag == NFS4_OPEN_DELEGATE_NONE
-                       && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
-               dprintk("NFSD: WARNING: refusing delegation reclaim\n");
        open->op_delegate_type = flag;
+       if (flag == NFS4_OPEN_DELEGATE_NONE) {
+               if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
+                   open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
+                       dprintk("NFSD: WARNING: refusing delegation reclaim\n");
+
+               /* 4.1 client asking for a delegation? */
+               if (open->op_deleg_want)
+                       nfsd4_open_deleg_none_ext(open, status);
+       }
        return;
 out_free:
        nfs4_put_delegation(dp);
@@ -2909,6 +2986,24 @@ out_no_deleg:
        goto out;
 }
 
+static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
+                                       struct nfs4_delegation *dp)
+{
+       if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
+           dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
+               open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+               open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
+       } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
+                  dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
+               open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+               open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+       }
+       /* Otherwise the client must be confused wanting a delegation
+        * it already has, therefore we don't return
+        * NFS4_OPEN_DELEGATE_NONE_EXT and reason.
+        */
+}
+
 /*
  * called with nfs4_lock_state() held.
  */
@@ -2970,24 +3065,36 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
        update_stateid(&stp->st_stid.sc_stateid);
        memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
 
-       if (nfsd4_has_session(&resp->cstate))
+       if (nfsd4_has_session(&resp->cstate)) {
                open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
 
+               if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
+                       open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+                       open->op_why_no_deleg = WND4_NOT_WANTED;
+                       goto nodeleg;
+               }
+       }
+
        /*
        * Attempt to hand out a delegation. No error return, because the
        * OPEN succeeds even if we fail.
        */
        nfs4_open_delegation(current_fh, open, stp);
-
+nodeleg:
        status = nfs_ok;
 
        dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
                STATEID_VAL(&stp->st_stid.sc_stateid));
 out:
+       /* 4.1 client trying to upgrade/downgrade delegation? */
+       if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
+           open->op_deleg_want)
+               nfsd4_deleg_xgrade_none_ext(open, dp);
+
        if (fp)
                put_nfs4_file(fp);
        if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
-               nfs4_set_claim_prev(open);
+               nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate));
        /*
        * To finish the open response, we just need to set the rflags.
        */
@@ -3123,7 +3230,6 @@ nfs4_laundromat(void)
        spin_unlock(&recall_lock);
        list_for_each_safe(pos, next, &reaplist) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               list_del_init(&dp->dl_recall_lru);
                unhash_delegation(dp);
        }
        test_val = nfsd4_lease;
@@ -3392,7 +3498,14 @@ __be32
 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                   struct nfsd4_test_stateid *test_stateid)
 {
-       /* real work is done during encoding */
+       struct nfsd4_test_stateid_id *stateid;
+       struct nfs4_client *cl = cstate->session->se_client;
+
+       nfs4_lock_state();
+       list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
+               stateid->ts_id_status = nfs4_validate_stateid(cl, &stateid->ts_id_stateid);
+       nfs4_unlock_state();
+
        return nfs_ok;
 }
 
@@ -3588,7 +3701,9 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
                        cstate->current_fh.fh_dentry->d_name.name);
 
        /* We don't yet support WANT bits: */
-       od->od_share_access &= NFS4_SHARE_ACCESS_MASK;
+       if (od->od_deleg_want)
+               dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__,
+                       od->od_deleg_want);
 
        nfs4_lock_state();
        status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
@@ -3718,13 +3833,11 @@ out:
 }
 
 
-/* 
- * Lock owner state (byte-range locks)
- */
 #define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
-#define LOCK_HASH_BITS              8
-#define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
-#define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
+
+#define LOCKOWNER_INO_HASH_BITS 8
+#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
+#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
 
 static inline u64
 end_offset(u64 start, u64 len)
@@ -3746,16 +3859,14 @@ last_byte_offset(u64 start, u64 len)
        return end > start ? end - 1: NFS4_MAX_UINT64;
 }
 
-static inline unsigned int
-lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
-               struct xdr_netobj *ownername)
+static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername)
 {
        return (file_hashval(inode) + cl_id
                        + opaque_hashval(ownername->data, ownername->len))
-               & LOCK_HASH_MASK;
+               & LOCKOWNER_INO_HASH_MASK;
 }
 
-static struct list_head        lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE];
 
 /*
  * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
@@ -3809,23 +3920,39 @@ nevermind:
                deny->ld_type = NFS4_WRITE_LT;
 }
 
+static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner)
+{
+       struct nfs4_ol_stateid *lst;
+
+       if (!same_owner_str(&lo->lo_owner, owner, clid))
+               return false;
+       lst = list_first_entry(&lo->lo_owner.so_stateids,
+                              struct nfs4_ol_stateid, st_perstateowner);
+       return lst->st_file->fi_inode == inode;
+}
+
 static struct nfs4_lockowner *
 find_lockowner_str(struct inode *inode, clientid_t *clid,
                struct xdr_netobj *owner)
 {
-       unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner);
-       struct nfs4_stateowner *op;
+       unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
+       struct nfs4_lockowner *lo;
 
-       list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
-               if (same_owner_str(op, owner, clid))
-                       return lockowner(op);
+       list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
+               if (same_lockowner_ino(lo, inode, clid, owner))
+                       return lo;
        }
        return NULL;
 }
 
 static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp)
 {
-       list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]);
+       struct inode *inode = open_stp->st_file->fi_inode;
+       unsigned int inohash = lockowner_ino_hashval(inode,
+                       clp->cl_clientid.cl_id, &lo->lo_owner.so_owner);
+
+       list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
+       list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]);
        list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
 }
 
@@ -3834,7 +3961,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s
  * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 
  * occurred. 
  *
- * strhashval = lock_ownerstr_hashval 
+ * strhashval = ownerstr_hashval
  */
 
 static struct nfs4_lockowner *
@@ -3892,6 +4019,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
        __set_bit(access, &lock_stp->st_access_bmap);
 }
 
+__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new)
+{
+       struct nfs4_file *fi = ost->st_file;
+       struct nfs4_openowner *oo = openowner(ost->st_stateowner);
+       struct nfs4_client *cl = oo->oo_owner.so_client;
+       struct nfs4_lockowner *lo;
+       unsigned int strhashval;
+
+       lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner);
+       if (lo) {
+               if (!cstate->minorversion)
+                       return nfserr_bad_seqid;
+               /* XXX: a lockowner always has exactly one stateid: */
+               *lst = list_first_entry(&lo->lo_owner.so_stateids,
+                               struct nfs4_ol_stateid, st_perstateowner);
+               return nfs_ok;
+       }
+       strhashval = ownerstr_hashval(cl->cl_clientid.cl_id,
+                       &lock->v.new.owner);
+       lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
+       if (lo == NULL)
+               return nfserr_jukebox;
+       *lst = alloc_init_lock_stateid(lo, fi, ost);
+       if (*lst == NULL) {
+               release_lockowner(lo);
+               return nfserr_jukebox;
+       }
+       *new = true;
+       return nfs_ok;
+}
+
 /*
  *  LOCK operation 
  */
@@ -3907,7 +4065,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct file_lock file_lock;
        struct file_lock conflock;
        __be32 status = 0;
-       unsigned int strhashval;
+       bool new_state = false;
        int lkflg;
        int err;
 
@@ -3933,10 +4091,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                 * lock stateid.
                 */
                struct nfs4_ol_stateid *open_stp = NULL;
-               
+
+               if (nfsd4_has_session(cstate))
+                       /* See rfc 5661 18.10.3: given clientid is ignored: */
+                       memcpy(&lock->v.new.clientid,
+                               &cstate->session->se_client->cl_clientid,
+                               sizeof(clientid_t));
+
                status = nfserr_stale_clientid;
-               if (!nfsd4_has_session(cstate) &&
-                   STALE_CLIENTID(&lock->lk_new_clientid))
+               if (STALE_CLIENTID(&lock->lk_new_clientid))
                        goto out;
 
                /* validate and update open stateid and open seqid */
@@ -3948,25 +4111,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        goto out;
                open_sop = openowner(open_stp->st_stateowner);
                status = nfserr_bad_stateid;
-               if (!nfsd4_has_session(cstate) &&
-                       !same_clid(&open_sop->oo_owner.so_client->cl_clientid,
+               if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
                                                &lock->v.new.clientid))
                        goto out;
-               /* create lockowner and lock stateid */
-               fp = open_stp->st_file;
-               strhashval = lock_ownerstr_hashval(fp->fi_inode,
-                               open_sop->oo_owner.so_client->cl_clientid.cl_id,
-                               &lock->v.new.owner);
-               /* XXX: Do we need to check for duplicate stateowners on
-                * the same file, or should they just be allowed (and
-                * create new stateids)? */
-               status = nfserr_jukebox;
-               lock_sop = alloc_init_lock_stateowner(strhashval,
-                               open_sop->oo_owner.so_client, open_stp, lock);
-               if (lock_sop == NULL)
-                       goto out;
-               lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
-               if (lock_stp == NULL)
+               status = lookup_or_create_lock_state(cstate, open_stp, lock,
+                                                       &lock_stp, &new_state);
+               if (status)
                        goto out;
        } else {
                /* lock (lock owner + lock stateid) already exists */
@@ -3976,10 +4126,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                       NFS4_LOCK_STID, &lock_stp);
                if (status)
                        goto out;
-               lock_sop = lockowner(lock_stp->st_stateowner);
-               fp = lock_stp->st_file;
        }
-       /* lock_sop and lock_stp have been created or found */
+       lock_sop = lockowner(lock_stp->st_stateowner);
+       fp = lock_stp->st_file;
 
        lkflg = setlkflg(lock->lk_type);
        status = nfs4_check_openmode(lock_stp, lkflg);
@@ -4054,7 +4203,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                break;
        }
 out:
-       if (status && lock->lk_is_new && lock_sop)
+       if (status && new_state)
                release_lockowner(lock_sop);
        if (!cstate->replay_owner)
                nfs4_unlock_state();
@@ -4251,7 +4400,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
        struct nfs4_ol_stateid *stp;
        struct xdr_netobj *owner = &rlockowner->rl_owner;
        struct list_head matches;
-       int i;
+       unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
        __be32 status;
 
        dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -4266,22 +4415,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
        nfs4_lock_state();
 
        status = nfserr_locks_held;
-       /* XXX: we're doing a linear search through all the lockowners.
-        * Yipes!  For now we'll just hope clients aren't really using
-        * release_lockowner much, but eventually we have to fix these
-        * data structures. */
        INIT_LIST_HEAD(&matches);
-       for (i = 0; i < LOCK_HASH_SIZE; i++) {
-               list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) {
-                       if (!same_owner_str(sop, owner, clid))
-                               continue;
-                       list_for_each_entry(stp, &sop->so_stateids,
-                                       st_perstateowner) {
-                               lo = lockowner(sop);
-                               if (check_for_locks(stp->st_file, lo))
-                                       goto out;
-                               list_add(&lo->lo_list, &matches);
-                       }
+
+       list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) {
+               if (sop->so_is_open_owner)
+                       continue;
+               if (!same_owner_str(sop, owner, clid))
+                       continue;
+               list_for_each_entry(stp, &sop->so_stateids,
+                               st_perstateowner) {
+                       lo = lockowner(sop);
+                       if (check_for_locks(stp->st_file, lo))
+                               goto out;
+                       list_add(&lo->lo_list, &matches);
                }
        }
        /* Clients probably won't expect us to return with some (but not all)
@@ -4314,7 +4460,9 @@ nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
        struct nfs4_client *clp;
 
        clp = find_confirmed_client_by_str(name, strhashval);
-       return clp ? 1 : 0;
+       if (!clp)
+               return 0;
+       return clp->cl_firststate;
 }
 
 /*
@@ -4394,16 +4542,127 @@ nfs4_check_open_reclaim(clientid_t *clid)
        return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
 }
 
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+
+void nfsd_forget_clients(u64 num)
+{
+       struct nfs4_client *clp, *next;
+       int count = 0;
+
+       nfs4_lock_state();
+       list_for_each_entry_safe(clp, next, &client_lru, cl_lru) {
+               nfsd4_remove_clid_dir(clp);
+               expire_client(clp);
+               if (++count == num)
+                       break;
+       }
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d clients", count);
+}
+
+static void release_lockowner_sop(struct nfs4_stateowner *sop)
+{
+       release_lockowner(lockowner(sop));
+}
+
+static void release_openowner_sop(struct nfs4_stateowner *sop)
+{
+       release_openowner(openowner(sop));
+}
+
+static int nfsd_release_n_owners(u64 num, bool is_open_owner,
+                               void (*release_sop)(struct nfs4_stateowner *))
+{
+       int i, count = 0;
+       struct nfs4_stateowner *sop, *next;
+
+       for (i = 0; i < OWNER_HASH_SIZE; i++) {
+               list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) {
+                       if (sop->so_is_open_owner != is_open_owner)
+                               continue;
+                       release_sop(sop);
+                       if (++count == num)
+                               return count;
+               }
+       }
+       return count;
+}
+
+void nfsd_forget_locks(u64 num)
+{
+       int count;
+
+       nfs4_lock_state();
+       count = nfsd_release_n_owners(num, false, release_lockowner_sop);
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d locks", count);
+}
+
+void nfsd_forget_openowners(u64 num)
+{
+       int count;
+
+       nfs4_lock_state();
+       count = nfsd_release_n_owners(num, true, release_openowner_sop);
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d open owners", count);
+}
+
+int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *))
+{
+       int i, count = 0;
+       struct nfs4_file *fp, *fnext;
+       struct nfs4_delegation *dp, *dnext;
+
+       for (i = 0; i < FILE_HASH_SIZE; i++) {
+               list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) {
+                       list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) {
+                               deleg_func(dp);
+                               if (++count == num)
+                                       return count;
+                       }
+               }
+       }
+
+       return count;
+}
+
+void nfsd_forget_delegations(u64 num)
+{
+       unsigned int count;
+
+       nfs4_lock_state();
+       count = nfsd_process_n_delegations(num, unhash_delegation);
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d delegations", count);
+}
+
+void nfsd_recall_delegations(u64 num)
+{
+       unsigned int count;
+
+       nfs4_lock_state();
+       spin_lock(&recall_lock);
+       count = nfsd_process_n_delegations(num, nfsd_break_one_deleg);
+       spin_unlock(&recall_lock);
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Recalled %d delegations", count);
+}
+
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
 /* initialization to perform at module load time: */
 
-int
+void
 nfs4_state_init(void)
 {
-       int i, status;
+       int i;
 
-       status = nfsd4_init_slabs();
-       if (status)
-               return status;
        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
                INIT_LIST_HEAD(&conf_id_hashtbl[i]);
                INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -4416,18 +4675,15 @@ nfs4_state_init(void)
        for (i = 0; i < FILE_HASH_SIZE; i++) {
                INIT_LIST_HEAD(&file_hashtbl[i]);
        }
-       for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) {
-               INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]);
-       }
-       for (i = 0; i < LOCK_HASH_SIZE; i++) {
-               INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
+       for (i = 0; i < OWNER_HASH_SIZE; i++) {
+               INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
        }
-       memset(&onestateid, ~0, sizeof(stateid_t));
+       for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
+               INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]);
        INIT_LIST_HEAD(&close_lru);
        INIT_LIST_HEAD(&client_lru);
        INIT_LIST_HEAD(&del_recall_lru);
        reclaim_str_hashtbl_size = 0;
-       return 0;
 }
 
 static void
@@ -4466,21 +4722,26 @@ set_max_delegations(void)
 
 /* initialization to perform when the nfsd service is started: */
 
-static int
-__nfs4_state_start(void)
+int
+nfs4_state_start(void)
 {
        int ret;
 
+       nfsd4_load_reboot_recovery_data();
        boot_time = get_seconds();
        locks_start_grace(&nfsd4_manager);
        printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
               nfsd4_grace);
        ret = set_callback_cred();
-       if (ret)
-               return -ENOMEM;
+       if (ret) {
+               ret = -ENOMEM;
+               goto out_recovery;
+       }
        laundry_wq = create_singlethread_workqueue("nfsd4");
-       if (laundry_wq == NULL)
-               return -ENOMEM;
+       if (laundry_wq == NULL) {
+               ret = -ENOMEM;
+               goto out_recovery;
+       }
        ret = nfsd4_create_callback_queue();
        if (ret)
                goto out_free_laundry;
@@ -4489,16 +4750,12 @@ __nfs4_state_start(void)
        return 0;
 out_free_laundry:
        destroy_workqueue(laundry_wq);
+out_recovery:
+       nfs4_release_reclaim();
+       nfsd4_shutdown_recdir();
        return ret;
 }
 
-int
-nfs4_state_start(void)
-{
-       nfsd4_load_reboot_recovery_data();
-       return __nfs4_state_start();
-}
-
 static void
 __nfs4_state_shutdown(void)
 {
@@ -4526,7 +4783,6 @@ __nfs4_state_shutdown(void)
        spin_unlock(&recall_lock);
        list_for_each_safe(pos, next, &reaplist) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               list_del_init(&dp->dl_recall_lru);
                unhash_delegation(dp);
        }
 
@@ -4545,3 +4801,104 @@ nfs4_state_shutdown(void)
        nfs4_unlock_state();
        nfsd4_destroy_callback_queue();
 }
+
+static void
+get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
+{
+       if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid))
+               memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t));
+}
+
+static void
+put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
+{
+       if (cstate->minorversion) {
+               memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t));
+               SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+       }
+}
+
+void
+clear_current_stateid(struct nfsd4_compound_state *cstate)
+{
+       CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+}
+
+/*
+ * functions to set current state id
+ */
+void
+nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+{
+       put_stateid(cstate, &odp->od_stateid);
+}
+
+void
+nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
+{
+       put_stateid(cstate, &open->op_stateid);
+}
+
+void
+nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+{
+       put_stateid(cstate, &close->cl_stateid);
+}
+
+void
+nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock)
+{
+       put_stateid(cstate, &lock->lk_resp_stateid);
+}
+
+/*
+ * functions to consume current state id
+ */
+
+void
+nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+{
+       get_stateid(cstate, &odp->od_stateid);
+}
+
+void
+nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp)
+{
+       get_stateid(cstate, &drp->dr_stateid);
+}
+
+void
+nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp)
+{
+       get_stateid(cstate, &fsp->fr_stateid);
+}
+
+void
+nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr)
+{
+       get_stateid(cstate, &setattr->sa_stateid);
+}
+
+void
+nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+{
+       get_stateid(cstate, &close->cl_stateid);
+}
+
+void
+nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku)
+{
+       get_stateid(cstate, &locku->lu_stateid);
+}
+
+void
+nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read)
+{
+       get_stateid(cstate, &read->rd_stateid);
+}
+
+void
+nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write)
+{
+       get_stateid(cstate, &write->wr_stateid);
+}