Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ryusuke...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 7 Aug 2010 20:10:55 +0000 (13:10 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 7 Aug 2010 20:10:55 +0000 (13:10 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ryusuke/nilfs2: (45 commits)
  nilfs2: reject filesystem with unsupported block size
  nilfs2: avoid rec_len overflow with 64KB block size
  nilfs2: simplify nilfs_get_page function
  nilfs2: reject incompatible filesystem
  nilfs2: add feature set fields to super block
  nilfs2: clarify byte offset in super block format
  nilfs2: apply read-ahead for nilfs_btree_lookup_contig
  nilfs2: introduce check flag to btree node buffer
  nilfs2: add btree get block function with readahead option
  nilfs2: add read ahead mode to nilfs_btnode_submit_block
  nilfs2: fix buffer head leak in nilfs_btnode_submit_block
  nilfs2: eliminate inline keywords in btree implementation
  nilfs2: get maximum number of child nodes from bmap object
  nilfs2: reduce repetitive calculation of max number of child nodes
  nilfs2: optimize calculation of min/max number of btree node children
  nilfs2: remove redundant pointer checks in bmap lookup functions
  nilfs2: get rid of nilfs_bmap_union
  nilfs2: unify bmap set_target_v operations
  nilfs2: get rid of nilfs_btree uses
  nilfs2: get rid of nilfs_direct uses
  ...

24 files changed:
Documentation/filesystems/nilfs2.txt
fs/nilfs2/bmap.c
fs/nilfs2/bmap.h
fs/nilfs2/bmap_union.h [deleted file]
fs/nilfs2/btnode.c
fs/nilfs2/btnode.h
fs/nilfs2/btree.c
fs/nilfs2/btree.h
fs/nilfs2/dir.c
fs/nilfs2/direct.c
fs/nilfs2/direct.h
fs/nilfs2/gcinode.c
fs/nilfs2/mdt.c
fs/nilfs2/nilfs.h
fs/nilfs2/page.c
fs/nilfs2/page.h
fs/nilfs2/recovery.c
fs/nilfs2/segbuf.h
fs/nilfs2/segment.c
fs/nilfs2/segment.h
fs/nilfs2/super.c
fs/nilfs2/the_nilfs.c
fs/nilfs2/the_nilfs.h
include/linux/nilfs2_fs.h

index d3e7673..d5c0cef 100644
@@ -49,7 +49,10 @@ Mount options
 NILFS2 supports the following mount options:
 (*) == default
 
-nobarrier              Disables barriers.
+barrier(*)             This enables/disables the use of write barriers.  This
+nobarrier              requires an IO stack which can support barriers, and
+                       if nilfs gets an error on a barrier write, it will
+                       disable again with a warning.
 errors=continue                Keep going on a filesystem error.
 errors=remount-ro(*)   Remount the filesystem read-only on an error.
 errors=panic           Panic and halt the machine if an error occurs.
@@ -74,9 +77,10 @@ norecovery           Disable recovery of the filesystem on mount.
                        This disables every write access on the device for
                        read-only mounts or snapshots.  This option will fail
                        for r/w mounts on an unclean volume.
-discard                        Issue discard/TRIM commands to the underlying block
-                       device when blocks are freed.  This is useful for SSD
-                       devices and sparse/thinly-provisioned LUNs.
+discard                        This enables/disables the use of discard/TRIM commands.
+nodiscard(*)           The discard/TRIM commands are sent to the underlying
+                       block device when blocks are freed.  This is useful
+                       for SSD devices and sparse/thinly-provisioned LUNs.
 
 NILFS2 usage
 ============
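
As a quick illustration of the options documented above, a user-space program can pass the same strings through the data argument of mount(2); the device path, mount point, and option string below are made up for the example and are not taken from the patch.

    /* Illustration only: mount a nilfs2 volume with the documented option
     * strings passed as mount(2) data.  Paths and options are made up. */
    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            const char *opts = "barrier,nodiscard,errors=remount-ro";

            if (mount("/dev/sdb1", "/mnt/nilfs", "nilfs2", 0, opts) != 0) {
                    perror("mount");
                    return 1;
            }
            return 0;
    }
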
index effdbdb..3dbdc1d 100644
@@ -26,6 +26,8 @@
 #include "nilfs.h"
 #include "bmap.h"
 #include "sb.h"
+#include "btree.h"
+#include "direct.h"
 #include "btnode.h"
 #include "mdt.h"
 #include "dat.h"
@@ -533,7 +535,7 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
 
 void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
 {
-       memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union));
+       memcpy(gcbmap, bmap, sizeof(*bmap));
        init_rwsem(&gcbmap->b_sem);
        lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
        gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode;
@@ -541,7 +543,7 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
 
 void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
 {
-       memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union));
+       memcpy(bmap, gcbmap, sizeof(*bmap));
        init_rwsem(&bmap->b_sem);
        lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
        bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
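
The two memcpy() changes above fall out of removing nilfs_bmap_union: once the bmap is a plain struct, the copy length can be taken from the destination object itself. A stand-alone sketch of that sizeof idiom, using an invented structure type:

    /* Illustration only: with a plain struct, memcpy can size the copy from
     * the destination object, so it stays correct if the type changes. */
    #include <stdio.h>
    #include <string.h>

    struct bmap_like {
            int b_state;
            unsigned long long b_last_allocated_key;
    };

    int main(void)
    {
            struct bmap_like src = { 1, 42 }, dst;

            memcpy(&dst, &src, sizeof(dst));  /* not a hand-named union size */
            printf("%d %llu\n", dst.b_state, dst.b_last_allocated_key);
            return 0;
    }
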
index 9980d7d..a20569b 100644
 
 #define NILFS_BMAP_INVALID_PTR 0
 
-#define nilfs_bmap_dkey_to_key(dkey)   le64_to_cpu(dkey)
-#define nilfs_bmap_key_to_dkey(key)    cpu_to_le64(key)
-#define nilfs_bmap_dptr_to_ptr(dptr)   le64_to_cpu(dptr)
-#define nilfs_bmap_ptr_to_dptr(ptr)    cpu_to_le64(ptr)
-
 #define nilfs_bmap_keydiff_abs(diff)   ((diff) < 0 ? -(diff) : (diff))
 
 
@@ -71,7 +66,7 @@ struct nilfs_bmap_operations {
        int (*bop_delete)(struct nilfs_bmap *, __u64);
        void (*bop_clear)(struct nilfs_bmap *);
 
-       int (*bop_propagate)(const struct nilfs_bmap *, struct buffer_head *);
+       int (*bop_propagate)(struct nilfs_bmap *, struct buffer_head *);
        void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *,
                                         struct list_head *);
 
@@ -110,6 +105,7 @@ static inline int nilfs_bmap_is_new_ptr(unsigned long ptr)
  * @b_last_allocated_ptr: last allocated ptr for data block
  * @b_ptr_type: pointer type
  * @b_state: state
+ * @b_nchildren_per_block: maximum number of child nodes for non-root nodes
  */
 struct nilfs_bmap {
        union {
@@ -123,6 +119,7 @@ struct nilfs_bmap {
        __u64 b_last_allocated_ptr;
        int b_ptr_type;
        int b_state;
+       __u16 b_nchildren_per_block;
 };
 
 /* pointer type */
@@ -224,6 +221,13 @@ static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap,
                nilfs_dat_abort_end(dat, &req->bpr_req);
 }
 
+static inline void nilfs_bmap_set_target_v(struct nilfs_bmap *bmap, __u64 key,
+                                          __u64 ptr)
+{
+       bmap->b_last_allocated_key = key;
+       bmap->b_last_allocated_ptr = ptr;
+}
+
 __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
                              const struct buffer_head *);
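
The new b_nchildren_per_block field caches how many child slots fit in one non-root btree node block, so the btree.c hunks below can stop recomputing that limit from the block size on every access. A rough user-space illustration of the cached quantity follows; the 16-byte header is an assumption for the example, not the on-disk layout.

    /* Illustration only: the order of magnitude being cached per bmap. */
    #include <stdint.h>
    #include <stdio.h>

    #define NODE_HEADER_BYTES 16    /* assumed node header size */

    static int nchildren_per_block(int block_size)
    {
            /* each child slot holds one 64-bit key plus one 64-bit pointer */
            return (block_size - NODE_HEADER_BYTES) /
                   (int)(sizeof(uint64_t) + sizeof(uint64_t));
    }

    int main(void)
    {
            printf("4096-byte node -> %d children\n",
                   nchildren_per_block(4096));
            return 0;
    }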
 
diff --git a/fs/nilfs2/bmap_union.h b/fs/nilfs2/bmap_union.h
deleted file mode 100644
index d41509b..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * bmap_union.h - NILFS block mapping.
- *
- * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- *
- * Written by Koji Sato <koji@osrg.net>.
- */
-
-#ifndef _NILFS_BMAP_UNION_H
-#define _NILFS_BMAP_UNION_H
-
-#include "bmap.h"
-#include "direct.h"
-#include "btree.h"
-
-/**
- * nilfs_bmap_union -
- * @bi_bmap: bmap structure
- * @bi_btree: direct map structure
- * @bi_direct: B-tree structure
- */
-union nilfs_bmap_union {
-       struct nilfs_bmap bi_bmap;
-       struct nilfs_direct bi_direct;
-       struct nilfs_btree bi_btree;
-};
-
-#endif /* _NILFS_BMAP_UNION_H */
index 447ce47..f78ab10 100644
@@ -96,10 +96,12 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
 }
 
 int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
-                             sector_t pblocknr, struct buffer_head **pbh)
+                             sector_t pblocknr, int mode,
+                             struct buffer_head **pbh, sector_t *submit_ptr)
 {
        struct buffer_head *bh;
        struct inode *inode = NILFS_BTNC_I(btnc);
+       struct page *page;
        int err;
 
        bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
@@ -107,6 +109,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
                return -ENOMEM;
 
        err = -EEXIST; /* internal code */
+       page = bh->b_page;
 
        if (buffer_uptodate(bh) || buffer_dirty(bh))
                goto found;
@@ -125,7 +128,16 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
                        }
                }
        }
-       lock_buffer(bh);
+
+       if (mode == READA) {
+               if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) {
+                       err = -EBUSY; /* internal code */
+                       brelse(bh);
+                       goto out_locked;
+               }
+       } else { /* mode == READ */
+               lock_buffer(bh);
+       }
        if (buffer_uptodate(bh)) {
                unlock_buffer(bh);
                err = -EEXIST; /* internal code */
@@ -136,15 +148,16 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
        bh->b_blocknr = pblocknr; /* set block address for read */
        bh->b_end_io = end_buffer_read_sync;
        get_bh(bh);
-       submit_bh(READ, bh);
+       submit_bh(mode, bh);
        bh->b_blocknr = blocknr; /* set back to the given block address */
+       *submit_ptr = pblocknr;
        err = 0;
 found:
        *pbh = bh;
 
 out_locked:
-       unlock_page(bh->b_page);
-       page_cache_release(bh->b_page);
+       unlock_page(page);
+       page_cache_release(page);
        return err;
 }
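
The new mode and submit_ptr arguments let callers issue speculative READA submissions only when the physical block directly follows the one submitted last (and the buffer can be trylocked); the real caller is __nilfs_btree_get_block in the btree.c hunks below. A stand-alone sketch of just the contiguity rule, with illustrative names:

    /* Illustration only: read ahead is attempted solely for the block that is
     * physically contiguous with the last submitted one; otherwise the caller
     * skips it (the kernel code returns -EBUSY in that case). */
    #include <stdbool.h>
    #include <stdio.h>

    typedef unsigned long long sector_t;

    static bool may_submit_readahead(sector_t pblocknr, sector_t *submit_ptr)
    {
            if (pblocknr != *submit_ptr + 1)
                    return false;
            *submit_ptr = pblocknr;         /* remember last submitted block */
            return true;
    }

    int main(void)
    {
            sector_t cursor = 99;

            printf("%d\n", may_submit_readahead(100, &cursor));    /* 1 */
            printf("%d\n", may_submit_readahead(102, &cursor));    /* 0: gap */
            return 0;
    }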
 
index 07da83f..7903749 100644
@@ -42,8 +42,8 @@ void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
 void nilfs_btnode_cache_clear(struct address_space *);
 struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
                                              __u64 blocknr);
-int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t,
-                             struct buffer_head **);
+int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int,
+                             struct buffer_head **, sector_t *);
 void nilfs_btnode_delete(struct buffer_head *);
 int nilfs_btnode_prepare_change_key(struct address_space *,
                                    struct nilfs_btnode_chkey_ctxt *);
index b27a342..300c2bc 100644
@@ -66,30 +66,10 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path)
 /*
  * B-tree node operations
  */
-static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
-                                struct buffer_head **bhp)
-{
-       struct address_space *btnc =
-               &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
-       int err;
-
-       err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
-       if (err)
-               return err == -EEXIST ? 0 : err;
-
-       wait_on_buffer(*bhp);
-       if (!buffer_uptodate(*bhp)) {
-               brelse(*bhp);
-               return -EIO;
-       }
-       return 0;
-}
-
-static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
+static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
                                     __u64 ptr, struct buffer_head **bhp)
 {
-       struct address_space *btnc =
-               &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
+       struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
        struct buffer_head *bh;
 
        bh = nilfs_btnode_create_block(btnc, ptr);
@@ -101,71 +81,55 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
        return 0;
 }
 
-static inline int
-nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
+static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
 {
        return node->bn_flags;
 }
 
-static inline void
+static void
 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
 {
        node->bn_flags = flags;
 }
 
-static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
+static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
 {
        return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
 }
 
-static inline int
-nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
+static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
 {
        return node->bn_level;
 }
 
-static inline void
+static void
 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
 {
        node->bn_level = level;
 }
 
-static inline int
-nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
+static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 {
        return le16_to_cpu(node->bn_nchildren);
 }
 
-static inline void
+static void
 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 {
        node->bn_nchildren = cpu_to_le16(nchildren);
 }
 
-static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
+static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 {
-       return 1 << btree->bt_bmap.b_inode->i_blkbits;
+       return 1 << btree->b_inode->i_blkbits;
 }
 
-static inline int
-nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
-                              const struct nilfs_btree *btree)
+static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
 {
-       return nilfs_btree_node_root(node) ?
-               NILFS_BTREE_ROOT_NCHILDREN_MIN :
-               NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
+       return btree->b_nchildren_per_block;
 }
 
-static inline int
-nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
-                              const struct nilfs_btree *btree)
-{
-       return nilfs_btree_node_root(node) ?
-               NILFS_BTREE_ROOT_NCHILDREN_MAX :
-               NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
-}
-
-static inline __le64 *
+static __le64 *
 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 {
        return (__le64 *)((char *)(node + 1) +
@@ -173,45 +137,40 @@ nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
                           0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 }
 
-static inline __le64 *
-nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
-                      const struct nilfs_btree *btree)
+static __le64 *
+nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
 {
-       return (__le64 *)(nilfs_btree_node_dkeys(node) +
-                         nilfs_btree_node_nchildren_max(node, btree));
+       return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
 }
 
-static inline __u64
+static __u64
 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 {
-       return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
+       return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
 }
 
-static inline void
+static void
 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 {
-       *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
+       *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
 }
 
-static inline __u64
-nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
-                        const struct nilfs_btree_node *node, int index)
+static __u64
+nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
+                        int ncmax)
 {
-       return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
-                                       index));
+       return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
 }
 
-static inline void
-nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
-                        struct nilfs_btree_node *node, int index, __u64 ptr)
+static void
+nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
+                        int ncmax)
 {
-       *(nilfs_btree_node_dptrs(node, btree) + index) =
-               nilfs_bmap_ptr_to_dptr(ptr);
+       *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
 }
 
-static void nilfs_btree_node_init(struct nilfs_btree *btree,
-                                 struct nilfs_btree_node *node,
-                                 int flags, int level, int nchildren,
+static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
+                                 int level, int nchildren, int ncmax,
                                  const __u64 *keys, const __u64 *ptrs)
 {
        __le64 *dkeys;
@@ -223,29 +182,28 @@ static void nilfs_btree_node_init(struct nilfs_btree *btree,
        nilfs_btree_node_set_nchildren(node, nchildren);
 
        dkeys = nilfs_btree_node_dkeys(node);
-       dptrs = nilfs_btree_node_dptrs(node, btree);
+       dptrs = nilfs_btree_node_dptrs(node, ncmax);
        for (i = 0; i < nchildren; i++) {
-               dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
-               dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
+               dkeys[i] = cpu_to_le64(keys[i]);
+               dptrs[i] = cpu_to_le64(ptrs[i]);
        }
 }
 
 /* Assume the buffer heads corresponding to left and right are locked. */
-static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
-                                      struct nilfs_btree_node *left,
+static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
                                       struct nilfs_btree_node *right,
-                                      int n)
+                                      int n, int lncmax, int rncmax)
 {
        __le64 *ldkeys, *rdkeys;
        __le64 *ldptrs, *rdptrs;
        int lnchildren, rnchildren;
 
        ldkeys = nilfs_btree_node_dkeys(left);
-       ldptrs = nilfs_btree_node_dptrs(left, btree);
+       ldptrs = nilfs_btree_node_dptrs(left, lncmax);
        lnchildren = nilfs_btree_node_get_nchildren(left);
 
        rdkeys = nilfs_btree_node_dkeys(right);
-       rdptrs = nilfs_btree_node_dptrs(right, btree);
+       rdptrs = nilfs_btree_node_dptrs(right, rncmax);
        rnchildren = nilfs_btree_node_get_nchildren(right);
 
        memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
@@ -260,21 +218,20 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
 }
 
 /* Assume that the buffer heads corresponding to left and right are locked. */
-static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
-                                       struct nilfs_btree_node *left,
+static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
                                        struct nilfs_btree_node *right,
-                                       int n)
+                                       int n, int lncmax, int rncmax)
 {
        __le64 *ldkeys, *rdkeys;
        __le64 *ldptrs, *rdptrs;
        int lnchildren, rnchildren;
 
        ldkeys = nilfs_btree_node_dkeys(left);
-       ldptrs = nilfs_btree_node_dptrs(left, btree);
+       ldptrs = nilfs_btree_node_dptrs(left, lncmax);
        lnchildren = nilfs_btree_node_get_nchildren(left);
 
        rdkeys = nilfs_btree_node_dkeys(right);
-       rdptrs = nilfs_btree_node_dptrs(right, btree);
+       rdptrs = nilfs_btree_node_dptrs(right, rncmax);
        rnchildren = nilfs_btree_node_get_nchildren(right);
 
        memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
@@ -289,16 +246,15 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
 }
 
 /* Assume that the buffer head corresponding to node is locked. */
-static void nilfs_btree_node_insert(struct nilfs_btree *btree,
-                                   struct nilfs_btree_node *node,
-                                   __u64 key, __u64 ptr, int index)
+static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
+                                   __u64 key, __u64 ptr, int ncmax)
 {
        __le64 *dkeys;
        __le64 *dptrs;
        int nchildren;
 
        dkeys = nilfs_btree_node_dkeys(node);
-       dptrs = nilfs_btree_node_dptrs(node, btree);
+       dptrs = nilfs_btree_node_dptrs(node, ncmax);
        nchildren = nilfs_btree_node_get_nchildren(node);
        if (index < nchildren) {
                memmove(dkeys + index + 1, dkeys + index,
@@ -306,16 +262,15 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree,
                memmove(dptrs + index + 1, dptrs + index,
                        (nchildren - index) * sizeof(*dptrs));
        }
-       dkeys[index] = nilfs_bmap_key_to_dkey(key);
-       dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
+       dkeys[index] = cpu_to_le64(key);
+       dptrs[index] = cpu_to_le64(ptr);
        nchildren++;
        nilfs_btree_node_set_nchildren(node, nchildren);
 }
 
 /* Assume that the buffer head corresponding to node is locked. */
-static void nilfs_btree_node_delete(struct nilfs_btree *btree,
-                                   struct nilfs_btree_node *node,
-                                   __u64 *keyp, __u64 *ptrp, int index)
+static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
+                                   __u64 *keyp, __u64 *ptrp, int ncmax)
 {
        __u64 key;
        __u64 ptr;
@@ -324,9 +279,9 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree,
        int nchildren;
 
        dkeys = nilfs_btree_node_dkeys(node);
-       dptrs = nilfs_btree_node_dptrs(node, btree);
-       key = nilfs_bmap_dkey_to_key(dkeys[index]);
-       ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
+       dptrs = nilfs_btree_node_dptrs(node, ncmax);
+       key = le64_to_cpu(dkeys[index]);
+       ptr = le64_to_cpu(dptrs[index]);
        nchildren = nilfs_btree_node_get_nchildren(node);
        if (keyp != NULL)
                *keyp = key;
@@ -382,40 +337,92 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
        return s == 0;
 }
 
-static inline struct nilfs_btree_node *
-nilfs_btree_get_root(const struct nilfs_btree *btree)
+/**
+ * nilfs_btree_node_broken - verify consistency of btree node
+ * @node: btree node block to be examined
+ * @size: node size (in bytes)
+ * @blocknr: block number
+ *
+ * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
+ */
+static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
+                                  size_t size, sector_t blocknr)
 {
-       return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
+       int level, flags, nchildren;
+       int ret = 0;
+
+       level = nilfs_btree_node_get_level(node);
+       flags = nilfs_btree_node_get_flags(node);
+       nchildren = nilfs_btree_node_get_nchildren(node);
+
+       if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
+                    level >= NILFS_BTREE_LEVEL_MAX ||
+                    (flags & NILFS_BTREE_NODE_ROOT) ||
+                    nchildren < 0 ||
+                    nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
+               printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
+                      "level = %d, flags = 0x%x, nchildren = %d\n",
+                      (unsigned long long)blocknr, level, flags, nchildren);
+               ret = 1;
+       }
+       return ret;
 }
 
-static inline struct nilfs_btree_node *
+int nilfs_btree_broken_node_block(struct buffer_head *bh)
+{
+       int ret;
+
+       if (buffer_nilfs_checked(bh))
+               return 0;
+
+       ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
+                                      bh->b_size, bh->b_blocknr);
+       if (likely(!ret))
+               set_buffer_nilfs_checked(bh);
+       return ret;
+}
+
+static struct nilfs_btree_node *
+nilfs_btree_get_root(const struct nilfs_bmap *btree)
+{
+       return (struct nilfs_btree_node *)btree->b_u.u_data;
+}
+
+static struct nilfs_btree_node *
 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 {
        return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 }
 
-static inline struct nilfs_btree_node *
+static struct nilfs_btree_node *
 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 {
        return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 }
 
-static inline int nilfs_btree_height(const struct nilfs_btree *btree)
+static int nilfs_btree_height(const struct nilfs_bmap *btree)
 {
        return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 }
 
-static inline struct nilfs_btree_node *
-nilfs_btree_get_node(const struct nilfs_btree *btree,
+static struct nilfs_btree_node *
+nilfs_btree_get_node(const struct nilfs_bmap *btree,
                     const struct nilfs_btree_path *path,
-                    int level)
+                    int level, int *ncmaxp)
 {
-       return (level == nilfs_btree_height(btree) - 1) ?
-               nilfs_btree_get_root(btree) :
-               nilfs_btree_get_nonroot_node(path, level);
+       struct nilfs_btree_node *node;
+
+       if (level == nilfs_btree_height(btree) - 1) {
+               node = nilfs_btree_get_root(btree);
+               *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
+       } else {
+               node = nilfs_btree_get_nonroot_node(path, level);
+               *ncmaxp = nilfs_btree_nchildren_per_block(btree);
+       }
+       return node;
 }
 
-static inline int
+static int
 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
 {
        if (unlikely(nilfs_btree_node_get_level(node) != level)) {
@@ -427,13 +434,83 @@ nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
        return 0;
 }
 
-static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
+struct nilfs_btree_readahead_info {
+       struct nilfs_btree_node *node;  /* parent node */
+       int max_ra_blocks;              /* max nof blocks to read ahead */
+       int index;                      /* current index on the parent node */
+       int ncmax;                      /* nof children in the parent node */
+};
+
+static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
+                                  struct buffer_head **bhp,
+                                  const struct nilfs_btree_readahead_info *ra)
+{
+       struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
+       struct buffer_head *bh, *ra_bh;
+       sector_t submit_ptr = 0;
+       int ret;
+
+       ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
+       if (ret) {
+               if (ret != -EEXIST)
+                       return ret;
+               goto out_check;
+       }
+
+       if (ra) {
+               int i, n;
+               __u64 ptr2;
+
+               /* read ahead sibling nodes */
+               for (n = ra->max_ra_blocks, i = ra->index + 1;
+                    n > 0 && i < ra->ncmax; n--, i++) {
+                       ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
+
+                       ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
+                                                       &ra_bh, &submit_ptr);
+                       if (likely(!ret || ret == -EEXIST))
+                               brelse(ra_bh);
+                       else if (ret != -EBUSY)
+                               break;
+                       if (!buffer_locked(bh))
+                               goto out_no_wait;
+               }
+       }
+
+       wait_on_buffer(bh);
+
+ out_no_wait:
+       if (!buffer_uptodate(bh)) {
+               brelse(bh);
+               return -EIO;
+       }
+
+ out_check:
+       if (nilfs_btree_broken_node_block(bh)) {
+               clear_buffer_uptodate(bh);
+               brelse(bh);
+               return -EINVAL;
+       }
+
+       *bhp = bh;
+       return 0;
+}
+
+static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
+                                  struct buffer_head **bhp)
+{
+       return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
+}
+
+static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
                                 struct nilfs_btree_path *path,
-                                __u64 key, __u64 *ptrp, int minlevel)
+                                __u64 key, __u64 *ptrp, int minlevel,
+                                int readahead)
 {
        struct nilfs_btree_node *node;
+       struct nilfs_btree_readahead_info p, *ra;
        __u64 ptr;
-       int level, index, found, ret;
+       int level, index, found, ncmax, ret;
 
        node = nilfs_btree_get_root(btree);
        level = nilfs_btree_node_get_level(node);
@@ -441,14 +518,27 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
                return -ENOENT;
 
        found = nilfs_btree_node_lookup(node, key, &index);
-       ptr = nilfs_btree_node_get_ptr(btree, node, index);
+       ptr = nilfs_btree_node_get_ptr(node, index,
+                                      NILFS_BTREE_ROOT_NCHILDREN_MAX);
        path[level].bp_bh = NULL;
        path[level].bp_index = index;
 
-       for (level--; level >= minlevel; level--) {
-               ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
+       ncmax = nilfs_btree_nchildren_per_block(btree);
+
+       while (--level >= minlevel) {
+               ra = NULL;
+               if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
+                       p.node = nilfs_btree_get_node(btree, path, level + 1,
+                                                     &p.ncmax);
+                       p.index = index;
+                       p.max_ra_blocks = 7;
+                       ra = &p;
+               }
+               ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
+                                             ra);
                if (ret < 0)
                        return ret;
+
                node = nilfs_btree_get_nonroot_node(path, level);
                if (nilfs_btree_bad_node(node, level))
                        return -EINVAL;
@@ -456,9 +546,9 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
                        found = nilfs_btree_node_lookup(node, key, &index);
                else
                        index = 0;
-               if (index < nilfs_btree_node_nchildren_max(node, btree))
-                       ptr = nilfs_btree_node_get_ptr(btree, node, index);
-               else {
+               if (index < ncmax) {
+                       ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
+               } else {
                        WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
                        /* insert */
                        ptr = NILFS_BMAP_INVALID_PTR;
@@ -474,22 +564,24 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
        return 0;
 }
 
-static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
+static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
                                      struct nilfs_btree_path *path,
                                      __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *node;
        __u64 ptr;
-       int index, level, ret;
+       int index, level, ncmax, ret;
 
        node = nilfs_btree_get_root(btree);
        index = nilfs_btree_node_get_nchildren(node) - 1;
        if (index < 0)
                return -ENOENT;
        level = nilfs_btree_node_get_level(node);
-       ptr = nilfs_btree_node_get_ptr(btree, node, index);
+       ptr = nilfs_btree_node_get_ptr(node, index,
+                                      NILFS_BTREE_ROOT_NCHILDREN_MAX);
        path[level].bp_bh = NULL;
        path[level].bp_index = index;
+       ncmax = nilfs_btree_nchildren_per_block(btree);
 
        for (level--; level > 0; level--) {
                ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
@@ -499,7 +591,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
                if (nilfs_btree_bad_node(node, level))
                        return -EINVAL;
                index = nilfs_btree_node_get_nchildren(node) - 1;
-               ptr = nilfs_btree_node_get_ptr(btree, node, index);
+               ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
                path[level].bp_index = index;
        }
 
@@ -511,51 +603,45 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
        return 0;
 }
 
-static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
+static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
                              __u64 key, int level, __u64 *ptrp)
 {
-       struct nilfs_btree *btree;
        struct nilfs_btree_path *path;
-       __u64 ptr;
        int ret;
 
-       btree = (struct nilfs_btree *)bmap;
        path = nilfs_btree_alloc_path();
        if (path == NULL)
                return -ENOMEM;
 
-       ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
-
-       if (ptrp != NULL)
-               *ptrp = ptr;
+       ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
 
        nilfs_btree_free_path(path);
 
        return ret;
 }
 
-static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
+static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
                                     __u64 key, __u64 *ptrp, unsigned maxblocks)
 {
-       struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
        struct nilfs_btree_path *path;
        struct nilfs_btree_node *node;
        struct inode *dat = NULL;
        __u64 ptr, ptr2;
        sector_t blocknr;
        int level = NILFS_BTREE_LEVEL_NODE_MIN;
-       int ret, cnt, index, maxlevel;
+       int ret, cnt, index, maxlevel, ncmax;
+       struct nilfs_btree_readahead_info p;
 
        path = nilfs_btree_alloc_path();
        if (path == NULL)
                return -ENOMEM;
 
-       ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
+       ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
        if (ret < 0)
                goto out;
 
-       if (NILFS_BMAP_USE_VBN(bmap)) {
-               dat = nilfs_bmap_get_dat(bmap);
+       if (NILFS_BMAP_USE_VBN(btree)) {
+               dat = nilfs_bmap_get_dat(btree);
                ret = nilfs_dat_translate(dat, ptr, &blocknr);
                if (ret < 0)
                        goto out;
@@ -566,14 +652,14 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
                goto end;
 
        maxlevel = nilfs_btree_height(btree) - 1;
-       node = nilfs_btree_get_node(btree, path, level);
+       node = nilfs_btree_get_node(btree, path, level, &ncmax);
        index = path[level].bp_index + 1;
        for (;;) {
                while (index < nilfs_btree_node_get_nchildren(node)) {
                        if (nilfs_btree_node_get_key(node, index) !=
                            key + cnt)
                                goto end;
-                       ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
+                       ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
                        if (dat) {
                                ret = nilfs_dat_translate(dat, ptr2, &blocknr);
                                if (ret < 0)
@@ -589,20 +675,24 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
                        break;
 
                /* look-up right sibling node */
-               node = nilfs_btree_get_node(btree, path, level + 1);
-               index = path[level + 1].bp_index + 1;
-               if (index >= nilfs_btree_node_get_nchildren(node) ||
-                   nilfs_btree_node_get_key(node, index) != key + cnt)
+               p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
+               p.index = path[level + 1].bp_index + 1;
+               p.max_ra_blocks = 7;
+               if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
+                   nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
                        break;
-               ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
-               path[level + 1].bp_index = index;
+               ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
+               path[level + 1].bp_index = p.index;
 
                brelse(path[level].bp_bh);
                path[level].bp_bh = NULL;
-               ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
+
+               ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
+                                             &p);
                if (ret < 0)
                        goto out;
                node = nilfs_btree_get_nonroot_node(path, level);
+               ncmax = nilfs_btree_nchildren_per_block(btree);
                index = 0;
                path[level].bp_index = index;
        }
@@ -614,7 +704,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
        return ret;
 }
 
-static void nilfs_btree_promote_key(struct nilfs_btree *btree,
+static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
                                    struct nilfs_btree_path *path,
                                    int level, __u64 key)
 {
@@ -636,16 +726,18 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
        }
 }
 
-static void nilfs_btree_do_insert(struct nilfs_btree *btree,
+static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
                                  struct nilfs_btree_path *path,
                                  int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *node;
+       int ncblk;
 
        if (level < nilfs_btree_height(btree) - 1) {
                node = nilfs_btree_get_nonroot_node(path, level);
-               nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
-                                       path[level].bp_index);
+               ncblk = nilfs_btree_nchildren_per_block(btree);
+               nilfs_btree_node_insert(node, path[level].bp_index,
+                                       *keyp, *ptrp, ncblk);
                if (!buffer_dirty(path[level].bp_bh))
                        nilfs_btnode_mark_dirty(path[level].bp_bh);
 
@@ -655,22 +747,24 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
                                                                         0));
        } else {
                node = nilfs_btree_get_root(btree);
-               nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
-                                       path[level].bp_index);
+               nilfs_btree_node_insert(node, path[level].bp_index,
+                                       *keyp, *ptrp,
+                                       NILFS_BTREE_ROOT_NCHILDREN_MAX);
        }
 }
 
-static void nilfs_btree_carry_left(struct nilfs_btree *btree,
+static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
                                   struct nilfs_btree_path *path,
                                   int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *node, *left;
-       int nchildren, lnchildren, n, move;
+       int nchildren, lnchildren, n, move, ncblk;
 
        node = nilfs_btree_get_nonroot_node(path, level);
        left = nilfs_btree_get_sib_node(path, level);
        nchildren = nilfs_btree_node_get_nchildren(node);
        lnchildren = nilfs_btree_node_get_nchildren(left);
+       ncblk = nilfs_btree_nchildren_per_block(btree);
        move = 0;
 
        n = (nchildren + lnchildren + 1) / 2 - lnchildren;
@@ -680,7 +774,7 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
                move = 1;
        }
 
-       nilfs_btree_node_move_left(btree, left, node, n);
+       nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 
        if (!buffer_dirty(path[level].bp_bh))
                nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -705,17 +799,18 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 }
 
-static void nilfs_btree_carry_right(struct nilfs_btree *btree,
+static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
                                    struct nilfs_btree_path *path,
                                    int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *node, *right;
-       int nchildren, rnchildren, n, move;
+       int nchildren, rnchildren, n, move, ncblk;
 
        node = nilfs_btree_get_nonroot_node(path, level);
        right = nilfs_btree_get_sib_node(path, level);
        nchildren = nilfs_btree_node_get_nchildren(node);
        rnchildren = nilfs_btree_node_get_nchildren(right);
+       ncblk = nilfs_btree_nchildren_per_block(btree);
        move = 0;
 
        n = (nchildren + rnchildren + 1) / 2 - rnchildren;
@@ -725,7 +820,7 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
                move = 1;
        }
 
-       nilfs_btree_node_move_right(btree, node, right, n);
+       nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 
        if (!buffer_dirty(path[level].bp_bh))
                nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -751,18 +846,19 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 }
 
-static void nilfs_btree_split(struct nilfs_btree *btree,
+static void nilfs_btree_split(struct nilfs_bmap *btree,
                              struct nilfs_btree_path *path,
                              int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *node, *right;
        __u64 newkey;
        __u64 newptr;
-       int nchildren, n, move;
+       int nchildren, n, move, ncblk;
 
        node = nilfs_btree_get_nonroot_node(path, level);
        right = nilfs_btree_get_sib_node(path, level);
        nchildren = nilfs_btree_node_get_nchildren(node);
+       ncblk = nilfs_btree_nchildren_per_block(btree);
        move = 0;
 
        n = (nchildren + 1) / 2;
@@ -771,7 +867,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
                move = 1;
        }
 
-       nilfs_btree_node_move_right(btree, node, right, n);
+       nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 
        if (!buffer_dirty(path[level].bp_bh))
                nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -783,8 +879,8 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
 
        if (move) {
                path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
-               nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
-                                       path[level].bp_index);
+               nilfs_btree_node_insert(right, path[level].bp_index,
+                                       *keyp, *ptrp, ncblk);
 
                *keyp = nilfs_btree_node_get_key(right, 0);
                *ptrp = path[level].bp_newreq.bpr_ptr;
@@ -805,19 +901,21 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
        path[level + 1].bp_index++;
 }
 
-static void nilfs_btree_grow(struct nilfs_btree *btree,
+static void nilfs_btree_grow(struct nilfs_bmap *btree,
                             struct nilfs_btree_path *path,
                             int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *root, *child;
-       int n;
+       int n, ncblk;
 
        root = nilfs_btree_get_root(btree);
        child = nilfs_btree_get_sib_node(path, level);
+       ncblk = nilfs_btree_nchildren_per_block(btree);
 
        n = nilfs_btree_node_get_nchildren(root);
 
-       nilfs_btree_node_move_right(btree, root, child, n);
+       nilfs_btree_node_move_right(root, child, n,
+                                   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
        nilfs_btree_node_set_level(root, level + 1);
 
        if (!buffer_dirty(path[level].bp_sib_bh))
@@ -832,11 +930,11 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
        *ptrp = path[level].bp_newreq.bpr_ptr;
 }
 
-static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
+static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
                                   const struct nilfs_btree_path *path)
 {
        struct nilfs_btree_node *node;
-       int level;
+       int level, ncmax;
 
        if (path == NULL)
                return NILFS_BMAP_INVALID_PTR;
@@ -844,29 +942,30 @@ static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
        /* left sibling */
        level = NILFS_BTREE_LEVEL_NODE_MIN;
        if (path[level].bp_index > 0) {
-               node = nilfs_btree_get_node(btree, path, level);
-               return nilfs_btree_node_get_ptr(btree, node,
-                                               path[level].bp_index - 1);
+               node = nilfs_btree_get_node(btree, path, level, &ncmax);
+               return nilfs_btree_node_get_ptr(node,
+                                               path[level].bp_index - 1,
+                                               ncmax);
        }
 
        /* parent */
        level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
        if (level <= nilfs_btree_height(btree) - 1) {
-               node = nilfs_btree_get_node(btree, path, level);
-               return nilfs_btree_node_get_ptr(btree, node,
-                                               path[level].bp_index);
+               node = nilfs_btree_get_node(btree, path, level, &ncmax);
+               return nilfs_btree_node_get_ptr(node, path[level].bp_index,
+                                               ncmax);
        }
 
        return NILFS_BMAP_INVALID_PTR;
 }
 
-static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
+static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
                                       const struct nilfs_btree_path *path,
                                       __u64 key)
 {
        __u64 ptr;
 
-       ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
+       ptr = nilfs_bmap_find_target_seq(btree, key);
        if (ptr != NILFS_BMAP_INVALID_PTR)
                /* sequential access */
                return ptr;
@@ -877,17 +976,10 @@ static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
                        return ptr;
        }
        /* block group */
-       return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
-}
-
-static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
-                                    __u64 ptr)
-{
-       btree->bt_bmap.b_last_allocated_key = key;
-       btree->bt_bmap.b_last_allocated_ptr = ptr;
+       return nilfs_bmap_find_target_in_group(btree);
 }
 
-static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
+static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
                                      struct nilfs_btree_path *path,
                                      int *levelp, __u64 key, __u64 ptr,
                                      struct nilfs_bmap_stats *stats)
@@ -895,79 +987,78 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
        struct buffer_head *bh;
        struct nilfs_btree_node *node, *parent, *sib;
        __u64 sibptr;
-       int pindex, level, ret;
+       int pindex, level, ncmax, ncblk, ret;
        struct inode *dat = NULL;
 
        stats->bs_nblocks = 0;
        level = NILFS_BTREE_LEVEL_DATA;
 
        /* allocate a new ptr for data block */
-       if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
+       if (NILFS_BMAP_USE_VBN(btree)) {
                path[level].bp_newreq.bpr_ptr =
                        nilfs_btree_find_target_v(btree, path, key);
-               dat = nilfs_bmap_get_dat(&btree->bt_bmap);
+               dat = nilfs_bmap_get_dat(btree);
        }
 
-       ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
-                                          &path[level].bp_newreq, dat);
+       ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
        if (ret < 0)
                goto err_out_data;
 
+       ncblk = nilfs_btree_nchildren_per_block(btree);
+
        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
             level < nilfs_btree_height(btree) - 1;
             level++) {
                node = nilfs_btree_get_nonroot_node(path, level);
-               if (nilfs_btree_node_get_nchildren(node) <
-                   nilfs_btree_node_nchildren_max(node, btree)) {
+               if (nilfs_btree_node_get_nchildren(node) < ncblk) {
                        path[level].bp_op = nilfs_btree_do_insert;
                        stats->bs_nblocks++;
                        goto out;
                }
 
-               parent = nilfs_btree_get_node(btree, path, level + 1);
+               parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
                pindex = path[level + 1].bp_index;
 
                /* left sibling */
                if (pindex > 0) {
-                       sibptr = nilfs_btree_node_get_ptr(btree, parent,
-                                                         pindex - 1);
+                       sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
+                                                         ncmax);
                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
                        if (ret < 0)
                                goto err_out_child_node;
                        sib = (struct nilfs_btree_node *)bh->b_data;
-                       if (nilfs_btree_node_get_nchildren(sib) <
-                           nilfs_btree_node_nchildren_max(sib, btree)) {
+                       if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
                                path[level].bp_sib_bh = bh;
                                path[level].bp_op = nilfs_btree_carry_left;
                                stats->bs_nblocks++;
                                goto out;
-                       } else
+                       } else {
                                brelse(bh);
+                       }
                }
 
                /* right sibling */
-               if (pindex <
-                   nilfs_btree_node_get_nchildren(parent) - 1) {
-                       sibptr = nilfs_btree_node_get_ptr(btree, parent,
-                                                         pindex + 1);
+               if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
+                       sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
+                                                         ncmax);
                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
                        if (ret < 0)
                                goto err_out_child_node;
                        sib = (struct nilfs_btree_node *)bh->b_data;
-                       if (nilfs_btree_node_get_nchildren(sib) <
-                           nilfs_btree_node_nchildren_max(sib, btree)) {
+                       if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
                                path[level].bp_sib_bh = bh;
                                path[level].bp_op = nilfs_btree_carry_right;
                                stats->bs_nblocks++;
                                goto out;
-                       } else
+                       } else {
                                brelse(bh);
+                       }
                }
 
                /* split */
                path[level].bp_newreq.bpr_ptr =
                        path[level - 1].bp_newreq.bpr_ptr + 1;
-               ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
+               ret = nilfs_bmap_prepare_alloc_ptr(btree,
                                                   &path[level].bp_newreq, dat);
                if (ret < 0)
                        goto err_out_child_node;
@@ -979,9 +1070,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 
                stats->bs_nblocks++;
 
-               nilfs_btree_node_init(btree,
-                                     (struct nilfs_btree_node *)bh->b_data,
-                                     0, level, 0, NULL, NULL);
+               sib = (struct nilfs_btree_node *)bh->b_data;
+               nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
                path[level].bp_sib_bh = bh;
                path[level].bp_op = nilfs_btree_split;
        }
@@ -989,7 +1079,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
        /* root */
        node = nilfs_btree_get_root(btree);
        if (nilfs_btree_node_get_nchildren(node) <
-           nilfs_btree_node_nchildren_max(node, btree)) {
+           NILFS_BTREE_ROOT_NCHILDREN_MAX) {
                path[level].bp_op = nilfs_btree_do_insert;
                stats->bs_nblocks++;
                goto out;
@@ -997,8 +1087,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 
        /* grow */
        path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
-       ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
-                                          &path[level].bp_newreq, dat);
+       ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
        if (ret < 0)
                goto err_out_child_node;
        ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
@@ -1006,8 +1095,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
        if (ret < 0)
                goto err_out_curr_node;
 
-       nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
-                             0, level, 0, NULL, NULL);
+       nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
+                             0, level, 0, ncblk, NULL, NULL);
        path[level].bp_sib_bh = bh;
        path[level].bp_op = nilfs_btree_grow;
 
@@ -1024,25 +1113,22 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 
        /* error */
  err_out_curr_node:
-       nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
-                                  dat);
+       nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
  err_out_child_node:
        for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
                nilfs_btnode_delete(path[level].bp_sib_bh);
-               nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
-                                          &path[level].bp_newreq, dat);
+               nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
 
        }
 
-       nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
-                                  dat);
+       nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
  err_out_data:
        *levelp = level;
        stats->bs_nblocks = 0;
        return ret;
 }
 
-static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
+static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
                                      struct nilfs_btree_path *path,
                                      int maxlevel, __u64 key, __u64 ptr)
 {
@@ -1051,35 +1137,33 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
 
        set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
        ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
-       if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
-               nilfs_btree_set_target_v(btree, key, ptr);
-               dat = nilfs_bmap_get_dat(&btree->bt_bmap);
+       if (NILFS_BMAP_USE_VBN(btree)) {
+               nilfs_bmap_set_target_v(btree, key, ptr);
+               dat = nilfs_bmap_get_dat(btree);
        }
 
        for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
-               nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
+               nilfs_bmap_commit_alloc_ptr(btree,
                                            &path[level - 1].bp_newreq, dat);
                path[level].bp_op(btree, path, level, &key, &ptr);
        }
 
-       if (!nilfs_bmap_dirty(&btree->bt_bmap))
-               nilfs_bmap_set_dirty(&btree->bt_bmap);
+       if (!nilfs_bmap_dirty(btree))
+               nilfs_bmap_set_dirty(btree);
 }
 
-static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
+static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
 {
-       struct nilfs_btree *btree;
        struct nilfs_btree_path *path;
        struct nilfs_bmap_stats stats;
        int level, ret;
 
-       btree = (struct nilfs_btree *)bmap;
        path = nilfs_btree_alloc_path();
        if (path == NULL)
                return -ENOMEM;
 
        ret = nilfs_btree_do_lookup(btree, path, key, NULL,
-                                   NILFS_BTREE_LEVEL_NODE_MIN);
+                                   NILFS_BTREE_LEVEL_NODE_MIN, 0);
        if (ret != -ENOENT) {
                if (ret == 0)
                        ret = -EEXIST;
@@ -1090,23 +1174,25 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
        if (ret < 0)
                goto out;
        nilfs_btree_commit_insert(btree, path, level, key, ptr);
-       nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
+       nilfs_bmap_add_blocks(btree, stats.bs_nblocks);
 
  out:
        nilfs_btree_free_path(path);
        return ret;
 }
 
-static void nilfs_btree_do_delete(struct nilfs_btree *btree,
+static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
                                  struct nilfs_btree_path *path,
                                  int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *node;
+       int ncblk;
 
        if (level < nilfs_btree_height(btree) - 1) {
                node = nilfs_btree_get_nonroot_node(path, level);
-               nilfs_btree_node_delete(btree, node, keyp, ptrp,
-                                       path[level].bp_index);
+               ncblk = nilfs_btree_nchildren_per_block(btree);
+               nilfs_btree_node_delete(node, path[level].bp_index,
+                                       keyp, ptrp, ncblk);
                if (!buffer_dirty(path[level].bp_bh))
                        nilfs_btnode_mark_dirty(path[level].bp_bh);
                if (path[level].bp_index == 0)
@@ -1114,17 +1200,18 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
                                nilfs_btree_node_get_key(node, 0));
        } else {
                node = nilfs_btree_get_root(btree);
-               nilfs_btree_node_delete(btree, node, keyp, ptrp,
-                                       path[level].bp_index);
+               nilfs_btree_node_delete(node, path[level].bp_index,
+                                       keyp, ptrp,
+                                       NILFS_BTREE_ROOT_NCHILDREN_MAX);
        }
 }
 
-static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
+static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
                                    struct nilfs_btree_path *path,
                                    int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *node, *left;
-       int nchildren, lnchildren, n;
+       int nchildren, lnchildren, n, ncblk;
 
        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
 
@@ -1132,10 +1219,11 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
        left = nilfs_btree_get_sib_node(path, level);
        nchildren = nilfs_btree_node_get_nchildren(node);
        lnchildren = nilfs_btree_node_get_nchildren(left);
+       ncblk = nilfs_btree_nchildren_per_block(btree);
 
        n = (nchildren + lnchildren) / 2 - nchildren;
 
-       nilfs_btree_node_move_right(btree, left, node, n);
+       nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
 
        if (!buffer_dirty(path[level].bp_bh))
                nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -1150,12 +1238,12 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
        path[level].bp_index += n;
 }
 
-static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
+static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
                                     struct nilfs_btree_path *path,
                                     int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *node, *right;
-       int nchildren, rnchildren, n;
+       int nchildren, rnchildren, n, ncblk;
 
        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
 
@@ -1163,10 +1251,11 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
        right = nilfs_btree_get_sib_node(path, level);
        nchildren = nilfs_btree_node_get_nchildren(node);
        rnchildren = nilfs_btree_node_get_nchildren(right);
+       ncblk = nilfs_btree_nchildren_per_block(btree);
 
        n = (nchildren + rnchildren) / 2 - nchildren;
 
-       nilfs_btree_node_move_left(btree, node, right, n);
+       nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
 
        if (!buffer_dirty(path[level].bp_bh))
                nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -1182,21 +1271,22 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
        path[level].bp_sib_bh = NULL;
 }
 
-static void nilfs_btree_concat_left(struct nilfs_btree *btree,
+static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
                                    struct nilfs_btree_path *path,
                                    int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *node, *left;
-       int n;
+       int n, ncblk;
 
        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
 
        node = nilfs_btree_get_nonroot_node(path, level);
        left = nilfs_btree_get_sib_node(path, level);
+       ncblk = nilfs_btree_nchildren_per_block(btree);
 
        n = nilfs_btree_node_get_nchildren(node);
 
-       nilfs_btree_node_move_left(btree, left, node, n);
+       nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 
        if (!buffer_dirty(path[level].bp_sib_bh))
                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
@@ -1207,21 +1297,22 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
        path[level].bp_index += nilfs_btree_node_get_nchildren(left);
 }
 
-static void nilfs_btree_concat_right(struct nilfs_btree *btree,
+static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
                                     struct nilfs_btree_path *path,
                                     int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *node, *right;
-       int n;
+       int n, ncblk;
 
        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
 
        node = nilfs_btree_get_nonroot_node(path, level);
        right = nilfs_btree_get_sib_node(path, level);
+       ncblk = nilfs_btree_nchildren_per_block(btree);
 
        n = nilfs_btree_node_get_nchildren(right);
 
-       nilfs_btree_node_move_left(btree, node, right, n);
+       nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
 
        if (!buffer_dirty(path[level].bp_bh))
                nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -1231,29 +1322,32 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
        path[level + 1].bp_index++;
 }
 
-static void nilfs_btree_shrink(struct nilfs_btree *btree,
+static void nilfs_btree_shrink(struct nilfs_bmap *btree,
                               struct nilfs_btree_path *path,
                               int level, __u64 *keyp, __u64 *ptrp)
 {
        struct nilfs_btree_node *root, *child;
-       int n;
+       int n, ncblk;
 
        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
 
        root = nilfs_btree_get_root(btree);
        child = nilfs_btree_get_nonroot_node(path, level);
+       ncblk = nilfs_btree_nchildren_per_block(btree);
 
-       nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
+       nilfs_btree_node_delete(root, 0, NULL, NULL,
+                               NILFS_BTREE_ROOT_NCHILDREN_MAX);
        nilfs_btree_node_set_level(root, level);
        n = nilfs_btree_node_get_nchildren(child);
-       nilfs_btree_node_move_left(btree, root, child, n);
+       nilfs_btree_node_move_left(root, child, n,
+                                  NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
 
        nilfs_btnode_delete(path[level].bp_bh);
        path[level].bp_bh = NULL;
 }
 
 
-static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
+static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
                                      struct nilfs_btree_path *path,
                                      int *levelp,
                                      struct nilfs_bmap_stats *stats,
@@ -1262,42 +1356,43 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
        struct buffer_head *bh;
        struct nilfs_btree_node *node, *parent, *sib;
        __u64 sibptr;
-       int pindex, level, ret;
+       int pindex, level, ncmin, ncmax, ncblk, ret;
 
        ret = 0;
        stats->bs_nblocks = 0;
+       ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
+       ncblk = nilfs_btree_nchildren_per_block(btree);
+
        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
             level < nilfs_btree_height(btree) - 1;
             level++) {
                node = nilfs_btree_get_nonroot_node(path, level);
                path[level].bp_oldreq.bpr_ptr =
-                       nilfs_btree_node_get_ptr(btree, node,
-                                                path[level].bp_index);
-               ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
+                       nilfs_btree_node_get_ptr(node, path[level].bp_index,
+                                                ncblk);
+               ret = nilfs_bmap_prepare_end_ptr(btree,
                                                 &path[level].bp_oldreq, dat);
                if (ret < 0)
                        goto err_out_child_node;
 
-               if (nilfs_btree_node_get_nchildren(node) >
-                   nilfs_btree_node_nchildren_min(node, btree)) {
+               if (nilfs_btree_node_get_nchildren(node) > ncmin) {
                        path[level].bp_op = nilfs_btree_do_delete;
                        stats->bs_nblocks++;
                        goto out;
                }
 
-               parent = nilfs_btree_get_node(btree, path, level + 1);
+               parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
                pindex = path[level + 1].bp_index;
 
                if (pindex > 0) {
                        /* left sibling */
-                       sibptr = nilfs_btree_node_get_ptr(btree, parent,
-                                                         pindex - 1);
+                       sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
+                                                         ncmax);
                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
                        if (ret < 0)
                                goto err_out_curr_node;
                        sib = (struct nilfs_btree_node *)bh->b_data;
-                       if (nilfs_btree_node_get_nchildren(sib) >
-                           nilfs_btree_node_nchildren_min(sib, btree)) {
+                       if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
                                path[level].bp_sib_bh = bh;
                                path[level].bp_op = nilfs_btree_borrow_left;
                                stats->bs_nblocks++;
@@ -1311,14 +1406,13 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
                } else if (pindex <
                           nilfs_btree_node_get_nchildren(parent) - 1) {
                        /* right sibling */
-                       sibptr = nilfs_btree_node_get_ptr(btree, parent,
-                                                         pindex + 1);
+                       sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
+                                                         ncmax);
                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
                        if (ret < 0)
                                goto err_out_curr_node;
                        sib = (struct nilfs_btree_node *)bh->b_data;
-                       if (nilfs_btree_node_get_nchildren(sib) >
-                           nilfs_btree_node_nchildren_min(sib, btree)) {
+                       if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
                                path[level].bp_sib_bh = bh;
                                path[level].bp_op = nilfs_btree_borrow_right;
                                stats->bs_nblocks++;
@@ -1349,10 +1443,10 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
 
        node = nilfs_btree_get_root(btree);
        path[level].bp_oldreq.bpr_ptr =
-               nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
+               nilfs_btree_node_get_ptr(node, path[level].bp_index,
+                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
 
-       ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
-                                        &path[level].bp_oldreq, dat);
+       ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
        if (ret < 0)
                goto err_out_child_node;
 
@@ -1367,75 +1461,68 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
 
        /* error */
  err_out_curr_node:
-       nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
+       nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
  err_out_child_node:
        for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
                brelse(path[level].bp_sib_bh);
-               nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
-                                        &path[level].bp_oldreq, dat);
+               nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
        }
        *levelp = level;
        stats->bs_nblocks = 0;
        return ret;
 }
 
-static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
+static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
                                      struct nilfs_btree_path *path,
                                      int maxlevel, struct inode *dat)
 {
        int level;
 
        for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
-               nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
-                                         &path[level].bp_oldreq, dat);
+               nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
                path[level].bp_op(btree, path, level, NULL, NULL);
        }
 
-       if (!nilfs_bmap_dirty(&btree->bt_bmap))
-               nilfs_bmap_set_dirty(&btree->bt_bmap);
+       if (!nilfs_bmap_dirty(btree))
+               nilfs_bmap_set_dirty(btree);
 }
 
-static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
+static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
 
 {
-       struct nilfs_btree *btree;
        struct nilfs_btree_path *path;
        struct nilfs_bmap_stats stats;
        struct inode *dat;
        int level, ret;
 
-       btree = (struct nilfs_btree *)bmap;
        path = nilfs_btree_alloc_path();
        if (path == NULL)
                return -ENOMEM;
 
        ret = nilfs_btree_do_lookup(btree, path, key, NULL,
-                                   NILFS_BTREE_LEVEL_NODE_MIN);
+                                   NILFS_BTREE_LEVEL_NODE_MIN, 0);
        if (ret < 0)
                goto out;
 
 
-       dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
-               nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
+       dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
 
        ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
        if (ret < 0)
                goto out;
        nilfs_btree_commit_delete(btree, path, level, dat);
-       nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
+       nilfs_bmap_sub_blocks(btree, stats.bs_nblocks);
 
 out:
        nilfs_btree_free_path(path);
        return ret;
 }
 
-static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
+static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
 {
-       struct nilfs_btree *btree;
        struct nilfs_btree_path *path;
        int ret;
 
-       btree = (struct nilfs_btree *)bmap;
        path = nilfs_btree_alloc_path();
        if (path == NULL)
                return -ENOMEM;
@@ -1447,16 +1534,14 @@ static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
        return ret;
 }
 
-static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
+static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
 {
        struct buffer_head *bh;
-       struct nilfs_btree *btree;
        struct nilfs_btree_node *root, *node;
        __u64 maxkey, nextmaxkey;
        __u64 ptr;
        int nchildren, ret;
 
-       btree = (struct nilfs_btree *)bmap;
        root = nilfs_btree_get_root(btree);
        switch (nilfs_btree_height(btree)) {
        case 2:
@@ -1467,7 +1552,8 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
                nchildren = nilfs_btree_node_get_nchildren(root);
                if (nchildren > 1)
                        return 0;
-               ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
+               ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
+                                              NILFS_BTREE_ROOT_NCHILDREN_MAX);
                ret = nilfs_btree_get_block(btree, ptr, &bh);
                if (ret < 0)
                        return ret;
@@ -1487,32 +1573,33 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
        return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
 }
 
-static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
+static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
                                   __u64 *keys, __u64 *ptrs, int nitems)
 {
        struct buffer_head *bh;
-       struct nilfs_btree *btree;
        struct nilfs_btree_node *node, *root;
        __le64 *dkeys;
        __le64 *dptrs;
        __u64 ptr;
-       int nchildren, i, ret;
+       int nchildren, ncmax, i, ret;
 
-       btree = (struct nilfs_btree *)bmap;
        root = nilfs_btree_get_root(btree);
        switch (nilfs_btree_height(btree)) {
        case 2:
                bh = NULL;
                node = root;
+               ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
                break;
        case 3:
                nchildren = nilfs_btree_node_get_nchildren(root);
                WARN_ON(nchildren > 1);
-               ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
+               ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
+                                              NILFS_BTREE_ROOT_NCHILDREN_MAX);
                ret = nilfs_btree_get_block(btree, ptr, &bh);
                if (ret < 0)
                        return ret;
                node = (struct nilfs_btree_node *)bh->b_data;
+               ncmax = nilfs_btree_nchildren_per_block(btree);
                break;
        default:
                node = NULL;
@@ -1523,10 +1610,10 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
        if (nchildren < nitems)
                nitems = nchildren;
        dkeys = nilfs_btree_node_dkeys(node);
-       dptrs = nilfs_btree_node_dptrs(node, btree);
+       dptrs = nilfs_btree_node_dptrs(node, ncmax);
        for (i = 0; i < nitems; i++) {
-               keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
-               ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
+               keys[i] = le64_to_cpu(dkeys[i]);
+               ptrs[i] = le64_to_cpu(dptrs[i]);
        }
 
        if (bh != NULL)
@@ -1536,14 +1623,13 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
 }
 
 static int
-nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
+nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
                                       union nilfs_bmap_ptr_req *dreq,
                                       union nilfs_bmap_ptr_req *nreq,
                                       struct buffer_head **bhp,
                                       struct nilfs_bmap_stats *stats)
 {
        struct buffer_head *bh;
-       struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
        struct inode *dat = NULL;
        int ret;
 
@@ -1551,12 +1637,12 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
 
        /* for data */
        /* cannot find near ptr */
-       if (NILFS_BMAP_USE_VBN(bmap)) {
+       if (NILFS_BMAP_USE_VBN(btree)) {
                dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
-               dat = nilfs_bmap_get_dat(bmap);
+               dat = nilfs_bmap_get_dat(btree);
        }
 
-       ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
+       ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
        if (ret < 0)
                return ret;
 
@@ -1564,7 +1650,7 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
        stats->bs_nblocks++;
        if (nreq != NULL) {
                nreq->bpr_ptr = dreq->bpr_ptr + 1;
-               ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
+               ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
                if (ret < 0)
                        goto err_out_dreq;
 
@@ -1581,16 +1667,16 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
 
        /* error */
  err_out_nreq:
-       nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
+       nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
  err_out_dreq:
-       nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
+       nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
        stats->bs_nblocks = 0;
        return ret;
 
 }
 
 static void
-nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
+nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
                                      __u64 key, __u64 ptr,
                                      const __u64 *keys, const __u64 *ptrs,
                                      int n,
@@ -1598,57 +1684,59 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
                                      union nilfs_bmap_ptr_req *nreq,
                                      struct buffer_head *bh)
 {
-       struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
        struct nilfs_btree_node *node;
        struct inode *dat;
        __u64 tmpptr;
+       int ncblk;
 
        /* free resources */
-       if (bmap->b_ops->bop_clear != NULL)
-               bmap->b_ops->bop_clear(bmap);
+       if (btree->b_ops->bop_clear != NULL)
+               btree->b_ops->bop_clear(btree);
 
        /* ptr must be a pointer to a buffer head. */
        set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
 
        /* convert and insert */
-       dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
-       nilfs_btree_init(bmap);
+       dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
+       nilfs_btree_init(btree);
        if (nreq != NULL) {
-               nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
-               nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
+               nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
+               nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
 
                /* create child node at level 1 */
                node = (struct nilfs_btree_node *)bh->b_data;
-               nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
-               nilfs_btree_node_insert(btree, node,
-                                       key, dreq->bpr_ptr, n);
+               ncblk = nilfs_btree_nchildren_per_block(btree);
+               nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
+               nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
                if (!buffer_dirty(bh))
                        nilfs_btnode_mark_dirty(bh);
-               if (!nilfs_bmap_dirty(bmap))
-                       nilfs_bmap_set_dirty(bmap);
+               if (!nilfs_bmap_dirty(btree))
+                       nilfs_bmap_set_dirty(btree);
 
                brelse(bh);
 
                /* create root node at level 2 */
                node = nilfs_btree_get_root(btree);
                tmpptr = nreq->bpr_ptr;
-               nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
-                                     2, 1, &keys[0], &tmpptr);
+               nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
+                                     NILFS_BTREE_ROOT_NCHILDREN_MAX,
+                                     &keys[0], &tmpptr);
        } else {
-               nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
+               nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
 
                /* create root node at level 1 */
                node = nilfs_btree_get_root(btree);
-               nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
-                                     1, n, keys, ptrs);
-               nilfs_btree_node_insert(btree, node,
-                                       key, dreq->bpr_ptr, n);
-               if (!nilfs_bmap_dirty(bmap))
-                       nilfs_bmap_set_dirty(bmap);
+               nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
+                                     NILFS_BTREE_ROOT_NCHILDREN_MAX,
+                                     keys, ptrs);
+               nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
+                                       NILFS_BTREE_ROOT_NCHILDREN_MAX);
+               if (!nilfs_bmap_dirty(btree))
+                       nilfs_bmap_set_dirty(btree);
        }
 
-       if (NILFS_BMAP_USE_VBN(bmap))
-               nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
+       if (NILFS_BMAP_USE_VBN(btree))
+               nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
 }
 
 /**
@@ -1660,7 +1748,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
  * @ptrs:
  * @n:
  */
-int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
+int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
                                   __u64 key, __u64 ptr,
                                   const __u64 *keys, const __u64 *ptrs, int n)
 {
@@ -1673,7 +1761,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
                di = &dreq;
                ni = NULL;
        } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
-                          1 << bmap->b_inode->i_blkbits)) {
+                          1 << btree->b_inode->i_blkbits)) {
                di = &dreq;
                ni = &nreq;
        } else {
@@ -1682,17 +1770,17 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
                BUG();
        }
 
-       ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
+       ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
                                                     &stats);
        if (ret < 0)
                return ret;
-       nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
+       nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
                                              di, ni, bh);
-       nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
+       nilfs_bmap_add_blocks(btree, stats.bs_nblocks);
        return 0;
 }
 
-static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
+static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
                                   struct nilfs_btree_path *path,
                                   int level,
                                   struct buffer_head *bh)
@@ -1704,17 +1792,17 @@ static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
        return 0;
 }
 
-static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
+static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
                                        struct nilfs_btree_path *path,
                                        int level, struct inode *dat)
 {
        struct nilfs_btree_node *parent;
-       int ret;
+       int ncmax, ret;
 
-       parent = nilfs_btree_get_node(btree, path, level + 1);
+       parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
        path[level].bp_oldreq.bpr_ptr =
-               nilfs_btree_node_get_ptr(btree, parent,
-                                        path[level + 1].bp_index);
+               nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
+                                        ncmax);
        path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
        ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
                                       &path[level].bp_newreq.bpr_req);
@@ -1726,7 +1814,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
                path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
                path[level].bp_ctxt.bh = path[level].bp_bh;
                ret = nilfs_btnode_prepare_change_key(
-                       &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
+                       &NILFS_BMAP_I(btree)->i_btnode_cache,
                        &path[level].bp_ctxt);
                if (ret < 0) {
                        nilfs_dat_abort_update(dat,
@@ -1739,30 +1827,31 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
        return 0;
 }
 
-static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
+static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
                                        struct nilfs_btree_path *path,
                                        int level, struct inode *dat)
 {
        struct nilfs_btree_node *parent;
+       int ncmax;
 
        nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
                                &path[level].bp_newreq.bpr_req,
-                               btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
+                               btree->b_ptr_type == NILFS_BMAP_PTR_VS);
 
        if (buffer_nilfs_node(path[level].bp_bh)) {
                nilfs_btnode_commit_change_key(
-                       &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
+                       &NILFS_BMAP_I(btree)->i_btnode_cache,
                        &path[level].bp_ctxt);
                path[level].bp_bh = path[level].bp_ctxt.bh;
        }
        set_buffer_nilfs_volatile(path[level].bp_bh);
 
-       parent = nilfs_btree_get_node(btree, path, level + 1);
-       nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
-                                path[level].bp_newreq.bpr_ptr);
+       parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
+       nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
+                                path[level].bp_newreq.bpr_ptr, ncmax);
 }
 
-static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
+static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
                                       struct nilfs_btree_path *path,
                                       int level, struct inode *dat)
 {
@@ -1770,11 +1859,11 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
                               &path[level].bp_newreq.bpr_req);
        if (buffer_nilfs_node(path[level].bp_bh))
                nilfs_btnode_abort_change_key(
-                       &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
+                       &NILFS_BMAP_I(btree)->i_btnode_cache,
                        &path[level].bp_ctxt);
 }
 
-static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
+static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
                                           struct nilfs_btree_path *path,
                                           int minlevel, int *maxlevelp,
                                           struct inode *dat)
@@ -1809,7 +1898,7 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
        return ret;
 }
 
-static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
+static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
                                           struct nilfs_btree_path *path,
                                           int minlevel, int maxlevel,
                                           struct buffer_head *bh,
@@ -1824,14 +1913,15 @@ static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
                nilfs_btree_commit_update_v(btree, path, level, dat);
 }
 
-static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
+static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
                                   struct nilfs_btree_path *path,
                                   int level, struct buffer_head *bh)
 {
        int maxlevel = 0, ret;
        struct nilfs_btree_node *parent;
-       struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
+       struct inode *dat = nilfs_bmap_get_dat(btree);
        __u64 ptr;
+       int ncmax;
 
        get_bh(bh);
        path[level].bp_bh = bh;
@@ -1841,9 +1931,10 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
                goto out;
 
        if (buffer_nilfs_volatile(path[level].bp_bh)) {
-               parent = nilfs_btree_get_node(btree, path, level + 1);
-               ptr = nilfs_btree_node_get_ptr(btree, parent,
-                                              path[level + 1].bp_index);
+               parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
+               ptr = nilfs_btree_node_get_ptr(parent,
+                                              path[level + 1].bp_index,
+                                              ncmax);
                ret = nilfs_dat_mark_dirty(dat, ptr);
                if (ret < 0)
                        goto out;
@@ -1857,10 +1948,9 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
        return ret;
 }
 
-static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
+static int nilfs_btree_propagate(struct nilfs_bmap *btree,
                                 struct buffer_head *bh)
 {
-       struct nilfs_btree *btree;
        struct nilfs_btree_path *path;
        struct nilfs_btree_node *node;
        __u64 key;
@@ -1868,7 +1958,6 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
 
        WARN_ON(!buffer_dirty(bh));
 
-       btree = (struct nilfs_btree *)bmap;
        path = nilfs_btree_alloc_path();
        if (path == NULL)
                return -ENOMEM;
@@ -1878,11 +1967,11 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
                key = nilfs_btree_node_get_key(node, 0);
                level = nilfs_btree_node_get_level(node);
        } else {
-               key = nilfs_bmap_data_get_key(bmap, bh);
+               key = nilfs_bmap_data_get_key(btree, bh);
                level = NILFS_BTREE_LEVEL_DATA;
        }
 
-       ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
+       ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
        if (ret < 0) {
                if (unlikely(ret == -ENOENT))
                        printk(KERN_CRIT "%s: key = %llu, level == %d\n",
@@ -1890,7 +1979,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
                goto out;
        }
 
-       ret = NILFS_BMAP_USE_VBN(bmap) ?
+       ret = NILFS_BMAP_USE_VBN(btree) ?
                nilfs_btree_propagate_v(btree, path, level, bh) :
                nilfs_btree_propagate_p(btree, path, level, bh);
 
@@ -1900,13 +1989,13 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
        return ret;
 }
 
-static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
+static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
                                    struct buffer_head *bh)
 {
-       return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
+       return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
 }
 
-static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
+static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
                                         struct list_head *lists,
                                         struct buffer_head *bh)
 {
@@ -1920,6 +2009,18 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
        node = (struct nilfs_btree_node *)bh->b_data;
        key = nilfs_btree_node_get_key(node, 0);
        level = nilfs_btree_node_get_level(node);
+       if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
+           level >= NILFS_BTREE_LEVEL_MAX) {
+               dump_stack();
+               printk(KERN_WARNING
+                      "%s: invalid btree level: %d (key=%llu, ino=%lu, "
+                      "blocknr=%llu)\n",
+                      __func__, level, (unsigned long long)key,
+                      NILFS_BMAP_I(btree)->vfs_inode.i_ino,
+                      (unsigned long long)bh->b_blocknr);
+               return;
+       }
+
        list_for_each(head, &lists[level]) {
                cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
                cnode = (struct nilfs_btree_node *)cbh->b_data;
@@ -1930,11 +2031,10 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
        list_add_tail(&bh->b_assoc_buffers, head);
 }
 
-static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
+static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
                                             struct list_head *listp)
 {
-       struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
-       struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
+       struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
        struct list_head lists[NILFS_BTREE_LEVEL_MAX];
        struct pagevec pvec;
        struct buffer_head *bh, *head;
@@ -1968,7 +2068,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
                list_splice_tail(&lists[level], listp);
 }
 
-static int nilfs_btree_assign_p(struct nilfs_btree *btree,
+static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
                                struct nilfs_btree_path *path,
                                int level,
                                struct buffer_head **bh,
@@ -1978,38 +2078,38 @@ static int nilfs_btree_assign_p(struct nilfs_btree *btree,
        struct nilfs_btree_node *parent;
        __u64 key;
        __u64 ptr;
-       int ret;
+       int ncmax, ret;
 
-       parent = nilfs_btree_get_node(btree, path, level + 1);
-       ptr = nilfs_btree_node_get_ptr(btree, parent,
-                                      path[level + 1].bp_index);
+       parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
+       ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
+                                      ncmax);
        if (buffer_nilfs_node(*bh)) {
                path[level].bp_ctxt.oldkey = ptr;
                path[level].bp_ctxt.newkey = blocknr;
                path[level].bp_ctxt.bh = *bh;
                ret = nilfs_btnode_prepare_change_key(
-                       &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
+                       &NILFS_BMAP_I(btree)->i_btnode_cache,
                        &path[level].bp_ctxt);
                if (ret < 0)
                        return ret;
                nilfs_btnode_commit_change_key(
-                       &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
+                       &NILFS_BMAP_I(btree)->i_btnode_cache,
                        &path[level].bp_ctxt);
                *bh = path[level].bp_ctxt.bh;
        }
 
-       nilfs_btree_node_set_ptr(btree, parent,
-                                path[level + 1].bp_index, blocknr);
+       nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
+                                ncmax);
 
        key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
        /* on-disk format */
-       binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
+       binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
        binfo->bi_dat.bi_level = level;
 
        return 0;
 }
 
-static int nilfs_btree_assign_v(struct nilfs_btree *btree,
+static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
                                struct nilfs_btree_path *path,
                                int level,
                                struct buffer_head **bh,
@@ -2017,15 +2117,15 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
                                union nilfs_binfo *binfo)
 {
        struct nilfs_btree_node *parent;
-       struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
+       struct inode *dat = nilfs_bmap_get_dat(btree);
        __u64 key;
        __u64 ptr;
        union nilfs_bmap_ptr_req req;
-       int ret;
+       int ncmax, ret;
 
-       parent = nilfs_btree_get_node(btree, path, level + 1);
-       ptr = nilfs_btree_node_get_ptr(btree, parent,
-                                      path[level + 1].bp_index);
+       parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
+       ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
+                                      ncmax);
        req.bpr_ptr = ptr;
        ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
        if (ret < 0)
@@ -2034,24 +2134,22 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
 
        key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
        /* on-disk format */
-       binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
-       binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
+       binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
+       binfo->bi_v.bi_blkoff = cpu_to_le64(key);
 
        return 0;
 }
 
-static int nilfs_btree_assign(struct nilfs_bmap *bmap,
+static int nilfs_btree_assign(struct nilfs_bmap *btree,
                              struct buffer_head **bh,
                              sector_t blocknr,
                              union nilfs_binfo *binfo)
 {
-       struct nilfs_btree *btree;
        struct nilfs_btree_path *path;
        struct nilfs_btree_node *node;
        __u64 key;
        int level, ret;
 
-       btree = (struct nilfs_btree *)bmap;
        path = nilfs_btree_alloc_path();
        if (path == NULL)
                return -ENOMEM;
@@ -2061,17 +2159,17 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
                key = nilfs_btree_node_get_key(node, 0);
                level = nilfs_btree_node_get_level(node);
        } else {
-               key = nilfs_bmap_data_get_key(bmap, *bh);
+               key = nilfs_bmap_data_get_key(btree, *bh);
                level = NILFS_BTREE_LEVEL_DATA;
        }
 
-       ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
+       ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
        if (ret < 0) {
                WARN_ON(ret == -ENOENT);
                goto out;
        }
 
-       ret = NILFS_BMAP_USE_VBN(bmap) ?
+       ret = NILFS_BMAP_USE_VBN(btree) ?
                nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
                nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
 
@@ -2081,7 +2179,7 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
        return ret;
 }
 
-static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
+static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
                                 struct buffer_head **bh,
                                 sector_t blocknr,
                                 union nilfs_binfo *binfo)
@@ -2090,7 +2188,7 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
        __u64 key;
        int ret;
 
-       ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
+       ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
                             blocknr);
        if (ret < 0)
                return ret;
@@ -2099,29 +2197,27 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
                node = (struct nilfs_btree_node *)(*bh)->b_data;
                key = nilfs_btree_node_get_key(node, 0);
        } else
-               key = nilfs_bmap_data_get_key(bmap, *bh);
+               key = nilfs_bmap_data_get_key(btree, *bh);
 
        /* on-disk format */
        binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
-       binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
+       binfo->bi_v.bi_blkoff = cpu_to_le64(key);
 
        return 0;
 }
 
-static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
+static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
 {
        struct buffer_head *bh;
-       struct nilfs_btree *btree;
        struct nilfs_btree_path *path;
        __u64 ptr;
        int ret;
 
-       btree = (struct nilfs_btree *)bmap;
        path = nilfs_btree_alloc_path();
        if (path == NULL)
                return -ENOMEM;
 
-       ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
+       ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
        if (ret < 0) {
                WARN_ON(ret == -ENOENT);
                goto out;
@@ -2135,8 +2231,8 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
        if (!buffer_dirty(bh))
                nilfs_btnode_mark_dirty(bh);
        brelse(bh);
-       if (!nilfs_bmap_dirty(&btree->bt_bmap))
-               nilfs_bmap_set_dirty(&btree->bt_bmap);
+       if (!nilfs_bmap_dirty(btree))
+               nilfs_bmap_set_dirty(btree);
 
  out:
        nilfs_btree_free_path(path);
@@ -2186,10 +2282,14 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
 int nilfs_btree_init(struct nilfs_bmap *bmap)
 {
        bmap->b_ops = &nilfs_btree_ops;
+       bmap->b_nchildren_per_block =
+               NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
        return 0;
 }
 
 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
 {
        bmap->b_ops = &nilfs_btree_ops_gc;
+       bmap->b_nchildren_per_block =
+               NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
 }
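
The btree.c changes above follow one pattern: the one-member struct nilfs_btree wrapper is gone, so the helpers take struct nilfs_bmap directly, and a node's child capacity is no longer derived through the bmap on every access but passed explicitly -- ncblk for on-disk node blocks, NILFS_BTREE_ROOT_NCHILDREN_MAX for the root embedded in the bmap, with the value cached in b_nchildren_per_block at init time. The standalone sketch below only illustrates how such a per-block capacity falls out of the block size; the header layout and constants are assumptions for illustration, not the kernel definitions.

/*
 * Standalone sketch (not kernel code): how a cached value such as
 * b_nchildren_per_block might be derived from the block size.  The
 * node header layout here is an assumption; each child is costed as
 * one 64-bit key plus one 64-bit pointer.
 */
#include <stdint.h>
#include <stdio.h>

struct btree_node_header {      /* assumed on-disk node header */
	uint8_t  flags;
	uint8_t  level;
	uint16_t nchildren;
	uint32_t pad;
};

static int nchildren_per_block(size_t blocksize)
{
	return (int)((blocksize - sizeof(struct btree_node_header)) /
		     (sizeof(uint64_t) + sizeof(uint64_t)));
}

int main(void)
{
	size_t bs;

	for (bs = 1024; bs <= 65536; bs <<= 1)
		printf("block size %6zu -> %d children per node\n",
		       bs, nchildren_per_block(bs));
	return 0;
}

Computing this once per bmap and threading it through as an argument avoids repeating the division in every node helper, which is what the ncblk/ncmax parameters above achieve.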
index 43c8c5b..22c02e3 100644 (file)
 #include "bmap.h"
 
 /**
- * struct nilfs_btree - B-tree structure
- * @bt_bmap: bmap base structure
- */
-struct nilfs_btree {
-       struct nilfs_bmap bt_bmap;
-};
-
-/**
  * struct nilfs_btree_path - A path on which B-tree operations are executed
  * @bp_bh: buffer head of node block
  * @bp_sib_bh: buffer head of sibling node block
@@ -54,7 +46,7 @@ struct nilfs_btree_path {
        union nilfs_bmap_ptr_req bp_oldreq;
        union nilfs_bmap_ptr_req bp_newreq;
        struct nilfs_btnode_chkey_ctxt bp_ctxt;
-       void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
+       void (*bp_op)(struct nilfs_bmap *, struct nilfs_btree_path *,
                      int, __u64 *, __u64 *);
 };
 
@@ -80,4 +72,6 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
                                   const __u64 *, const __u64 *, int);
 void nilfs_btree_init_gc(struct nilfs_bmap *);
 
+int nilfs_btree_broken_node_block(struct buffer_head *bh);
+
 #endif /* _NILFS_BTREE_H */
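
The header also gains nilfs_btree_broken_node_block(), matching the level check newly added to nilfs_btree_add_dirty_buffer() above: node blocks read from disk are no longer trusted blindly. A minimal, self-contained sketch of that kind of sanity check follows; the field names and limits are illustrative assumptions, not the kernel's.

/*
 * Standalone sketch: validate a node header before using it.  The
 * bounds below are assumptions chosen only to show the shape of the
 * check performed on freshly read node buffers.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define LEVEL_NODE_MIN 1    /* assumed lowest valid non-data level */
#define LEVEL_MAX      14   /* assumed exclusive upper bound */

struct node_hdr {
	uint8_t  flags;
	uint8_t  level;
	uint16_t nchildren;
};

static bool node_looks_broken(const struct node_hdr *node, int ncmax)
{
	return node->level < LEVEL_NODE_MIN ||
	       node->level >= LEVEL_MAX ||
	       node->nchildren > ncmax;
}

int main(void)
{
	struct node_hdr good = { 0, 2, 100 };
	struct node_hdr bad  = { 0, 0, 100 };

	printf("good node: %s\n", node_looks_broken(&good, 128) ? "broken" : "ok");
	printf("bad node:  %s\n", node_looks_broken(&bad, 128) ? "broken" : "ok");
	return 0;
}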
index 85c89df..b60277b 100644 (file)
@@ -141,7 +141,7 @@ static void nilfs_check_page(struct page *page)
        }
        for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) {
                p = (struct nilfs_dir_entry *)(kaddr + offs);
-               rec_len = le16_to_cpu(p->rec_len);
+               rec_len = nilfs_rec_len_from_disk(p->rec_len);
 
                if (rec_len < NILFS_DIR_REC_LEN(1))
                        goto Eshort;
@@ -199,13 +199,10 @@ fail:
 static struct page *nilfs_get_page(struct inode *dir, unsigned long n)
 {
        struct address_space *mapping = dir->i_mapping;
-       struct page *page = read_cache_page(mapping, n,
-                               (filler_t *)mapping->a_ops->readpage, NULL);
+       struct page *page = read_mapping_page(mapping, n, NULL);
+
        if (!IS_ERR(page)) {
-               wait_on_page_locked(page);
                kmap(page);
-               if (!PageUptodate(page))
-                       goto fail;
                if (!PageChecked(page))
                        nilfs_check_page(page);
                if (PageError(page))
@@ -238,7 +235,8 @@ nilfs_match(int len, const unsigned char *name, struct nilfs_dir_entry *de)
  */
 static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p)
 {
-       return (struct nilfs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len));
+       return (struct nilfs_dir_entry *)((char *)p +
+                                         nilfs_rec_len_from_disk(p->rec_len));
 }
 
 static unsigned char
@@ -329,7 +327,7 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                                        goto success;
                                }
                        }
-                       filp->f_pos += le16_to_cpu(de->rec_len);
+                       filp->f_pos += nilfs_rec_len_from_disk(de->rec_len);
                }
                nilfs_put_page(page);
        }
@@ -444,7 +442,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
                    struct page *page, struct inode *inode)
 {
        unsigned from = (char *) de - (char *) page_address(page);
-       unsigned to = from + le16_to_cpu(de->rec_len);
+       unsigned to = from + nilfs_rec_len_from_disk(de->rec_len);
        struct address_space *mapping = page->mapping;
        int err;
 
@@ -500,7 +498,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
                                /* We hit i_size */
                                name_len = 0;
                                rec_len = chunk_size;
-                               de->rec_len = cpu_to_le16(chunk_size);
+                               de->rec_len = nilfs_rec_len_to_disk(chunk_size);
                                de->inode = 0;
                                goto got_it;
                        }
@@ -514,7 +512,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
                        if (nilfs_match(namelen, name, de))
                                goto out_unlock;
                        name_len = NILFS_DIR_REC_LEN(de->name_len);
-                       rec_len = le16_to_cpu(de->rec_len);
+                       rec_len = nilfs_rec_len_from_disk(de->rec_len);
                        if (!de->inode && rec_len >= reclen)
                                goto got_it;
                        if (rec_len >= name_len + reclen)
@@ -537,8 +535,8 @@ got_it:
                struct nilfs_dir_entry *de1;
 
                de1 = (struct nilfs_dir_entry *)((char *)de + name_len);
-               de1->rec_len = cpu_to_le16(rec_len - name_len);
-               de->rec_len = cpu_to_le16(name_len);
+               de1->rec_len = nilfs_rec_len_to_disk(rec_len - name_len);
+               de->rec_len = nilfs_rec_len_to_disk(name_len);
                de = de1;
        }
        de->name_len = namelen;
@@ -569,7 +567,8 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
        struct inode *inode = mapping->host;
        char *kaddr = page_address(page);
        unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1);
-       unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len);
+       unsigned to = ((char *)dir - kaddr) +
+               nilfs_rec_len_from_disk(dir->rec_len);
        struct nilfs_dir_entry *pde = NULL;
        struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from);
        int err;
@@ -590,7 +589,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
        err = nilfs_prepare_chunk(page, mapping, from, to);
        BUG_ON(err);
        if (pde)
-               pde->rec_len = cpu_to_le16(to - from);
+               pde->rec_len = nilfs_rec_len_to_disk(to - from);
        dir->inode = 0;
        nilfs_commit_chunk(page, mapping, from, to);
        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -624,14 +623,14 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
        memset(kaddr, 0, chunk_size);
        de = (struct nilfs_dir_entry *)kaddr;
        de->name_len = 1;
-       de->rec_len = cpu_to_le16(NILFS_DIR_REC_LEN(1));
+       de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1));
        memcpy(de->name, ".\0\0", 4);
        de->inode = cpu_to_le64(inode->i_ino);
        nilfs_set_de_type(de, inode);
 
        de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1));
        de->name_len = 2;
-       de->rec_len = cpu_to_le16(chunk_size - NILFS_DIR_REC_LEN(1));
+       de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1));
        de->inode = cpu_to_le64(parent->i_ino);
        memcpy(de->name, "..\0", 4);
        nilfs_set_de_type(de, inode);
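
In dir.c every raw le16_to_cpu()/cpu_to_le16() on rec_len is replaced by nilfs_rec_len_from_disk()/nilfs_rec_len_to_disk(), which is what makes 64KB block support workable: a directory record spanning a full 64KB block does not fit in the 16-bit on-disk field. The sketch below shows the usual sentinel trick (modelled on ext2's handling); the exact constant and policy used by nilfs2 are an assumption here, shown only to illustrate what the helpers centralise.

/*
 * Standalone sketch: mapping a full-block record length to and from a
 * 16-bit on-disk field.  The sentinel value is an assumption.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_REC_LEN ((1 << 16) - 1)   /* assumed sentinel for 65536 */

static uint16_t rec_len_to_disk(unsigned len)
{
	if (len == (1 << 16))
		return MAX_REC_LEN;   /* record fills a 64KB block */
	return (uint16_t)len;
}

static unsigned rec_len_from_disk(uint16_t dlen)
{
	if (dlen == MAX_REC_LEN)
		return 1 << 16;
	return dlen;
}

int main(void)
{
	unsigned len = 1 << 16;       /* entry spanning a whole 64KB block */
	uint16_t ondisk = rec_len_to_disk(len);

	printf("in-core %u -> on-disk %u -> in-core %u\n",
	       len, (unsigned)ondisk, rec_len_from_disk(ondisk));
	return 0;
}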
index 236753d..324d80c 100644 (file)
 #include "alloc.h"
 #include "dat.h"
 
-static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct)
+static inline __le64 *nilfs_direct_dptrs(const struct nilfs_bmap *direct)
 {
        return (__le64 *)
-               ((struct nilfs_direct_node *)direct->d_bmap.b_u.u_data + 1);
+               ((struct nilfs_direct_node *)direct->b_u.u_data + 1);
 }
 
 static inline __u64
-nilfs_direct_get_ptr(const struct nilfs_direct *direct, __u64 key)
+nilfs_direct_get_ptr(const struct nilfs_bmap *direct, __u64 key)
 {
-       return nilfs_bmap_dptr_to_ptr(*(nilfs_direct_dptrs(direct) + key));
+       return le64_to_cpu(*(nilfs_direct_dptrs(direct) + key));
 }
 
-static inline void nilfs_direct_set_ptr(struct nilfs_direct *direct,
+static inline void nilfs_direct_set_ptr(struct nilfs_bmap *direct,
                                        __u64 key, __u64 ptr)
 {
-       *(nilfs_direct_dptrs(direct) + key) = nilfs_bmap_ptr_to_dptr(ptr);
+       *(nilfs_direct_dptrs(direct) + key) = cpu_to_le64(ptr);
 }
 
-static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
+static int nilfs_direct_lookup(const struct nilfs_bmap *direct,
                               __u64 key, int level, __u64 *ptrp)
 {
-       struct nilfs_direct *direct;
        __u64 ptr;
 
-       direct = (struct nilfs_direct *)bmap;  /* XXX: use macro for level 1 */
        if (key > NILFS_DIRECT_KEY_MAX || level != 1)
                return -ENOENT;
        ptr = nilfs_direct_get_ptr(direct, key);
        if (ptr == NILFS_BMAP_INVALID_PTR)
                return -ENOENT;
 
-       if (ptrp != NULL)
-               *ptrp = ptr;
+       *ptrp = ptr;
        return 0;
 }
 
-static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
+static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
                                      __u64 key, __u64 *ptrp,
                                      unsigned maxblocks)
 {
-       struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
        struct inode *dat = NULL;
        __u64 ptr, ptr2;
        sector_t blocknr;
@@ -79,8 +75,8 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
        if (ptr == NILFS_BMAP_INVALID_PTR)
                return -ENOENT;
 
-       if (NILFS_BMAP_USE_VBN(bmap)) {
-               dat = nilfs_bmap_get_dat(bmap);
+       if (NILFS_BMAP_USE_VBN(direct)) {
+               dat = nilfs_bmap_get_dat(direct);
                ret = nilfs_dat_translate(dat, ptr, &blocknr);
                if (ret < 0)
                        return ret;
@@ -106,29 +102,21 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
 }
 
 static __u64
-nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key)
+nilfs_direct_find_target_v(const struct nilfs_bmap *direct, __u64 key)
 {
        __u64 ptr;
 
-       ptr = nilfs_bmap_find_target_seq(&direct->d_bmap, key);
+       ptr = nilfs_bmap_find_target_seq(direct, key);
        if (ptr != NILFS_BMAP_INVALID_PTR)
                /* sequential access */
                return ptr;
        else
                /* block group */
-               return nilfs_bmap_find_target_in_group(&direct->d_bmap);
-}
-
-static void nilfs_direct_set_target_v(struct nilfs_direct *direct,
-                                     __u64 key, __u64 ptr)
-{
-       direct->d_bmap.b_last_allocated_key = key;
-       direct->d_bmap.b_last_allocated_ptr = ptr;
+               return nilfs_bmap_find_target_in_group(direct);
 }
 
 static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
 {
-       struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
        union nilfs_bmap_ptr_req req;
        struct inode *dat = NULL;
        struct buffer_head *bh;
@@ -136,11 +124,11 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
 
        if (key > NILFS_DIRECT_KEY_MAX)
                return -ENOENT;
-       if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR)
+       if (nilfs_direct_get_ptr(bmap, key) != NILFS_BMAP_INVALID_PTR)
                return -EEXIST;
 
        if (NILFS_BMAP_USE_VBN(bmap)) {
-               req.bpr_ptr = nilfs_direct_find_target_v(direct, key);
+               req.bpr_ptr = nilfs_direct_find_target_v(bmap, key);
                dat = nilfs_bmap_get_dat(bmap);
        }
        ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat);
@@ -150,13 +138,13 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
                set_buffer_nilfs_volatile(bh);
 
                nilfs_bmap_commit_alloc_ptr(bmap, &req, dat);
-               nilfs_direct_set_ptr(direct, key, req.bpr_ptr);
+               nilfs_direct_set_ptr(bmap, key, req.bpr_ptr);
 
                if (!nilfs_bmap_dirty(bmap))
                        nilfs_bmap_set_dirty(bmap);
 
                if (NILFS_BMAP_USE_VBN(bmap))
-                       nilfs_direct_set_target_v(direct, key, req.bpr_ptr);
+                       nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr);
 
                nilfs_bmap_add_blocks(bmap, 1);
        }
@@ -165,33 +153,30 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
 
 static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
 {
-       struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
        union nilfs_bmap_ptr_req req;
        struct inode *dat;
        int ret;
 
        if (key > NILFS_DIRECT_KEY_MAX ||
-           nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR)
+           nilfs_direct_get_ptr(bmap, key) == NILFS_BMAP_INVALID_PTR)
                return -ENOENT;
 
        dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
-       req.bpr_ptr = nilfs_direct_get_ptr(direct, key);
+       req.bpr_ptr = nilfs_direct_get_ptr(bmap, key);
 
        ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat);
        if (!ret) {
                nilfs_bmap_commit_end_ptr(bmap, &req, dat);
-               nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
+               nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR);
                nilfs_bmap_sub_blocks(bmap, 1);
        }
        return ret;
 }
 
-static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
+static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp)
 {
-       struct nilfs_direct *direct;
        __u64 key, lastkey;
 
-       direct = (struct nilfs_direct *)bmap;
        lastkey = NILFS_DIRECT_KEY_MAX + 1;
        for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++)
                if (nilfs_direct_get_ptr(direct, key) !=
@@ -211,15 +196,13 @@ static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key)
        return key > NILFS_DIRECT_KEY_MAX;
 }
 
-static int nilfs_direct_gather_data(struct nilfs_bmap *bmap,
+static int nilfs_direct_gather_data(struct nilfs_bmap *direct,
                                    __u64 *keys, __u64 *ptrs, int nitems)
 {
-       struct nilfs_direct *direct;
        __u64 key;
        __u64 ptr;
        int n;
 
-       direct = (struct nilfs_direct *)bmap;
        if (nitems > NILFS_DIRECT_NBLOCKS)
                nitems = NILFS_DIRECT_NBLOCKS;
        n = 0;
@@ -237,7 +220,6 @@ static int nilfs_direct_gather_data(struct nilfs_bmap *bmap,
 int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
                                    __u64 key, __u64 *keys, __u64 *ptrs, int n)
 {
-       struct nilfs_direct *direct;
        __le64 *dptrs;
        int ret, i, j;
 
@@ -253,12 +235,11 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
                bmap->b_ops->bop_clear(bmap);
 
        /* convert */
-       direct = (struct nilfs_direct *)bmap;
-       dptrs = nilfs_direct_dptrs(direct);
+       dptrs = nilfs_direct_dptrs(bmap);
        for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) {
                if ((j < n) && (i == keys[j])) {
                        dptrs[i] = (i != key) ?
-                               nilfs_bmap_ptr_to_dptr(ptrs[j]) :
+                               cpu_to_le64(ptrs[j]) :
                                NILFS_BMAP_INVALID_PTR;
                        j++;
                } else
@@ -269,10 +250,9 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
        return 0;
 }
 
-static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
+static int nilfs_direct_propagate(struct nilfs_bmap *bmap,
                                  struct buffer_head *bh)
 {
-       struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
        struct nilfs_palloc_req oldreq, newreq;
        struct inode *dat;
        __u64 key;
@@ -284,7 +264,7 @@ static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
 
        dat = nilfs_bmap_get_dat(bmap);
        key = nilfs_bmap_data_get_key(bmap, bh);
-       ptr = nilfs_direct_get_ptr(direct, key);
+       ptr = nilfs_direct_get_ptr(bmap, key);
        if (!buffer_nilfs_volatile(bh)) {
                oldreq.pr_entry_nr = ptr;
                newreq.pr_entry_nr = ptr;
@@ -294,20 +274,20 @@ static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
                nilfs_dat_commit_update(dat, &oldreq, &newreq,
                                        bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
                set_buffer_nilfs_volatile(bh);
-               nilfs_direct_set_ptr(direct, key, newreq.pr_entry_nr);
+               nilfs_direct_set_ptr(bmap, key, newreq.pr_entry_nr);
        } else
                ret = nilfs_dat_mark_dirty(dat, ptr);
 
        return ret;
 }
 
-static int nilfs_direct_assign_v(struct nilfs_direct *direct,
+static int nilfs_direct_assign_v(struct nilfs_bmap *direct,
                                 __u64 key, __u64 ptr,
                                 struct buffer_head **bh,
                                 sector_t blocknr,
                                 union nilfs_binfo *binfo)
 {
-       struct inode *dat = nilfs_bmap_get_dat(&direct->d_bmap);
+       struct inode *dat = nilfs_bmap_get_dat(direct);
        union nilfs_bmap_ptr_req req;
        int ret;
 
@@ -315,13 +295,13 @@ static int nilfs_direct_assign_v(struct nilfs_direct *direct,
        ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
        if (!ret) {
                nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
-               binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
-               binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
+               binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
+               binfo->bi_v.bi_blkoff = cpu_to_le64(key);
        }
        return ret;
 }
 
-static int nilfs_direct_assign_p(struct nilfs_direct *direct,
+static int nilfs_direct_assign_p(struct nilfs_bmap *direct,
                                 __u64 key, __u64 ptr,
                                 struct buffer_head **bh,
                                 sector_t blocknr,
@@ -329,7 +309,7 @@ static int nilfs_direct_assign_p(struct nilfs_direct *direct,
 {
        nilfs_direct_set_ptr(direct, key, blocknr);
 
-       binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
+       binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
        binfo->bi_dat.bi_level = 0;
 
        return 0;
@@ -340,18 +320,16 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap,
                               sector_t blocknr,
                               union nilfs_binfo *binfo)
 {
-       struct nilfs_direct *direct;
        __u64 key;
        __u64 ptr;
 
-       direct = (struct nilfs_direct *)bmap;
        key = nilfs_bmap_data_get_key(bmap, *bh);
        if (unlikely(key > NILFS_DIRECT_KEY_MAX)) {
                printk(KERN_CRIT "%s: invalid key: %llu\n", __func__,
                       (unsigned long long)key);
                return -EINVAL;
        }
-       ptr = nilfs_direct_get_ptr(direct, key);
+       ptr = nilfs_direct_get_ptr(bmap, key);
        if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) {
                printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__,
                       (unsigned long long)ptr);
@@ -359,8 +337,8 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap,
        }
 
        return NILFS_BMAP_USE_VBN(bmap) ?
-               nilfs_direct_assign_v(direct, key, ptr, bh, blocknr, binfo) :
-               nilfs_direct_assign_p(direct, key, ptr, bh, blocknr, binfo);
+               nilfs_direct_assign_v(bmap, key, ptr, bh, blocknr, binfo) :
+               nilfs_direct_assign_p(bmap, key, ptr, bh, blocknr, binfo);
 }
 
 static const struct nilfs_bmap_operations nilfs_direct_ops = {
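The direct.c changes above drop the nilfs_bmap_dptr_to_ptr()/nilfs_bmap_ptr_to_dptr() wrappers in favour of plain le64_to_cpu()/cpu_to_le64(); on-disk block pointers are simply 64-bit little-endian values. The user-space sketch below shows what such a conversion amounts to. put_le64()/get_le64() are made-up names for illustration only and are not part of the patch or of the kernel API.

/*
 * Minimal user-space sketch of the byte-order conversion that
 * cpu_to_le64()/le64_to_cpu() perform in the patch above.
 * Illustrative only; the kernel uses its own byteorder helpers.
 */
#include <stdint.h>
#include <stdio.h>

/* Encode a host-order 64-bit value as little-endian bytes. */
static void put_le64(uint8_t out[8], uint64_t v)
{
        for (int i = 0; i < 8; i++)
                out[i] = (uint8_t)(v >> (8 * i));
}

/* Decode little-endian bytes back to a host-order value. */
static uint64_t get_le64(const uint8_t in[8])
{
        uint64_t v = 0;

        for (int i = 0; i < 8; i++)
                v |= (uint64_t)in[i] << (8 * i);
        return v;
}

int main(void)
{
        uint8_t disk[8];
        uint64_t blocknr = 0x0123456789abcdefULL;

        put_le64(disk, blocknr);        /* cpu_to_le64() analogue */
        printf("round trip ok: %d\n", get_le64(disk) == blocknr);
        return 0;
}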
index a5ffd66..dc643de 100644 (file)
@@ -28,8 +28,6 @@
 #include "bmap.h"
 
 
-struct nilfs_direct;
-
 /**
  * struct nilfs_direct_node - direct node
  * @dn_flags: flags
@@ -40,15 +38,6 @@ struct nilfs_direct_node {
        __u8 pad[7];
 };
 
-/**
- * struct nilfs_direct - direct mapping
- * @d_bmap: bmap structure
- */
-struct nilfs_direct {
-       struct nilfs_bmap d_bmap;
-};
-
-
 #define NILFS_DIRECT_NBLOCKS   (NILFS_BMAP_SIZE / sizeof(__le64) - 1)
 #define NILFS_DIRECT_KEY_MIN   0
 #define NILFS_DIRECT_KEY_MAX   (NILFS_DIRECT_NBLOCKS - 1)
index 145f03c..bed3a78 100644 (file)
@@ -48,6 +48,8 @@
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include "nilfs.h"
+#include "btree.h"
+#include "btnode.h"
 #include "page.h"
 #include "mdt.h"
 #include "dat.h"
@@ -149,8 +151,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
 int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
                                   __u64 vbn, struct buffer_head **out_bh)
 {
-       int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
-                                           vbn ? : pbn, pbn, out_bh);
+       int ret;
+
+       ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
+                                       vbn ? : pbn, pbn, READ, out_bh, &pbn);
        if (ret == -EEXIST) /* internal code (cache hit) */
                ret = 0;
        return ret;
@@ -164,10 +168,15 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
        if (buffer_dirty(bh))
                return -EEXIST;
 
-       if (buffer_nilfs_node(bh))
+       if (buffer_nilfs_node(bh)) {
+               if (nilfs_btree_broken_node_block(bh)) {
+                       clear_buffer_uptodate(bh);
+                       return -EIO;
+               }
                nilfs_btnode_mark_dirty(bh);
-       else
+       } else {
                nilfs_mdt_mark_buffer_dirty(bh);
+       }
        return 0;
 }
 
index 024be8c..d01aff4 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/swap.h>
 #include <linux/slab.h>
 #include "nilfs.h"
+#include "btnode.h"
 #include "segment.h"
 #include "page.h"
 #include "mdt.h"
index 47d6d79..0842d77 100644 (file)
@@ -32,7 +32,6 @@
 #include "the_nilfs.h"
 #include "sb.h"
 #include "bmap.h"
-#include "bmap_union.h"
 
 /*
  * nilfs inode data in memory
@@ -41,7 +40,7 @@ struct nilfs_inode_info {
        __u32 i_flags;
        unsigned long  i_state;         /* Dynamic state flags */
        struct nilfs_bmap *i_bmap;
-       union nilfs_bmap_union i_bmap_union;
+       struct nilfs_bmap i_bmap_data;
        __u64 i_xattr;  /* sector_t ??? */
        __u32 i_dir_start_lookup;
        __u64 i_cno;            /* check point number for GC inode */
@@ -71,9 +70,7 @@ static inline struct nilfs_inode_info *NILFS_I(const struct inode *inode)
 static inline struct nilfs_inode_info *
 NILFS_BMAP_I(const struct nilfs_bmap *bmap)
 {
-       return container_of((union nilfs_bmap_union *)bmap,
-                           struct nilfs_inode_info,
-                           i_bmap_union);
+       return container_of(bmap, struct nilfs_inode_info, i_bmap_data);
 }
 
 static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
@@ -107,6 +104,14 @@ enum {
 };
 
 /*
+ * commit flags for nilfs_commit_super and nilfs_sync_super
+ */
+enum {
+       NILFS_SB_COMMIT = 0,    /* Commit a super block alternately */
+       NILFS_SB_COMMIT_ALL     /* Commit both super blocks */
+};
+
+/*
  * Macros to check inode numbers
  */
 #define NILFS_MDT_INO_BITS   \
@@ -270,7 +275,14 @@ extern struct nilfs_super_block *
 nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
 extern int nilfs_store_magic_and_option(struct super_block *,
                                        struct nilfs_super_block *, char *);
+extern int nilfs_check_feature_compatibility(struct super_block *,
+                                            struct nilfs_super_block *);
+extern void nilfs_set_log_cursor(struct nilfs_super_block *,
+                                struct the_nilfs *);
+extern struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *,
+                                                     int flip);
 extern int nilfs_commit_super(struct nilfs_sb_info *, int);
+extern int nilfs_cleanup_super(struct nilfs_sb_info *);
 extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64);
 extern void nilfs_detach_checkpoint(struct nilfs_sb_info *);
 
index 8de3e1e..aab11db 100644 (file)
@@ -37,7 +37,8 @@
 
 #define NILFS_BUFFER_INHERENT_BITS  \
        ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
-        (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated))
+        (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \
+        (1UL << BH_NILFS_Checked))
 
 static struct buffer_head *
 __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
@@ -129,6 +130,7 @@ void nilfs_forget_buffer(struct buffer_head *bh)
 
        lock_buffer(bh);
        clear_buffer_nilfs_volatile(bh);
+       clear_buffer_nilfs_checked(bh);
        clear_buffer_dirty(bh);
        if (nilfs_page_buffers_clean(page))
                __nilfs_clear_page_dirty(page);
@@ -480,6 +482,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping)
                                lock_buffer(bh);
                                clear_buffer_dirty(bh);
                                clear_buffer_nilfs_volatile(bh);
+                               clear_buffer_nilfs_checked(bh);
                                clear_buffer_uptodate(bh);
                                clear_buffer_mapped(bh);
                                unlock_buffer(bh);
index 8abca4d..f53d8da 100644 (file)
@@ -34,11 +34,13 @@ enum {
        BH_NILFS_Allocated = BH_PrivateStart,
        BH_NILFS_Node,
        BH_NILFS_Volatile,
+       BH_NILFS_Checked,
 };
 
 BUFFER_FNS(NILFS_Allocated, nilfs_allocated)   /* nilfs private buffers */
 BUFFER_FNS(NILFS_Node, nilfs_node)             /* nilfs node buffers */
 BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
+BUFFER_FNS(NILFS_Checked, nilfs_checked)       /* buffer is verified */
 
 
 void nilfs_mark_buffer_dirty(struct buffer_head *bh);
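The new BH_NILFS_Checked bit gets set/clear/test accessors from the BUFFER_FNS() macro above. As a rough illustration of that pattern, the stand-alone sketch below generates an equivalent (non-atomic) helper trio over a plain state word; DEFINE_FLAG_FNS() and struct fake_bh are invented for the demo, and the real kernel helpers use atomic bitops on bh->b_state.

/*
 * Rough user-space sketch of a BUFFER_FNS()-style accessor trio for a
 * buffer state bit such as BH_NILFS_Checked.  Non-atomic, demo only.
 */
#include <stdio.h>

struct fake_bh { unsigned long state; };

#define DEFINE_FLAG_FNS(name, bit)                                        \
static void set_##name(struct fake_bh *bh)   { bh->state |=  (1UL << (bit)); } \
static void clear_##name(struct fake_bh *bh) { bh->state &= ~(1UL << (bit)); } \
static int  test_##name(struct fake_bh *bh)  { return !!(bh->state & (1UL << (bit))); }

DEFINE_FLAG_FNS(checked, 3)     /* stands in for BH_NILFS_Checked */

int main(void)
{
        struct fake_bh bh = { 0 };

        set_checked(&bh);
        printf("checked=%d\n", test_checked(&bh));
        clear_checked(&bh);
        printf("checked=%d\n", test_checked(&bh));
        return 0;
}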
index bae2a51..83e3d8c 100644 (file)
@@ -91,27 +91,9 @@ static int nilfs_warn_segment_error(int err)
        return -EINVAL;
 }
 
-static void store_segsum_info(struct nilfs_segsum_info *ssi,
-                             struct nilfs_segment_summary *sum,
-                             unsigned int blocksize)
-{
-       ssi->flags = le16_to_cpu(sum->ss_flags);
-       ssi->seg_seq = le64_to_cpu(sum->ss_seq);
-       ssi->ctime = le64_to_cpu(sum->ss_create);
-       ssi->next = le64_to_cpu(sum->ss_next);
-       ssi->nblocks = le32_to_cpu(sum->ss_nblocks);
-       ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo);
-       ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes);
-
-       ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
-       ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
-
-       /* need to verify ->ss_bytes field if read ->ss_cno */
-}
-
 /**
- * calc_crc_cont - check CRC of blocks continuously
- * @sbi: nilfs_sb_info
+ * nilfs_compute_checksum - compute checksum of blocks continuously
+ * @nilfs: nilfs object
  * @bhs: buffer head of start block
  * @sum: place to store result
  * @offset: offset bytes in the first block
@@ -119,23 +101,25 @@ static void store_segsum_info(struct nilfs_segsum_info *ssi,
  * @start: DBN of start block
  * @nblock: number of blocks to be checked
  */
-static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs,
-                        u32 *sum, unsigned long offset, u64 check_bytes,
-                        sector_t start, unsigned long nblock)
+static int nilfs_compute_checksum(struct the_nilfs *nilfs,
+                                 struct buffer_head *bhs, u32 *sum,
+                                 unsigned long offset, u64 check_bytes,
+                                 sector_t start, unsigned long nblock)
 {
-       unsigned long blocksize = sbi->s_super->s_blocksize;
+       unsigned int blocksize = nilfs->ns_blocksize;
        unsigned long size;
        u32 crc;
 
        BUG_ON(offset >= blocksize);
        check_bytes -= offset;
        size = min_t(u64, check_bytes, blocksize - offset);
-       crc = crc32_le(sbi->s_nilfs->ns_crc_seed,
+       crc = crc32_le(nilfs->ns_crc_seed,
                       (unsigned char *)bhs->b_data + offset, size);
        if (--nblock > 0) {
                do {
-                       struct buffer_head *bh
-                               = sb_bread(sbi->s_super, ++start);
+                       struct buffer_head *bh;
+
+                       bh = __bread(nilfs->ns_bdev, ++start, blocksize);
                        if (!bh)
                                return -EIO;
                        check_bytes -= size;
@@ -150,12 +134,12 @@ static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs,
 
 /**
  * nilfs_read_super_root_block - read super root block
- * @sb: super_block
+ * @nilfs: nilfs object
  * @sr_block: disk block number of the super root block
  * @pbh: address of a buffer_head pointer to return super root buffer
  * @check: CRC check flag
  */
-int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
+int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
                                struct buffer_head **pbh, int check)
 {
        struct buffer_head *bh_sr;
@@ -164,7 +148,7 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
        int ret;
 
        *pbh = NULL;
-       bh_sr = sb_bread(sb, sr_block);
+       bh_sr = __bread(nilfs->ns_bdev, sr_block, nilfs->ns_blocksize);
        if (unlikely(!bh_sr)) {
                ret = NILFS_SEG_FAIL_IO;
                goto failed;
@@ -174,12 +158,13 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
        if (check) {
                unsigned bytes = le16_to_cpu(sr->sr_bytes);
 
-               if (bytes == 0 || bytes > sb->s_blocksize) {
+               if (bytes == 0 || bytes > nilfs->ns_blocksize) {
                        ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
                        goto failed_bh;
                }
-               if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc,
-                                 sizeof(sr->sr_sum), bytes, sr_block, 1)) {
+               if (nilfs_compute_checksum(
+                           nilfs, bh_sr, &crc, sizeof(sr->sr_sum), bytes,
+                           sr_block, 1)) {
                        ret = NILFS_SEG_FAIL_IO;
                        goto failed_bh;
                }
@@ -199,64 +184,76 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
 }
 
 /**
- * load_segment_summary - read segment summary of the specified partial segment
- * @sbi: nilfs_sb_info
- * @pseg_start: start disk block number of partial segment
- * @seg_seq: sequence number requested
- * @ssi: pointer to nilfs_segsum_info struct to store information
+ * nilfs_read_log_header - read summary header of the specified log
+ * @nilfs: nilfs object
+ * @start_blocknr: start block number of the log
+ * @sum: pointer to return segment summary structure
  */
-static int
-load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start,
-                    u64 seg_seq, struct nilfs_segsum_info *ssi)
+static struct buffer_head *
+nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr,
+                     struct nilfs_segment_summary **sum)
 {
        struct buffer_head *bh_sum;
-       struct nilfs_segment_summary *sum;
+
+       bh_sum = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize);
+       if (bh_sum)
+               *sum = (struct nilfs_segment_summary *)bh_sum->b_data;
+       return bh_sum;
+}
+
+/**
+ * nilfs_validate_log - verify consistency of log
+ * @nilfs: nilfs object
+ * @seg_seq: sequence number of segment
+ * @bh_sum: buffer head of summary block
+ * @sum: segment summary struct
+ */
+static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq,
+                             struct buffer_head *bh_sum,
+                             struct nilfs_segment_summary *sum)
+{
        unsigned long nblock;
        u32 crc;
-       int ret = NILFS_SEG_FAIL_IO;
+       int ret;
 
-       bh_sum = sb_bread(sbi->s_super, pseg_start);
-       if (!bh_sum)
+       ret = NILFS_SEG_FAIL_MAGIC;
+       if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC)
                goto out;
 
-       sum = (struct nilfs_segment_summary *)bh_sum->b_data;
-
-       /* Check consistency of segment summary */
-       if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) {
-               ret = NILFS_SEG_FAIL_MAGIC;
-               goto failed;
-       }
-       store_segsum_info(ssi, sum, sbi->s_super->s_blocksize);
-       if (seg_seq != ssi->seg_seq) {
-               ret = NILFS_SEG_FAIL_SEQ;
-               goto failed;
-       }
+       ret = NILFS_SEG_FAIL_SEQ;
+       if (le64_to_cpu(sum->ss_seq) != seg_seq)
+               goto out;
 
-       nblock = ssi->nblocks;
-       if (unlikely(nblock == 0 ||
-                    nblock > sbi->s_nilfs->ns_blocks_per_segment)) {
+       nblock = le32_to_cpu(sum->ss_nblocks);
+       ret = NILFS_SEG_FAIL_CONSISTENCY;
+       if (unlikely(nblock == 0 || nblock > nilfs->ns_blocks_per_segment))
                /* This limits the number of blocks read in the CRC check */
-               ret = NILFS_SEG_FAIL_CONSISTENCY;
-               goto failed;
-       }
-       if (calc_crc_cont(sbi, bh_sum, &crc, sizeof(sum->ss_datasum),
-                         ((u64)nblock << sbi->s_super->s_blocksize_bits),
-                         pseg_start, nblock)) {
-               ret = NILFS_SEG_FAIL_IO;
-               goto failed;
-       }
-       if (crc == le32_to_cpu(sum->ss_datasum))
-               ret = 0;
-       else
-               ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
- failed:
-       brelse(bh_sum);
- out:
+               goto out;
+
+       ret = NILFS_SEG_FAIL_IO;
+       if (nilfs_compute_checksum(nilfs, bh_sum, &crc, sizeof(sum->ss_datasum),
+                                  ((u64)nblock << nilfs->ns_blocksize_bits),
+                                  bh_sum->b_blocknr, nblock))
+               goto out;
+
+       ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
+       if (crc != le32_to_cpu(sum->ss_datasum))
+               goto out;
+       ret = 0;
+out:
        return ret;
 }
 
-static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
-                       unsigned int *offset, unsigned int bytes)
+/**
+ * nilfs_read_summary_info - read an item on summary blocks of a log
+ * @nilfs: nilfs object
+ * @pbh: the current buffer head on summary blocks [in, out]
+ * @offset: the current byte offset on summary blocks [in, out]
+ * @bytes: byte size of the item to be read
+ */
+static void *nilfs_read_summary_info(struct the_nilfs *nilfs,
+                                    struct buffer_head **pbh,
+                                    unsigned int *offset, unsigned int bytes)
 {
        void *ptr;
        sector_t blocknr;
@@ -265,7 +262,8 @@ static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
        if (bytes > (*pbh)->b_size - *offset) {
                blocknr = (*pbh)->b_blocknr;
                brelse(*pbh);
-               *pbh = sb_bread(sb, blocknr + 1);
+               *pbh = __bread(nilfs->ns_bdev, blocknr + 1,
+                              nilfs->ns_blocksize);
                if (unlikely(!*pbh))
                        return NULL;
                *offset = 0;
@@ -275,9 +273,18 @@ static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
        return ptr;
 }
 
-static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
-                       unsigned int *offset, unsigned int bytes,
-                       unsigned long count)
+/**
+ * nilfs_skip_summary_info - skip items on summary blocks of a log
+ * @nilfs: nilfs object
+ * @pbh: the current buffer head on summary blocks [in, out]
+ * @offset: the current byte offset on summary blocks [in, out]
+ * @bytes: byte size of the item to be skipped
+ * @count: number of items to be skipped
+ */
+static void nilfs_skip_summary_info(struct the_nilfs *nilfs,
+                                   struct buffer_head **pbh,
+                                   unsigned int *offset, unsigned int bytes,
+                                   unsigned long count)
 {
        unsigned int rest_item_in_current_block
                = ((*pbh)->b_size - *offset) / bytes;
@@ -294,36 +301,46 @@ static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
                *offset = bytes * (count - (bcnt - 1) * nitem_per_block);
 
                brelse(*pbh);
-               *pbh = sb_bread(sb, blocknr + bcnt);
+               *pbh = __bread(nilfs->ns_bdev, blocknr + bcnt,
+                              nilfs->ns_blocksize);
        }
 }
 
-static int
-collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
-                          struct nilfs_segsum_info *ssi,
-                          struct list_head *head)
+/**
+ * nilfs_scan_dsync_log - get block information of a log written for data sync
+ * @nilfs: nilfs object
+ * @start_blocknr: start block number of the log
+ * @sum: log summary information
+ * @head: list head to add nilfs_recovery_block struct
+ */
+static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
+                               struct nilfs_segment_summary *sum,
+                               struct list_head *head)
 {
        struct buffer_head *bh;
        unsigned int offset;
-       unsigned long nfinfo = ssi->nfinfo;
-       sector_t blocknr = sum_blocknr + ssi->nsumblk;
+       u32 nfinfo, sumbytes;
+       sector_t blocknr;
        ino_t ino;
        int err = -EIO;
 
+       nfinfo = le32_to_cpu(sum->ss_nfinfo);
        if (!nfinfo)
                return 0;
 
-       bh = sb_bread(sbi->s_super, sum_blocknr);
+       sumbytes = le32_to_cpu(sum->ss_sumbytes);
+       blocknr = start_blocknr + DIV_ROUND_UP(sumbytes, nilfs->ns_blocksize);
+       bh = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize);
        if (unlikely(!bh))
                goto out;
 
-       offset = le16_to_cpu(
-               ((struct nilfs_segment_summary *)bh->b_data)->ss_bytes);
+       offset = le16_to_cpu(sum->ss_bytes);
        for (;;) {
                unsigned long nblocks, ndatablk, nnodeblk;
                struct nilfs_finfo *finfo;
 
-               finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo));
+               finfo = nilfs_read_summary_info(nilfs, &bh, &offset,
+                                               sizeof(*finfo));
                if (unlikely(!finfo))
                        goto out;
 
@@ -336,8 +353,8 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
                        struct nilfs_recovery_block *rb;
                        struct nilfs_binfo_v *binfo;
 
-                       binfo = segsum_get(sbi->s_super, &bh, &offset,
-                                          sizeof(*binfo));
+                       binfo = nilfs_read_summary_info(nilfs, &bh, &offset,
+                                                       sizeof(*binfo));
                        if (unlikely(!binfo))
                                goto out;
 
@@ -355,9 +372,9 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
                }
                if (--nfinfo == 0)
                        break;
-               blocknr += nnodeblk; /* always 0 for the data sync segments */
-               segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64),
-                           nnodeblk);
+               blocknr += nnodeblk; /* always 0 for data sync logs */
+               nilfs_skip_summary_info(nilfs, &bh, &offset, sizeof(__le64),
+                                       nnodeblk);
                if (unlikely(!bh))
                        goto out;
        }
@@ -467,14 +484,14 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
        return err;
 }
 
-static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi,
+static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
                                     struct nilfs_recovery_block *rb,
                                     struct page *page)
 {
        struct buffer_head *bh_org;
        void *kaddr;
 
-       bh_org = sb_bread(sbi->s_super, rb->blocknr);
+       bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
        if (unlikely(!bh_org))
                return -EIO;
 
@@ -485,13 +502,14 @@ static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi,
        return 0;
 }
 
-static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
-                               struct list_head *head,
-                               unsigned long *nr_salvaged_blocks)
+static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
+                                     struct nilfs_sb_info *sbi,
+                                     struct list_head *head,
+                                     unsigned long *nr_salvaged_blocks)
 {
        struct inode *inode;
        struct nilfs_recovery_block *rb, *n;
-       unsigned blocksize = sbi->s_super->s_blocksize;
+       unsigned blocksize = nilfs->ns_blocksize;
        struct page *page;
        loff_t pos;
        int err = 0, err2 = 0;
@@ -511,7 +529,7 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
                if (unlikely(err))
                        goto failed_inode;
 
-               err = nilfs_recovery_copy_block(sbi, rb, page);
+               err = nilfs_recovery_copy_block(nilfs, rb, page);
                if (unlikely(err))
                        goto failed_page;
 
@@ -551,18 +569,20 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
 /**
  * nilfs_do_roll_forward - salvage logical segments newer than the latest
  * checkpoint
+ * @nilfs: nilfs object
  * @sbi: nilfs_sb_info
- * @nilfs: the_nilfs
  * @ri: pointer to a nilfs_recovery_info
  */
 static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
                                 struct nilfs_sb_info *sbi,
                                 struct nilfs_recovery_info *ri)
 {
-       struct nilfs_segsum_info ssi;
+       struct buffer_head *bh_sum = NULL;
+       struct nilfs_segment_summary *sum;
        sector_t pseg_start;
        sector_t seg_start, seg_end;  /* Starting/ending DBN of full segment */
        unsigned long nsalvaged_blocks = 0;
+       unsigned int flags;
        u64 seg_seq;
        __u64 segnum, nextnum = 0;
        int empty_seg = 0;
@@ -581,8 +601,14 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
        nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
 
        while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {
+               brelse(bh_sum);
+               bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum);
+               if (!bh_sum) {
+                       err = -EIO;
+                       goto failed;
+               }
 
-               ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi);
+               ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum);
                if (ret) {
                        if (ret == NILFS_SEG_FAIL_IO) {
                                err = -EIO;
@@ -590,33 +616,38 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
                        }
                        goto strayed;
                }
-               if (unlikely(NILFS_SEG_HAS_SR(&ssi)))
+
+               flags = le16_to_cpu(sum->ss_flags);
+               if (flags & NILFS_SS_SR)
                        goto confused;
 
                /* Found a valid partial segment; do recovery actions */
-               nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
+               nextnum = nilfs_get_segnum_of_block(nilfs,
+                                                   le64_to_cpu(sum->ss_next));
                empty_seg = 0;
-               nilfs->ns_ctime = ssi.ctime;
-               if (!(ssi.flags & NILFS_SS_GC))
-                       nilfs->ns_nongc_ctime = ssi.ctime;
+               nilfs->ns_ctime = le64_to_cpu(sum->ss_create);
+               if (!(flags & NILFS_SS_GC))
+                       nilfs->ns_nongc_ctime = nilfs->ns_ctime;
 
                switch (state) {
                case RF_INIT_ST:
-                       if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi))
+                       if (!(flags & NILFS_SS_LOGBGN) ||
+                           !(flags & NILFS_SS_SYNDT))
                                goto try_next_pseg;
                        state = RF_DSYNC_ST;
                        /* Fall through */
                case RF_DSYNC_ST:
-                       if (!NILFS_SEG_DSYNC(&ssi))
+                       if (!(flags & NILFS_SS_SYNDT))
                                goto confused;
 
-                       err = collect_blocks_from_segsum(
-                               sbi, pseg_start, &ssi, &dsync_blocks);
+                       err = nilfs_scan_dsync_log(nilfs, pseg_start, sum,
+                                                  &dsync_blocks);
                        if (unlikely(err))
                                goto failed;
-                       if (NILFS_SEG_LOGEND(&ssi)) {
-                               err = recover_dsync_blocks(
-                                       sbi, &dsync_blocks, &nsalvaged_blocks);
+                       if (flags & NILFS_SS_LOGEND) {
+                               err = nilfs_recover_dsync_blocks(
+                                       nilfs, sbi, &dsync_blocks,
+                                       &nsalvaged_blocks);
                                if (unlikely(err))
                                        goto failed;
                                state = RF_INIT_ST;
@@ -627,7 +658,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
  try_next_pseg:
                if (pseg_start == ri->ri_lsegs_end)
                        break;
-               pseg_start += ssi.nblocks;
+               pseg_start += le32_to_cpu(sum->ss_nblocks);
                if (pseg_start < seg_end)
                        continue;
                goto feed_segment;
@@ -652,8 +683,9 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
                ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
        }
  out:
+       brelse(bh_sum);
        dispose_recovery_list(&dsync_blocks);
-       nilfs_detach_writer(sbi->s_nilfs, sbi);
+       nilfs_detach_writer(nilfs, sbi);
        return err;
 
  confused:
@@ -667,7 +699,6 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
 }
 
 static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
-                                     struct nilfs_sb_info *sbi,
                                      struct nilfs_recovery_info *ri)
 {
        struct buffer_head *bh;
@@ -677,7 +708,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
            nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
                return;
 
-       bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start);
+       bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize);
        BUG_ON(!bh);
        memset(bh->b_data, 0, bh->b_size);
        set_buffer_dirty(bh);
@@ -690,9 +721,8 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
 }
 
 /**
- * nilfs_recover_logical_segments - salvage logical segments written after
- * the latest super root
- * @nilfs: the_nilfs
+ * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
+ * @nilfs: nilfs object
  * @sbi: nilfs_sb_info
  * @ri: pointer to a nilfs_recovery_info struct to store search results.
  *
@@ -709,9 +739,9 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
  *
  * %-ENOMEM - Insufficient memory available.
  */
-int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
-                                  struct nilfs_sb_info *sbi,
-                                  struct nilfs_recovery_info *ri)
+int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
+                             struct nilfs_sb_info *sbi,
+                             struct nilfs_recovery_info *ri)
 {
        int err;
 
@@ -751,7 +781,7 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
                        goto failed;
                }
 
-               nilfs_finish_roll_forward(nilfs, sbi, ri);
+               nilfs_finish_roll_forward(nilfs, ri);
        }
 
  failed:
@@ -762,7 +792,6 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
 /**
  * nilfs_search_super_root - search the latest valid super root
  * @nilfs: the_nilfs
- * @sbi: nilfs_sb_info
  * @ri: pointer to a nilfs_recovery_info struct to store search results.
  *
  * nilfs_search_super_root() looks for the latest super-root from a partial
@@ -775,14 +804,19 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
  * %-EINVAL - No valid segment found
  *
  * %-EIO - I/O error
+ *
+ * %-ENOMEM - Insufficient memory available.
  */
-int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
+int nilfs_search_super_root(struct the_nilfs *nilfs,
                            struct nilfs_recovery_info *ri)
 {
-       struct nilfs_segsum_info ssi;
+       struct buffer_head *bh_sum = NULL;
+       struct nilfs_segment_summary *sum;
        sector_t pseg_start, pseg_end, sr_pseg_start = 0;
        sector_t seg_start, seg_end; /* range of full segment (block number) */
        sector_t b, end;
+       unsigned long nblocks;
+       unsigned int flags;
        u64 seg_seq;
        __u64 segnum, nextnum = 0;
        __u64 cno;
@@ -801,17 +835,24 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
        /* Read ahead segment */
        b = seg_start;
        while (b <= seg_end)
-               sb_breadahead(sbi->s_super, b++);
+               __breadahead(nilfs->ns_bdev, b++, nilfs->ns_blocksize);
 
        for (;;) {
-               /* Load segment summary */
-               ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi);
+               brelse(bh_sum);
+               ret = NILFS_SEG_FAIL_IO;
+               bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum);
+               if (!bh_sum)
+                       goto failed;
+
+               ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum);
                if (ret) {
                        if (ret == NILFS_SEG_FAIL_IO)
                                goto failed;
                        goto strayed;
                }
-               pseg_end = pseg_start + ssi.nblocks - 1;
+
+               nblocks = le32_to_cpu(sum->ss_nblocks);
+               pseg_end = pseg_start + nblocks - 1;
                if (unlikely(pseg_end > seg_end)) {
                        ret = NILFS_SEG_FAIL_CONSISTENCY;
                        goto strayed;
@@ -821,11 +862,13 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
                ri->ri_pseg_start = pseg_start;
                ri->ri_seq = seg_seq;
                ri->ri_segnum = segnum;
-               nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
+               nextnum = nilfs_get_segnum_of_block(nilfs,
+                                                   le64_to_cpu(sum->ss_next));
                ri->ri_nextnum = nextnum;
                empty_seg = 0;
 
-               if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) {
+               flags = le16_to_cpu(sum->ss_flags);
+               if (!(flags & NILFS_SS_SR) && !scan_newer) {
                        /* This will never happen because a superblock
                           (last_segment) always points to a pseg
                           having a super root. */
@@ -836,14 +879,15 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
                if (pseg_start == seg_start) {
                        nilfs_get_segment_range(nilfs, nextnum, &b, &end);
                        while (b <= end)
-                               sb_breadahead(sbi->s_super, b++);
+                               __breadahead(nilfs->ns_bdev, b++,
+                                            nilfs->ns_blocksize);
                }
-               if (!NILFS_SEG_HAS_SR(&ssi)) {
-                       if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
+               if (!(flags & NILFS_SS_SR)) {
+                       if (!ri->ri_lsegs_start && (flags & NILFS_SS_LOGBGN)) {
                                ri->ri_lsegs_start = pseg_start;
                                ri->ri_lsegs_start_seq = seg_seq;
                        }
-                       if (NILFS_SEG_LOGEND(&ssi))
+                       if (flags & NILFS_SS_LOGEND)
                                ri->ri_lsegs_end = pseg_start;
                        goto try_next_pseg;
                }
@@ -854,12 +898,12 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
                ri->ri_lsegs_start = ri->ri_lsegs_end = 0;
 
                nilfs_dispose_segment_list(&segments);
-               nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start)
-                       + ssi.nblocks - seg_start;
+               sr_pseg_start = pseg_start;
+               nilfs->ns_pseg_offset = pseg_start + nblocks - seg_start;
                nilfs->ns_seg_seq = seg_seq;
                nilfs->ns_segnum = segnum;
                nilfs->ns_cno = cno;  /* nilfs->ns_cno = ri->ri_cno + 1 */
-               nilfs->ns_ctime = ssi.ctime;
+               nilfs->ns_ctime = le64_to_cpu(sum->ss_create);
                nilfs->ns_nextnum = nextnum;
 
                if (scan_newer)
@@ -870,15 +914,9 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
                        scan_newer = 1;
                }
 
-               /* reset region for roll-forward */
-               pseg_start += ssi.nblocks;
-               if (pseg_start < seg_end)
-                       continue;
-               goto feed_segment;
-
  try_next_pseg:
                /* Standing on a course, or met an inconsistent state */
-               pseg_start += ssi.nblocks;
+               pseg_start += nblocks;
                if (pseg_start < seg_end)
                        continue;
                goto feed_segment;
@@ -909,6 +947,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
 
  super_root_found:
        /* Updating pointers relating to the latest checkpoint */
+       brelse(bh_sum);
        list_splice_tail(&segments, &ri->ri_used_segments);
        nilfs->ns_last_pseg = sr_pseg_start;
        nilfs->ns_last_seq = nilfs->ns_seg_seq;
@@ -916,6 +955,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
        return 0;
 
  failed:
+       brelse(bh_sum);
        nilfs_dispose_segment_list(&segments);
        return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
 }
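nilfs_compute_checksum() above carries a single CRC across a run of consecutive blocks, reading each one with __bread(). The user-space sketch below shows the same "continue the CRC chunk by chunk" idea using zlib's incremental crc32(); the kernel itself uses crc32_le() with a per-filesystem seed, so this approximates the technique, not the on-disk checksum.

/*
 * Sketch of checksumming a buffer "block by block" while carrying one
 * running CRC across block boundaries, as nilfs_compute_checksum()
 * does over disk blocks.  Uses zlib; build with: cc demo.c -lz
 */
#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define BLOCKSIZE 4096

static unsigned long crc_of_blocks(const unsigned char *data, size_t len)
{
        unsigned long crc = crc32(0L, Z_NULL, 0);       /* initial value */
        size_t off = 0;

        while (off < len) {
                size_t chunk = len - off < BLOCKSIZE ? len - off : BLOCKSIZE;

                /* Continue the running CRC with the next "block". */
                crc = crc32(crc, data + off, (uInt)chunk);
                off += chunk;
        }
        return crc;
}

int main(void)
{
        static unsigned char buf[3 * BLOCKSIZE + 123];

        memset(buf, 0xa5, sizeof(buf));
        printf("crc=0x%08lx\n", crc_of_blocks(buf, sizeof(buf)));
        return 0;
}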
index 85fbb66..b04f08c 100644 (file)
@@ -54,17 +54,6 @@ struct nilfs_segsum_info {
        sector_t                next;
 };
 
-/* macro for the flags */
-#define NILFS_SEG_HAS_SR(sum)    ((sum)->flags & NILFS_SS_SR)
-#define NILFS_SEG_LOGBGN(sum)    ((sum)->flags & NILFS_SS_LOGBGN)
-#define NILFS_SEG_LOGEND(sum)    ((sum)->flags & NILFS_SS_LOGEND)
-#define NILFS_SEG_DSYNC(sum)     ((sum)->flags & NILFS_SS_SYNDT)
-#define NILFS_SEG_SIMPLEX(sum) \
-       (((sum)->flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == \
-        (NILFS_SS_LOGBGN | NILFS_SS_LOGEND))
-
-#define NILFS_SEG_EMPTY(sum)   ((sum)->nblocks == (sum)->nsumblk)
-
 /**
  * struct nilfs_segment_buffer - Segment buffer
  * @sb_super: back pointer to a superblock struct
@@ -141,6 +130,19 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *,
                                struct buffer_head **);
 void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *);
 
+static inline int nilfs_segbuf_simplex(struct nilfs_segment_buffer *segbuf)
+{
+       unsigned int flags = segbuf->sb_sum.flags;
+
+       return (flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) ==
+               (NILFS_SS_LOGBGN | NILFS_SS_LOGEND);
+}
+
+static inline int nilfs_segbuf_empty(struct nilfs_segment_buffer *segbuf)
+{
+       return segbuf->sb_sum.nblocks == segbuf->sb_sum.nsumblk;
+}
+
 static inline void
 nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf,
                               struct buffer_head *bh)
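nilfs_segbuf_simplex() above tests that a log both begins and ends in the segment buffer by checking that all bits of a mask are set, not just any of them. The snippet below isolates that flag test; SS_LOGBGN/SS_LOGEND are illustrative stand-ins rather than the real NILFS_SS_* definitions.

/*
 * "(flags & mask) == mask" is true only when every bit of the mask is
 * set, unlike "flags & mask", which is true when any of them is.
 */
#include <stdio.h>

#define SS_LOGBGN 0x0001        /* illustrative stand-ins, not the */
#define SS_LOGEND 0x0002        /* real NILFS_SS_* flag values     */

static int is_simplex(unsigned int flags)
{
        const unsigned int mask = SS_LOGBGN | SS_LOGEND;

        return (flags & mask) == mask;
}

int main(void)
{
        printf("%d %d %d\n",
               is_simplex(SS_LOGBGN),                   /* 0: begins only */
               is_simplex(SS_LOGEND),                   /* 0: ends only   */
               is_simplex(SS_LOGBGN | SS_LOGEND));      /* 1: whole log   */
        return 0;
}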
index c920164..9fd051a 100644 (file)
@@ -1914,12 +1914,12 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
                        }
                }
 
-               if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) {
-                       if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) {
+               if (!nilfs_segbuf_simplex(segbuf)) {
+                       if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) {
                                set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
                                sci->sc_lseg_stime = jiffies;
                        }
-                       if (NILFS_SEG_LOGEND(&segbuf->sb_sum))
+                       if (segbuf->sb_sum.flags & NILFS_SS_LOGEND)
                                clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
                }
        }
@@ -1951,7 +1951,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
        if (update_sr) {
                nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
                                       segbuf->sb_sum.seg_seq, nilfs->ns_cno++);
-               set_nilfs_sb_dirty(nilfs);
 
                clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
                clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
@@ -2082,7 +2081,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
 
                /* Avoid empty segment */
                if (sci->sc_stage.scnt == NILFS_ST_DONE &&
-                   NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) {
+                   nilfs_segbuf_empty(sci->sc_curseg)) {
                        nilfs_segctor_abort_construction(sci, nilfs, 1);
                        goto out;
                }
@@ -2408,6 +2407,7 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
 {
        struct nilfs_sb_info *sbi = sci->sc_sbi;
        struct the_nilfs *nilfs = sbi->s_nilfs;
+       struct nilfs_super_block **sbp;
        int err = 0;
 
        nilfs_segctor_accept(sci);
@@ -2423,8 +2423,13 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
                if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
                    nilfs_discontinued(nilfs)) {
                        down_write(&nilfs->ns_sem);
-                       err = nilfs_commit_super(
-                               sbi, nilfs_altsb_need_update(nilfs));
+                       err = -EIO;
+                       sbp = nilfs_prepare_super(sbi,
+                                                 nilfs_sb_will_flip(nilfs));
+                       if (likely(sbp)) {
+                               nilfs_set_log_cursor(sbp[0], nilfs);
+                               err = nilfs_commit_super(sbi, NILFS_SB_COMMIT);
+                       }
                        up_write(&nilfs->ns_sem);
                }
        }
index 01e20db..17c487b 100644 (file)
@@ -234,13 +234,13 @@ extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *);
 extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *);
 
 /* recovery.c */
-extern int nilfs_read_super_root_block(struct super_block *, sector_t,
+extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t,
                                       struct buffer_head **, int);
-extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *,
+extern int nilfs_search_super_root(struct the_nilfs *,
                                   struct nilfs_recovery_info *);
-extern int nilfs_recover_logical_segments(struct the_nilfs *,
-                                         struct nilfs_sb_info *,
-                                         struct nilfs_recovery_info *);
+extern int nilfs_salvage_orphan_logs(struct the_nilfs *,
+                                    struct nilfs_sb_info *,
+                                    struct nilfs_recovery_info *);
 extern void nilfs_dispose_segment_list(struct list_head *);
 
 #endif /* _NILFS_SEGMENT_H */
index 414ef68..26078b3 100644 (file)
@@ -55,6 +55,8 @@
 #include "nilfs.h"
 #include "mdt.h"
 #include "alloc.h"
+#include "btree.h"
+#include "btnode.h"
 #include "page.h"
 #include "cpfile.h"
 #include "ifile.h"
@@ -74,6 +76,25 @@ struct kmem_cache *nilfs_btree_path_cache;
 
 static int nilfs_remount(struct super_block *sb, int *flags, char *data);
 
+static void nilfs_set_error(struct nilfs_sb_info *sbi)
+{
+       struct the_nilfs *nilfs = sbi->s_nilfs;
+       struct nilfs_super_block **sbp;
+
+       down_write(&nilfs->ns_sem);
+       if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
+               nilfs->ns_mount_state |= NILFS_ERROR_FS;
+               sbp = nilfs_prepare_super(sbi, 0);
+               if (likely(sbp)) {
+                       sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
+                       if (sbp[1])
+                               sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
+                       nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL);
+               }
+       }
+       up_write(&nilfs->ns_sem);
+}
+
 /**
  * nilfs_error() - report failure condition on a filesystem
  *
@@ -99,16 +120,7 @@ void nilfs_error(struct super_block *sb, const char *function,
        va_end(args);
 
        if (!(sb->s_flags & MS_RDONLY)) {
-               struct the_nilfs *nilfs = sbi->s_nilfs;
-
-               down_write(&nilfs->ns_sem);
-               if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
-                       nilfs->ns_mount_state |= NILFS_ERROR_FS;
-                       nilfs->ns_sbp[0]->s_state |=
-                               cpu_to_le16(NILFS_ERROR_FS);
-                       nilfs_commit_super(sbi, 1);
-               }
-               up_write(&nilfs->ns_sem);
+               nilfs_set_error(sbi);
 
                if (nilfs_test_opt(sbi, ERRORS_RO)) {
                        printk(KERN_CRIT "Remounting filesystem read-only\n");
@@ -176,7 +188,7 @@ static void nilfs_clear_inode(struct inode *inode)
        nilfs_btnode_cache_clear(&ii->i_btnode_cache);
 }
 
-static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
+static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
 {
        struct the_nilfs *nilfs = sbi->s_nilfs;
        int err;
@@ -202,12 +214,20 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
                printk(KERN_ERR
                       "NILFS: unable to write superblock (err=%d)\n", err);
                if (err == -EIO && nilfs->ns_sbh[1]) {
+                       /*
+                        * sbp[0] points to a newer log than sbp[1], so copy
+                        * sbp[0] to sbp[1] before falling back, letting
+                        * sbp[1] take over the newer state.
+                        */
+                       memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0],
+                              nilfs->ns_sbsize);
                        nilfs_fall_back_super_block(nilfs);
                        goto retry;
                }
        } else {
                struct nilfs_super_block *sbp = nilfs->ns_sbp[0];
 
+               nilfs->ns_sbwcount++;
+
                /*
                 * The latest segment becomes trailable from the position
                 * written in superblock.
@@ -216,66 +236,122 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
 
                /* update GC protection for recent segments */
                if (nilfs->ns_sbh[1]) {
-                       sbp = NULL;
-                       if (dupsb) {
+                       if (flag == NILFS_SB_COMMIT_ALL) {
                                set_buffer_dirty(nilfs->ns_sbh[1]);
-                               if (!sync_dirty_buffer(nilfs->ns_sbh[1]))
-                                       sbp = nilfs->ns_sbp[1];
+                               if (sync_dirty_buffer(nilfs->ns_sbh[1]) < 0)
+                                       goto out;
                        }
+                       if (le64_to_cpu(nilfs->ns_sbp[1]->s_last_cno) <
+                           le64_to_cpu(nilfs->ns_sbp[0]->s_last_cno))
+                               sbp = nilfs->ns_sbp[1];
                }
-               if (sbp) {
-                       spin_lock(&nilfs->ns_last_segment_lock);
-                       nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq);
-                       spin_unlock(&nilfs->ns_last_segment_lock);
-               }
-       }
 
+               spin_lock(&nilfs->ns_last_segment_lock);
+               nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq);
+               spin_unlock(&nilfs->ns_last_segment_lock);
+       }
+ out:
        return err;
 }
 
-int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb)
+void nilfs_set_log_cursor(struct nilfs_super_block *sbp,
+                         struct the_nilfs *nilfs)
+{
+       sector_t nfreeblocks;
+
+       /* nilfs->ns_sem must be locked by the caller. */
+       nilfs_count_free_blocks(nilfs, &nfreeblocks);
+       sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks);
+
+       spin_lock(&nilfs->ns_last_segment_lock);
+       sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq);
+       sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg);
+       sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno);
+       spin_unlock(&nilfs->ns_last_segment_lock);
+}
+
+struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi,
+                                              int flip)
 {
        struct the_nilfs *nilfs = sbi->s_nilfs;
        struct nilfs_super_block **sbp = nilfs->ns_sbp;
-       sector_t nfreeblocks;
-       time_t t;
-       int err;
 
-       /* nilfs->sem must be locked by the caller. */
+       /* nilfs->ns_sem must be locked by the caller. */
        if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
-               if (sbp[1] && sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC))
-                       nilfs_swap_super_block(nilfs);
-               else {
+               if (sbp[1] &&
+                   sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) {
+                       memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
+               } else {
                        printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
                               sbi->s_super->s_id);
-                       return -EIO;
+                       return NULL;
                }
+       } else if (sbp[1] &&
+                  sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
+                       memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
        }
-       err = nilfs_count_free_blocks(nilfs, &nfreeblocks);
-       if (unlikely(err)) {
-               printk(KERN_ERR "NILFS: failed to count free blocks\n");
-               return err;
-       }
-       spin_lock(&nilfs->ns_last_segment_lock);
-       sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq);
-       sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg);
-       sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno);
-       spin_unlock(&nilfs->ns_last_segment_lock);
 
+       if (flip && sbp[1])
+               nilfs_swap_super_block(nilfs);
+
+       return sbp;
+}
+
+int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag)
+{
+       struct the_nilfs *nilfs = sbi->s_nilfs;
+       struct nilfs_super_block **sbp = nilfs->ns_sbp;
+       time_t t;
+
+       /* nilfs->ns_sem must be locked by the caller. */
        t = get_seconds();
-       nilfs->ns_sbwtime[0] = t;
-       sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks);
+       nilfs->ns_sbwtime = t;
        sbp[0]->s_wtime = cpu_to_le64(t);
        sbp[0]->s_sum = 0;
        sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
                                             (unsigned char *)sbp[0],
                                             nilfs->ns_sbsize));
-       if (dupsb && sbp[1]) {
-               memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
-               nilfs->ns_sbwtime[1] = t;
+       if (flag == NILFS_SB_COMMIT_ALL && sbp[1]) {
+               sbp[1]->s_wtime = sbp[0]->s_wtime;
+               sbp[1]->s_sum = 0;
+               sbp[1]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
+                                           (unsigned char *)sbp[1],
+                                           nilfs->ns_sbsize));
        }
        clear_nilfs_sb_dirty(nilfs);
-       return nilfs_sync_super(sbi, dupsb);
+       return nilfs_sync_super(sbi, flag);
+}
+
+/**
+ * nilfs_cleanup_super() - write filesystem state for cleanup
+ * @sbi: nilfs_sb_info to be unmounted or degraded to read-only
+ *
+ * This function restores state flags in the on-disk super block.
+ * This will set the "clean" flag (i.e. NILFS_VALID_FS) unless the
+ * filesystem was not clean previously.
+ */
+int nilfs_cleanup_super(struct nilfs_sb_info *sbi)
+{
+       struct nilfs_super_block **sbp;
+       int flag = NILFS_SB_COMMIT;
+       int ret = -EIO;
+
+       sbp = nilfs_prepare_super(sbi, 0);
+       if (sbp) {
+               sbp[0]->s_state = cpu_to_le16(sbi->s_nilfs->ns_mount_state);
+               nilfs_set_log_cursor(sbp[0], sbi->s_nilfs);
+               if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) {
+                       /*
+                        * set the "clean" flag in the opposite super
+                        * block as well if both super blocks point to
+                        * the same checkpoint.
+                        */
+                       sbp[1]->s_state = sbp[0]->s_state;
+                       flag = NILFS_SB_COMMIT_ALL;
+               }
+               ret = nilfs_commit_super(sbi, flag);
+       }
+       return ret;
 }
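
For reference, a minimal sketch of the caller pattern the new helpers expect: take ns_sem, prepare the super block pair (optionally flipping primary and spare), refresh the log cursor, then commit.  This mirrors the nilfs_sync_fs() change further below; the function name here is invented for illustration and the sketch assumes the fs/nilfs2 internal headers (nilfs.h, the_nilfs.h).

static int example_write_super(struct nilfs_sb_info *sbi)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct nilfs_super_block **sbp;
	int err = -EIO;

	down_write(&nilfs->ns_sem);
	/* decide whether to swap the primary/spare super blocks first */
	sbp = nilfs_prepare_super(sbi, nilfs_sb_will_flip(nilfs));
	if (sbp) {
		/* record the latest log position before writing */
		nilfs_set_log_cursor(sbp[0], nilfs);
		err = nilfs_commit_super(sbi, NILFS_SB_COMMIT);
	}
	up_write(&nilfs->ns_sem);
	return err;
}
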
 
 static void nilfs_put_super(struct super_block *sb)
@@ -289,8 +365,7 @@ static void nilfs_put_super(struct super_block *sb)
 
        if (!(sb->s_flags & MS_RDONLY)) {
                down_write(&nilfs->ns_sem);
-               nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
-               nilfs_commit_super(sbi, 1);
+               nilfs_cleanup_super(sbi);
                up_write(&nilfs->ns_sem);
        }
        down_write(&nilfs->ns_super_sem);
@@ -311,6 +386,7 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
 {
        struct nilfs_sb_info *sbi = NILFS_SB(sb);
        struct the_nilfs *nilfs = sbi->s_nilfs;
+       struct nilfs_super_block **sbp;
        int err = 0;
 
        /* This function is called when super block should be written back */
@@ -318,8 +394,13 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
                err = nilfs_construct_segment(sb);
 
        down_write(&nilfs->ns_sem);
-       if (nilfs_sb_dirty(nilfs))
-               nilfs_commit_super(sbi, 1);
+       if (nilfs_sb_dirty(nilfs)) {
+               sbp = nilfs_prepare_super(sbi, nilfs_sb_will_flip(nilfs));
+               if (likely(sbp)) {
+                       nilfs_set_log_cursor(sbp[0], nilfs);
+                       nilfs_commit_super(sbi, NILFS_SB_COMMIT);
+               }
+       }
        up_write(&nilfs->ns_sem);
 
        return err;
@@ -442,20 +523,20 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
        struct nilfs_sb_info *sbi = NILFS_SB(sb);
 
        if (!nilfs_test_opt(sbi, BARRIER))
-               seq_printf(seq, ",nobarrier");
+               seq_puts(seq, ",nobarrier");
        if (nilfs_test_opt(sbi, SNAPSHOT))
                seq_printf(seq, ",cp=%llu",
                           (unsigned long long int)sbi->s_snapshot_cno);
        if (nilfs_test_opt(sbi, ERRORS_PANIC))
-               seq_printf(seq, ",errors=panic");
+               seq_puts(seq, ",errors=panic");
        if (nilfs_test_opt(sbi, ERRORS_CONT))
-               seq_printf(seq, ",errors=continue");
+               seq_puts(seq, ",errors=continue");
        if (nilfs_test_opt(sbi, STRICT_ORDER))
-               seq_printf(seq, ",order=strict");
+               seq_puts(seq, ",order=strict");
        if (nilfs_test_opt(sbi, NORECOVERY))
-               seq_printf(seq, ",norecovery");
+               seq_puts(seq, ",norecovery");
        if (nilfs_test_opt(sbi, DISCARD))
-               seq_printf(seq, ",discard");
+               seq_puts(seq, ",discard");
 
        return 0;
 }
@@ -524,23 +605,25 @@ static const struct export_operations nilfs_export_ops = {
 
 enum {
        Opt_err_cont, Opt_err_panic, Opt_err_ro,
-       Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
-       Opt_discard, Opt_err,
+       Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
+       Opt_discard, Opt_nodiscard, Opt_err,
 };
 
 static match_table_t tokens = {
        {Opt_err_cont, "errors=continue"},
        {Opt_err_panic, "errors=panic"},
        {Opt_err_ro, "errors=remount-ro"},
+       {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_snapshot, "cp=%u"},
        {Opt_order, "order=%s"},
        {Opt_norecovery, "norecovery"},
        {Opt_discard, "discard"},
+       {Opt_nodiscard, "nodiscard"},
        {Opt_err, NULL}
 };
 
-static int parse_options(char *options, struct super_block *sb)
+static int parse_options(char *options, struct super_block *sb, int is_remount)
 {
        struct nilfs_sb_info *sbi = NILFS_SB(sb);
        char *p;
@@ -557,6 +640,9 @@ static int parse_options(char *options, struct super_block *sb)
 
                token = match_token(p, tokens, args);
                switch (token) {
+               case Opt_barrier:
+                       nilfs_set_opt(sbi, BARRIER);
+                       break;
                case Opt_nobarrier:
                        nilfs_clear_opt(sbi, BARRIER);
                        break;
@@ -582,8 +668,26 @@ static int parse_options(char *options, struct super_block *sb)
                case Opt_snapshot:
                        if (match_int(&args[0], &option) || option <= 0)
                                return 0;
-                       if (!(sb->s_flags & MS_RDONLY))
+                       if (is_remount) {
+                               if (!nilfs_test_opt(sbi, SNAPSHOT)) {
+                                       printk(KERN_ERR
+                                              "NILFS: cannot change regular "
+                                              "mount to snapshot.\n");
+                                       return 0;
+                               } else if (option != sbi->s_snapshot_cno) {
+                                       printk(KERN_ERR
+                                              "NILFS: cannot remount to a "
+                                              "different snapshot.\n");
+                                       return 0;
+                               }
+                               break;
+                       }
+                       if (!(sb->s_flags & MS_RDONLY)) {
+                               printk(KERN_ERR "NILFS: cannot mount snapshot "
+                                      "read/write.  A read-only option is "
+                                      "required.\n");
                                return 0;
+                       }
                        sbi->s_snapshot_cno = option;
                        nilfs_set_opt(sbi, SNAPSHOT);
                        break;
@@ -593,6 +697,9 @@ static int parse_options(char *options, struct super_block *sb)
                case Opt_discard:
                        nilfs_set_opt(sbi, DISCARD);
                        break;
+               case Opt_nodiscard:
+                       nilfs_clear_opt(sbi, DISCARD);
+                       break;
                default:
                        printk(KERN_ERR
                               "NILFS: Unrecognized mount option \"%s\"\n", p);
@@ -613,11 +720,18 @@ nilfs_set_default_options(struct nilfs_sb_info *sbi,
 static int nilfs_setup_super(struct nilfs_sb_info *sbi)
 {
        struct the_nilfs *nilfs = sbi->s_nilfs;
-       struct nilfs_super_block *sbp = nilfs->ns_sbp[0];
-       int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count);
-       int mnt_count = le16_to_cpu(sbp->s_mnt_count);
+       struct nilfs_super_block **sbp;
+       int max_mnt_count;
+       int mnt_count;
+
+       /* nilfs->ns_sem must be locked by the caller. */
+       sbp = nilfs_prepare_super(sbi, 0);
+       if (!sbp)
+               return -EIO;
+
+       max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count);
+       mnt_count = le16_to_cpu(sbp[0]->s_mnt_count);
 
-       /* nilfs->sem must be locked by the caller. */
        if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
                printk(KERN_WARNING
                       "NILFS warning: mounting fs with errors\n");
@@ -628,12 +742,15 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi)
 #endif
        }
        if (!max_mnt_count)
-               sbp->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);
-
-       sbp->s_mnt_count = cpu_to_le16(mnt_count + 1);
-       sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS);
-       sbp->s_mtime = cpu_to_le64(get_seconds());
-       return nilfs_commit_super(sbi, 1);
+               sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);
+
+       sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1);
+       sbp[0]->s_state =
+               cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS);
+       sbp[0]->s_mtime = cpu_to_le64(get_seconds());
+       /* synchronize sbp[1] with sbp[0] */
+       memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
+       return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL);
 }
 
 struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
@@ -670,7 +787,31 @@ int nilfs_store_magic_and_option(struct super_block *sb,
        sbi->s_interval = le32_to_cpu(sbp->s_c_interval);
        sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max);
 
-       return !parse_options(data, sb) ? -EINVAL : 0 ;
+       return !parse_options(data, sb, 0) ? -EINVAL : 0;
+}
+
+int nilfs_check_feature_compatibility(struct super_block *sb,
+                                     struct nilfs_super_block *sbp)
+{
+       __u64 features;
+
+       features = le64_to_cpu(sbp->s_feature_incompat) &
+               ~NILFS_FEATURE_INCOMPAT_SUPP;
+       if (features) {
+               printk(KERN_ERR "NILFS: couldn't mount because of unsupported "
+                      "optional features (%llx)\n",
+                      (unsigned long long)features);
+               return -EINVAL;
+       }
+       features = le64_to_cpu(sbp->s_feature_compat_ro) &
+               ~NILFS_FEATURE_COMPAT_RO_SUPP;
+       if (!(sb->s_flags & MS_RDONLY) && features) {
+               printk(KERN_ERR "NILFS: couldn't mount RDWR because of "
+                      "unsupported optional features (%llx)\n",
+                      (unsigned long long)features);
+               return -EINVAL;
+       }
+       return 0;
 }
 
 /**
@@ -819,7 +960,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
 static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 {
        struct nilfs_sb_info *sbi = NILFS_SB(sb);
-       struct nilfs_super_block *sbp;
        struct the_nilfs *nilfs = sbi->s_nilfs;
        unsigned long old_sb_flags;
        struct nilfs_mount_options old_opts;
@@ -833,32 +973,17 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
        old_opts.snapshot_cno = sbi->s_snapshot_cno;
        was_snapshot = nilfs_test_opt(sbi, SNAPSHOT);
 
-       if (!parse_options(data, sb)) {
+       if (!parse_options(data, sb, 1)) {
                err = -EINVAL;
                goto restore_opts;
        }
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL);
 
        err = -EINVAL;
-       if (was_snapshot) {
-               if (!(*flags & MS_RDONLY)) {
-                       printk(KERN_ERR "NILFS (device %s): cannot remount "
-                              "snapshot read/write.\n",
-                              sb->s_id);
-                       goto restore_opts;
-               } else if (sbi->s_snapshot_cno != old_opts.snapshot_cno) {
-                       printk(KERN_ERR "NILFS (device %s): cannot "
-                              "remount to a different snapshot.\n",
-                              sb->s_id);
-                       goto restore_opts;
-               }
-       } else {
-               if (nilfs_test_opt(sbi, SNAPSHOT)) {
-                       printk(KERN_ERR "NILFS (device %s): cannot change "
-                              "a regular mount to a snapshot.\n",
-                              sb->s_id);
-                       goto restore_opts;
-               }
+       if (was_snapshot && !(*flags & MS_RDONLY)) {
+               printk(KERN_ERR "NILFS (device %s): cannot remount snapshot "
+                      "read/write.\n", sb->s_id);
+               goto restore_opts;
        }
 
        if (!nilfs_valid_fs(nilfs)) {
@@ -880,19 +1005,29 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
                 * the RDONLY flag and then mark the partition as valid again.
                 */
                down_write(&nilfs->ns_sem);
-               sbp = nilfs->ns_sbp[0];
-               if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) &&
-                   (nilfs->ns_mount_state & NILFS_VALID_FS))
-                       sbp->s_state = cpu_to_le16(nilfs->ns_mount_state);
-               sbp->s_mtime = cpu_to_le64(get_seconds());
-               nilfs_commit_super(sbi, 1);
+               nilfs_cleanup_super(sbi);
                up_write(&nilfs->ns_sem);
        } else {
+               __u64 features;
+
                /*
                 * Mounting a RDONLY partition read-write, so reread and
                 * store the current valid flag.  (It may have been changed
                 * by fsck since we originally mounted the partition.)
                 */
+               down_read(&nilfs->ns_sem);
+               features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) &
+                       ~NILFS_FEATURE_COMPAT_RO_SUPP;
+               up_read(&nilfs->ns_sem);
+               if (features) {
+                       printk(KERN_WARNING "NILFS (device %s): couldn't "
+                              "remount RDWR because of unsupported optional "
+                              "features (%llx)\n",
+                              sb->s_id, (unsigned long long)features);
+                       err = -EROFS;
+                       goto restore_opts;
+               }
+
                sb->s_flags &= ~MS_RDONLY;
 
                err = nilfs_attach_segment_constructor(sbi);
@@ -1119,7 +1254,7 @@ static void nilfs_inode_init_once(void *obj)
        init_rwsem(&ii->xattr_sem);
 #endif
        nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
-       ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union;
+       ii->i_bmap = &ii->i_bmap_data;
        inode_init_once(&ii->vfs_inode);
 }
 
index 8c10973..37de1f0 100644 (file)
@@ -38,6 +38,8 @@
 static LIST_HEAD(nilfs_objects);
 static DEFINE_SPINLOCK(nilfs_lock);
 
+static int nilfs_valid_sb(struct nilfs_super_block *sbp);
+
 void nilfs_set_last_segment(struct the_nilfs *nilfs,
                            sector_t start_blocknr, u64 seq, __u64 cno)
 {
@@ -45,6 +47,16 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs,
        nilfs->ns_last_pseg = start_blocknr;
        nilfs->ns_last_seq = seq;
        nilfs->ns_last_cno = cno;
+
+       if (!nilfs_sb_dirty(nilfs)) {
+               if (nilfs->ns_prev_seq == nilfs->ns_last_seq)
+                       goto stay_cursor;
+
+               set_nilfs_sb_dirty(nilfs);
+       }
+       nilfs->ns_prev_seq = nilfs->ns_last_seq;
+
+ stay_cursor:
        spin_unlock(&nilfs->ns_last_segment_lock);
 }
 
@@ -159,8 +171,7 @@ void put_nilfs(struct the_nilfs *nilfs)
        kfree(nilfs);
 }
 
-static int nilfs_load_super_root(struct the_nilfs *nilfs,
-                                struct nilfs_sb_info *sbi, sector_t sr_block)
+static int nilfs_load_super_root(struct the_nilfs *nilfs, sector_t sr_block)
 {
        struct buffer_head *bh_sr;
        struct nilfs_super_root *raw_sr;
@@ -169,7 +180,7 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
        unsigned inode_size;
        int err;
 
-       err = nilfs_read_super_root_block(sbi->s_super, sr_block, &bh_sr, 1);
+       err = nilfs_read_super_root_block(nilfs, sr_block, &bh_sr, 1);
        if (unlikely(err))
                return err;
 
@@ -248,6 +259,37 @@ static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri)
 }
 
 /**
+ * nilfs_store_log_cursor - load log cursor from a super block
+ * @nilfs: nilfs object
+ * @sbp: buffer storing super block to be read
+ *
+ * nilfs_store_log_cursor() reads the last position of the log
+ * containing a super root from a given super block, and initializes
+ * relevant information on the nilfs object in preparation for log
+ * scanning and recovery.
+ */
+static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
+                                 struct nilfs_super_block *sbp)
+{
+       int ret = 0;
+
+       nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg);
+       nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno);
+       nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq);
+
+       nilfs->ns_prev_seq = nilfs->ns_last_seq;
+       nilfs->ns_seg_seq = nilfs->ns_last_seq;
+       nilfs->ns_segnum =
+               nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg);
+       nilfs->ns_cno = nilfs->ns_last_cno + 1;
+       if (nilfs->ns_segnum >= nilfs->ns_nsegments) {
+               printk(KERN_ERR "NILFS: invalid last segment number.\n");
+               ret = -EINVAL;
+       }
+       return ret;
+}
+
+/**
  * load_nilfs - load and recover the nilfs
  * @nilfs: the_nilfs structure to be released
  * @sbi: nilfs_sb_info used to recover past segment
@@ -285,13 +327,55 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
 
        nilfs_init_recovery_info(&ri);
 
-       err = nilfs_search_super_root(nilfs, sbi, &ri);
+       err = nilfs_search_super_root(nilfs, &ri);
        if (unlikely(err)) {
-               printk(KERN_ERR "NILFS: error searching super root.\n");
-               goto failed;
+               struct nilfs_super_block **sbp = nilfs->ns_sbp;
+               int blocksize;
+
+               if (err != -EINVAL)
+                       goto scan_error;
+
+               if (!nilfs_valid_sb(sbp[1])) {
+                       printk(KERN_WARNING
+                              "NILFS warning: unable to fall back to spare "
+                              "super block\n");
+                       goto scan_error;
+               }
+               printk(KERN_INFO
+                      "NILFS: try rollback from an earlier position\n");
+
+               /*
+                * restore super block with its spare and reconfigure
+                * relevant states of the nilfs object.
+                */
+               memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
+               nilfs->ns_crc_seed = le32_to_cpu(sbp[0]->s_crc_seed);
+               nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
+
+               /* verify consistency between two super blocks */
+               blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size);
+               if (blocksize != nilfs->ns_blocksize) {
+                       printk(KERN_WARNING
+                              "NILFS warning: blocksize differs between "
+                              "two super blocks (%d != %d)\n",
+                              blocksize, nilfs->ns_blocksize);
+                       goto scan_error;
+               }
+
+               err = nilfs_store_log_cursor(nilfs, sbp[0]);
+               if (err)
+                       goto scan_error;
+
+               /* drop clean flag to allow roll-forward and recovery */
+               nilfs->ns_mount_state &= ~NILFS_VALID_FS;
+               valid_fs = 0;
+
+               err = nilfs_search_super_root(nilfs, &ri);
+               if (err)
+                       goto scan_error;
        }
 
-       err = nilfs_load_super_root(nilfs, sbi, ri.ri_super_root);
+       err = nilfs_load_super_root(nilfs, ri.ri_super_root);
        if (unlikely(err)) {
                printk(KERN_ERR "NILFS: error loading super root.\n");
                goto failed;
@@ -301,11 +385,23 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
                goto skip_recovery;
 
        if (s_flags & MS_RDONLY) {
+               __u64 features;
+
                if (nilfs_test_opt(sbi, NORECOVERY)) {
                        printk(KERN_INFO "NILFS: norecovery option specified. "
                               "skipping roll-forward recovery\n");
                        goto skip_recovery;
                }
+               features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) &
+                       ~NILFS_FEATURE_COMPAT_RO_SUPP;
+               if (features) {
+                       printk(KERN_ERR "NILFS: couldn't proceed with "
+                              "recovery because of unsupported optional "
+                              "features (%llx)\n",
+                              (unsigned long long)features);
+                       err = -EROFS;
+                       goto failed_unload;
+               }
                if (really_read_only) {
                        printk(KERN_ERR "NILFS: write access "
                               "unavailable, cannot proceed.\n");
@@ -320,14 +416,13 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
                goto failed_unload;
        }
 
-       err = nilfs_recover_logical_segments(nilfs, sbi, &ri);
+       err = nilfs_salvage_orphan_logs(nilfs, sbi, &ri);
        if (err)
                goto failed_unload;
 
        down_write(&nilfs->ns_sem);
-       nilfs->ns_mount_state |= NILFS_VALID_FS;
-       nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
-       err = nilfs_commit_super(sbi, 1);
+       nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */
+       err = nilfs_cleanup_super(sbi);
        up_write(&nilfs->ns_sem);
 
        if (err) {
@@ -343,6 +438,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
        sbi->s_super->s_flags = s_flags;
        return 0;
 
+ scan_error:
+       printk(KERN_ERR "NILFS: error searching super root.\n");
+       goto failed;
+
  failed_unload:
        nilfs_mdt_destroy(nilfs->ns_cpfile);
        nilfs_mdt_destroy(nilfs->ns_sufile);
@@ -515,8 +614,8 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
                nilfs_swap_super_block(nilfs);
        }
 
-       nilfs->ns_sbwtime[0] = le64_to_cpu(sbp[0]->s_wtime);
-       nilfs->ns_sbwtime[1] = valid[!swp] ? le64_to_cpu(sbp[1]->s_wtime) : 0;
+       nilfs->ns_sbwcount = 0;
+       nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
        nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq);
        *sbpp = sbp[0];
        return 0;
@@ -557,6 +656,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
                if (err)
                        goto out;
 
+               err = nilfs_check_feature_compatibility(sb, sbp);
+               if (err)
+                       goto out;
+
                blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
                if (sb->s_blocksize != blocksize &&
                    !sb_set_blocksize(sb, blocksize)) {
@@ -568,7 +671,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
                goto out;
        }
 
-       blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
+       blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
        if (!blocksize) {
                printk(KERN_ERR "NILFS: unable to set blocksize\n");
                err = -EINVAL;
@@ -582,7 +685,18 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
        if (err)
                goto failed_sbh;
 
+       err = nilfs_check_feature_compatibility(sb, sbp);
+       if (err)
+               goto failed_sbh;
+
        blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
+       if (blocksize < NILFS_MIN_BLOCK_SIZE ||
+           blocksize > NILFS_MAX_BLOCK_SIZE) {
+               printk(KERN_ERR "NILFS: couldn't mount because of unsupported "
+                      "filesystem blocksize %d\n", blocksize);
+               err = -EINVAL;
+               goto failed_sbh;
+       }
        if (sb->s_blocksize != blocksize) {
                int hw_blocksize = bdev_logical_block_size(sb->s_bdev);
 
@@ -604,6 +718,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
                           when reloading fails. */
        }
        nilfs->ns_blocksize_bits = sb->s_blocksize_bits;
+       nilfs->ns_blocksize = blocksize;
 
        err = nilfs_store_disk_layout(nilfs, sbp);
        if (err)
@@ -616,23 +731,9 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
        bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info;
        nilfs->ns_bdi = bdi ? : &default_backing_dev_info;
 
-       /* Finding last segment */
-       nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg);
-       nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno);
-       nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq);
-
-       nilfs->ns_seg_seq = nilfs->ns_last_seq;
-       nilfs->ns_segnum =
-               nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg);
-       nilfs->ns_cno = nilfs->ns_last_cno + 1;
-       if (nilfs->ns_segnum >= nilfs->ns_nsegments) {
-               printk(KERN_ERR "NILFS invalid last segment number.\n");
-               err = -EINVAL;
+       err = nilfs_store_log_cursor(nilfs, sbp);
+       if (err)
                goto failed_sbh;
-       }
-       /* Dummy values  */
-       nilfs->ns_free_segments_count =
-               nilfs->ns_nsegments - (nilfs->ns_segnum + 1);
 
        /* Initialize gcinode cache */
        err = nilfs_init_gccache(nilfs);
index 1ab9745..f785a7b 100644 (file)
@@ -57,7 +57,8 @@ enum {
  * @ns_current: back pointer to current mount
  * @ns_sbh: buffer heads of on-disk super blocks
  * @ns_sbp: pointers to super block data
- * @ns_sbwtime: previous write time of super blocks
+ * @ns_sbwtime: previous write time of super block
+ * @ns_sbwcount: write count of super block
  * @ns_sbsize: size of valid data in super block
  * @ns_supers: list of nilfs super block structs
  * @ns_seg_seq: segment sequence counter
@@ -73,7 +74,7 @@ enum {
  * @ns_last_seq: sequence value of the latest segment
  * @ns_last_cno: checkpoint number of the latest segment
  * @ns_prot_seq: least sequence number of segments which must not be reclaimed
- * @ns_free_segments_count: counter of free segments
+ * @ns_prev_seq: base sequence number used to decide if advance log cursor
  * @ns_segctor_sem: segment constructor semaphore
  * @ns_dat: DAT file inode
  * @ns_cpfile: checkpoint file inode
@@ -82,6 +83,7 @@ enum {
  * @ns_gc_inodes: dummy inodes to keep live blocks
  * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks
  * @ns_blocksize_bits: bit length of block size
+ * @ns_blocksize: block size
  * @ns_nsegments: number of segments in filesystem
  * @ns_blocks_per_segment: number of blocks per segment
  * @ns_r_segments_percentage: reserved segments percentage
@@ -119,7 +121,8 @@ struct the_nilfs {
         */
        struct buffer_head     *ns_sbh[2];
        struct nilfs_super_block *ns_sbp[2];
-       time_t                  ns_sbwtime[2];
+       time_t                  ns_sbwtime;
+       unsigned                ns_sbwcount;
        unsigned                ns_sbsize;
        unsigned                ns_mount_state;
 
@@ -149,7 +152,7 @@ struct the_nilfs {
        u64                     ns_last_seq;
        __u64                   ns_last_cno;
        u64                     ns_prot_seq;
-       unsigned long           ns_free_segments_count;
+       u64                     ns_prev_seq;
 
        struct rw_semaphore     ns_segctor_sem;
 
@@ -168,6 +171,7 @@ struct the_nilfs {
 
        /* Disk layout information (static) */
        unsigned int            ns_blocksize_bits;
+       unsigned int            ns_blocksize;
        unsigned long           ns_nsegments;
        unsigned long           ns_blocks_per_segment;
        unsigned long           ns_r_segments_percentage;
@@ -203,20 +207,17 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
 
 /* Minimum interval of periodical update of superblocks (in seconds) */
 #define NILFS_SB_FREQ          10
-#define NILFS_ALTSB_FREQ       60  /* spare superblock */
 
 static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
 {
        u64 t = get_seconds();
-       return t < nilfs->ns_sbwtime[0] ||
-                t > nilfs->ns_sbwtime[0] + NILFS_SB_FREQ;
+       return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ;
 }
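
As a worked example of the helper above: with NILFS_SB_FREQ = 10, a super block last written at ns_sbwtime = 1000 is reported as needing an update once get_seconds() exceeds 1010, and also immediately if the clock has been stepped back below 1000 (the t < ns_sbwtime case).
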
 
-static inline int nilfs_altsb_need_update(struct the_nilfs *nilfs)
+static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
 {
-       u64 t = get_seconds();
-       struct nilfs_super_block **sbp = nilfs->ns_sbp;
-       return sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ;
+       int flip_bits = nilfs->ns_sbwcount & 0x0FL;
+       return (flip_bits != 0x08 && flip_bits != 0x0F);
 }
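
The mask test above is easiest to read by replaying it over one 16-write cycle: the cursor flips on every write except when the low four bits of ns_sbwcount are 0x8 or 0xF, so the two copies alternate 14 times per cycle and the same copy is written twice in a row at the remaining two points (assuming ns_sbwcount counts super block writes; the write path itself is not in this hunk).  A throwaway user-space sketch that just replays the mask logic:

#include <stdio.h>

int main(void)
{
	unsigned count;

	for (count = 0; count < 16; count++) {
		int flip_bits = count & 0x0F;	/* same mask as nilfs_sb_will_flip() */
		int will_flip = flip_bits != 0x08 && flip_bits != 0x0F;

		printf("write %2u: %s\n", count, will_flip ? "flip" : "keep");
	}
	return 0;
}
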
 
 void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
index 8c2c611..f5487b6 100644 (file)
@@ -160,7 +160,7 @@ struct nilfs_super_root {
  * struct nilfs_super_block - structure of super block on disk
  */
 struct nilfs_super_block {
-       __le32  s_rev_level;            /* Revision level */
+/*00*/ __le32  s_rev_level;            /* Revision level */
        __le16  s_minor_rev_level;      /* minor revision level */
        __le16  s_magic;                /* Magic signature */
 
@@ -169,50 +169,53 @@ struct nilfs_super_block {
                                           is excluded. */
        __le16  s_flags;                /* flags */
        __le32  s_crc_seed;             /* Seed value of CRC calculation */
-       __le32  s_sum;                  /* Check sum of super block */
+/*10*/ __le32  s_sum;                  /* Check sum of super block */
 
        __le32  s_log_block_size;       /* Block size represented as follows
                                           blocksize =
                                               1 << (s_log_block_size + 10) */
        __le64  s_nsegments;            /* Number of segments in filesystem */
-       __le64  s_dev_size;             /* block device size in bytes */
+/*20*/ __le64  s_dev_size;             /* block device size in bytes */
        __le64  s_first_data_block;     /* 1st seg disk block number */
-       __le32  s_blocks_per_segment;   /* number of blocks per full segment */
+/*30*/ __le32  s_blocks_per_segment;   /* number of blocks per full segment */
        __le32  s_r_segments_percentage; /* Reserved segments percentage */
 
        __le64  s_last_cno;             /* Last checkpoint number */
-       __le64  s_last_pseg;            /* disk block addr pseg written last */
+/*40*/ __le64  s_last_pseg;            /* disk block addr pseg written last */
        __le64  s_last_seq;             /* seq. number of seg written last */
-       __le64  s_free_blocks_count;    /* Free blocks count */
+/*50*/ __le64  s_free_blocks_count;    /* Free blocks count */
 
        __le64  s_ctime;                /* Creation time (execution time of
                                           newfs) */
-       __le64  s_mtime;                /* Mount time */
+/*60*/ __le64  s_mtime;                /* Mount time */
        __le64  s_wtime;                /* Write time */
-       __le16  s_mnt_count;            /* Mount count */
+/*70*/ __le16  s_mnt_count;            /* Mount count */
        __le16  s_max_mnt_count;        /* Maximal mount count */
        __le16  s_state;                /* File system state */
        __le16  s_errors;               /* Behaviour when detecting errors */
        __le64  s_lastcheck;            /* time of last check */
 
-       __le32  s_checkinterval;        /* max. time between checks */
+/*80*/ __le32  s_checkinterval;        /* max. time between checks */
        __le32  s_creator_os;           /* OS */
        __le16  s_def_resuid;           /* Default uid for reserved blocks */
        __le16  s_def_resgid;           /* Default gid for reserved blocks */
        __le32  s_first_ino;            /* First non-reserved inode */
 
-       __le16  s_inode_size;           /* Size of an inode */
+/*90*/ __le16  s_inode_size;           /* Size of an inode */
        __le16  s_dat_entry_size;       /* Size of a dat entry */
        __le16  s_checkpoint_size;      /* Size of a checkpoint */
        __le16  s_segment_usage_size;   /* Size of a segment usage */
 
-       __u8    s_uuid[16];             /* 128-bit uuid for volume */
-       char    s_volume_name[80];      /* volume name */
+/*98*/ __u8    s_uuid[16];             /* 128-bit uuid for volume */
+/*A8*/ char    s_volume_name[80];      /* volume name */
 
-       __le32  s_c_interval;           /* Commit interval of segment */
+/*F8*/ __le32  s_c_interval;           /* Commit interval of segment */
        __le32  s_c_block_max;          /* Threshold of data amount for
                                           the segment construction */
-       __u32   s_reserved[192];        /* padding to the end of the block */
+/*100*/        __le64  s_feature_compat;       /* Compatible feature set */
+       __le64  s_feature_compat_ro;    /* Read-only compatible feature set */
+       __le64  s_feature_incompat;     /* Incompatible feature set */
+       __u32   s_reserved[186];        /* padding to the end of the block */
 };
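
The three new __le64 feature words take 24 bytes, i.e. six of the former __u32 reserved slots, which is why the padding shrinks from 192 to 186 words and the on-disk size of the structure is unchanged.  A hypothetical compile-time check, not part of this patch, using the 0x100 offset annotated above:

#include <linux/kernel.h>
#include <linux/nilfs2_fs.h>
#include <linux/stddef.h>

static inline void nilfs_sb_layout_check(void)
{
	/* s_feature_compat sits at the 0x100 offset annotated above */
	BUILD_BUG_ON(offsetof(struct nilfs_super_block, s_feature_compat) != 0x100);
	/* the three __le64 feature words replace exactly six __u32 reserved words */
	BUILD_BUG_ON(3 * sizeof(__le64) != 6 * sizeof(__u32));
}
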
 
 /*
@@ -228,6 +231,16 @@ struct nilfs_super_block {
 #define NILFS_MINOR_REV                0       /* minor revision */
 
 /*
+ * Feature set definitions
+ *
+ * If there is a bit set in the incompatible feature set that the kernel
+ * doesn't know about, it should refuse to mount the filesystem.
+ */
+#define NILFS_FEATURE_COMPAT_SUPP      0ULL
+#define NILFS_FEATURE_COMPAT_RO_SUPP   0ULL
+#define NILFS_FEATURE_INCOMPAT_SUPP    0ULL
+
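
These *_SUPP masks are what nilfs_check_feature_compatibility() (and the read-only remount path) compare the on-disk feature words against.  A hypothetical illustration of how a later feature bit would be wired in; the bit name and value below are invented and are not part of this series:

/* Hypothetical future read-only-compatible feature bit (not defined
 * anywhere in this series): */
#define NILFS_FEATURE_COMPAT_RO_EXAMPLE		0x0000000000000001ULL

/* A kernel implementing it would fold the bit into the supported mask,
 * e.g.:
 *
 *	#define NILFS_FEATURE_COMPAT_RO_SUPP	NILFS_FEATURE_COMPAT_RO_EXAMPLE
 *
 * so that nilfs_check_feature_compatibility() no longer refuses mounts
 * of volumes that set it. */
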
+/*
  * Bytes count of super_block for CRC-calculation
  */
 #define NILFS_SB_BYTES  \
@@ -274,6 +287,12 @@ struct nilfs_super_block {
 #define NILFS_NAME_LEN 255
 
 /*
+ * Block size limitations
+ */
+#define NILFS_MIN_BLOCK_SIZE           1024
+#define NILFS_MAX_BLOCK_SIZE           65536
+
+/*
  * The new version of the directory entry.  Since V0 structures are
  * stored in intel byte order, and the name_len field could never be
  * bigger than 255 chars, it's safe to reclaim the extra byte for the
@@ -313,7 +332,25 @@ enum {
 #define NILFS_DIR_ROUND                        (NILFS_DIR_PAD - 1)
 #define NILFS_DIR_REC_LEN(name_len)    (((name_len) + 12 + NILFS_DIR_ROUND) & \
                                        ~NILFS_DIR_ROUND)
+#define NILFS_MAX_REC_LEN              ((1<<16)-1)
 
+static inline unsigned nilfs_rec_len_from_disk(__le16 dlen)
+{
+       unsigned len = le16_to_cpu(dlen);
+
+       if (len == NILFS_MAX_REC_LEN)
+               return 1 << 16;
+       return len;
+}
+
+static inline __le16 nilfs_rec_len_to_disk(unsigned len)
+{
+       if (len == (1 << 16))
+               return cpu_to_le16(NILFS_MAX_REC_LEN);
+       else if (len > (1 << 16))
+               BUG();
+       return cpu_to_le16(len);
+}
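
These helpers exist because a single directory record can cover an entire block, and with the new 64KB maximum block size that length (65536) no longer fits in the 16-bit rec_len field; the reserved on-disk value 0xffff (NILFS_MAX_REC_LEN) stands in for it.  A self-contained user-space sketch of the same round trip, with the cpu_to_le16/le16_to_cpu conversions elided by assuming a little-endian host:

#include <stdint.h>
#include <stdio.h>

#define NILFS_MAX_REC_LEN	((1 << 16) - 1)

static unsigned rec_len_from_disk(uint16_t dlen)
{
	return dlen == NILFS_MAX_REC_LEN ? 1U << 16 : dlen;
}

static uint16_t rec_len_to_disk(unsigned len)
{
	return len == (1U << 16) ? NILFS_MAX_REC_LEN : (uint16_t)len;
}

int main(void)
{
	unsigned len = 1U << 16;		/* one record filling a 64KB block */
	uint16_t disk = rec_len_to_disk(len);	/* stored as 0xffff */

	printf("disk=0x%04x decoded=%u\n", disk, rec_len_from_disk(disk));
	return 0;
}
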
 
 /**
  * struct nilfs_finfo - file information