RISC-V: Fix a race condition during kernel stack overflow
[platform/kernel/linux-starfive.git] / fs / nilfs2 / btree.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * NILFS B-tree.
4  *
5  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6  *
7  * Written by Koji Sato.
8  */
9
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/pagevec.h>
14 #include "nilfs.h"
15 #include "page.h"
16 #include "btnode.h"
17 #include "btree.h"
18 #include "alloc.h"
19 #include "dat.h"
20
21 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
22
23 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
24 {
25         struct nilfs_btree_path *path;
26         int level = NILFS_BTREE_LEVEL_DATA;
27
28         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
29         if (path == NULL)
30                 goto out;
31
32         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
33                 path[level].bp_bh = NULL;
34                 path[level].bp_sib_bh = NULL;
35                 path[level].bp_index = 0;
36                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
37                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
38                 path[level].bp_op = NULL;
39         }
40
41 out:
42         return path;
43 }
44
45 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
46 {
47         int level = NILFS_BTREE_LEVEL_DATA;
48
49         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
50                 brelse(path[level].bp_bh);
51
52         kmem_cache_free(nilfs_btree_path_cache, path);
53 }
54
55 /*
56  * B-tree node operations
57  */
58 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
59                                      __u64 ptr, struct buffer_head **bhp)
60 {
61         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
62         struct address_space *btnc = btnc_inode->i_mapping;
63         struct buffer_head *bh;
64
65         bh = nilfs_btnode_create_block(btnc, ptr);
66         if (!bh)
67                 return -ENOMEM;
68
69         set_buffer_nilfs_volatile(bh);
70         *bhp = bh;
71         return 0;
72 }
73
74 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
75 {
76         return node->bn_flags;
77 }
78
79 static void
80 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
81 {
82         node->bn_flags = flags;
83 }
84
85 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
86 {
87         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
88 }
89
90 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
91 {
92         return node->bn_level;
93 }
94
95 static void
96 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
97 {
98         node->bn_level = level;
99 }
100
101 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
102 {
103         return le16_to_cpu(node->bn_nchildren);
104 }
105
106 static void
107 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
108 {
109         node->bn_nchildren = cpu_to_le16(nchildren);
110 }
111
112 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
113 {
114         return i_blocksize(btree->b_inode);
115 }
116
117 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
118 {
119         return btree->b_nchildren_per_block;
120 }
121
122 static __le64 *
123 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
124 {
125         return (__le64 *)((char *)(node + 1) +
126                           (nilfs_btree_node_root(node) ?
127                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
128 }
129
130 static __le64 *
131 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
132 {
133         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
134 }
135
136 static __u64
137 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
138 {
139         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
140 }
141
142 static void
143 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
144 {
145         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
146 }
147
148 static __u64
149 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
150                          int ncmax)
151 {
152         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
153 }
154
155 static void
156 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
157                          int ncmax)
158 {
159         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
160 }
161
162 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
163                                   int level, int nchildren, int ncmax,
164                                   const __u64 *keys, const __u64 *ptrs)
165 {
166         __le64 *dkeys;
167         __le64 *dptrs;
168         int i;
169
170         nilfs_btree_node_set_flags(node, flags);
171         nilfs_btree_node_set_level(node, level);
172         nilfs_btree_node_set_nchildren(node, nchildren);
173
174         dkeys = nilfs_btree_node_dkeys(node);
175         dptrs = nilfs_btree_node_dptrs(node, ncmax);
176         for (i = 0; i < nchildren; i++) {
177                 dkeys[i] = cpu_to_le64(keys[i]);
178                 dptrs[i] = cpu_to_le64(ptrs[i]);
179         }
180 }
181
182 /* Assume the buffer heads corresponding to left and right are locked. */
183 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
184                                        struct nilfs_btree_node *right,
185                                        int n, int lncmax, int rncmax)
186 {
187         __le64 *ldkeys, *rdkeys;
188         __le64 *ldptrs, *rdptrs;
189         int lnchildren, rnchildren;
190
191         ldkeys = nilfs_btree_node_dkeys(left);
192         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
193         lnchildren = nilfs_btree_node_get_nchildren(left);
194
195         rdkeys = nilfs_btree_node_dkeys(right);
196         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
197         rnchildren = nilfs_btree_node_get_nchildren(right);
198
199         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
200         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
201         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
202         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
203
204         lnchildren += n;
205         rnchildren -= n;
206         nilfs_btree_node_set_nchildren(left, lnchildren);
207         nilfs_btree_node_set_nchildren(right, rnchildren);
208 }
209
210 /* Assume that the buffer heads corresponding to left and right are locked. */
211 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
212                                         struct nilfs_btree_node *right,
213                                         int n, int lncmax, int rncmax)
214 {
215         __le64 *ldkeys, *rdkeys;
216         __le64 *ldptrs, *rdptrs;
217         int lnchildren, rnchildren;
218
219         ldkeys = nilfs_btree_node_dkeys(left);
220         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
221         lnchildren = nilfs_btree_node_get_nchildren(left);
222
223         rdkeys = nilfs_btree_node_dkeys(right);
224         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
225         rnchildren = nilfs_btree_node_get_nchildren(right);
226
227         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
228         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
229         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
230         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
231
232         lnchildren -= n;
233         rnchildren += n;
234         nilfs_btree_node_set_nchildren(left, lnchildren);
235         nilfs_btree_node_set_nchildren(right, rnchildren);
236 }
237
238 /* Assume that the buffer head corresponding to node is locked. */
239 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
240                                     __u64 key, __u64 ptr, int ncmax)
241 {
242         __le64 *dkeys;
243         __le64 *dptrs;
244         int nchildren;
245
246         dkeys = nilfs_btree_node_dkeys(node);
247         dptrs = nilfs_btree_node_dptrs(node, ncmax);
248         nchildren = nilfs_btree_node_get_nchildren(node);
249         if (index < nchildren) {
250                 memmove(dkeys + index + 1, dkeys + index,
251                         (nchildren - index) * sizeof(*dkeys));
252                 memmove(dptrs + index + 1, dptrs + index,
253                         (nchildren - index) * sizeof(*dptrs));
254         }
255         dkeys[index] = cpu_to_le64(key);
256         dptrs[index] = cpu_to_le64(ptr);
257         nchildren++;
258         nilfs_btree_node_set_nchildren(node, nchildren);
259 }
260
261 /* Assume that the buffer head corresponding to node is locked. */
262 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
263                                     __u64 *keyp, __u64 *ptrp, int ncmax)
264 {
265         __u64 key;
266         __u64 ptr;
267         __le64 *dkeys;
268         __le64 *dptrs;
269         int nchildren;
270
271         dkeys = nilfs_btree_node_dkeys(node);
272         dptrs = nilfs_btree_node_dptrs(node, ncmax);
273         key = le64_to_cpu(dkeys[index]);
274         ptr = le64_to_cpu(dptrs[index]);
275         nchildren = nilfs_btree_node_get_nchildren(node);
276         if (keyp != NULL)
277                 *keyp = key;
278         if (ptrp != NULL)
279                 *ptrp = ptr;
280
281         if (index < nchildren - 1) {
282                 memmove(dkeys + index, dkeys + index + 1,
283                         (nchildren - index - 1) * sizeof(*dkeys));
284                 memmove(dptrs + index, dptrs + index + 1,
285                         (nchildren - index - 1) * sizeof(*dptrs));
286         }
287         nchildren--;
288         nilfs_btree_node_set_nchildren(node, nchildren);
289 }
290
291 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
292                                    __u64 key, int *indexp)
293 {
294         __u64 nkey;
295         int index, low, high, s;
296
297         /* binary search */
298         low = 0;
299         high = nilfs_btree_node_get_nchildren(node) - 1;
300         index = 0;
301         s = 0;
302         while (low <= high) {
303                 index = (low + high) / 2;
304                 nkey = nilfs_btree_node_get_key(node, index);
305                 if (nkey == key) {
306                         s = 0;
307                         goto out;
308                 } else if (nkey < key) {
309                         low = index + 1;
310                         s = -1;
311                 } else {
312                         high = index - 1;
313                         s = 1;
314                 }
315         }
316
317         /* adjust index */
318         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
319                 if (s > 0 && index > 0)
320                         index--;
321         } else if (s < 0)
322                 index++;
323
324  out:
325         *indexp = index;
326
327         return s == 0;
328 }
329
330 /**
331  * nilfs_btree_node_broken - verify consistency of btree node
332  * @node: btree node block to be examined
333  * @size: node size (in bytes)
334  * @inode: host inode of btree
335  * @blocknr: block number
336  *
337  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
338  */
339 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
340                                    size_t size, struct inode *inode,
341                                    sector_t blocknr)
342 {
343         int level, flags, nchildren;
344         int ret = 0;
345
346         level = nilfs_btree_node_get_level(node);
347         flags = nilfs_btree_node_get_flags(node);
348         nchildren = nilfs_btree_node_get_nchildren(node);
349
350         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
351                      level >= NILFS_BTREE_LEVEL_MAX ||
352                      (flags & NILFS_BTREE_NODE_ROOT) ||
353                      nchildren < 0 ||
354                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
355                 nilfs_crit(inode->i_sb,
356                            "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
357                            inode->i_ino, (unsigned long long)blocknr, level,
358                            flags, nchildren);
359                 ret = 1;
360         }
361         return ret;
362 }
363
364 /**
365  * nilfs_btree_root_broken - verify consistency of btree root node
366  * @node: btree root node to be examined
367  * @inode: host inode of btree
368  *
369  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
370  */
371 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
372                                    struct inode *inode)
373 {
374         int level, flags, nchildren;
375         int ret = 0;
376
377         level = nilfs_btree_node_get_level(node);
378         flags = nilfs_btree_node_get_flags(node);
379         nchildren = nilfs_btree_node_get_nchildren(node);
380
381         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
382                      level >= NILFS_BTREE_LEVEL_MAX ||
383                      nchildren < 0 ||
384                      nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
385                 nilfs_crit(inode->i_sb,
386                            "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
387                            inode->i_ino, level, flags, nchildren);
388                 ret = 1;
389         }
390         return ret;
391 }
392
393 int nilfs_btree_broken_node_block(struct buffer_head *bh)
394 {
395         struct inode *inode;
396         int ret;
397
398         if (buffer_nilfs_checked(bh))
399                 return 0;
400
401         inode = bh->b_page->mapping->host;
402         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
403                                       bh->b_size, inode, bh->b_blocknr);
404         if (likely(!ret))
405                 set_buffer_nilfs_checked(bh);
406         return ret;
407 }
408
409 static struct nilfs_btree_node *
410 nilfs_btree_get_root(const struct nilfs_bmap *btree)
411 {
412         return (struct nilfs_btree_node *)btree->b_u.u_data;
413 }
414
415 static struct nilfs_btree_node *
416 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
417 {
418         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
419 }
420
421 static struct nilfs_btree_node *
422 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
423 {
424         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
425 }
426
427 static int nilfs_btree_height(const struct nilfs_bmap *btree)
428 {
429         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
430 }
431
432 static struct nilfs_btree_node *
433 nilfs_btree_get_node(const struct nilfs_bmap *btree,
434                      const struct nilfs_btree_path *path,
435                      int level, int *ncmaxp)
436 {
437         struct nilfs_btree_node *node;
438
439         if (level == nilfs_btree_height(btree) - 1) {
440                 node = nilfs_btree_get_root(btree);
441                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
442         } else {
443                 node = nilfs_btree_get_nonroot_node(path, level);
444                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
445         }
446         return node;
447 }
448
449 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
450                                 struct nilfs_btree_node *node, int level)
451 {
452         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
453                 dump_stack();
454                 nilfs_crit(btree->b_inode->i_sb,
455                            "btree level mismatch (ino=%lu): %d != %d",
456                            btree->b_inode->i_ino,
457                            nilfs_btree_node_get_level(node), level);
458                 return 1;
459         }
460         return 0;
461 }
462
463 struct nilfs_btree_readahead_info {
464         struct nilfs_btree_node *node;  /* parent node */
465         int max_ra_blocks;              /* max nof blocks to read ahead */
466         int index;                      /* current index on the parent node */
467         int ncmax;                      /* nof children in the parent node */
468 };
469
470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
471                                    struct buffer_head **bhp,
472                                    const struct nilfs_btree_readahead_info *ra)
473 {
474         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
475         struct address_space *btnc = btnc_inode->i_mapping;
476         struct buffer_head *bh, *ra_bh;
477         sector_t submit_ptr = 0;
478         int ret;
479
480         ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
481                                         &submit_ptr);
482         if (ret) {
483                 if (ret != -EEXIST)
484                         return ret;
485                 goto out_check;
486         }
487
488         if (ra) {
489                 int i, n;
490                 __u64 ptr2;
491
492                 /* read ahead sibling nodes */
493                 for (n = ra->max_ra_blocks, i = ra->index + 1;
494                      n > 0 && i < ra->ncmax; n--, i++) {
495                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
496
497                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
498                                                 REQ_OP_READ | REQ_RAHEAD,
499                                                 &ra_bh, &submit_ptr);
500                         if (likely(!ret || ret == -EEXIST))
501                                 brelse(ra_bh);
502                         else if (ret != -EBUSY)
503                                 break;
504                         if (!buffer_locked(bh))
505                                 goto out_no_wait;
506                 }
507         }
508
509         wait_on_buffer(bh);
510
511  out_no_wait:
512         if (!buffer_uptodate(bh)) {
513                 nilfs_err(btree->b_inode->i_sb,
514                           "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
515                           btree->b_inode->i_ino, (unsigned long long)ptr);
516                 brelse(bh);
517                 return -EIO;
518         }
519
520  out_check:
521         if (nilfs_btree_broken_node_block(bh)) {
522                 clear_buffer_uptodate(bh);
523                 brelse(bh);
524                 return -EINVAL;
525         }
526
527         *bhp = bh;
528         return 0;
529 }
530
531 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
532                                    struct buffer_head **bhp)
533 {
534         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
535 }
536
537 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
538                                  struct nilfs_btree_path *path,
539                                  __u64 key, __u64 *ptrp, int minlevel,
540                                  int readahead)
541 {
542         struct nilfs_btree_node *node;
543         struct nilfs_btree_readahead_info p, *ra;
544         __u64 ptr;
545         int level, index, found, ncmax, ret;
546
547         node = nilfs_btree_get_root(btree);
548         level = nilfs_btree_node_get_level(node);
549         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
550                 return -ENOENT;
551
552         found = nilfs_btree_node_lookup(node, key, &index);
553         ptr = nilfs_btree_node_get_ptr(node, index,
554                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
555         path[level].bp_bh = NULL;
556         path[level].bp_index = index;
557
558         ncmax = nilfs_btree_nchildren_per_block(btree);
559
560         while (--level >= minlevel) {
561                 ra = NULL;
562                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
563                         p.node = nilfs_btree_get_node(btree, path, level + 1,
564                                                       &p.ncmax);
565                         p.index = index;
566                         p.max_ra_blocks = 7;
567                         ra = &p;
568                 }
569                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
570                                               ra);
571                 if (ret < 0)
572                         return ret;
573
574                 node = nilfs_btree_get_nonroot_node(path, level);
575                 if (nilfs_btree_bad_node(btree, node, level))
576                         return -EINVAL;
577                 if (!found)
578                         found = nilfs_btree_node_lookup(node, key, &index);
579                 else
580                         index = 0;
581                 if (index < ncmax) {
582                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
583                 } else {
584                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
585                         /* insert */
586                         ptr = NILFS_BMAP_INVALID_PTR;
587                 }
588                 path[level].bp_index = index;
589         }
590         if (!found)
591                 return -ENOENT;
592
593         if (ptrp != NULL)
594                 *ptrp = ptr;
595
596         return 0;
597 }
598
599 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
600                                       struct nilfs_btree_path *path,
601                                       __u64 *keyp, __u64 *ptrp)
602 {
603         struct nilfs_btree_node *node;
604         __u64 ptr;
605         int index, level, ncmax, ret;
606
607         node = nilfs_btree_get_root(btree);
608         index = nilfs_btree_node_get_nchildren(node) - 1;
609         if (index < 0)
610                 return -ENOENT;
611         level = nilfs_btree_node_get_level(node);
612         ptr = nilfs_btree_node_get_ptr(node, index,
613                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
614         path[level].bp_bh = NULL;
615         path[level].bp_index = index;
616         ncmax = nilfs_btree_nchildren_per_block(btree);
617
618         for (level--; level > 0; level--) {
619                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
620                 if (ret < 0)
621                         return ret;
622                 node = nilfs_btree_get_nonroot_node(path, level);
623                 if (nilfs_btree_bad_node(btree, node, level))
624                         return -EINVAL;
625                 index = nilfs_btree_node_get_nchildren(node) - 1;
626                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
627                 path[level].bp_index = index;
628         }
629
630         if (keyp != NULL)
631                 *keyp = nilfs_btree_node_get_key(node, index);
632         if (ptrp != NULL)
633                 *ptrp = ptr;
634
635         return 0;
636 }
637
638 /**
639  * nilfs_btree_get_next_key - get next valid key from btree path array
640  * @btree: bmap struct of btree
641  * @path: array of nilfs_btree_path struct
642  * @minlevel: start level
643  * @nextkey: place to store the next valid key
644  *
645  * Return Value: If a next key was found, 0 is returned. Otherwise,
646  * -ENOENT is returned.
647  */
648 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
649                                     const struct nilfs_btree_path *path,
650                                     int minlevel, __u64 *nextkey)
651 {
652         struct nilfs_btree_node *node;
653         int maxlevel = nilfs_btree_height(btree) - 1;
654         int index, next_adj, level;
655
656         /* Next index is already set to bp_index for leaf nodes. */
657         next_adj = 0;
658         for (level = minlevel; level <= maxlevel; level++) {
659                 if (level == maxlevel)
660                         node = nilfs_btree_get_root(btree);
661                 else
662                         node = nilfs_btree_get_nonroot_node(path, level);
663
664                 index = path[level].bp_index + next_adj;
665                 if (index < nilfs_btree_node_get_nchildren(node)) {
666                         /* Next key is in this node */
667                         *nextkey = nilfs_btree_node_get_key(node, index);
668                         return 0;
669                 }
670                 /* For non-leaf nodes, next index is stored at bp_index + 1. */
671                 next_adj = 1;
672         }
673         return -ENOENT;
674 }
675
676 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
677                               __u64 key, int level, __u64 *ptrp)
678 {
679         struct nilfs_btree_path *path;
680         int ret;
681
682         path = nilfs_btree_alloc_path();
683         if (path == NULL)
684                 return -ENOMEM;
685
686         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
687
688         nilfs_btree_free_path(path);
689
690         return ret;
691 }
692
693 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
694                                      __u64 key, __u64 *ptrp,
695                                      unsigned int maxblocks)
696 {
697         struct nilfs_btree_path *path;
698         struct nilfs_btree_node *node;
699         struct inode *dat = NULL;
700         __u64 ptr, ptr2;
701         sector_t blocknr;
702         int level = NILFS_BTREE_LEVEL_NODE_MIN;
703         int ret, cnt, index, maxlevel, ncmax;
704         struct nilfs_btree_readahead_info p;
705
706         path = nilfs_btree_alloc_path();
707         if (path == NULL)
708                 return -ENOMEM;
709
710         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
711         if (ret < 0)
712                 goto out;
713
714         if (NILFS_BMAP_USE_VBN(btree)) {
715                 dat = nilfs_bmap_get_dat(btree);
716                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
717                 if (ret < 0)
718                         goto out;
719                 ptr = blocknr;
720         }
721         cnt = 1;
722         if (cnt == maxblocks)
723                 goto end;
724
725         maxlevel = nilfs_btree_height(btree) - 1;
726         node = nilfs_btree_get_node(btree, path, level, &ncmax);
727         index = path[level].bp_index + 1;
728         for (;;) {
729                 while (index < nilfs_btree_node_get_nchildren(node)) {
730                         if (nilfs_btree_node_get_key(node, index) !=
731                             key + cnt)
732                                 goto end;
733                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
734                         if (dat) {
735                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
736                                 if (ret < 0)
737                                         goto out;
738                                 ptr2 = blocknr;
739                         }
740                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
741                                 goto end;
742                         index++;
743                 }
744                 if (level == maxlevel)
745                         break;
746
747                 /* look-up right sibling node */
748                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
749                 p.index = path[level + 1].bp_index + 1;
750                 p.max_ra_blocks = 7;
751                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
752                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
753                         break;
754                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
755                 path[level + 1].bp_index = p.index;
756
757                 brelse(path[level].bp_bh);
758                 path[level].bp_bh = NULL;
759
760                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
761                                               &p);
762                 if (ret < 0)
763                         goto out;
764                 node = nilfs_btree_get_nonroot_node(path, level);
765                 ncmax = nilfs_btree_nchildren_per_block(btree);
766                 index = 0;
767                 path[level].bp_index = index;
768         }
769  end:
770         *ptrp = ptr;
771         ret = cnt;
772  out:
773         nilfs_btree_free_path(path);
774         return ret;
775 }
776
777 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
778                                     struct nilfs_btree_path *path,
779                                     int level, __u64 key)
780 {
781         if (level < nilfs_btree_height(btree) - 1) {
782                 do {
783                         nilfs_btree_node_set_key(
784                                 nilfs_btree_get_nonroot_node(path, level),
785                                 path[level].bp_index, key);
786                         if (!buffer_dirty(path[level].bp_bh))
787                                 mark_buffer_dirty(path[level].bp_bh);
788                 } while ((path[level].bp_index == 0) &&
789                          (++level < nilfs_btree_height(btree) - 1));
790         }
791
792         /* root */
793         if (level == nilfs_btree_height(btree) - 1) {
794                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
795                                          path[level].bp_index, key);
796         }
797 }
798
799 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
800                                   struct nilfs_btree_path *path,
801                                   int level, __u64 *keyp, __u64 *ptrp)
802 {
803         struct nilfs_btree_node *node;
804         int ncblk;
805
806         if (level < nilfs_btree_height(btree) - 1) {
807                 node = nilfs_btree_get_nonroot_node(path, level);
808                 ncblk = nilfs_btree_nchildren_per_block(btree);
809                 nilfs_btree_node_insert(node, path[level].bp_index,
810                                         *keyp, *ptrp, ncblk);
811                 if (!buffer_dirty(path[level].bp_bh))
812                         mark_buffer_dirty(path[level].bp_bh);
813
814                 if (path[level].bp_index == 0)
815                         nilfs_btree_promote_key(btree, path, level + 1,
816                                                 nilfs_btree_node_get_key(node,
817                                                                          0));
818         } else {
819                 node = nilfs_btree_get_root(btree);
820                 nilfs_btree_node_insert(node, path[level].bp_index,
821                                         *keyp, *ptrp,
822                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
823         }
824 }
825
826 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
827                                    struct nilfs_btree_path *path,
828                                    int level, __u64 *keyp, __u64 *ptrp)
829 {
830         struct nilfs_btree_node *node, *left;
831         int nchildren, lnchildren, n, move, ncblk;
832
833         node = nilfs_btree_get_nonroot_node(path, level);
834         left = nilfs_btree_get_sib_node(path, level);
835         nchildren = nilfs_btree_node_get_nchildren(node);
836         lnchildren = nilfs_btree_node_get_nchildren(left);
837         ncblk = nilfs_btree_nchildren_per_block(btree);
838         move = 0;
839
840         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
841         if (n > path[level].bp_index) {
842                 /* move insert point */
843                 n--;
844                 move = 1;
845         }
846
847         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
848
849         if (!buffer_dirty(path[level].bp_bh))
850                 mark_buffer_dirty(path[level].bp_bh);
851         if (!buffer_dirty(path[level].bp_sib_bh))
852                 mark_buffer_dirty(path[level].bp_sib_bh);
853
854         nilfs_btree_promote_key(btree, path, level + 1,
855                                 nilfs_btree_node_get_key(node, 0));
856
857         if (move) {
858                 brelse(path[level].bp_bh);
859                 path[level].bp_bh = path[level].bp_sib_bh;
860                 path[level].bp_sib_bh = NULL;
861                 path[level].bp_index += lnchildren;
862                 path[level + 1].bp_index--;
863         } else {
864                 brelse(path[level].bp_sib_bh);
865                 path[level].bp_sib_bh = NULL;
866                 path[level].bp_index -= n;
867         }
868
869         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
870 }
871
872 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
873                                     struct nilfs_btree_path *path,
874                                     int level, __u64 *keyp, __u64 *ptrp)
875 {
876         struct nilfs_btree_node *node, *right;
877         int nchildren, rnchildren, n, move, ncblk;
878
879         node = nilfs_btree_get_nonroot_node(path, level);
880         right = nilfs_btree_get_sib_node(path, level);
881         nchildren = nilfs_btree_node_get_nchildren(node);
882         rnchildren = nilfs_btree_node_get_nchildren(right);
883         ncblk = nilfs_btree_nchildren_per_block(btree);
884         move = 0;
885
886         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
887         if (n > nchildren - path[level].bp_index) {
888                 /* move insert point */
889                 n--;
890                 move = 1;
891         }
892
893         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
894
895         if (!buffer_dirty(path[level].bp_bh))
896                 mark_buffer_dirty(path[level].bp_bh);
897         if (!buffer_dirty(path[level].bp_sib_bh))
898                 mark_buffer_dirty(path[level].bp_sib_bh);
899
900         path[level + 1].bp_index++;
901         nilfs_btree_promote_key(btree, path, level + 1,
902                                 nilfs_btree_node_get_key(right, 0));
903         path[level + 1].bp_index--;
904
905         if (move) {
906                 brelse(path[level].bp_bh);
907                 path[level].bp_bh = path[level].bp_sib_bh;
908                 path[level].bp_sib_bh = NULL;
909                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
910                 path[level + 1].bp_index++;
911         } else {
912                 brelse(path[level].bp_sib_bh);
913                 path[level].bp_sib_bh = NULL;
914         }
915
916         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
917 }
918
919 static void nilfs_btree_split(struct nilfs_bmap *btree,
920                               struct nilfs_btree_path *path,
921                               int level, __u64 *keyp, __u64 *ptrp)
922 {
923         struct nilfs_btree_node *node, *right;
924         int nchildren, n, move, ncblk;
925
926         node = nilfs_btree_get_nonroot_node(path, level);
927         right = nilfs_btree_get_sib_node(path, level);
928         nchildren = nilfs_btree_node_get_nchildren(node);
929         ncblk = nilfs_btree_nchildren_per_block(btree);
930         move = 0;
931
932         n = (nchildren + 1) / 2;
933         if (n > nchildren - path[level].bp_index) {
934                 n--;
935                 move = 1;
936         }
937
938         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
939
940         if (!buffer_dirty(path[level].bp_bh))
941                 mark_buffer_dirty(path[level].bp_bh);
942         if (!buffer_dirty(path[level].bp_sib_bh))
943                 mark_buffer_dirty(path[level].bp_sib_bh);
944
945         if (move) {
946                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
947                 nilfs_btree_node_insert(right, path[level].bp_index,
948                                         *keyp, *ptrp, ncblk);
949
950                 *keyp = nilfs_btree_node_get_key(right, 0);
951                 *ptrp = path[level].bp_newreq.bpr_ptr;
952
953                 brelse(path[level].bp_bh);
954                 path[level].bp_bh = path[level].bp_sib_bh;
955                 path[level].bp_sib_bh = NULL;
956         } else {
957                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
958
959                 *keyp = nilfs_btree_node_get_key(right, 0);
960                 *ptrp = path[level].bp_newreq.bpr_ptr;
961
962                 brelse(path[level].bp_sib_bh);
963                 path[level].bp_sib_bh = NULL;
964         }
965
966         path[level + 1].bp_index++;
967 }
968
969 static void nilfs_btree_grow(struct nilfs_bmap *btree,
970                              struct nilfs_btree_path *path,
971                              int level, __u64 *keyp, __u64 *ptrp)
972 {
973         struct nilfs_btree_node *root, *child;
974         int n, ncblk;
975
976         root = nilfs_btree_get_root(btree);
977         child = nilfs_btree_get_sib_node(path, level);
978         ncblk = nilfs_btree_nchildren_per_block(btree);
979
980         n = nilfs_btree_node_get_nchildren(root);
981
982         nilfs_btree_node_move_right(root, child, n,
983                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
984         nilfs_btree_node_set_level(root, level + 1);
985
986         if (!buffer_dirty(path[level].bp_sib_bh))
987                 mark_buffer_dirty(path[level].bp_sib_bh);
988
989         path[level].bp_bh = path[level].bp_sib_bh;
990         path[level].bp_sib_bh = NULL;
991
992         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
993
994         *keyp = nilfs_btree_node_get_key(child, 0);
995         *ptrp = path[level].bp_newreq.bpr_ptr;
996 }
997
998 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
999                                    const struct nilfs_btree_path *path)
1000 {
1001         struct nilfs_btree_node *node;
1002         int level, ncmax;
1003
1004         if (path == NULL)
1005                 return NILFS_BMAP_INVALID_PTR;
1006
1007         /* left sibling */
1008         level = NILFS_BTREE_LEVEL_NODE_MIN;
1009         if (path[level].bp_index > 0) {
1010                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1011                 return nilfs_btree_node_get_ptr(node,
1012                                                 path[level].bp_index - 1,
1013                                                 ncmax);
1014         }
1015
1016         /* parent */
1017         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1018         if (level <= nilfs_btree_height(btree) - 1) {
1019                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1020                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1021                                                 ncmax);
1022         }
1023
1024         return NILFS_BMAP_INVALID_PTR;
1025 }
1026
1027 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1028                                        const struct nilfs_btree_path *path,
1029                                        __u64 key)
1030 {
1031         __u64 ptr;
1032
1033         ptr = nilfs_bmap_find_target_seq(btree, key);
1034         if (ptr != NILFS_BMAP_INVALID_PTR)
1035                 /* sequential access */
1036                 return ptr;
1037
1038         ptr = nilfs_btree_find_near(btree, path);
1039         if (ptr != NILFS_BMAP_INVALID_PTR)
1040                 /* near */
1041                 return ptr;
1042
1043         /* block group */
1044         return nilfs_bmap_find_target_in_group(btree);
1045 }
1046
1047 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1048                                       struct nilfs_btree_path *path,
1049                                       int *levelp, __u64 key, __u64 ptr,
1050                                       struct nilfs_bmap_stats *stats)
1051 {
1052         struct buffer_head *bh;
1053         struct nilfs_btree_node *node, *parent, *sib;
1054         __u64 sibptr;
1055         int pindex, level, ncmax, ncblk, ret;
1056         struct inode *dat = NULL;
1057
1058         stats->bs_nblocks = 0;
1059         level = NILFS_BTREE_LEVEL_DATA;
1060
1061         /* allocate a new ptr for data block */
1062         if (NILFS_BMAP_USE_VBN(btree)) {
1063                 path[level].bp_newreq.bpr_ptr =
1064                         nilfs_btree_find_target_v(btree, path, key);
1065                 dat = nilfs_bmap_get_dat(btree);
1066         }
1067
1068         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1069         if (ret < 0)
1070                 goto err_out_data;
1071
1072         ncblk = nilfs_btree_nchildren_per_block(btree);
1073
1074         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1075              level < nilfs_btree_height(btree) - 1;
1076              level++) {
1077                 node = nilfs_btree_get_nonroot_node(path, level);
1078                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1079                         path[level].bp_op = nilfs_btree_do_insert;
1080                         stats->bs_nblocks++;
1081                         goto out;
1082                 }
1083
1084                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1085                 pindex = path[level + 1].bp_index;
1086
1087                 /* left sibling */
1088                 if (pindex > 0) {
1089                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1090                                                           ncmax);
1091                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1092                         if (ret < 0)
1093                                 goto err_out_child_node;
1094                         sib = (struct nilfs_btree_node *)bh->b_data;
1095                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1096                                 path[level].bp_sib_bh = bh;
1097                                 path[level].bp_op = nilfs_btree_carry_left;
1098                                 stats->bs_nblocks++;
1099                                 goto out;
1100                         } else {
1101                                 brelse(bh);
1102                         }
1103                 }
1104
1105                 /* right sibling */
1106                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1107                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1108                                                           ncmax);
1109                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1110                         if (ret < 0)
1111                                 goto err_out_child_node;
1112                         sib = (struct nilfs_btree_node *)bh->b_data;
1113                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1114                                 path[level].bp_sib_bh = bh;
1115                                 path[level].bp_op = nilfs_btree_carry_right;
1116                                 stats->bs_nblocks++;
1117                                 goto out;
1118                         } else {
1119                                 brelse(bh);
1120                         }
1121                 }
1122
1123                 /* split */
1124                 path[level].bp_newreq.bpr_ptr =
1125                         path[level - 1].bp_newreq.bpr_ptr + 1;
1126                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1127                                                    &path[level].bp_newreq, dat);
1128                 if (ret < 0)
1129                         goto err_out_child_node;
1130                 ret = nilfs_btree_get_new_block(btree,
1131                                                 path[level].bp_newreq.bpr_ptr,
1132                                                 &bh);
1133                 if (ret < 0)
1134                         goto err_out_curr_node;
1135
1136                 stats->bs_nblocks++;
1137
1138                 sib = (struct nilfs_btree_node *)bh->b_data;
1139                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1140                 path[level].bp_sib_bh = bh;
1141                 path[level].bp_op = nilfs_btree_split;
1142         }
1143
1144         /* root */
1145         node = nilfs_btree_get_root(btree);
1146         if (nilfs_btree_node_get_nchildren(node) <
1147             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1148                 path[level].bp_op = nilfs_btree_do_insert;
1149                 stats->bs_nblocks++;
1150                 goto out;
1151         }
1152
1153         /* grow */
1154         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1155         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1156         if (ret < 0)
1157                 goto err_out_child_node;
1158         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1159                                         &bh);
1160         if (ret < 0)
1161                 goto err_out_curr_node;
1162
1163         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1164                               0, level, 0, ncblk, NULL, NULL);
1165         path[level].bp_sib_bh = bh;
1166         path[level].bp_op = nilfs_btree_grow;
1167
1168         level++;
1169         path[level].bp_op = nilfs_btree_do_insert;
1170
1171         /* a newly-created node block and a data block are added */
1172         stats->bs_nblocks += 2;
1173
1174         /* success */
1175  out:
1176         *levelp = level;
1177         return ret;
1178
1179         /* error */
1180  err_out_curr_node:
1181         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1182  err_out_child_node:
1183         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1184                 nilfs_btnode_delete(path[level].bp_sib_bh);
1185                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1186
1187         }
1188
1189         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1190  err_out_data:
1191         *levelp = level;
1192         stats->bs_nblocks = 0;
1193         return ret;
1194 }
1195
1196 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1197                                       struct nilfs_btree_path *path,
1198                                       int maxlevel, __u64 key, __u64 ptr)
1199 {
1200         struct inode *dat = NULL;
1201         int level;
1202
1203         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1204         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1205         if (NILFS_BMAP_USE_VBN(btree)) {
1206                 nilfs_bmap_set_target_v(btree, key, ptr);
1207                 dat = nilfs_bmap_get_dat(btree);
1208         }
1209
1210         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1211                 nilfs_bmap_commit_alloc_ptr(btree,
1212                                             &path[level - 1].bp_newreq, dat);
1213                 path[level].bp_op(btree, path, level, &key, &ptr);
1214         }
1215
1216         if (!nilfs_bmap_dirty(btree))
1217                 nilfs_bmap_set_dirty(btree);
1218 }
1219
1220 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1221 {
1222         struct nilfs_btree_path *path;
1223         struct nilfs_bmap_stats stats;
1224         int level, ret;
1225
1226         path = nilfs_btree_alloc_path();
1227         if (path == NULL)
1228                 return -ENOMEM;
1229
1230         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1231                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1232         if (ret != -ENOENT) {
1233                 if (ret == 0)
1234                         ret = -EEXIST;
1235                 goto out;
1236         }
1237
1238         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1239         if (ret < 0)
1240                 goto out;
1241         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1242         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1243
1244  out:
1245         nilfs_btree_free_path(path);
1246         return ret;
1247 }
1248
1249 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1250                                   struct nilfs_btree_path *path,
1251                                   int level, __u64 *keyp, __u64 *ptrp)
1252 {
1253         struct nilfs_btree_node *node;
1254         int ncblk;
1255
1256         if (level < nilfs_btree_height(btree) - 1) {
1257                 node = nilfs_btree_get_nonroot_node(path, level);
1258                 ncblk = nilfs_btree_nchildren_per_block(btree);
1259                 nilfs_btree_node_delete(node, path[level].bp_index,
1260                                         keyp, ptrp, ncblk);
1261                 if (!buffer_dirty(path[level].bp_bh))
1262                         mark_buffer_dirty(path[level].bp_bh);
1263                 if (path[level].bp_index == 0)
1264                         nilfs_btree_promote_key(btree, path, level + 1,
1265                                 nilfs_btree_node_get_key(node, 0));
1266         } else {
1267                 node = nilfs_btree_get_root(btree);
1268                 nilfs_btree_node_delete(node, path[level].bp_index,
1269                                         keyp, ptrp,
1270                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1271         }
1272 }
1273
1274 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1275                                     struct nilfs_btree_path *path,
1276                                     int level, __u64 *keyp, __u64 *ptrp)
1277 {
1278         struct nilfs_btree_node *node, *left;
1279         int nchildren, lnchildren, n, ncblk;
1280
1281         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1282
1283         node = nilfs_btree_get_nonroot_node(path, level);
1284         left = nilfs_btree_get_sib_node(path, level);
1285         nchildren = nilfs_btree_node_get_nchildren(node);
1286         lnchildren = nilfs_btree_node_get_nchildren(left);
1287         ncblk = nilfs_btree_nchildren_per_block(btree);
1288
1289         n = (nchildren + lnchildren) / 2 - nchildren;
1290
1291         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1292
1293         if (!buffer_dirty(path[level].bp_bh))
1294                 mark_buffer_dirty(path[level].bp_bh);
1295         if (!buffer_dirty(path[level].bp_sib_bh))
1296                 mark_buffer_dirty(path[level].bp_sib_bh);
1297
1298         nilfs_btree_promote_key(btree, path, level + 1,
1299                                 nilfs_btree_node_get_key(node, 0));
1300
1301         brelse(path[level].bp_sib_bh);
1302         path[level].bp_sib_bh = NULL;
1303         path[level].bp_index += n;
1304 }
1305
1306 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1307                                      struct nilfs_btree_path *path,
1308                                      int level, __u64 *keyp, __u64 *ptrp)
1309 {
1310         struct nilfs_btree_node *node, *right;
1311         int nchildren, rnchildren, n, ncblk;
1312
1313         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1314
1315         node = nilfs_btree_get_nonroot_node(path, level);
1316         right = nilfs_btree_get_sib_node(path, level);
1317         nchildren = nilfs_btree_node_get_nchildren(node);
1318         rnchildren = nilfs_btree_node_get_nchildren(right);
1319         ncblk = nilfs_btree_nchildren_per_block(btree);
1320
1321         n = (nchildren + rnchildren) / 2 - nchildren;
1322
1323         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1324
1325         if (!buffer_dirty(path[level].bp_bh))
1326                 mark_buffer_dirty(path[level].bp_bh);
1327         if (!buffer_dirty(path[level].bp_sib_bh))
1328                 mark_buffer_dirty(path[level].bp_sib_bh);
1329
1330         path[level + 1].bp_index++;
1331         nilfs_btree_promote_key(btree, path, level + 1,
1332                                 nilfs_btree_node_get_key(right, 0));
1333         path[level + 1].bp_index--;
1334
1335         brelse(path[level].bp_sib_bh);
1336         path[level].bp_sib_bh = NULL;
1337 }
1338
1339 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1340                                     struct nilfs_btree_path *path,
1341                                     int level, __u64 *keyp, __u64 *ptrp)
1342 {
1343         struct nilfs_btree_node *node, *left;
1344         int n, ncblk;
1345
1346         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1347
1348         node = nilfs_btree_get_nonroot_node(path, level);
1349         left = nilfs_btree_get_sib_node(path, level);
1350         ncblk = nilfs_btree_nchildren_per_block(btree);
1351
1352         n = nilfs_btree_node_get_nchildren(node);
1353
1354         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1355
1356         if (!buffer_dirty(path[level].bp_sib_bh))
1357                 mark_buffer_dirty(path[level].bp_sib_bh);
1358
1359         nilfs_btnode_delete(path[level].bp_bh);
1360         path[level].bp_bh = path[level].bp_sib_bh;
1361         path[level].bp_sib_bh = NULL;
1362         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1363 }
1364
1365 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1366                                      struct nilfs_btree_path *path,
1367                                      int level, __u64 *keyp, __u64 *ptrp)
1368 {
1369         struct nilfs_btree_node *node, *right;
1370         int n, ncblk;
1371
1372         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1373
1374         node = nilfs_btree_get_nonroot_node(path, level);
1375         right = nilfs_btree_get_sib_node(path, level);
1376         ncblk = nilfs_btree_nchildren_per_block(btree);
1377
1378         n = nilfs_btree_node_get_nchildren(right);
1379
1380         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1381
1382         if (!buffer_dirty(path[level].bp_bh))
1383                 mark_buffer_dirty(path[level].bp_bh);
1384
1385         nilfs_btnode_delete(path[level].bp_sib_bh);
1386         path[level].bp_sib_bh = NULL;
1387         path[level + 1].bp_index++;
1388 }
1389
1390 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1391                                struct nilfs_btree_path *path,
1392                                int level, __u64 *keyp, __u64 *ptrp)
1393 {
1394         struct nilfs_btree_node *root, *child;
1395         int n, ncblk;
1396
1397         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1398
1399         root = nilfs_btree_get_root(btree);
1400         child = nilfs_btree_get_nonroot_node(path, level);
1401         ncblk = nilfs_btree_nchildren_per_block(btree);
1402
1403         nilfs_btree_node_delete(root, 0, NULL, NULL,
1404                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1405         nilfs_btree_node_set_level(root, level);
1406         n = nilfs_btree_node_get_nchildren(child);
1407         nilfs_btree_node_move_left(root, child, n,
1408                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1409
1410         nilfs_btnode_delete(path[level].bp_bh);
1411         path[level].bp_bh = NULL;
1412 }
1413
1414 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1415                             struct nilfs_btree_path *path,
1416                             int level, __u64 *keyp, __u64 *ptrp)
1417 {
1418 }
1419
1420 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1421                                       struct nilfs_btree_path *path,
1422                                       int *levelp,
1423                                       struct nilfs_bmap_stats *stats,
1424                                       struct inode *dat)
1425 {
1426         struct buffer_head *bh;
1427         struct nilfs_btree_node *node, *parent, *sib;
1428         __u64 sibptr;
1429         int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1430
1431         ret = 0;
1432         stats->bs_nblocks = 0;
1433         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1434         ncblk = nilfs_btree_nchildren_per_block(btree);
1435
1436         for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1437              level < nilfs_btree_height(btree) - 1;
1438              level++) {
1439                 node = nilfs_btree_get_nonroot_node(path, level);
1440                 path[level].bp_oldreq.bpr_ptr =
1441                         nilfs_btree_node_get_ptr(node, dindex, ncblk);
1442                 ret = nilfs_bmap_prepare_end_ptr(btree,
1443                                                  &path[level].bp_oldreq, dat);
1444                 if (ret < 0)
1445                         goto err_out_child_node;
1446
1447                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1448                         path[level].bp_op = nilfs_btree_do_delete;
1449                         stats->bs_nblocks++;
1450                         goto out;
1451                 }
1452
1453                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1454                 pindex = path[level + 1].bp_index;
1455                 dindex = pindex;
1456
1457                 if (pindex > 0) {
1458                         /* left sibling */
1459                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1460                                                           ncmax);
1461                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1462                         if (ret < 0)
1463                                 goto err_out_curr_node;
1464                         sib = (struct nilfs_btree_node *)bh->b_data;
1465                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1466                                 path[level].bp_sib_bh = bh;
1467                                 path[level].bp_op = nilfs_btree_borrow_left;
1468                                 stats->bs_nblocks++;
1469                                 goto out;
1470                         } else {
1471                                 path[level].bp_sib_bh = bh;
1472                                 path[level].bp_op = nilfs_btree_concat_left;
1473                                 stats->bs_nblocks++;
1474                                 /* continue; */
1475                         }
1476                 } else if (pindex <
1477                            nilfs_btree_node_get_nchildren(parent) - 1) {
1478                         /* right sibling */
1479                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1480                                                           ncmax);
1481                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1482                         if (ret < 0)
1483                                 goto err_out_curr_node;
1484                         sib = (struct nilfs_btree_node *)bh->b_data;
1485                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1486                                 path[level].bp_sib_bh = bh;
1487                                 path[level].bp_op = nilfs_btree_borrow_right;
1488                                 stats->bs_nblocks++;
1489                                 goto out;
1490                         } else {
1491                                 path[level].bp_sib_bh = bh;
1492                                 path[level].bp_op = nilfs_btree_concat_right;
1493                                 stats->bs_nblocks++;
1494                                 /*
1495                                  * When merging right sibling node
1496                                  * into the current node, pointer to
1497                                  * the right sibling node must be
1498                                  * terminated instead.  The adjustment
1499                                  * below is required for that.
1500                                  */
1501                                 dindex = pindex + 1;
1502                                 /* continue; */
1503                         }
1504                 } else {
1505                         /* no siblings */
1506                         /* the only child of the root node */
1507                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1508                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1509                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1510                                 path[level].bp_op = nilfs_btree_shrink;
1511                                 stats->bs_nblocks += 2;
1512                                 level++;
1513                                 path[level].bp_op = nilfs_btree_nop;
1514                                 goto shrink_root_child;
1515                         } else {
1516                                 path[level].bp_op = nilfs_btree_do_delete;
1517                                 stats->bs_nblocks++;
1518                                 goto out;
1519                         }
1520                 }
1521         }
1522
1523         /* child of the root node is deleted */
1524         path[level].bp_op = nilfs_btree_do_delete;
1525         stats->bs_nblocks++;
1526
1527 shrink_root_child:
1528         node = nilfs_btree_get_root(btree);
1529         path[level].bp_oldreq.bpr_ptr =
1530                 nilfs_btree_node_get_ptr(node, dindex,
1531                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1532
1533         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1534         if (ret < 0)
1535                 goto err_out_child_node;
1536
1537         /* success */
1538  out:
1539         *levelp = level;
1540         return ret;
1541
1542         /* error */
1543  err_out_curr_node:
1544         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1545  err_out_child_node:
1546         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1547                 brelse(path[level].bp_sib_bh);
1548                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1549         }
1550         *levelp = level;
1551         stats->bs_nblocks = 0;
1552         return ret;
1553 }
1554
1555 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1556                                       struct nilfs_btree_path *path,
1557                                       int maxlevel, struct inode *dat)
1558 {
1559         int level;
1560
1561         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1562                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1563                 path[level].bp_op(btree, path, level, NULL, NULL);
1564         }
1565
1566         if (!nilfs_bmap_dirty(btree))
1567                 nilfs_bmap_set_dirty(btree);
1568 }
1569
1570 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1571
1572 {
1573         struct nilfs_btree_path *path;
1574         struct nilfs_bmap_stats stats;
1575         struct inode *dat;
1576         int level, ret;
1577
1578         path = nilfs_btree_alloc_path();
1579         if (path == NULL)
1580                 return -ENOMEM;
1581
1582         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1583                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1584         if (ret < 0)
1585                 goto out;
1586
1587
1588         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1589
1590         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1591         if (ret < 0)
1592                 goto out;
1593         nilfs_btree_commit_delete(btree, path, level, dat);
1594         nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1595
1596 out:
1597         nilfs_btree_free_path(path);
1598         return ret;
1599 }
1600
1601 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1602                                 __u64 *keyp)
1603 {
1604         struct nilfs_btree_path *path;
1605         const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1606         int ret;
1607
1608         path = nilfs_btree_alloc_path();
1609         if (!path)
1610                 return -ENOMEM;
1611
1612         ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1613         if (!ret)
1614                 *keyp = start;
1615         else if (ret == -ENOENT)
1616                 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1617
1618         nilfs_btree_free_path(path);
1619         return ret;
1620 }
1621
1622 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1623 {
1624         struct nilfs_btree_path *path;
1625         int ret;
1626
1627         path = nilfs_btree_alloc_path();
1628         if (path == NULL)
1629                 return -ENOMEM;
1630
1631         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1632
1633         nilfs_btree_free_path(path);
1634
1635         return ret;
1636 }
1637
1638 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1639 {
1640         struct buffer_head *bh;
1641         struct nilfs_btree_node *root, *node;
1642         __u64 maxkey, nextmaxkey;
1643         __u64 ptr;
1644         int nchildren, ret;
1645
1646         root = nilfs_btree_get_root(btree);
1647         switch (nilfs_btree_height(btree)) {
1648         case 2:
1649                 bh = NULL;
1650                 node = root;
1651                 break;
1652         case 3:
1653                 nchildren = nilfs_btree_node_get_nchildren(root);
1654                 if (nchildren > 1)
1655                         return 0;
1656                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1657                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1658                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1659                 if (ret < 0)
1660                         return ret;
1661                 node = (struct nilfs_btree_node *)bh->b_data;
1662                 break;
1663         default:
1664                 return 0;
1665         }
1666
1667         nchildren = nilfs_btree_node_get_nchildren(node);
1668         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1669         nextmaxkey = (nchildren > 1) ?
1670                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1671         brelse(bh);
1672
1673         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1674 }
1675
1676 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1677                                    __u64 *keys, __u64 *ptrs, int nitems)
1678 {
1679         struct buffer_head *bh;
1680         struct nilfs_btree_node *node, *root;
1681         __le64 *dkeys;
1682         __le64 *dptrs;
1683         __u64 ptr;
1684         int nchildren, ncmax, i, ret;
1685
1686         root = nilfs_btree_get_root(btree);
1687         switch (nilfs_btree_height(btree)) {
1688         case 2:
1689                 bh = NULL;
1690                 node = root;
1691                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1692                 break;
1693         case 3:
1694                 nchildren = nilfs_btree_node_get_nchildren(root);
1695                 WARN_ON(nchildren > 1);
1696                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1697                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1698                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1699                 if (ret < 0)
1700                         return ret;
1701                 node = (struct nilfs_btree_node *)bh->b_data;
1702                 ncmax = nilfs_btree_nchildren_per_block(btree);
1703                 break;
1704         default:
1705                 node = NULL;
1706                 return -EINVAL;
1707         }
1708
1709         nchildren = nilfs_btree_node_get_nchildren(node);
1710         if (nchildren < nitems)
1711                 nitems = nchildren;
1712         dkeys = nilfs_btree_node_dkeys(node);
1713         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1714         for (i = 0; i < nitems; i++) {
1715                 keys[i] = le64_to_cpu(dkeys[i]);
1716                 ptrs[i] = le64_to_cpu(dptrs[i]);
1717         }
1718
1719         brelse(bh);
1720
1721         return nitems;
1722 }
1723
1724 static int
1725 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1726                                        union nilfs_bmap_ptr_req *dreq,
1727                                        union nilfs_bmap_ptr_req *nreq,
1728                                        struct buffer_head **bhp,
1729                                        struct nilfs_bmap_stats *stats)
1730 {
1731         struct buffer_head *bh;
1732         struct inode *dat = NULL;
1733         int ret;
1734
1735         stats->bs_nblocks = 0;
1736
1737         /* for data */
1738         /* cannot find near ptr */
1739         if (NILFS_BMAP_USE_VBN(btree)) {
1740                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1741                 dat = nilfs_bmap_get_dat(btree);
1742         }
1743
1744         ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1745         if (ret < 0)
1746                 return ret;
1747
1748         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1749         if (ret < 0)
1750                 return ret;
1751
1752         *bhp = NULL;
1753         stats->bs_nblocks++;
1754         if (nreq != NULL) {
1755                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1756                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1757                 if (ret < 0)
1758                         goto err_out_dreq;
1759
1760                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1761                 if (ret < 0)
1762                         goto err_out_nreq;
1763
1764                 *bhp = bh;
1765                 stats->bs_nblocks++;
1766         }
1767
1768         /* success */
1769         return 0;
1770
1771         /* error */
1772  err_out_nreq:
1773         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1774  err_out_dreq:
1775         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1776         stats->bs_nblocks = 0;
1777         return ret;
1778
1779 }
1780
1781 static void
1782 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1783                                       __u64 key, __u64 ptr,
1784                                       const __u64 *keys, const __u64 *ptrs,
1785                                       int n,
1786                                       union nilfs_bmap_ptr_req *dreq,
1787                                       union nilfs_bmap_ptr_req *nreq,
1788                                       struct buffer_head *bh)
1789 {
1790         struct nilfs_btree_node *node;
1791         struct inode *dat;
1792         __u64 tmpptr;
1793         int ncblk;
1794
1795         /* free resources */
1796         if (btree->b_ops->bop_clear != NULL)
1797                 btree->b_ops->bop_clear(btree);
1798
1799         /* ptr must be a pointer to a buffer head. */
1800         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1801
1802         /* convert and insert */
1803         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1804         __nilfs_btree_init(btree);
1805         if (nreq != NULL) {
1806                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1807                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1808
1809                 /* create child node at level 1 */
1810                 node = (struct nilfs_btree_node *)bh->b_data;
1811                 ncblk = nilfs_btree_nchildren_per_block(btree);
1812                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1813                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1814                 if (!buffer_dirty(bh))
1815                         mark_buffer_dirty(bh);
1816                 if (!nilfs_bmap_dirty(btree))
1817                         nilfs_bmap_set_dirty(btree);
1818
1819                 brelse(bh);
1820
1821                 /* create root node at level 2 */
1822                 node = nilfs_btree_get_root(btree);
1823                 tmpptr = nreq->bpr_ptr;
1824                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1825                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1826                                       &keys[0], &tmpptr);
1827         } else {
1828                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1829
1830                 /* create root node at level 1 */
1831                 node = nilfs_btree_get_root(btree);
1832                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1833                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1834                                       keys, ptrs);
1835                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1836                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1837                 if (!nilfs_bmap_dirty(btree))
1838                         nilfs_bmap_set_dirty(btree);
1839         }
1840
1841         if (NILFS_BMAP_USE_VBN(btree))
1842                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1843 }
1844
1845 /**
1846  * nilfs_btree_convert_and_insert -
1847  * @bmap:
1848  * @key:
1849  * @ptr:
1850  * @keys:
1851  * @ptrs:
1852  * @n:
1853  */
1854 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1855                                    __u64 key, __u64 ptr,
1856                                    const __u64 *keys, const __u64 *ptrs, int n)
1857 {
1858         struct buffer_head *bh = NULL;
1859         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1860         struct nilfs_bmap_stats stats;
1861         int ret;
1862
1863         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1864                 di = &dreq;
1865                 ni = NULL;
1866         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1867                            nilfs_btree_node_size(btree))) {
1868                 di = &dreq;
1869                 ni = &nreq;
1870         } else {
1871                 di = NULL;
1872                 ni = NULL;
1873                 BUG();
1874         }
1875
1876         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1877                                                      &stats);
1878         if (ret < 0)
1879                 return ret;
1880         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1881                                               di, ni, bh);
1882         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1883         return 0;
1884 }
1885
1886 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1887                                    struct nilfs_btree_path *path,
1888                                    int level,
1889                                    struct buffer_head *bh)
1890 {
1891         while ((++level < nilfs_btree_height(btree) - 1) &&
1892                !buffer_dirty(path[level].bp_bh))
1893                 mark_buffer_dirty(path[level].bp_bh);
1894
1895         return 0;
1896 }
1897
1898 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1899                                         struct nilfs_btree_path *path,
1900                                         int level, struct inode *dat)
1901 {
1902         struct nilfs_btree_node *parent;
1903         int ncmax, ret;
1904
1905         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1906         path[level].bp_oldreq.bpr_ptr =
1907                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1908                                          ncmax);
1909         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1910         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1911                                        &path[level].bp_newreq.bpr_req);
1912         if (ret < 0)
1913                 return ret;
1914
1915         if (buffer_nilfs_node(path[level].bp_bh)) {
1916                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1917                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1918                 path[level].bp_ctxt.bh = path[level].bp_bh;
1919                 ret = nilfs_btnode_prepare_change_key(
1920                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1921                         &path[level].bp_ctxt);
1922                 if (ret < 0) {
1923                         nilfs_dat_abort_update(dat,
1924                                                &path[level].bp_oldreq.bpr_req,
1925                                                &path[level].bp_newreq.bpr_req);
1926                         return ret;
1927                 }
1928         }
1929
1930         return 0;
1931 }
1932
1933 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1934                                         struct nilfs_btree_path *path,
1935                                         int level, struct inode *dat)
1936 {
1937         struct nilfs_btree_node *parent;
1938         int ncmax;
1939
1940         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1941                                 &path[level].bp_newreq.bpr_req,
1942                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1943
1944         if (buffer_nilfs_node(path[level].bp_bh)) {
1945                 nilfs_btnode_commit_change_key(
1946                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1947                         &path[level].bp_ctxt);
1948                 path[level].bp_bh = path[level].bp_ctxt.bh;
1949         }
1950         set_buffer_nilfs_volatile(path[level].bp_bh);
1951
1952         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1953         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1954                                  path[level].bp_newreq.bpr_ptr, ncmax);
1955 }
1956
1957 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1958                                        struct nilfs_btree_path *path,
1959                                        int level, struct inode *dat)
1960 {
1961         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1962                                &path[level].bp_newreq.bpr_req);
1963         if (buffer_nilfs_node(path[level].bp_bh))
1964                 nilfs_btnode_abort_change_key(
1965                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1966                         &path[level].bp_ctxt);
1967 }
1968
1969 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1970                                            struct nilfs_btree_path *path,
1971                                            int minlevel, int *maxlevelp,
1972                                            struct inode *dat)
1973 {
1974         int level, ret;
1975
1976         level = minlevel;
1977         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1978                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1979                 if (ret < 0)
1980                         return ret;
1981         }
1982         while ((++level < nilfs_btree_height(btree) - 1) &&
1983                !buffer_dirty(path[level].bp_bh)) {
1984
1985                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1986                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1987                 if (ret < 0)
1988                         goto out;
1989         }
1990
1991         /* success */
1992         *maxlevelp = level - 1;
1993         return 0;
1994
1995         /* error */
1996  out:
1997         while (--level > minlevel)
1998                 nilfs_btree_abort_update_v(btree, path, level, dat);
1999         if (!buffer_nilfs_volatile(path[level].bp_bh))
2000                 nilfs_btree_abort_update_v(btree, path, level, dat);
2001         return ret;
2002 }
2003
2004 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2005                                            struct nilfs_btree_path *path,
2006                                            int minlevel, int maxlevel,
2007                                            struct buffer_head *bh,
2008                                            struct inode *dat)
2009 {
2010         int level;
2011
2012         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2013                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2014
2015         for (level = minlevel + 1; level <= maxlevel; level++)
2016                 nilfs_btree_commit_update_v(btree, path, level, dat);
2017 }
2018
2019 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2020                                    struct nilfs_btree_path *path,
2021                                    int level, struct buffer_head *bh)
2022 {
2023         int maxlevel = 0, ret;
2024         struct nilfs_btree_node *parent;
2025         struct inode *dat = nilfs_bmap_get_dat(btree);
2026         __u64 ptr;
2027         int ncmax;
2028
2029         get_bh(bh);
2030         path[level].bp_bh = bh;
2031         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2032                                               dat);
2033         if (ret < 0)
2034                 goto out;
2035
2036         if (buffer_nilfs_volatile(path[level].bp_bh)) {
2037                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2038                 ptr = nilfs_btree_node_get_ptr(parent,
2039                                                path[level + 1].bp_index,
2040                                                ncmax);
2041                 ret = nilfs_dat_mark_dirty(dat, ptr);
2042                 if (ret < 0)
2043                         goto out;
2044         }
2045
2046         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2047
2048  out:
2049         brelse(path[level].bp_bh);
2050         path[level].bp_bh = NULL;
2051         return ret;
2052 }
2053
2054 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2055                                  struct buffer_head *bh)
2056 {
2057         struct nilfs_btree_path *path;
2058         struct nilfs_btree_node *node;
2059         __u64 key;
2060         int level, ret;
2061
2062         WARN_ON(!buffer_dirty(bh));
2063
2064         path = nilfs_btree_alloc_path();
2065         if (path == NULL)
2066                 return -ENOMEM;
2067
2068         if (buffer_nilfs_node(bh)) {
2069                 node = (struct nilfs_btree_node *)bh->b_data;
2070                 key = nilfs_btree_node_get_key(node, 0);
2071                 level = nilfs_btree_node_get_level(node);
2072         } else {
2073                 key = nilfs_bmap_data_get_key(btree, bh);
2074                 level = NILFS_BTREE_LEVEL_DATA;
2075         }
2076
2077         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2078         if (ret < 0) {
2079                 if (unlikely(ret == -ENOENT))
2080                         nilfs_crit(btree->b_inode->i_sb,
2081                                    "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2082                                    btree->b_inode->i_ino,
2083                                    (unsigned long long)key, level);
2084                 goto out;
2085         }
2086
2087         ret = NILFS_BMAP_USE_VBN(btree) ?
2088                 nilfs_btree_propagate_v(btree, path, level, bh) :
2089                 nilfs_btree_propagate_p(btree, path, level, bh);
2090
2091  out:
2092         nilfs_btree_free_path(path);
2093
2094         return ret;
2095 }
2096
2097 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2098                                     struct buffer_head *bh)
2099 {
2100         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2101 }
2102
2103 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2104                                          struct list_head *lists,
2105                                          struct buffer_head *bh)
2106 {
2107         struct list_head *head;
2108         struct buffer_head *cbh;
2109         struct nilfs_btree_node *node, *cnode;
2110         __u64 key, ckey;
2111         int level;
2112
2113         get_bh(bh);
2114         node = (struct nilfs_btree_node *)bh->b_data;
2115         key = nilfs_btree_node_get_key(node, 0);
2116         level = nilfs_btree_node_get_level(node);
2117         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2118             level >= NILFS_BTREE_LEVEL_MAX) {
2119                 dump_stack();
2120                 nilfs_warn(btree->b_inode->i_sb,
2121                            "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2122                            level, (unsigned long long)key,
2123                            btree->b_inode->i_ino,
2124                            (unsigned long long)bh->b_blocknr);
2125                 return;
2126         }
2127
2128         list_for_each(head, &lists[level]) {
2129                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2130                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2131                 ckey = nilfs_btree_node_get_key(cnode, 0);
2132                 if (key < ckey)
2133                         break;
2134         }
2135         list_add_tail(&bh->b_assoc_buffers, head);
2136 }
2137
2138 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2139                                              struct list_head *listp)
2140 {
2141         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2142         struct address_space *btcache = btnc_inode->i_mapping;
2143         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2144         struct pagevec pvec;
2145         struct buffer_head *bh, *head;
2146         pgoff_t index = 0;
2147         int level, i;
2148
2149         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2150              level < NILFS_BTREE_LEVEL_MAX;
2151              level++)
2152                 INIT_LIST_HEAD(&lists[level]);
2153
2154         pagevec_init(&pvec);
2155
2156         while (pagevec_lookup_tag(&pvec, btcache, &index,
2157                                         PAGECACHE_TAG_DIRTY)) {
2158                 for (i = 0; i < pagevec_count(&pvec); i++) {
2159                         bh = head = page_buffers(pvec.pages[i]);
2160                         do {
2161                                 if (buffer_dirty(bh))
2162                                         nilfs_btree_add_dirty_buffer(btree,
2163                                                                      lists, bh);
2164                         } while ((bh = bh->b_this_page) != head);
2165                 }
2166                 pagevec_release(&pvec);
2167                 cond_resched();
2168         }
2169
2170         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2171              level < NILFS_BTREE_LEVEL_MAX;
2172              level++)
2173                 list_splice_tail(&lists[level], listp);
2174 }
2175
2176 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2177                                 struct nilfs_btree_path *path,
2178                                 int level,
2179                                 struct buffer_head **bh,
2180                                 sector_t blocknr,
2181                                 union nilfs_binfo *binfo)
2182 {
2183         struct nilfs_btree_node *parent;
2184         __u64 key;
2185         __u64 ptr;
2186         int ncmax, ret;
2187
2188         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2189         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2190                                        ncmax);
2191         if (buffer_nilfs_node(*bh)) {
2192                 path[level].bp_ctxt.oldkey = ptr;
2193                 path[level].bp_ctxt.newkey = blocknr;
2194                 path[level].bp_ctxt.bh = *bh;
2195                 ret = nilfs_btnode_prepare_change_key(
2196                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2197                         &path[level].bp_ctxt);
2198                 if (ret < 0)
2199                         return ret;
2200                 nilfs_btnode_commit_change_key(
2201                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2202                         &path[level].bp_ctxt);
2203                 *bh = path[level].bp_ctxt.bh;
2204         }
2205
2206         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2207                                  ncmax);
2208
2209         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2210         /* on-disk format */
2211         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2212         binfo->bi_dat.bi_level = level;
2213
2214         return 0;
2215 }
2216
2217 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2218                                 struct nilfs_btree_path *path,
2219                                 int level,
2220                                 struct buffer_head **bh,
2221                                 sector_t blocknr,
2222                                 union nilfs_binfo *binfo)
2223 {
2224         struct nilfs_btree_node *parent;
2225         struct inode *dat = nilfs_bmap_get_dat(btree);
2226         __u64 key;
2227         __u64 ptr;
2228         union nilfs_bmap_ptr_req req;
2229         int ncmax, ret;
2230
2231         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2232         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2233                                        ncmax);
2234         req.bpr_ptr = ptr;
2235         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2236         if (ret < 0)
2237                 return ret;
2238         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2239
2240         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2241         /* on-disk format */
2242         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2243         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2244
2245         return 0;
2246 }
2247
2248 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2249                               struct buffer_head **bh,
2250                               sector_t blocknr,
2251                               union nilfs_binfo *binfo)
2252 {
2253         struct nilfs_btree_path *path;
2254         struct nilfs_btree_node *node;
2255         __u64 key;
2256         int level, ret;
2257
2258         path = nilfs_btree_alloc_path();
2259         if (path == NULL)
2260                 return -ENOMEM;
2261
2262         if (buffer_nilfs_node(*bh)) {
2263                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2264                 key = nilfs_btree_node_get_key(node, 0);
2265                 level = nilfs_btree_node_get_level(node);
2266         } else {
2267                 key = nilfs_bmap_data_get_key(btree, *bh);
2268                 level = NILFS_BTREE_LEVEL_DATA;
2269         }
2270
2271         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2272         if (ret < 0) {
2273                 WARN_ON(ret == -ENOENT);
2274                 goto out;
2275         }
2276
2277         ret = NILFS_BMAP_USE_VBN(btree) ?
2278                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2279                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2280
2281  out:
2282         nilfs_btree_free_path(path);
2283
2284         return ret;
2285 }
2286
2287 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2288                                  struct buffer_head **bh,
2289                                  sector_t blocknr,
2290                                  union nilfs_binfo *binfo)
2291 {
2292         struct nilfs_btree_node *node;
2293         __u64 key;
2294         int ret;
2295
2296         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2297                              blocknr);
2298         if (ret < 0)
2299                 return ret;
2300
2301         if (buffer_nilfs_node(*bh)) {
2302                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2303                 key = nilfs_btree_node_get_key(node, 0);
2304         } else
2305                 key = nilfs_bmap_data_get_key(btree, *bh);
2306
2307         /* on-disk format */
2308         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2309         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2310
2311         return 0;
2312 }
2313
2314 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2315 {
2316         struct buffer_head *bh;
2317         struct nilfs_btree_path *path;
2318         __u64 ptr;
2319         int ret;
2320
2321         path = nilfs_btree_alloc_path();
2322         if (path == NULL)
2323                 return -ENOMEM;
2324
2325         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2326         if (ret < 0) {
2327                 WARN_ON(ret == -ENOENT);
2328                 goto out;
2329         }
2330         ret = nilfs_btree_get_block(btree, ptr, &bh);
2331         if (ret < 0) {
2332                 WARN_ON(ret == -ENOENT);
2333                 goto out;
2334         }
2335
2336         if (!buffer_dirty(bh))
2337                 mark_buffer_dirty(bh);
2338         brelse(bh);
2339         if (!nilfs_bmap_dirty(btree))
2340                 nilfs_bmap_set_dirty(btree);
2341
2342  out:
2343         nilfs_btree_free_path(path);
2344         return ret;
2345 }
2346
2347 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2348         .bop_lookup             =       nilfs_btree_lookup,
2349         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2350         .bop_insert             =       nilfs_btree_insert,
2351         .bop_delete             =       nilfs_btree_delete,
2352         .bop_clear              =       NULL,
2353
2354         .bop_propagate          =       nilfs_btree_propagate,
2355
2356         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2357
2358         .bop_assign             =       nilfs_btree_assign,
2359         .bop_mark               =       nilfs_btree_mark,
2360
2361         .bop_seek_key           =       nilfs_btree_seek_key,
2362         .bop_last_key           =       nilfs_btree_last_key,
2363
2364         .bop_check_insert       =       NULL,
2365         .bop_check_delete       =       nilfs_btree_check_delete,
2366         .bop_gather_data        =       nilfs_btree_gather_data,
2367 };
2368
2369 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2370         .bop_lookup             =       NULL,
2371         .bop_lookup_contig      =       NULL,
2372         .bop_insert             =       NULL,
2373         .bop_delete             =       NULL,
2374         .bop_clear              =       NULL,
2375
2376         .bop_propagate          =       nilfs_btree_propagate_gc,
2377
2378         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2379
2380         .bop_assign             =       nilfs_btree_assign_gc,
2381         .bop_mark               =       NULL,
2382
2383         .bop_seek_key           =       NULL,
2384         .bop_last_key           =       NULL,
2385
2386         .bop_check_insert       =       NULL,
2387         .bop_check_delete       =       NULL,
2388         .bop_gather_data        =       NULL,
2389 };
2390
2391 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2392 {
2393         bmap->b_ops = &nilfs_btree_ops;
2394         bmap->b_nchildren_per_block =
2395                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2396 }
2397
2398 int nilfs_btree_init(struct nilfs_bmap *bmap)
2399 {
2400         int ret = 0;
2401
2402         __nilfs_btree_init(bmap);
2403
2404         if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2405                 ret = -EIO;
2406         else
2407                 ret = nilfs_attach_btree_node_cache(
2408                         &NILFS_BMAP_I(bmap)->vfs_inode);
2409
2410         return ret;
2411 }
2412
2413 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2414 {
2415         bmap->b_ops = &nilfs_btree_ops_gc;
2416         bmap->b_nchildren_per_block =
2417                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2418 }