mm: memcg: clean up fault accounting
[platform/adaptation/renesas_rcar/renesas_kernel.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35 {
36         struct nilfs_btree_path *path;
37         int level = NILFS_BTREE_LEVEL_DATA;
38
39         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
40         if (path == NULL)
41                 goto out;
42
43         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
44                 path[level].bp_bh = NULL;
45                 path[level].bp_sib_bh = NULL;
46                 path[level].bp_index = 0;
47                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
48                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
49                 path[level].bp_op = NULL;
50         }
51
52 out:
53         return path;
54 }
55
56 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
57 {
58         int level = NILFS_BTREE_LEVEL_DATA;
59
60         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
61                 brelse(path[level].bp_bh);
62
63         kmem_cache_free(nilfs_btree_path_cache, path);
64 }
65
66 /*
67  * B-tree node operations
68  */
69 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
70                                      __u64 ptr, struct buffer_head **bhp)
71 {
72         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
73         struct buffer_head *bh;
74
75         bh = nilfs_btnode_create_block(btnc, ptr);
76         if (!bh)
77                 return -ENOMEM;
78
79         set_buffer_nilfs_volatile(bh);
80         *bhp = bh;
81         return 0;
82 }
83
84 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
85 {
86         return node->bn_flags;
87 }
88
89 static void
90 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
91 {
92         node->bn_flags = flags;
93 }
94
95 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
96 {
97         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
98 }
99
100 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
101 {
102         return node->bn_level;
103 }
104
105 static void
106 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
107 {
108         node->bn_level = level;
109 }
110
111 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
112 {
113         return le16_to_cpu(node->bn_nchildren);
114 }
115
116 static void
117 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
118 {
119         node->bn_nchildren = cpu_to_le16(nchildren);
120 }
121
122 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
123 {
124         return 1 << btree->b_inode->i_blkbits;
125 }
126
127 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
128 {
129         return btree->b_nchildren_per_block;
130 }
131
132 static __le64 *
133 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
134 {
135         return (__le64 *)((char *)(node + 1) +
136                           (nilfs_btree_node_root(node) ?
137                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
138 }
139
140 static __le64 *
141 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
142 {
143         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
144 }
145
146 static __u64
147 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
148 {
149         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
150 }
151
152 static void
153 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
154 {
155         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
156 }
157
158 static __u64
159 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
160                          int ncmax)
161 {
162         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
163 }
164
165 static void
166 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
167                          int ncmax)
168 {
169         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
170 }
171
172 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
173                                   int level, int nchildren, int ncmax,
174                                   const __u64 *keys, const __u64 *ptrs)
175 {
176         __le64 *dkeys;
177         __le64 *dptrs;
178         int i;
179
180         nilfs_btree_node_set_flags(node, flags);
181         nilfs_btree_node_set_level(node, level);
182         nilfs_btree_node_set_nchildren(node, nchildren);
183
184         dkeys = nilfs_btree_node_dkeys(node);
185         dptrs = nilfs_btree_node_dptrs(node, ncmax);
186         for (i = 0; i < nchildren; i++) {
187                 dkeys[i] = cpu_to_le64(keys[i]);
188                 dptrs[i] = cpu_to_le64(ptrs[i]);
189         }
190 }
191
192 /* Assume the buffer heads corresponding to left and right are locked. */
193 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
194                                        struct nilfs_btree_node *right,
195                                        int n, int lncmax, int rncmax)
196 {
197         __le64 *ldkeys, *rdkeys;
198         __le64 *ldptrs, *rdptrs;
199         int lnchildren, rnchildren;
200
201         ldkeys = nilfs_btree_node_dkeys(left);
202         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
203         lnchildren = nilfs_btree_node_get_nchildren(left);
204
205         rdkeys = nilfs_btree_node_dkeys(right);
206         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
207         rnchildren = nilfs_btree_node_get_nchildren(right);
208
209         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
210         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
211         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
212         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
213
214         lnchildren += n;
215         rnchildren -= n;
216         nilfs_btree_node_set_nchildren(left, lnchildren);
217         nilfs_btree_node_set_nchildren(right, rnchildren);
218 }
219
220 /* Assume that the buffer heads corresponding to left and right are locked. */
221 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
222                                         struct nilfs_btree_node *right,
223                                         int n, int lncmax, int rncmax)
224 {
225         __le64 *ldkeys, *rdkeys;
226         __le64 *ldptrs, *rdptrs;
227         int lnchildren, rnchildren;
228
229         ldkeys = nilfs_btree_node_dkeys(left);
230         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
231         lnchildren = nilfs_btree_node_get_nchildren(left);
232
233         rdkeys = nilfs_btree_node_dkeys(right);
234         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
235         rnchildren = nilfs_btree_node_get_nchildren(right);
236
237         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
238         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
239         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
240         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
241
242         lnchildren -= n;
243         rnchildren += n;
244         nilfs_btree_node_set_nchildren(left, lnchildren);
245         nilfs_btree_node_set_nchildren(right, rnchildren);
246 }
247
248 /* Assume that the buffer head corresponding to node is locked. */
249 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
250                                     __u64 key, __u64 ptr, int ncmax)
251 {
252         __le64 *dkeys;
253         __le64 *dptrs;
254         int nchildren;
255
256         dkeys = nilfs_btree_node_dkeys(node);
257         dptrs = nilfs_btree_node_dptrs(node, ncmax);
258         nchildren = nilfs_btree_node_get_nchildren(node);
259         if (index < nchildren) {
260                 memmove(dkeys + index + 1, dkeys + index,
261                         (nchildren - index) * sizeof(*dkeys));
262                 memmove(dptrs + index + 1, dptrs + index,
263                         (nchildren - index) * sizeof(*dptrs));
264         }
265         dkeys[index] = cpu_to_le64(key);
266         dptrs[index] = cpu_to_le64(ptr);
267         nchildren++;
268         nilfs_btree_node_set_nchildren(node, nchildren);
269 }
270
271 /* Assume that the buffer head corresponding to node is locked. */
272 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
273                                     __u64 *keyp, __u64 *ptrp, int ncmax)
274 {
275         __u64 key;
276         __u64 ptr;
277         __le64 *dkeys;
278         __le64 *dptrs;
279         int nchildren;
280
281         dkeys = nilfs_btree_node_dkeys(node);
282         dptrs = nilfs_btree_node_dptrs(node, ncmax);
283         key = le64_to_cpu(dkeys[index]);
284         ptr = le64_to_cpu(dptrs[index]);
285         nchildren = nilfs_btree_node_get_nchildren(node);
286         if (keyp != NULL)
287                 *keyp = key;
288         if (ptrp != NULL)
289                 *ptrp = ptr;
290
291         if (index < nchildren - 1) {
292                 memmove(dkeys + index, dkeys + index + 1,
293                         (nchildren - index - 1) * sizeof(*dkeys));
294                 memmove(dptrs + index, dptrs + index + 1,
295                         (nchildren - index - 1) * sizeof(*dptrs));
296         }
297         nchildren--;
298         nilfs_btree_node_set_nchildren(node, nchildren);
299 }
300
301 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
302                                    __u64 key, int *indexp)
303 {
304         __u64 nkey;
305         int index, low, high, s;
306
307         /* binary search */
308         low = 0;
309         high = nilfs_btree_node_get_nchildren(node) - 1;
310         index = 0;
311         s = 0;
312         while (low <= high) {
313                 index = (low + high) / 2;
314                 nkey = nilfs_btree_node_get_key(node, index);
315                 if (nkey == key) {
316                         s = 0;
317                         goto out;
318                 } else if (nkey < key) {
319                         low = index + 1;
320                         s = -1;
321                 } else {
322                         high = index - 1;
323                         s = 1;
324                 }
325         }
326
327         /* adjust index */
328         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
329                 if (s > 0 && index > 0)
330                         index--;
331         } else if (s < 0)
332                 index++;
333
334  out:
335         *indexp = index;
336
337         return s == 0;
338 }
339
340 /**
341  * nilfs_btree_node_broken - verify consistency of btree node
342  * @node: btree node block to be examined
343  * @size: node size (in bytes)
344  * @blocknr: block number
345  *
346  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
347  */
348 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
349                                    size_t size, sector_t blocknr)
350 {
351         int level, flags, nchildren;
352         int ret = 0;
353
354         level = nilfs_btree_node_get_level(node);
355         flags = nilfs_btree_node_get_flags(node);
356         nchildren = nilfs_btree_node_get_nchildren(node);
357
358         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
359                      level >= NILFS_BTREE_LEVEL_MAX ||
360                      (flags & NILFS_BTREE_NODE_ROOT) ||
361                      nchildren < 0 ||
362                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
363                 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
364                        "level = %d, flags = 0x%x, nchildren = %d\n",
365                        (unsigned long long)blocknr, level, flags, nchildren);
366                 ret = 1;
367         }
368         return ret;
369 }
370
371 int nilfs_btree_broken_node_block(struct buffer_head *bh)
372 {
373         int ret;
374
375         if (buffer_nilfs_checked(bh))
376                 return 0;
377
378         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
379                                        bh->b_size, bh->b_blocknr);
380         if (likely(!ret))
381                 set_buffer_nilfs_checked(bh);
382         return ret;
383 }
384
385 static struct nilfs_btree_node *
386 nilfs_btree_get_root(const struct nilfs_bmap *btree)
387 {
388         return (struct nilfs_btree_node *)btree->b_u.u_data;
389 }
390
391 static struct nilfs_btree_node *
392 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
393 {
394         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
395 }
396
397 static struct nilfs_btree_node *
398 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
399 {
400         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
401 }
402
403 static int nilfs_btree_height(const struct nilfs_bmap *btree)
404 {
405         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
406 }
407
408 static struct nilfs_btree_node *
409 nilfs_btree_get_node(const struct nilfs_bmap *btree,
410                      const struct nilfs_btree_path *path,
411                      int level, int *ncmaxp)
412 {
413         struct nilfs_btree_node *node;
414
415         if (level == nilfs_btree_height(btree) - 1) {
416                 node = nilfs_btree_get_root(btree);
417                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
418         } else {
419                 node = nilfs_btree_get_nonroot_node(path, level);
420                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
421         }
422         return node;
423 }
424
425 static int
426 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
427 {
428         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
429                 dump_stack();
430                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
431                        nilfs_btree_node_get_level(node), level);
432                 return 1;
433         }
434         return 0;
435 }
436
437 struct nilfs_btree_readahead_info {
438         struct nilfs_btree_node *node;  /* parent node */
439         int max_ra_blocks;              /* max nof blocks to read ahead */
440         int index;                      /* current index on the parent node */
441         int ncmax;                      /* nof children in the parent node */
442 };
443
444 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
445                                    struct buffer_head **bhp,
446                                    const struct nilfs_btree_readahead_info *ra)
447 {
448         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
449         struct buffer_head *bh, *ra_bh;
450         sector_t submit_ptr = 0;
451         int ret;
452
453         ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
454         if (ret) {
455                 if (ret != -EEXIST)
456                         return ret;
457                 goto out_check;
458         }
459
460         if (ra) {
461                 int i, n;
462                 __u64 ptr2;
463
464                 /* read ahead sibling nodes */
465                 for (n = ra->max_ra_blocks, i = ra->index + 1;
466                      n > 0 && i < ra->ncmax; n--, i++) {
467                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
468
469                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
470                                                         &ra_bh, &submit_ptr);
471                         if (likely(!ret || ret == -EEXIST))
472                                 brelse(ra_bh);
473                         else if (ret != -EBUSY)
474                                 break;
475                         if (!buffer_locked(bh))
476                                 goto out_no_wait;
477                 }
478         }
479
480         wait_on_buffer(bh);
481
482  out_no_wait:
483         if (!buffer_uptodate(bh)) {
484                 brelse(bh);
485                 return -EIO;
486         }
487
488  out_check:
489         if (nilfs_btree_broken_node_block(bh)) {
490                 clear_buffer_uptodate(bh);
491                 brelse(bh);
492                 return -EINVAL;
493         }
494
495         *bhp = bh;
496         return 0;
497 }
498
499 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
500                                    struct buffer_head **bhp)
501 {
502         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
503 }
504
505 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
506                                  struct nilfs_btree_path *path,
507                                  __u64 key, __u64 *ptrp, int minlevel,
508                                  int readahead)
509 {
510         struct nilfs_btree_node *node;
511         struct nilfs_btree_readahead_info p, *ra;
512         __u64 ptr;
513         int level, index, found, ncmax, ret;
514
515         node = nilfs_btree_get_root(btree);
516         level = nilfs_btree_node_get_level(node);
517         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
518                 return -ENOENT;
519
520         found = nilfs_btree_node_lookup(node, key, &index);
521         ptr = nilfs_btree_node_get_ptr(node, index,
522                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
523         path[level].bp_bh = NULL;
524         path[level].bp_index = index;
525
526         ncmax = nilfs_btree_nchildren_per_block(btree);
527
528         while (--level >= minlevel) {
529                 ra = NULL;
530                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
531                         p.node = nilfs_btree_get_node(btree, path, level + 1,
532                                                       &p.ncmax);
533                         p.index = index;
534                         p.max_ra_blocks = 7;
535                         ra = &p;
536                 }
537                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
538                                               ra);
539                 if (ret < 0)
540                         return ret;
541
542                 node = nilfs_btree_get_nonroot_node(path, level);
543                 if (nilfs_btree_bad_node(node, level))
544                         return -EINVAL;
545                 if (!found)
546                         found = nilfs_btree_node_lookup(node, key, &index);
547                 else
548                         index = 0;
549                 if (index < ncmax) {
550                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
551                 } else {
552                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
553                         /* insert */
554                         ptr = NILFS_BMAP_INVALID_PTR;
555                 }
556                 path[level].bp_index = index;
557         }
558         if (!found)
559                 return -ENOENT;
560
561         if (ptrp != NULL)
562                 *ptrp = ptr;
563
564         return 0;
565 }
566
567 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
568                                       struct nilfs_btree_path *path,
569                                       __u64 *keyp, __u64 *ptrp)
570 {
571         struct nilfs_btree_node *node;
572         __u64 ptr;
573         int index, level, ncmax, ret;
574
575         node = nilfs_btree_get_root(btree);
576         index = nilfs_btree_node_get_nchildren(node) - 1;
577         if (index < 0)
578                 return -ENOENT;
579         level = nilfs_btree_node_get_level(node);
580         ptr = nilfs_btree_node_get_ptr(node, index,
581                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
582         path[level].bp_bh = NULL;
583         path[level].bp_index = index;
584         ncmax = nilfs_btree_nchildren_per_block(btree);
585
586         for (level--; level > 0; level--) {
587                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
588                 if (ret < 0)
589                         return ret;
590                 node = nilfs_btree_get_nonroot_node(path, level);
591                 if (nilfs_btree_bad_node(node, level))
592                         return -EINVAL;
593                 index = nilfs_btree_node_get_nchildren(node) - 1;
594                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
595                 path[level].bp_index = index;
596         }
597
598         if (keyp != NULL)
599                 *keyp = nilfs_btree_node_get_key(node, index);
600         if (ptrp != NULL)
601                 *ptrp = ptr;
602
603         return 0;
604 }
605
606 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
607                               __u64 key, int level, __u64 *ptrp)
608 {
609         struct nilfs_btree_path *path;
610         int ret;
611
612         path = nilfs_btree_alloc_path();
613         if (path == NULL)
614                 return -ENOMEM;
615
616         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
617
618         nilfs_btree_free_path(path);
619
620         return ret;
621 }
622
623 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
624                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
625 {
626         struct nilfs_btree_path *path;
627         struct nilfs_btree_node *node;
628         struct inode *dat = NULL;
629         __u64 ptr, ptr2;
630         sector_t blocknr;
631         int level = NILFS_BTREE_LEVEL_NODE_MIN;
632         int ret, cnt, index, maxlevel, ncmax;
633         struct nilfs_btree_readahead_info p;
634
635         path = nilfs_btree_alloc_path();
636         if (path == NULL)
637                 return -ENOMEM;
638
639         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
640         if (ret < 0)
641                 goto out;
642
643         if (NILFS_BMAP_USE_VBN(btree)) {
644                 dat = nilfs_bmap_get_dat(btree);
645                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
646                 if (ret < 0)
647                         goto out;
648                 ptr = blocknr;
649         }
650         cnt = 1;
651         if (cnt == maxblocks)
652                 goto end;
653
654         maxlevel = nilfs_btree_height(btree) - 1;
655         node = nilfs_btree_get_node(btree, path, level, &ncmax);
656         index = path[level].bp_index + 1;
657         for (;;) {
658                 while (index < nilfs_btree_node_get_nchildren(node)) {
659                         if (nilfs_btree_node_get_key(node, index) !=
660                             key + cnt)
661                                 goto end;
662                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
663                         if (dat) {
664                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
665                                 if (ret < 0)
666                                         goto out;
667                                 ptr2 = blocknr;
668                         }
669                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
670                                 goto end;
671                         index++;
672                         continue;
673                 }
674                 if (level == maxlevel)
675                         break;
676
677                 /* look-up right sibling node */
678                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
679                 p.index = path[level + 1].bp_index + 1;
680                 p.max_ra_blocks = 7;
681                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
682                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
683                         break;
684                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
685                 path[level + 1].bp_index = p.index;
686
687                 brelse(path[level].bp_bh);
688                 path[level].bp_bh = NULL;
689
690                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
691                                               &p);
692                 if (ret < 0)
693                         goto out;
694                 node = nilfs_btree_get_nonroot_node(path, level);
695                 ncmax = nilfs_btree_nchildren_per_block(btree);
696                 index = 0;
697                 path[level].bp_index = index;
698         }
699  end:
700         *ptrp = ptr;
701         ret = cnt;
702  out:
703         nilfs_btree_free_path(path);
704         return ret;
705 }
706
707 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
708                                     struct nilfs_btree_path *path,
709                                     int level, __u64 key)
710 {
711         if (level < nilfs_btree_height(btree) - 1) {
712                 do {
713                         nilfs_btree_node_set_key(
714                                 nilfs_btree_get_nonroot_node(path, level),
715                                 path[level].bp_index, key);
716                         if (!buffer_dirty(path[level].bp_bh))
717                                 mark_buffer_dirty(path[level].bp_bh);
718                 } while ((path[level].bp_index == 0) &&
719                          (++level < nilfs_btree_height(btree) - 1));
720         }
721
722         /* root */
723         if (level == nilfs_btree_height(btree) - 1) {
724                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
725                                          path[level].bp_index, key);
726         }
727 }
728
729 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
730                                   struct nilfs_btree_path *path,
731                                   int level, __u64 *keyp, __u64 *ptrp)
732 {
733         struct nilfs_btree_node *node;
734         int ncblk;
735
736         if (level < nilfs_btree_height(btree) - 1) {
737                 node = nilfs_btree_get_nonroot_node(path, level);
738                 ncblk = nilfs_btree_nchildren_per_block(btree);
739                 nilfs_btree_node_insert(node, path[level].bp_index,
740                                         *keyp, *ptrp, ncblk);
741                 if (!buffer_dirty(path[level].bp_bh))
742                         mark_buffer_dirty(path[level].bp_bh);
743
744                 if (path[level].bp_index == 0)
745                         nilfs_btree_promote_key(btree, path, level + 1,
746                                                 nilfs_btree_node_get_key(node,
747                                                                          0));
748         } else {
749                 node = nilfs_btree_get_root(btree);
750                 nilfs_btree_node_insert(node, path[level].bp_index,
751                                         *keyp, *ptrp,
752                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
753         }
754 }
755
756 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
757                                    struct nilfs_btree_path *path,
758                                    int level, __u64 *keyp, __u64 *ptrp)
759 {
760         struct nilfs_btree_node *node, *left;
761         int nchildren, lnchildren, n, move, ncblk;
762
763         node = nilfs_btree_get_nonroot_node(path, level);
764         left = nilfs_btree_get_sib_node(path, level);
765         nchildren = nilfs_btree_node_get_nchildren(node);
766         lnchildren = nilfs_btree_node_get_nchildren(left);
767         ncblk = nilfs_btree_nchildren_per_block(btree);
768         move = 0;
769
770         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
771         if (n > path[level].bp_index) {
772                 /* move insert point */
773                 n--;
774                 move = 1;
775         }
776
777         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
778
779         if (!buffer_dirty(path[level].bp_bh))
780                 mark_buffer_dirty(path[level].bp_bh);
781         if (!buffer_dirty(path[level].bp_sib_bh))
782                 mark_buffer_dirty(path[level].bp_sib_bh);
783
784         nilfs_btree_promote_key(btree, path, level + 1,
785                                 nilfs_btree_node_get_key(node, 0));
786
787         if (move) {
788                 brelse(path[level].bp_bh);
789                 path[level].bp_bh = path[level].bp_sib_bh;
790                 path[level].bp_sib_bh = NULL;
791                 path[level].bp_index += lnchildren;
792                 path[level + 1].bp_index--;
793         } else {
794                 brelse(path[level].bp_sib_bh);
795                 path[level].bp_sib_bh = NULL;
796                 path[level].bp_index -= n;
797         }
798
799         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
800 }
801
802 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
803                                     struct nilfs_btree_path *path,
804                                     int level, __u64 *keyp, __u64 *ptrp)
805 {
806         struct nilfs_btree_node *node, *right;
807         int nchildren, rnchildren, n, move, ncblk;
808
809         node = nilfs_btree_get_nonroot_node(path, level);
810         right = nilfs_btree_get_sib_node(path, level);
811         nchildren = nilfs_btree_node_get_nchildren(node);
812         rnchildren = nilfs_btree_node_get_nchildren(right);
813         ncblk = nilfs_btree_nchildren_per_block(btree);
814         move = 0;
815
816         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
817         if (n > nchildren - path[level].bp_index) {
818                 /* move insert point */
819                 n--;
820                 move = 1;
821         }
822
823         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
824
825         if (!buffer_dirty(path[level].bp_bh))
826                 mark_buffer_dirty(path[level].bp_bh);
827         if (!buffer_dirty(path[level].bp_sib_bh))
828                 mark_buffer_dirty(path[level].bp_sib_bh);
829
830         path[level + 1].bp_index++;
831         nilfs_btree_promote_key(btree, path, level + 1,
832                                 nilfs_btree_node_get_key(right, 0));
833         path[level + 1].bp_index--;
834
835         if (move) {
836                 brelse(path[level].bp_bh);
837                 path[level].bp_bh = path[level].bp_sib_bh;
838                 path[level].bp_sib_bh = NULL;
839                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
840                 path[level + 1].bp_index++;
841         } else {
842                 brelse(path[level].bp_sib_bh);
843                 path[level].bp_sib_bh = NULL;
844         }
845
846         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
847 }
848
849 static void nilfs_btree_split(struct nilfs_bmap *btree,
850                               struct nilfs_btree_path *path,
851                               int level, __u64 *keyp, __u64 *ptrp)
852 {
853         struct nilfs_btree_node *node, *right;
854         __u64 newkey;
855         __u64 newptr;
856         int nchildren, n, move, ncblk;
857
858         node = nilfs_btree_get_nonroot_node(path, level);
859         right = nilfs_btree_get_sib_node(path, level);
860         nchildren = nilfs_btree_node_get_nchildren(node);
861         ncblk = nilfs_btree_nchildren_per_block(btree);
862         move = 0;
863
864         n = (nchildren + 1) / 2;
865         if (n > nchildren - path[level].bp_index) {
866                 n--;
867                 move = 1;
868         }
869
870         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
871
872         if (!buffer_dirty(path[level].bp_bh))
873                 mark_buffer_dirty(path[level].bp_bh);
874         if (!buffer_dirty(path[level].bp_sib_bh))
875                 mark_buffer_dirty(path[level].bp_sib_bh);
876
877         newkey = nilfs_btree_node_get_key(right, 0);
878         newptr = path[level].bp_newreq.bpr_ptr;
879
880         if (move) {
881                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
882                 nilfs_btree_node_insert(right, path[level].bp_index,
883                                         *keyp, *ptrp, ncblk);
884
885                 *keyp = nilfs_btree_node_get_key(right, 0);
886                 *ptrp = path[level].bp_newreq.bpr_ptr;
887
888                 brelse(path[level].bp_bh);
889                 path[level].bp_bh = path[level].bp_sib_bh;
890                 path[level].bp_sib_bh = NULL;
891         } else {
892                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
893
894                 *keyp = nilfs_btree_node_get_key(right, 0);
895                 *ptrp = path[level].bp_newreq.bpr_ptr;
896
897                 brelse(path[level].bp_sib_bh);
898                 path[level].bp_sib_bh = NULL;
899         }
900
901         path[level + 1].bp_index++;
902 }
903
904 static void nilfs_btree_grow(struct nilfs_bmap *btree,
905                              struct nilfs_btree_path *path,
906                              int level, __u64 *keyp, __u64 *ptrp)
907 {
908         struct nilfs_btree_node *root, *child;
909         int n, ncblk;
910
911         root = nilfs_btree_get_root(btree);
912         child = nilfs_btree_get_sib_node(path, level);
913         ncblk = nilfs_btree_nchildren_per_block(btree);
914
915         n = nilfs_btree_node_get_nchildren(root);
916
917         nilfs_btree_node_move_right(root, child, n,
918                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
919         nilfs_btree_node_set_level(root, level + 1);
920
921         if (!buffer_dirty(path[level].bp_sib_bh))
922                 mark_buffer_dirty(path[level].bp_sib_bh);
923
924         path[level].bp_bh = path[level].bp_sib_bh;
925         path[level].bp_sib_bh = NULL;
926
927         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
928
929         *keyp = nilfs_btree_node_get_key(child, 0);
930         *ptrp = path[level].bp_newreq.bpr_ptr;
931 }
932
933 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
934                                    const struct nilfs_btree_path *path)
935 {
936         struct nilfs_btree_node *node;
937         int level, ncmax;
938
939         if (path == NULL)
940                 return NILFS_BMAP_INVALID_PTR;
941
942         /* left sibling */
943         level = NILFS_BTREE_LEVEL_NODE_MIN;
944         if (path[level].bp_index > 0) {
945                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
946                 return nilfs_btree_node_get_ptr(node,
947                                                 path[level].bp_index - 1,
948                                                 ncmax);
949         }
950
951         /* parent */
952         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
953         if (level <= nilfs_btree_height(btree) - 1) {
954                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
955                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
956                                                 ncmax);
957         }
958
959         return NILFS_BMAP_INVALID_PTR;
960 }
961
962 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
963                                        const struct nilfs_btree_path *path,
964                                        __u64 key)
965 {
966         __u64 ptr;
967
968         ptr = nilfs_bmap_find_target_seq(btree, key);
969         if (ptr != NILFS_BMAP_INVALID_PTR)
970                 /* sequential access */
971                 return ptr;
972         else {
973                 ptr = nilfs_btree_find_near(btree, path);
974                 if (ptr != NILFS_BMAP_INVALID_PTR)
975                         /* near */
976                         return ptr;
977         }
978         /* block group */
979         return nilfs_bmap_find_target_in_group(btree);
980 }
981
982 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
983                                       struct nilfs_btree_path *path,
984                                       int *levelp, __u64 key, __u64 ptr,
985                                       struct nilfs_bmap_stats *stats)
986 {
987         struct buffer_head *bh;
988         struct nilfs_btree_node *node, *parent, *sib;
989         __u64 sibptr;
990         int pindex, level, ncmax, ncblk, ret;
991         struct inode *dat = NULL;
992
993         stats->bs_nblocks = 0;
994         level = NILFS_BTREE_LEVEL_DATA;
995
996         /* allocate a new ptr for data block */
997         if (NILFS_BMAP_USE_VBN(btree)) {
998                 path[level].bp_newreq.bpr_ptr =
999                         nilfs_btree_find_target_v(btree, path, key);
1000                 dat = nilfs_bmap_get_dat(btree);
1001         }
1002
1003         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1004         if (ret < 0)
1005                 goto err_out_data;
1006
1007         ncblk = nilfs_btree_nchildren_per_block(btree);
1008
1009         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1010              level < nilfs_btree_height(btree) - 1;
1011              level++) {
1012                 node = nilfs_btree_get_nonroot_node(path, level);
1013                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1014                         path[level].bp_op = nilfs_btree_do_insert;
1015                         stats->bs_nblocks++;
1016                         goto out;
1017                 }
1018
1019                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1020                 pindex = path[level + 1].bp_index;
1021
1022                 /* left sibling */
1023                 if (pindex > 0) {
1024                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1025                                                           ncmax);
1026                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1027                         if (ret < 0)
1028                                 goto err_out_child_node;
1029                         sib = (struct nilfs_btree_node *)bh->b_data;
1030                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1031                                 path[level].bp_sib_bh = bh;
1032                                 path[level].bp_op = nilfs_btree_carry_left;
1033                                 stats->bs_nblocks++;
1034                                 goto out;
1035                         } else {
1036                                 brelse(bh);
1037                         }
1038                 }
1039
1040                 /* right sibling */
1041                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1042                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1043                                                           ncmax);
1044                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1045                         if (ret < 0)
1046                                 goto err_out_child_node;
1047                         sib = (struct nilfs_btree_node *)bh->b_data;
1048                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1049                                 path[level].bp_sib_bh = bh;
1050                                 path[level].bp_op = nilfs_btree_carry_right;
1051                                 stats->bs_nblocks++;
1052                                 goto out;
1053                         } else {
1054                                 brelse(bh);
1055                         }
1056                 }
1057
1058                 /* split */
1059                 path[level].bp_newreq.bpr_ptr =
1060                         path[level - 1].bp_newreq.bpr_ptr + 1;
1061                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1062                                                    &path[level].bp_newreq, dat);
1063                 if (ret < 0)
1064                         goto err_out_child_node;
1065                 ret = nilfs_btree_get_new_block(btree,
1066                                                 path[level].bp_newreq.bpr_ptr,
1067                                                 &bh);
1068                 if (ret < 0)
1069                         goto err_out_curr_node;
1070
1071                 stats->bs_nblocks++;
1072
1073                 sib = (struct nilfs_btree_node *)bh->b_data;
1074                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1075                 path[level].bp_sib_bh = bh;
1076                 path[level].bp_op = nilfs_btree_split;
1077         }
1078
1079         /* root */
1080         node = nilfs_btree_get_root(btree);
1081         if (nilfs_btree_node_get_nchildren(node) <
1082             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1083                 path[level].bp_op = nilfs_btree_do_insert;
1084                 stats->bs_nblocks++;
1085                 goto out;
1086         }
1087
1088         /* grow */
1089         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1090         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1091         if (ret < 0)
1092                 goto err_out_child_node;
1093         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1094                                         &bh);
1095         if (ret < 0)
1096                 goto err_out_curr_node;
1097
1098         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1099                               0, level, 0, ncblk, NULL, NULL);
1100         path[level].bp_sib_bh = bh;
1101         path[level].bp_op = nilfs_btree_grow;
1102
1103         level++;
1104         path[level].bp_op = nilfs_btree_do_insert;
1105
1106         /* a newly-created node block and a data block are added */
1107         stats->bs_nblocks += 2;
1108
1109         /* success */
1110  out:
1111         *levelp = level;
1112         return ret;
1113
1114         /* error */
1115  err_out_curr_node:
1116         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1117  err_out_child_node:
1118         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1119                 nilfs_btnode_delete(path[level].bp_sib_bh);
1120                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1121
1122         }
1123
1124         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1125  err_out_data:
1126         *levelp = level;
1127         stats->bs_nblocks = 0;
1128         return ret;
1129 }
1130
1131 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1132                                       struct nilfs_btree_path *path,
1133                                       int maxlevel, __u64 key, __u64 ptr)
1134 {
1135         struct inode *dat = NULL;
1136         int level;
1137
1138         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1139         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1140         if (NILFS_BMAP_USE_VBN(btree)) {
1141                 nilfs_bmap_set_target_v(btree, key, ptr);
1142                 dat = nilfs_bmap_get_dat(btree);
1143         }
1144
1145         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1146                 nilfs_bmap_commit_alloc_ptr(btree,
1147                                             &path[level - 1].bp_newreq, dat);
1148                 path[level].bp_op(btree, path, level, &key, &ptr);
1149         }
1150
1151         if (!nilfs_bmap_dirty(btree))
1152                 nilfs_bmap_set_dirty(btree);
1153 }
1154
1155 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1156 {
1157         struct nilfs_btree_path *path;
1158         struct nilfs_bmap_stats stats;
1159         int level, ret;
1160
1161         path = nilfs_btree_alloc_path();
1162         if (path == NULL)
1163                 return -ENOMEM;
1164
1165         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1166                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1167         if (ret != -ENOENT) {
1168                 if (ret == 0)
1169                         ret = -EEXIST;
1170                 goto out;
1171         }
1172
1173         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1174         if (ret < 0)
1175                 goto out;
1176         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1177         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1178
1179  out:
1180         nilfs_btree_free_path(path);
1181         return ret;
1182 }
1183
1184 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1185                                   struct nilfs_btree_path *path,
1186                                   int level, __u64 *keyp, __u64 *ptrp)
1187 {
1188         struct nilfs_btree_node *node;
1189         int ncblk;
1190
1191         if (level < nilfs_btree_height(btree) - 1) {
1192                 node = nilfs_btree_get_nonroot_node(path, level);
1193                 ncblk = nilfs_btree_nchildren_per_block(btree);
1194                 nilfs_btree_node_delete(node, path[level].bp_index,
1195                                         keyp, ptrp, ncblk);
1196                 if (!buffer_dirty(path[level].bp_bh))
1197                         mark_buffer_dirty(path[level].bp_bh);
1198                 if (path[level].bp_index == 0)
1199                         nilfs_btree_promote_key(btree, path, level + 1,
1200                                 nilfs_btree_node_get_key(node, 0));
1201         } else {
1202                 node = nilfs_btree_get_root(btree);
1203                 nilfs_btree_node_delete(node, path[level].bp_index,
1204                                         keyp, ptrp,
1205                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1206         }
1207 }
1208
1209 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1210                                     struct nilfs_btree_path *path,
1211                                     int level, __u64 *keyp, __u64 *ptrp)
1212 {
1213         struct nilfs_btree_node *node, *left;
1214         int nchildren, lnchildren, n, ncblk;
1215
1216         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1217
1218         node = nilfs_btree_get_nonroot_node(path, level);
1219         left = nilfs_btree_get_sib_node(path, level);
1220         nchildren = nilfs_btree_node_get_nchildren(node);
1221         lnchildren = nilfs_btree_node_get_nchildren(left);
1222         ncblk = nilfs_btree_nchildren_per_block(btree);
1223
1224         n = (nchildren + lnchildren) / 2 - nchildren;
1225
1226         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1227
1228         if (!buffer_dirty(path[level].bp_bh))
1229                 mark_buffer_dirty(path[level].bp_bh);
1230         if (!buffer_dirty(path[level].bp_sib_bh))
1231                 mark_buffer_dirty(path[level].bp_sib_bh);
1232
1233         nilfs_btree_promote_key(btree, path, level + 1,
1234                                 nilfs_btree_node_get_key(node, 0));
1235
1236         brelse(path[level].bp_sib_bh);
1237         path[level].bp_sib_bh = NULL;
1238         path[level].bp_index += n;
1239 }
1240
1241 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1242                                      struct nilfs_btree_path *path,
1243                                      int level, __u64 *keyp, __u64 *ptrp)
1244 {
1245         struct nilfs_btree_node *node, *right;
1246         int nchildren, rnchildren, n, ncblk;
1247
1248         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1249
1250         node = nilfs_btree_get_nonroot_node(path, level);
1251         right = nilfs_btree_get_sib_node(path, level);
1252         nchildren = nilfs_btree_node_get_nchildren(node);
1253         rnchildren = nilfs_btree_node_get_nchildren(right);
1254         ncblk = nilfs_btree_nchildren_per_block(btree);
1255
1256         n = (nchildren + rnchildren) / 2 - nchildren;
1257
1258         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1259
1260         if (!buffer_dirty(path[level].bp_bh))
1261                 mark_buffer_dirty(path[level].bp_bh);
1262         if (!buffer_dirty(path[level].bp_sib_bh))
1263                 mark_buffer_dirty(path[level].bp_sib_bh);
1264
1265         path[level + 1].bp_index++;
1266         nilfs_btree_promote_key(btree, path, level + 1,
1267                                 nilfs_btree_node_get_key(right, 0));
1268         path[level + 1].bp_index--;
1269
1270         brelse(path[level].bp_sib_bh);
1271         path[level].bp_sib_bh = NULL;
1272 }
1273
1274 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1275                                     struct nilfs_btree_path *path,
1276                                     int level, __u64 *keyp, __u64 *ptrp)
1277 {
1278         struct nilfs_btree_node *node, *left;
1279         int n, ncblk;
1280
1281         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1282
1283         node = nilfs_btree_get_nonroot_node(path, level);
1284         left = nilfs_btree_get_sib_node(path, level);
1285         ncblk = nilfs_btree_nchildren_per_block(btree);
1286
1287         n = nilfs_btree_node_get_nchildren(node);
1288
1289         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1290
1291         if (!buffer_dirty(path[level].bp_sib_bh))
1292                 mark_buffer_dirty(path[level].bp_sib_bh);
1293
1294         nilfs_btnode_delete(path[level].bp_bh);
1295         path[level].bp_bh = path[level].bp_sib_bh;
1296         path[level].bp_sib_bh = NULL;
1297         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1298 }
1299
1300 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1301                                      struct nilfs_btree_path *path,
1302                                      int level, __u64 *keyp, __u64 *ptrp)
1303 {
1304         struct nilfs_btree_node *node, *right;
1305         int n, ncblk;
1306
1307         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1308
1309         node = nilfs_btree_get_nonroot_node(path, level);
1310         right = nilfs_btree_get_sib_node(path, level);
1311         ncblk = nilfs_btree_nchildren_per_block(btree);
1312
1313         n = nilfs_btree_node_get_nchildren(right);
1314
1315         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1316
1317         if (!buffer_dirty(path[level].bp_bh))
1318                 mark_buffer_dirty(path[level].bp_bh);
1319
1320         nilfs_btnode_delete(path[level].bp_sib_bh);
1321         path[level].bp_sib_bh = NULL;
1322         path[level + 1].bp_index++;
1323 }
1324
1325 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1326                                struct nilfs_btree_path *path,
1327                                int level, __u64 *keyp, __u64 *ptrp)
1328 {
1329         struct nilfs_btree_node *root, *child;
1330         int n, ncblk;
1331
1332         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1333
1334         root = nilfs_btree_get_root(btree);
1335         child = nilfs_btree_get_nonroot_node(path, level);
1336         ncblk = nilfs_btree_nchildren_per_block(btree);
1337
1338         nilfs_btree_node_delete(root, 0, NULL, NULL,
1339                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1340         nilfs_btree_node_set_level(root, level);
1341         n = nilfs_btree_node_get_nchildren(child);
1342         nilfs_btree_node_move_left(root, child, n,
1343                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1344
1345         nilfs_btnode_delete(path[level].bp_bh);
1346         path[level].bp_bh = NULL;
1347 }
1348
1349 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1350                             struct nilfs_btree_path *path,
1351                             int level, __u64 *keyp, __u64 *ptrp)
1352 {
1353 }
1354
1355 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1356                                       struct nilfs_btree_path *path,
1357                                       int *levelp,
1358                                       struct nilfs_bmap_stats *stats,
1359                                       struct inode *dat)
1360 {
1361         struct buffer_head *bh;
1362         struct nilfs_btree_node *node, *parent, *sib;
1363         __u64 sibptr;
1364         int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1365
1366         ret = 0;
1367         stats->bs_nblocks = 0;
1368         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1369         ncblk = nilfs_btree_nchildren_per_block(btree);
1370
1371         for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1372              level < nilfs_btree_height(btree) - 1;
1373              level++) {
1374                 node = nilfs_btree_get_nonroot_node(path, level);
1375                 path[level].bp_oldreq.bpr_ptr =
1376                         nilfs_btree_node_get_ptr(node, dindex, ncblk);
1377                 ret = nilfs_bmap_prepare_end_ptr(btree,
1378                                                  &path[level].bp_oldreq, dat);
1379                 if (ret < 0)
1380                         goto err_out_child_node;
1381
1382                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1383                         path[level].bp_op = nilfs_btree_do_delete;
1384                         stats->bs_nblocks++;
1385                         goto out;
1386                 }
1387
1388                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1389                 pindex = path[level + 1].bp_index;
1390                 dindex = pindex;
1391
1392                 if (pindex > 0) {
1393                         /* left sibling */
1394                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1395                                                           ncmax);
1396                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1397                         if (ret < 0)
1398                                 goto err_out_curr_node;
1399                         sib = (struct nilfs_btree_node *)bh->b_data;
1400                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1401                                 path[level].bp_sib_bh = bh;
1402                                 path[level].bp_op = nilfs_btree_borrow_left;
1403                                 stats->bs_nblocks++;
1404                                 goto out;
1405                         } else {
1406                                 path[level].bp_sib_bh = bh;
1407                                 path[level].bp_op = nilfs_btree_concat_left;
1408                                 stats->bs_nblocks++;
1409                                 /* continue; */
1410                         }
1411                 } else if (pindex <
1412                            nilfs_btree_node_get_nchildren(parent) - 1) {
1413                         /* right sibling */
1414                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1415                                                           ncmax);
1416                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1417                         if (ret < 0)
1418                                 goto err_out_curr_node;
1419                         sib = (struct nilfs_btree_node *)bh->b_data;
1420                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1421                                 path[level].bp_sib_bh = bh;
1422                                 path[level].bp_op = nilfs_btree_borrow_right;
1423                                 stats->bs_nblocks++;
1424                                 goto out;
1425                         } else {
1426                                 path[level].bp_sib_bh = bh;
1427                                 path[level].bp_op = nilfs_btree_concat_right;
1428                                 stats->bs_nblocks++;
1429                                 /*
1430                                  * When merging right sibling node
1431                                  * into the current node, pointer to
1432                                  * the right sibling node must be
1433                                  * terminated instead.  The adjustment
1434                                  * below is required for that.
1435                                  */
1436                                 dindex = pindex + 1;
1437                                 /* continue; */
1438                         }
1439                 } else {
1440                         /* no siblings */
1441                         /* the only child of the root node */
1442                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1443                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1444                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1445                                 path[level].bp_op = nilfs_btree_shrink;
1446                                 stats->bs_nblocks += 2;
1447                                 level++;
1448                                 path[level].bp_op = nilfs_btree_nop;
1449                                 goto shrink_root_child;
1450                         } else {
1451                                 path[level].bp_op = nilfs_btree_do_delete;
1452                                 stats->bs_nblocks++;
1453                                 goto out;
1454                         }
1455                 }
1456         }
1457
1458         /* child of the root node is deleted */
1459         path[level].bp_op = nilfs_btree_do_delete;
1460         stats->bs_nblocks++;
1461
1462 shrink_root_child:
1463         node = nilfs_btree_get_root(btree);
1464         path[level].bp_oldreq.bpr_ptr =
1465                 nilfs_btree_node_get_ptr(node, dindex,
1466                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1467
1468         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1469         if (ret < 0)
1470                 goto err_out_child_node;
1471
1472         /* success */
1473  out:
1474         *levelp = level;
1475         return ret;
1476
1477         /* error */
1478  err_out_curr_node:
1479         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1480  err_out_child_node:
1481         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1482                 brelse(path[level].bp_sib_bh);
1483                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1484         }
1485         *levelp = level;
1486         stats->bs_nblocks = 0;
1487         return ret;
1488 }
1489
1490 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1491                                       struct nilfs_btree_path *path,
1492                                       int maxlevel, struct inode *dat)
1493 {
1494         int level;
1495
1496         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1497                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1498                 path[level].bp_op(btree, path, level, NULL, NULL);
1499         }
1500
1501         if (!nilfs_bmap_dirty(btree))
1502                 nilfs_bmap_set_dirty(btree);
1503 }
1504
1505 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1506
1507 {
1508         struct nilfs_btree_path *path;
1509         struct nilfs_bmap_stats stats;
1510         struct inode *dat;
1511         int level, ret;
1512
1513         path = nilfs_btree_alloc_path();
1514         if (path == NULL)
1515                 return -ENOMEM;
1516
1517         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1518                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1519         if (ret < 0)
1520                 goto out;
1521
1522
1523         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1524
1525         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1526         if (ret < 0)
1527                 goto out;
1528         nilfs_btree_commit_delete(btree, path, level, dat);
1529         nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1530
1531 out:
1532         nilfs_btree_free_path(path);
1533         return ret;
1534 }
1535
1536 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1537 {
1538         struct nilfs_btree_path *path;
1539         int ret;
1540
1541         path = nilfs_btree_alloc_path();
1542         if (path == NULL)
1543                 return -ENOMEM;
1544
1545         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1546
1547         nilfs_btree_free_path(path);
1548
1549         return ret;
1550 }
1551
1552 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1553 {
1554         struct buffer_head *bh;
1555         struct nilfs_btree_node *root, *node;
1556         __u64 maxkey, nextmaxkey;
1557         __u64 ptr;
1558         int nchildren, ret;
1559
1560         root = nilfs_btree_get_root(btree);
1561         switch (nilfs_btree_height(btree)) {
1562         case 2:
1563                 bh = NULL;
1564                 node = root;
1565                 break;
1566         case 3:
1567                 nchildren = nilfs_btree_node_get_nchildren(root);
1568                 if (nchildren > 1)
1569                         return 0;
1570                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1571                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1572                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1573                 if (ret < 0)
1574                         return ret;
1575                 node = (struct nilfs_btree_node *)bh->b_data;
1576                 break;
1577         default:
1578                 return 0;
1579         }
1580
1581         nchildren = nilfs_btree_node_get_nchildren(node);
1582         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1583         nextmaxkey = (nchildren > 1) ?
1584                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1585         if (bh != NULL)
1586                 brelse(bh);
1587
1588         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1589 }
1590
1591 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1592                                    __u64 *keys, __u64 *ptrs, int nitems)
1593 {
1594         struct buffer_head *bh;
1595         struct nilfs_btree_node *node, *root;
1596         __le64 *dkeys;
1597         __le64 *dptrs;
1598         __u64 ptr;
1599         int nchildren, ncmax, i, ret;
1600
1601         root = nilfs_btree_get_root(btree);
1602         switch (nilfs_btree_height(btree)) {
1603         case 2:
1604                 bh = NULL;
1605                 node = root;
1606                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1607                 break;
1608         case 3:
1609                 nchildren = nilfs_btree_node_get_nchildren(root);
1610                 WARN_ON(nchildren > 1);
1611                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1612                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1613                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1614                 if (ret < 0)
1615                         return ret;
1616                 node = (struct nilfs_btree_node *)bh->b_data;
1617                 ncmax = nilfs_btree_nchildren_per_block(btree);
1618                 break;
1619         default:
1620                 node = NULL;
1621                 return -EINVAL;
1622         }
1623
1624         nchildren = nilfs_btree_node_get_nchildren(node);
1625         if (nchildren < nitems)
1626                 nitems = nchildren;
1627         dkeys = nilfs_btree_node_dkeys(node);
1628         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1629         for (i = 0; i < nitems; i++) {
1630                 keys[i] = le64_to_cpu(dkeys[i]);
1631                 ptrs[i] = le64_to_cpu(dptrs[i]);
1632         }
1633
1634         if (bh != NULL)
1635                 brelse(bh);
1636
1637         return nitems;
1638 }
1639
1640 static int
1641 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1642                                        union nilfs_bmap_ptr_req *dreq,
1643                                        union nilfs_bmap_ptr_req *nreq,
1644                                        struct buffer_head **bhp,
1645                                        struct nilfs_bmap_stats *stats)
1646 {
1647         struct buffer_head *bh;
1648         struct inode *dat = NULL;
1649         int ret;
1650
1651         stats->bs_nblocks = 0;
1652
1653         /* for data */
1654         /* cannot find near ptr */
1655         if (NILFS_BMAP_USE_VBN(btree)) {
1656                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1657                 dat = nilfs_bmap_get_dat(btree);
1658         }
1659
1660         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1661         if (ret < 0)
1662                 return ret;
1663
1664         *bhp = NULL;
1665         stats->bs_nblocks++;
1666         if (nreq != NULL) {
1667                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1668                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1669                 if (ret < 0)
1670                         goto err_out_dreq;
1671
1672                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1673                 if (ret < 0)
1674                         goto err_out_nreq;
1675
1676                 *bhp = bh;
1677                 stats->bs_nblocks++;
1678         }
1679
1680         /* success */
1681         return 0;
1682
1683         /* error */
1684  err_out_nreq:
1685         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1686  err_out_dreq:
1687         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1688         stats->bs_nblocks = 0;
1689         return ret;
1690
1691 }
1692
1693 static void
1694 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1695                                       __u64 key, __u64 ptr,
1696                                       const __u64 *keys, const __u64 *ptrs,
1697                                       int n,
1698                                       union nilfs_bmap_ptr_req *dreq,
1699                                       union nilfs_bmap_ptr_req *nreq,
1700                                       struct buffer_head *bh)
1701 {
1702         struct nilfs_btree_node *node;
1703         struct inode *dat;
1704         __u64 tmpptr;
1705         int ncblk;
1706
1707         /* free resources */
1708         if (btree->b_ops->bop_clear != NULL)
1709                 btree->b_ops->bop_clear(btree);
1710
1711         /* ptr must be a pointer to a buffer head. */
1712         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1713
1714         /* convert and insert */
1715         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1716         nilfs_btree_init(btree);
1717         if (nreq != NULL) {
1718                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1719                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1720
1721                 /* create child node at level 1 */
1722                 node = (struct nilfs_btree_node *)bh->b_data;
1723                 ncblk = nilfs_btree_nchildren_per_block(btree);
1724                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1725                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1726                 if (!buffer_dirty(bh))
1727                         mark_buffer_dirty(bh);
1728                 if (!nilfs_bmap_dirty(btree))
1729                         nilfs_bmap_set_dirty(btree);
1730
1731                 brelse(bh);
1732
1733                 /* create root node at level 2 */
1734                 node = nilfs_btree_get_root(btree);
1735                 tmpptr = nreq->bpr_ptr;
1736                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1737                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1738                                       &keys[0], &tmpptr);
1739         } else {
1740                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1741
1742                 /* create root node at level 1 */
1743                 node = nilfs_btree_get_root(btree);
1744                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1745                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1746                                       keys, ptrs);
1747                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1748                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1749                 if (!nilfs_bmap_dirty(btree))
1750                         nilfs_bmap_set_dirty(btree);
1751         }
1752
1753         if (NILFS_BMAP_USE_VBN(btree))
1754                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1755 }
1756
1757 /**
1758  * nilfs_btree_convert_and_insert -
1759  * @bmap:
1760  * @key:
1761  * @ptr:
1762  * @keys:
1763  * @ptrs:
1764  * @n:
1765  */
1766 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1767                                    __u64 key, __u64 ptr,
1768                                    const __u64 *keys, const __u64 *ptrs, int n)
1769 {
1770         struct buffer_head *bh;
1771         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1772         struct nilfs_bmap_stats stats;
1773         int ret;
1774
1775         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1776                 di = &dreq;
1777                 ni = NULL;
1778         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1779                            1 << btree->b_inode->i_blkbits)) {
1780                 di = &dreq;
1781                 ni = &nreq;
1782         } else {
1783                 di = NULL;
1784                 ni = NULL;
1785                 BUG();
1786         }
1787
1788         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1789                                                      &stats);
1790         if (ret < 0)
1791                 return ret;
1792         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1793                                               di, ni, bh);
1794         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1795         return 0;
1796 }
1797
1798 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1799                                    struct nilfs_btree_path *path,
1800                                    int level,
1801                                    struct buffer_head *bh)
1802 {
1803         while ((++level < nilfs_btree_height(btree) - 1) &&
1804                !buffer_dirty(path[level].bp_bh))
1805                 mark_buffer_dirty(path[level].bp_bh);
1806
1807         return 0;
1808 }
1809
1810 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1811                                         struct nilfs_btree_path *path,
1812                                         int level, struct inode *dat)
1813 {
1814         struct nilfs_btree_node *parent;
1815         int ncmax, ret;
1816
1817         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1818         path[level].bp_oldreq.bpr_ptr =
1819                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1820                                          ncmax);
1821         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1822         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1823                                        &path[level].bp_newreq.bpr_req);
1824         if (ret < 0)
1825                 return ret;
1826
1827         if (buffer_nilfs_node(path[level].bp_bh)) {
1828                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1829                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1830                 path[level].bp_ctxt.bh = path[level].bp_bh;
1831                 ret = nilfs_btnode_prepare_change_key(
1832                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1833                         &path[level].bp_ctxt);
1834                 if (ret < 0) {
1835                         nilfs_dat_abort_update(dat,
1836                                                &path[level].bp_oldreq.bpr_req,
1837                                                &path[level].bp_newreq.bpr_req);
1838                         return ret;
1839                 }
1840         }
1841
1842         return 0;
1843 }
1844
1845 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1846                                         struct nilfs_btree_path *path,
1847                                         int level, struct inode *dat)
1848 {
1849         struct nilfs_btree_node *parent;
1850         int ncmax;
1851
1852         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1853                                 &path[level].bp_newreq.bpr_req,
1854                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1855
1856         if (buffer_nilfs_node(path[level].bp_bh)) {
1857                 nilfs_btnode_commit_change_key(
1858                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1859                         &path[level].bp_ctxt);
1860                 path[level].bp_bh = path[level].bp_ctxt.bh;
1861         }
1862         set_buffer_nilfs_volatile(path[level].bp_bh);
1863
1864         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1865         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1866                                  path[level].bp_newreq.bpr_ptr, ncmax);
1867 }
1868
1869 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1870                                        struct nilfs_btree_path *path,
1871                                        int level, struct inode *dat)
1872 {
1873         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1874                                &path[level].bp_newreq.bpr_req);
1875         if (buffer_nilfs_node(path[level].bp_bh))
1876                 nilfs_btnode_abort_change_key(
1877                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1878                         &path[level].bp_ctxt);
1879 }
1880
1881 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1882                                            struct nilfs_btree_path *path,
1883                                            int minlevel, int *maxlevelp,
1884                                            struct inode *dat)
1885 {
1886         int level, ret;
1887
1888         level = minlevel;
1889         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1890                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1891                 if (ret < 0)
1892                         return ret;
1893         }
1894         while ((++level < nilfs_btree_height(btree) - 1) &&
1895                !buffer_dirty(path[level].bp_bh)) {
1896
1897                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1898                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1899                 if (ret < 0)
1900                         goto out;
1901         }
1902
1903         /* success */
1904         *maxlevelp = level - 1;
1905         return 0;
1906
1907         /* error */
1908  out:
1909         while (--level > minlevel)
1910                 nilfs_btree_abort_update_v(btree, path, level, dat);
1911         if (!buffer_nilfs_volatile(path[level].bp_bh))
1912                 nilfs_btree_abort_update_v(btree, path, level, dat);
1913         return ret;
1914 }
1915
1916 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
1917                                            struct nilfs_btree_path *path,
1918                                            int minlevel, int maxlevel,
1919                                            struct buffer_head *bh,
1920                                            struct inode *dat)
1921 {
1922         int level;
1923
1924         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1925                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1926
1927         for (level = minlevel + 1; level <= maxlevel; level++)
1928                 nilfs_btree_commit_update_v(btree, path, level, dat);
1929 }
1930
1931 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
1932                                    struct nilfs_btree_path *path,
1933                                    int level, struct buffer_head *bh)
1934 {
1935         int maxlevel = 0, ret;
1936         struct nilfs_btree_node *parent;
1937         struct inode *dat = nilfs_bmap_get_dat(btree);
1938         __u64 ptr;
1939         int ncmax;
1940
1941         get_bh(bh);
1942         path[level].bp_bh = bh;
1943         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1944                                               dat);
1945         if (ret < 0)
1946                 goto out;
1947
1948         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1949                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1950                 ptr = nilfs_btree_node_get_ptr(parent,
1951                                                path[level + 1].bp_index,
1952                                                ncmax);
1953                 ret = nilfs_dat_mark_dirty(dat, ptr);
1954                 if (ret < 0)
1955                         goto out;
1956         }
1957
1958         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1959
1960  out:
1961         brelse(path[level].bp_bh);
1962         path[level].bp_bh = NULL;
1963         return ret;
1964 }
1965
1966 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
1967                                  struct buffer_head *bh)
1968 {
1969         struct nilfs_btree_path *path;
1970         struct nilfs_btree_node *node;
1971         __u64 key;
1972         int level, ret;
1973
1974         WARN_ON(!buffer_dirty(bh));
1975
1976         path = nilfs_btree_alloc_path();
1977         if (path == NULL)
1978                 return -ENOMEM;
1979
1980         if (buffer_nilfs_node(bh)) {
1981                 node = (struct nilfs_btree_node *)bh->b_data;
1982                 key = nilfs_btree_node_get_key(node, 0);
1983                 level = nilfs_btree_node_get_level(node);
1984         } else {
1985                 key = nilfs_bmap_data_get_key(btree, bh);
1986                 level = NILFS_BTREE_LEVEL_DATA;
1987         }
1988
1989         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
1990         if (ret < 0) {
1991                 if (unlikely(ret == -ENOENT))
1992                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1993                                __func__, (unsigned long long)key, level);
1994                 goto out;
1995         }
1996
1997         ret = NILFS_BMAP_USE_VBN(btree) ?
1998                 nilfs_btree_propagate_v(btree, path, level, bh) :
1999                 nilfs_btree_propagate_p(btree, path, level, bh);
2000
2001  out:
2002         nilfs_btree_free_path(path);
2003
2004         return ret;
2005 }
2006
2007 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2008                                     struct buffer_head *bh)
2009 {
2010         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2011 }
2012
2013 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2014                                          struct list_head *lists,
2015                                          struct buffer_head *bh)
2016 {
2017         struct list_head *head;
2018         struct buffer_head *cbh;
2019         struct nilfs_btree_node *node, *cnode;
2020         __u64 key, ckey;
2021         int level;
2022
2023         get_bh(bh);
2024         node = (struct nilfs_btree_node *)bh->b_data;
2025         key = nilfs_btree_node_get_key(node, 0);
2026         level = nilfs_btree_node_get_level(node);
2027         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2028             level >= NILFS_BTREE_LEVEL_MAX) {
2029                 dump_stack();
2030                 printk(KERN_WARNING
2031                        "%s: invalid btree level: %d (key=%llu, ino=%lu, "
2032                        "blocknr=%llu)\n",
2033                        __func__, level, (unsigned long long)key,
2034                        NILFS_BMAP_I(btree)->vfs_inode.i_ino,
2035                        (unsigned long long)bh->b_blocknr);
2036                 return;
2037         }
2038
2039         list_for_each(head, &lists[level]) {
2040                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2041                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2042                 ckey = nilfs_btree_node_get_key(cnode, 0);
2043                 if (key < ckey)
2044                         break;
2045         }
2046         list_add_tail(&bh->b_assoc_buffers, head);
2047 }
2048
2049 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2050                                              struct list_head *listp)
2051 {
2052         struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2053         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2054         struct pagevec pvec;
2055         struct buffer_head *bh, *head;
2056         pgoff_t index = 0;
2057         int level, i;
2058
2059         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2060              level < NILFS_BTREE_LEVEL_MAX;
2061              level++)
2062                 INIT_LIST_HEAD(&lists[level]);
2063
2064         pagevec_init(&pvec, 0);
2065
2066         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2067                                   PAGEVEC_SIZE)) {
2068                 for (i = 0; i < pagevec_count(&pvec); i++) {
2069                         bh = head = page_buffers(pvec.pages[i]);
2070                         do {
2071                                 if (buffer_dirty(bh))
2072                                         nilfs_btree_add_dirty_buffer(btree,
2073                                                                      lists, bh);
2074                         } while ((bh = bh->b_this_page) != head);
2075                 }
2076                 pagevec_release(&pvec);
2077                 cond_resched();
2078         }
2079
2080         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2081              level < NILFS_BTREE_LEVEL_MAX;
2082              level++)
2083                 list_splice_tail(&lists[level], listp);
2084 }
2085
2086 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2087                                 struct nilfs_btree_path *path,
2088                                 int level,
2089                                 struct buffer_head **bh,
2090                                 sector_t blocknr,
2091                                 union nilfs_binfo *binfo)
2092 {
2093         struct nilfs_btree_node *parent;
2094         __u64 key;
2095         __u64 ptr;
2096         int ncmax, ret;
2097
2098         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2099         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2100                                        ncmax);
2101         if (buffer_nilfs_node(*bh)) {
2102                 path[level].bp_ctxt.oldkey = ptr;
2103                 path[level].bp_ctxt.newkey = blocknr;
2104                 path[level].bp_ctxt.bh = *bh;
2105                 ret = nilfs_btnode_prepare_change_key(
2106                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2107                         &path[level].bp_ctxt);
2108                 if (ret < 0)
2109                         return ret;
2110                 nilfs_btnode_commit_change_key(
2111                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2112                         &path[level].bp_ctxt);
2113                 *bh = path[level].bp_ctxt.bh;
2114         }
2115
2116         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2117                                  ncmax);
2118
2119         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2120         /* on-disk format */
2121         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2122         binfo->bi_dat.bi_level = level;
2123
2124         return 0;
2125 }
2126
2127 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2128                                 struct nilfs_btree_path *path,
2129                                 int level,
2130                                 struct buffer_head **bh,
2131                                 sector_t blocknr,
2132                                 union nilfs_binfo *binfo)
2133 {
2134         struct nilfs_btree_node *parent;
2135         struct inode *dat = nilfs_bmap_get_dat(btree);
2136         __u64 key;
2137         __u64 ptr;
2138         union nilfs_bmap_ptr_req req;
2139         int ncmax, ret;
2140
2141         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2142         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2143                                        ncmax);
2144         req.bpr_ptr = ptr;
2145         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2146         if (ret < 0)
2147                 return ret;
2148         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2149
2150         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2151         /* on-disk format */
2152         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2153         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2154
2155         return 0;
2156 }
2157
2158 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2159                               struct buffer_head **bh,
2160                               sector_t blocknr,
2161                               union nilfs_binfo *binfo)
2162 {
2163         struct nilfs_btree_path *path;
2164         struct nilfs_btree_node *node;
2165         __u64 key;
2166         int level, ret;
2167
2168         path = nilfs_btree_alloc_path();
2169         if (path == NULL)
2170                 return -ENOMEM;
2171
2172         if (buffer_nilfs_node(*bh)) {
2173                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2174                 key = nilfs_btree_node_get_key(node, 0);
2175                 level = nilfs_btree_node_get_level(node);
2176         } else {
2177                 key = nilfs_bmap_data_get_key(btree, *bh);
2178                 level = NILFS_BTREE_LEVEL_DATA;
2179         }
2180
2181         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2182         if (ret < 0) {
2183                 WARN_ON(ret == -ENOENT);
2184                 goto out;
2185         }
2186
2187         ret = NILFS_BMAP_USE_VBN(btree) ?
2188                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2189                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2190
2191  out:
2192         nilfs_btree_free_path(path);
2193
2194         return ret;
2195 }
2196
2197 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2198                                  struct buffer_head **bh,
2199                                  sector_t blocknr,
2200                                  union nilfs_binfo *binfo)
2201 {
2202         struct nilfs_btree_node *node;
2203         __u64 key;
2204         int ret;
2205
2206         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2207                              blocknr);
2208         if (ret < 0)
2209                 return ret;
2210
2211         if (buffer_nilfs_node(*bh)) {
2212                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2213                 key = nilfs_btree_node_get_key(node, 0);
2214         } else
2215                 key = nilfs_bmap_data_get_key(btree, *bh);
2216
2217         /* on-disk format */
2218         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2219         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2220
2221         return 0;
2222 }
2223
2224 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2225 {
2226         struct buffer_head *bh;
2227         struct nilfs_btree_path *path;
2228         __u64 ptr;
2229         int ret;
2230
2231         path = nilfs_btree_alloc_path();
2232         if (path == NULL)
2233                 return -ENOMEM;
2234
2235         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2236         if (ret < 0) {
2237                 WARN_ON(ret == -ENOENT);
2238                 goto out;
2239         }
2240         ret = nilfs_btree_get_block(btree, ptr, &bh);
2241         if (ret < 0) {
2242                 WARN_ON(ret == -ENOENT);
2243                 goto out;
2244         }
2245
2246         if (!buffer_dirty(bh))
2247                 mark_buffer_dirty(bh);
2248         brelse(bh);
2249         if (!nilfs_bmap_dirty(btree))
2250                 nilfs_bmap_set_dirty(btree);
2251
2252  out:
2253         nilfs_btree_free_path(path);
2254         return ret;
2255 }
2256
2257 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2258         .bop_lookup             =       nilfs_btree_lookup,
2259         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2260         .bop_insert             =       nilfs_btree_insert,
2261         .bop_delete             =       nilfs_btree_delete,
2262         .bop_clear              =       NULL,
2263
2264         .bop_propagate          =       nilfs_btree_propagate,
2265
2266         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2267
2268         .bop_assign             =       nilfs_btree_assign,
2269         .bop_mark               =       nilfs_btree_mark,
2270
2271         .bop_last_key           =       nilfs_btree_last_key,
2272         .bop_check_insert       =       NULL,
2273         .bop_check_delete       =       nilfs_btree_check_delete,
2274         .bop_gather_data        =       nilfs_btree_gather_data,
2275 };
2276
2277 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2278         .bop_lookup             =       NULL,
2279         .bop_lookup_contig      =       NULL,
2280         .bop_insert             =       NULL,
2281         .bop_delete             =       NULL,
2282         .bop_clear              =       NULL,
2283
2284         .bop_propagate          =       nilfs_btree_propagate_gc,
2285
2286         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2287
2288         .bop_assign             =       nilfs_btree_assign_gc,
2289         .bop_mark               =       NULL,
2290
2291         .bop_last_key           =       NULL,
2292         .bop_check_insert       =       NULL,
2293         .bop_check_delete       =       NULL,
2294         .bop_gather_data        =       NULL,
2295 };
2296
2297 int nilfs_btree_init(struct nilfs_bmap *bmap)
2298 {
2299         bmap->b_ops = &nilfs_btree_ops;
2300         bmap->b_nchildren_per_block =
2301                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2302         return 0;
2303 }
2304
2305 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2306 {
2307         bmap->b_ops = &nilfs_btree_ops_gc;
2308         bmap->b_nchildren_per_block =
2309                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2310 }