Btrfsck: add the ability to prune corrupt extent allocation tree blocks
[platform/upstream/btrfs-progs.git] / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include "ctree.h"
19 #include "disk-io.h"
20 #include "transaction.h"
21 #include "print-tree.h"
22 #include "repair.h"
23
24 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
25                       *root, struct btrfs_path *path, int level);
26 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
27                       *root, struct btrfs_key *ins_key,
28                       struct btrfs_path *path, int data_size, int extend);
29 static int push_node_left(struct btrfs_trans_handle *trans,
30                           struct btrfs_root *root, struct extent_buffer *dst,
31                           struct extent_buffer *src, int empty);
32 static int balance_node_right(struct btrfs_trans_handle *trans,
33                               struct btrfs_root *root,
34                               struct extent_buffer *dst_buf,
35                               struct extent_buffer *src_buf);
36
37 inline void btrfs_init_path(struct btrfs_path *p)
38 {
39         memset(p, 0, sizeof(*p));
40 }
41
42 struct btrfs_path *btrfs_alloc_path(void)
43 {
44         struct btrfs_path *path;
45         path = kmalloc(sizeof(struct btrfs_path), GFP_NOFS);
46         if (path) {
47                 btrfs_init_path(path);
48                 path->reada = 0;
49         }
50         return path;
51 }
52
53 void btrfs_free_path(struct btrfs_path *p)
54 {
55         btrfs_release_path(NULL, p);
56         kfree(p);
57 }
58
59 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
60 {
61         int i;
62         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
63                 if (!p->nodes[i])
64                         continue;
65                 free_extent_buffer(p->nodes[i]);
66         }
67         memset(p, 0, sizeof(*p));
68 }
69
70 static void add_root_to_dirty_list(struct btrfs_root *root)
71 {
72         if (root->track_dirty && list_empty(&root->dirty_list)) {
73                 list_add(&root->dirty_list,
74                          &root->fs_info->dirty_cowonly_roots);
75         }
76 }
77
78 int btrfs_copy_root(struct btrfs_trans_handle *trans,
79                       struct btrfs_root *root,
80                       struct extent_buffer *buf,
81                       struct extent_buffer **cow_ret, u64 new_root_objectid)
82 {
83         struct extent_buffer *cow;
84         int ret = 0;
85         int level;
86         struct btrfs_root *new_root;
87         struct btrfs_disk_key disk_key;
88
89         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
90         if (!new_root)
91                 return -ENOMEM;
92
93         memcpy(new_root, root, sizeof(*new_root));
94         new_root->root_key.objectid = new_root_objectid;
95
96         WARN_ON(root->ref_cows && trans->transid !=
97                 root->fs_info->running_transaction->transid);
98         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
99
100         level = btrfs_header_level(buf);
101         if (level == 0)
102                 btrfs_item_key(buf, &disk_key, 0);
103         else
104                 btrfs_node_key(buf, &disk_key, 0);
105         cow = btrfs_alloc_free_block(trans, new_root, buf->len,
106                                      new_root_objectid, &disk_key,
107                                      level, buf->start, 0);
108         if (IS_ERR(cow)) {
109                 kfree(new_root);
110                 return PTR_ERR(cow);
111         }
112
113         copy_extent_buffer(cow, buf, 0, 0, cow->len);
114         btrfs_set_header_bytenr(cow, cow->start);
115         btrfs_set_header_generation(cow, trans->transid);
116         btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
117         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
118                                      BTRFS_HEADER_FLAG_RELOC);
119         if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
120                 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
121         else
122                 btrfs_set_header_owner(cow, new_root_objectid);
123
124         write_extent_buffer(cow, root->fs_info->fsid,
125                             (unsigned long)btrfs_header_fsid(cow),
126                             BTRFS_FSID_SIZE);
127
128         WARN_ON(btrfs_header_generation(buf) > trans->transid);
129         ret = btrfs_inc_ref(trans, new_root, cow, 0);
130         kfree(new_root);
131
132         if (ret)
133                 return ret;
134
135         btrfs_mark_buffer_dirty(cow);
136         *cow_ret = cow;
137         return 0;
138 }
139
140 int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
141                       struct btrfs_root *root)
142 {
143         struct extent_buffer *c;
144         struct extent_buffer *old = root->node;
145         int level;
146         struct btrfs_disk_key disk_key = {0,0,0};
147
148         level = 0;
149
150         c = btrfs_alloc_free_block(trans, root,
151                                    btrfs_level_size(root, 0),
152                                    root->root_key.objectid,
153                                    &disk_key, level, 0, 0);
154         if (IS_ERR(c))
155                 return PTR_ERR(c);
156
157         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
158         btrfs_set_header_level(c, level);
159         btrfs_set_header_bytenr(c, c->start);
160         btrfs_set_header_generation(c, trans->transid);
161         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
162         btrfs_set_header_owner(c, root->root_key.objectid);
163
164         write_extent_buffer(c, root->fs_info->fsid,
165                             (unsigned long)btrfs_header_fsid(c),
166                             BTRFS_FSID_SIZE);
167
168         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
169                             (unsigned long)btrfs_header_chunk_tree_uuid(c),
170                             BTRFS_UUID_SIZE);
171
172         btrfs_mark_buffer_dirty(c);
173
174         free_extent_buffer(old);
175         root->node = c;
176         add_root_to_dirty_list(root);
177         return 0;
178 }
179
180 /*
181  * check if the tree block can be shared by multiple trees
182  */
183 int btrfs_block_can_be_shared(struct btrfs_root *root,
184                               struct extent_buffer *buf)
185 {
186         /*
187          * Tree blocks not in refernece counted trees and tree roots
188          * are never shared. If a block was allocated after the last
189          * snapshot and the block was not allocated by tree relocation,
190          * we know the block is not shared.
191          */
192         if (root->ref_cows &&
193             buf != root->node && buf != root->commit_root &&
194             (btrfs_header_generation(buf) <=
195              btrfs_root_last_snapshot(&root->root_item) ||
196              btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
197                 return 1;
198 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
199         if (root->ref_cows &&
200             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
201                 return 1;
202 #endif
203         return 0;
204 }
205
206 static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
207                                        struct btrfs_root *root,
208                                        struct extent_buffer *buf,
209                                        struct extent_buffer *cow)
210 {
211         u64 refs;
212         u64 owner;
213         u64 flags;
214         u64 new_flags = 0;
215         int ret;
216
217         /*
218          * Backrefs update rules:
219          *
220          * Always use full backrefs for extent pointers in tree block
221          * allocated by tree relocation.
222          *
223          * If a shared tree block is no longer referenced by its owner
224          * tree (btrfs_header_owner(buf) == root->root_key.objectid),
225          * use full backrefs for extent pointers in tree block.
226          *
227          * If a tree block is been relocating
228          * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
229          * use full backrefs for extent pointers in tree block.
230          * The reason for this is some operations (such as drop tree)
231          * are only allowed for blocks use full backrefs.
232          */
233
234         if (btrfs_block_can_be_shared(root, buf)) {
235                 ret = btrfs_lookup_extent_info(trans, root, buf->start,
236                                                buf->len, &refs, &flags);
237                 BUG_ON(ret);
238                 BUG_ON(refs == 0);
239         } else {
240                 refs = 1;
241                 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
242                     btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
243                         flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
244                 else
245                         flags = 0;
246         }
247
248         owner = btrfs_header_owner(buf);
249         BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
250                owner == BTRFS_TREE_RELOC_OBJECTID);
251
252         if (refs > 1) {
253                 if ((owner == root->root_key.objectid ||
254                      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
255                     !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
256                         ret = btrfs_inc_ref(trans, root, buf, 1);
257                         BUG_ON(ret);
258
259                         if (root->root_key.objectid ==
260                             BTRFS_TREE_RELOC_OBJECTID) {
261                                 ret = btrfs_dec_ref(trans, root, buf, 0);
262                                 BUG_ON(ret);
263                                 ret = btrfs_inc_ref(trans, root, cow, 1);
264                                 BUG_ON(ret);
265                         }
266                         new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
267                 } else {
268
269                         if (root->root_key.objectid ==
270                             BTRFS_TREE_RELOC_OBJECTID)
271                                 ret = btrfs_inc_ref(trans, root, cow, 1);
272                         else
273                                 ret = btrfs_inc_ref(trans, root, cow, 0);
274                         BUG_ON(ret);
275                 }
276                 if (new_flags != 0) {
277                         ret = btrfs_set_block_flags(trans, root, buf->start,
278                                                     buf->len, new_flags);
279                         BUG_ON(ret);
280                 }
281         } else {
282                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
283                         if (root->root_key.objectid ==
284                             BTRFS_TREE_RELOC_OBJECTID)
285                                 ret = btrfs_inc_ref(trans, root, cow, 1);
286                         else
287                                 ret = btrfs_inc_ref(trans, root, cow, 0);
288                         BUG_ON(ret);
289                         ret = btrfs_dec_ref(trans, root, buf, 1);
290                         BUG_ON(ret);
291                 }
292                 clean_tree_block(trans, root, buf);
293         }
294         return 0;
295 }
296
297 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
298                              struct btrfs_root *root,
299                              struct extent_buffer *buf,
300                              struct extent_buffer *parent, int parent_slot,
301                              struct extent_buffer **cow_ret,
302                              u64 search_start, u64 empty_size)
303 {
304         struct extent_buffer *cow;
305         struct btrfs_disk_key disk_key;
306         int level;
307
308         WARN_ON(root->ref_cows && trans->transid !=
309                 root->fs_info->running_transaction->transid);
310         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
311
312         level = btrfs_header_level(buf);
313
314         if (level == 0)
315                 btrfs_item_key(buf, &disk_key, 0);
316         else
317                 btrfs_node_key(buf, &disk_key, 0);
318
319         cow = btrfs_alloc_free_block(trans, root, buf->len,
320                                      root->root_key.objectid, &disk_key,
321                                      level, search_start, empty_size);
322         if (IS_ERR(cow))
323                 return PTR_ERR(cow);
324
325         copy_extent_buffer(cow, buf, 0, 0, cow->len);
326         btrfs_set_header_bytenr(cow, cow->start);
327         btrfs_set_header_generation(cow, trans->transid);
328         btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
329         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
330                                      BTRFS_HEADER_FLAG_RELOC);
331         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
332                 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
333         else
334                 btrfs_set_header_owner(cow, root->root_key.objectid);
335
336         write_extent_buffer(cow, root->fs_info->fsid,
337                             (unsigned long)btrfs_header_fsid(cow),
338                             BTRFS_FSID_SIZE);
339
340         WARN_ON(btrfs_header_generation(buf) > trans->transid);
341
342         update_ref_for_cow(trans, root, buf, cow);
343
344         if (buf == root->node) {
345                 root->node = cow;
346                 extent_buffer_get(cow);
347
348                 btrfs_free_extent(trans, root, buf->start, buf->len,
349                                   0, root->root_key.objectid, level, 0);
350                 free_extent_buffer(buf);
351                 add_root_to_dirty_list(root);
352         } else {
353                 btrfs_set_node_blockptr(parent, parent_slot,
354                                         cow->start);
355                 WARN_ON(trans->transid == 0);
356                 btrfs_set_node_ptr_generation(parent, parent_slot,
357                                               trans->transid);
358                 btrfs_mark_buffer_dirty(parent);
359                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
360
361                 btrfs_free_extent(trans, root, buf->start, buf->len,
362                                   0, root->root_key.objectid, level, 1);
363         }
364         free_extent_buffer(buf);
365         btrfs_mark_buffer_dirty(cow);
366         *cow_ret = cow;
367         return 0;
368 }
369
370 static inline int should_cow_block(struct btrfs_trans_handle *trans,
371                                    struct btrfs_root *root,
372                                    struct extent_buffer *buf)
373 {
374         if (btrfs_header_generation(buf) == trans->transid &&
375             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
376             !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
377               btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
378                 return 0;
379         return 1;
380 }
381
382 int btrfs_cow_block(struct btrfs_trans_handle *trans,
383                     struct btrfs_root *root, struct extent_buffer *buf,
384                     struct extent_buffer *parent, int parent_slot,
385                     struct extent_buffer **cow_ret)
386 {
387         u64 search_start;
388         int ret;
389         /*
390         if (trans->transaction != root->fs_info->running_transaction) {
391                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
392                        root->fs_info->running_transaction->transid);
393                 WARN_ON(1);
394         }
395         */
396         if (trans->transid != root->fs_info->generation) {
397                 printk(KERN_CRIT "trans %llu running %llu\n",
398                         (unsigned long long)trans->transid,
399                         (unsigned long long)root->fs_info->generation);
400                 WARN_ON(1);
401         }
402         if (!should_cow_block(trans, root, buf)) {
403                 *cow_ret = buf;
404                 return 0;
405         }
406
407         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
408         ret = __btrfs_cow_block(trans, root, buf, parent,
409                                  parent_slot, cow_ret, search_start, 0);
410         return ret;
411 }
412
413 /*
414 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
415 {
416         if (blocknr < other && other - (blocknr + blocksize) < 32768)
417                 return 1;
418         if (blocknr > other && blocknr - (other + blocksize) < 32768)
419                 return 1;
420         return 0;
421 }
422 */
423
424 /*
425  * compare two keys in a memcmp fashion
426  */
427 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
428 {
429         struct btrfs_key k1;
430
431         btrfs_disk_key_to_cpu(&k1, disk);
432
433         if (k1.objectid > k2->objectid)
434                 return 1;
435         if (k1.objectid < k2->objectid)
436                 return -1;
437         if (k1.type > k2->type)
438                 return 1;
439         if (k1.type < k2->type)
440                 return -1;
441         if (k1.offset > k2->offset)
442                 return 1;
443         if (k1.offset < k2->offset)
444                 return -1;
445         return 0;
446 }
447
448
449 #if 0
450 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
451                        struct btrfs_root *root, struct extent_buffer *parent,
452                        int start_slot, int cache_only, u64 *last_ret,
453                        struct btrfs_key *progress)
454 {
455         struct extent_buffer *cur;
456         struct extent_buffer *tmp;
457         u64 blocknr;
458         u64 gen;
459         u64 search_start = *last_ret;
460         u64 last_block = 0;
461         u64 other;
462         u32 parent_nritems;
463         int end_slot;
464         int i;
465         int err = 0;
466         int parent_level;
467         int uptodate;
468         u32 blocksize;
469         int progress_passed = 0;
470         struct btrfs_disk_key disk_key;
471
472         parent_level = btrfs_header_level(parent);
473         if (cache_only && parent_level != 1)
474                 return 0;
475
476         if (trans->transaction != root->fs_info->running_transaction) {
477                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
478                        root->fs_info->running_transaction->transid);
479                 WARN_ON(1);
480         }
481         if (trans->transid != root->fs_info->generation) {
482                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
483                        root->fs_info->generation);
484                 WARN_ON(1);
485         }
486
487         parent_nritems = btrfs_header_nritems(parent);
488         blocksize = btrfs_level_size(root, parent_level - 1);
489         end_slot = parent_nritems;
490
491         if (parent_nritems == 1)
492                 return 0;
493
494         for (i = start_slot; i < end_slot; i++) {
495                 int close = 1;
496
497                 if (!parent->map_token) {
498                         map_extent_buffer(parent,
499                                         btrfs_node_key_ptr_offset(i),
500                                         sizeof(struct btrfs_key_ptr),
501                                         &parent->map_token, &parent->kaddr,
502                                         &parent->map_start, &parent->map_len,
503                                         KM_USER1);
504                 }
505                 btrfs_node_key(parent, &disk_key, i);
506                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
507                         continue;
508
509                 progress_passed = 1;
510                 blocknr = btrfs_node_blockptr(parent, i);
511                 gen = btrfs_node_ptr_generation(parent, i);
512                 if (last_block == 0)
513                         last_block = blocknr;
514
515                 if (i > 0) {
516                         other = btrfs_node_blockptr(parent, i - 1);
517                         close = close_blocks(blocknr, other, blocksize);
518                 }
519                 if (close && i < end_slot - 2) {
520                         other = btrfs_node_blockptr(parent, i + 1);
521                         close = close_blocks(blocknr, other, blocksize);
522                 }
523                 if (close) {
524                         last_block = blocknr;
525                         continue;
526                 }
527                 if (parent->map_token) {
528                         unmap_extent_buffer(parent, parent->map_token,
529                                             KM_USER1);
530                         parent->map_token = NULL;
531                 }
532
533                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
534                 if (cur)
535                         uptodate = btrfs_buffer_uptodate(cur, gen);
536                 else
537                         uptodate = 0;
538                 if (!cur || !uptodate) {
539                         if (cache_only) {
540                                 free_extent_buffer(cur);
541                                 continue;
542                         }
543                         if (!cur) {
544                                 cur = read_tree_block(root, blocknr,
545                                                          blocksize, gen);
546                         } else if (!uptodate) {
547                                 btrfs_read_buffer(cur, gen);
548                         }
549                 }
550                 if (search_start == 0)
551                         search_start = last_block;
552
553                 err = __btrfs_cow_block(trans, root, cur, parent, i,
554                                         &tmp, search_start,
555                                         min(16 * blocksize,
556                                             (end_slot - i) * blocksize));
557                 if (err) {
558                         free_extent_buffer(cur);
559                         break;
560                 }
561                 search_start = tmp->start;
562                 last_block = tmp->start;
563                 *last_ret = search_start;
564                 if (parent_level == 1)
565                         btrfs_clear_buffer_defrag(tmp);
566                 free_extent_buffer(tmp);
567         }
568         if (parent->map_token) {
569                 unmap_extent_buffer(parent, parent->map_token,
570                                     KM_USER1);
571                 parent->map_token = NULL;
572         }
573         return err;
574 }
575 #endif
576
577 /*
578  * The leaf data grows from end-to-front in the node.
579  * this returns the address of the start of the last item,
580  * which is the stop of the leaf data stack
581  */
582 static inline unsigned int leaf_data_end(struct btrfs_root *root,
583                                          struct extent_buffer *leaf)
584 {
585         u32 nr = btrfs_header_nritems(leaf);
586         if (nr == 0)
587                 return BTRFS_LEAF_DATA_SIZE(root);
588         return btrfs_item_offset_nr(leaf, nr - 1);
589 }
590
591 int btrfs_check_node(struct btrfs_root *root,
592                       struct btrfs_disk_key *parent_key,
593                       struct extent_buffer *buf)
594 {
595         int i;
596         struct btrfs_key cpukey;
597         struct btrfs_disk_key key;
598         u32 nritems = btrfs_header_nritems(buf);
599
600         if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
601                 goto fail;
602
603         if (parent_key && parent_key->type) {
604                 btrfs_node_key(buf, &key, 0);
605                 if (memcmp(parent_key, &key, sizeof(key)))
606                         goto fail;
607         }
608         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
609                 btrfs_node_key(buf, &key, i);
610                 btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
611                 if (btrfs_comp_keys(&key, &cpukey) >= 0)
612                         goto fail;
613         }
614         return 0;
615 fail:
616         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
617                 if (parent_key)
618                         btrfs_disk_key_to_cpu(&cpukey, parent_key);
619                 else
620                         btrfs_node_key_to_cpu(buf, &cpukey, 0);
621                 btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
622                                                 buf->start, buf->len,
623                                                 btrfs_header_level(buf));
624         }
625         return -EIO;
626 }
627
628 int btrfs_check_leaf(struct btrfs_root *root,
629                       struct btrfs_disk_key *parent_key,
630                       struct extent_buffer *buf)
631 {
632         int i;
633         struct btrfs_key cpukey;
634         struct btrfs_disk_key key;
635         u32 nritems = btrfs_header_nritems(buf);
636
637         if (btrfs_header_level(buf) != 0) {
638                 fprintf(stderr, "leaf is not a leaf %llu\n",
639                        (unsigned long long)btrfs_header_bytenr(buf));
640                 goto fail;
641         }
642         if (btrfs_leaf_free_space(root, buf) < 0) {
643                 fprintf(stderr, "leaf free space incorrect %llu %d\n",
644                         (unsigned long long)btrfs_header_bytenr(buf),
645                         btrfs_leaf_free_space(root, buf));
646                 goto fail;
647         }
648
649         if (nritems == 0)
650                 return 0;
651
652         btrfs_item_key(buf, &key, 0);
653         if (parent_key && parent_key->type &&
654             memcmp(parent_key, &key, sizeof(key))) {
655                 fprintf(stderr, "leaf parent key incorrect %llu\n",
656                        (unsigned long long)btrfs_header_bytenr(buf));
657                 goto fail;
658         }
659         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
660                 btrfs_item_key(buf, &key, i);
661                 btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
662                 if (btrfs_comp_keys(&key, &cpukey) >= 0) {
663                         fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
664                         goto fail;
665                 }
666                 if (btrfs_item_offset_nr(buf, i) !=
667                         btrfs_item_end_nr(buf, i + 1)) {
668                         fprintf(stderr, "incorrect offsets %u %u\n",
669                                 btrfs_item_offset_nr(buf, i),
670                                 btrfs_item_end_nr(buf, i + 1));
671                         goto fail;
672                 }
673                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
674                     BTRFS_LEAF_DATA_SIZE(root)) {
675                         fprintf(stderr, "bad item end %u wanted %u\n",
676                                 btrfs_item_end_nr(buf, i),
677                                 (unsigned)BTRFS_LEAF_DATA_SIZE(root));
678                         goto fail;
679                 }
680         }
681         return 0;
682 fail:
683         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
684                 if (parent_key)
685                         btrfs_disk_key_to_cpu(&cpukey, parent_key);
686                 else
687                         btrfs_item_key_to_cpu(buf, &cpukey, 0);
688
689                 btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
690                                                 buf->start, buf->len, 0);
691         }
692         return -EIO;
693 }
694
695 static int noinline check_block(struct btrfs_root *root,
696                                 struct btrfs_path *path, int level)
697 {
698         struct btrfs_disk_key key;
699         struct btrfs_disk_key *key_ptr = NULL;
700         struct extent_buffer *parent;
701
702         if (path->nodes[level + 1]) {
703                 parent = path->nodes[level + 1];
704                 btrfs_node_key(parent, &key, path->slots[level + 1]);
705                 key_ptr = &key;
706         }
707         if (level == 0)
708                 return btrfs_check_leaf(root, key_ptr, path->nodes[0]);
709         return btrfs_check_node(root, key_ptr, path->nodes[level]);
710 }
711
712 /*
713  * search for key in the extent_buffer.  The items start at offset p,
714  * and they are item_size apart.  There are 'max' items in p.
715  *
716  * the slot in the array is returned via slot, and it points to
717  * the place where you would insert key if it is not found in
718  * the array.
719  *
720  * slot may point to max if the key is bigger than all of the keys
721  */
722 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
723                               int item_size, struct btrfs_key *key,
724                               int max, int *slot)
725 {
726         int low = 0;
727         int high = max;
728         int mid;
729         int ret;
730         unsigned long offset;
731         struct btrfs_disk_key *tmp;
732
733         while(low < high) {
734                 mid = (low + high) / 2;
735                 offset = p + mid * item_size;
736
737                 tmp = (struct btrfs_disk_key *)(eb->data + offset);
738                 ret = btrfs_comp_keys(tmp, key);
739
740                 if (ret < 0)
741                         low = mid + 1;
742                 else if (ret > 0)
743                         high = mid;
744                 else {
745                         *slot = mid;
746                         return 0;
747                 }
748         }
749         *slot = low;
750         return 1;
751 }
752
753 /*
754  * simple bin_search frontend that does the right thing for
755  * leaves vs nodes
756  */
757 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
758                       int level, int *slot)
759 {
760         if (level == 0) {
761                 return generic_bin_search(eb,
762                                           offsetof(struct btrfs_leaf, items),
763                                           sizeof(struct btrfs_item),
764                                           key, btrfs_header_nritems(eb),
765                                           slot);
766         } else {
767                 return generic_bin_search(eb,
768                                           offsetof(struct btrfs_node, ptrs),
769                                           sizeof(struct btrfs_key_ptr),
770                                           key, btrfs_header_nritems(eb),
771                                           slot);
772         }
773         return -1;
774 }
775
776 struct extent_buffer *read_node_slot(struct btrfs_root *root,
777                                    struct extent_buffer *parent, int slot)
778 {
779         int level = btrfs_header_level(parent);
780         if (slot < 0)
781                 return NULL;
782         if (slot >= btrfs_header_nritems(parent))
783                 return NULL;
784
785         BUG_ON(level == 0);
786
787         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
788                        btrfs_level_size(root, level - 1),
789                        btrfs_node_ptr_generation(parent, slot));
790 }
791
792 static int balance_level(struct btrfs_trans_handle *trans,
793                          struct btrfs_root *root,
794                          struct btrfs_path *path, int level)
795 {
796         struct extent_buffer *right = NULL;
797         struct extent_buffer *mid;
798         struct extent_buffer *left = NULL;
799         struct extent_buffer *parent = NULL;
800         int ret = 0;
801         int wret;
802         int pslot;
803         int orig_slot = path->slots[level];
804         u64 orig_ptr;
805
806         if (level == 0)
807                 return 0;
808
809         mid = path->nodes[level];
810         WARN_ON(btrfs_header_generation(mid) != trans->transid);
811
812         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
813
814         if (level < BTRFS_MAX_LEVEL - 1)
815                 parent = path->nodes[level + 1];
816         pslot = path->slots[level + 1];
817
818         /*
819          * deal with the case where there is only one pointer in the root
820          * by promoting the node below to a root
821          */
822         if (!parent) {
823                 struct extent_buffer *child;
824
825                 if (btrfs_header_nritems(mid) != 1)
826                         return 0;
827
828                 /* promote the child to a root */
829                 child = read_node_slot(root, mid, 0);
830                 BUG_ON(!child);
831                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
832                 BUG_ON(ret);
833
834                 root->node = child;
835                 add_root_to_dirty_list(root);
836                 path->nodes[level] = NULL;
837                 clean_tree_block(trans, root, mid);
838                 wait_on_tree_block_writeback(root, mid);
839                 /* once for the path */
840                 free_extent_buffer(mid);
841
842                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
843                                         0, root->root_key.objectid,
844                                         level, 1);
845                 /* once for the root ptr */
846                 free_extent_buffer(mid);
847                 return ret;
848         }
849         if (btrfs_header_nritems(mid) >
850             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
851                 return 0;
852
853         left = read_node_slot(root, parent, pslot - 1);
854         if (left) {
855                 wret = btrfs_cow_block(trans, root, left,
856                                        parent, pslot - 1, &left);
857                 if (wret) {
858                         ret = wret;
859                         goto enospc;
860                 }
861         }
862         right = read_node_slot(root, parent, pslot + 1);
863         if (right) {
864                 wret = btrfs_cow_block(trans, root, right,
865                                        parent, pslot + 1, &right);
866                 if (wret) {
867                         ret = wret;
868                         goto enospc;
869                 }
870         }
871
872         /* first, try to make some room in the middle buffer */
873         if (left) {
874                 orig_slot += btrfs_header_nritems(left);
875                 wret = push_node_left(trans, root, left, mid, 1);
876                 if (wret < 0)
877                         ret = wret;
878         }
879
880         /*
881          * then try to empty the right most buffer into the middle
882          */
883         if (right) {
884                 wret = push_node_left(trans, root, mid, right, 1);
885                 if (wret < 0 && wret != -ENOSPC)
886                         ret = wret;
887                 if (btrfs_header_nritems(right) == 0) {
888                         u64 bytenr = right->start;
889                         u32 blocksize = right->len;
890
891                         clean_tree_block(trans, root, right);
892                         wait_on_tree_block_writeback(root, right);
893                         free_extent_buffer(right);
894                         right = NULL;
895                         wret = btrfs_del_ptr(trans, root, path,
896                                              level + 1, pslot + 1);
897                         if (wret)
898                                 ret = wret;
899                         wret = btrfs_free_extent(trans, root, bytenr,
900                                                  blocksize, 0,
901                                                  root->root_key.objectid,
902                                                  level, 0);
903                         if (wret)
904                                 ret = wret;
905                 } else {
906                         struct btrfs_disk_key right_key;
907                         btrfs_node_key(right, &right_key, 0);
908                         btrfs_set_node_key(parent, &right_key, pslot + 1);
909                         btrfs_mark_buffer_dirty(parent);
910                 }
911         }
912         if (btrfs_header_nritems(mid) == 1) {
913                 /*
914                  * we're not allowed to leave a node with one item in the
915                  * tree during a delete.  A deletion from lower in the tree
916                  * could try to delete the only pointer in this node.
917                  * So, pull some keys from the left.
918                  * There has to be a left pointer at this point because
919                  * otherwise we would have pulled some pointers from the
920                  * right
921                  */
922                 BUG_ON(!left);
923                 wret = balance_node_right(trans, root, mid, left);
924                 if (wret < 0) {
925                         ret = wret;
926                         goto enospc;
927                 }
928                 if (wret == 1) {
929                         wret = push_node_left(trans, root, left, mid, 1);
930                         if (wret < 0)
931                                 ret = wret;
932                 }
933                 BUG_ON(wret == 1);
934         }
935         if (btrfs_header_nritems(mid) == 0) {
936                 /* we've managed to empty the middle node, drop it */
937                 u64 bytenr = mid->start;
938                 u32 blocksize = mid->len;
939                 clean_tree_block(trans, root, mid);
940                 wait_on_tree_block_writeback(root, mid);
941                 free_extent_buffer(mid);
942                 mid = NULL;
943                 wret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
944                 if (wret)
945                         ret = wret;
946                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
947                                          0, root->root_key.objectid,
948                                          level, 0);
949                 if (wret)
950                         ret = wret;
951         } else {
952                 /* update the parent key to reflect our changes */
953                 struct btrfs_disk_key mid_key;
954                 btrfs_node_key(mid, &mid_key, 0);
955                 btrfs_set_node_key(parent, &mid_key, pslot);
956                 btrfs_mark_buffer_dirty(parent);
957         }
958
959         /* update the path */
960         if (left) {
961                 if (btrfs_header_nritems(left) > orig_slot) {
962                         extent_buffer_get(left);
963                         path->nodes[level] = left;
964                         path->slots[level + 1] -= 1;
965                         path->slots[level] = orig_slot;
966                         if (mid)
967                                 free_extent_buffer(mid);
968                 } else {
969                         orig_slot -= btrfs_header_nritems(left);
970                         path->slots[level] = orig_slot;
971                 }
972         }
973         /* double check we haven't messed things up */
974         check_block(root, path, level);
975         if (orig_ptr !=
976             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
977                 BUG();
978 enospc:
979         if (right)
980                 free_extent_buffer(right);
981         if (left)
982                 free_extent_buffer(left);
983         return ret;
984 }
985
986 /* returns zero if the push worked, non-zero otherwise */
987 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
988                                           struct btrfs_root *root,
989                                           struct btrfs_path *path, int level)
990 {
991         struct extent_buffer *right = NULL;
992         struct extent_buffer *mid;
993         struct extent_buffer *left = NULL;
994         struct extent_buffer *parent = NULL;
995         int ret = 0;
996         int wret;
997         int pslot;
998         int orig_slot = path->slots[level];
999
1000         if (level == 0)
1001                 return 1;
1002
1003         mid = path->nodes[level];
1004         WARN_ON(btrfs_header_generation(mid) != trans->transid);
1005
1006         if (level < BTRFS_MAX_LEVEL - 1)
1007                 parent = path->nodes[level + 1];
1008         pslot = path->slots[level + 1];
1009
1010         if (!parent)
1011                 return 1;
1012
1013         left = read_node_slot(root, parent, pslot - 1);
1014
1015         /* first, try to make some room in the middle buffer */
1016         if (left) {
1017                 u32 left_nr;
1018                 left_nr = btrfs_header_nritems(left);
1019                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1020                         wret = 1;
1021                 } else {
1022                         ret = btrfs_cow_block(trans, root, left, parent,
1023                                               pslot - 1, &left);
1024                         if (ret)
1025                                 wret = 1;
1026                         else {
1027                                 wret = push_node_left(trans, root,
1028                                                       left, mid, 0);
1029                         }
1030                 }
1031                 if (wret < 0)
1032                         ret = wret;
1033                 if (wret == 0) {
1034                         struct btrfs_disk_key disk_key;
1035                         orig_slot += left_nr;
1036                         btrfs_node_key(mid, &disk_key, 0);
1037                         btrfs_set_node_key(parent, &disk_key, pslot);
1038                         btrfs_mark_buffer_dirty(parent);
1039                         if (btrfs_header_nritems(left) > orig_slot) {
1040                                 path->nodes[level] = left;
1041                                 path->slots[level + 1] -= 1;
1042                                 path->slots[level] = orig_slot;
1043                                 free_extent_buffer(mid);
1044                         } else {
1045                                 orig_slot -=
1046                                         btrfs_header_nritems(left);
1047                                 path->slots[level] = orig_slot;
1048                                 free_extent_buffer(left);
1049                         }
1050                         return 0;
1051                 }
1052                 free_extent_buffer(left);
1053         }
1054         right= read_node_slot(root, parent, pslot + 1);
1055
1056         /*
1057          * then try to empty the right most buffer into the middle
1058          */
1059         if (right) {
1060                 u32 right_nr;
1061                 right_nr = btrfs_header_nritems(right);
1062                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1063                         wret = 1;
1064                 } else {
1065                         ret = btrfs_cow_block(trans, root, right,
1066                                               parent, pslot + 1,
1067                                               &right);
1068                         if (ret)
1069                                 wret = 1;
1070                         else {
1071                                 wret = balance_node_right(trans, root,
1072                                                           right, mid);
1073                         }
1074                 }
1075                 if (wret < 0)
1076                         ret = wret;
1077                 if (wret == 0) {
1078                         struct btrfs_disk_key disk_key;
1079
1080                         btrfs_node_key(right, &disk_key, 0);
1081                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
1082                         btrfs_mark_buffer_dirty(parent);
1083
1084                         if (btrfs_header_nritems(mid) <= orig_slot) {
1085                                 path->nodes[level] = right;
1086                                 path->slots[level + 1] += 1;
1087                                 path->slots[level] = orig_slot -
1088                                         btrfs_header_nritems(mid);
1089                                 free_extent_buffer(mid);
1090                         } else {
1091                                 free_extent_buffer(right);
1092                         }
1093                         return 0;
1094                 }
1095                 free_extent_buffer(right);
1096         }
1097         return 1;
1098 }
1099
1100 /*
1101  * readahead one full node of leaves
1102  */
1103 void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
1104                              int level, int slot, u64 objectid)
1105 {
1106         struct extent_buffer *node;
1107         struct btrfs_disk_key disk_key;
1108         u32 nritems;
1109         u64 search;
1110         u64 lowest_read;
1111         u64 highest_read;
1112         u64 nread = 0;
1113         int direction = path->reada;
1114         struct extent_buffer *eb;
1115         u32 nr;
1116         u32 blocksize;
1117         u32 nscan = 0;
1118
1119         if (level != 1)
1120                 return;
1121
1122         if (!path->nodes[level])
1123                 return;
1124
1125         node = path->nodes[level];
1126         search = btrfs_node_blockptr(node, slot);
1127         blocksize = btrfs_level_size(root, level - 1);
1128         eb = btrfs_find_tree_block(root, search, blocksize);
1129         if (eb) {
1130                 free_extent_buffer(eb);
1131                 return;
1132         }
1133
1134         highest_read = search;
1135         lowest_read = search;
1136
1137         nritems = btrfs_header_nritems(node);
1138         nr = slot;
1139         while(1) {
1140                 if (direction < 0) {
1141                         if (nr == 0)
1142                                 break;
1143                         nr--;
1144                 } else if (direction > 0) {
1145                         nr++;
1146                         if (nr >= nritems)
1147                                 break;
1148                 }
1149                 if (path->reada < 0 && objectid) {
1150                         btrfs_node_key(node, &disk_key, nr);
1151                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1152                                 break;
1153                 }
1154                 search = btrfs_node_blockptr(node, nr);
1155                 if ((search >= lowest_read && search <= highest_read) ||
1156                     (search < lowest_read && lowest_read - search <= 32768) ||
1157                     (search > highest_read && search - highest_read <= 32768)) {
1158                         readahead_tree_block(root, search, blocksize,
1159                                      btrfs_node_ptr_generation(node, nr));
1160                         nread += blocksize;
1161                 }
1162                 nscan++;
1163                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1164                         break;
1165                 if(nread > (1024 * 1024) || nscan > 128)
1166                         break;
1167
1168                 if (search < lowest_read)
1169                         lowest_read = search;
1170                 if (search > highest_read)
1171                         highest_read = search;
1172         }
1173 }
1174
1175 /*
1176  * look for key in the tree.  path is filled in with nodes along the way
1177  * if key is found, we return zero and you can find the item in the leaf
1178  * level of the path (level 0)
1179  *
1180  * If the key isn't found, the path points to the slot where it should
1181  * be inserted, and 1 is returned.  If there are other errors during the
1182  * search a negative error number is returned.
1183  *
1184  * if ins_len > 0, nodes and leaves will be split as we walk down the
1185  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1186  * possible)
1187  */
1188 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1189                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1190                       ins_len, int cow)
1191 {
1192         struct extent_buffer *b;
1193         int slot;
1194         int ret;
1195         int level;
1196         int should_reada = p->reada;
1197         u8 lowest_level = 0;
1198
1199         lowest_level = p->lowest_level;
1200         WARN_ON(lowest_level && ins_len > 0);
1201         WARN_ON(p->nodes[0] != NULL);
1202         /*
1203         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1204         */
1205 again:
1206         b = root->node;
1207         extent_buffer_get(b);
1208         while (b) {
1209                 level = btrfs_header_level(b);
1210                 if (cow) {
1211                         int wret;
1212                         wret = btrfs_cow_block(trans, root, b,
1213                                                p->nodes[level + 1],
1214                                                p->slots[level + 1],
1215                                                &b);
1216                         if (wret) {
1217                                 free_extent_buffer(b);
1218                                 return wret;
1219                         }
1220                 }
1221                 BUG_ON(!cow && ins_len);
1222                 if (level != btrfs_header_level(b))
1223                         WARN_ON(1);
1224                 level = btrfs_header_level(b);
1225                 p->nodes[level] = b;
1226                 ret = check_block(root, p, level);
1227                 if (ret)
1228                         return -1;
1229                 ret = bin_search(b, key, level, &slot);
1230                 if (level != 0) {
1231                         if (ret && slot > 0)
1232                                 slot -= 1;
1233                         p->slots[level] = slot;
1234                         if ((p->search_for_split || ins_len > 0) &&
1235                             btrfs_header_nritems(b) >=
1236                             BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1237                                 int sret = split_node(trans, root, p, level);
1238                                 BUG_ON(sret > 0);
1239                                 if (sret)
1240                                         return sret;
1241                                 b = p->nodes[level];
1242                                 slot = p->slots[level];
1243                         } else if (ins_len < 0) {
1244                                 int sret = balance_level(trans, root, p,
1245                                                          level);
1246                                 if (sret)
1247                                         return sret;
1248                                 b = p->nodes[level];
1249                                 if (!b) {
1250                                         btrfs_release_path(NULL, p);
1251                                         goto again;
1252                                 }
1253                                 slot = p->slots[level];
1254                                 BUG_ON(btrfs_header_nritems(b) == 1);
1255                         }
1256                         /* this is only true while dropping a snapshot */
1257                         if (level == lowest_level)
1258                                 break;
1259
1260                         if (should_reada)
1261                                 reada_for_search(root, p, level, slot,
1262                                                  key->objectid);
1263
1264                         b = read_node_slot(root, b, slot);
1265                 } else {
1266                         p->slots[level] = slot;
1267                         if (ins_len > 0 &&
1268                             ins_len > btrfs_leaf_free_space(root, b)) {
1269                                 int sret = split_leaf(trans, root, key,
1270                                                       p, ins_len, ret == 0);
1271                                 BUG_ON(sret > 0);
1272                                 if (sret)
1273                                         return sret;
1274                         }
1275                         return ret;
1276                 }
1277         }
1278         return 1;
1279 }
1280
1281 /*
1282  * adjust the pointers going up the tree, starting at level
1283  * making sure the right key of each node is points to 'key'.
1284  * This is used after shifting pointers to the left, so it stops
1285  * fixing up pointers when a given leaf/node is not in slot 0 of the
1286  * higher levels
1287  *
1288  * If this fails to write a tree block, it returns -1, but continues
1289  * fixing up the blocks in ram so the tree is consistent.
1290  */
1291 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1292                           struct btrfs_root *root, struct btrfs_path *path,
1293                           struct btrfs_disk_key *key, int level)
1294 {
1295         int i;
1296         int ret = 0;
1297         struct extent_buffer *t;
1298
1299         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1300                 int tslot = path->slots[i];
1301                 if (!path->nodes[i])
1302                         break;
1303                 t = path->nodes[i];
1304                 btrfs_set_node_key(t, key, tslot);
1305                 btrfs_mark_buffer_dirty(path->nodes[i]);
1306                 if (tslot != 0)
1307                         break;
1308         }
1309         return ret;
1310 }
1311
1312 /*
1313  * update item key.
1314  *
1315  * This function isn't completely safe. It's the caller's responsibility
1316  * that the new key won't break the order
1317  */
1318 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1319                             struct btrfs_root *root, struct btrfs_path *path,
1320                             struct btrfs_key *new_key)
1321 {
1322         struct btrfs_disk_key disk_key;
1323         struct extent_buffer *eb;
1324         int slot;
1325
1326         eb = path->nodes[0];
1327         slot = path->slots[0];
1328         if (slot > 0) {
1329                 btrfs_item_key(eb, &disk_key, slot - 1);
1330                 if (btrfs_comp_keys(&disk_key, new_key) >= 0)
1331                         return -1;
1332         }
1333         if (slot < btrfs_header_nritems(eb) - 1) {
1334                 btrfs_item_key(eb, &disk_key, slot + 1);
1335                 if (btrfs_comp_keys(&disk_key, new_key) <= 0)
1336                         return -1;
1337         }
1338
1339         btrfs_cpu_key_to_disk(&disk_key, new_key);
1340         btrfs_set_item_key(eb, &disk_key, slot);
1341         btrfs_mark_buffer_dirty(eb);
1342         if (slot == 0)
1343                 fixup_low_keys(trans, root, path, &disk_key, 1);
1344         return 0;
1345 }
1346
1347 /*
1348  * try to push data from one node into the next node left in the
1349  * tree.
1350  *
1351  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1352  * error, and > 0 if there was no room in the left hand block.
1353  */
1354 static int push_node_left(struct btrfs_trans_handle *trans,
1355                           struct btrfs_root *root, struct extent_buffer *dst,
1356                           struct extent_buffer *src, int empty)
1357 {
1358         int push_items = 0;
1359         int src_nritems;
1360         int dst_nritems;
1361         int ret = 0;
1362
1363         src_nritems = btrfs_header_nritems(src);
1364         dst_nritems = btrfs_header_nritems(dst);
1365         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1366         WARN_ON(btrfs_header_generation(src) != trans->transid);
1367         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1368
1369         if (!empty && src_nritems <= 8)
1370                 return 1;
1371
1372         if (push_items <= 0) {
1373                 return 1;
1374         }
1375
1376         if (empty) {
1377                 push_items = min(src_nritems, push_items);
1378                 if (push_items < src_nritems) {
1379                         /* leave at least 8 pointers in the node if
1380                          * we aren't going to empty it
1381                          */
1382                         if (src_nritems - push_items < 8) {
1383                                 if (push_items <= 8)
1384                                         return 1;
1385                                 push_items -= 8;
1386                         }
1387                 }
1388         } else
1389                 push_items = min(src_nritems - 8, push_items);
1390
1391         copy_extent_buffer(dst, src,
1392                            btrfs_node_key_ptr_offset(dst_nritems),
1393                            btrfs_node_key_ptr_offset(0),
1394                            push_items * sizeof(struct btrfs_key_ptr));
1395
1396         if (push_items < src_nritems) {
1397                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1398                                       btrfs_node_key_ptr_offset(push_items),
1399                                       (src_nritems - push_items) *
1400                                       sizeof(struct btrfs_key_ptr));
1401         }
1402         btrfs_set_header_nritems(src, src_nritems - push_items);
1403         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1404         btrfs_mark_buffer_dirty(src);
1405         btrfs_mark_buffer_dirty(dst);
1406
1407         return ret;
1408 }
1409
1410 /*
1411  * try to push data from one node into the next node right in the
1412  * tree.
1413  *
1414  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1415  * error, and > 0 if there was no room in the right hand block.
1416  *
1417  * this will  only push up to 1/2 the contents of the left node over
1418  */
1419 static int balance_node_right(struct btrfs_trans_handle *trans,
1420                               struct btrfs_root *root,
1421                               struct extent_buffer *dst,
1422                               struct extent_buffer *src)
1423 {
1424         int push_items = 0;
1425         int max_push;
1426         int src_nritems;
1427         int dst_nritems;
1428         int ret = 0;
1429
1430         WARN_ON(btrfs_header_generation(src) != trans->transid);
1431         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1432
1433         src_nritems = btrfs_header_nritems(src);
1434         dst_nritems = btrfs_header_nritems(dst);
1435         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1436         if (push_items <= 0) {
1437                 return 1;
1438         }
1439
1440         if (src_nritems < 4) {
1441                 return 1;
1442         }
1443
1444         max_push = src_nritems / 2 + 1;
1445         /* don't try to empty the node */
1446         if (max_push >= src_nritems) {
1447                 return 1;
1448         }
1449
1450         if (max_push < push_items)
1451                 push_items = max_push;
1452
1453         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1454                                       btrfs_node_key_ptr_offset(0),
1455                                       (dst_nritems) *
1456                                       sizeof(struct btrfs_key_ptr));
1457
1458         copy_extent_buffer(dst, src,
1459                            btrfs_node_key_ptr_offset(0),
1460                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1461                            push_items * sizeof(struct btrfs_key_ptr));
1462
1463         btrfs_set_header_nritems(src, src_nritems - push_items);
1464         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1465
1466         btrfs_mark_buffer_dirty(src);
1467         btrfs_mark_buffer_dirty(dst);
1468
1469         return ret;
1470 }
1471
1472 /*
1473  * helper function to insert a new root level in the tree.
1474  * A new node is allocated, and a single item is inserted to
1475  * point to the existing root
1476  *
1477  * returns zero on success or < 0 on failure.
1478  */
1479 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1480                            struct btrfs_root *root,
1481                            struct btrfs_path *path, int level)
1482 {
1483         u64 lower_gen;
1484         struct extent_buffer *lower;
1485         struct extent_buffer *c;
1486         struct extent_buffer *old;
1487         struct btrfs_disk_key lower_key;
1488
1489         BUG_ON(path->nodes[level]);
1490         BUG_ON(path->nodes[level-1] != root->node);
1491
1492         lower = path->nodes[level-1];
1493         if (level == 1)
1494                 btrfs_item_key(lower, &lower_key, 0);
1495         else
1496                 btrfs_node_key(lower, &lower_key, 0);
1497
1498         c = btrfs_alloc_free_block(trans, root, root->nodesize,
1499                                    root->root_key.objectid, &lower_key, 
1500                                    level, root->node->start, 0);
1501
1502         if (IS_ERR(c))
1503                 return PTR_ERR(c);
1504
1505         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
1506         btrfs_set_header_nritems(c, 1);
1507         btrfs_set_header_level(c, level);
1508         btrfs_set_header_bytenr(c, c->start);
1509         btrfs_set_header_generation(c, trans->transid);
1510         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
1511         btrfs_set_header_owner(c, root->root_key.objectid);
1512
1513         write_extent_buffer(c, root->fs_info->fsid,
1514                             (unsigned long)btrfs_header_fsid(c),
1515                             BTRFS_FSID_SIZE);
1516
1517         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
1518                             (unsigned long)btrfs_header_chunk_tree_uuid(c),
1519                             BTRFS_UUID_SIZE);
1520
1521         btrfs_set_node_key(c, &lower_key, 0);
1522         btrfs_set_node_blockptr(c, 0, lower->start);
1523         lower_gen = btrfs_header_generation(lower);
1524         WARN_ON(lower_gen != trans->transid);
1525
1526         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1527
1528         btrfs_mark_buffer_dirty(c);
1529
1530         old = root->node;
1531         root->node = c;
1532
1533         /* the super has an extra ref to root->node */
1534         free_extent_buffer(old);
1535
1536         add_root_to_dirty_list(root);
1537         extent_buffer_get(c);
1538         path->nodes[level] = c;
1539         path->slots[level] = 0;
1540         return 0;
1541 }
1542
1543 /*
1544  * worker function to insert a single pointer in a node.
1545  * the node should have enough room for the pointer already
1546  *
1547  * slot and level indicate where you want the key to go, and
1548  * blocknr is the block the key points to.
1549  *
1550  * returns zero on success and < 0 on any error
1551  */
1552 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1553                       *root, struct btrfs_path *path, struct btrfs_disk_key
1554                       *key, u64 bytenr, int slot, int level)
1555 {
1556         struct extent_buffer *lower;
1557         int nritems;
1558
1559         BUG_ON(!path->nodes[level]);
1560         lower = path->nodes[level];
1561         nritems = btrfs_header_nritems(lower);
1562         if (slot > nritems)
1563                 BUG();
1564         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1565                 BUG();
1566         if (slot != nritems) {
1567                 memmove_extent_buffer(lower,
1568                               btrfs_node_key_ptr_offset(slot + 1),
1569                               btrfs_node_key_ptr_offset(slot),
1570                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1571         }
1572         btrfs_set_node_key(lower, key, slot);
1573         btrfs_set_node_blockptr(lower, slot, bytenr);
1574         WARN_ON(trans->transid == 0);
1575         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1576         btrfs_set_header_nritems(lower, nritems + 1);
1577         btrfs_mark_buffer_dirty(lower);
1578         return 0;
1579 }
1580
1581 /*
1582  * split the node at the specified level in path in two.
1583  * The path is corrected to point to the appropriate node after the split
1584  *
1585  * Before splitting this tries to make some room in the node by pushing
1586  * left and right, if either one works, it returns right away.
1587  *
1588  * returns 0 on success and < 0 on failure
1589  */
1590 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1591                       *root, struct btrfs_path *path, int level)
1592 {
1593         struct extent_buffer *c;
1594         struct extent_buffer *split;
1595         struct btrfs_disk_key disk_key;
1596         int mid;
1597         int ret;
1598         int wret;
1599         u32 c_nritems;
1600
1601         c = path->nodes[level];
1602         WARN_ON(btrfs_header_generation(c) != trans->transid);
1603         if (c == root->node) {
1604                 /* trying to split the root, lets make a new one */
1605                 ret = insert_new_root(trans, root, path, level + 1);
1606                 if (ret)
1607                         return ret;
1608         } else {
1609                 ret = push_nodes_for_insert(trans, root, path, level);
1610                 c = path->nodes[level];
1611                 if (!ret && btrfs_header_nritems(c) <
1612                     BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
1613                         return 0;
1614                 if (ret < 0)
1615                         return ret;
1616         }
1617
1618         c_nritems = btrfs_header_nritems(c);
1619         mid = (c_nritems + 1) / 2;
1620         btrfs_node_key(c, &disk_key, mid);
1621
1622         split = btrfs_alloc_free_block(trans, root, root->nodesize,
1623                                         root->root_key.objectid,
1624                                         &disk_key, level, c->start, 0);
1625         if (IS_ERR(split))
1626                 return PTR_ERR(split);
1627
1628         memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
1629         btrfs_set_header_level(split, btrfs_header_level(c));
1630         btrfs_set_header_bytenr(split, split->start);
1631         btrfs_set_header_generation(split, trans->transid);
1632         btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
1633         btrfs_set_header_owner(split, root->root_key.objectid);
1634         write_extent_buffer(split, root->fs_info->fsid,
1635                             (unsigned long)btrfs_header_fsid(split),
1636                             BTRFS_FSID_SIZE);
1637         write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
1638                             (unsigned long)btrfs_header_chunk_tree_uuid(split),
1639                             BTRFS_UUID_SIZE);
1640
1641
1642         copy_extent_buffer(split, c,
1643                            btrfs_node_key_ptr_offset(0),
1644                            btrfs_node_key_ptr_offset(mid),
1645                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1646         btrfs_set_header_nritems(split, c_nritems - mid);
1647         btrfs_set_header_nritems(c, mid);
1648         ret = 0;
1649
1650         btrfs_mark_buffer_dirty(c);
1651         btrfs_mark_buffer_dirty(split);
1652
1653         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1654                           path->slots[level + 1] + 1,
1655                           level + 1);
1656         if (wret)
1657                 ret = wret;
1658
1659         if (path->slots[level] >= mid) {
1660                 path->slots[level] -= mid;
1661                 free_extent_buffer(c);
1662                 path->nodes[level] = split;
1663                 path->slots[level + 1] += 1;
1664         } else {
1665                 free_extent_buffer(split);
1666         }
1667         return ret;
1668 }
1669
1670 /*
1671  * how many bytes are required to store the items in a leaf.  start
1672  * and nr indicate which items in the leaf to check.  This totals up the
1673  * space used both by the item structs and the item data
1674  */
1675 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1676 {
1677         int data_len;
1678         int nritems = btrfs_header_nritems(l);
1679         int end = min(nritems, start + nr) - 1;
1680
1681         if (!nr)
1682                 return 0;
1683         data_len = btrfs_item_end_nr(l, start);
1684         data_len = data_len - btrfs_item_offset_nr(l, end);
1685         data_len += sizeof(struct btrfs_item) * nr;
1686         WARN_ON(data_len < 0);
1687         return data_len;
1688 }
1689
1690 /*
1691  * The space between the end of the leaf items and
1692  * the start of the leaf data.  IOW, how much room
1693  * the leaf has left for both items and data
1694  */
1695 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1696 {
1697         int nritems = btrfs_header_nritems(leaf);
1698         int ret;
1699         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
1700         if (ret < 0) {
1701                 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
1702                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1703                        leaf_space_used(leaf, 0, nritems), nritems);
1704         }
1705         return ret;
1706 }
1707
1708 /*
1709  * push some data in the path leaf to the right, trying to free up at
1710  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1711  *
1712  * returns 1 if the push failed because the other node didn't have enough
1713  * room, 0 if everything worked out and < 0 if there were major errors.
1714  */
1715 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1716                            *root, struct btrfs_path *path, int data_size,
1717                            int empty)
1718 {
1719         struct extent_buffer *left = path->nodes[0];
1720         struct extent_buffer *right;
1721         struct extent_buffer *upper;
1722         struct btrfs_disk_key disk_key;
1723         int slot;
1724         u32 i;
1725         int free_space;
1726         int push_space = 0;
1727         int push_items = 0;
1728         struct btrfs_item *item;
1729         u32 left_nritems;
1730         u32 nr;
1731         u32 right_nritems;
1732         u32 data_end;
1733         u32 this_item_size;
1734         int ret;
1735
1736         slot = path->slots[1];
1737         if (!path->nodes[1]) {
1738                 return 1;
1739         }
1740         upper = path->nodes[1];
1741         if (slot >= btrfs_header_nritems(upper) - 1)
1742                 return 1;
1743
1744         right = read_node_slot(root, upper, slot + 1);
1745         free_space = btrfs_leaf_free_space(root, right);
1746         if (free_space < data_size) {
1747                 free_extent_buffer(right);
1748                 return 1;
1749         }
1750
1751         /* cow and double check */
1752         ret = btrfs_cow_block(trans, root, right, upper,
1753                               slot + 1, &right);
1754         if (ret) {
1755                 free_extent_buffer(right);
1756                 return 1;
1757         }
1758         free_space = btrfs_leaf_free_space(root, right);
1759         if (free_space < data_size) {
1760                 free_extent_buffer(right);
1761                 return 1;
1762         }
1763
1764         left_nritems = btrfs_header_nritems(left);
1765         if (left_nritems == 0) {
1766                 free_extent_buffer(right);
1767                 return 1;
1768         }
1769
1770         if (empty)
1771                 nr = 0;
1772         else
1773                 nr = 1;
1774
1775         i = left_nritems - 1;
1776         while (i >= nr) {
1777                 item = btrfs_item_nr(left, i);
1778
1779                 if (path->slots[0] == i)
1780                         push_space += data_size + sizeof(*item);
1781
1782                 this_item_size = btrfs_item_size(left, item);
1783                 if (this_item_size + sizeof(*item) + push_space > free_space)
1784                         break;
1785                 push_items++;
1786                 push_space += this_item_size + sizeof(*item);
1787                 if (i == 0)
1788                         break;
1789                 i--;
1790         }
1791
1792         if (push_items == 0) {
1793                 free_extent_buffer(right);
1794                 return 1;
1795         }
1796
1797         if (!empty && push_items == left_nritems)
1798                 WARN_ON(1);
1799
1800         /* push left to right */
1801         right_nritems = btrfs_header_nritems(right);
1802
1803         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1804         push_space -= leaf_data_end(root, left);
1805
1806         /* make room in the right data area */
1807         data_end = leaf_data_end(root, right);
1808         memmove_extent_buffer(right,
1809                               btrfs_leaf_data(right) + data_end - push_space,
1810                               btrfs_leaf_data(right) + data_end,
1811                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1812
1813         /* copy from the left data area */
1814         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1815                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1816                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1817                      push_space);
1818
1819         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1820                               btrfs_item_nr_offset(0),
1821                               right_nritems * sizeof(struct btrfs_item));
1822
1823         /* copy the items from left to right */
1824         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1825                    btrfs_item_nr_offset(left_nritems - push_items),
1826                    push_items * sizeof(struct btrfs_item));
1827
1828         /* update the item pointers */
1829         right_nritems += push_items;
1830         btrfs_set_header_nritems(right, right_nritems);
1831         push_space = BTRFS_LEAF_DATA_SIZE(root);
1832         for (i = 0; i < right_nritems; i++) {
1833                 item = btrfs_item_nr(right, i);
1834                 push_space -= btrfs_item_size(right, item);
1835                 btrfs_set_item_offset(right, item, push_space);
1836         }
1837
1838         left_nritems -= push_items;
1839         btrfs_set_header_nritems(left, left_nritems);
1840
1841         if (left_nritems)
1842                 btrfs_mark_buffer_dirty(left);
1843         btrfs_mark_buffer_dirty(right);
1844
1845         btrfs_item_key(right, &disk_key, 0);
1846         btrfs_set_node_key(upper, &disk_key, slot + 1);
1847         btrfs_mark_buffer_dirty(upper);
1848
1849         /* then fixup the leaf pointer in the path */
1850         if (path->slots[0] >= left_nritems) {
1851                 path->slots[0] -= left_nritems;
1852                 free_extent_buffer(path->nodes[0]);
1853                 path->nodes[0] = right;
1854                 path->slots[1] += 1;
1855         } else {
1856                 free_extent_buffer(right);
1857         }
1858         return 0;
1859 }
1860 /*
1861  * push some data in the path leaf to the left, trying to free up at
1862  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1863  */
1864 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1865                           *root, struct btrfs_path *path, int data_size,
1866                           int empty)
1867 {
1868         struct btrfs_disk_key disk_key;
1869         struct extent_buffer *right = path->nodes[0];
1870         struct extent_buffer *left;
1871         int slot;
1872         int i;
1873         int free_space;
1874         int push_space = 0;
1875         int push_items = 0;
1876         struct btrfs_item *item;
1877         u32 old_left_nritems;
1878         u32 right_nritems;
1879         u32 nr;
1880         int ret = 0;
1881         int wret;
1882         u32 this_item_size;
1883         u32 old_left_item_size;
1884
1885         slot = path->slots[1];
1886         if (slot == 0)
1887                 return 1;
1888         if (!path->nodes[1])
1889                 return 1;
1890
1891         right_nritems = btrfs_header_nritems(right);
1892         if (right_nritems == 0) {
1893                 return 1;
1894         }
1895
1896         left = read_node_slot(root, path->nodes[1], slot - 1);
1897         free_space = btrfs_leaf_free_space(root, left);
1898         if (free_space < data_size) {
1899                 free_extent_buffer(left);
1900                 return 1;
1901         }
1902
1903         /* cow and double check */
1904         ret = btrfs_cow_block(trans, root, left,
1905                               path->nodes[1], slot - 1, &left);
1906         if (ret) {
1907                 /* we hit -ENOSPC, but it isn't fatal here */
1908                 free_extent_buffer(left);
1909                 return 1;
1910         }
1911
1912         free_space = btrfs_leaf_free_space(root, left);
1913         if (free_space < data_size) {
1914                 free_extent_buffer(left);
1915                 return 1;
1916         }
1917
1918         if (empty)
1919                 nr = right_nritems;
1920         else
1921                 nr = right_nritems - 1;
1922
1923         for (i = 0; i < nr; i++) {
1924                 item = btrfs_item_nr(right, i);
1925
1926                 if (path->slots[0] == i)
1927                         push_space += data_size + sizeof(*item);
1928
1929                 this_item_size = btrfs_item_size(right, item);
1930                 if (this_item_size + sizeof(*item) + push_space > free_space)
1931                         break;
1932
1933                 push_items++;
1934                 push_space += this_item_size + sizeof(*item);
1935         }
1936
1937         if (push_items == 0) {
1938                 free_extent_buffer(left);
1939                 return 1;
1940         }
1941         if (!empty && push_items == btrfs_header_nritems(right))
1942                 WARN_ON(1);
1943
1944         /* push data from right to left */
1945         copy_extent_buffer(left, right,
1946                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1947                            btrfs_item_nr_offset(0),
1948                            push_items * sizeof(struct btrfs_item));
1949
1950         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1951                      btrfs_item_offset_nr(right, push_items -1);
1952
1953         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1954                      leaf_data_end(root, left) - push_space,
1955                      btrfs_leaf_data(right) +
1956                      btrfs_item_offset_nr(right, push_items - 1),
1957                      push_space);
1958         old_left_nritems = btrfs_header_nritems(left);
1959         BUG_ON(old_left_nritems < 0);
1960
1961         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1962         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1963                 u32 ioff;
1964
1965                 item = btrfs_item_nr(left, i);
1966                 ioff = btrfs_item_offset(left, item);
1967                 btrfs_set_item_offset(left, item,
1968                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1969         }
1970         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1971
1972         /* fixup right node */
1973         if (push_items > right_nritems) {
1974                 printk("push items %d nr %u\n", push_items, right_nritems);
1975                 WARN_ON(1);
1976         }
1977
1978         if (push_items < right_nritems) {
1979                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1980                                                   leaf_data_end(root, right);
1981                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1982                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1983                                       btrfs_leaf_data(right) +
1984                                       leaf_data_end(root, right), push_space);
1985
1986                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1987                               btrfs_item_nr_offset(push_items),
1988                              (btrfs_header_nritems(right) - push_items) *
1989                              sizeof(struct btrfs_item));
1990         }
1991         right_nritems -= push_items;
1992         btrfs_set_header_nritems(right, right_nritems);
1993         push_space = BTRFS_LEAF_DATA_SIZE(root);
1994         for (i = 0; i < right_nritems; i++) {
1995                 item = btrfs_item_nr(right, i);
1996                 push_space = push_space - btrfs_item_size(right, item);
1997                 btrfs_set_item_offset(right, item, push_space);
1998         }
1999
2000         btrfs_mark_buffer_dirty(left);
2001         if (right_nritems)
2002                 btrfs_mark_buffer_dirty(right);
2003
2004         btrfs_item_key(right, &disk_key, 0);
2005         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2006         if (wret)
2007                 ret = wret;
2008
2009         /* then fixup the leaf pointer in the path */
2010         if (path->slots[0] < push_items) {
2011                 path->slots[0] += old_left_nritems;
2012                 free_extent_buffer(path->nodes[0]);
2013                 path->nodes[0] = left;
2014                 path->slots[1] -= 1;
2015         } else {
2016                 free_extent_buffer(left);
2017                 path->slots[0] -= push_items;
2018         }
2019         BUG_ON(path->slots[0] < 0);
2020         return ret;
2021 }
2022
2023 /*
2024  * split the path's leaf in two, making sure there is at least data_size
2025  * available for the resulting leaf level of the path.
2026  *
2027  * returns 0 if all went well and < 0 on failure.
2028  */
2029 static noinline int copy_for_split(struct btrfs_trans_handle *trans,
2030                                struct btrfs_root *root,
2031                                struct btrfs_path *path,
2032                                struct extent_buffer *l,
2033                                struct extent_buffer *right,
2034                                int slot, int mid, int nritems)
2035 {
2036         int data_copy_size;
2037         int rt_data_off;
2038         int i;
2039         int ret = 0;
2040         int wret;
2041         struct btrfs_disk_key disk_key;
2042
2043         nritems = nritems - mid;
2044         btrfs_set_header_nritems(right, nritems);
2045         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2046
2047         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2048                            btrfs_item_nr_offset(mid),
2049                            nritems * sizeof(struct btrfs_item));
2050
2051         copy_extent_buffer(right, l,
2052                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2053                      data_copy_size, btrfs_leaf_data(l) +
2054                      leaf_data_end(root, l), data_copy_size);
2055
2056         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2057                       btrfs_item_end_nr(l, mid);
2058
2059         for (i = 0; i < nritems; i++) {
2060                 struct btrfs_item *item = btrfs_item_nr(right, i);
2061                 u32 ioff = btrfs_item_offset(right, item);
2062                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2063         }
2064
2065         btrfs_set_header_nritems(l, mid);
2066         ret = 0;
2067         btrfs_item_key(right, &disk_key, 0);
2068         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2069                           path->slots[1] + 1, 1);
2070         if (wret)
2071                 ret = wret;
2072
2073         btrfs_mark_buffer_dirty(right);
2074         btrfs_mark_buffer_dirty(l);
2075         BUG_ON(path->slots[0] != slot);
2076
2077         if (mid <= slot) {
2078                 free_extent_buffer(path->nodes[0]);
2079                 path->nodes[0] = right;
2080                 path->slots[0] -= mid;
2081                 path->slots[1] += 1;
2082         } else {
2083                 free_extent_buffer(right);
2084         }
2085
2086         BUG_ON(path->slots[0] < 0);
2087
2088         return ret;
2089 }
2090
2091 /*
2092  * split the path's leaf in two, making sure there is at least data_size
2093  * available for the resulting leaf level of the path.
2094  *
2095  * returns 0 if all went well and < 0 on failure.
2096  */
2097 static noinline int split_leaf(struct btrfs_trans_handle *trans,
2098                                struct btrfs_root *root,
2099                                struct btrfs_key *ins_key,
2100                                struct btrfs_path *path, int data_size,
2101                                int extend)
2102 {
2103         struct btrfs_disk_key disk_key;
2104         struct extent_buffer *l;
2105         u32 nritems;
2106         int mid;
2107         int slot;
2108         struct extent_buffer *right;
2109         int ret = 0;
2110         int wret;
2111         int split;
2112         int num_doubles = 0;
2113
2114         /* first try to make some room by pushing left and right */
2115         if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
2116                 wret = push_leaf_right(trans, root, path, data_size, 0);
2117                 if (wret < 0)
2118                         return wret;
2119                 if (wret) {
2120                         wret = push_leaf_left(trans, root, path, data_size, 0);
2121                         if (wret < 0)
2122                                 return wret;
2123                 }
2124                 l = path->nodes[0];
2125
2126                 /* did the pushes work? */
2127                 if (btrfs_leaf_free_space(root, l) >= data_size)
2128                         return 0;
2129         }
2130
2131         if (!path->nodes[1]) {
2132                 ret = insert_new_root(trans, root, path, 1);
2133                 if (ret)
2134                         return ret;
2135         }
2136 again:
2137         split = 1;
2138         l = path->nodes[0];
2139         slot = path->slots[0];
2140         nritems = btrfs_header_nritems(l);
2141         mid = (nritems + 1) / 2;
2142
2143         if (mid <= slot) {
2144                 if (nritems == 1 ||
2145                     leaf_space_used(l, mid, nritems - mid) + data_size >
2146                         BTRFS_LEAF_DATA_SIZE(root)) {
2147                         if (slot >= nritems) {
2148                                 split = 0;
2149                         } else {
2150                                 mid = slot;
2151                                 if (mid != nritems &&
2152                                     leaf_space_used(l, mid, nritems - mid) +
2153                                     data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2154                                         split = 2;
2155                                 }
2156                         }
2157                 }
2158         } else {
2159                 if (leaf_space_used(l, 0, mid) + data_size >
2160                         BTRFS_LEAF_DATA_SIZE(root)) {
2161                         if (!extend && data_size && slot == 0) {
2162                                 split = 0;
2163                         } else if ((extend || !data_size) && slot == 0) {
2164                                 mid = 1;
2165                         } else {
2166                                 mid = slot;
2167                                 if (mid != nritems &&
2168                                     leaf_space_used(l, mid, nritems - mid) +
2169                                     data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2170                                         split = 2 ;
2171                                 }
2172                         }
2173                 }
2174         }
2175         
2176         if (split == 0)
2177                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2178         else
2179                 btrfs_item_key(l, &disk_key, mid);
2180
2181         right = btrfs_alloc_free_block(trans, root, root->leafsize,
2182                                         root->root_key.objectid,
2183                                         &disk_key, 0, l->start, 0);
2184         if (IS_ERR(right)) {
2185                 BUG_ON(1);
2186                 return PTR_ERR(right);
2187         }
2188
2189         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2190         btrfs_set_header_bytenr(right, right->start);
2191         btrfs_set_header_generation(right, trans->transid);
2192         btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
2193         btrfs_set_header_owner(right, root->root_key.objectid);
2194         btrfs_set_header_level(right, 0);
2195         write_extent_buffer(right, root->fs_info->fsid,
2196                             (unsigned long)btrfs_header_fsid(right),
2197                             BTRFS_FSID_SIZE);
2198
2199         write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2200                             (unsigned long)btrfs_header_chunk_tree_uuid(right),
2201                             BTRFS_UUID_SIZE);
2202
2203         if (split == 0) {
2204                 if (mid <= slot) {
2205                         btrfs_set_header_nritems(right, 0);
2206                         wret = insert_ptr(trans, root, path,
2207                                           &disk_key, right->start,
2208                                           path->slots[1] + 1, 1);
2209                         if (wret)
2210                                 ret = wret;
2211
2212                         free_extent_buffer(path->nodes[0]);
2213                         path->nodes[0] = right;
2214                         path->slots[0] = 0;
2215                         path->slots[1] += 1;
2216                 } else {
2217                         btrfs_set_header_nritems(right, 0);
2218                         wret = insert_ptr(trans, root, path,
2219                                           &disk_key,
2220                                           right->start,
2221                                           path->slots[1], 1);
2222                         if (wret)
2223                                 ret = wret;
2224                         free_extent_buffer(path->nodes[0]);
2225                         path->nodes[0] = right;
2226                         path->slots[0] = 0;
2227                         if (path->slots[1] == 0) {
2228                                 wret = fixup_low_keys(trans, root,
2229                                                 path, &disk_key, 1);
2230                                 if (wret)
2231                                         ret = wret;
2232                         }
2233                 }
2234                 btrfs_mark_buffer_dirty(right);
2235                 return ret;
2236         }
2237
2238         ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
2239         BUG_ON(ret);
2240
2241         if (split == 2) {
2242                 BUG_ON(num_doubles != 0);
2243                 num_doubles++;
2244                 goto again;
2245         }
2246
2247         return ret;
2248 }
2249
2250 /*
2251  * This function splits a single item into two items,
2252  * giving 'new_key' to the new item and splitting the
2253  * old one at split_offset (from the start of the item).
2254  *
2255  * The path may be released by this operation.  After
2256  * the split, the path is pointing to the old item.  The
2257  * new item is going to be in the same node as the old one.
2258  *
2259  * Note, the item being split must be smaller enough to live alone on
2260  * a tree block with room for one extra struct btrfs_item
2261  *
2262  * This allows us to split the item in place, keeping a lock on the
2263  * leaf the entire time.
2264  */
2265 int btrfs_split_item(struct btrfs_trans_handle *trans,
2266                      struct btrfs_root *root,
2267                      struct btrfs_path *path,
2268                      struct btrfs_key *new_key,
2269                      unsigned long split_offset)
2270 {
2271         u32 item_size;
2272         struct extent_buffer *leaf;
2273         struct btrfs_key orig_key;
2274         struct btrfs_item *item;
2275         struct btrfs_item *new_item;
2276         int ret = 0;
2277         int slot;
2278         u32 nritems;
2279         u32 orig_offset;
2280         struct btrfs_disk_key disk_key;
2281         char *buf;
2282
2283         leaf = path->nodes[0];
2284         btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
2285         if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
2286                 goto split;
2287
2288         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2289         btrfs_release_path(root, path);
2290
2291         path->search_for_split = 1;
2292
2293         ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
2294         path->search_for_split = 0;
2295
2296         /* if our item isn't there or got smaller, return now */
2297         if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
2298                                                         path->slots[0])) {
2299                 return -EAGAIN;
2300         }
2301
2302         ret = split_leaf(trans, root, &orig_key, path, 0, 0);
2303         BUG_ON(ret);
2304
2305         BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
2306         leaf = path->nodes[0];
2307
2308 split:
2309         item = btrfs_item_nr(leaf, path->slots[0]);
2310         orig_offset = btrfs_item_offset(leaf, item);
2311         item_size = btrfs_item_size(leaf, item);
2312
2313
2314         buf = kmalloc(item_size, GFP_NOFS);
2315         read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
2316                             path->slots[0]), item_size);
2317         slot = path->slots[0] + 1;
2318         leaf = path->nodes[0];
2319
2320         nritems = btrfs_header_nritems(leaf);
2321
2322         if (slot != nritems) {
2323                 /* shift the items */
2324                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2325                               btrfs_item_nr_offset(slot),
2326                               (nritems - slot) * sizeof(struct btrfs_item));
2327
2328         }
2329
2330         btrfs_cpu_key_to_disk(&disk_key, new_key);
2331         btrfs_set_item_key(leaf, &disk_key, slot);
2332
2333         new_item = btrfs_item_nr(leaf, slot);
2334
2335         btrfs_set_item_offset(leaf, new_item, orig_offset);
2336         btrfs_set_item_size(leaf, new_item, item_size - split_offset);
2337
2338         btrfs_set_item_offset(leaf, item,
2339                               orig_offset + item_size - split_offset);
2340         btrfs_set_item_size(leaf, item, split_offset);
2341
2342         btrfs_set_header_nritems(leaf, nritems + 1);
2343
2344         /* write the data for the start of the original item */
2345         write_extent_buffer(leaf, buf,
2346                             btrfs_item_ptr_offset(leaf, path->slots[0]),
2347                             split_offset);
2348
2349         /* write the data for the new item */
2350         write_extent_buffer(leaf, buf + split_offset,
2351                             btrfs_item_ptr_offset(leaf, slot),
2352                             item_size - split_offset);
2353         btrfs_mark_buffer_dirty(leaf);
2354
2355         ret = 0;
2356         if (btrfs_leaf_free_space(root, leaf) < 0) {
2357                 btrfs_print_leaf(root, leaf);
2358                 BUG();
2359         }
2360         kfree(buf);
2361         return ret;
2362 }
2363
2364 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2365                         struct btrfs_root *root,
2366                         struct btrfs_path *path,
2367                         u32 new_size, int from_end)
2368 {
2369         int ret = 0;
2370         int slot;
2371         struct extent_buffer *leaf;
2372         struct btrfs_item *item;
2373         u32 nritems;
2374         unsigned int data_end;
2375         unsigned int old_data_start;
2376         unsigned int old_size;
2377         unsigned int size_diff;
2378         int i;
2379
2380         leaf = path->nodes[0];
2381         slot = path->slots[0];
2382
2383         old_size = btrfs_item_size_nr(leaf, slot);
2384         if (old_size == new_size)
2385                 return 0;
2386
2387         nritems = btrfs_header_nritems(leaf);
2388         data_end = leaf_data_end(root, leaf);
2389
2390         old_data_start = btrfs_item_offset_nr(leaf, slot);
2391
2392         size_diff = old_size - new_size;
2393
2394         BUG_ON(slot < 0);
2395         BUG_ON(slot >= nritems);
2396
2397         /*
2398          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2399          */
2400         /* first correct the data pointers */
2401         for (i = slot; i < nritems; i++) {
2402                 u32 ioff;
2403                 item = btrfs_item_nr(leaf, i);
2404                 ioff = btrfs_item_offset(leaf, item);
2405                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2406         }
2407
2408         /* shift the data */
2409         if (from_end) {
2410                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2411                               data_end + size_diff, btrfs_leaf_data(leaf) +
2412                               data_end, old_data_start + new_size - data_end);
2413         } else {
2414                 struct btrfs_disk_key disk_key;
2415                 u64 offset;
2416
2417                 btrfs_item_key(leaf, &disk_key, slot);
2418
2419                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2420                         unsigned long ptr;
2421                         struct btrfs_file_extent_item *fi;
2422
2423                         fi = btrfs_item_ptr(leaf, slot,
2424                                             struct btrfs_file_extent_item);
2425                         fi = (struct btrfs_file_extent_item *)(
2426                              (unsigned long)fi - size_diff);
2427
2428                         if (btrfs_file_extent_type(leaf, fi) ==
2429                             BTRFS_FILE_EXTENT_INLINE) {
2430                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2431                                 memmove_extent_buffer(leaf, ptr,
2432                                         (unsigned long)fi,
2433                                         offsetof(struct btrfs_file_extent_item,
2434                                                  disk_bytenr));
2435                         }
2436                 }
2437
2438                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2439                               data_end + size_diff, btrfs_leaf_data(leaf) +
2440                               data_end, old_data_start - data_end);
2441
2442                 offset = btrfs_disk_key_offset(&disk_key);
2443                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2444                 btrfs_set_item_key(leaf, &disk_key, slot);
2445                 if (slot == 0)
2446                         fixup_low_keys(trans, root, path, &disk_key, 1);
2447         }
2448
2449         item = btrfs_item_nr(leaf, slot);
2450         btrfs_set_item_size(leaf, item, new_size);
2451         btrfs_mark_buffer_dirty(leaf);
2452
2453         ret = 0;
2454         if (btrfs_leaf_free_space(root, leaf) < 0) {
2455                 btrfs_print_leaf(root, leaf);
2456                 BUG();
2457         }
2458         return ret;
2459 }
2460
2461 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2462                       struct btrfs_root *root, struct btrfs_path *path,
2463                       u32 data_size)
2464 {
2465         int ret = 0;
2466         int slot;
2467         struct extent_buffer *leaf;
2468         struct btrfs_item *item;
2469         u32 nritems;
2470         unsigned int data_end;
2471         unsigned int old_data;
2472         unsigned int old_size;
2473         int i;
2474
2475         leaf = path->nodes[0];
2476
2477         nritems = btrfs_header_nritems(leaf);
2478         data_end = leaf_data_end(root, leaf);
2479
2480         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2481                 btrfs_print_leaf(root, leaf);
2482                 BUG();
2483         }
2484         slot = path->slots[0];
2485         old_data = btrfs_item_end_nr(leaf, slot);
2486
2487         BUG_ON(slot < 0);
2488         if (slot >= nritems) {
2489                 btrfs_print_leaf(root, leaf);
2490                 printk("slot %d too large, nritems %d\n", slot, nritems);
2491                 BUG_ON(1);
2492         }
2493
2494         /*
2495          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2496          */
2497         /* first correct the data pointers */
2498         for (i = slot; i < nritems; i++) {
2499                 u32 ioff;
2500                 item = btrfs_item_nr(leaf, i);
2501                 ioff = btrfs_item_offset(leaf, item);
2502                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2503         }
2504
2505         /* shift the data */
2506         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2507                       data_end - data_size, btrfs_leaf_data(leaf) +
2508                       data_end, old_data - data_end);
2509
2510         data_end = old_data;
2511         old_size = btrfs_item_size_nr(leaf, slot);
2512         item = btrfs_item_nr(leaf, slot);
2513         btrfs_set_item_size(leaf, item, old_size + data_size);
2514         btrfs_mark_buffer_dirty(leaf);
2515
2516         ret = 0;
2517         if (btrfs_leaf_free_space(root, leaf) < 0) {
2518                 btrfs_print_leaf(root, leaf);
2519                 BUG();
2520         }
2521         return ret;
2522 }
2523
2524 /*
2525  * Given a key and some data, insert an item into the tree.
2526  * This does all the path init required, making room in the tree if needed.
2527  */
2528 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2529                             struct btrfs_root *root,
2530                             struct btrfs_path *path,
2531                             struct btrfs_key *cpu_key, u32 *data_size,
2532                             int nr)
2533 {
2534         struct extent_buffer *leaf;
2535         struct btrfs_item *item;
2536         int ret = 0;
2537         int slot;
2538         int i;
2539         u32 nritems;
2540         u32 total_size = 0;
2541         u32 total_data = 0;
2542         unsigned int data_end;
2543         struct btrfs_disk_key disk_key;
2544
2545         for (i = 0; i < nr; i++) {
2546                 total_data += data_size[i];
2547         }
2548
2549         /* create a root if there isn't one */
2550         if (!root->node)
2551                 BUG();
2552
2553         total_size = total_data + nr * sizeof(struct btrfs_item);
2554         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2555         if (ret == 0) {
2556                 return -EEXIST;
2557         }
2558         if (ret < 0)
2559                 goto out;
2560
2561         leaf = path->nodes[0];
2562
2563         nritems = btrfs_header_nritems(leaf);
2564         data_end = leaf_data_end(root, leaf);
2565
2566         if (btrfs_leaf_free_space(root, leaf) < total_size) {
2567                 btrfs_print_leaf(root, leaf);
2568                 printk("not enough freespace need %u have %d\n",
2569                        total_size, btrfs_leaf_free_space(root, leaf));
2570                 BUG();
2571         }
2572
2573         slot = path->slots[0];
2574         BUG_ON(slot < 0);
2575
2576         if (slot != nritems) {
2577                 int i;
2578                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2579
2580                 if (old_data < data_end) {
2581                         btrfs_print_leaf(root, leaf);
2582                         printk("slot %d old_data %d data_end %d\n",
2583                                slot, old_data, data_end);
2584                         BUG_ON(1);
2585                 }
2586                 /*
2587                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2588                  */
2589                 /* first correct the data pointers */
2590                 for (i = slot; i < nritems; i++) {
2591                         u32 ioff;
2592
2593                         item = btrfs_item_nr(leaf, i);
2594                         ioff = btrfs_item_offset(leaf, item);
2595                         btrfs_set_item_offset(leaf, item, ioff - total_data);
2596                 }
2597
2598                 /* shift the items */
2599                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2600                               btrfs_item_nr_offset(slot),
2601                               (nritems - slot) * sizeof(struct btrfs_item));
2602
2603                 /* shift the data */
2604                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2605                               data_end - total_data, btrfs_leaf_data(leaf) +
2606                               data_end, old_data - data_end);
2607                 data_end = old_data;
2608         }
2609
2610         /* setup the item for the new data */
2611         for (i = 0; i < nr; i++) {
2612                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
2613                 btrfs_set_item_key(leaf, &disk_key, slot + i);
2614                 item = btrfs_item_nr(leaf, slot + i);
2615                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
2616                 data_end -= data_size[i];
2617                 btrfs_set_item_size(leaf, item, data_size[i]);
2618         }
2619         btrfs_set_header_nritems(leaf, nritems + nr);
2620         btrfs_mark_buffer_dirty(leaf);
2621
2622         ret = 0;
2623         if (slot == 0) {
2624                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2625                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2626         }
2627
2628         if (btrfs_leaf_free_space(root, leaf) < 0) {
2629                 btrfs_print_leaf(root, leaf);
2630                 BUG();
2631         }
2632
2633 out:
2634         return ret;
2635 }
2636
2637 /*
2638  * Given a key and some data, insert an item into the tree.
2639  * This does all the path init required, making room in the tree if needed.
2640  */
2641 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2642                       *root, struct btrfs_key *cpu_key, void *data, u32
2643                       data_size)
2644 {
2645         int ret = 0;
2646         struct btrfs_path *path;
2647         struct extent_buffer *leaf;
2648         unsigned long ptr;
2649
2650         path = btrfs_alloc_path();
2651         BUG_ON(!path);
2652         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2653         if (!ret) {
2654                 leaf = path->nodes[0];
2655                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2656                 write_extent_buffer(leaf, data, ptr, data_size);
2657                 btrfs_mark_buffer_dirty(leaf);
2658         }
2659         btrfs_free_path(path);
2660         return ret;
2661 }
2662
2663 /*
2664  * delete the pointer from a given node.
2665  *
2666  * If the delete empties a node, the node is removed from the tree,
2667  * continuing all the way the root if required.  The root is converted into
2668  * a leaf if all the nodes are emptied.
2669  */
2670 int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2671                    struct btrfs_path *path, int level, int slot)
2672 {
2673         struct extent_buffer *parent = path->nodes[level];
2674         u32 nritems;
2675         int ret = 0;
2676         int wret;
2677
2678         nritems = btrfs_header_nritems(parent);
2679         if (slot != nritems -1) {
2680                 memmove_extent_buffer(parent,
2681                               btrfs_node_key_ptr_offset(slot),
2682                               btrfs_node_key_ptr_offset(slot + 1),
2683                               sizeof(struct btrfs_key_ptr) *
2684                               (nritems - slot - 1));
2685         }
2686         nritems--;
2687         btrfs_set_header_nritems(parent, nritems);
2688         if (nritems == 0 && parent == root->node) {
2689                 BUG_ON(btrfs_header_level(root->node) != 1);
2690                 /* just turn the root into a leaf and break */
2691                 btrfs_set_header_level(root->node, 0);
2692         } else if (slot == 0) {
2693                 struct btrfs_disk_key disk_key;
2694
2695                 btrfs_node_key(parent, &disk_key, 0);
2696                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2697                 if (wret)
2698                         ret = wret;
2699         }
2700         btrfs_mark_buffer_dirty(parent);
2701         return ret;
2702 }
2703
2704 /*
2705  * a helper function to delete the leaf pointed to by path->slots[1] and
2706  * path->nodes[1].
2707  *
2708  * This deletes the pointer in path->nodes[1] and frees the leaf
2709  * block extent.  zero is returned if it all worked out, < 0 otherwise.
2710  *
2711  * The path must have already been setup for deleting the leaf, including
2712  * all the proper balancing.  path->nodes[1] must be locked.
2713  */
2714 static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
2715                                    struct btrfs_root *root,
2716                                    struct btrfs_path *path,
2717                                    struct extent_buffer *leaf)
2718 {
2719         int ret;
2720
2721         WARN_ON(btrfs_header_generation(leaf) != trans->transid);
2722         ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
2723         if (ret)
2724                 return ret;
2725
2726         ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
2727                                 0, root->root_key.objectid, 0, 0);
2728         return ret;
2729 }
2730
2731 /*
2732  * delete the item at the leaf level in path.  If that empties
2733  * the leaf, remove it from the tree
2734  */
2735 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2736                     struct btrfs_path *path, int slot, int nr)
2737 {
2738         struct extent_buffer *leaf;
2739         struct btrfs_item *item;
2740         int last_off;
2741         int dsize = 0;
2742         int ret = 0;
2743         int wret;
2744         int i;
2745         u32 nritems;
2746
2747         leaf = path->nodes[0];
2748         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2749
2750         for (i = 0; i < nr; i++)
2751                 dsize += btrfs_item_size_nr(leaf, slot + i);
2752
2753         nritems = btrfs_header_nritems(leaf);
2754
2755         if (slot + nr != nritems) {
2756                 int i;
2757                 int data_end = leaf_data_end(root, leaf);
2758
2759                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2760                               data_end + dsize,
2761                               btrfs_leaf_data(leaf) + data_end,
2762                               last_off - data_end);
2763
2764                 for (i = slot + nr; i < nritems; i++) {
2765                         u32 ioff;
2766
2767                         item = btrfs_item_nr(leaf, i);
2768                         ioff = btrfs_item_offset(leaf, item);
2769                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2770                 }
2771
2772                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2773                               btrfs_item_nr_offset(slot + nr),
2774                               sizeof(struct btrfs_item) *
2775                               (nritems - slot - nr));
2776         }
2777         btrfs_set_header_nritems(leaf, nritems - nr);
2778         nritems -= nr;
2779
2780         /* delete the leaf if we've emptied it */
2781         if (nritems == 0) {
2782                 if (leaf == root->node) {
2783                         btrfs_set_header_level(leaf, 0);
2784                 } else {
2785                         clean_tree_block(trans, root, leaf);
2786                         wait_on_tree_block_writeback(root, leaf);
2787
2788                         wret = btrfs_del_leaf(trans, root, path, leaf);
2789                         BUG_ON(ret);
2790                         if (wret)
2791                                 ret = wret;
2792                 }
2793         } else {
2794                 int used = leaf_space_used(leaf, 0, nritems);
2795                 if (slot == 0) {
2796                         struct btrfs_disk_key disk_key;
2797
2798                         btrfs_item_key(leaf, &disk_key, 0);
2799                         wret = fixup_low_keys(trans, root, path,
2800                                               &disk_key, 1);
2801                         if (wret)
2802                                 ret = wret;
2803                 }
2804
2805                 /* delete the leaf if it is mostly empty */
2806                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2807                         /* push_leaf_left fixes the path.
2808                          * make sure the path still points to our leaf
2809                          * for possible call to del_ptr below
2810                          */
2811                         slot = path->slots[1];
2812                         extent_buffer_get(leaf);
2813
2814                         wret = push_leaf_left(trans, root, path, 1, 1);
2815                         if (wret < 0 && wret != -ENOSPC)
2816                                 ret = wret;
2817
2818                         if (path->nodes[0] == leaf &&
2819                             btrfs_header_nritems(leaf)) {
2820                                 wret = push_leaf_right(trans, root, path, 1, 1);
2821                                 if (wret < 0 && wret != -ENOSPC)
2822                                         ret = wret;
2823                         }
2824
2825                         if (btrfs_header_nritems(leaf) == 0) {
2826                                 clean_tree_block(trans, root, leaf);
2827                                 wait_on_tree_block_writeback(root, leaf);
2828
2829                                 path->slots[1] = slot;
2830                                 ret = btrfs_del_leaf(trans, root, path, leaf);
2831                                 BUG_ON(ret);
2832                                 free_extent_buffer(leaf);
2833
2834                         } else {
2835                                 btrfs_mark_buffer_dirty(leaf);
2836                                 free_extent_buffer(leaf);
2837                         }
2838                 } else {
2839                         btrfs_mark_buffer_dirty(leaf);
2840                 }
2841         }
2842         return ret;
2843 }
2844
2845 /*
2846  * walk up the tree as far as required to find the previous leaf.
2847  * returns 0 if it found something or 1 if there are no lesser leaves.
2848  * returns < 0 on io errors.
2849  */
2850 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2851 {
2852         int slot;
2853         int level = 1;
2854         struct extent_buffer *c;
2855         struct extent_buffer *next = NULL;
2856
2857         while(level < BTRFS_MAX_LEVEL) {
2858                 if (!path->nodes[level])
2859                         return 1;
2860
2861                 slot = path->slots[level];
2862                 c = path->nodes[level];
2863                 if (slot == 0) {
2864                         level++;
2865                         if (level == BTRFS_MAX_LEVEL)
2866                                 return 1;
2867                         continue;
2868                 }
2869                 slot--;
2870
2871                 if (next)
2872                         free_extent_buffer(next);
2873
2874                 next = read_node_slot(root, c, slot);
2875                 break;
2876         }
2877         path->slots[level] = slot;
2878         while(1) {
2879                 level--;
2880                 c = path->nodes[level];
2881                 free_extent_buffer(c);
2882                 slot = btrfs_header_nritems(next);
2883                 if (slot != 0)
2884                         slot--;
2885                 path->nodes[level] = next;
2886                 path->slots[level] = slot;
2887                 if (!level)
2888                         break;
2889                 next = read_node_slot(root, next, slot);
2890         }
2891         return 0;
2892 }
2893
2894 /*
2895  * walk up the tree as far as required to find the next leaf.
2896  * returns 0 if it found something or 1 if there are no greater leaves.
2897  * returns < 0 on io errors.
2898  */
2899 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2900 {
2901         int slot;
2902         int level = 1;
2903         struct extent_buffer *c;
2904         struct extent_buffer *next = NULL;
2905
2906         while(level < BTRFS_MAX_LEVEL) {
2907                 if (!path->nodes[level])
2908                         return 1;
2909
2910                 slot = path->slots[level] + 1;
2911                 c = path->nodes[level];
2912                 if (slot >= btrfs_header_nritems(c)) {
2913                         level++;
2914                         if (level == BTRFS_MAX_LEVEL)
2915                                 return 1;
2916                         continue;
2917                 }
2918
2919                 if (next)
2920                         free_extent_buffer(next);
2921
2922                 if (path->reada)
2923                         reada_for_search(root, path, level, slot, 0);
2924
2925                 next = read_node_slot(root, c, slot);
2926                 if (!next)
2927                         return -EIO;
2928                 break;
2929         }
2930         path->slots[level] = slot;
2931         while(1) {
2932                 level--;
2933                 c = path->nodes[level];
2934                 free_extent_buffer(c);
2935                 path->nodes[level] = next;
2936                 path->slots[level] = 0;
2937                 if (!level)
2938                         break;
2939                 if (path->reada)
2940                         reada_for_search(root, path, level, 0, 0);
2941                 next = read_node_slot(root, next, 0);
2942                 if (!next)
2943                         return -EIO;
2944         }
2945         return 0;
2946 }
2947
2948 int btrfs_previous_item(struct btrfs_root *root,
2949                         struct btrfs_path *path, u64 min_objectid,
2950                         int type)
2951 {
2952         struct btrfs_key found_key;
2953         struct extent_buffer *leaf;
2954         int ret;
2955
2956         while(1) {
2957                 if (path->slots[0] == 0) {
2958                         ret = btrfs_prev_leaf(root, path);
2959                         if (ret != 0)
2960                                 return ret;
2961                 } else {
2962                         path->slots[0]--;
2963                 }
2964                 leaf = path->nodes[0];
2965                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2966                 if (found_key.type == type)
2967                         return 0;
2968         }
2969         return 1;
2970 }
2971