9e5b30f340e6f67c6577a5b084f8159fa7e59019
[platform/upstream/btrfs-progs.git] / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include "ctree.h"
19 #include "disk-io.h"
20 #include "transaction.h"
21 #include "print-tree.h"
22 #include "repair.h"
23
24 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
25                       *root, struct btrfs_path *path, int level);
26 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
27                       *root, struct btrfs_key *ins_key,
28                       struct btrfs_path *path, int data_size, int extend);
29 static int push_node_left(struct btrfs_trans_handle *trans,
30                           struct btrfs_root *root, struct extent_buffer *dst,
31                           struct extent_buffer *src, int empty);
32 static int balance_node_right(struct btrfs_trans_handle *trans,
33                               struct btrfs_root *root,
34                               struct extent_buffer *dst_buf,
35                               struct extent_buffer *src_buf);
36
37 inline void btrfs_init_path(struct btrfs_path *p)
38 {
39         memset(p, 0, sizeof(*p));
40 }
41
42 struct btrfs_path *btrfs_alloc_path(void)
43 {
44         struct btrfs_path *path;
45         path = kzalloc(sizeof(struct btrfs_path), GFP_NOFS);
46         return path;
47 }
48
49 void btrfs_free_path(struct btrfs_path *p)
50 {
51         btrfs_release_path(p);
52         kfree(p);
53 }
54
55 void btrfs_release_path(struct btrfs_path *p)
56 {
57         int i;
58         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
59                 if (!p->nodes[i])
60                         continue;
61                 free_extent_buffer(p->nodes[i]);
62         }
63         memset(p, 0, sizeof(*p));
64 }
65
66 void add_root_to_dirty_list(struct btrfs_root *root)
67 {
68         if (root->track_dirty && list_empty(&root->dirty_list)) {
69                 list_add(&root->dirty_list,
70                          &root->fs_info->dirty_cowonly_roots);
71         }
72 }
73
74 int btrfs_copy_root(struct btrfs_trans_handle *trans,
75                       struct btrfs_root *root,
76                       struct extent_buffer *buf,
77                       struct extent_buffer **cow_ret, u64 new_root_objectid)
78 {
79         struct extent_buffer *cow;
80         int ret = 0;
81         int level;
82         struct btrfs_root *new_root;
83         struct btrfs_disk_key disk_key;
84
85         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
86         if (!new_root)
87                 return -ENOMEM;
88
89         memcpy(new_root, root, sizeof(*new_root));
90         new_root->root_key.objectid = new_root_objectid;
91
92         WARN_ON(root->ref_cows && trans->transid !=
93                 root->fs_info->running_transaction->transid);
94         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
95
96         level = btrfs_header_level(buf);
97         if (level == 0)
98                 btrfs_item_key(buf, &disk_key, 0);
99         else
100                 btrfs_node_key(buf, &disk_key, 0);
101         cow = btrfs_alloc_free_block(trans, new_root, buf->len,
102                                      new_root_objectid, &disk_key,
103                                      level, buf->start, 0);
104         if (IS_ERR(cow)) {
105                 kfree(new_root);
106                 return PTR_ERR(cow);
107         }
108
109         copy_extent_buffer(cow, buf, 0, 0, cow->len);
110         btrfs_set_header_bytenr(cow, cow->start);
111         btrfs_set_header_generation(cow, trans->transid);
112         btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
113         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
114                                      BTRFS_HEADER_FLAG_RELOC);
115         if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
116                 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
117         else
118                 btrfs_set_header_owner(cow, new_root_objectid);
119
120         write_extent_buffer(cow, root->fs_info->fsid,
121                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
122
123         WARN_ON(btrfs_header_generation(buf) > trans->transid);
124         ret = btrfs_inc_ref(trans, new_root, cow, 0);
125         kfree(new_root);
126
127         if (ret)
128                 return ret;
129
130         btrfs_mark_buffer_dirty(cow);
131         *cow_ret = cow;
132         return 0;
133 }
134
135 /*
136  * check if the tree block can be shared by multiple trees
137  */
138 static int btrfs_block_can_be_shared(struct btrfs_root *root,
139                                      struct extent_buffer *buf)
140 {
141         /*
142          * Tree blocks not in refernece counted trees and tree roots
143          * are never shared. If a block was allocated after the last
144          * snapshot and the block was not allocated by tree relocation,
145          * we know the block is not shared.
146          */
147         if (root->ref_cows &&
148             buf != root->node && buf != root->commit_root &&
149             (btrfs_header_generation(buf) <=
150              btrfs_root_last_snapshot(&root->root_item) ||
151              btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
152                 return 1;
153 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
154         if (root->ref_cows &&
155             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
156                 return 1;
157 #endif
158         return 0;
159 }
160
161 static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
162                                        struct btrfs_root *root,
163                                        struct extent_buffer *buf,
164                                        struct extent_buffer *cow)
165 {
166         u64 refs;
167         u64 owner;
168         u64 flags;
169         u64 new_flags = 0;
170         int ret;
171
172         /*
173          * Backrefs update rules:
174          *
175          * Always use full backrefs for extent pointers in tree block
176          * allocated by tree relocation.
177          *
178          * If a shared tree block is no longer referenced by its owner
179          * tree (btrfs_header_owner(buf) == root->root_key.objectid),
180          * use full backrefs for extent pointers in tree block.
181          *
182          * If a tree block is been relocating
183          * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
184          * use full backrefs for extent pointers in tree block.
185          * The reason for this is some operations (such as drop tree)
186          * are only allowed for blocks use full backrefs.
187          */
188
189         if (btrfs_block_can_be_shared(root, buf)) {
190                 ret = btrfs_lookup_extent_info(trans, root, buf->start,
191                                                btrfs_header_level(buf), 1,
192                                                &refs, &flags);
193                 BUG_ON(ret);
194                 BUG_ON(refs == 0);
195         } else {
196                 refs = 1;
197                 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
198                     btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
199                         flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
200                 else
201                         flags = 0;
202         }
203
204         owner = btrfs_header_owner(buf);
205         BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
206                owner == BTRFS_TREE_RELOC_OBJECTID);
207
208         if (refs > 1) {
209                 if ((owner == root->root_key.objectid ||
210                      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
211                     !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
212                         ret = btrfs_inc_ref(trans, root, buf, 1);
213                         BUG_ON(ret);
214
215                         if (root->root_key.objectid ==
216                             BTRFS_TREE_RELOC_OBJECTID) {
217                                 ret = btrfs_dec_ref(trans, root, buf, 0);
218                                 BUG_ON(ret);
219                                 ret = btrfs_inc_ref(trans, root, cow, 1);
220                                 BUG_ON(ret);
221                         }
222                         new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
223                 } else {
224
225                         if (root->root_key.objectid ==
226                             BTRFS_TREE_RELOC_OBJECTID)
227                                 ret = btrfs_inc_ref(trans, root, cow, 1);
228                         else
229                                 ret = btrfs_inc_ref(trans, root, cow, 0);
230                         BUG_ON(ret);
231                 }
232                 if (new_flags != 0) {
233                         ret = btrfs_set_block_flags(trans, root, buf->start,
234                                                     btrfs_header_level(buf),
235                                                     new_flags);
236                         BUG_ON(ret);
237                 }
238         } else {
239                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
240                         if (root->root_key.objectid ==
241                             BTRFS_TREE_RELOC_OBJECTID)
242                                 ret = btrfs_inc_ref(trans, root, cow, 1);
243                         else
244                                 ret = btrfs_inc_ref(trans, root, cow, 0);
245                         BUG_ON(ret);
246                         ret = btrfs_dec_ref(trans, root, buf, 1);
247                         BUG_ON(ret);
248                 }
249                 clean_tree_block(trans, root, buf);
250         }
251         return 0;
252 }
253
254 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
255                              struct btrfs_root *root,
256                              struct extent_buffer *buf,
257                              struct extent_buffer *parent, int parent_slot,
258                              struct extent_buffer **cow_ret,
259                              u64 search_start, u64 empty_size)
260 {
261         struct extent_buffer *cow;
262         struct btrfs_disk_key disk_key;
263         int level;
264
265         WARN_ON(root->ref_cows && trans->transid !=
266                 root->fs_info->running_transaction->transid);
267         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
268
269         level = btrfs_header_level(buf);
270
271         if (level == 0)
272                 btrfs_item_key(buf, &disk_key, 0);
273         else
274                 btrfs_node_key(buf, &disk_key, 0);
275
276         cow = btrfs_alloc_free_block(trans, root, buf->len,
277                                      root->root_key.objectid, &disk_key,
278                                      level, search_start, empty_size);
279         if (IS_ERR(cow))
280                 return PTR_ERR(cow);
281
282         copy_extent_buffer(cow, buf, 0, 0, cow->len);
283         btrfs_set_header_bytenr(cow, cow->start);
284         btrfs_set_header_generation(cow, trans->transid);
285         btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
286         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
287                                      BTRFS_HEADER_FLAG_RELOC);
288         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
289                 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
290         else
291                 btrfs_set_header_owner(cow, root->root_key.objectid);
292
293         write_extent_buffer(cow, root->fs_info->fsid,
294                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
295
296         WARN_ON(!(buf->flags & EXTENT_BAD_TRANSID) &&
297                 btrfs_header_generation(buf) > trans->transid);
298
299         update_ref_for_cow(trans, root, buf, cow);
300
301         if (buf == root->node) {
302                 root->node = cow;
303                 extent_buffer_get(cow);
304
305                 btrfs_free_extent(trans, root, buf->start, buf->len,
306                                   0, root->root_key.objectid, level, 0);
307                 free_extent_buffer(buf);
308                 add_root_to_dirty_list(root);
309         } else {
310                 btrfs_set_node_blockptr(parent, parent_slot,
311                                         cow->start);
312                 WARN_ON(trans->transid == 0);
313                 btrfs_set_node_ptr_generation(parent, parent_slot,
314                                               trans->transid);
315                 btrfs_mark_buffer_dirty(parent);
316                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
317
318                 btrfs_free_extent(trans, root, buf->start, buf->len,
319                                   0, root->root_key.objectid, level, 1);
320         }
321         if (!list_empty(&buf->recow)) {
322                 list_del_init(&buf->recow);
323                 free_extent_buffer(buf);
324         }
325         free_extent_buffer(buf);
326         btrfs_mark_buffer_dirty(cow);
327         *cow_ret = cow;
328         return 0;
329 }
330
331 static inline int should_cow_block(struct btrfs_trans_handle *trans,
332                                    struct btrfs_root *root,
333                                    struct extent_buffer *buf)
334 {
335         if (btrfs_header_generation(buf) == trans->transid &&
336             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
337             !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
338               btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
339                 return 0;
340         return 1;
341 }
342
343 int btrfs_cow_block(struct btrfs_trans_handle *trans,
344                     struct btrfs_root *root, struct extent_buffer *buf,
345                     struct extent_buffer *parent, int parent_slot,
346                     struct extent_buffer **cow_ret)
347 {
348         u64 search_start;
349         int ret;
350         /*
351         if (trans->transaction != root->fs_info->running_transaction) {
352                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
353                        root->fs_info->running_transaction->transid);
354                 WARN_ON(1);
355         }
356         */
357         if (trans->transid != root->fs_info->generation) {
358                 printk(KERN_CRIT "trans %llu running %llu\n",
359                         (unsigned long long)trans->transid,
360                         (unsigned long long)root->fs_info->generation);
361                 WARN_ON(1);
362         }
363         if (!should_cow_block(trans, root, buf)) {
364                 *cow_ret = buf;
365                 return 0;
366         }
367
368         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
369         ret = __btrfs_cow_block(trans, root, buf, parent,
370                                  parent_slot, cow_ret, search_start, 0);
371         return ret;
372 }
373
374 int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
375 {
376         if (k1->objectid > k2->objectid)
377                 return 1;
378         if (k1->objectid < k2->objectid)
379                 return -1;
380         if (k1->type > k2->type)
381                 return 1;
382         if (k1->type < k2->type)
383                 return -1;
384         if (k1->offset > k2->offset)
385                 return 1;
386         if (k1->offset < k2->offset)
387                 return -1;
388         return 0;
389 }
390
391 /*
392  * compare two keys in a memcmp fashion
393  */
394 static int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
395 {
396         struct btrfs_key k1;
397
398         btrfs_disk_key_to_cpu(&k1, disk);
399         return btrfs_comp_cpu_keys(&k1, k2);
400 }
401
402 /*
403  * The leaf data grows from end-to-front in the node.
404  * this returns the address of the start of the last item,
405  * which is the stop of the leaf data stack
406  */
407 static inline unsigned int leaf_data_end(struct btrfs_root *root,
408                                          struct extent_buffer *leaf)
409 {
410         u32 nr = btrfs_header_nritems(leaf);
411         if (nr == 0)
412                 return BTRFS_LEAF_DATA_SIZE(root);
413         return btrfs_item_offset_nr(leaf, nr - 1);
414 }
415
416 enum btrfs_tree_block_status
417 btrfs_check_node(struct btrfs_root *root, struct btrfs_disk_key *parent_key,
418                  struct extent_buffer *buf)
419 {
420         int i;
421         struct btrfs_key cpukey;
422         struct btrfs_disk_key key;
423         u32 nritems = btrfs_header_nritems(buf);
424         enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
425
426         if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
427                 goto fail;
428
429         ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
430         if (parent_key && parent_key->type) {
431                 btrfs_node_key(buf, &key, 0);
432                 if (memcmp(parent_key, &key, sizeof(key)))
433                         goto fail;
434         }
435         ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
436         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
437                 btrfs_node_key(buf, &key, i);
438                 btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
439                 if (btrfs_comp_keys(&key, &cpukey) >= 0)
440                         goto fail;
441         }
442         return BTRFS_TREE_BLOCK_CLEAN;
443 fail:
444         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
445                 if (parent_key)
446                         btrfs_disk_key_to_cpu(&cpukey, parent_key);
447                 else
448                         btrfs_node_key_to_cpu(buf, &cpukey, 0);
449                 btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
450                                                 buf->start, buf->len,
451                                                 btrfs_header_level(buf));
452         }
453         return ret;
454 }
455
456 enum btrfs_tree_block_status
457 btrfs_check_leaf(struct btrfs_root *root, struct btrfs_disk_key *parent_key,
458                  struct extent_buffer *buf)
459 {
460         int i;
461         struct btrfs_key cpukey;
462         struct btrfs_disk_key key;
463         u32 nritems = btrfs_header_nritems(buf);
464         enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
465
466         if (nritems * sizeof(struct btrfs_item) > buf->len)  {
467                 fprintf(stderr, "invalid number of items %llu\n",
468                         (unsigned long long)buf->start);
469                 goto fail;
470         }
471
472         if (btrfs_header_level(buf) != 0) {
473                 ret = BTRFS_TREE_BLOCK_INVALID_LEVEL;
474                 fprintf(stderr, "leaf is not a leaf %llu\n",
475                        (unsigned long long)btrfs_header_bytenr(buf));
476                 goto fail;
477         }
478         if (btrfs_leaf_free_space(root, buf) < 0) {
479                 ret = BTRFS_TREE_BLOCK_INVALID_FREE_SPACE;
480                 fprintf(stderr, "leaf free space incorrect %llu %d\n",
481                         (unsigned long long)btrfs_header_bytenr(buf),
482                         btrfs_leaf_free_space(root, buf));
483                 goto fail;
484         }
485
486         if (nritems == 0)
487                 return BTRFS_TREE_BLOCK_CLEAN;
488
489         btrfs_item_key(buf, &key, 0);
490         if (parent_key && parent_key->type &&
491             memcmp(parent_key, &key, sizeof(key))) {
492                 ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
493                 fprintf(stderr, "leaf parent key incorrect %llu\n",
494                        (unsigned long long)btrfs_header_bytenr(buf));
495                 goto fail;
496         }
497         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
498                 btrfs_item_key(buf, &key, i);
499                 btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
500                 if (btrfs_comp_keys(&key, &cpukey) >= 0) {
501                         ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
502                         fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
503                         goto fail;
504                 }
505                 if (btrfs_item_offset_nr(buf, i) !=
506                         btrfs_item_end_nr(buf, i + 1)) {
507                         ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
508                         fprintf(stderr, "incorrect offsets %u %u\n",
509                                 btrfs_item_offset_nr(buf, i),
510                                 btrfs_item_end_nr(buf, i + 1));
511                         goto fail;
512                 }
513                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
514                     BTRFS_LEAF_DATA_SIZE(root)) {
515                         ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
516                         fprintf(stderr, "bad item end %u wanted %u\n",
517                                 btrfs_item_end_nr(buf, i),
518                                 (unsigned)BTRFS_LEAF_DATA_SIZE(root));
519                         goto fail;
520                 }
521         }
522         return BTRFS_TREE_BLOCK_CLEAN;
523 fail:
524         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
525                 if (parent_key)
526                         btrfs_disk_key_to_cpu(&cpukey, parent_key);
527                 else
528                         btrfs_item_key_to_cpu(buf, &cpukey, 0);
529
530                 btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
531                                                 buf->start, buf->len, 0);
532         }
533         return ret;
534 }
535
536 static int noinline check_block(struct btrfs_root *root,
537                                 struct btrfs_path *path, int level)
538 {
539         struct btrfs_disk_key key;
540         struct btrfs_disk_key *key_ptr = NULL;
541         struct extent_buffer *parent;
542         enum btrfs_tree_block_status ret;
543
544         if (path->skip_check_block)
545                 return 0;
546         if (path->nodes[level + 1]) {
547                 parent = path->nodes[level + 1];
548                 btrfs_node_key(parent, &key, path->slots[level + 1]);
549                 key_ptr = &key;
550         }
551         if (level == 0)
552                 ret =  btrfs_check_leaf(root, key_ptr, path->nodes[0]);
553         else
554                 ret = btrfs_check_node(root, key_ptr, path->nodes[level]);
555         if (ret == BTRFS_TREE_BLOCK_CLEAN)
556                 return 0;
557         return -EIO;
558 }
559
560 /*
561  * search for key in the extent_buffer.  The items start at offset p,
562  * and they are item_size apart.  There are 'max' items in p.
563  *
564  * the slot in the array is returned via slot, and it points to
565  * the place where you would insert key if it is not found in
566  * the array.
567  *
568  * slot may point to max if the key is bigger than all of the keys
569  */
570 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
571                               int item_size, struct btrfs_key *key,
572                               int max, int *slot)
573 {
574         int low = 0;
575         int high = max;
576         int mid;
577         int ret;
578         unsigned long offset;
579         struct btrfs_disk_key *tmp;
580
581         while(low < high) {
582                 mid = (low + high) / 2;
583                 offset = p + mid * item_size;
584
585                 tmp = (struct btrfs_disk_key *)(eb->data + offset);
586                 ret = btrfs_comp_keys(tmp, key);
587
588                 if (ret < 0)
589                         low = mid + 1;
590                 else if (ret > 0)
591                         high = mid;
592                 else {
593                         *slot = mid;
594                         return 0;
595                 }
596         }
597         *slot = low;
598         return 1;
599 }
600
601 /*
602  * simple bin_search frontend that does the right thing for
603  * leaves vs nodes
604  */
605 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
606                       int level, int *slot)
607 {
608         if (level == 0)
609                 return generic_bin_search(eb,
610                                           offsetof(struct btrfs_leaf, items),
611                                           sizeof(struct btrfs_item),
612                                           key, btrfs_header_nritems(eb),
613                                           slot);
614         else
615                 return generic_bin_search(eb,
616                                           offsetof(struct btrfs_node, ptrs),
617                                           sizeof(struct btrfs_key_ptr),
618                                           key, btrfs_header_nritems(eb),
619                                           slot);
620 }
621
622 struct extent_buffer *read_node_slot(struct btrfs_root *root,
623                                    struct extent_buffer *parent, int slot)
624 {
625         int level = btrfs_header_level(parent);
626         if (slot < 0)
627                 return NULL;
628         if (slot >= btrfs_header_nritems(parent))
629                 return NULL;
630
631         if (level == 0)
632                 return NULL;
633
634         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
635                        btrfs_level_size(root, level - 1),
636                        btrfs_node_ptr_generation(parent, slot));
637 }
638
639 static int balance_level(struct btrfs_trans_handle *trans,
640                          struct btrfs_root *root,
641                          struct btrfs_path *path, int level)
642 {
643         struct extent_buffer *right = NULL;
644         struct extent_buffer *mid;
645         struct extent_buffer *left = NULL;
646         struct extent_buffer *parent = NULL;
647         int ret = 0;
648         int wret;
649         int pslot;
650         int orig_slot = path->slots[level];
651         u64 orig_ptr;
652
653         if (level == 0)
654                 return 0;
655
656         mid = path->nodes[level];
657         WARN_ON(btrfs_header_generation(mid) != trans->transid);
658
659         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
660
661         if (level < BTRFS_MAX_LEVEL - 1) {
662                 parent = path->nodes[level + 1];
663                 pslot = path->slots[level + 1];
664         }
665
666         /*
667          * deal with the case where there is only one pointer in the root
668          * by promoting the node below to a root
669          */
670         if (!parent) {
671                 struct extent_buffer *child;
672
673                 if (btrfs_header_nritems(mid) != 1)
674                         return 0;
675
676                 /* promote the child to a root */
677                 child = read_node_slot(root, mid, 0);
678                 BUG_ON(!child);
679                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
680                 BUG_ON(ret);
681
682                 root->node = child;
683                 add_root_to_dirty_list(root);
684                 path->nodes[level] = NULL;
685                 clean_tree_block(trans, root, mid);
686                 wait_on_tree_block_writeback(root, mid);
687                 /* once for the path */
688                 free_extent_buffer(mid);
689
690                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
691                                         0, root->root_key.objectid,
692                                         level, 1);
693                 /* once for the root ptr */
694                 free_extent_buffer(mid);
695                 return ret;
696         }
697         if (btrfs_header_nritems(mid) >
698             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
699                 return 0;
700
701         left = read_node_slot(root, parent, pslot - 1);
702         if (left) {
703                 wret = btrfs_cow_block(trans, root, left,
704                                        parent, pslot - 1, &left);
705                 if (wret) {
706                         ret = wret;
707                         goto enospc;
708                 }
709         }
710         right = read_node_slot(root, parent, pslot + 1);
711         if (right) {
712                 wret = btrfs_cow_block(trans, root, right,
713                                        parent, pslot + 1, &right);
714                 if (wret) {
715                         ret = wret;
716                         goto enospc;
717                 }
718         }
719
720         /* first, try to make some room in the middle buffer */
721         if (left) {
722                 orig_slot += btrfs_header_nritems(left);
723                 wret = push_node_left(trans, root, left, mid, 1);
724                 if (wret < 0)
725                         ret = wret;
726         }
727
728         /*
729          * then try to empty the right most buffer into the middle
730          */
731         if (right) {
732                 wret = push_node_left(trans, root, mid, right, 1);
733                 if (wret < 0 && wret != -ENOSPC)
734                         ret = wret;
735                 if (btrfs_header_nritems(right) == 0) {
736                         u64 bytenr = right->start;
737                         u32 blocksize = right->len;
738
739                         clean_tree_block(trans, root, right);
740                         wait_on_tree_block_writeback(root, right);
741                         free_extent_buffer(right);
742                         right = NULL;
743                         wret = btrfs_del_ptr(trans, root, path,
744                                              level + 1, pslot + 1);
745                         if (wret)
746                                 ret = wret;
747                         wret = btrfs_free_extent(trans, root, bytenr,
748                                                  blocksize, 0,
749                                                  root->root_key.objectid,
750                                                  level, 0);
751                         if (wret)
752                                 ret = wret;
753                 } else {
754                         struct btrfs_disk_key right_key;
755                         btrfs_node_key(right, &right_key, 0);
756                         btrfs_set_node_key(parent, &right_key, pslot + 1);
757                         btrfs_mark_buffer_dirty(parent);
758                 }
759         }
760         if (btrfs_header_nritems(mid) == 1) {
761                 /*
762                  * we're not allowed to leave a node with one item in the
763                  * tree during a delete.  A deletion from lower in the tree
764                  * could try to delete the only pointer in this node.
765                  * So, pull some keys from the left.
766                  * There has to be a left pointer at this point because
767                  * otherwise we would have pulled some pointers from the
768                  * right
769                  */
770                 BUG_ON(!left);
771                 wret = balance_node_right(trans, root, mid, left);
772                 if (wret < 0) {
773                         ret = wret;
774                         goto enospc;
775                 }
776                 if (wret == 1) {
777                         wret = push_node_left(trans, root, left, mid, 1);
778                         if (wret < 0)
779                                 ret = wret;
780                 }
781                 BUG_ON(wret == 1);
782         }
783         if (btrfs_header_nritems(mid) == 0) {
784                 /* we've managed to empty the middle node, drop it */
785                 u64 bytenr = mid->start;
786                 u32 blocksize = mid->len;
787                 clean_tree_block(trans, root, mid);
788                 wait_on_tree_block_writeback(root, mid);
789                 free_extent_buffer(mid);
790                 mid = NULL;
791                 wret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
792                 if (wret)
793                         ret = wret;
794                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
795                                          0, root->root_key.objectid,
796                                          level, 0);
797                 if (wret)
798                         ret = wret;
799         } else {
800                 /* update the parent key to reflect our changes */
801                 struct btrfs_disk_key mid_key;
802                 btrfs_node_key(mid, &mid_key, 0);
803                 btrfs_set_node_key(parent, &mid_key, pslot);
804                 btrfs_mark_buffer_dirty(parent);
805         }
806
807         /* update the path */
808         if (left) {
809                 if (btrfs_header_nritems(left) > orig_slot) {
810                         extent_buffer_get(left);
811                         path->nodes[level] = left;
812                         path->slots[level + 1] -= 1;
813                         path->slots[level] = orig_slot;
814                         if (mid)
815                                 free_extent_buffer(mid);
816                 } else {
817                         orig_slot -= btrfs_header_nritems(left);
818                         path->slots[level] = orig_slot;
819                 }
820         }
821         /* double check we haven't messed things up */
822         check_block(root, path, level);
823         if (orig_ptr !=
824             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
825                 BUG();
826 enospc:
827         if (right)
828                 free_extent_buffer(right);
829         if (left)
830                 free_extent_buffer(left);
831         return ret;
832 }
833
834 /* returns zero if the push worked, non-zero otherwise */
835 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
836                                           struct btrfs_root *root,
837                                           struct btrfs_path *path, int level)
838 {
839         struct extent_buffer *right = NULL;
840         struct extent_buffer *mid;
841         struct extent_buffer *left = NULL;
842         struct extent_buffer *parent = NULL;
843         int ret = 0;
844         int wret;
845         int pslot;
846         int orig_slot = path->slots[level];
847
848         if (level == 0)
849                 return 1;
850
851         mid = path->nodes[level];
852         WARN_ON(btrfs_header_generation(mid) != trans->transid);
853
854         if (level < BTRFS_MAX_LEVEL - 1) {
855                 parent = path->nodes[level + 1];
856                 pslot = path->slots[level + 1];
857         }
858
859         if (!parent)
860                 return 1;
861
862         left = read_node_slot(root, parent, pslot - 1);
863
864         /* first, try to make some room in the middle buffer */
865         if (left) {
866                 u32 left_nr;
867                 left_nr = btrfs_header_nritems(left);
868                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
869                         wret = 1;
870                 } else {
871                         ret = btrfs_cow_block(trans, root, left, parent,
872                                               pslot - 1, &left);
873                         if (ret)
874                                 wret = 1;
875                         else {
876                                 wret = push_node_left(trans, root,
877                                                       left, mid, 0);
878                         }
879                 }
880                 if (wret < 0)
881                         ret = wret;
882                 if (wret == 0) {
883                         struct btrfs_disk_key disk_key;
884                         orig_slot += left_nr;
885                         btrfs_node_key(mid, &disk_key, 0);
886                         btrfs_set_node_key(parent, &disk_key, pslot);
887                         btrfs_mark_buffer_dirty(parent);
888                         if (btrfs_header_nritems(left) > orig_slot) {
889                                 path->nodes[level] = left;
890                                 path->slots[level + 1] -= 1;
891                                 path->slots[level] = orig_slot;
892                                 free_extent_buffer(mid);
893                         } else {
894                                 orig_slot -=
895                                         btrfs_header_nritems(left);
896                                 path->slots[level] = orig_slot;
897                                 free_extent_buffer(left);
898                         }
899                         return 0;
900                 }
901                 free_extent_buffer(left);
902         }
903         right= read_node_slot(root, parent, pslot + 1);
904
905         /*
906          * then try to empty the right most buffer into the middle
907          */
908         if (right) {
909                 u32 right_nr;
910                 right_nr = btrfs_header_nritems(right);
911                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
912                         wret = 1;
913                 } else {
914                         ret = btrfs_cow_block(trans, root, right,
915                                               parent, pslot + 1,
916                                               &right);
917                         if (ret)
918                                 wret = 1;
919                         else {
920                                 wret = balance_node_right(trans, root,
921                                                           right, mid);
922                         }
923                 }
924                 if (wret < 0)
925                         ret = wret;
926                 if (wret == 0) {
927                         struct btrfs_disk_key disk_key;
928
929                         btrfs_node_key(right, &disk_key, 0);
930                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
931                         btrfs_mark_buffer_dirty(parent);
932
933                         if (btrfs_header_nritems(mid) <= orig_slot) {
934                                 path->nodes[level] = right;
935                                 path->slots[level + 1] += 1;
936                                 path->slots[level] = orig_slot -
937                                         btrfs_header_nritems(mid);
938                                 free_extent_buffer(mid);
939                         } else {
940                                 free_extent_buffer(right);
941                         }
942                         return 0;
943                 }
944                 free_extent_buffer(right);
945         }
946         return 1;
947 }
948
949 /*
950  * readahead one full node of leaves
951  */
952 void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
953                              int level, int slot, u64 objectid)
954 {
955         struct extent_buffer *node;
956         struct btrfs_disk_key disk_key;
957         u32 nritems;
958         u64 search;
959         u64 lowest_read;
960         u64 highest_read;
961         u64 nread = 0;
962         int direction = path->reada;
963         struct extent_buffer *eb;
964         u32 nr;
965         u32 blocksize;
966         u32 nscan = 0;
967
968         if (level != 1)
969                 return;
970
971         if (!path->nodes[level])
972                 return;
973
974         node = path->nodes[level];
975         search = btrfs_node_blockptr(node, slot);
976         blocksize = btrfs_level_size(root, level - 1);
977         eb = btrfs_find_tree_block(root, search, blocksize);
978         if (eb) {
979                 free_extent_buffer(eb);
980                 return;
981         }
982
983         highest_read = search;
984         lowest_read = search;
985
986         nritems = btrfs_header_nritems(node);
987         nr = slot;
988         while(1) {
989                 if (direction < 0) {
990                         if (nr == 0)
991                                 break;
992                         nr--;
993                 } else if (direction > 0) {
994                         nr++;
995                         if (nr >= nritems)
996                                 break;
997                 }
998                 if (path->reada < 0 && objectid) {
999                         btrfs_node_key(node, &disk_key, nr);
1000                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1001                                 break;
1002                 }
1003                 search = btrfs_node_blockptr(node, nr);
1004                 if ((search >= lowest_read && search <= highest_read) ||
1005                     (search < lowest_read && lowest_read - search <= 32768) ||
1006                     (search > highest_read && search - highest_read <= 32768)) {
1007                         readahead_tree_block(root, search, blocksize,
1008                                      btrfs_node_ptr_generation(node, nr));
1009                         nread += blocksize;
1010                 }
1011                 nscan++;
1012                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1013                         break;
1014                 if(nread > (1024 * 1024) || nscan > 128)
1015                         break;
1016
1017                 if (search < lowest_read)
1018                         lowest_read = search;
1019                 if (search > highest_read)
1020                         highest_read = search;
1021         }
1022 }
1023
1024 /*
1025  * look for key in the tree.  path is filled in with nodes along the way
1026  * if key is found, we return zero and you can find the item in the leaf
1027  * level of the path (level 0)
1028  *
1029  * If the key isn't found, the path points to the slot where it should
1030  * be inserted, and 1 is returned.  If there are other errors during the
1031  * search a negative error number is returned.
1032  *
1033  * if ins_len > 0, nodes and leaves will be split as we walk down the
1034  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1035  * possible)
1036  */
1037 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1038                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1039                       ins_len, int cow)
1040 {
1041         struct extent_buffer *b;
1042         int slot;
1043         int ret;
1044         int level;
1045         int should_reada = p->reada;
1046         u8 lowest_level = 0;
1047
1048         lowest_level = p->lowest_level;
1049         WARN_ON(lowest_level && ins_len > 0);
1050         WARN_ON(p->nodes[0] != NULL);
1051         /*
1052         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1053         */
1054 again:
1055         b = root->node;
1056         extent_buffer_get(b);
1057         while (b) {
1058                 level = btrfs_header_level(b);
1059                 if (cow) {
1060                         int wret;
1061                         wret = btrfs_cow_block(trans, root, b,
1062                                                p->nodes[level + 1],
1063                                                p->slots[level + 1],
1064                                                &b);
1065                         if (wret) {
1066                                 free_extent_buffer(b);
1067                                 return wret;
1068                         }
1069                 }
1070                 BUG_ON(!cow && ins_len);
1071                 if (level != btrfs_header_level(b))
1072                         WARN_ON(1);
1073                 level = btrfs_header_level(b);
1074                 p->nodes[level] = b;
1075                 ret = check_block(root, p, level);
1076                 if (ret)
1077                         return -1;
1078                 ret = bin_search(b, key, level, &slot);
1079                 if (level != 0) {
1080                         if (ret && slot > 0)
1081                                 slot -= 1;
1082                         p->slots[level] = slot;
1083                         if ((p->search_for_split || ins_len > 0) &&
1084                             btrfs_header_nritems(b) >=
1085                             BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1086                                 int sret = split_node(trans, root, p, level);
1087                                 BUG_ON(sret > 0);
1088                                 if (sret)
1089                                         return sret;
1090                                 b = p->nodes[level];
1091                                 slot = p->slots[level];
1092                         } else if (ins_len < 0) {
1093                                 int sret = balance_level(trans, root, p,
1094                                                          level);
1095                                 if (sret)
1096                                         return sret;
1097                                 b = p->nodes[level];
1098                                 if (!b) {
1099                                         btrfs_release_path(p);
1100                                         goto again;
1101                                 }
1102                                 slot = p->slots[level];
1103                                 BUG_ON(btrfs_header_nritems(b) == 1);
1104                         }
1105                         /* this is only true while dropping a snapshot */
1106                         if (level == lowest_level)
1107                                 break;
1108
1109                         if (should_reada)
1110                                 reada_for_search(root, p, level, slot,
1111                                                  key->objectid);
1112
1113                         b = read_node_slot(root, b, slot);
1114                         if (!extent_buffer_uptodate(b))
1115                                 return -EIO;
1116                 } else {
1117                         p->slots[level] = slot;
1118                         if (ins_len > 0 &&
1119                             ins_len > btrfs_leaf_free_space(root, b)) {
1120                                 int sret = split_leaf(trans, root, key,
1121                                                       p, ins_len, ret == 0);
1122                                 BUG_ON(sret > 0);
1123                                 if (sret)
1124                                         return sret;
1125                         }
1126                         return ret;
1127                 }
1128         }
1129         return 1;
1130 }
1131
1132 /*
1133  * adjust the pointers going up the tree, starting at level
1134  * making sure the right key of each node is points to 'key'.
1135  * This is used after shifting pointers to the left, so it stops
1136  * fixing up pointers when a given leaf/node is not in slot 0 of the
1137  * higher levels
1138  */
1139 void btrfs_fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
1140                           struct btrfs_disk_key *key, int level)
1141 {
1142         int i;
1143         struct extent_buffer *t;
1144
1145         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1146                 int tslot = path->slots[i];
1147                 if (!path->nodes[i])
1148                         break;
1149                 t = path->nodes[i];
1150                 btrfs_set_node_key(t, key, tslot);
1151                 btrfs_mark_buffer_dirty(path->nodes[i]);
1152                 if (tslot != 0)
1153                         break;
1154         }
1155 }
1156
1157 /*
1158  * update item key.
1159  *
1160  * This function isn't completely safe. It's the caller's responsibility
1161  * that the new key won't break the order
1162  */
1163 int btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
1164                             struct btrfs_key *new_key)
1165 {
1166         struct btrfs_disk_key disk_key;
1167         struct extent_buffer *eb;
1168         int slot;
1169
1170         eb = path->nodes[0];
1171         slot = path->slots[0];
1172         if (slot > 0) {
1173                 btrfs_item_key(eb, &disk_key, slot - 1);
1174                 if (btrfs_comp_keys(&disk_key, new_key) >= 0)
1175                         return -1;
1176         }
1177         if (slot < btrfs_header_nritems(eb) - 1) {
1178                 btrfs_item_key(eb, &disk_key, slot + 1);
1179                 if (btrfs_comp_keys(&disk_key, new_key) <= 0)
1180                         return -1;
1181         }
1182
1183         btrfs_cpu_key_to_disk(&disk_key, new_key);
1184         btrfs_set_item_key(eb, &disk_key, slot);
1185         btrfs_mark_buffer_dirty(eb);
1186         if (slot == 0)
1187                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
1188         return 0;
1189 }
1190
1191 /*
1192  * update an item key without the safety checks.  This is meant to be called by
1193  * fsck only.
1194  */
1195 void btrfs_set_item_key_unsafe(struct btrfs_root *root,
1196                                struct btrfs_path *path,
1197                                struct btrfs_key *new_key)
1198 {
1199         struct btrfs_disk_key disk_key;
1200         struct extent_buffer *eb;
1201         int slot;
1202
1203         eb = path->nodes[0];
1204         slot = path->slots[0];
1205
1206         btrfs_cpu_key_to_disk(&disk_key, new_key);
1207         btrfs_set_item_key(eb, &disk_key, slot);
1208         btrfs_mark_buffer_dirty(eb);
1209         if (slot == 0)
1210                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
1211 }
1212
1213 /*
1214  * try to push data from one node into the next node left in the
1215  * tree.
1216  *
1217  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1218  * error, and > 0 if there was no room in the left hand block.
1219  */
1220 static int push_node_left(struct btrfs_trans_handle *trans,
1221                           struct btrfs_root *root, struct extent_buffer *dst,
1222                           struct extent_buffer *src, int empty)
1223 {
1224         int push_items = 0;
1225         int src_nritems;
1226         int dst_nritems;
1227         int ret = 0;
1228
1229         src_nritems = btrfs_header_nritems(src);
1230         dst_nritems = btrfs_header_nritems(dst);
1231         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1232         WARN_ON(btrfs_header_generation(src) != trans->transid);
1233         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1234
1235         if (!empty && src_nritems <= 8)
1236                 return 1;
1237
1238         if (push_items <= 0) {
1239                 return 1;
1240         }
1241
1242         if (empty) {
1243                 push_items = min(src_nritems, push_items);
1244                 if (push_items < src_nritems) {
1245                         /* leave at least 8 pointers in the node if
1246                          * we aren't going to empty it
1247                          */
1248                         if (src_nritems - push_items < 8) {
1249                                 if (push_items <= 8)
1250                                         return 1;
1251                                 push_items -= 8;
1252                         }
1253                 }
1254         } else
1255                 push_items = min(src_nritems - 8, push_items);
1256
1257         copy_extent_buffer(dst, src,
1258                            btrfs_node_key_ptr_offset(dst_nritems),
1259                            btrfs_node_key_ptr_offset(0),
1260                            push_items * sizeof(struct btrfs_key_ptr));
1261
1262         if (push_items < src_nritems) {
1263                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1264                                       btrfs_node_key_ptr_offset(push_items),
1265                                       (src_nritems - push_items) *
1266                                       sizeof(struct btrfs_key_ptr));
1267         }
1268         btrfs_set_header_nritems(src, src_nritems - push_items);
1269         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1270         btrfs_mark_buffer_dirty(src);
1271         btrfs_mark_buffer_dirty(dst);
1272
1273         return ret;
1274 }
1275
1276 /*
1277  * try to push data from one node into the next node right in the
1278  * tree.
1279  *
1280  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1281  * error, and > 0 if there was no room in the right hand block.
1282  *
1283  * this will  only push up to 1/2 the contents of the left node over
1284  */
1285 static int balance_node_right(struct btrfs_trans_handle *trans,
1286                               struct btrfs_root *root,
1287                               struct extent_buffer *dst,
1288                               struct extent_buffer *src)
1289 {
1290         int push_items = 0;
1291         int max_push;
1292         int src_nritems;
1293         int dst_nritems;
1294         int ret = 0;
1295
1296         WARN_ON(btrfs_header_generation(src) != trans->transid);
1297         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1298
1299         src_nritems = btrfs_header_nritems(src);
1300         dst_nritems = btrfs_header_nritems(dst);
1301         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1302         if (push_items <= 0) {
1303                 return 1;
1304         }
1305
1306         if (src_nritems < 4) {
1307                 return 1;
1308         }
1309
1310         max_push = src_nritems / 2 + 1;
1311         /* don't try to empty the node */
1312         if (max_push >= src_nritems) {
1313                 return 1;
1314         }
1315
1316         if (max_push < push_items)
1317                 push_items = max_push;
1318
1319         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1320                                       btrfs_node_key_ptr_offset(0),
1321                                       (dst_nritems) *
1322                                       sizeof(struct btrfs_key_ptr));
1323
1324         copy_extent_buffer(dst, src,
1325                            btrfs_node_key_ptr_offset(0),
1326                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1327                            push_items * sizeof(struct btrfs_key_ptr));
1328
1329         btrfs_set_header_nritems(src, src_nritems - push_items);
1330         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1331
1332         btrfs_mark_buffer_dirty(src);
1333         btrfs_mark_buffer_dirty(dst);
1334
1335         return ret;
1336 }
1337
1338 /*
1339  * helper function to insert a new root level in the tree.
1340  * A new node is allocated, and a single item is inserted to
1341  * point to the existing root
1342  *
1343  * returns zero on success or < 0 on failure.
1344  */
1345 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1346                            struct btrfs_root *root,
1347                            struct btrfs_path *path, int level)
1348 {
1349         u64 lower_gen;
1350         struct extent_buffer *lower;
1351         struct extent_buffer *c;
1352         struct extent_buffer *old;
1353         struct btrfs_disk_key lower_key;
1354
1355         BUG_ON(path->nodes[level]);
1356         BUG_ON(path->nodes[level-1] != root->node);
1357
1358         lower = path->nodes[level-1];
1359         if (level == 1)
1360                 btrfs_item_key(lower, &lower_key, 0);
1361         else
1362                 btrfs_node_key(lower, &lower_key, 0);
1363
1364         c = btrfs_alloc_free_block(trans, root, root->nodesize,
1365                                    root->root_key.objectid, &lower_key, 
1366                                    level, root->node->start, 0);
1367
1368         if (IS_ERR(c))
1369                 return PTR_ERR(c);
1370
1371         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
1372         btrfs_set_header_nritems(c, 1);
1373         btrfs_set_header_level(c, level);
1374         btrfs_set_header_bytenr(c, c->start);
1375         btrfs_set_header_generation(c, trans->transid);
1376         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
1377         btrfs_set_header_owner(c, root->root_key.objectid);
1378
1379         write_extent_buffer(c, root->fs_info->fsid,
1380                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
1381
1382         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
1383                             btrfs_header_chunk_tree_uuid(c),
1384                             BTRFS_UUID_SIZE);
1385
1386         btrfs_set_node_key(c, &lower_key, 0);
1387         btrfs_set_node_blockptr(c, 0, lower->start);
1388         lower_gen = btrfs_header_generation(lower);
1389         WARN_ON(lower_gen != trans->transid);
1390
1391         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1392
1393         btrfs_mark_buffer_dirty(c);
1394
1395         old = root->node;
1396         root->node = c;
1397
1398         /* the super has an extra ref to root->node */
1399         free_extent_buffer(old);
1400
1401         add_root_to_dirty_list(root);
1402         extent_buffer_get(c);
1403         path->nodes[level] = c;
1404         path->slots[level] = 0;
1405         return 0;
1406 }
1407
1408 /*
1409  * worker function to insert a single pointer in a node.
1410  * the node should have enough room for the pointer already
1411  *
1412  * slot and level indicate where you want the key to go, and
1413  * blocknr is the block the key points to.
1414  *
1415  * returns zero on success and < 0 on any error
1416  */
1417 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1418                       *root, struct btrfs_path *path, struct btrfs_disk_key
1419                       *key, u64 bytenr, int slot, int level)
1420 {
1421         struct extent_buffer *lower;
1422         int nritems;
1423
1424         BUG_ON(!path->nodes[level]);
1425         lower = path->nodes[level];
1426         nritems = btrfs_header_nritems(lower);
1427         if (slot > nritems)
1428                 BUG();
1429         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1430                 BUG();
1431         if (slot != nritems) {
1432                 memmove_extent_buffer(lower,
1433                               btrfs_node_key_ptr_offset(slot + 1),
1434                               btrfs_node_key_ptr_offset(slot),
1435                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1436         }
1437         btrfs_set_node_key(lower, key, slot);
1438         btrfs_set_node_blockptr(lower, slot, bytenr);
1439         WARN_ON(trans->transid == 0);
1440         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1441         btrfs_set_header_nritems(lower, nritems + 1);
1442         btrfs_mark_buffer_dirty(lower);
1443         return 0;
1444 }
1445
1446 /*
1447  * split the node at the specified level in path in two.
1448  * The path is corrected to point to the appropriate node after the split
1449  *
1450  * Before splitting this tries to make some room in the node by pushing
1451  * left and right, if either one works, it returns right away.
1452  *
1453  * returns 0 on success and < 0 on failure
1454  */
1455 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1456                       *root, struct btrfs_path *path, int level)
1457 {
1458         struct extent_buffer *c;
1459         struct extent_buffer *split;
1460         struct btrfs_disk_key disk_key;
1461         int mid;
1462         int ret;
1463         int wret;
1464         u32 c_nritems;
1465
1466         c = path->nodes[level];
1467         WARN_ON(btrfs_header_generation(c) != trans->transid);
1468         if (c == root->node) {
1469                 /* trying to split the root, lets make a new one */
1470                 ret = insert_new_root(trans, root, path, level + 1);
1471                 if (ret)
1472                         return ret;
1473         } else {
1474                 ret = push_nodes_for_insert(trans, root, path, level);
1475                 c = path->nodes[level];
1476                 if (!ret && btrfs_header_nritems(c) <
1477                     BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
1478                         return 0;
1479                 if (ret < 0)
1480                         return ret;
1481         }
1482
1483         c_nritems = btrfs_header_nritems(c);
1484         mid = (c_nritems + 1) / 2;
1485         btrfs_node_key(c, &disk_key, mid);
1486
1487         split = btrfs_alloc_free_block(trans, root, root->nodesize,
1488                                         root->root_key.objectid,
1489                                         &disk_key, level, c->start, 0);
1490         if (IS_ERR(split))
1491                 return PTR_ERR(split);
1492
1493         memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
1494         btrfs_set_header_level(split, btrfs_header_level(c));
1495         btrfs_set_header_bytenr(split, split->start);
1496         btrfs_set_header_generation(split, trans->transid);
1497         btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
1498         btrfs_set_header_owner(split, root->root_key.objectid);
1499         write_extent_buffer(split, root->fs_info->fsid,
1500                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
1501         write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
1502                             btrfs_header_chunk_tree_uuid(split),
1503                             BTRFS_UUID_SIZE);
1504
1505
1506         copy_extent_buffer(split, c,
1507                            btrfs_node_key_ptr_offset(0),
1508                            btrfs_node_key_ptr_offset(mid),
1509                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1510         btrfs_set_header_nritems(split, c_nritems - mid);
1511         btrfs_set_header_nritems(c, mid);
1512         ret = 0;
1513
1514         btrfs_mark_buffer_dirty(c);
1515         btrfs_mark_buffer_dirty(split);
1516
1517         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1518                           path->slots[level + 1] + 1,
1519                           level + 1);
1520         if (wret)
1521                 ret = wret;
1522
1523         if (path->slots[level] >= mid) {
1524                 path->slots[level] -= mid;
1525                 free_extent_buffer(c);
1526                 path->nodes[level] = split;
1527                 path->slots[level + 1] += 1;
1528         } else {
1529                 free_extent_buffer(split);
1530         }
1531         return ret;
1532 }
1533
1534 /*
1535  * how many bytes are required to store the items in a leaf.  start
1536  * and nr indicate which items in the leaf to check.  This totals up the
1537  * space used both by the item structs and the item data
1538  */
1539 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1540 {
1541         int data_len;
1542         int nritems = btrfs_header_nritems(l);
1543         int end = min(nritems, start + nr) - 1;
1544
1545         if (!nr)
1546                 return 0;
1547         data_len = btrfs_item_end_nr(l, start);
1548         data_len = data_len - btrfs_item_offset_nr(l, end);
1549         data_len += sizeof(struct btrfs_item) * nr;
1550         WARN_ON(data_len < 0);
1551         return data_len;
1552 }
1553
1554 /*
1555  * The space between the end of the leaf items and
1556  * the start of the leaf data.  IOW, how much room
1557  * the leaf has left for both items and data
1558  */
1559 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1560 {
1561         int nritems = btrfs_header_nritems(leaf);
1562         int ret;
1563         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
1564         if (ret < 0) {
1565                 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
1566                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1567                        leaf_space_used(leaf, 0, nritems), nritems);
1568         }
1569         return ret;
1570 }
1571
1572 /*
1573  * push some data in the path leaf to the right, trying to free up at
1574  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1575  *
1576  * returns 1 if the push failed because the other node didn't have enough
1577  * room, 0 if everything worked out and < 0 if there were major errors.
1578  */
1579 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1580                            *root, struct btrfs_path *path, int data_size,
1581                            int empty)
1582 {
1583         struct extent_buffer *left = path->nodes[0];
1584         struct extent_buffer *right;
1585         struct extent_buffer *upper;
1586         struct btrfs_disk_key disk_key;
1587         int slot;
1588         u32 i;
1589         int free_space;
1590         int push_space = 0;
1591         int push_items = 0;
1592         struct btrfs_item *item;
1593         u32 left_nritems;
1594         u32 nr;
1595         u32 right_nritems;
1596         u32 data_end;
1597         u32 this_item_size;
1598         int ret;
1599
1600         slot = path->slots[1];
1601         if (!path->nodes[1]) {
1602                 return 1;
1603         }
1604         upper = path->nodes[1];
1605         if (slot >= btrfs_header_nritems(upper) - 1)
1606                 return 1;
1607
1608         right = read_node_slot(root, upper, slot + 1);
1609         free_space = btrfs_leaf_free_space(root, right);
1610         if (free_space < data_size) {
1611                 free_extent_buffer(right);
1612                 return 1;
1613         }
1614
1615         /* cow and double check */
1616         ret = btrfs_cow_block(trans, root, right, upper,
1617                               slot + 1, &right);
1618         if (ret) {
1619                 free_extent_buffer(right);
1620                 return 1;
1621         }
1622         free_space = btrfs_leaf_free_space(root, right);
1623         if (free_space < data_size) {
1624                 free_extent_buffer(right);
1625                 return 1;
1626         }
1627
1628         left_nritems = btrfs_header_nritems(left);
1629         if (left_nritems == 0) {
1630                 free_extent_buffer(right);
1631                 return 1;
1632         }
1633
1634         if (empty)
1635                 nr = 0;
1636         else
1637                 nr = 1;
1638
1639         i = left_nritems - 1;
1640         while (i >= nr) {
1641                 item = btrfs_item_nr(i);
1642
1643                 if (path->slots[0] == i)
1644                         push_space += data_size + sizeof(*item);
1645
1646                 this_item_size = btrfs_item_size(left, item);
1647                 if (this_item_size + sizeof(*item) + push_space > free_space)
1648                         break;
1649                 push_items++;
1650                 push_space += this_item_size + sizeof(*item);
1651                 if (i == 0)
1652                         break;
1653                 i--;
1654         }
1655
1656         if (push_items == 0) {
1657                 free_extent_buffer(right);
1658                 return 1;
1659         }
1660
1661         if (!empty && push_items == left_nritems)
1662                 WARN_ON(1);
1663
1664         /* push left to right */
1665         right_nritems = btrfs_header_nritems(right);
1666
1667         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1668         push_space -= leaf_data_end(root, left);
1669
1670         /* make room in the right data area */
1671         data_end = leaf_data_end(root, right);
1672         memmove_extent_buffer(right,
1673                               btrfs_leaf_data(right) + data_end - push_space,
1674                               btrfs_leaf_data(right) + data_end,
1675                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1676
1677         /* copy from the left data area */
1678         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1679                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1680                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1681                      push_space);
1682
1683         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1684                               btrfs_item_nr_offset(0),
1685                               right_nritems * sizeof(struct btrfs_item));
1686
1687         /* copy the items from left to right */
1688         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1689                    btrfs_item_nr_offset(left_nritems - push_items),
1690                    push_items * sizeof(struct btrfs_item));
1691
1692         /* update the item pointers */
1693         right_nritems += push_items;
1694         btrfs_set_header_nritems(right, right_nritems);
1695         push_space = BTRFS_LEAF_DATA_SIZE(root);
1696         for (i = 0; i < right_nritems; i++) {
1697                 item = btrfs_item_nr(i);
1698                 push_space -= btrfs_item_size(right, item);
1699                 btrfs_set_item_offset(right, item, push_space);
1700         }
1701
1702         left_nritems -= push_items;
1703         btrfs_set_header_nritems(left, left_nritems);
1704
1705         if (left_nritems)
1706                 btrfs_mark_buffer_dirty(left);
1707         btrfs_mark_buffer_dirty(right);
1708
1709         btrfs_item_key(right, &disk_key, 0);
1710         btrfs_set_node_key(upper, &disk_key, slot + 1);
1711         btrfs_mark_buffer_dirty(upper);
1712
1713         /* then fixup the leaf pointer in the path */
1714         if (path->slots[0] >= left_nritems) {
1715                 path->slots[0] -= left_nritems;
1716                 free_extent_buffer(path->nodes[0]);
1717                 path->nodes[0] = right;
1718                 path->slots[1] += 1;
1719         } else {
1720                 free_extent_buffer(right);
1721         }
1722         return 0;
1723 }
1724 /*
1725  * push some data in the path leaf to the left, trying to free up at
1726  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1727  */
1728 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1729                           *root, struct btrfs_path *path, int data_size,
1730                           int empty)
1731 {
1732         struct btrfs_disk_key disk_key;
1733         struct extent_buffer *right = path->nodes[0];
1734         struct extent_buffer *left;
1735         int slot;
1736         int i;
1737         int free_space;
1738         int push_space = 0;
1739         int push_items = 0;
1740         struct btrfs_item *item;
1741         u32 old_left_nritems;
1742         u32 right_nritems;
1743         u32 nr;
1744         int ret = 0;
1745         u32 this_item_size;
1746         u32 old_left_item_size;
1747
1748         slot = path->slots[1];
1749         if (slot == 0)
1750                 return 1;
1751         if (!path->nodes[1])
1752                 return 1;
1753
1754         right_nritems = btrfs_header_nritems(right);
1755         if (right_nritems == 0) {
1756                 return 1;
1757         }
1758
1759         left = read_node_slot(root, path->nodes[1], slot - 1);
1760         free_space = btrfs_leaf_free_space(root, left);
1761         if (free_space < data_size) {
1762                 free_extent_buffer(left);
1763                 return 1;
1764         }
1765
1766         /* cow and double check */
1767         ret = btrfs_cow_block(trans, root, left,
1768                               path->nodes[1], slot - 1, &left);
1769         if (ret) {
1770                 /* we hit -ENOSPC, but it isn't fatal here */
1771                 free_extent_buffer(left);
1772                 return 1;
1773         }
1774
1775         free_space = btrfs_leaf_free_space(root, left);
1776         if (free_space < data_size) {
1777                 free_extent_buffer(left);
1778                 return 1;
1779         }
1780
1781         if (empty)
1782                 nr = right_nritems;
1783         else
1784                 nr = right_nritems - 1;
1785
1786         for (i = 0; i < nr; i++) {
1787                 item = btrfs_item_nr(i);
1788
1789                 if (path->slots[0] == i)
1790                         push_space += data_size + sizeof(*item);
1791
1792                 this_item_size = btrfs_item_size(right, item);
1793                 if (this_item_size + sizeof(*item) + push_space > free_space)
1794                         break;
1795
1796                 push_items++;
1797                 push_space += this_item_size + sizeof(*item);
1798         }
1799
1800         if (push_items == 0) {
1801                 free_extent_buffer(left);
1802                 return 1;
1803         }
1804         if (!empty && push_items == btrfs_header_nritems(right))
1805                 WARN_ON(1);
1806
1807         /* push data from right to left */
1808         copy_extent_buffer(left, right,
1809                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1810                            btrfs_item_nr_offset(0),
1811                            push_items * sizeof(struct btrfs_item));
1812
1813         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1814                      btrfs_item_offset_nr(right, push_items -1);
1815
1816         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1817                      leaf_data_end(root, left) - push_space,
1818                      btrfs_leaf_data(right) +
1819                      btrfs_item_offset_nr(right, push_items - 1),
1820                      push_space);
1821         old_left_nritems = btrfs_header_nritems(left);
1822         BUG_ON(old_left_nritems == 0);
1823
1824         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1825         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1826                 u32 ioff;
1827
1828                 item = btrfs_item_nr(i);
1829                 ioff = btrfs_item_offset(left, item);
1830                 btrfs_set_item_offset(left, item,
1831                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1832         }
1833         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1834
1835         /* fixup right node */
1836         if (push_items > right_nritems) {
1837                 printk("push items %d nr %u\n", push_items, right_nritems);
1838                 WARN_ON(1);
1839         }
1840
1841         if (push_items < right_nritems) {
1842                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1843                                                   leaf_data_end(root, right);
1844                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1845                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1846                                       btrfs_leaf_data(right) +
1847                                       leaf_data_end(root, right), push_space);
1848
1849                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1850                               btrfs_item_nr_offset(push_items),
1851                              (btrfs_header_nritems(right) - push_items) *
1852                              sizeof(struct btrfs_item));
1853         }
1854         right_nritems -= push_items;
1855         btrfs_set_header_nritems(right, right_nritems);
1856         push_space = BTRFS_LEAF_DATA_SIZE(root);
1857         for (i = 0; i < right_nritems; i++) {
1858                 item = btrfs_item_nr(i);
1859                 push_space = push_space - btrfs_item_size(right, item);
1860                 btrfs_set_item_offset(right, item, push_space);
1861         }
1862
1863         btrfs_mark_buffer_dirty(left);
1864         if (right_nritems)
1865                 btrfs_mark_buffer_dirty(right);
1866
1867         btrfs_item_key(right, &disk_key, 0);
1868         btrfs_fixup_low_keys(root, path, &disk_key, 1);
1869
1870         /* then fixup the leaf pointer in the path */
1871         if (path->slots[0] < push_items) {
1872                 path->slots[0] += old_left_nritems;
1873                 free_extent_buffer(path->nodes[0]);
1874                 path->nodes[0] = left;
1875                 path->slots[1] -= 1;
1876         } else {
1877                 free_extent_buffer(left);
1878                 path->slots[0] -= push_items;
1879         }
1880         BUG_ON(path->slots[0] < 0);
1881         return ret;
1882 }
1883
1884 /*
1885  * split the path's leaf in two, making sure there is at least data_size
1886  * available for the resulting leaf level of the path.
1887  *
1888  * returns 0 if all went well and < 0 on failure.
1889  */
1890 static noinline int copy_for_split(struct btrfs_trans_handle *trans,
1891                                struct btrfs_root *root,
1892                                struct btrfs_path *path,
1893                                struct extent_buffer *l,
1894                                struct extent_buffer *right,
1895                                int slot, int mid, int nritems)
1896 {
1897         int data_copy_size;
1898         int rt_data_off;
1899         int i;
1900         int ret = 0;
1901         int wret;
1902         struct btrfs_disk_key disk_key;
1903
1904         nritems = nritems - mid;
1905         btrfs_set_header_nritems(right, nritems);
1906         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
1907
1908         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1909                            btrfs_item_nr_offset(mid),
1910                            nritems * sizeof(struct btrfs_item));
1911
1912         copy_extent_buffer(right, l,
1913                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1914                      data_copy_size, btrfs_leaf_data(l) +
1915                      leaf_data_end(root, l), data_copy_size);
1916
1917         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1918                       btrfs_item_end_nr(l, mid);
1919
1920         for (i = 0; i < nritems; i++) {
1921                 struct btrfs_item *item = btrfs_item_nr(i);
1922                 u32 ioff = btrfs_item_offset(right, item);
1923                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
1924         }
1925
1926         btrfs_set_header_nritems(l, mid);
1927         ret = 0;
1928         btrfs_item_key(right, &disk_key, 0);
1929         wret = insert_ptr(trans, root, path, &disk_key, right->start,
1930                           path->slots[1] + 1, 1);
1931         if (wret)
1932                 ret = wret;
1933
1934         btrfs_mark_buffer_dirty(right);
1935         btrfs_mark_buffer_dirty(l);
1936         BUG_ON(path->slots[0] != slot);
1937
1938         if (mid <= slot) {
1939                 free_extent_buffer(path->nodes[0]);
1940                 path->nodes[0] = right;
1941                 path->slots[0] -= mid;
1942                 path->slots[1] += 1;
1943         } else {
1944                 free_extent_buffer(right);
1945         }
1946
1947         BUG_ON(path->slots[0] < 0);
1948
1949         return ret;
1950 }
1951
1952 /*
1953  * split the path's leaf in two, making sure there is at least data_size
1954  * available for the resulting leaf level of the path.
1955  *
1956  * returns 0 if all went well and < 0 on failure.
1957  */
1958 static noinline int split_leaf(struct btrfs_trans_handle *trans,
1959                                struct btrfs_root *root,
1960                                struct btrfs_key *ins_key,
1961                                struct btrfs_path *path, int data_size,
1962                                int extend)
1963 {
1964         struct btrfs_disk_key disk_key;
1965         struct extent_buffer *l;
1966         u32 nritems;
1967         int mid;
1968         int slot;
1969         struct extent_buffer *right;
1970         int ret = 0;
1971         int wret;
1972         int split;
1973         int num_doubles = 0;
1974
1975         /* first try to make some room by pushing left and right */
1976         if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
1977                 wret = push_leaf_right(trans, root, path, data_size, 0);
1978                 if (wret < 0)
1979                         return wret;
1980                 if (wret) {
1981                         wret = push_leaf_left(trans, root, path, data_size, 0);
1982                         if (wret < 0)
1983                                 return wret;
1984                 }
1985                 l = path->nodes[0];
1986
1987                 /* did the pushes work? */
1988                 if (btrfs_leaf_free_space(root, l) >= data_size)
1989                         return 0;
1990         }
1991
1992         if (!path->nodes[1]) {
1993                 ret = insert_new_root(trans, root, path, 1);
1994                 if (ret)
1995                         return ret;
1996         }
1997 again:
1998         split = 1;
1999         l = path->nodes[0];
2000         slot = path->slots[0];
2001         nritems = btrfs_header_nritems(l);
2002         mid = (nritems + 1) / 2;
2003
2004         if (mid <= slot) {
2005                 if (nritems == 1 ||
2006                     leaf_space_used(l, mid, nritems - mid) + data_size >
2007                         BTRFS_LEAF_DATA_SIZE(root)) {
2008                         if (slot >= nritems) {
2009                                 split = 0;
2010                         } else {
2011                                 mid = slot;
2012                                 if (mid != nritems &&
2013                                     leaf_space_used(l, mid, nritems - mid) +
2014                                     data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2015                                         split = 2;
2016                                 }
2017                         }
2018                 }
2019         } else {
2020                 if (leaf_space_used(l, 0, mid) + data_size >
2021                         BTRFS_LEAF_DATA_SIZE(root)) {
2022                         if (!extend && data_size && slot == 0) {
2023                                 split = 0;
2024                         } else if ((extend || !data_size) && slot == 0) {
2025                                 mid = 1;
2026                         } else {
2027                                 mid = slot;
2028                                 if (mid != nritems &&
2029                                     leaf_space_used(l, mid, nritems - mid) +
2030                                     data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2031                                         split = 2 ;
2032                                 }
2033                         }
2034                 }
2035         }
2036         
2037         if (split == 0)
2038                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2039         else
2040                 btrfs_item_key(l, &disk_key, mid);
2041
2042         right = btrfs_alloc_free_block(trans, root, root->leafsize,
2043                                         root->root_key.objectid,
2044                                         &disk_key, 0, l->start, 0);
2045         if (IS_ERR(right)) {
2046                 BUG_ON(1);
2047                 return PTR_ERR(right);
2048         }
2049
2050         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2051         btrfs_set_header_bytenr(right, right->start);
2052         btrfs_set_header_generation(right, trans->transid);
2053         btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
2054         btrfs_set_header_owner(right, root->root_key.objectid);
2055         btrfs_set_header_level(right, 0);
2056         write_extent_buffer(right, root->fs_info->fsid,
2057                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
2058
2059         write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2060                             btrfs_header_chunk_tree_uuid(right),
2061                             BTRFS_UUID_SIZE);
2062
2063         if (split == 0) {
2064                 if (mid <= slot) {
2065                         btrfs_set_header_nritems(right, 0);
2066                         wret = insert_ptr(trans, root, path,
2067                                           &disk_key, right->start,
2068                                           path->slots[1] + 1, 1);
2069                         if (wret)
2070                                 ret = wret;
2071
2072                         free_extent_buffer(path->nodes[0]);
2073                         path->nodes[0] = right;
2074                         path->slots[0] = 0;
2075                         path->slots[1] += 1;
2076                 } else {
2077                         btrfs_set_header_nritems(right, 0);
2078                         wret = insert_ptr(trans, root, path,
2079                                           &disk_key,
2080                                           right->start,
2081                                           path->slots[1], 1);
2082                         if (wret)
2083                                 ret = wret;
2084                         free_extent_buffer(path->nodes[0]);
2085                         path->nodes[0] = right;
2086                         path->slots[0] = 0;
2087                         if (path->slots[1] == 0) {
2088                                 btrfs_fixup_low_keys(root, path,
2089                                                      &disk_key, 1);
2090                         }
2091                 }
2092                 btrfs_mark_buffer_dirty(right);
2093                 return ret;
2094         }
2095
2096         ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
2097         BUG_ON(ret);
2098
2099         if (split == 2) {
2100                 BUG_ON(num_doubles != 0);
2101                 num_doubles++;
2102                 goto again;
2103         }
2104
2105         return ret;
2106 }
2107
2108 /*
2109  * This function splits a single item into two items,
2110  * giving 'new_key' to the new item and splitting the
2111  * old one at split_offset (from the start of the item).
2112  *
2113  * The path may be released by this operation.  After
2114  * the split, the path is pointing to the old item.  The
2115  * new item is going to be in the same node as the old one.
2116  *
2117  * Note, the item being split must be smaller enough to live alone on
2118  * a tree block with room for one extra struct btrfs_item
2119  *
2120  * This allows us to split the item in place, keeping a lock on the
2121  * leaf the entire time.
2122  */
2123 int btrfs_split_item(struct btrfs_trans_handle *trans,
2124                      struct btrfs_root *root,
2125                      struct btrfs_path *path,
2126                      struct btrfs_key *new_key,
2127                      unsigned long split_offset)
2128 {
2129         u32 item_size;
2130         struct extent_buffer *leaf;
2131         struct btrfs_key orig_key;
2132         struct btrfs_item *item;
2133         struct btrfs_item *new_item;
2134         int ret = 0;
2135         int slot;
2136         u32 nritems;
2137         u32 orig_offset;
2138         struct btrfs_disk_key disk_key;
2139         char *buf;
2140
2141         leaf = path->nodes[0];
2142         btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
2143         if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
2144                 goto split;
2145
2146         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2147         btrfs_release_path(path);
2148
2149         path->search_for_split = 1;
2150
2151         ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
2152         path->search_for_split = 0;
2153
2154         /* if our item isn't there or got smaller, return now */
2155         if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
2156                                                         path->slots[0])) {
2157                 return -EAGAIN;
2158         }
2159
2160         ret = split_leaf(trans, root, &orig_key, path, 0, 0);
2161         BUG_ON(ret);
2162
2163         BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
2164         leaf = path->nodes[0];
2165
2166 split:
2167         item = btrfs_item_nr(path->slots[0]);
2168         orig_offset = btrfs_item_offset(leaf, item);
2169         item_size = btrfs_item_size(leaf, item);
2170
2171
2172         buf = kmalloc(item_size, GFP_NOFS);
2173         read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
2174                             path->slots[0]), item_size);
2175         slot = path->slots[0] + 1;
2176         leaf = path->nodes[0];
2177
2178         nritems = btrfs_header_nritems(leaf);
2179
2180         if (slot != nritems) {
2181                 /* shift the items */
2182                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2183                               btrfs_item_nr_offset(slot),
2184                               (nritems - slot) * sizeof(struct btrfs_item));
2185
2186         }
2187
2188         btrfs_cpu_key_to_disk(&disk_key, new_key);
2189         btrfs_set_item_key(leaf, &disk_key, slot);
2190
2191         new_item = btrfs_item_nr(slot);
2192
2193         btrfs_set_item_offset(leaf, new_item, orig_offset);
2194         btrfs_set_item_size(leaf, new_item, item_size - split_offset);
2195
2196         btrfs_set_item_offset(leaf, item,
2197                               orig_offset + item_size - split_offset);
2198         btrfs_set_item_size(leaf, item, split_offset);
2199
2200         btrfs_set_header_nritems(leaf, nritems + 1);
2201
2202         /* write the data for the start of the original item */
2203         write_extent_buffer(leaf, buf,
2204                             btrfs_item_ptr_offset(leaf, path->slots[0]),
2205                             split_offset);
2206
2207         /* write the data for the new item */
2208         write_extent_buffer(leaf, buf + split_offset,
2209                             btrfs_item_ptr_offset(leaf, slot),
2210                             item_size - split_offset);
2211         btrfs_mark_buffer_dirty(leaf);
2212
2213         ret = 0;
2214         if (btrfs_leaf_free_space(root, leaf) < 0) {
2215                 btrfs_print_leaf(root, leaf);
2216                 BUG();
2217         }
2218         kfree(buf);
2219         return ret;
2220 }
2221
2222 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2223                         struct btrfs_root *root,
2224                         struct btrfs_path *path,
2225                         u32 new_size, int from_end)
2226 {
2227         int ret = 0;
2228         int slot;
2229         struct extent_buffer *leaf;
2230         struct btrfs_item *item;
2231         u32 nritems;
2232         unsigned int data_end;
2233         unsigned int old_data_start;
2234         unsigned int old_size;
2235         unsigned int size_diff;
2236         int i;
2237
2238         leaf = path->nodes[0];
2239         slot = path->slots[0];
2240
2241         old_size = btrfs_item_size_nr(leaf, slot);
2242         if (old_size == new_size)
2243                 return 0;
2244
2245         nritems = btrfs_header_nritems(leaf);
2246         data_end = leaf_data_end(root, leaf);
2247
2248         old_data_start = btrfs_item_offset_nr(leaf, slot);
2249
2250         size_diff = old_size - new_size;
2251
2252         BUG_ON(slot < 0);
2253         BUG_ON(slot >= nritems);
2254
2255         /*
2256          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2257          */
2258         /* first correct the data pointers */
2259         for (i = slot; i < nritems; i++) {
2260                 u32 ioff;
2261                 item = btrfs_item_nr(i);
2262                 ioff = btrfs_item_offset(leaf, item);
2263                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2264         }
2265
2266         /* shift the data */
2267         if (from_end) {
2268                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2269                               data_end + size_diff, btrfs_leaf_data(leaf) +
2270                               data_end, old_data_start + new_size - data_end);
2271         } else {
2272                 struct btrfs_disk_key disk_key;
2273                 u64 offset;
2274
2275                 btrfs_item_key(leaf, &disk_key, slot);
2276
2277                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2278                         unsigned long ptr;
2279                         struct btrfs_file_extent_item *fi;
2280
2281                         fi = btrfs_item_ptr(leaf, slot,
2282                                             struct btrfs_file_extent_item);
2283                         fi = (struct btrfs_file_extent_item *)(
2284                              (unsigned long)fi - size_diff);
2285
2286                         if (btrfs_file_extent_type(leaf, fi) ==
2287                             BTRFS_FILE_EXTENT_INLINE) {
2288                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2289                                 memmove_extent_buffer(leaf, ptr,
2290                                         (unsigned long)fi,
2291                                         offsetof(struct btrfs_file_extent_item,
2292                                                  disk_bytenr));
2293                         }
2294                 }
2295
2296                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2297                               data_end + size_diff, btrfs_leaf_data(leaf) +
2298                               data_end, old_data_start - data_end);
2299
2300                 offset = btrfs_disk_key_offset(&disk_key);
2301                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2302                 btrfs_set_item_key(leaf, &disk_key, slot);
2303                 if (slot == 0)
2304                         btrfs_fixup_low_keys(root, path, &disk_key, 1);
2305         }
2306
2307         item = btrfs_item_nr(slot);
2308         btrfs_set_item_size(leaf, item, new_size);
2309         btrfs_mark_buffer_dirty(leaf);
2310
2311         ret = 0;
2312         if (btrfs_leaf_free_space(root, leaf) < 0) {
2313                 btrfs_print_leaf(root, leaf);
2314                 BUG();
2315         }
2316         return ret;
2317 }
2318
2319 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2320                       struct btrfs_root *root, struct btrfs_path *path,
2321                       u32 data_size)
2322 {
2323         int ret = 0;
2324         int slot;
2325         struct extent_buffer *leaf;
2326         struct btrfs_item *item;
2327         u32 nritems;
2328         unsigned int data_end;
2329         unsigned int old_data;
2330         unsigned int old_size;
2331         int i;
2332
2333         leaf = path->nodes[0];
2334
2335         nritems = btrfs_header_nritems(leaf);
2336         data_end = leaf_data_end(root, leaf);
2337
2338         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2339                 btrfs_print_leaf(root, leaf);
2340                 BUG();
2341         }
2342         slot = path->slots[0];
2343         old_data = btrfs_item_end_nr(leaf, slot);
2344
2345         BUG_ON(slot < 0);
2346         if (slot >= nritems) {
2347                 btrfs_print_leaf(root, leaf);
2348                 printk("slot %d too large, nritems %d\n", slot, nritems);
2349                 BUG_ON(1);
2350         }
2351
2352         /*
2353          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2354          */
2355         /* first correct the data pointers */
2356         for (i = slot; i < nritems; i++) {
2357                 u32 ioff;
2358                 item = btrfs_item_nr(i);
2359                 ioff = btrfs_item_offset(leaf, item);
2360                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2361         }
2362
2363         /* shift the data */
2364         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2365                       data_end - data_size, btrfs_leaf_data(leaf) +
2366                       data_end, old_data - data_end);
2367
2368         data_end = old_data;
2369         old_size = btrfs_item_size_nr(leaf, slot);
2370         item = btrfs_item_nr(slot);
2371         btrfs_set_item_size(leaf, item, old_size + data_size);
2372         btrfs_mark_buffer_dirty(leaf);
2373
2374         ret = 0;
2375         if (btrfs_leaf_free_space(root, leaf) < 0) {
2376                 btrfs_print_leaf(root, leaf);
2377                 BUG();
2378         }
2379         return ret;
2380 }
2381
2382 /*
2383  * Given a key and some data, insert an item into the tree.
2384  * This does all the path init required, making room in the tree if needed.
2385  */
2386 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2387                             struct btrfs_root *root,
2388                             struct btrfs_path *path,
2389                             struct btrfs_key *cpu_key, u32 *data_size,
2390                             int nr)
2391 {
2392         struct extent_buffer *leaf;
2393         struct btrfs_item *item;
2394         int ret = 0;
2395         int slot;
2396         int i;
2397         u32 nritems;
2398         u32 total_size = 0;
2399         u32 total_data = 0;
2400         unsigned int data_end;
2401         struct btrfs_disk_key disk_key;
2402
2403         for (i = 0; i < nr; i++) {
2404                 total_data += data_size[i];
2405         }
2406
2407         /* create a root if there isn't one */
2408         if (!root->node)
2409                 BUG();
2410
2411         total_size = total_data + nr * sizeof(struct btrfs_item);
2412         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2413         if (ret == 0) {
2414                 return -EEXIST;
2415         }
2416         if (ret < 0)
2417                 goto out;
2418
2419         leaf = path->nodes[0];
2420
2421         nritems = btrfs_header_nritems(leaf);
2422         data_end = leaf_data_end(root, leaf);
2423
2424         if (btrfs_leaf_free_space(root, leaf) < total_size) {
2425                 btrfs_print_leaf(root, leaf);
2426                 printk("not enough freespace need %u have %d\n",
2427                        total_size, btrfs_leaf_free_space(root, leaf));
2428                 BUG();
2429         }
2430
2431         slot = path->slots[0];
2432         BUG_ON(slot < 0);
2433
2434         if (slot != nritems) {
2435                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2436
2437                 if (old_data < data_end) {
2438                         btrfs_print_leaf(root, leaf);
2439                         printk("slot %d old_data %d data_end %d\n",
2440                                slot, old_data, data_end);
2441                         BUG_ON(1);
2442                 }
2443                 /*
2444                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2445                  */
2446                 /* first correct the data pointers */
2447                 for (i = slot; i < nritems; i++) {
2448                         u32 ioff;
2449
2450                         item = btrfs_item_nr(i);
2451                         ioff = btrfs_item_offset(leaf, item);
2452                         btrfs_set_item_offset(leaf, item, ioff - total_data);
2453                 }
2454
2455                 /* shift the items */
2456                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2457                               btrfs_item_nr_offset(slot),
2458                               (nritems - slot) * sizeof(struct btrfs_item));
2459
2460                 /* shift the data */
2461                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2462                               data_end - total_data, btrfs_leaf_data(leaf) +
2463                               data_end, old_data - data_end);
2464                 data_end = old_data;
2465         }
2466
2467         /* setup the item for the new data */
2468         for (i = 0; i < nr; i++) {
2469                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
2470                 btrfs_set_item_key(leaf, &disk_key, slot + i);
2471                 item = btrfs_item_nr(slot + i);
2472                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
2473                 data_end -= data_size[i];
2474                 btrfs_set_item_size(leaf, item, data_size[i]);
2475         }
2476         btrfs_set_header_nritems(leaf, nritems + nr);
2477         btrfs_mark_buffer_dirty(leaf);
2478
2479         ret = 0;
2480         if (slot == 0) {
2481                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2482                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
2483         }
2484
2485         if (btrfs_leaf_free_space(root, leaf) < 0) {
2486                 btrfs_print_leaf(root, leaf);
2487                 BUG();
2488         }
2489
2490 out:
2491         return ret;
2492 }
2493
2494 /*
2495  * Given a key and some data, insert an item into the tree.
2496  * This does all the path init required, making room in the tree if needed.
2497  */
2498 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2499                       *root, struct btrfs_key *cpu_key, void *data, u32
2500                       data_size)
2501 {
2502         int ret = 0;
2503         struct btrfs_path *path;
2504         struct extent_buffer *leaf;
2505         unsigned long ptr;
2506
2507         path = btrfs_alloc_path();
2508         BUG_ON(!path);
2509         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2510         if (!ret) {
2511                 leaf = path->nodes[0];
2512                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2513                 write_extent_buffer(leaf, data, ptr, data_size);
2514                 btrfs_mark_buffer_dirty(leaf);
2515         }
2516         btrfs_free_path(path);
2517         return ret;
2518 }
2519
2520 /*
2521  * delete the pointer from a given node.
2522  *
2523  * If the delete empties a node, the node is removed from the tree,
2524  * continuing all the way the root if required.  The root is converted into
2525  * a leaf if all the nodes are emptied.
2526  */
2527 int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2528                    struct btrfs_path *path, int level, int slot)
2529 {
2530         struct extent_buffer *parent = path->nodes[level];
2531         u32 nritems;
2532         int ret = 0;
2533
2534         nritems = btrfs_header_nritems(parent);
2535         if (slot != nritems -1) {
2536                 memmove_extent_buffer(parent,
2537                               btrfs_node_key_ptr_offset(slot),
2538                               btrfs_node_key_ptr_offset(slot + 1),
2539                               sizeof(struct btrfs_key_ptr) *
2540                               (nritems - slot - 1));
2541         }
2542         nritems--;
2543         btrfs_set_header_nritems(parent, nritems);
2544         if (nritems == 0 && parent == root->node) {
2545                 BUG_ON(btrfs_header_level(root->node) != 1);
2546                 /* just turn the root into a leaf and break */
2547                 btrfs_set_header_level(root->node, 0);
2548         } else if (slot == 0) {
2549                 struct btrfs_disk_key disk_key;
2550
2551                 btrfs_node_key(parent, &disk_key, 0);
2552                 btrfs_fixup_low_keys(root, path, &disk_key, level + 1);
2553         }
2554         btrfs_mark_buffer_dirty(parent);
2555         return ret;
2556 }
2557
2558 /*
2559  * a helper function to delete the leaf pointed to by path->slots[1] and
2560  * path->nodes[1].
2561  *
2562  * This deletes the pointer in path->nodes[1] and frees the leaf
2563  * block extent.  zero is returned if it all worked out, < 0 otherwise.
2564  *
2565  * The path must have already been setup for deleting the leaf, including
2566  * all the proper balancing.  path->nodes[1] must be locked.
2567  */
2568 static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
2569                                    struct btrfs_root *root,
2570                                    struct btrfs_path *path,
2571                                    struct extent_buffer *leaf)
2572 {
2573         int ret;
2574
2575         WARN_ON(btrfs_header_generation(leaf) != trans->transid);
2576         ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
2577         if (ret)
2578                 return ret;
2579
2580         ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
2581                                 0, root->root_key.objectid, 0, 0);
2582         return ret;
2583 }
2584
2585 /*
2586  * delete the item at the leaf level in path.  If that empties
2587  * the leaf, remove it from the tree
2588  */
2589 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2590                     struct btrfs_path *path, int slot, int nr)
2591 {
2592         struct extent_buffer *leaf;
2593         struct btrfs_item *item;
2594         int last_off;
2595         int dsize = 0;
2596         int ret = 0;
2597         int wret;
2598         int i;
2599         u32 nritems;
2600
2601         leaf = path->nodes[0];
2602         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2603
2604         for (i = 0; i < nr; i++)
2605                 dsize += btrfs_item_size_nr(leaf, slot + i);
2606
2607         nritems = btrfs_header_nritems(leaf);
2608
2609         if (slot + nr != nritems) {
2610                 int data_end = leaf_data_end(root, leaf);
2611
2612                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2613                               data_end + dsize,
2614                               btrfs_leaf_data(leaf) + data_end,
2615                               last_off - data_end);
2616
2617                 for (i = slot + nr; i < nritems; i++) {
2618                         u32 ioff;
2619
2620                         item = btrfs_item_nr(i);
2621                         ioff = btrfs_item_offset(leaf, item);
2622                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2623                 }
2624
2625                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2626                               btrfs_item_nr_offset(slot + nr),
2627                               sizeof(struct btrfs_item) *
2628                               (nritems - slot - nr));
2629         }
2630         btrfs_set_header_nritems(leaf, nritems - nr);
2631         nritems -= nr;
2632
2633         /* delete the leaf if we've emptied it */
2634         if (nritems == 0) {
2635                 if (leaf == root->node) {
2636                         btrfs_set_header_level(leaf, 0);
2637                 } else {
2638                         clean_tree_block(trans, root, leaf);
2639                         wait_on_tree_block_writeback(root, leaf);
2640
2641                         wret = btrfs_del_leaf(trans, root, path, leaf);
2642                         BUG_ON(ret);
2643                         if (wret)
2644                                 ret = wret;
2645                 }
2646         } else {
2647                 int used = leaf_space_used(leaf, 0, nritems);
2648                 if (slot == 0) {
2649                         struct btrfs_disk_key disk_key;
2650
2651                         btrfs_item_key(leaf, &disk_key, 0);
2652                         btrfs_fixup_low_keys(root, path, &disk_key, 1);
2653                 }
2654
2655                 /* delete the leaf if it is mostly empty */
2656                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2657                         /* push_leaf_left fixes the path.
2658                          * make sure the path still points to our leaf
2659                          * for possible call to del_ptr below
2660                          */
2661                         slot = path->slots[1];
2662                         extent_buffer_get(leaf);
2663
2664                         wret = push_leaf_left(trans, root, path, 1, 1);
2665                         if (wret < 0 && wret != -ENOSPC)
2666                                 ret = wret;
2667
2668                         if (path->nodes[0] == leaf &&
2669                             btrfs_header_nritems(leaf)) {
2670                                 wret = push_leaf_right(trans, root, path, 1, 1);
2671                                 if (wret < 0 && wret != -ENOSPC)
2672                                         ret = wret;
2673                         }
2674
2675                         if (btrfs_header_nritems(leaf) == 0) {
2676                                 clean_tree_block(trans, root, leaf);
2677                                 wait_on_tree_block_writeback(root, leaf);
2678
2679                                 path->slots[1] = slot;
2680                                 ret = btrfs_del_leaf(trans, root, path, leaf);
2681                                 BUG_ON(ret);
2682                                 free_extent_buffer(leaf);
2683
2684                         } else {
2685                                 btrfs_mark_buffer_dirty(leaf);
2686                                 free_extent_buffer(leaf);
2687                         }
2688                 } else {
2689                         btrfs_mark_buffer_dirty(leaf);
2690                 }
2691         }
2692         return ret;
2693 }
2694
2695 /*
2696  * walk up the tree as far as required to find the previous leaf.
2697  * returns 0 if it found something or 1 if there are no lesser leaves.
2698  * returns < 0 on io errors.
2699  */
2700 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2701 {
2702         int slot;
2703         int level = 1;
2704         struct extent_buffer *c;
2705         struct extent_buffer *next = NULL;
2706
2707         while(level < BTRFS_MAX_LEVEL) {
2708                 if (!path->nodes[level])
2709                         return 1;
2710
2711                 slot = path->slots[level];
2712                 c = path->nodes[level];
2713                 if (slot == 0) {
2714                         level++;
2715                         if (level == BTRFS_MAX_LEVEL)
2716                                 return 1;
2717                         continue;
2718                 }
2719                 slot--;
2720
2721                 next = read_node_slot(root, c, slot);
2722                 break;
2723         }
2724         path->slots[level] = slot;
2725         while(1) {
2726                 level--;
2727                 c = path->nodes[level];
2728                 free_extent_buffer(c);
2729                 slot = btrfs_header_nritems(next);
2730                 if (slot != 0)
2731                         slot--;
2732                 path->nodes[level] = next;
2733                 path->slots[level] = slot;
2734                 if (!level)
2735                         break;
2736                 next = read_node_slot(root, next, slot);
2737         }
2738         return 0;
2739 }
2740
2741 /*
2742  * walk up the tree as far as required to find the next leaf.
2743  * returns 0 if it found something or 1 if there are no greater leaves.
2744  * returns < 0 on io errors.
2745  */
2746 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2747 {
2748         int slot;
2749         int level = 1;
2750         struct extent_buffer *c;
2751         struct extent_buffer *next = NULL;
2752
2753         while(level < BTRFS_MAX_LEVEL) {
2754                 if (!path->nodes[level])
2755                         return 1;
2756
2757                 slot = path->slots[level] + 1;
2758                 c = path->nodes[level];
2759                 if (slot >= btrfs_header_nritems(c)) {
2760                         level++;
2761                         if (level == BTRFS_MAX_LEVEL)
2762                                 return 1;
2763                         continue;
2764                 }
2765
2766                 if (path->reada)
2767                         reada_for_search(root, path, level, slot, 0);
2768
2769                 next = read_node_slot(root, c, slot);
2770                 if (!next)
2771                         return -EIO;
2772                 break;
2773         }
2774         path->slots[level] = slot;
2775         while(1) {
2776                 level--;
2777                 c = path->nodes[level];
2778                 free_extent_buffer(c);
2779                 path->nodes[level] = next;
2780                 path->slots[level] = 0;
2781                 if (!level)
2782                         break;
2783                 if (path->reada)
2784                         reada_for_search(root, path, level, 0, 0);
2785                 next = read_node_slot(root, next, 0);
2786                 if (!next)
2787                         return -EIO;
2788         }
2789         return 0;
2790 }
2791
2792 int btrfs_previous_item(struct btrfs_root *root,
2793                         struct btrfs_path *path, u64 min_objectid,
2794                         int type)
2795 {
2796         struct btrfs_key found_key;
2797         struct extent_buffer *leaf;
2798         int ret;
2799
2800         while(1) {
2801                 if (path->slots[0] == 0) {
2802                         ret = btrfs_prev_leaf(root, path);
2803                         if (ret != 0)
2804                                 return ret;
2805                 } else {
2806                         path->slots[0]--;
2807                 }
2808                 leaf = path->nodes[0];
2809                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2810                 if (found_key.type == type)
2811                         return 0;
2812         }
2813         return 1;
2814 }
2815