btrfs-progs: remove unused function wait_on_tree_block_writeback
[platform/upstream/btrfs-progs.git] / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include "ctree.h"
19 #include "disk-io.h"
20 #include "transaction.h"
21 #include "print-tree.h"
22 #include "repair.h"
23 #include "internal.h"
24 #include "sizes.h"
25
26 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
27                       *root, struct btrfs_path *path, int level);
28 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
29                       *root, struct btrfs_key *ins_key,
30                       struct btrfs_path *path, int data_size, int extend);
31 static int push_node_left(struct btrfs_trans_handle *trans,
32                           struct btrfs_root *root, struct extent_buffer *dst,
33                           struct extent_buffer *src, int empty);
34 static int balance_node_right(struct btrfs_trans_handle *trans,
35                               struct btrfs_root *root,
36                               struct extent_buffer *dst_buf,
37                               struct extent_buffer *src_buf);
38
39 inline void btrfs_init_path(struct btrfs_path *p)
40 {
41         memset(p, 0, sizeof(*p));
42 }
43
44 struct btrfs_path *btrfs_alloc_path(void)
45 {
46         struct btrfs_path *path;
47         path = kzalloc(sizeof(struct btrfs_path), GFP_NOFS);
48         return path;
49 }
50
51 void btrfs_free_path(struct btrfs_path *p)
52 {
53         if (!p)
54                 return;
55         btrfs_release_path(p);
56         kfree(p);
57 }
58
59 void btrfs_release_path(struct btrfs_path *p)
60 {
61         int i;
62         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
63                 if (!p->nodes[i])
64                         continue;
65                 free_extent_buffer(p->nodes[i]);
66         }
67         memset(p, 0, sizeof(*p));
68 }
69
70 void add_root_to_dirty_list(struct btrfs_root *root)
71 {
72         if (root->track_dirty && list_empty(&root->dirty_list)) {
73                 list_add(&root->dirty_list,
74                          &root->fs_info->dirty_cowonly_roots);
75         }
76 }
77
78 int btrfs_copy_root(struct btrfs_trans_handle *trans,
79                       struct btrfs_root *root,
80                       struct extent_buffer *buf,
81                       struct extent_buffer **cow_ret, u64 new_root_objectid)
82 {
83         struct extent_buffer *cow;
84         int ret = 0;
85         int level;
86         struct btrfs_root *new_root;
87         struct btrfs_disk_key disk_key;
88
89         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
90         if (!new_root)
91                 return -ENOMEM;
92
93         memcpy(new_root, root, sizeof(*new_root));
94         new_root->root_key.objectid = new_root_objectid;
95
96         WARN_ON(root->ref_cows && trans->transid !=
97                 root->fs_info->running_transaction->transid);
98         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
99
100         level = btrfs_header_level(buf);
101         if (level == 0)
102                 btrfs_item_key(buf, &disk_key, 0);
103         else
104                 btrfs_node_key(buf, &disk_key, 0);
105         cow = btrfs_alloc_free_block(trans, new_root, buf->len,
106                                      new_root_objectid, &disk_key,
107                                      level, buf->start, 0);
108         if (IS_ERR(cow)) {
109                 kfree(new_root);
110                 return PTR_ERR(cow);
111         }
112
113         copy_extent_buffer(cow, buf, 0, 0, cow->len);
114         btrfs_set_header_bytenr(cow, cow->start);
115         btrfs_set_header_generation(cow, trans->transid);
116         btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
117         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
118                                      BTRFS_HEADER_FLAG_RELOC);
119         if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
120                 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
121         else
122                 btrfs_set_header_owner(cow, new_root_objectid);
123
124         write_extent_buffer(cow, root->fs_info->fsid,
125                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
126
127         WARN_ON(btrfs_header_generation(buf) > trans->transid);
128         ret = btrfs_inc_ref(trans, new_root, cow, 0);
129         kfree(new_root);
130
131         if (ret)
132                 return ret;
133
134         btrfs_mark_buffer_dirty(cow);
135         *cow_ret = cow;
136         return 0;
137 }
138
139 /*
140  * check if the tree block can be shared by multiple trees
141  */
142 static int btrfs_block_can_be_shared(struct btrfs_root *root,
143                                      struct extent_buffer *buf)
144 {
145         /*
146          * Tree blocks not in reference counted trees and tree roots
147          * are never shared. If a block was allocated after the last
148          * snapshot and the block was not allocated by tree relocation,
149          * we know the block is not shared.
150          */
151         if (root->ref_cows &&
152             buf != root->node && buf != root->commit_root &&
153             (btrfs_header_generation(buf) <=
154              btrfs_root_last_snapshot(&root->root_item) ||
155              btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
156                 return 1;
157 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
158         if (root->ref_cows &&
159             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
160                 return 1;
161 #endif
162         return 0;
163 }
164
165 static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
166                                        struct btrfs_root *root,
167                                        struct extent_buffer *buf,
168                                        struct extent_buffer *cow)
169 {
170         u64 refs;
171         u64 owner;
172         u64 flags;
173         u64 new_flags = 0;
174         int ret;
175
176         /*
177          * Backrefs update rules:
178          *
179          * Always use full backrefs for extent pointers in tree block
180          * allocated by tree relocation.
181          *
182          * If a shared tree block is no longer referenced by its owner
183          * tree (btrfs_header_owner(buf) == root->root_key.objectid),
184          * use full backrefs for extent pointers in tree block.
185          *
186          * If a tree block is been relocating
187          * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
188          * use full backrefs for extent pointers in tree block.
189          * The reason for this is some operations (such as drop tree)
190          * are only allowed for blocks use full backrefs.
191          */
192
193         if (btrfs_block_can_be_shared(root, buf)) {
194                 ret = btrfs_lookup_extent_info(trans, root, buf->start,
195                                                btrfs_header_level(buf), 1,
196                                                &refs, &flags);
197                 BUG_ON(ret);
198                 BUG_ON(refs == 0);
199         } else {
200                 refs = 1;
201                 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
202                     btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
203                         flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
204                 else
205                         flags = 0;
206         }
207
208         owner = btrfs_header_owner(buf);
209         BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
210                owner == BTRFS_TREE_RELOC_OBJECTID);
211
212         if (refs > 1) {
213                 if ((owner == root->root_key.objectid ||
214                      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
215                     !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
216                         ret = btrfs_inc_ref(trans, root, buf, 1);
217                         BUG_ON(ret);
218
219                         if (root->root_key.objectid ==
220                             BTRFS_TREE_RELOC_OBJECTID) {
221                                 ret = btrfs_dec_ref(trans, root, buf, 0);
222                                 BUG_ON(ret);
223                                 ret = btrfs_inc_ref(trans, root, cow, 1);
224                                 BUG_ON(ret);
225                         }
226                         new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
227                 } else {
228
229                         if (root->root_key.objectid ==
230                             BTRFS_TREE_RELOC_OBJECTID)
231                                 ret = btrfs_inc_ref(trans, root, cow, 1);
232                         else
233                                 ret = btrfs_inc_ref(trans, root, cow, 0);
234                         BUG_ON(ret);
235                 }
236                 if (new_flags != 0) {
237                         ret = btrfs_set_block_flags(trans, root, buf->start,
238                                                     btrfs_header_level(buf),
239                                                     new_flags);
240                         BUG_ON(ret);
241                 }
242         } else {
243                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
244                         if (root->root_key.objectid ==
245                             BTRFS_TREE_RELOC_OBJECTID)
246                                 ret = btrfs_inc_ref(trans, root, cow, 1);
247                         else
248                                 ret = btrfs_inc_ref(trans, root, cow, 0);
249                         BUG_ON(ret);
250                         ret = btrfs_dec_ref(trans, root, buf, 1);
251                         BUG_ON(ret);
252                 }
253                 clean_tree_block(trans, root, buf);
254         }
255         return 0;
256 }
257
258 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
259                              struct btrfs_root *root,
260                              struct extent_buffer *buf,
261                              struct extent_buffer *parent, int parent_slot,
262                              struct extent_buffer **cow_ret,
263                              u64 search_start, u64 empty_size)
264 {
265         struct extent_buffer *cow;
266         struct btrfs_disk_key disk_key;
267         int level;
268
269         WARN_ON(root->ref_cows && trans->transid !=
270                 root->fs_info->running_transaction->transid);
271         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
272
273         level = btrfs_header_level(buf);
274
275         if (level == 0)
276                 btrfs_item_key(buf, &disk_key, 0);
277         else
278                 btrfs_node_key(buf, &disk_key, 0);
279
280         cow = btrfs_alloc_free_block(trans, root, buf->len,
281                                      root->root_key.objectid, &disk_key,
282                                      level, search_start, empty_size);
283         if (IS_ERR(cow))
284                 return PTR_ERR(cow);
285
286         copy_extent_buffer(cow, buf, 0, 0, cow->len);
287         btrfs_set_header_bytenr(cow, cow->start);
288         btrfs_set_header_generation(cow, trans->transid);
289         btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
290         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
291                                      BTRFS_HEADER_FLAG_RELOC);
292         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
293                 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
294         else
295                 btrfs_set_header_owner(cow, root->root_key.objectid);
296
297         write_extent_buffer(cow, root->fs_info->fsid,
298                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
299
300         WARN_ON(!(buf->flags & EXTENT_BAD_TRANSID) &&
301                 btrfs_header_generation(buf) > trans->transid);
302
303         update_ref_for_cow(trans, root, buf, cow);
304
305         if (buf == root->node) {
306                 root->node = cow;
307                 extent_buffer_get(cow);
308
309                 btrfs_free_extent(trans, root, buf->start, buf->len,
310                                   0, root->root_key.objectid, level, 0);
311                 free_extent_buffer(buf);
312                 add_root_to_dirty_list(root);
313         } else {
314                 btrfs_set_node_blockptr(parent, parent_slot,
315                                         cow->start);
316                 WARN_ON(trans->transid == 0);
317                 btrfs_set_node_ptr_generation(parent, parent_slot,
318                                               trans->transid);
319                 btrfs_mark_buffer_dirty(parent);
320                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
321
322                 btrfs_free_extent(trans, root, buf->start, buf->len,
323                                   0, root->root_key.objectid, level, 1);
324         }
325         if (!list_empty(&buf->recow)) {
326                 list_del_init(&buf->recow);
327                 free_extent_buffer(buf);
328         }
329         free_extent_buffer(buf);
330         btrfs_mark_buffer_dirty(cow);
331         *cow_ret = cow;
332         return 0;
333 }
334
335 static inline int should_cow_block(struct btrfs_trans_handle *trans,
336                                    struct btrfs_root *root,
337                                    struct extent_buffer *buf)
338 {
339         if (btrfs_header_generation(buf) == trans->transid &&
340             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
341             !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
342               btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
343                 return 0;
344         return 1;
345 }
346
347 int btrfs_cow_block(struct btrfs_trans_handle *trans,
348                     struct btrfs_root *root, struct extent_buffer *buf,
349                     struct extent_buffer *parent, int parent_slot,
350                     struct extent_buffer **cow_ret)
351 {
352         u64 search_start;
353         int ret;
354         /*
355         if (trans->transaction != root->fs_info->running_transaction) {
356                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
357                        root->fs_info->running_transaction->transid);
358                 WARN_ON(1);
359         }
360         */
361         if (trans->transid != root->fs_info->generation) {
362                 printk(KERN_CRIT "trans %llu running %llu\n",
363                         (unsigned long long)trans->transid,
364                         (unsigned long long)root->fs_info->generation);
365                 WARN_ON(1);
366         }
367         if (!should_cow_block(trans, root, buf)) {
368                 *cow_ret = buf;
369                 return 0;
370         }
371
372         search_start = buf->start & ~((u64)SZ_1G - 1);
373         ret = __btrfs_cow_block(trans, root, buf, parent,
374                                  parent_slot, cow_ret, search_start, 0);
375         return ret;
376 }
377
378 int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
379 {
380         if (k1->objectid > k2->objectid)
381                 return 1;
382         if (k1->objectid < k2->objectid)
383                 return -1;
384         if (k1->type > k2->type)
385                 return 1;
386         if (k1->type < k2->type)
387                 return -1;
388         if (k1->offset > k2->offset)
389                 return 1;
390         if (k1->offset < k2->offset)
391                 return -1;
392         return 0;
393 }
394
395 /*
396  * compare two keys in a memcmp fashion
397  */
398 static int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
399 {
400         struct btrfs_key k1;
401
402         btrfs_disk_key_to_cpu(&k1, disk);
403         return btrfs_comp_cpu_keys(&k1, k2);
404 }
405
406 /*
407  * The leaf data grows from end-to-front in the node.
408  * this returns the address of the start of the last item,
409  * which is the stop of the leaf data stack
410  */
411 static inline unsigned int leaf_data_end(struct btrfs_root *root,
412                                          struct extent_buffer *leaf)
413 {
414         u32 nr = btrfs_header_nritems(leaf);
415         if (nr == 0)
416                 return BTRFS_LEAF_DATA_SIZE(root);
417         return btrfs_item_offset_nr(leaf, nr - 1);
418 }
419
420 enum btrfs_tree_block_status
421 btrfs_check_node(struct btrfs_root *root, struct btrfs_disk_key *parent_key,
422                  struct extent_buffer *buf)
423 {
424         int i;
425         struct btrfs_key cpukey;
426         struct btrfs_disk_key key;
427         u32 nritems = btrfs_header_nritems(buf);
428         enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
429
430         if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
431                 goto fail;
432
433         ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
434         if (parent_key && parent_key->type) {
435                 btrfs_node_key(buf, &key, 0);
436                 if (memcmp(parent_key, &key, sizeof(key)))
437                         goto fail;
438         }
439         ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
440         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
441                 btrfs_node_key(buf, &key, i);
442                 btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
443                 if (btrfs_comp_keys(&key, &cpukey) >= 0)
444                         goto fail;
445         }
446         return BTRFS_TREE_BLOCK_CLEAN;
447 fail:
448         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
449                 if (parent_key)
450                         btrfs_disk_key_to_cpu(&cpukey, parent_key);
451                 else
452                         btrfs_node_key_to_cpu(buf, &cpukey, 0);
453                 btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
454                                                 buf->start, buf->len,
455                                                 btrfs_header_level(buf));
456         }
457         return ret;
458 }
459
460 enum btrfs_tree_block_status
461 btrfs_check_leaf(struct btrfs_root *root, struct btrfs_disk_key *parent_key,
462                  struct extent_buffer *buf)
463 {
464         int i;
465         struct btrfs_key cpukey;
466         struct btrfs_disk_key key;
467         u32 nritems = btrfs_header_nritems(buf);
468         enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
469
470         if (nritems * sizeof(struct btrfs_item) > buf->len)  {
471                 fprintf(stderr, "invalid number of items %llu\n",
472                         (unsigned long long)buf->start);
473                 goto fail;
474         }
475
476         if (btrfs_header_level(buf) != 0) {
477                 ret = BTRFS_TREE_BLOCK_INVALID_LEVEL;
478                 fprintf(stderr, "leaf is not a leaf %llu\n",
479                        (unsigned long long)btrfs_header_bytenr(buf));
480                 goto fail;
481         }
482         if (btrfs_leaf_free_space(root, buf) < 0) {
483                 ret = BTRFS_TREE_BLOCK_INVALID_FREE_SPACE;
484                 fprintf(stderr, "leaf free space incorrect %llu %d\n",
485                         (unsigned long long)btrfs_header_bytenr(buf),
486                         btrfs_leaf_free_space(root, buf));
487                 goto fail;
488         }
489
490         if (nritems == 0)
491                 return BTRFS_TREE_BLOCK_CLEAN;
492
493         btrfs_item_key(buf, &key, 0);
494         if (parent_key && parent_key->type &&
495             memcmp(parent_key, &key, sizeof(key))) {
496                 ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
497                 fprintf(stderr, "leaf parent key incorrect %llu\n",
498                        (unsigned long long)btrfs_header_bytenr(buf));
499                 goto fail;
500         }
501         for (i = 0; nritems > 1 && i < nritems - 1; i++) {
502                 btrfs_item_key(buf, &key, i);
503                 btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
504                 if (btrfs_comp_keys(&key, &cpukey) >= 0) {
505                         ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
506                         fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
507                         goto fail;
508                 }
509                 if (btrfs_item_offset_nr(buf, i) !=
510                         btrfs_item_end_nr(buf, i + 1)) {
511                         ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
512                         fprintf(stderr, "incorrect offsets %u %u\n",
513                                 btrfs_item_offset_nr(buf, i),
514                                 btrfs_item_end_nr(buf, i + 1));
515                         goto fail;
516                 }
517                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
518                     BTRFS_LEAF_DATA_SIZE(root)) {
519                         ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
520                         fprintf(stderr, "bad item end %u wanted %u\n",
521                                 btrfs_item_end_nr(buf, i),
522                                 (unsigned)BTRFS_LEAF_DATA_SIZE(root));
523                         goto fail;
524                 }
525         }
526
527         for (i = 0; i < nritems; i++) {
528                 if (btrfs_item_end_nr(buf, i) > BTRFS_LEAF_DATA_SIZE(root)) {
529                         btrfs_item_key(buf, &key, 0);
530                         btrfs_print_key(&key);
531                         fflush(stdout);
532                         ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
533                         fprintf(stderr, "slot end outside of leaf %llu > %llu\n",
534                                 (unsigned long long)btrfs_item_end_nr(buf, i),
535                                 (unsigned long long)BTRFS_LEAF_DATA_SIZE(root));
536                         goto fail;
537                 }
538         }
539
540         return BTRFS_TREE_BLOCK_CLEAN;
541 fail:
542         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
543                 if (parent_key)
544                         btrfs_disk_key_to_cpu(&cpukey, parent_key);
545                 else
546                         btrfs_item_key_to_cpu(buf, &cpukey, 0);
547
548                 btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
549                                                 buf->start, buf->len, 0);
550         }
551         return ret;
552 }
553
554 static int noinline check_block(struct btrfs_root *root,
555                                 struct btrfs_path *path, int level)
556 {
557         struct btrfs_disk_key key;
558         struct btrfs_disk_key *key_ptr = NULL;
559         struct extent_buffer *parent;
560         enum btrfs_tree_block_status ret;
561
562         if (path->skip_check_block)
563                 return 0;
564         if (path->nodes[level + 1]) {
565                 parent = path->nodes[level + 1];
566                 btrfs_node_key(parent, &key, path->slots[level + 1]);
567                 key_ptr = &key;
568         }
569         if (level == 0)
570                 ret =  btrfs_check_leaf(root, key_ptr, path->nodes[0]);
571         else
572                 ret = btrfs_check_node(root, key_ptr, path->nodes[level]);
573         if (ret == BTRFS_TREE_BLOCK_CLEAN)
574                 return 0;
575         return -EIO;
576 }
577
578 /*
579  * search for key in the extent_buffer.  The items start at offset p,
580  * and they are item_size apart.  There are 'max' items in p.
581  *
582  * the slot in the array is returned via slot, and it points to
583  * the place where you would insert key if it is not found in
584  * the array.
585  *
586  * slot may point to max if the key is bigger than all of the keys
587  */
588 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
589                               int item_size, struct btrfs_key *key,
590                               int max, int *slot)
591 {
592         int low = 0;
593         int high = max;
594         int mid;
595         int ret;
596         unsigned long offset;
597         struct btrfs_disk_key *tmp;
598
599         while(low < high) {
600                 mid = (low + high) / 2;
601                 offset = p + mid * item_size;
602
603                 tmp = (struct btrfs_disk_key *)(eb->data + offset);
604                 ret = btrfs_comp_keys(tmp, key);
605
606                 if (ret < 0)
607                         low = mid + 1;
608                 else if (ret > 0)
609                         high = mid;
610                 else {
611                         *slot = mid;
612                         return 0;
613                 }
614         }
615         *slot = low;
616         return 1;
617 }
618
619 /*
620  * simple bin_search frontend that does the right thing for
621  * leaves vs nodes
622  */
623 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
624                       int level, int *slot)
625 {
626         if (level == 0)
627                 return generic_bin_search(eb,
628                                           offsetof(struct btrfs_leaf, items),
629                                           sizeof(struct btrfs_item),
630                                           key, btrfs_header_nritems(eb),
631                                           slot);
632         else
633                 return generic_bin_search(eb,
634                                           offsetof(struct btrfs_node, ptrs),
635                                           sizeof(struct btrfs_key_ptr),
636                                           key, btrfs_header_nritems(eb),
637                                           slot);
638 }
639
640 struct extent_buffer *read_node_slot(struct btrfs_root *root,
641                                    struct extent_buffer *parent, int slot)
642 {
643         int level = btrfs_header_level(parent);
644         if (slot < 0)
645                 return NULL;
646         if (slot >= btrfs_header_nritems(parent))
647                 return NULL;
648
649         if (level == 0)
650                 return NULL;
651
652         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
653                        root->nodesize,
654                        btrfs_node_ptr_generation(parent, slot));
655 }
656
657 static int balance_level(struct btrfs_trans_handle *trans,
658                          struct btrfs_root *root,
659                          struct btrfs_path *path, int level)
660 {
661         struct extent_buffer *right = NULL;
662         struct extent_buffer *mid;
663         struct extent_buffer *left = NULL;
664         struct extent_buffer *parent = NULL;
665         int ret = 0;
666         int wret;
667         int pslot;
668         int orig_slot = path->slots[level];
669         u64 orig_ptr;
670
671         if (level == 0)
672                 return 0;
673
674         mid = path->nodes[level];
675         WARN_ON(btrfs_header_generation(mid) != trans->transid);
676
677         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
678
679         if (level < BTRFS_MAX_LEVEL - 1) {
680                 parent = path->nodes[level + 1];
681                 pslot = path->slots[level + 1];
682         }
683
684         /*
685          * deal with the case where there is only one pointer in the root
686          * by promoting the node below to a root
687          */
688         if (!parent) {
689                 struct extent_buffer *child;
690
691                 if (btrfs_header_nritems(mid) != 1)
692                         return 0;
693
694                 /* promote the child to a root */
695                 child = read_node_slot(root, mid, 0);
696                 BUG_ON(!extent_buffer_uptodate(child));
697                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
698                 BUG_ON(ret);
699
700                 root->node = child;
701                 add_root_to_dirty_list(root);
702                 path->nodes[level] = NULL;
703                 clean_tree_block(trans, root, mid);
704                 /* once for the path */
705                 free_extent_buffer(mid);
706
707                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
708                                         0, root->root_key.objectid,
709                                         level, 1);
710                 /* once for the root ptr */
711                 free_extent_buffer(mid);
712                 return ret;
713         }
714         if (btrfs_header_nritems(mid) >
715             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
716                 return 0;
717
718         left = read_node_slot(root, parent, pslot - 1);
719         if (extent_buffer_uptodate(left)) {
720                 wret = btrfs_cow_block(trans, root, left,
721                                        parent, pslot - 1, &left);
722                 if (wret) {
723                         ret = wret;
724                         goto enospc;
725                 }
726         }
727         right = read_node_slot(root, parent, pslot + 1);
728         if (extent_buffer_uptodate(right)) {
729                 wret = btrfs_cow_block(trans, root, right,
730                                        parent, pslot + 1, &right);
731                 if (wret) {
732                         ret = wret;
733                         goto enospc;
734                 }
735         }
736
737         /* first, try to make some room in the middle buffer */
738         if (left) {
739                 orig_slot += btrfs_header_nritems(left);
740                 wret = push_node_left(trans, root, left, mid, 1);
741                 if (wret < 0)
742                         ret = wret;
743         }
744
745         /*
746          * then try to empty the right most buffer into the middle
747          */
748         if (right) {
749                 wret = push_node_left(trans, root, mid, right, 1);
750                 if (wret < 0 && wret != -ENOSPC)
751                         ret = wret;
752                 if (btrfs_header_nritems(right) == 0) {
753                         u64 bytenr = right->start;
754                         u32 blocksize = right->len;
755
756                         clean_tree_block(trans, root, right);
757                         free_extent_buffer(right);
758                         right = NULL;
759                         wret = btrfs_del_ptr(root, path, level + 1, pslot + 1);
760                         if (wret)
761                                 ret = wret;
762                         wret = btrfs_free_extent(trans, root, bytenr,
763                                                  blocksize, 0,
764                                                  root->root_key.objectid,
765                                                  level, 0);
766                         if (wret)
767                                 ret = wret;
768                 } else {
769                         struct btrfs_disk_key right_key;
770                         btrfs_node_key(right, &right_key, 0);
771                         btrfs_set_node_key(parent, &right_key, pslot + 1);
772                         btrfs_mark_buffer_dirty(parent);
773                 }
774         }
775         if (btrfs_header_nritems(mid) == 1) {
776                 /*
777                  * we're not allowed to leave a node with one item in the
778                  * tree during a delete.  A deletion from lower in the tree
779                  * could try to delete the only pointer in this node.
780                  * So, pull some keys from the left.
781                  * There has to be a left pointer at this point because
782                  * otherwise we would have pulled some pointers from the
783                  * right
784                  */
785                 BUG_ON(!left);
786                 wret = balance_node_right(trans, root, mid, left);
787                 if (wret < 0) {
788                         ret = wret;
789                         goto enospc;
790                 }
791                 if (wret == 1) {
792                         wret = push_node_left(trans, root, left, mid, 1);
793                         if (wret < 0)
794                                 ret = wret;
795                 }
796                 BUG_ON(wret == 1);
797         }
798         if (btrfs_header_nritems(mid) == 0) {
799                 /* we've managed to empty the middle node, drop it */
800                 u64 bytenr = mid->start;
801                 u32 blocksize = mid->len;
802                 clean_tree_block(trans, root, mid);
803                 free_extent_buffer(mid);
804                 mid = NULL;
805                 wret = btrfs_del_ptr(root, path, level + 1, pslot);
806                 if (wret)
807                         ret = wret;
808                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
809                                          0, root->root_key.objectid,
810                                          level, 0);
811                 if (wret)
812                         ret = wret;
813         } else {
814                 /* update the parent key to reflect our changes */
815                 struct btrfs_disk_key mid_key;
816                 btrfs_node_key(mid, &mid_key, 0);
817                 btrfs_set_node_key(parent, &mid_key, pslot);
818                 btrfs_mark_buffer_dirty(parent);
819         }
820
821         /* update the path */
822         if (left) {
823                 if (btrfs_header_nritems(left) > orig_slot) {
824                         extent_buffer_get(left);
825                         path->nodes[level] = left;
826                         path->slots[level + 1] -= 1;
827                         path->slots[level] = orig_slot;
828                         if (mid)
829                                 free_extent_buffer(mid);
830                 } else {
831                         orig_slot -= btrfs_header_nritems(left);
832                         path->slots[level] = orig_slot;
833                 }
834         }
835         /* double check we haven't messed things up */
836         check_block(root, path, level);
837         if (orig_ptr !=
838             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
839                 BUG();
840 enospc:
841         if (right)
842                 free_extent_buffer(right);
843         if (left)
844                 free_extent_buffer(left);
845         return ret;
846 }
847
848 /* returns zero if the push worked, non-zero otherwise */
849 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
850                                           struct btrfs_root *root,
851                                           struct btrfs_path *path, int level)
852 {
853         struct extent_buffer *right = NULL;
854         struct extent_buffer *mid;
855         struct extent_buffer *left = NULL;
856         struct extent_buffer *parent = NULL;
857         int ret = 0;
858         int wret;
859         int pslot;
860         int orig_slot = path->slots[level];
861
862         if (level == 0)
863                 return 1;
864
865         mid = path->nodes[level];
866         WARN_ON(btrfs_header_generation(mid) != trans->transid);
867
868         if (level < BTRFS_MAX_LEVEL - 1) {
869                 parent = path->nodes[level + 1];
870                 pslot = path->slots[level + 1];
871         }
872
873         if (!parent)
874                 return 1;
875
876         left = read_node_slot(root, parent, pslot - 1);
877
878         /* first, try to make some room in the middle buffer */
879         if (extent_buffer_uptodate(left)) {
880                 u32 left_nr;
881                 left_nr = btrfs_header_nritems(left);
882                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
883                         wret = 1;
884                 } else {
885                         ret = btrfs_cow_block(trans, root, left, parent,
886                                               pslot - 1, &left);
887                         if (ret)
888                                 wret = 1;
889                         else {
890                                 wret = push_node_left(trans, root,
891                                                       left, mid, 0);
892                         }
893                 }
894                 if (wret < 0)
895                         ret = wret;
896                 if (wret == 0) {
897                         struct btrfs_disk_key disk_key;
898                         orig_slot += left_nr;
899                         btrfs_node_key(mid, &disk_key, 0);
900                         btrfs_set_node_key(parent, &disk_key, pslot);
901                         btrfs_mark_buffer_dirty(parent);
902                         if (btrfs_header_nritems(left) > orig_slot) {
903                                 path->nodes[level] = left;
904                                 path->slots[level + 1] -= 1;
905                                 path->slots[level] = orig_slot;
906                                 free_extent_buffer(mid);
907                         } else {
908                                 orig_slot -=
909                                         btrfs_header_nritems(left);
910                                 path->slots[level] = orig_slot;
911                                 free_extent_buffer(left);
912                         }
913                         return 0;
914                 }
915                 free_extent_buffer(left);
916         }
917         right= read_node_slot(root, parent, pslot + 1);
918
919         /*
920          * then try to empty the right most buffer into the middle
921          */
922         if (extent_buffer_uptodate(right)) {
923                 u32 right_nr;
924                 right_nr = btrfs_header_nritems(right);
925                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
926                         wret = 1;
927                 } else {
928                         ret = btrfs_cow_block(trans, root, right,
929                                               parent, pslot + 1,
930                                               &right);
931                         if (ret)
932                                 wret = 1;
933                         else {
934                                 wret = balance_node_right(trans, root,
935                                                           right, mid);
936                         }
937                 }
938                 if (wret < 0)
939                         ret = wret;
940                 if (wret == 0) {
941                         struct btrfs_disk_key disk_key;
942
943                         btrfs_node_key(right, &disk_key, 0);
944                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
945                         btrfs_mark_buffer_dirty(parent);
946
947                         if (btrfs_header_nritems(mid) <= orig_slot) {
948                                 path->nodes[level] = right;
949                                 path->slots[level + 1] += 1;
950                                 path->slots[level] = orig_slot -
951                                         btrfs_header_nritems(mid);
952                                 free_extent_buffer(mid);
953                         } else {
954                                 free_extent_buffer(right);
955                         }
956                         return 0;
957                 }
958                 free_extent_buffer(right);
959         }
960         return 1;
961 }
962
963 /*
964  * readahead one full node of leaves
965  */
966 void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
967                              int level, int slot, u64 objectid)
968 {
969         struct extent_buffer *node;
970         struct btrfs_disk_key disk_key;
971         u32 nritems;
972         u64 search;
973         u64 lowest_read;
974         u64 highest_read;
975         u64 nread = 0;
976         int direction = path->reada;
977         struct extent_buffer *eb;
978         u32 nr;
979         u32 blocksize;
980         u32 nscan = 0;
981
982         if (level != 1)
983                 return;
984
985         if (!path->nodes[level])
986                 return;
987
988         node = path->nodes[level];
989         search = btrfs_node_blockptr(node, slot);
990         blocksize = root->nodesize;
991         eb = btrfs_find_tree_block(root, search, blocksize);
992         if (eb) {
993                 free_extent_buffer(eb);
994                 return;
995         }
996
997         highest_read = search;
998         lowest_read = search;
999
1000         nritems = btrfs_header_nritems(node);
1001         nr = slot;
1002         while(1) {
1003                 if (direction < 0) {
1004                         if (nr == 0)
1005                                 break;
1006                         nr--;
1007                 } else if (direction > 0) {
1008                         nr++;
1009                         if (nr >= nritems)
1010                                 break;
1011                 }
1012                 if (path->reada < 0 && objectid) {
1013                         btrfs_node_key(node, &disk_key, nr);
1014                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1015                                 break;
1016                 }
1017                 search = btrfs_node_blockptr(node, nr);
1018                 if ((search >= lowest_read && search <= highest_read) ||
1019                     (search < lowest_read && lowest_read - search <= 32768) ||
1020                     (search > highest_read && search - highest_read <= 32768)) {
1021                         readahead_tree_block(root, search, blocksize,
1022                                      btrfs_node_ptr_generation(node, nr));
1023                         nread += blocksize;
1024                 }
1025                 nscan++;
1026                 if (path->reada < 2 && (nread > SZ_256K || nscan > 32))
1027                         break;
1028                 if(nread > SZ_1M || nscan > 128)
1029                         break;
1030
1031                 if (search < lowest_read)
1032                         lowest_read = search;
1033                 if (search > highest_read)
1034                         highest_read = search;
1035         }
1036 }
1037
1038 int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
1039                 u64 iobjectid, u64 ioff, u8 key_type,
1040                 struct btrfs_key *found_key)
1041 {
1042         int ret;
1043         struct btrfs_key key;
1044         struct extent_buffer *eb;
1045         struct btrfs_path *path;
1046
1047         key.type = key_type;
1048         key.objectid = iobjectid;
1049         key.offset = ioff;
1050
1051         if (found_path == NULL) {
1052                 path = btrfs_alloc_path();
1053                 if (!path)
1054                         return -ENOMEM;
1055         } else
1056                 path = found_path;
1057
1058         ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
1059         if ((ret < 0) || (found_key == NULL))
1060                 goto out;
1061
1062         eb = path->nodes[0];
1063         if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
1064                 ret = btrfs_next_leaf(fs_root, path);
1065                 if (ret)
1066                         goto out;
1067                 eb = path->nodes[0];
1068         }
1069
1070         btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
1071         if (found_key->type != key.type ||
1072                         found_key->objectid != key.objectid) {
1073                 ret = 1;
1074                 goto out;
1075         }
1076
1077 out:
1078         if (path != found_path)
1079                 btrfs_free_path(path);
1080         return ret;
1081 }
1082
1083 /*
1084  * look for key in the tree.  path is filled in with nodes along the way
1085  * if key is found, we return zero and you can find the item in the leaf
1086  * level of the path (level 0)
1087  *
1088  * If the key isn't found, the path points to the slot where it should
1089  * be inserted, and 1 is returned.  If there are other errors during the
1090  * search a negative error number is returned.
1091  *
1092  * if ins_len > 0, nodes and leaves will be split as we walk down the
1093  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1094  * possible)
1095  */
1096 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1097                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1098                       ins_len, int cow)
1099 {
1100         struct extent_buffer *b;
1101         int slot;
1102         int ret;
1103         int level;
1104         int should_reada = p->reada;
1105         u8 lowest_level = 0;
1106
1107         lowest_level = p->lowest_level;
1108         WARN_ON(lowest_level && ins_len > 0);
1109         WARN_ON(p->nodes[0] != NULL);
1110         /*
1111         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1112         */
1113 again:
1114         b = root->node;
1115         extent_buffer_get(b);
1116         while (b) {
1117                 level = btrfs_header_level(b);
1118                 if (cow) {
1119                         int wret;
1120                         wret = btrfs_cow_block(trans, root, b,
1121                                                p->nodes[level + 1],
1122                                                p->slots[level + 1],
1123                                                &b);
1124                         if (wret) {
1125                                 free_extent_buffer(b);
1126                                 return wret;
1127                         }
1128                 }
1129                 BUG_ON(!cow && ins_len);
1130                 if (level != btrfs_header_level(b))
1131                         WARN_ON(1);
1132                 level = btrfs_header_level(b);
1133                 p->nodes[level] = b;
1134                 ret = check_block(root, p, level);
1135                 if (ret)
1136                         return -1;
1137                 ret = bin_search(b, key, level, &slot);
1138                 if (level != 0) {
1139                         if (ret && slot > 0)
1140                                 slot -= 1;
1141                         p->slots[level] = slot;
1142                         if ((p->search_for_split || ins_len > 0) &&
1143                             btrfs_header_nritems(b) >=
1144                             BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1145                                 int sret = split_node(trans, root, p, level);
1146                                 BUG_ON(sret > 0);
1147                                 if (sret)
1148                                         return sret;
1149                                 b = p->nodes[level];
1150                                 slot = p->slots[level];
1151                         } else if (ins_len < 0) {
1152                                 int sret = balance_level(trans, root, p,
1153                                                          level);
1154                                 if (sret)
1155                                         return sret;
1156                                 b = p->nodes[level];
1157                                 if (!b) {
1158                                         btrfs_release_path(p);
1159                                         goto again;
1160                                 }
1161                                 slot = p->slots[level];
1162                                 BUG_ON(btrfs_header_nritems(b) == 1);
1163                         }
1164                         /* this is only true while dropping a snapshot */
1165                         if (level == lowest_level)
1166                                 break;
1167
1168                         if (should_reada)
1169                                 reada_for_search(root, p, level, slot,
1170                                                  key->objectid);
1171
1172                         b = read_node_slot(root, b, slot);
1173                         if (!extent_buffer_uptodate(b))
1174                                 return -EIO;
1175                 } else {
1176                         p->slots[level] = slot;
1177                         if (ins_len > 0 &&
1178                             ins_len > btrfs_leaf_free_space(root, b)) {
1179                                 int sret = split_leaf(trans, root, key,
1180                                                       p, ins_len, ret == 0);
1181                                 BUG_ON(sret > 0);
1182                                 if (sret)
1183                                         return sret;
1184                         }
1185                         return ret;
1186                 }
1187         }
1188         return 1;
1189 }
1190
1191 /*
1192  * adjust the pointers going up the tree, starting at level
1193  * making sure the right key of each node is points to 'key'.
1194  * This is used after shifting pointers to the left, so it stops
1195  * fixing up pointers when a given leaf/node is not in slot 0 of the
1196  * higher levels
1197  */
1198 void btrfs_fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
1199                           struct btrfs_disk_key *key, int level)
1200 {
1201         int i;
1202         struct extent_buffer *t;
1203
1204         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1205                 int tslot = path->slots[i];
1206                 if (!path->nodes[i])
1207                         break;
1208                 t = path->nodes[i];
1209                 btrfs_set_node_key(t, key, tslot);
1210                 btrfs_mark_buffer_dirty(path->nodes[i]);
1211                 if (tslot != 0)
1212                         break;
1213         }
1214 }
1215
1216 /*
1217  * update item key.
1218  *
1219  * This function isn't completely safe. It's the caller's responsibility
1220  * that the new key won't break the order
1221  */
1222 int btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
1223                             struct btrfs_key *new_key)
1224 {
1225         struct btrfs_disk_key disk_key;
1226         struct extent_buffer *eb;
1227         int slot;
1228
1229         eb = path->nodes[0];
1230         slot = path->slots[0];
1231         if (slot > 0) {
1232                 btrfs_item_key(eb, &disk_key, slot - 1);
1233                 if (btrfs_comp_keys(&disk_key, new_key) >= 0)
1234                         return -1;
1235         }
1236         if (slot < btrfs_header_nritems(eb) - 1) {
1237                 btrfs_item_key(eb, &disk_key, slot + 1);
1238                 if (btrfs_comp_keys(&disk_key, new_key) <= 0)
1239                         return -1;
1240         }
1241
1242         btrfs_cpu_key_to_disk(&disk_key, new_key);
1243         btrfs_set_item_key(eb, &disk_key, slot);
1244         btrfs_mark_buffer_dirty(eb);
1245         if (slot == 0)
1246                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
1247         return 0;
1248 }
1249
1250 /*
1251  * update an item key without the safety checks.  This is meant to be called by
1252  * fsck only.
1253  */
1254 void btrfs_set_item_key_unsafe(struct btrfs_root *root,
1255                                struct btrfs_path *path,
1256                                struct btrfs_key *new_key)
1257 {
1258         struct btrfs_disk_key disk_key;
1259         struct extent_buffer *eb;
1260         int slot;
1261
1262         eb = path->nodes[0];
1263         slot = path->slots[0];
1264
1265         btrfs_cpu_key_to_disk(&disk_key, new_key);
1266         btrfs_set_item_key(eb, &disk_key, slot);
1267         btrfs_mark_buffer_dirty(eb);
1268         if (slot == 0)
1269                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
1270 }
1271
1272 /*
1273  * try to push data from one node into the next node left in the
1274  * tree.
1275  *
1276  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1277  * error, and > 0 if there was no room in the left hand block.
1278  */
1279 static int push_node_left(struct btrfs_trans_handle *trans,
1280                           struct btrfs_root *root, struct extent_buffer *dst,
1281                           struct extent_buffer *src, int empty)
1282 {
1283         int push_items = 0;
1284         int src_nritems;
1285         int dst_nritems;
1286         int ret = 0;
1287
1288         src_nritems = btrfs_header_nritems(src);
1289         dst_nritems = btrfs_header_nritems(dst);
1290         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1291         WARN_ON(btrfs_header_generation(src) != trans->transid);
1292         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1293
1294         if (!empty && src_nritems <= 8)
1295                 return 1;
1296
1297         if (push_items <= 0) {
1298                 return 1;
1299         }
1300
1301         if (empty) {
1302                 push_items = min(src_nritems, push_items);
1303                 if (push_items < src_nritems) {
1304                         /* leave at least 8 pointers in the node if
1305                          * we aren't going to empty it
1306                          */
1307                         if (src_nritems - push_items < 8) {
1308                                 if (push_items <= 8)
1309                                         return 1;
1310                                 push_items -= 8;
1311                         }
1312                 }
1313         } else
1314                 push_items = min(src_nritems - 8, push_items);
1315
1316         copy_extent_buffer(dst, src,
1317                            btrfs_node_key_ptr_offset(dst_nritems),
1318                            btrfs_node_key_ptr_offset(0),
1319                            push_items * sizeof(struct btrfs_key_ptr));
1320
1321         if (push_items < src_nritems) {
1322                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1323                                       btrfs_node_key_ptr_offset(push_items),
1324                                       (src_nritems - push_items) *
1325                                       sizeof(struct btrfs_key_ptr));
1326         }
1327         btrfs_set_header_nritems(src, src_nritems - push_items);
1328         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1329         btrfs_mark_buffer_dirty(src);
1330         btrfs_mark_buffer_dirty(dst);
1331
1332         return ret;
1333 }
1334
1335 /*
1336  * try to push data from one node into the next node right in the
1337  * tree.
1338  *
1339  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1340  * error, and > 0 if there was no room in the right hand block.
1341  *
1342  * this will  only push up to 1/2 the contents of the left node over
1343  */
1344 static int balance_node_right(struct btrfs_trans_handle *trans,
1345                               struct btrfs_root *root,
1346                               struct extent_buffer *dst,
1347                               struct extent_buffer *src)
1348 {
1349         int push_items = 0;
1350         int max_push;
1351         int src_nritems;
1352         int dst_nritems;
1353         int ret = 0;
1354
1355         WARN_ON(btrfs_header_generation(src) != trans->transid);
1356         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1357
1358         src_nritems = btrfs_header_nritems(src);
1359         dst_nritems = btrfs_header_nritems(dst);
1360         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1361         if (push_items <= 0) {
1362                 return 1;
1363         }
1364
1365         if (src_nritems < 4) {
1366                 return 1;
1367         }
1368
1369         max_push = src_nritems / 2 + 1;
1370         /* don't try to empty the node */
1371         if (max_push >= src_nritems) {
1372                 return 1;
1373         }
1374
1375         if (max_push < push_items)
1376                 push_items = max_push;
1377
1378         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1379                                       btrfs_node_key_ptr_offset(0),
1380                                       (dst_nritems) *
1381                                       sizeof(struct btrfs_key_ptr));
1382
1383         copy_extent_buffer(dst, src,
1384                            btrfs_node_key_ptr_offset(0),
1385                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1386                            push_items * sizeof(struct btrfs_key_ptr));
1387
1388         btrfs_set_header_nritems(src, src_nritems - push_items);
1389         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1390
1391         btrfs_mark_buffer_dirty(src);
1392         btrfs_mark_buffer_dirty(dst);
1393
1394         return ret;
1395 }
1396
1397 /*
1398  * helper function to insert a new root level in the tree.
1399  * A new node is allocated, and a single item is inserted to
1400  * point to the existing root
1401  *
1402  * returns zero on success or < 0 on failure.
1403  */
1404 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1405                            struct btrfs_root *root,
1406                            struct btrfs_path *path, int level)
1407 {
1408         u64 lower_gen;
1409         struct extent_buffer *lower;
1410         struct extent_buffer *c;
1411         struct extent_buffer *old;
1412         struct btrfs_disk_key lower_key;
1413
1414         BUG_ON(path->nodes[level]);
1415         BUG_ON(path->nodes[level-1] != root->node);
1416
1417         lower = path->nodes[level-1];
1418         if (level == 1)
1419                 btrfs_item_key(lower, &lower_key, 0);
1420         else
1421                 btrfs_node_key(lower, &lower_key, 0);
1422
1423         c = btrfs_alloc_free_block(trans, root, root->nodesize,
1424                                    root->root_key.objectid, &lower_key, 
1425                                    level, root->node->start, 0);
1426
1427         if (IS_ERR(c))
1428                 return PTR_ERR(c);
1429
1430         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
1431         btrfs_set_header_nritems(c, 1);
1432         btrfs_set_header_level(c, level);
1433         btrfs_set_header_bytenr(c, c->start);
1434         btrfs_set_header_generation(c, trans->transid);
1435         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
1436         btrfs_set_header_owner(c, root->root_key.objectid);
1437
1438         write_extent_buffer(c, root->fs_info->fsid,
1439                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
1440
1441         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
1442                             btrfs_header_chunk_tree_uuid(c),
1443                             BTRFS_UUID_SIZE);
1444
1445         btrfs_set_node_key(c, &lower_key, 0);
1446         btrfs_set_node_blockptr(c, 0, lower->start);
1447         lower_gen = btrfs_header_generation(lower);
1448         WARN_ON(lower_gen != trans->transid);
1449
1450         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1451
1452         btrfs_mark_buffer_dirty(c);
1453
1454         old = root->node;
1455         root->node = c;
1456
1457         /* the super has an extra ref to root->node */
1458         free_extent_buffer(old);
1459
1460         add_root_to_dirty_list(root);
1461         extent_buffer_get(c);
1462         path->nodes[level] = c;
1463         path->slots[level] = 0;
1464         return 0;
1465 }
1466
1467 /*
1468  * worker function to insert a single pointer in a node.
1469  * the node should have enough room for the pointer already
1470  *
1471  * slot and level indicate where you want the key to go, and
1472  * blocknr is the block the key points to.
1473  *
1474  * returns zero on success and < 0 on any error
1475  */
1476 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1477                       *root, struct btrfs_path *path, struct btrfs_disk_key
1478                       *key, u64 bytenr, int slot, int level)
1479 {
1480         struct extent_buffer *lower;
1481         int nritems;
1482
1483         BUG_ON(!path->nodes[level]);
1484         lower = path->nodes[level];
1485         nritems = btrfs_header_nritems(lower);
1486         if (slot > nritems)
1487                 BUG();
1488         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1489                 BUG();
1490         if (slot != nritems) {
1491                 memmove_extent_buffer(lower,
1492                               btrfs_node_key_ptr_offset(slot + 1),
1493                               btrfs_node_key_ptr_offset(slot),
1494                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1495         }
1496         btrfs_set_node_key(lower, key, slot);
1497         btrfs_set_node_blockptr(lower, slot, bytenr);
1498         WARN_ON(trans->transid == 0);
1499         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1500         btrfs_set_header_nritems(lower, nritems + 1);
1501         btrfs_mark_buffer_dirty(lower);
1502         return 0;
1503 }
1504
1505 /*
1506  * split the node at the specified level in path in two.
1507  * The path is corrected to point to the appropriate node after the split
1508  *
1509  * Before splitting this tries to make some room in the node by pushing
1510  * left and right, if either one works, it returns right away.
1511  *
1512  * returns 0 on success and < 0 on failure
1513  */
1514 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1515                       *root, struct btrfs_path *path, int level)
1516 {
1517         struct extent_buffer *c;
1518         struct extent_buffer *split;
1519         struct btrfs_disk_key disk_key;
1520         int mid;
1521         int ret;
1522         int wret;
1523         u32 c_nritems;
1524
1525         c = path->nodes[level];
1526         WARN_ON(btrfs_header_generation(c) != trans->transid);
1527         if (c == root->node) {
1528                 /* trying to split the root, lets make a new one */
1529                 ret = insert_new_root(trans, root, path, level + 1);
1530                 if (ret)
1531                         return ret;
1532         } else {
1533                 ret = push_nodes_for_insert(trans, root, path, level);
1534                 c = path->nodes[level];
1535                 if (!ret && btrfs_header_nritems(c) <
1536                     BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
1537                         return 0;
1538                 if (ret < 0)
1539                         return ret;
1540         }
1541
1542         c_nritems = btrfs_header_nritems(c);
1543         mid = (c_nritems + 1) / 2;
1544         btrfs_node_key(c, &disk_key, mid);
1545
1546         split = btrfs_alloc_free_block(trans, root, root->nodesize,
1547                                         root->root_key.objectid,
1548                                         &disk_key, level, c->start, 0);
1549         if (IS_ERR(split))
1550                 return PTR_ERR(split);
1551
1552         memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
1553         btrfs_set_header_level(split, btrfs_header_level(c));
1554         btrfs_set_header_bytenr(split, split->start);
1555         btrfs_set_header_generation(split, trans->transid);
1556         btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
1557         btrfs_set_header_owner(split, root->root_key.objectid);
1558         write_extent_buffer(split, root->fs_info->fsid,
1559                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
1560         write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
1561                             btrfs_header_chunk_tree_uuid(split),
1562                             BTRFS_UUID_SIZE);
1563
1564
1565         copy_extent_buffer(split, c,
1566                            btrfs_node_key_ptr_offset(0),
1567                            btrfs_node_key_ptr_offset(mid),
1568                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1569         btrfs_set_header_nritems(split, c_nritems - mid);
1570         btrfs_set_header_nritems(c, mid);
1571         ret = 0;
1572
1573         btrfs_mark_buffer_dirty(c);
1574         btrfs_mark_buffer_dirty(split);
1575
1576         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1577                           path->slots[level + 1] + 1,
1578                           level + 1);
1579         if (wret)
1580                 ret = wret;
1581
1582         if (path->slots[level] >= mid) {
1583                 path->slots[level] -= mid;
1584                 free_extent_buffer(c);
1585                 path->nodes[level] = split;
1586                 path->slots[level + 1] += 1;
1587         } else {
1588                 free_extent_buffer(split);
1589         }
1590         return ret;
1591 }
1592
1593 /*
1594  * how many bytes are required to store the items in a leaf.  start
1595  * and nr indicate which items in the leaf to check.  This totals up the
1596  * space used both by the item structs and the item data
1597  */
1598 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1599 {
1600         int data_len;
1601         int nritems = btrfs_header_nritems(l);
1602         int end = min(nritems, start + nr) - 1;
1603
1604         if (!nr)
1605                 return 0;
1606         data_len = btrfs_item_end_nr(l, start);
1607         data_len = data_len - btrfs_item_offset_nr(l, end);
1608         data_len += sizeof(struct btrfs_item) * nr;
1609         WARN_ON(data_len < 0);
1610         return data_len;
1611 }
1612
1613 /*
1614  * The space between the end of the leaf items and
1615  * the start of the leaf data.  IOW, how much room
1616  * the leaf has left for both items and data
1617  */
1618 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1619 {
1620         u32 nodesize = (root ? BTRFS_LEAF_DATA_SIZE(root) : leaf->len);
1621         int nritems = btrfs_header_nritems(leaf);
1622         int ret;
1623         ret = nodesize - leaf_space_used(leaf, 0, nritems);
1624         if (ret < 0) {
1625                 printk("leaf free space ret %d, leaf data size %u, used %d nritems %d\n",
1626                        ret, nodesize, leaf_space_used(leaf, 0, nritems),
1627                        nritems);
1628         }
1629         return ret;
1630 }
1631
1632 /*
1633  * push some data in the path leaf to the right, trying to free up at
1634  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1635  *
1636  * returns 1 if the push failed because the other node didn't have enough
1637  * room, 0 if everything worked out and < 0 if there were major errors.
1638  */
1639 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1640                            *root, struct btrfs_path *path, int data_size,
1641                            int empty)
1642 {
1643         struct extent_buffer *left = path->nodes[0];
1644         struct extent_buffer *right;
1645         struct extent_buffer *upper;
1646         struct btrfs_disk_key disk_key;
1647         int slot;
1648         u32 i;
1649         int free_space;
1650         int push_space = 0;
1651         int push_items = 0;
1652         struct btrfs_item *item;
1653         u32 left_nritems;
1654         u32 nr;
1655         u32 right_nritems;
1656         u32 data_end;
1657         u32 this_item_size;
1658         int ret;
1659
1660         slot = path->slots[1];
1661         if (!path->nodes[1]) {
1662                 return 1;
1663         }
1664         upper = path->nodes[1];
1665         if (slot >= btrfs_header_nritems(upper) - 1)
1666                 return 1;
1667
1668         right = read_node_slot(root, upper, slot + 1);
1669         if (!extent_buffer_uptodate(right)) {
1670                 if (IS_ERR(right))
1671                         return PTR_ERR(right);
1672                 return -EIO;
1673         }
1674         free_space = btrfs_leaf_free_space(root, right);
1675         if (free_space < data_size) {
1676                 free_extent_buffer(right);
1677                 return 1;
1678         }
1679
1680         /* cow and double check */
1681         ret = btrfs_cow_block(trans, root, right, upper,
1682                               slot + 1, &right);
1683         if (ret) {
1684                 free_extent_buffer(right);
1685                 return 1;
1686         }
1687         free_space = btrfs_leaf_free_space(root, right);
1688         if (free_space < data_size) {
1689                 free_extent_buffer(right);
1690                 return 1;
1691         }
1692
1693         left_nritems = btrfs_header_nritems(left);
1694         if (left_nritems == 0) {
1695                 free_extent_buffer(right);
1696                 return 1;
1697         }
1698
1699         if (empty)
1700                 nr = 0;
1701         else
1702                 nr = 1;
1703
1704         i = left_nritems - 1;
1705         while (i >= nr) {
1706                 item = btrfs_item_nr(i);
1707
1708                 if (path->slots[0] == i)
1709                         push_space += data_size + sizeof(*item);
1710
1711                 this_item_size = btrfs_item_size(left, item);
1712                 if (this_item_size + sizeof(*item) + push_space > free_space)
1713                         break;
1714                 push_items++;
1715                 push_space += this_item_size + sizeof(*item);
1716                 if (i == 0)
1717                         break;
1718                 i--;
1719         }
1720
1721         if (push_items == 0) {
1722                 free_extent_buffer(right);
1723                 return 1;
1724         }
1725
1726         if (!empty && push_items == left_nritems)
1727                 WARN_ON(1);
1728
1729         /* push left to right */
1730         right_nritems = btrfs_header_nritems(right);
1731
1732         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1733         push_space -= leaf_data_end(root, left);
1734
1735         /* make room in the right data area */
1736         data_end = leaf_data_end(root, right);
1737         memmove_extent_buffer(right,
1738                               btrfs_leaf_data(right) + data_end - push_space,
1739                               btrfs_leaf_data(right) + data_end,
1740                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1741
1742         /* copy from the left data area */
1743         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1744                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1745                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1746                      push_space);
1747
1748         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1749                               btrfs_item_nr_offset(0),
1750                               right_nritems * sizeof(struct btrfs_item));
1751
1752         /* copy the items from left to right */
1753         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1754                    btrfs_item_nr_offset(left_nritems - push_items),
1755                    push_items * sizeof(struct btrfs_item));
1756
1757         /* update the item pointers */
1758         right_nritems += push_items;
1759         btrfs_set_header_nritems(right, right_nritems);
1760         push_space = BTRFS_LEAF_DATA_SIZE(root);
1761         for (i = 0; i < right_nritems; i++) {
1762                 item = btrfs_item_nr(i);
1763                 push_space -= btrfs_item_size(right, item);
1764                 btrfs_set_item_offset(right, item, push_space);
1765         }
1766
1767         left_nritems -= push_items;
1768         btrfs_set_header_nritems(left, left_nritems);
1769
1770         if (left_nritems)
1771                 btrfs_mark_buffer_dirty(left);
1772         btrfs_mark_buffer_dirty(right);
1773
1774         btrfs_item_key(right, &disk_key, 0);
1775         btrfs_set_node_key(upper, &disk_key, slot + 1);
1776         btrfs_mark_buffer_dirty(upper);
1777
1778         /* then fixup the leaf pointer in the path */
1779         if (path->slots[0] >= left_nritems) {
1780                 path->slots[0] -= left_nritems;
1781                 free_extent_buffer(path->nodes[0]);
1782                 path->nodes[0] = right;
1783                 path->slots[1] += 1;
1784         } else {
1785                 free_extent_buffer(right);
1786         }
1787         return 0;
1788 }
1789 /*
1790  * push some data in the path leaf to the left, trying to free up at
1791  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1792  */
1793 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1794                           *root, struct btrfs_path *path, int data_size,
1795                           int empty)
1796 {
1797         struct btrfs_disk_key disk_key;
1798         struct extent_buffer *right = path->nodes[0];
1799         struct extent_buffer *left;
1800         int slot;
1801         int i;
1802         int free_space;
1803         int push_space = 0;
1804         int push_items = 0;
1805         struct btrfs_item *item;
1806         u32 old_left_nritems;
1807         u32 right_nritems;
1808         u32 nr;
1809         int ret = 0;
1810         u32 this_item_size;
1811         u32 old_left_item_size;
1812
1813         slot = path->slots[1];
1814         if (slot == 0)
1815                 return 1;
1816         if (!path->nodes[1])
1817                 return 1;
1818
1819         right_nritems = btrfs_header_nritems(right);
1820         if (right_nritems == 0) {
1821                 return 1;
1822         }
1823
1824         left = read_node_slot(root, path->nodes[1], slot - 1);
1825         free_space = btrfs_leaf_free_space(root, left);
1826         if (free_space < data_size) {
1827                 free_extent_buffer(left);
1828                 return 1;
1829         }
1830
1831         /* cow and double check */
1832         ret = btrfs_cow_block(trans, root, left,
1833                               path->nodes[1], slot - 1, &left);
1834         if (ret) {
1835                 /* we hit -ENOSPC, but it isn't fatal here */
1836                 free_extent_buffer(left);
1837                 return 1;
1838         }
1839
1840         free_space = btrfs_leaf_free_space(root, left);
1841         if (free_space < data_size) {
1842                 free_extent_buffer(left);
1843                 return 1;
1844         }
1845
1846         if (empty)
1847                 nr = right_nritems;
1848         else
1849                 nr = right_nritems - 1;
1850
1851         for (i = 0; i < nr; i++) {
1852                 item = btrfs_item_nr(i);
1853
1854                 if (path->slots[0] == i)
1855                         push_space += data_size + sizeof(*item);
1856
1857                 this_item_size = btrfs_item_size(right, item);
1858                 if (this_item_size + sizeof(*item) + push_space > free_space)
1859                         break;
1860
1861                 push_items++;
1862                 push_space += this_item_size + sizeof(*item);
1863         }
1864
1865         if (push_items == 0) {
1866                 free_extent_buffer(left);
1867                 return 1;
1868         }
1869         if (!empty && push_items == btrfs_header_nritems(right))
1870                 WARN_ON(1);
1871
1872         /* push data from right to left */
1873         copy_extent_buffer(left, right,
1874                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1875                            btrfs_item_nr_offset(0),
1876                            push_items * sizeof(struct btrfs_item));
1877
1878         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1879                      btrfs_item_offset_nr(right, push_items -1);
1880
1881         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1882                      leaf_data_end(root, left) - push_space,
1883                      btrfs_leaf_data(right) +
1884                      btrfs_item_offset_nr(right, push_items - 1),
1885                      push_space);
1886         old_left_nritems = btrfs_header_nritems(left);
1887         BUG_ON(old_left_nritems == 0);
1888
1889         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1890         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1891                 u32 ioff;
1892
1893                 item = btrfs_item_nr(i);
1894                 ioff = btrfs_item_offset(left, item);
1895                 btrfs_set_item_offset(left, item,
1896                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1897         }
1898         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1899
1900         /* fixup right node */
1901         if (push_items > right_nritems) {
1902                 printk("push items %d nr %u\n", push_items, right_nritems);
1903                 WARN_ON(1);
1904         }
1905
1906         if (push_items < right_nritems) {
1907                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1908                                                   leaf_data_end(root, right);
1909                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1910                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1911                                       btrfs_leaf_data(right) +
1912                                       leaf_data_end(root, right), push_space);
1913
1914                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1915                               btrfs_item_nr_offset(push_items),
1916                              (btrfs_header_nritems(right) - push_items) *
1917                              sizeof(struct btrfs_item));
1918         }
1919         right_nritems -= push_items;
1920         btrfs_set_header_nritems(right, right_nritems);
1921         push_space = BTRFS_LEAF_DATA_SIZE(root);
1922         for (i = 0; i < right_nritems; i++) {
1923                 item = btrfs_item_nr(i);
1924                 push_space = push_space - btrfs_item_size(right, item);
1925                 btrfs_set_item_offset(right, item, push_space);
1926         }
1927
1928         btrfs_mark_buffer_dirty(left);
1929         if (right_nritems)
1930                 btrfs_mark_buffer_dirty(right);
1931
1932         btrfs_item_key(right, &disk_key, 0);
1933         btrfs_fixup_low_keys(root, path, &disk_key, 1);
1934
1935         /* then fixup the leaf pointer in the path */
1936         if (path->slots[0] < push_items) {
1937                 path->slots[0] += old_left_nritems;
1938                 free_extent_buffer(path->nodes[0]);
1939                 path->nodes[0] = left;
1940                 path->slots[1] -= 1;
1941         } else {
1942                 free_extent_buffer(left);
1943                 path->slots[0] -= push_items;
1944         }
1945         BUG_ON(path->slots[0] < 0);
1946         return ret;
1947 }
1948
1949 /*
1950  * split the path's leaf in two, making sure there is at least data_size
1951  * available for the resulting leaf level of the path.
1952  *
1953  * returns 0 if all went well and < 0 on failure.
1954  */
1955 static noinline int copy_for_split(struct btrfs_trans_handle *trans,
1956                                struct btrfs_root *root,
1957                                struct btrfs_path *path,
1958                                struct extent_buffer *l,
1959                                struct extent_buffer *right,
1960                                int slot, int mid, int nritems)
1961 {
1962         int data_copy_size;
1963         int rt_data_off;
1964         int i;
1965         int ret = 0;
1966         int wret;
1967         struct btrfs_disk_key disk_key;
1968
1969         nritems = nritems - mid;
1970         btrfs_set_header_nritems(right, nritems);
1971         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
1972
1973         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1974                            btrfs_item_nr_offset(mid),
1975                            nritems * sizeof(struct btrfs_item));
1976
1977         copy_extent_buffer(right, l,
1978                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1979                      data_copy_size, btrfs_leaf_data(l) +
1980                      leaf_data_end(root, l), data_copy_size);
1981
1982         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1983                       btrfs_item_end_nr(l, mid);
1984
1985         for (i = 0; i < nritems; i++) {
1986                 struct btrfs_item *item = btrfs_item_nr(i);
1987                 u32 ioff = btrfs_item_offset(right, item);
1988                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
1989         }
1990
1991         btrfs_set_header_nritems(l, mid);
1992         ret = 0;
1993         btrfs_item_key(right, &disk_key, 0);
1994         wret = insert_ptr(trans, root, path, &disk_key, right->start,
1995                           path->slots[1] + 1, 1);
1996         if (wret)
1997                 ret = wret;
1998
1999         btrfs_mark_buffer_dirty(right);
2000         btrfs_mark_buffer_dirty(l);
2001         BUG_ON(path->slots[0] != slot);
2002
2003         if (mid <= slot) {
2004                 free_extent_buffer(path->nodes[0]);
2005                 path->nodes[0] = right;
2006                 path->slots[0] -= mid;
2007                 path->slots[1] += 1;
2008         } else {
2009                 free_extent_buffer(right);
2010         }
2011
2012         BUG_ON(path->slots[0] < 0);
2013
2014         return ret;
2015 }
2016
2017 /*
2018  * split the path's leaf in two, making sure there is at least data_size
2019  * available for the resulting leaf level of the path.
2020  *
2021  * returns 0 if all went well and < 0 on failure.
2022  */
2023 static noinline int split_leaf(struct btrfs_trans_handle *trans,
2024                                struct btrfs_root *root,
2025                                struct btrfs_key *ins_key,
2026                                struct btrfs_path *path, int data_size,
2027                                int extend)
2028 {
2029         struct btrfs_disk_key disk_key;
2030         struct extent_buffer *l;
2031         u32 nritems;
2032         int mid;
2033         int slot;
2034         struct extent_buffer *right;
2035         int ret = 0;
2036         int wret;
2037         int split;
2038         int num_doubles = 0;
2039
2040         l = path->nodes[0];
2041         slot = path->slots[0];
2042         if (extend && data_size + btrfs_item_size_nr(l, slot) +
2043             sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
2044                 return -EOVERFLOW;
2045
2046         /* first try to make some room by pushing left and right */
2047         if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
2048                 wret = push_leaf_right(trans, root, path, data_size, 0);
2049                 if (wret < 0)
2050                         return wret;
2051                 if (wret) {
2052                         wret = push_leaf_left(trans, root, path, data_size, 0);
2053                         if (wret < 0)
2054                                 return wret;
2055                 }
2056                 l = path->nodes[0];
2057
2058                 /* did the pushes work? */
2059                 if (btrfs_leaf_free_space(root, l) >= data_size)
2060                         return 0;
2061         }
2062
2063         if (!path->nodes[1]) {
2064                 ret = insert_new_root(trans, root, path, 1);
2065                 if (ret)
2066                         return ret;
2067         }
2068 again:
2069         split = 1;
2070         l = path->nodes[0];
2071         slot = path->slots[0];
2072         nritems = btrfs_header_nritems(l);
2073         mid = (nritems + 1) / 2;
2074
2075         if (mid <= slot) {
2076                 if (nritems == 1 ||
2077                     leaf_space_used(l, mid, nritems - mid) + data_size >
2078                         BTRFS_LEAF_DATA_SIZE(root)) {
2079                         if (slot >= nritems) {
2080                                 split = 0;
2081                         } else {
2082                                 mid = slot;
2083                                 if (mid != nritems &&
2084                                     leaf_space_used(l, mid, nritems - mid) +
2085                                     data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2086                                         split = 2;
2087                                 }
2088                         }
2089                 }
2090         } else {
2091                 if (leaf_space_used(l, 0, mid) + data_size >
2092                         BTRFS_LEAF_DATA_SIZE(root)) {
2093                         if (!extend && data_size && slot == 0) {
2094                                 split = 0;
2095                         } else if ((extend || !data_size) && slot == 0) {
2096                                 mid = 1;
2097                         } else {
2098                                 mid = slot;
2099                                 if (mid != nritems &&
2100                                     leaf_space_used(l, mid, nritems - mid) +
2101                                     data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2102                                         split = 2 ;
2103                                 }
2104                         }
2105                 }
2106         }
2107         
2108         if (split == 0)
2109                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2110         else
2111                 btrfs_item_key(l, &disk_key, mid);
2112
2113         right = btrfs_alloc_free_block(trans, root, root->nodesize,
2114                                         root->root_key.objectid,
2115                                         &disk_key, 0, l->start, 0);
2116         if (IS_ERR(right)) {
2117                 BUG_ON(1);
2118                 return PTR_ERR(right);
2119         }
2120
2121         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2122         btrfs_set_header_bytenr(right, right->start);
2123         btrfs_set_header_generation(right, trans->transid);
2124         btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
2125         btrfs_set_header_owner(right, root->root_key.objectid);
2126         btrfs_set_header_level(right, 0);
2127         write_extent_buffer(right, root->fs_info->fsid,
2128                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
2129
2130         write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2131                             btrfs_header_chunk_tree_uuid(right),
2132                             BTRFS_UUID_SIZE);
2133
2134         if (split == 0) {
2135                 if (mid <= slot) {
2136                         btrfs_set_header_nritems(right, 0);
2137                         wret = insert_ptr(trans, root, path,
2138                                           &disk_key, right->start,
2139                                           path->slots[1] + 1, 1);
2140                         if (wret)
2141                                 ret = wret;
2142
2143                         free_extent_buffer(path->nodes[0]);
2144                         path->nodes[0] = right;
2145                         path->slots[0] = 0;
2146                         path->slots[1] += 1;
2147                 } else {
2148                         btrfs_set_header_nritems(right, 0);
2149                         wret = insert_ptr(trans, root, path,
2150                                           &disk_key,
2151                                           right->start,
2152                                           path->slots[1], 1);
2153                         if (wret)
2154                                 ret = wret;
2155                         free_extent_buffer(path->nodes[0]);
2156                         path->nodes[0] = right;
2157                         path->slots[0] = 0;
2158                         if (path->slots[1] == 0) {
2159                                 btrfs_fixup_low_keys(root, path,
2160                                                      &disk_key, 1);
2161                         }
2162                 }
2163                 btrfs_mark_buffer_dirty(right);
2164                 return ret;
2165         }
2166
2167         ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
2168         BUG_ON(ret);
2169
2170         if (split == 2) {
2171                 BUG_ON(num_doubles != 0);
2172                 num_doubles++;
2173                 goto again;
2174         }
2175
2176         return ret;
2177 }
2178
2179 /*
2180  * This function splits a single item into two items,
2181  * giving 'new_key' to the new item and splitting the
2182  * old one at split_offset (from the start of the item).
2183  *
2184  * The path may be released by this operation.  After
2185  * the split, the path is pointing to the old item.  The
2186  * new item is going to be in the same node as the old one.
2187  *
2188  * Note, the item being split must be smaller enough to live alone on
2189  * a tree block with room for one extra struct btrfs_item
2190  *
2191  * This allows us to split the item in place, keeping a lock on the
2192  * leaf the entire time.
2193  */
2194 int btrfs_split_item(struct btrfs_trans_handle *trans,
2195                      struct btrfs_root *root,
2196                      struct btrfs_path *path,
2197                      struct btrfs_key *new_key,
2198                      unsigned long split_offset)
2199 {
2200         u32 item_size;
2201         struct extent_buffer *leaf;
2202         struct btrfs_key orig_key;
2203         struct btrfs_item *item;
2204         struct btrfs_item *new_item;
2205         int ret = 0;
2206         int slot;
2207         u32 nritems;
2208         u32 orig_offset;
2209         struct btrfs_disk_key disk_key;
2210         char *buf;
2211
2212         leaf = path->nodes[0];
2213         btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
2214         if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
2215                 goto split;
2216
2217         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2218         btrfs_release_path(path);
2219
2220         path->search_for_split = 1;
2221
2222         ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
2223         path->search_for_split = 0;
2224
2225         /* if our item isn't there or got smaller, return now */
2226         if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
2227                                                         path->slots[0])) {
2228                 return -EAGAIN;
2229         }
2230
2231         ret = split_leaf(trans, root, &orig_key, path, 0, 0);
2232         BUG_ON(ret);
2233
2234         BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
2235         leaf = path->nodes[0];
2236
2237 split:
2238         item = btrfs_item_nr(path->slots[0]);
2239         orig_offset = btrfs_item_offset(leaf, item);
2240         item_size = btrfs_item_size(leaf, item);
2241
2242
2243         buf = kmalloc(item_size, GFP_NOFS);
2244         BUG_ON(!buf);
2245         read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
2246                             path->slots[0]), item_size);
2247         slot = path->slots[0] + 1;
2248         leaf = path->nodes[0];
2249
2250         nritems = btrfs_header_nritems(leaf);
2251
2252         if (slot != nritems) {
2253                 /* shift the items */
2254                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2255                               btrfs_item_nr_offset(slot),
2256                               (nritems - slot) * sizeof(struct btrfs_item));
2257
2258         }
2259
2260         btrfs_cpu_key_to_disk(&disk_key, new_key);
2261         btrfs_set_item_key(leaf, &disk_key, slot);
2262
2263         new_item = btrfs_item_nr(slot);
2264
2265         btrfs_set_item_offset(leaf, new_item, orig_offset);
2266         btrfs_set_item_size(leaf, new_item, item_size - split_offset);
2267
2268         btrfs_set_item_offset(leaf, item,
2269                               orig_offset + item_size - split_offset);
2270         btrfs_set_item_size(leaf, item, split_offset);
2271
2272         btrfs_set_header_nritems(leaf, nritems + 1);
2273
2274         /* write the data for the start of the original item */
2275         write_extent_buffer(leaf, buf,
2276                             btrfs_item_ptr_offset(leaf, path->slots[0]),
2277                             split_offset);
2278
2279         /* write the data for the new item */
2280         write_extent_buffer(leaf, buf + split_offset,
2281                             btrfs_item_ptr_offset(leaf, slot),
2282                             item_size - split_offset);
2283         btrfs_mark_buffer_dirty(leaf);
2284
2285         ret = 0;
2286         if (btrfs_leaf_free_space(root, leaf) < 0) {
2287                 btrfs_print_leaf(root, leaf);
2288                 BUG();
2289         }
2290         kfree(buf);
2291         return ret;
2292 }
2293
2294 int btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
2295                         u32 new_size, int from_end)
2296 {
2297         int ret = 0;
2298         int slot;
2299         struct extent_buffer *leaf;
2300         struct btrfs_item *item;
2301         u32 nritems;
2302         unsigned int data_end;
2303         unsigned int old_data_start;
2304         unsigned int old_size;
2305         unsigned int size_diff;
2306         int i;
2307
2308         leaf = path->nodes[0];
2309         slot = path->slots[0];
2310
2311         old_size = btrfs_item_size_nr(leaf, slot);
2312         if (old_size == new_size)
2313                 return 0;
2314
2315         nritems = btrfs_header_nritems(leaf);
2316         data_end = leaf_data_end(root, leaf);
2317
2318         old_data_start = btrfs_item_offset_nr(leaf, slot);
2319
2320         size_diff = old_size - new_size;
2321
2322         BUG_ON(slot < 0);
2323         BUG_ON(slot >= nritems);
2324
2325         /*
2326          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2327          */
2328         /* first correct the data pointers */
2329         for (i = slot; i < nritems; i++) {
2330                 u32 ioff;
2331                 item = btrfs_item_nr(i);
2332                 ioff = btrfs_item_offset(leaf, item);
2333                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2334         }
2335
2336         /* shift the data */
2337         if (from_end) {
2338                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2339                               data_end + size_diff, btrfs_leaf_data(leaf) +
2340                               data_end, old_data_start + new_size - data_end);
2341         } else {
2342                 struct btrfs_disk_key disk_key;
2343                 u64 offset;
2344
2345                 btrfs_item_key(leaf, &disk_key, slot);
2346
2347                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2348                         unsigned long ptr;
2349                         struct btrfs_file_extent_item *fi;
2350
2351                         fi = btrfs_item_ptr(leaf, slot,
2352                                             struct btrfs_file_extent_item);
2353                         fi = (struct btrfs_file_extent_item *)(
2354                              (unsigned long)fi - size_diff);
2355
2356                         if (btrfs_file_extent_type(leaf, fi) ==
2357                             BTRFS_FILE_EXTENT_INLINE) {
2358                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2359                                 memmove_extent_buffer(leaf, ptr,
2360                                         (unsigned long)fi,
2361                                         offsetof(struct btrfs_file_extent_item,
2362                                                  disk_bytenr));
2363                         }
2364                 }
2365
2366                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2367                               data_end + size_diff, btrfs_leaf_data(leaf) +
2368                               data_end, old_data_start - data_end);
2369
2370                 offset = btrfs_disk_key_offset(&disk_key);
2371                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2372                 btrfs_set_item_key(leaf, &disk_key, slot);
2373                 if (slot == 0)
2374                         btrfs_fixup_low_keys(root, path, &disk_key, 1);
2375         }
2376
2377         item = btrfs_item_nr(slot);
2378         btrfs_set_item_size(leaf, item, new_size);
2379         btrfs_mark_buffer_dirty(leaf);
2380
2381         ret = 0;
2382         if (btrfs_leaf_free_space(root, leaf) < 0) {
2383                 btrfs_print_leaf(root, leaf);
2384                 BUG();
2385         }
2386         return ret;
2387 }
2388
2389 int btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
2390                       u32 data_size)
2391 {
2392         int ret = 0;
2393         int slot;
2394         struct extent_buffer *leaf;
2395         struct btrfs_item *item;
2396         u32 nritems;
2397         unsigned int data_end;
2398         unsigned int old_data;
2399         unsigned int old_size;
2400         int i;
2401
2402         leaf = path->nodes[0];
2403
2404         nritems = btrfs_header_nritems(leaf);
2405         data_end = leaf_data_end(root, leaf);
2406
2407         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2408                 btrfs_print_leaf(root, leaf);
2409                 BUG();
2410         }
2411         slot = path->slots[0];
2412         old_data = btrfs_item_end_nr(leaf, slot);
2413
2414         BUG_ON(slot < 0);
2415         if (slot >= nritems) {
2416                 btrfs_print_leaf(root, leaf);
2417                 printk("slot %d too large, nritems %d\n", slot, nritems);
2418                 BUG_ON(1);
2419         }
2420
2421         /*
2422          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2423          */
2424         /* first correct the data pointers */
2425         for (i = slot; i < nritems; i++) {
2426                 u32 ioff;
2427                 item = btrfs_item_nr(i);
2428                 ioff = btrfs_item_offset(leaf, item);
2429                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2430         }
2431
2432         /* shift the data */
2433         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2434                       data_end - data_size, btrfs_leaf_data(leaf) +
2435                       data_end, old_data - data_end);
2436
2437         data_end = old_data;
2438         old_size = btrfs_item_size_nr(leaf, slot);
2439         item = btrfs_item_nr(slot);
2440         btrfs_set_item_size(leaf, item, old_size + data_size);
2441         btrfs_mark_buffer_dirty(leaf);
2442
2443         ret = 0;
2444         if (btrfs_leaf_free_space(root, leaf) < 0) {
2445                 btrfs_print_leaf(root, leaf);
2446                 BUG();
2447         }
2448         return ret;
2449 }
2450
2451 /*
2452  * Given a key and some data, insert an item into the tree.
2453  * This does all the path init required, making room in the tree if needed.
2454  */
2455 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2456                             struct btrfs_root *root,
2457                             struct btrfs_path *path,
2458                             struct btrfs_key *cpu_key, u32 *data_size,
2459                             int nr)
2460 {
2461         struct extent_buffer *leaf;
2462         struct btrfs_item *item;
2463         int ret = 0;
2464         int slot;
2465         int i;
2466         u32 nritems;
2467         u32 total_size = 0;
2468         u32 total_data = 0;
2469         unsigned int data_end;
2470         struct btrfs_disk_key disk_key;
2471
2472         for (i = 0; i < nr; i++) {
2473                 total_data += data_size[i];
2474         }
2475
2476         /* create a root if there isn't one */
2477         if (!root->node)
2478                 BUG();
2479
2480         total_size = total_data + nr * sizeof(struct btrfs_item);
2481         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2482         if (ret == 0) {
2483                 return -EEXIST;
2484         }
2485         if (ret < 0)
2486                 goto out;
2487
2488         leaf = path->nodes[0];
2489
2490         nritems = btrfs_header_nritems(leaf);
2491         data_end = leaf_data_end(root, leaf);
2492
2493         if (btrfs_leaf_free_space(root, leaf) < total_size) {
2494                 btrfs_print_leaf(root, leaf);
2495                 printk("not enough freespace need %u have %d\n",
2496                        total_size, btrfs_leaf_free_space(root, leaf));
2497                 BUG();
2498         }
2499
2500         slot = path->slots[0];
2501         BUG_ON(slot < 0);
2502
2503         if (slot != nritems) {
2504                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2505
2506                 if (old_data < data_end) {
2507                         btrfs_print_leaf(root, leaf);
2508                         printk("slot %d old_data %d data_end %d\n",
2509                                slot, old_data, data_end);
2510                         BUG_ON(1);
2511                 }
2512                 /*
2513                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2514                  */
2515                 /* first correct the data pointers */
2516                 for (i = slot; i < nritems; i++) {
2517                         u32 ioff;
2518
2519                         item = btrfs_item_nr(i);
2520                         ioff = btrfs_item_offset(leaf, item);
2521                         btrfs_set_item_offset(leaf, item, ioff - total_data);
2522                 }
2523
2524                 /* shift the items */
2525                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2526                               btrfs_item_nr_offset(slot),
2527                               (nritems - slot) * sizeof(struct btrfs_item));
2528
2529                 /* shift the data */
2530                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2531                               data_end - total_data, btrfs_leaf_data(leaf) +
2532                               data_end, old_data - data_end);
2533                 data_end = old_data;
2534         }
2535
2536         /* setup the item for the new data */
2537         for (i = 0; i < nr; i++) {
2538                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
2539                 btrfs_set_item_key(leaf, &disk_key, slot + i);
2540                 item = btrfs_item_nr(slot + i);
2541                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
2542                 data_end -= data_size[i];
2543                 btrfs_set_item_size(leaf, item, data_size[i]);
2544         }
2545         btrfs_set_header_nritems(leaf, nritems + nr);
2546         btrfs_mark_buffer_dirty(leaf);
2547
2548         ret = 0;
2549         if (slot == 0) {
2550                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2551                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
2552         }
2553
2554         if (btrfs_leaf_free_space(root, leaf) < 0) {
2555                 btrfs_print_leaf(root, leaf);
2556                 BUG();
2557         }
2558
2559 out:
2560         return ret;
2561 }
2562
2563 /*
2564  * Given a key and some data, insert an item into the tree.
2565  * This does all the path init required, making room in the tree if needed.
2566  */
2567 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2568                       *root, struct btrfs_key *cpu_key, void *data, u32
2569                       data_size)
2570 {
2571         int ret = 0;
2572         struct btrfs_path *path;
2573         struct extent_buffer *leaf;
2574         unsigned long ptr;
2575
2576         path = btrfs_alloc_path();
2577         if (!path)
2578                 return -ENOMEM;
2579
2580         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2581         if (!ret) {
2582                 leaf = path->nodes[0];
2583                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2584                 write_extent_buffer(leaf, data, ptr, data_size);
2585                 btrfs_mark_buffer_dirty(leaf);
2586         }
2587         btrfs_free_path(path);
2588         return ret;
2589 }
2590
2591 /*
2592  * delete the pointer from a given node.
2593  *
2594  * If the delete empties a node, the node is removed from the tree,
2595  * continuing all the way the root if required.  The root is converted into
2596  * a leaf if all the nodes are emptied.
2597  */
2598 int btrfs_del_ptr(struct btrfs_root *root, struct btrfs_path *path,
2599                 int level, int slot)
2600 {
2601         struct extent_buffer *parent = path->nodes[level];
2602         u32 nritems;
2603         int ret = 0;
2604
2605         nritems = btrfs_header_nritems(parent);
2606         if (slot != nritems -1) {
2607                 memmove_extent_buffer(parent,
2608                               btrfs_node_key_ptr_offset(slot),
2609                               btrfs_node_key_ptr_offset(slot + 1),
2610                               sizeof(struct btrfs_key_ptr) *
2611                               (nritems - slot - 1));
2612         }
2613         nritems--;
2614         btrfs_set_header_nritems(parent, nritems);
2615         if (nritems == 0 && parent == root->node) {
2616                 BUG_ON(btrfs_header_level(root->node) != 1);
2617                 /* just turn the root into a leaf and break */
2618                 btrfs_set_header_level(root->node, 0);
2619         } else if (slot == 0) {
2620                 struct btrfs_disk_key disk_key;
2621
2622                 btrfs_node_key(parent, &disk_key, 0);
2623                 btrfs_fixup_low_keys(root, path, &disk_key, level + 1);
2624         }
2625         btrfs_mark_buffer_dirty(parent);
2626         return ret;
2627 }
2628
2629 /*
2630  * a helper function to delete the leaf pointed to by path->slots[1] and
2631  * path->nodes[1].
2632  *
2633  * This deletes the pointer in path->nodes[1] and frees the leaf
2634  * block extent.  zero is returned if it all worked out, < 0 otherwise.
2635  *
2636  * The path must have already been setup for deleting the leaf, including
2637  * all the proper balancing.  path->nodes[1] must be locked.
2638  */
2639 static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
2640                                    struct btrfs_root *root,
2641                                    struct btrfs_path *path,
2642                                    struct extent_buffer *leaf)
2643 {
2644         int ret;
2645
2646         WARN_ON(btrfs_header_generation(leaf) != trans->transid);
2647         ret = btrfs_del_ptr(root, path, 1, path->slots[1]);
2648         if (ret)
2649                 return ret;
2650
2651         ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
2652                                 0, root->root_key.objectid, 0, 0);
2653         return ret;
2654 }
2655
2656 /*
2657  * delete the item at the leaf level in path.  If that empties
2658  * the leaf, remove it from the tree
2659  */
2660 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2661                     struct btrfs_path *path, int slot, int nr)
2662 {
2663         struct extent_buffer *leaf;
2664         struct btrfs_item *item;
2665         int last_off;
2666         int dsize = 0;
2667         int ret = 0;
2668         int wret;
2669         int i;
2670         u32 nritems;
2671
2672         leaf = path->nodes[0];
2673         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2674
2675         for (i = 0; i < nr; i++)
2676                 dsize += btrfs_item_size_nr(leaf, slot + i);
2677
2678         nritems = btrfs_header_nritems(leaf);
2679
2680         if (slot + nr != nritems) {
2681                 int data_end = leaf_data_end(root, leaf);
2682
2683                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2684                               data_end + dsize,
2685                               btrfs_leaf_data(leaf) + data_end,
2686                               last_off - data_end);
2687
2688                 for (i = slot + nr; i < nritems; i++) {
2689                         u32 ioff;
2690
2691                         item = btrfs_item_nr(i);
2692                         ioff = btrfs_item_offset(leaf, item);
2693                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2694                 }
2695
2696                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2697                               btrfs_item_nr_offset(slot + nr),
2698                               sizeof(struct btrfs_item) *
2699                               (nritems - slot - nr));
2700         }
2701         btrfs_set_header_nritems(leaf, nritems - nr);
2702         nritems -= nr;
2703
2704         /* delete the leaf if we've emptied it */
2705         if (nritems == 0) {
2706                 if (leaf == root->node) {
2707                         btrfs_set_header_level(leaf, 0);
2708                 } else {
2709                         clean_tree_block(trans, root, leaf);
2710                         wret = btrfs_del_leaf(trans, root, path, leaf);
2711                         BUG_ON(ret);
2712                         if (wret)
2713                                 ret = wret;
2714                 }
2715         } else {
2716                 int used = leaf_space_used(leaf, 0, nritems);
2717                 if (slot == 0) {
2718                         struct btrfs_disk_key disk_key;
2719
2720                         btrfs_item_key(leaf, &disk_key, 0);
2721                         btrfs_fixup_low_keys(root, path, &disk_key, 1);
2722                 }
2723
2724                 /* delete the leaf if it is mostly empty */
2725                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2726                         /* push_leaf_left fixes the path.
2727                          * make sure the path still points to our leaf
2728                          * for possible call to del_ptr below
2729                          */
2730                         slot = path->slots[1];
2731                         extent_buffer_get(leaf);
2732
2733                         wret = push_leaf_left(trans, root, path, 1, 1);
2734                         if (wret < 0 && wret != -ENOSPC)
2735                                 ret = wret;
2736
2737                         if (path->nodes[0] == leaf &&
2738                             btrfs_header_nritems(leaf)) {
2739                                 wret = push_leaf_right(trans, root, path, 1, 1);
2740                                 if (wret < 0 && wret != -ENOSPC)
2741                                         ret = wret;
2742                         }
2743
2744                         if (btrfs_header_nritems(leaf) == 0) {
2745                                 clean_tree_block(trans, root, leaf);
2746                                 path->slots[1] = slot;
2747                                 ret = btrfs_del_leaf(trans, root, path, leaf);
2748                                 BUG_ON(ret);
2749                                 free_extent_buffer(leaf);
2750
2751                         } else {
2752                                 btrfs_mark_buffer_dirty(leaf);
2753                                 free_extent_buffer(leaf);
2754                         }
2755                 } else {
2756                         btrfs_mark_buffer_dirty(leaf);
2757                 }
2758         }
2759         return ret;
2760 }
2761
2762 /*
2763  * walk up the tree as far as required to find the previous leaf.
2764  * returns 0 if it found something or 1 if there are no lesser leaves.
2765  * returns < 0 on io errors.
2766  */
2767 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2768 {
2769         int slot;
2770         int level = 1;
2771         struct extent_buffer *c;
2772         struct extent_buffer *next = NULL;
2773
2774         while(level < BTRFS_MAX_LEVEL) {
2775                 if (!path->nodes[level])
2776                         return 1;
2777
2778                 slot = path->slots[level];
2779                 c = path->nodes[level];
2780                 if (slot == 0) {
2781                         level++;
2782                         if (level == BTRFS_MAX_LEVEL)
2783                                 return 1;
2784                         continue;
2785                 }
2786                 slot--;
2787
2788                 next = read_node_slot(root, c, slot);
2789                 if (!extent_buffer_uptodate(next)) {
2790                         if (IS_ERR(next))
2791                                 return PTR_ERR(next);
2792                         return -EIO;
2793                 }
2794                 break;
2795         }
2796         path->slots[level] = slot;
2797         while(1) {
2798                 level--;
2799                 c = path->nodes[level];
2800                 free_extent_buffer(c);
2801                 slot = btrfs_header_nritems(next);
2802                 if (slot != 0)
2803                         slot--;
2804                 path->nodes[level] = next;
2805                 path->slots[level] = slot;
2806                 if (!level)
2807                         break;
2808                 next = read_node_slot(root, next, slot);
2809                 if (!extent_buffer_uptodate(next)) {
2810                         if (IS_ERR(next))
2811                                 return PTR_ERR(next);
2812                         return -EIO;
2813                 }
2814         }
2815         return 0;
2816 }
2817
2818 /*
2819  * walk up the tree as far as required to find the next leaf.
2820  * returns 0 if it found something or 1 if there are no greater leaves.
2821  * returns < 0 on io errors.
2822  */
2823 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2824 {
2825         int slot;
2826         int level = 1;
2827         struct extent_buffer *c;
2828         struct extent_buffer *next = NULL;
2829
2830         while(level < BTRFS_MAX_LEVEL) {
2831                 if (!path->nodes[level])
2832                         return 1;
2833
2834                 slot = path->slots[level] + 1;
2835                 c = path->nodes[level];
2836                 if (slot >= btrfs_header_nritems(c)) {
2837                         level++;
2838                         if (level == BTRFS_MAX_LEVEL)
2839                                 return 1;
2840                         continue;
2841                 }
2842
2843                 if (path->reada)
2844                         reada_for_search(root, path, level, slot, 0);
2845
2846                 next = read_node_slot(root, c, slot);
2847                 if (!extent_buffer_uptodate(next))
2848                         return -EIO;
2849                 break;
2850         }
2851         path->slots[level] = slot;
2852         while(1) {
2853                 level--;
2854                 c = path->nodes[level];
2855                 free_extent_buffer(c);
2856                 path->nodes[level] = next;
2857                 path->slots[level] = 0;
2858                 if (!level)
2859                         break;
2860                 if (path->reada)
2861                         reada_for_search(root, path, level, 0, 0);
2862                 next = read_node_slot(root, next, 0);
2863                 if (!extent_buffer_uptodate(next))
2864                         return -EIO;
2865         }
2866         return 0;
2867 }
2868
2869 int btrfs_previous_item(struct btrfs_root *root,
2870                         struct btrfs_path *path, u64 min_objectid,
2871                         int type)
2872 {
2873         struct btrfs_key found_key;
2874         struct extent_buffer *leaf;
2875         u32 nritems;
2876         int ret;
2877
2878         while(1) {
2879                 if (path->slots[0] == 0) {
2880                         ret = btrfs_prev_leaf(root, path);
2881                         if (ret != 0)
2882                                 return ret;
2883                 } else {
2884                         path->slots[0]--;
2885                 }
2886                 leaf = path->nodes[0];
2887                 nritems = btrfs_header_nritems(leaf);
2888                 if (nritems == 0)
2889                         return 1;
2890                 if (path->slots[0] == nritems)
2891                         path->slots[0]--;
2892
2893                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2894                 if (found_key.objectid < min_objectid)
2895                         break;
2896                 if (found_key.type == type)
2897                         return 0;
2898                 if (found_key.objectid == min_objectid &&
2899                     found_key.type < type)
2900                         break;
2901         }
2902         return 1;
2903 }
2904
2905 /*
2906  * search in extent tree to find a previous Metadata/Data extent item with
2907  * min objecitd.
2908  *
2909  * returns 0 if something is found, 1 if nothing was found and < 0 on error
2910  */
2911 int btrfs_previous_extent_item(struct btrfs_root *root,
2912                         struct btrfs_path *path, u64 min_objectid)
2913 {
2914         struct btrfs_key found_key;
2915         struct extent_buffer *leaf;
2916         u32 nritems;
2917         int ret;
2918
2919         while (1) {
2920                 if (path->slots[0] == 0) {
2921                         ret = btrfs_prev_leaf(root, path);
2922                         if (ret != 0)
2923                                 return ret;
2924                 } else {
2925                         path->slots[0]--;
2926                 }
2927                 leaf = path->nodes[0];
2928                 nritems = btrfs_header_nritems(leaf);
2929                 if (nritems == 0)
2930                         return 1;
2931                 if (path->slots[0] == nritems)
2932                         path->slots[0]--;
2933
2934                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2935                 if (found_key.objectid < min_objectid)
2936                         break;
2937                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
2938                     found_key.type == BTRFS_METADATA_ITEM_KEY)
2939                         return 0;
2940                 if (found_key.objectid == min_objectid &&
2941                     found_key.type < BTRFS_EXTENT_ITEM_KEY)
2942                         break;
2943         }
2944         return 1;
2945 }
2946
2947 /*
2948  * Search in extent tree to found next meta/data extent
2949  * Caller needs to check for no-hole or skinny metadata features.
2950  */
2951 int btrfs_next_extent_item(struct btrfs_root *root,
2952                         struct btrfs_path *path, u64 max_objectid)
2953 {
2954         struct btrfs_key found_key;
2955         int ret;
2956
2957         while (1) {
2958                 ret = btrfs_next_item(root, path);
2959                 if (ret)
2960                         return ret;
2961                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2962                                       path->slots[0]);
2963                 if (found_key.objectid > max_objectid)
2964                         return 1;
2965                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
2966                     found_key.type == BTRFS_METADATA_ITEM_KEY)
2967                 return 0;
2968         }
2969 }