87574d9e0f2dd9478281b59d56553b8ed16a0ade
[platform/upstream/btrfs-progs.git] / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include "ctree.h"
19 #include "disk-io.h"
20 #include "transaction.h"
21 #include "print-tree.h"
22
/*
 * Prototypes for file-local (static) helpers, so that they can be called
 * ahead of their definitions.
 */
static int split_node(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
                      struct btrfs_path *path,
                      int level);
static int split_leaf(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
                      struct btrfs_key *ins_key,
                      struct btrfs_path *path,
                      int data_size,
                      int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          struct extent_buffer *dst,
                          struct extent_buffer *src,
                          int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *dst_buf,
                              struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans,
                   struct btrfs_root *root,
                   struct btrfs_path *path,
                   int level,
                   int slot);
37
38 inline void btrfs_init_path(struct btrfs_path *p)
39 {
40         memset(p, 0, sizeof(*p));
41 }
42
43 struct btrfs_path *btrfs_alloc_path(void)
44 {
45         struct btrfs_path *path;
46         path = kmalloc(sizeof(struct btrfs_path), GFP_NOFS);
47         if (path) {
48                 btrfs_init_path(path);
49                 path->reada = 0;
50         }
51         return path;
52 }
53
54 void btrfs_free_path(struct btrfs_path *p)
55 {
56         btrfs_release_path(NULL, p);
57         kfree(p);
58 }
59
60 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
61 {
62         int i;
63         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
64                 if (!p->nodes[i])
65                         break;
66                 free_extent_buffer(p->nodes[i]);
67         }
68         memset(p, 0, sizeof(*p));
69 }
70
71 static void add_root_to_dirty_list(struct btrfs_root *root)
72 {
73         if (root->track_dirty && list_empty(&root->dirty_list)) {
74                 list_add(&root->dirty_list,
75                          &root->fs_info->dirty_cowonly_roots);
76         }
77 }
78
79 int btrfs_copy_root(struct btrfs_trans_handle *trans,
80                       struct btrfs_root *root,
81                       struct extent_buffer *buf,
82                       struct extent_buffer **cow_ret, u64 new_root_objectid)
83 {
84         struct extent_buffer *cow;
85         u32 nritems;
86         int ret = 0;
87         int level;
88         struct btrfs_key first_key;
89         struct btrfs_root *new_root;
90
91         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
92         if (!new_root)
93                 return -ENOMEM;
94
95         memcpy(new_root, root, sizeof(*new_root));
96         new_root->root_key.objectid = new_root_objectid;
97
98         WARN_ON(root->ref_cows && trans->transid !=
99                 root->fs_info->running_transaction->transid);
100         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
101
102         level = btrfs_header_level(buf);
103         nritems = btrfs_header_nritems(buf);
104         if (nritems) {
105                 if (level == 0)
106                         btrfs_item_key_to_cpu(buf, &first_key, 0);
107                 else
108                         btrfs_node_key_to_cpu(buf, &first_key, 0);
109         } else {
110                 first_key.objectid = 0;
111         }
112         cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
113                                        new_root_objectid,
114                                        trans->transid, first_key.objectid,
115                                        level, buf->start, 0);
116         if (IS_ERR(cow)) {
117                 kfree(new_root);
118                 return PTR_ERR(cow);
119         }
120
121         copy_extent_buffer(cow, buf, 0, 0, cow->len);
122         btrfs_set_header_bytenr(cow, cow->start);
123         btrfs_set_header_generation(cow, trans->transid);
124         btrfs_set_header_owner(cow, new_root_objectid);
125         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
126
127         WARN_ON(btrfs_header_generation(buf) > trans->transid);
128         ret = btrfs_inc_ref(trans, new_root, buf);
129         kfree(new_root);
130
131         if (ret)
132                 return ret;
133
134         btrfs_mark_buffer_dirty(cow);
135         *cow_ret = cow;
136         return 0;
137 }
138
139 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
140                              struct btrfs_root *root,
141                              struct extent_buffer *buf,
142                              struct extent_buffer *parent, int parent_slot,
143                              struct extent_buffer **cow_ret,
144                              u64 search_start, u64 empty_size)
145 {
146         u64 root_gen;
147         struct extent_buffer *cow;
148         u32 nritems;
149         int ret = 0;
150         int different_trans = 0;
151         int level;
152         struct btrfs_key first_key;
153
154         if (root->ref_cows) {
155                 root_gen = trans->transid;
156         } else {
157                 root_gen = 0;
158         }
159
160         WARN_ON(root->ref_cows && trans->transid !=
161                 root->fs_info->running_transaction->transid);
162         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
163
164         level = btrfs_header_level(buf);
165         nritems = btrfs_header_nritems(buf);
166         if (nritems) {
167                 if (level == 0)
168                         btrfs_item_key_to_cpu(buf, &first_key, 0);
169                 else
170                         btrfs_node_key_to_cpu(buf, &first_key, 0);
171         } else {
172                 first_key.objectid = 0;
173         }
174         cow = __btrfs_alloc_free_block(trans, root, buf->len,
175                                      root->root_key.objectid,
176                                      root_gen, first_key.objectid, level,
177                                      search_start, empty_size);
178         if (IS_ERR(cow))
179                 return PTR_ERR(cow);
180
181         copy_extent_buffer(cow, buf, 0, 0, cow->len);
182         btrfs_set_header_bytenr(cow, cow->start);
183         btrfs_set_header_generation(cow, trans->transid);
184         btrfs_set_header_owner(cow, root->root_key.objectid);
185         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
186
187         WARN_ON(btrfs_header_generation(buf) > trans->transid);
188         if (btrfs_header_generation(buf) != trans->transid) {
189                 different_trans = 1;
190                 ret = btrfs_inc_ref(trans, root, buf);
191                 if (ret)
192                         return ret;
193         } else {
194                 clean_tree_block(trans, root, buf);
195         }
196
197         if (buf == root->node) {
198                 root_gen = btrfs_header_generation(buf);
199                 root->node = cow;
200                 extent_buffer_get(cow);
201                 if (buf != root->commit_root) {
202                         btrfs_free_extent(trans, root, buf->start,
203                                           buf->len, root->root_key.objectid,
204                                           root_gen, 0, 0, 1);
205                 }
206                 free_extent_buffer(buf);
207                 add_root_to_dirty_list(root);
208         } else {
209                 root_gen = btrfs_header_generation(parent);
210                 btrfs_set_node_blockptr(parent, parent_slot,
211                                         cow->start);
212                 WARN_ON(trans->transid == 0);
213                 btrfs_set_node_ptr_generation(parent, parent_slot,
214                                               trans->transid);
215                 btrfs_mark_buffer_dirty(parent);
216                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
217                 btrfs_free_extent(trans, root, buf->start, buf->len,
218                                   btrfs_header_owner(parent), root_gen,
219                                   0, 0, 1);
220         }
221         free_extent_buffer(buf);
222         btrfs_mark_buffer_dirty(cow);
223         *cow_ret = cow;
224         return 0;
225 }
226
227 int btrfs_cow_block(struct btrfs_trans_handle *trans,
228                     struct btrfs_root *root, struct extent_buffer *buf,
229                     struct extent_buffer *parent, int parent_slot,
230                     struct extent_buffer **cow_ret)
231 {
232         u64 search_start;
233         int ret;
234         /*
235         if (trans->transaction != root->fs_info->running_transaction) {
236                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
237                        root->fs_info->running_transaction->transid);
238                 WARN_ON(1);
239         }
240         */
241         if (trans->transid != root->fs_info->generation) {
242                 printk(KERN_CRIT "trans %llu running %llu\n",
243                         (unsigned long long)trans->transid,
244                         (unsigned long long)root->fs_info->generation);
245                 WARN_ON(1);
246         }
247         if (btrfs_header_generation(buf) == trans->transid &&
248             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
249                 *cow_ret = buf;
250                 return 0;
251         }
252
253         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
254         ret = __btrfs_cow_block(trans, root, buf, parent,
255                                  parent_slot, cow_ret, search_start, 0);
256         return ret;
257 }
258
259 /*
260 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
261 {
262         if (blocknr < other && other - (blocknr + blocksize) < 32768)
263                 return 1;
264         if (blocknr > other && blocknr - (other + blocksize) < 32768)
265                 return 1;
266         return 0;
267 }
268 */
269
270 /*
271  * compare two keys in a memcmp fashion
272  */
273 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
274 {
275         struct btrfs_key k1;
276
277         btrfs_disk_key_to_cpu(&k1, disk);
278
279         if (k1.objectid > k2->objectid)
280                 return 1;
281         if (k1.objectid < k2->objectid)
282                 return -1;
283         if (k1.type > k2->type)
284                 return 1;
285         if (k1.type < k2->type)
286                 return -1;
287         if (k1.offset > k2->offset)
288                 return 1;
289         if (k1.offset < k2->offset)
290                 return -1;
291         return 0;
292 }
293
294
/*
 * btrfs_realloc_node() is compiled out by the "#if 0" below.
 *
 * What the disabled code does: it walks the child pointers of @parent from
 * @start_slot and, for every child whose key is not below @progress and
 * that close_blocks() does not report as close to its neighbouring
 * pointers, copies the child with __btrfs_cow_block().  The start of each
 * new block becomes the search start for the next one and is reported
 * through *@last_ret.  With @cache_only set, only level-1 parents are
 * processed and children that are not cached and up to date are skipped.
 * Returns 0, or the error from __btrfs_cow_block().
 *
 * NOTE(review): close_blocks() is itself commented out above, and
 * comp_keys() is not defined in the visible part of this file, so this
 * would need work before it could be re-enabled.
 */
#if 0
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct extent_buffer *parent,
                       int start_slot, int cache_only, u64 *last_ret,
                       struct btrfs_key *progress)
{
        struct extent_buffer *cur;
        struct extent_buffer *tmp;
        u64 blocknr;
        u64 gen;
        u64 search_start = *last_ret;
        u64 last_block = 0;
        u64 other;
        u32 parent_nritems;
        int end_slot;
        int i;
        int err = 0;
        int parent_level;
        int uptodate;
        u32 blocksize;
        int progress_passed = 0;
        struct btrfs_disk_key disk_key;

        parent_level = btrfs_header_level(parent);
        if (cache_only && parent_level != 1)
                return 0;

        if (trans->transaction != root->fs_info->running_transaction) {
                printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
                       root->fs_info->running_transaction->transid);
                WARN_ON(1);
        }
        if (trans->transid != root->fs_info->generation) {
                printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
                       root->fs_info->generation);
                WARN_ON(1);
        }

        parent_nritems = btrfs_header_nritems(parent);
        blocksize = btrfs_level_size(root, parent_level - 1);
        end_slot = parent_nritems;

        /* a parent with a single pointer is left alone */
        if (parent_nritems == 1)
                return 0;

        for (i = start_slot; i < end_slot; i++) {
                int close = 1;

                if (!parent->map_token) {
                        map_extent_buffer(parent,
                                        btrfs_node_key_ptr_offset(i),
                                        sizeof(struct btrfs_key_ptr),
                                        &parent->map_token, &parent->kaddr,
                                        &parent->map_start, &parent->map_len,
                                        KM_USER1);
                }
                btrfs_node_key(parent, &disk_key, i);
                /* skip slots whose key is still below @progress */
                if (!progress_passed && comp_keys(&disk_key, progress) < 0)
                        continue;

                progress_passed = 1;
                blocknr = btrfs_node_blockptr(parent, i);
                gen = btrfs_node_ptr_generation(parent, i);
                if (last_block == 0)
                        last_block = blocknr;

                /* compare against the previous, then the next pointer */
                if (i > 0) {
                        other = btrfs_node_blockptr(parent, i - 1);
                        close = close_blocks(blocknr, other, blocksize);
                }
                if (close && i < end_slot - 2) {
                        other = btrfs_node_blockptr(parent, i + 1);
                        close = close_blocks(blocknr, other, blocksize);
                }
                /* still flagged as close: leave this child where it is */
                if (close) {
                        last_block = blocknr;
                        continue;
                }
                if (parent->map_token) {
                        unmap_extent_buffer(parent, parent->map_token,
                                            KM_USER1);
                        parent->map_token = NULL;
                }

                cur = btrfs_find_tree_block(root, blocknr, blocksize);
                if (cur)
                        uptodate = btrfs_buffer_uptodate(cur);
                else
                        uptodate = 0;
                if (!cur || !uptodate) {
                        /* cache_only: never read a child from disk */
                        if (cache_only) {
                                free_extent_buffer(cur);
                                continue;
                        }
                        if (!cur) {
                                cur = read_tree_block(root, blocknr,
                                                         blocksize, gen);
                        } else if (!uptodate) {
                                btrfs_read_buffer(cur, gen);
                        }
                }
                if (search_start == 0)
                        search_start = last_block;

                err = __btrfs_cow_block(trans, root, cur, parent, i,
                                        &tmp, search_start,
                                        min(16 * blocksize,
                                            (end_slot - i) * blocksize));
                if (err) {
                        free_extent_buffer(cur);
                        break;
                }
                /* the next copy is searched for from the new block's start */
                search_start = tmp->start;
                last_block = tmp->start;
                *last_ret = search_start;
                if (parent_level == 1)
                        btrfs_clear_buffer_defrag(tmp);
                free_extent_buffer(tmp);
        }
        if (parent->map_token) {
                unmap_extent_buffer(parent, parent->map_token,
                                    KM_USER1);
                parent->map_token = NULL;
        }
        return err;
}
#endif
422
423 /*
424  * The leaf data grows from end-to-front in the node.
425  * this returns the address of the start of the last item,
426  * which is the stop of the leaf data stack
427  */
428 static inline unsigned int leaf_data_end(struct btrfs_root *root,
429                                          struct extent_buffer *leaf)
430 {
431         u32 nr = btrfs_header_nritems(leaf);
432         if (nr == 0)
433                 return BTRFS_LEAF_DATA_SIZE(root);
434         return btrfs_item_offset_nr(leaf, nr - 1);
435 }
436
437 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
438                       int level)
439 {
440         struct extent_buffer *parent = NULL;
441         struct extent_buffer *node = path->nodes[level];
442         struct btrfs_disk_key parent_key;
443         struct btrfs_disk_key node_key;
444         int parent_slot;
445         int slot;
446         struct btrfs_key cpukey;
447         u32 nritems = btrfs_header_nritems(node);
448
449         if (path->nodes[level + 1])
450                 parent = path->nodes[level + 1];
451
452         slot = path->slots[level];
453         BUG_ON(nritems == 0);
454         if (parent) {
455                 parent_slot = path->slots[level + 1];
456                 btrfs_node_key(parent, &parent_key, parent_slot);
457                 btrfs_node_key(node, &node_key, 0);
458                 BUG_ON(memcmp(&parent_key, &node_key,
459                               sizeof(struct btrfs_disk_key)));
460                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
461                        btrfs_header_bytenr(node));
462         }
463         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
464         if (slot != 0) {
465                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
466                 btrfs_node_key(node, &node_key, slot);
467                 BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0);
468         }
469         if (slot < nritems - 1) {
470                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
471                 btrfs_node_key(node, &node_key, slot);
472                 BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0);
473         }
474         return 0;
475 }
476
477 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
478                       int level)
479 {
480         struct extent_buffer *leaf = path->nodes[level];
481         struct extent_buffer *parent = NULL;
482         int parent_slot;
483         struct btrfs_key cpukey;
484         struct btrfs_disk_key parent_key;
485         struct btrfs_disk_key leaf_key;
486         int slot = path->slots[0];
487
488         u32 nritems = btrfs_header_nritems(leaf);
489
490         if (path->nodes[level + 1])
491                 parent = path->nodes[level + 1];
492
493         if (nritems == 0)
494                 return 0;
495
496         if (parent) {
497                 parent_slot = path->slots[level + 1];
498                 btrfs_node_key(parent, &parent_key, parent_slot);
499                 btrfs_item_key(leaf, &leaf_key, 0);
500
501                 BUG_ON(memcmp(&parent_key, &leaf_key,
502                        sizeof(struct btrfs_disk_key)));
503                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
504                        btrfs_header_bytenr(leaf));
505         }
506 #if 0
507         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
508                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
509                 btrfs_item_key(leaf, &leaf_key, i);
510                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
511                         btrfs_print_leaf(root, leaf);
512                         printk("slot %d offset bad key\n", i);
513                         BUG_ON(1);
514                 }
515                 if (btrfs_item_offset_nr(leaf, i) !=
516                         btrfs_item_end_nr(leaf, i + 1)) {
517                         btrfs_print_leaf(root, leaf);
518                         printk("slot %d offset bad\n", i);
519                         BUG_ON(1);
520                 }
521                 if (i == 0) {
522                         if (btrfs_item_offset_nr(leaf, i) +
523                                btrfs_item_size_nr(leaf, i) !=
524                                BTRFS_LEAF_DATA_SIZE(root)) {
525                                 btrfs_print_leaf(root, leaf);
526                                 printk("slot %d first offset bad\n", i);
527                                 BUG_ON(1);
528                         }
529                 }
530         }
531         if (nritems > 0) {
532                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
533                                 btrfs_print_leaf(root, leaf);
534                                 printk("slot %d bad size \n", nritems - 1);
535                                 BUG_ON(1);
536                 }
537         }
538 #endif
539         if (slot != 0 && slot < nritems - 1) {
540                 btrfs_item_key(leaf, &leaf_key, slot);
541                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
542                 if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) {
543                         btrfs_print_leaf(root, leaf);
544                         printk("slot %d offset bad key\n", slot);
545                         BUG_ON(1);
546                 }
547                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
548                        btrfs_item_end_nr(leaf, slot)) {
549                         btrfs_print_leaf(root, leaf);
550                         printk("slot %d offset bad\n", slot);
551                         BUG_ON(1);
552                 }
553         }
554         if (slot < nritems - 1) {
555                 btrfs_item_key(leaf, &leaf_key, slot);
556                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
557                 BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0);
558                 if (btrfs_item_offset_nr(leaf, slot) !=
559                         btrfs_item_end_nr(leaf, slot + 1)) {
560                         btrfs_print_leaf(root, leaf);
561                         printk("slot %d offset bad\n", slot);
562                         BUG_ON(1);
563                 }
564         }
565         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
566                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
567         return 0;
568 }
569
570 static int noinline check_block(struct btrfs_root *root,
571                                 struct btrfs_path *path, int level)
572 {
573         return 0;
574 #if 0
575         struct extent_buffer *buf = path->nodes[level];
576
577         if (memcmp_extent_buffer(buf, root->fs_info->fsid,
578                                  (unsigned long)btrfs_header_fsid(buf),
579                                  BTRFS_FSID_SIZE)) {
580                 printk("warning bad block %Lu\n", buf->start);
581                 return 1;
582         }
583 #endif
584         if (level == 0)
585                 return check_leaf(root, path, level);
586         return check_node(root, path, level);
587 }
588
589 /*
590  * search for key in the extent_buffer.  The items start at offset p,
591  * and they are item_size apart.  There are 'max' items in p.
592  *
593  * the slot in the array is returned via slot, and it points to
594  * the place where you would insert key if it is not found in
595  * the array.
596  *
597  * slot may point to max if the key is bigger than all of the keys
598  */
599 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
600                               int item_size, struct btrfs_key *key,
601                               int max, int *slot)
602 {
603         int low = 0;
604         int high = max;
605         int mid;
606         int ret;
607         unsigned long offset;
608         struct btrfs_disk_key *tmp;
609
610         while(low < high) {
611                 mid = (low + high) / 2;
612                 offset = p + mid * item_size;
613
614                 tmp = (struct btrfs_disk_key *)(eb->data + offset);
615                 ret = btrfs_comp_keys(tmp, key);
616
617                 if (ret < 0)
618                         low = mid + 1;
619                 else if (ret > 0)
620                         high = mid;
621                 else {
622                         *slot = mid;
623                         return 0;
624                 }
625         }
626         *slot = low;
627         return 1;
628 }
629
630 /*
631  * simple bin_search frontend that does the right thing for
632  * leaves vs nodes
633  */
634 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
635                       int level, int *slot)
636 {
637         if (level == 0) {
638                 return generic_bin_search(eb,
639                                           offsetof(struct btrfs_leaf, items),
640                                           sizeof(struct btrfs_item),
641                                           key, btrfs_header_nritems(eb),
642                                           slot);
643         } else {
644                 return generic_bin_search(eb,
645                                           offsetof(struct btrfs_node, ptrs),
646                                           sizeof(struct btrfs_key_ptr),
647                                           key, btrfs_header_nritems(eb),
648                                           slot);
649         }
650         return -1;
651 }
652
653 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
654                                    struct extent_buffer *parent, int slot)
655 {
656         int level = btrfs_header_level(parent);
657         if (slot < 0)
658                 return NULL;
659         if (slot >= btrfs_header_nritems(parent))
660                 return NULL;
661
662         BUG_ON(level == 0);
663
664         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
665                        btrfs_level_size(root, level - 1),
666                        btrfs_node_ptr_generation(parent, slot));
667 }
668
669 static int balance_level(struct btrfs_trans_handle *trans,
670                          struct btrfs_root *root,
671                          struct btrfs_path *path, int level)
672 {
673         struct extent_buffer *right = NULL;
674         struct extent_buffer *mid;
675         struct extent_buffer *left = NULL;
676         struct extent_buffer *parent = NULL;
677         int ret = 0;
678         int wret;
679         int pslot;
680         int orig_slot = path->slots[level];
681         int err_on_enospc = 0;
682         u64 orig_ptr;
683
684         if (level == 0)
685                 return 0;
686
687         mid = path->nodes[level];
688         WARN_ON(btrfs_header_generation(mid) != trans->transid);
689
690         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
691
692         if (level < BTRFS_MAX_LEVEL - 1)
693                 parent = path->nodes[level + 1];
694         pslot = path->slots[level + 1];
695
696         /*
697          * deal with the case where there is only one pointer in the root
698          * by promoting the node below to a root
699          */
700         if (!parent) {
701                 struct extent_buffer *child;
702
703                 if (btrfs_header_nritems(mid) != 1)
704                         return 0;
705
706                 /* promote the child to a root */
707                 child = read_node_slot(root, mid, 0);
708                 BUG_ON(!child);
709                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
710                 BUG_ON(ret);
711
712                 root->node = child;
713                 add_root_to_dirty_list(root);
714                 path->nodes[level] = NULL;
715                 clean_tree_block(trans, root, mid);
716                 wait_on_tree_block_writeback(root, mid);
717                 /* once for the path */
718                 free_extent_buffer(mid);
719                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
720                                         root->root_key.objectid,
721                                         btrfs_header_generation(mid), 0, 0, 1);
722                 /* once for the root ptr */
723                 free_extent_buffer(mid);
724                 return ret;
725         }
726         if (btrfs_header_nritems(mid) >
727             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
728                 return 0;
729
730         if (btrfs_header_nritems(mid) < 2)
731                 err_on_enospc = 1;
732
733         left = read_node_slot(root, parent, pslot - 1);
734         if (left) {
735                 wret = btrfs_cow_block(trans, root, left,
736                                        parent, pslot - 1, &left);
737                 if (wret) {
738                         ret = wret;
739                         goto enospc;
740                 }
741         }
742         right = read_node_slot(root, parent, pslot + 1);
743         if (right) {
744                 wret = btrfs_cow_block(trans, root, right,
745                                        parent, pslot + 1, &right);
746                 if (wret) {
747                         ret = wret;
748                         goto enospc;
749                 }
750         }
751
752         /* first, try to make some room in the middle buffer */
753         if (left) {
754                 orig_slot += btrfs_header_nritems(left);
755                 wret = push_node_left(trans, root, left, mid, 1);
756                 if (wret < 0)
757                         ret = wret;
758                 if (btrfs_header_nritems(mid) < 2)
759                         err_on_enospc = 1;
760         }
761
762         /*
763          * then try to empty the right most buffer into the middle
764          */
765         if (right) {
766                 wret = push_node_left(trans, root, mid, right, 1);
767                 if (wret < 0 && wret != -ENOSPC)
768                         ret = wret;
769                 if (btrfs_header_nritems(right) == 0) {
770                         u64 bytenr = right->start;
771                         u64 generation = btrfs_header_generation(parent);
772                         u32 blocksize = right->len;
773
774                         clean_tree_block(trans, root, right);
775                         wait_on_tree_block_writeback(root, right);
776                         free_extent_buffer(right);
777                         right = NULL;
778                         wret = del_ptr(trans, root, path, level + 1, pslot +
779                                        1);
780                         if (wret)
781                                 ret = wret;
782                         wret = btrfs_free_extent(trans, root, bytenr,
783                                                  blocksize,
784                                                  btrfs_header_owner(parent),
785                                                  generation, 0, 0, 1);
786                         if (wret)
787                                 ret = wret;
788                 } else {
789                         struct btrfs_disk_key right_key;
790                         btrfs_node_key(right, &right_key, 0);
791                         btrfs_set_node_key(parent, &right_key, pslot + 1);
792                         btrfs_mark_buffer_dirty(parent);
793                 }
794         }
795         if (btrfs_header_nritems(mid) == 1) {
796                 /*
797                  * we're not allowed to leave a node with one item in the
798                  * tree during a delete.  A deletion from lower in the tree
799                  * could try to delete the only pointer in this node.
800                  * So, pull some keys from the left.
801                  * There has to be a left pointer at this point because
802                  * otherwise we would have pulled some pointers from the
803                  * right
804                  */
805                 BUG_ON(!left);
806                 wret = balance_node_right(trans, root, mid, left);
807                 if (wret < 0) {
808                         ret = wret;
809                         goto enospc;
810                 }
811                 if (wret == 1) {
812                         wret = push_node_left(trans, root, left, mid, 1);
813                         if (wret < 0)
814                                 ret = wret;
815                 }
816                 BUG_ON(wret == 1);
817         }
818         if (btrfs_header_nritems(mid) == 0) {
819                 /* we've managed to empty the middle node, drop it */
820                 u64 root_gen = btrfs_header_generation(parent);
821                 u64 bytenr = mid->start;
822                 u32 blocksize = mid->len;
823                 clean_tree_block(trans, root, mid);
824                 wait_on_tree_block_writeback(root, mid);
825                 free_extent_buffer(mid);
826                 mid = NULL;
827                 wret = del_ptr(trans, root, path, level + 1, pslot);
828                 if (wret)
829                         ret = wret;
830                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
831                                          btrfs_header_owner(parent),
832                                          root_gen, 0, 0, 1);
833                 if (wret)
834                         ret = wret;
835         } else {
836                 /* update the parent key to reflect our changes */
837                 struct btrfs_disk_key mid_key;
838                 btrfs_node_key(mid, &mid_key, 0);
839                 btrfs_set_node_key(parent, &mid_key, pslot);
840                 btrfs_mark_buffer_dirty(parent);
841         }
842
843         /* update the path */
844         if (left) {
845                 if (btrfs_header_nritems(left) > orig_slot) {
846                         extent_buffer_get(left);
847                         path->nodes[level] = left;
848                         path->slots[level + 1] -= 1;
849                         path->slots[level] = orig_slot;
850                         if (mid)
851                                 free_extent_buffer(mid);
852                 } else {
853                         orig_slot -= btrfs_header_nritems(left);
854                         path->slots[level] = orig_slot;
855                 }
856         }
857         /* double check we haven't messed things up */
858         check_block(root, path, level);
859         if (orig_ptr !=
860             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
861                 BUG();
862 enospc:
863         if (right)
864                 free_extent_buffer(right);
865         if (left)
866                 free_extent_buffer(left);
867         return ret;
868 }
869
/*
 * Try to free up room in the node at path->nodes[level] by moving some of
 * its pointers into the left or the right sibling, so the caller may be able
 * to avoid a split.
 *
 * Returns zero if the push worked, non-zero otherwise.  In this function
 * every return is either 0 or 1; negative values stored in 'ret' are never
 * handed back to the caller.
 *
 * On success path->nodes[level], path->slots[level] and
 * path->slots[level + 1] are adjusted to follow the entry that
 * path->slots[level] designated on entry.
 */
static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path, int level)
{
        struct extent_buffer *right = NULL;
        struct extent_buffer *mid;
        struct extent_buffer *left = NULL;
        struct extent_buffer *parent = NULL;
        int ret = 0;
        int wret;
        int pslot;
        int orig_slot = path->slots[level];
        /* NOTE(review): orig_ptr is assigned below but never read here */
        u64 orig_ptr;

        /* level 0 is always reported as "nothing pushed" */
        if (level == 0)
                return 1;

        mid = path->nodes[level];
        WARN_ON(btrfs_header_generation(mid) != trans->transid);
        orig_ptr = btrfs_node_blockptr(mid, orig_slot);

        if (level < BTRFS_MAX_LEVEL - 1)
                parent = path->nodes[level + 1];
        pslot = path->slots[level + 1];

        /* without a parent there are no siblings to push into */
        if (!parent)
                return 1;

        left = read_node_slot(root, parent, pslot - 1);

        /* first, try to make some room in the middle buffer */
        if (left) {
                u32 left_nr;
                left_nr = btrfs_header_nritems(left);
                /* a left sibling with at most one free slot is treated as full */
                if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
                        wret = 1;
                } else {
                        ret = btrfs_cow_block(trans, root, left, parent,
                                              pslot - 1, &left);
                        /* a failed cow is downgraded to "could not push" */
                        if (ret)
                                wret = 1;
                        else {
                                wret = push_node_left(trans, root,
                                                      left, mid, 0);
                        }
                }
                if (wret < 0)
                        ret = wret;
                if (wret == 0) {
                        struct btrfs_disk_key disk_key;
                        /* express the slot relative to the start of 'left' */
                        orig_slot += left_nr;
                        /* mid lost its first pointers: refresh its key in parent */
                        btrfs_node_key(mid, &disk_key, 0);
                        btrfs_set_node_key(parent, &disk_key, pslot);
                        btrfs_mark_buffer_dirty(parent);
                        if (btrfs_header_nritems(left) > orig_slot) {
                                /*
                                 * the tracked entry now lives in 'left': the
                                 * path takes over 'left' and lets go of mid
                                 */
                                path->nodes[level] = left;
                                path->slots[level + 1] -= 1;
                                path->slots[level] = orig_slot;
                                free_extent_buffer(mid);
                        } else {
                                /* entry stayed in mid, only its slot shifted */
                                orig_slot -=
                                        btrfs_header_nritems(left);
                                path->slots[level] = orig_slot;
                                free_extent_buffer(left);
                        }
                        return 0;
                }
                free_extent_buffer(left);
        }
        right= read_node_slot(root, parent, pslot + 1);

        /*
         * then try to move pointers from the middle buffer into the front
         * of the right sibling (balance_node_right with dst = right,
         * src = mid)
         */
        if (right) {
                u32 right_nr;
                right_nr = btrfs_header_nritems(right);
                /* same "nearly full" test as for the left sibling */
                if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
                        wret = 1;
                } else {
                        ret = btrfs_cow_block(trans, root, right,
                                              parent, pslot + 1,
                                              &right);
                        if (ret)
                                wret = 1;
                        else {
                                wret = balance_node_right(trans, root,
                                                          right, mid);
                        }
                }
                if (wret < 0)
                        ret = wret;
                if (wret == 0) {
                        struct btrfs_disk_key disk_key;

                        /* right gained a new first pointer: refresh its key */
                        btrfs_node_key(right, &disk_key, 0);
                        btrfs_set_node_key(parent, &disk_key, pslot + 1);
                        btrfs_mark_buffer_dirty(parent);

                        if (btrfs_header_nritems(mid) <= orig_slot) {
                                /*
                                 * the tracked entry was moved into 'right':
                                 * repoint the path and let go of mid
                                 */
                                path->nodes[level] = right;
                                path->slots[level + 1] += 1;
                                path->slots[level] = orig_slot -
                                        btrfs_header_nritems(mid);
                                free_extent_buffer(mid);
                        } else {
                                free_extent_buffer(right);
                        }
                        return 0;
                }
                free_extent_buffer(right);
        }
        return 1;
}
985
986 /*
987  * readahead one full node of leaves
988  */
989 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
990                              int level, int slot, u64 objectid)
991 {
992         struct extent_buffer *node;
993         struct btrfs_disk_key disk_key;
994         u32 nritems;
995         u64 search;
996         u64 lowest_read;
997         u64 highest_read;
998         u64 nread = 0;
999         int direction = path->reada;
1000         struct extent_buffer *eb;
1001         u32 nr;
1002         u32 blocksize;
1003         u32 nscan = 0;
1004
1005         if (level != 1)
1006                 return;
1007
1008         if (!path->nodes[level])
1009                 return;
1010
1011         node = path->nodes[level];
1012         search = btrfs_node_blockptr(node, slot);
1013         blocksize = btrfs_level_size(root, level - 1);
1014         eb = btrfs_find_tree_block(root, search, blocksize);
1015         if (eb) {
1016                 free_extent_buffer(eb);
1017                 return;
1018         }
1019
1020         highest_read = search;
1021         lowest_read = search;
1022
1023         nritems = btrfs_header_nritems(node);
1024         nr = slot;
1025         while(1) {
1026                 if (direction < 0) {
1027                         if (nr == 0)
1028                                 break;
1029                         nr--;
1030                 } else if (direction > 0) {
1031                         nr++;
1032                         if (nr >= nritems)
1033                                 break;
1034                 }
1035                 if (path->reada < 0 && objectid) {
1036                         btrfs_node_key(node, &disk_key, nr);
1037                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1038                                 break;
1039                 }
1040                 search = btrfs_node_blockptr(node, nr);
1041                 if ((search >= lowest_read && search <= highest_read) ||
1042                     (search < lowest_read && lowest_read - search <= 32768) ||
1043                     (search > highest_read && search - highest_read <= 32768)) {
1044                         readahead_tree_block(root, search, blocksize,
1045                                      btrfs_node_ptr_generation(node, nr));
1046                         nread += blocksize;
1047                 }
1048                 nscan++;
1049                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1050                         break;
1051                 if(nread > (1024 * 1024) || nscan > 128)
1052                         break;
1053
1054                 if (search < lowest_read)
1055                         lowest_read = search;
1056                 if (search > highest_read)
1057                         highest_read = search;
1058         }
1059 }
1060
1061 /*
1062  * look for key in the tree.  path is filled in with nodes along the way
1063  * if key is found, we return zero and you can find the item in the leaf
1064  * level of the path (level 0)
1065  *
1066  * If the key isn't found, the path points to the slot where it should
1067  * be inserted, and 1 is returned.  If there are other errors during the
1068  * search a negative error number is returned.
1069  *
1070  * if ins_len > 0, nodes and leaves will be split as we walk down the
1071  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1072  * possible)
1073  */
1074 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1075                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1076                       ins_len, int cow)
1077 {
1078         struct extent_buffer *b;
1079         int slot;
1080         int ret;
1081         int level;
1082         int should_reada = p->reada;
1083         u8 lowest_level = 0;
1084
1085         lowest_level = p->lowest_level;
1086         WARN_ON(lowest_level && ins_len);
1087         WARN_ON(p->nodes[0] != NULL);
1088         /*
1089         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1090         */
1091 again:
1092         b = root->node;
1093         extent_buffer_get(b);
1094         while (b) {
1095                 level = btrfs_header_level(b);
1096                 if (cow) {
1097                         int wret;
1098                         wret = btrfs_cow_block(trans, root, b,
1099                                                p->nodes[level + 1],
1100                                                p->slots[level + 1],
1101                                                &b);
1102                         if (wret) {
1103                                 free_extent_buffer(b);
1104                                 return wret;
1105                         }
1106                 }
1107                 BUG_ON(!cow && ins_len);
1108                 if (level != btrfs_header_level(b))
1109                         WARN_ON(1);
1110                 level = btrfs_header_level(b);
1111                 p->nodes[level] = b;
1112                 ret = check_block(root, p, level);
1113                 if (ret)
1114                         return -1;
1115                 ret = bin_search(b, key, level, &slot);
1116                 if (level != 0) {
1117                         if (ret && slot > 0)
1118                                 slot -= 1;
1119                         p->slots[level] = slot;
1120                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1121                             BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1122                                 int sret = split_node(trans, root, p, level);
1123                                 BUG_ON(sret > 0);
1124                                 if (sret)
1125                                         return sret;
1126                                 b = p->nodes[level];
1127                                 slot = p->slots[level];
1128                         } else if (ins_len < 0) {
1129                                 int sret = balance_level(trans, root, p,
1130                                                          level);
1131                                 if (sret)
1132                                         return sret;
1133                                 b = p->nodes[level];
1134                                 if (!b) {
1135                                         btrfs_release_path(NULL, p);
1136                                         goto again;
1137                                 }
1138                                 slot = p->slots[level];
1139                                 BUG_ON(btrfs_header_nritems(b) == 1);
1140                         }
1141                         /* this is only true while dropping a snapshot */
1142                         if (level == lowest_level)
1143                                 break;
1144
1145                         if (should_reada)
1146                                 reada_for_search(root, p, level, slot,
1147                                                  key->objectid);
1148
1149                         b = read_node_slot(root, b, slot);
1150                 } else {
1151                         p->slots[level] = slot;
1152                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1153                             sizeof(struct btrfs_item) + ins_len) {
1154                                 int sret = split_leaf(trans, root, key,
1155                                                       p, ins_len, ret == 0);
1156                                 BUG_ON(sret > 0);
1157                                 if (sret)
1158                                         return sret;
1159                         }
1160                         return ret;
1161                 }
1162         }
1163         return 1;
1164 }
1165
1166 /*
1167  * adjust the pointers going up the tree, starting at level
1168  * making sure the right key of each node is points to 'key'.
1169  * This is used after shifting pointers to the left, so it stops
1170  * fixing up pointers when a given leaf/node is not in slot 0 of the
1171  * higher levels
1172  *
1173  * If this fails to write a tree block, it returns -1, but continues
1174  * fixing up the blocks in ram so the tree is consistent.
1175  */
1176 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1177                           struct btrfs_root *root, struct btrfs_path *path,
1178                           struct btrfs_disk_key *key, int level)
1179 {
1180         int i;
1181         int ret = 0;
1182         struct extent_buffer *t;
1183
1184         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1185                 int tslot = path->slots[i];
1186                 if (!path->nodes[i])
1187                         break;
1188                 t = path->nodes[i];
1189                 btrfs_set_node_key(t, key, tslot);
1190                 btrfs_mark_buffer_dirty(path->nodes[i]);
1191                 if (tslot != 0)
1192                         break;
1193         }
1194         return ret;
1195 }
1196
1197 /*
1198  * try to push data from one node into the next node left in the
1199  * tree.
1200  *
1201  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1202  * error, and > 0 if there was no room in the left hand block.
1203  */
1204 static int push_node_left(struct btrfs_trans_handle *trans,
1205                           struct btrfs_root *root, struct extent_buffer *dst,
1206                           struct extent_buffer *src, int empty)
1207 {
1208         int push_items = 0;
1209         int src_nritems;
1210         int dst_nritems;
1211         int ret = 0;
1212
1213         src_nritems = btrfs_header_nritems(src);
1214         dst_nritems = btrfs_header_nritems(dst);
1215         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1216         WARN_ON(btrfs_header_generation(src) != trans->transid);
1217         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1218
1219         if (!empty && src_nritems <= 8)
1220                 return 1;
1221
1222         if (push_items <= 0) {
1223                 return 1;
1224         }
1225
1226         if (empty) {
1227                 push_items = min(src_nritems, push_items);
1228                 if (push_items < src_nritems) {
1229                         /* leave at least 8 pointers in the node if
1230                          * we aren't going to empty it
1231                          */
1232                         if (src_nritems - push_items < 8) {
1233                                 if (push_items <= 8)
1234                                         return 1;
1235                                 push_items -= 8;
1236                         }
1237                 }
1238         } else
1239                 push_items = min(src_nritems - 8, push_items);
1240
1241         copy_extent_buffer(dst, src,
1242                            btrfs_node_key_ptr_offset(dst_nritems),
1243                            btrfs_node_key_ptr_offset(0),
1244                            push_items * sizeof(struct btrfs_key_ptr));
1245
1246         if (push_items < src_nritems) {
1247                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1248                                       btrfs_node_key_ptr_offset(push_items),
1249                                       (src_nritems - push_items) *
1250                                       sizeof(struct btrfs_key_ptr));
1251         }
1252         btrfs_set_header_nritems(src, src_nritems - push_items);
1253         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1254         btrfs_mark_buffer_dirty(src);
1255         btrfs_mark_buffer_dirty(dst);
1256         return ret;
1257 }
1258
1259 /*
1260  * try to push data from one node into the next node right in the
1261  * tree.
1262  *
1263  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1264  * error, and > 0 if there was no room in the right hand block.
1265  *
1266  * this will  only push up to 1/2 the contents of the left node over
1267  */
1268 static int balance_node_right(struct btrfs_trans_handle *trans,
1269                               struct btrfs_root *root,
1270                               struct extent_buffer *dst,
1271                               struct extent_buffer *src)
1272 {
1273         int push_items = 0;
1274         int max_push;
1275         int src_nritems;
1276         int dst_nritems;
1277         int ret = 0;
1278
1279         WARN_ON(btrfs_header_generation(src) != trans->transid);
1280         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1281
1282         src_nritems = btrfs_header_nritems(src);
1283         dst_nritems = btrfs_header_nritems(dst);
1284         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1285         if (push_items <= 0) {
1286                 return 1;
1287         }
1288
1289         if (src_nritems < 4) {
1290                 return 1;
1291         }
1292
1293         max_push = src_nritems / 2 + 1;
1294         /* don't try to empty the node */
1295         if (max_push >= src_nritems) {
1296                 return 1;
1297         }
1298
1299         if (max_push < push_items)
1300                 push_items = max_push;
1301
1302         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1303                                       btrfs_node_key_ptr_offset(0),
1304                                       (dst_nritems) *
1305                                       sizeof(struct btrfs_key_ptr));
1306
1307         copy_extent_buffer(dst, src,
1308                            btrfs_node_key_ptr_offset(0),
1309                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1310                            push_items * sizeof(struct btrfs_key_ptr));
1311
1312         btrfs_set_header_nritems(src, src_nritems - push_items);
1313         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1314
1315         btrfs_mark_buffer_dirty(src);
1316         btrfs_mark_buffer_dirty(dst);
1317         return ret;
1318 }
1319
1320 /*
1321  * helper function to insert a new root level in the tree.
1322  * A new node is allocated, and a single item is inserted to
1323  * point to the existing root
1324  *
1325  * returns zero on success or < 0 on failure.
1326  */
1327 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1328                            struct btrfs_root *root,
1329                            struct btrfs_path *path, int level)
1330 {
1331         u64 root_gen;
1332         u64 lower_gen;
1333         struct extent_buffer *lower;
1334         struct extent_buffer *c;
1335         struct btrfs_disk_key lower_key;
1336
1337         BUG_ON(path->nodes[level]);
1338         BUG_ON(path->nodes[level-1] != root->node);
1339
1340         if (root->ref_cows)
1341                 root_gen = trans->transid;
1342         else
1343                 root_gen = 0;
1344
1345         lower = path->nodes[level-1];
1346         if (level == 1)
1347                 btrfs_item_key(lower, &lower_key, 0);
1348         else
1349                 btrfs_node_key(lower, &lower_key, 0);
1350
1351         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1352                                    root->root_key.objectid,
1353                                    root_gen, lower_key.objectid, level,
1354                                    root->node->start, 0);
1355         if (IS_ERR(c))
1356                 return PTR_ERR(c);
1357         memset_extent_buffer(c, 0, 0, root->nodesize);
1358         btrfs_set_header_nritems(c, 1);
1359         btrfs_set_header_level(c, level);
1360         btrfs_set_header_bytenr(c, c->start);
1361         btrfs_set_header_generation(c, trans->transid);
1362         btrfs_set_header_owner(c, root->root_key.objectid);
1363
1364         write_extent_buffer(c, root->fs_info->fsid,
1365                             (unsigned long)btrfs_header_fsid(c),
1366                             BTRFS_FSID_SIZE);
1367
1368         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
1369                             (unsigned long)btrfs_header_chunk_tree_uuid(c),
1370                             BTRFS_UUID_SIZE);
1371
1372         btrfs_set_node_key(c, &lower_key, 0);
1373         btrfs_set_node_blockptr(c, 0, lower->start);
1374         lower_gen = btrfs_header_generation(lower);
1375         WARN_ON(lower_gen == 0);
1376
1377         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1378
1379         btrfs_mark_buffer_dirty(c);
1380
1381         /* the super has an extra ref to root->node */
1382         free_extent_buffer(root->node);
1383         root->node = c;
1384         add_root_to_dirty_list(root);
1385         extent_buffer_get(c);
1386         path->nodes[level] = c;
1387         path->slots[level] = 0;
1388
1389         if (root->ref_cows && lower_gen != trans->transid) {
1390                 struct btrfs_path *back_path = btrfs_alloc_path();
1391                 int ret;
1392                 ret = btrfs_insert_extent_backref(trans,
1393                                                   root->fs_info->extent_root,
1394                                                   path, lower->start,
1395                                                   root->root_key.objectid,
1396                                                   trans->transid, 0, 0);
1397                 BUG_ON(ret);
1398                 btrfs_free_path(back_path);
1399         }
1400         return 0;
1401 }
1402
1403 /*
1404  * worker function to insert a single pointer in a node.
1405  * the node should have enough room for the pointer already
1406  *
1407  * slot and level indicate where you want the key to go, and
1408  * blocknr is the block the key points to.
1409  *
1410  * returns zero on success and < 0 on any error
1411  */
1412 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1413                       *root, struct btrfs_path *path, struct btrfs_disk_key
1414                       *key, u64 bytenr, int slot, int level)
1415 {
1416         struct extent_buffer *lower;
1417         int nritems;
1418
1419         BUG_ON(!path->nodes[level]);
1420         lower = path->nodes[level];
1421         nritems = btrfs_header_nritems(lower);
1422         if (slot > nritems)
1423                 BUG();
1424         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1425                 BUG();
1426         if (slot != nritems) {
1427                 memmove_extent_buffer(lower,
1428                               btrfs_node_key_ptr_offset(slot + 1),
1429                               btrfs_node_key_ptr_offset(slot),
1430                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1431         }
1432         btrfs_set_node_key(lower, key, slot);
1433         btrfs_set_node_blockptr(lower, slot, bytenr);
1434         WARN_ON(trans->transid == 0);
1435         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1436         btrfs_set_header_nritems(lower, nritems + 1);
1437         btrfs_mark_buffer_dirty(lower);
1438         return 0;
1439 }
1440
1441 /*
1442  * split the node at the specified level in path in two.
1443  * The path is corrected to point to the appropriate node after the split
1444  *
1445  * Before splitting this tries to make some room in the node by pushing
1446  * left and right, if either one works, it returns right away.
1447  *
1448  * returns 0 on success and < 0 on failure
1449  */
1450 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1451                       *root, struct btrfs_path *path, int level)
1452 {
1453         u64 root_gen;
1454         struct extent_buffer *c;
1455         struct extent_buffer *split;
1456         struct btrfs_disk_key disk_key;
1457         int mid;
1458         int ret;
1459         int wret;
1460         u32 c_nritems;
1461
1462         c = path->nodes[level];
1463         WARN_ON(btrfs_header_generation(c) != trans->transid);
1464         if (c == root->node) {
1465                 /* trying to split the root, lets make a new one */
1466                 ret = insert_new_root(trans, root, path, level + 1);
1467                 if (ret)
1468                         return ret;
1469         } else {
1470                 ret = push_nodes_for_insert(trans, root, path, level);
1471                 c = path->nodes[level];
1472                 if (!ret && btrfs_header_nritems(c) <
1473                     BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
1474                         return 0;
1475                 if (ret < 0)
1476                         return ret;
1477         }
1478
1479         c_nritems = btrfs_header_nritems(c);
1480         if (root->ref_cows)
1481                 root_gen = trans->transid;
1482         else
1483                 root_gen = 0;
1484
1485         btrfs_node_key(c, &disk_key, 0);
1486         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1487                                          root->root_key.objectid,
1488                                          root_gen,
1489                                          btrfs_disk_key_objectid(&disk_key),
1490                                          level, c->start, 0);
1491         if (IS_ERR(split))
1492                 return PTR_ERR(split);
1493
1494         btrfs_set_header_flags(split, btrfs_header_flags(c));
1495         btrfs_set_header_level(split, btrfs_header_level(c));
1496         btrfs_set_header_bytenr(split, split->start);
1497         btrfs_set_header_generation(split, trans->transid);
1498         btrfs_set_header_owner(split, root->root_key.objectid);
1499         btrfs_set_header_flags(split, 0);
1500         write_extent_buffer(split, root->fs_info->fsid,
1501                             (unsigned long)btrfs_header_fsid(split),
1502                             BTRFS_FSID_SIZE);
1503         write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
1504                             (unsigned long)btrfs_header_chunk_tree_uuid(split),
1505                             BTRFS_UUID_SIZE);
1506
1507         mid = (c_nritems + 1) / 2;
1508
1509         copy_extent_buffer(split, c,
1510                            btrfs_node_key_ptr_offset(0),
1511                            btrfs_node_key_ptr_offset(mid),
1512                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1513         btrfs_set_header_nritems(split, c_nritems - mid);
1514         btrfs_set_header_nritems(c, mid);
1515         ret = 0;
1516
1517         btrfs_mark_buffer_dirty(c);
1518         btrfs_mark_buffer_dirty(split);
1519
1520         btrfs_node_key(split, &disk_key, 0);
1521         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1522                           path->slots[level + 1] + 1,
1523                           level + 1);
1524         if (wret)
1525                 ret = wret;
1526
1527         if (path->slots[level] >= mid) {
1528                 path->slots[level] -= mid;
1529                 free_extent_buffer(c);
1530                 path->nodes[level] = split;
1531                 path->slots[level + 1] += 1;
1532         } else {
1533                 free_extent_buffer(split);
1534         }
1535         return ret;
1536 }
1537
1538 /*
1539  * how many bytes are required to store the items in a leaf.  start
1540  * and nr indicate which items in the leaf to check.  This totals up the
1541  * space used both by the item structs and the item data
1542  */
1543 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1544 {
1545         int data_len;
1546         int nritems = btrfs_header_nritems(l);
1547         int end = min(nritems, start + nr) - 1;
1548
1549         if (!nr)
1550                 return 0;
1551         data_len = btrfs_item_end_nr(l, start);
1552         data_len = data_len - btrfs_item_offset_nr(l, end);
1553         data_len += sizeof(struct btrfs_item) * nr;
1554         WARN_ON(data_len < 0);
1555         return data_len;
1556 }
1557
/*
 * Room left in 'leaf' for new items and their data: the leaf data area
 * size minus everything the current items (headers + data) occupy.
 *
 * A negative result is reported with printk and still returned to the
 * caller unchanged.
 */
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
{
        int item_count = btrfs_header_nritems(leaf);
        int bytes_used = leaf_space_used(leaf, 0, item_count);
        int bytes_free = BTRFS_LEAF_DATA_SIZE(root) - bytes_used;

        if (bytes_free >= 0)
                return bytes_free;

        printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
               bytes_free, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
               bytes_used, item_count);
        return bytes_free;
}
1575
1576 /*
1577  * push some data in the path leaf to the right, trying to free up at
1578  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1579  *
1580  * returns 1 if the push failed because the other node didn't have enough
1581  * room, 0 if everything worked out and < 0 if there were major errors.
1582  */
1583 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1584                            *root, struct btrfs_path *path, int data_size,
1585                            int empty)
1586 {
1587         struct extent_buffer *left = path->nodes[0];
1588         struct extent_buffer *right;
1589         struct extent_buffer *upper;
1590         struct btrfs_disk_key disk_key;
1591         int slot;
1592         u32 i;
1593         int free_space;
1594         int push_space = 0;
1595         int push_items = 0;
1596         struct btrfs_item *item;
1597         u32 left_nritems;
1598         u32 nr;
1599         u32 right_nritems;
1600         u32 data_end;
1601         u32 this_item_size;
1602         int ret;
1603
1604         slot = path->slots[1];
1605         if (!path->nodes[1]) {
1606                 return 1;
1607         }
1608         upper = path->nodes[1];
1609         if (slot >= btrfs_header_nritems(upper) - 1)
1610                 return 1;
1611
1612         right = read_node_slot(root, upper, slot + 1);
1613         free_space = btrfs_leaf_free_space(root, right);
1614         if (free_space < data_size + sizeof(struct btrfs_item)) {
1615                 free_extent_buffer(right);
1616                 return 1;
1617         }
1618
1619         /* cow and double check */
1620         ret = btrfs_cow_block(trans, root, right, upper,
1621                               slot + 1, &right);
1622         if (ret) {
1623                 free_extent_buffer(right);
1624                 return 1;
1625         }
1626         free_space = btrfs_leaf_free_space(root, right);
1627         if (free_space < data_size + sizeof(struct btrfs_item)) {
1628                 free_extent_buffer(right);
1629                 return 1;
1630         }
1631
1632         left_nritems = btrfs_header_nritems(left);
1633         if (left_nritems == 0) {
1634                 free_extent_buffer(right);
1635                 return 1;
1636         }
1637
1638         if (empty)
1639                 nr = 0;
1640         else
1641                 nr = 1;
1642
1643         i = left_nritems - 1;
1644         while (i >= nr) {
1645                 item = btrfs_item_nr(left, i);
1646
1647                 if (path->slots[0] == i)
1648                         push_space += data_size + sizeof(*item);
1649
1650                 this_item_size = btrfs_item_size(left, item);
1651                 if (this_item_size + sizeof(*item) + push_space > free_space)
1652                         break;
1653                 push_items++;
1654                 push_space += this_item_size + sizeof(*item);
1655                 if (i == 0)
1656                         break;
1657                 i--;
1658         }
1659
1660         if (push_items == 0) {
1661                 free_extent_buffer(right);
1662                 return 1;
1663         }
1664
1665         if (!empty && push_items == left_nritems)
1666                 WARN_ON(1);
1667
1668         /* push left to right */
1669         right_nritems = btrfs_header_nritems(right);
1670
1671         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1672         push_space -= leaf_data_end(root, left);
1673
1674         /* make room in the right data area */
1675         data_end = leaf_data_end(root, right);
1676         memmove_extent_buffer(right,
1677                               btrfs_leaf_data(right) + data_end - push_space,
1678                               btrfs_leaf_data(right) + data_end,
1679                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1680
1681         /* copy from the left data area */
1682         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1683                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1684                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1685                      push_space);
1686
1687         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1688                               btrfs_item_nr_offset(0),
1689                               right_nritems * sizeof(struct btrfs_item));
1690
1691         /* copy the items from left to right */
1692         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1693                    btrfs_item_nr_offset(left_nritems - push_items),
1694                    push_items * sizeof(struct btrfs_item));
1695
1696         /* update the item pointers */
1697         right_nritems += push_items;
1698         btrfs_set_header_nritems(right, right_nritems);
1699         push_space = BTRFS_LEAF_DATA_SIZE(root);
1700         for (i = 0; i < right_nritems; i++) {
1701                 item = btrfs_item_nr(right, i);
1702                 push_space -= btrfs_item_size(right, item);
1703                 btrfs_set_item_offset(right, item, push_space);
1704         }
1705
1706         left_nritems -= push_items;
1707         btrfs_set_header_nritems(left, left_nritems);
1708
1709         if (left_nritems)
1710                 btrfs_mark_buffer_dirty(left);
1711         btrfs_mark_buffer_dirty(right);
1712
1713         btrfs_item_key(right, &disk_key, 0);
1714         btrfs_set_node_key(upper, &disk_key, slot + 1);
1715         btrfs_mark_buffer_dirty(upper);
1716
1717         /* then fixup the leaf pointer in the path */
1718         if (path->slots[0] >= left_nritems) {
1719                 path->slots[0] -= left_nritems;
1720                 free_extent_buffer(path->nodes[0]);
1721                 path->nodes[0] = right;
1722                 path->slots[1] += 1;
1723         } else {
1724                 free_extent_buffer(right);
1725         }
1726         return 0;
1727 }
1728 /*
1729  * push some data in the path leaf to the left, trying to free up at
1730  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1731  */
1732 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1733                           *root, struct btrfs_path *path, int data_size,
1734                           int empty)
1735 {
1736         struct btrfs_disk_key disk_key;
1737         struct extent_buffer *right = path->nodes[0];
1738         struct extent_buffer *left;
1739         int slot;
1740         int i;
1741         int free_space;
1742         int push_space = 0;
1743         int push_items = 0;
1744         struct btrfs_item *item;
1745         u32 old_left_nritems;
1746         u32 right_nritems;
1747         u32 nr;
1748         int ret = 0;
1749         int wret;
1750         u32 this_item_size;
1751         u32 old_left_item_size;
1752
1753         slot = path->slots[1];
1754         if (slot == 0)
1755                 return 1;
1756         if (!path->nodes[1])
1757                 return 1;
1758
1759         right_nritems = btrfs_header_nritems(right);
1760         if (right_nritems == 0) {
1761                 return 1;
1762         }
1763
1764         left = read_node_slot(root, path->nodes[1], slot - 1);
1765         free_space = btrfs_leaf_free_space(root, left);
1766         if (free_space < data_size + sizeof(struct btrfs_item)) {
1767                 free_extent_buffer(left);
1768                 return 1;
1769         }
1770
1771         /* cow and double check */
1772         ret = btrfs_cow_block(trans, root, left,
1773                               path->nodes[1], slot - 1, &left);
1774         if (ret) {
1775                 /* we hit -ENOSPC, but it isn't fatal here */
1776                 free_extent_buffer(left);
1777                 return 1;
1778         }
1779
1780         free_space = btrfs_leaf_free_space(root, left);
1781         if (free_space < data_size + sizeof(struct btrfs_item)) {
1782                 free_extent_buffer(left);
1783                 return 1;
1784         }
1785
1786         if (empty)
1787                 nr = right_nritems;
1788         else
1789                 nr = right_nritems - 1;
1790
1791         for (i = 0; i < nr; i++) {
1792                 item = btrfs_item_nr(right, i);
1793
1794                 if (path->slots[0] == i)
1795                         push_space += data_size + sizeof(*item);
1796
1797                 this_item_size = btrfs_item_size(right, item);
1798                 if (this_item_size + sizeof(*item) + push_space > free_space)
1799                         break;
1800
1801                 push_items++;
1802                 push_space += this_item_size + sizeof(*item);
1803         }
1804
1805         if (push_items == 0) {
1806                 free_extent_buffer(left);
1807                 return 1;
1808         }
1809         if (!empty && push_items == btrfs_header_nritems(right))
1810                 WARN_ON(1);
1811
1812         /* push data from right to left */
1813         copy_extent_buffer(left, right,
1814                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1815                            btrfs_item_nr_offset(0),
1816                            push_items * sizeof(struct btrfs_item));
1817
1818         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1819                      btrfs_item_offset_nr(right, push_items -1);
1820
1821         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1822                      leaf_data_end(root, left) - push_space,
1823                      btrfs_leaf_data(right) +
1824                      btrfs_item_offset_nr(right, push_items - 1),
1825                      push_space);
1826         old_left_nritems = btrfs_header_nritems(left);
1827         BUG_ON(old_left_nritems < 0);
1828
1829         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1830         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1831                 u32 ioff;
1832
1833                 item = btrfs_item_nr(left, i);
1834                 ioff = btrfs_item_offset(left, item);
1835                 btrfs_set_item_offset(left, item,
1836                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1837         }
1838         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1839
1840         /* fixup right node */
1841         if (push_items > right_nritems) {
1842                 printk("push items %d nr %u\n", push_items, right_nritems);
1843                 WARN_ON(1);
1844         }
1845
1846         if (push_items < right_nritems) {
1847                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1848                                                   leaf_data_end(root, right);
1849                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1850                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1851                                       btrfs_leaf_data(right) +
1852                                       leaf_data_end(root, right), push_space);
1853
1854                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1855                               btrfs_item_nr_offset(push_items),
1856                              (btrfs_header_nritems(right) - push_items) *
1857                              sizeof(struct btrfs_item));
1858         }
1859         right_nritems -= push_items;
1860         btrfs_set_header_nritems(right, right_nritems);
1861         push_space = BTRFS_LEAF_DATA_SIZE(root);
1862         for (i = 0; i < right_nritems; i++) {
1863                 item = btrfs_item_nr(right, i);
1864                 push_space = push_space - btrfs_item_size(right, item);
1865                 btrfs_set_item_offset(right, item, push_space);
1866         }
1867
1868         btrfs_mark_buffer_dirty(left);
1869         if (right_nritems)
1870                 btrfs_mark_buffer_dirty(right);
1871
1872         btrfs_item_key(right, &disk_key, 0);
1873         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1874         if (wret)
1875                 ret = wret;
1876
1877         /* then fixup the leaf pointer in the path */
1878         if (path->slots[0] < push_items) {
1879                 path->slots[0] += old_left_nritems;
1880                 free_extent_buffer(path->nodes[0]);
1881                 path->nodes[0] = left;
1882                 path->slots[1] -= 1;
1883         } else {
1884                 free_extent_buffer(left);
1885                 path->slots[0] -= push_items;
1886         }
1887         BUG_ON(path->slots[0] < 0);
1888         return ret;
1889 }
1890
1891 /*
1892  * split the path's leaf in two, making sure there is at least data_size
1893  * available for the resulting leaf level of the path.
1894  *
1895  * returns 0 if all went well and < 0 on failure.
1896  */
1897 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1898                       *root, struct btrfs_key *ins_key,
1899                       struct btrfs_path *path, int data_size, int extend)
1900 {
1901         u64 root_gen;
1902         struct extent_buffer *l;
1903         u32 nritems;
1904         int mid;
1905         int slot;
1906         struct extent_buffer *right;
1907         int space_needed = data_size + sizeof(struct btrfs_item);
1908         int data_copy_size;
1909         int rt_data_off;
1910         int i;
1911         int ret = 0;
1912         int wret;
1913         int double_split;
1914         int num_doubles = 0;
1915         struct btrfs_disk_key disk_key;
1916
1917         if (extend)
1918                 space_needed = data_size;
1919
1920         if (root->ref_cows)
1921                 root_gen = trans->transid;
1922         else
1923                 root_gen = 0;
1924
1925         /* first try to make some room by pushing left and right */
1926         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1927                 wret = push_leaf_right(trans, root, path, data_size, 0);
1928                 if (wret < 0) {
1929                         return wret;
1930                 }
1931                 if (wret) {
1932                         wret = push_leaf_left(trans, root, path, data_size, 0);
1933                         if (wret < 0)
1934                                 return wret;
1935                 }
1936                 l = path->nodes[0];
1937
1938                 /* did the pushes work? */
1939                 if (btrfs_leaf_free_space(root, l) >= space_needed)
1940                         return 0;
1941         }
1942
1943         if (!path->nodes[1]) {
1944                 ret = insert_new_root(trans, root, path, 1);
1945                 if (ret)
1946                         return ret;
1947         }
1948 again:
1949         double_split = 0;
1950         l = path->nodes[0];
1951         slot = path->slots[0];
1952         nritems = btrfs_header_nritems(l);
1953         mid = (nritems + 1)/ 2;
1954
1955         btrfs_item_key(l, &disk_key, 0);
1956
1957         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
1958                                          root->root_key.objectid,
1959                                          root_gen, disk_key.objectid, 0,
1960                                          l->start, 0);
1961         if (IS_ERR(right)) {
1962                 BUG_ON(1);
1963                 return PTR_ERR(right);
1964         }
1965
1966         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1967         btrfs_set_header_bytenr(right, right->start);
1968         btrfs_set_header_generation(right, trans->transid);
1969         btrfs_set_header_owner(right, root->root_key.objectid);
1970         btrfs_set_header_level(right, 0);
1971         write_extent_buffer(right, root->fs_info->fsid,
1972                             (unsigned long)btrfs_header_fsid(right),
1973                             BTRFS_FSID_SIZE);
1974
1975         write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
1976                             (unsigned long)btrfs_header_chunk_tree_uuid(right),
1977                             BTRFS_UUID_SIZE);
1978         if (mid <= slot) {
1979                 if (nritems == 1 ||
1980                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1981                         BTRFS_LEAF_DATA_SIZE(root)) {
1982                         if (slot >= nritems) {
1983                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1984                                 btrfs_set_header_nritems(right, 0);
1985                                 wret = insert_ptr(trans, root, path,
1986                                                   &disk_key, right->start,
1987                                                   path->slots[1] + 1, 1);
1988                                 if (wret)
1989                                         ret = wret;
1990                                 free_extent_buffer(path->nodes[0]);
1991                                 path->nodes[0] = right;
1992                                 path->slots[0] = 0;
1993                                 path->slots[1] += 1;
1994                                 return ret;
1995                         }
1996                         mid = slot;
1997                         if (mid != nritems &&
1998                             leaf_space_used(l, mid, nritems - mid) +
1999                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2000                                 double_split = 1;
2001                         }
2002                 }
2003         } else {
2004                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
2005                         BTRFS_LEAF_DATA_SIZE(root)) {
2006                         if (!extend && slot == 0) {
2007                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2008                                 btrfs_set_header_nritems(right, 0);
2009                                 wret = insert_ptr(trans, root, path,
2010                                                   &disk_key,
2011                                                   right->start,
2012                                                   path->slots[1], 1);
2013                                 if (wret)
2014                                         ret = wret;
2015                                 free_extent_buffer(path->nodes[0]);
2016                                 path->nodes[0] = right;
2017                                 path->slots[0] = 0;
2018                                 if (path->slots[1] == 0) {
2019                                         wret = fixup_low_keys(trans, root,
2020                                                    path, &disk_key, 1);
2021                                         if (wret)
2022                                                 ret = wret;
2023                                 }
2024                                 return ret;
2025                         } else if (extend && slot == 0) {
2026                                 mid = 1;
2027                         } else {
2028                                 mid = slot;
2029                                 if (mid != nritems &&
2030                                     leaf_space_used(l, mid, nritems - mid) +
2031                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2032                                         double_split = 1;
2033                                 }
2034                         }
2035                 }
2036         }
2037         nritems = nritems - mid;
2038         btrfs_set_header_nritems(right, nritems);
2039         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2040
2041         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2042                            btrfs_item_nr_offset(mid),
2043                            nritems * sizeof(struct btrfs_item));
2044
2045         copy_extent_buffer(right, l,
2046                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2047                      data_copy_size, btrfs_leaf_data(l) +
2048                      leaf_data_end(root, l), data_copy_size);
2049
2050         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2051                       btrfs_item_end_nr(l, mid);
2052
2053         for (i = 0; i < nritems; i++) {
2054                 struct btrfs_item *item = btrfs_item_nr(right, i);
2055                 u32 ioff = btrfs_item_offset(right, item);
2056                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2057         }
2058
2059         btrfs_set_header_nritems(l, mid);
2060         ret = 0;
2061         btrfs_item_key(right, &disk_key, 0);
2062         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2063                           path->slots[1] + 1, 1);
2064         if (wret)
2065                 ret = wret;
2066
2067         btrfs_mark_buffer_dirty(right);
2068         btrfs_mark_buffer_dirty(l);
2069         BUG_ON(path->slots[0] != slot);
2070
2071         if (mid <= slot) {
2072                 free_extent_buffer(path->nodes[0]);
2073                 path->nodes[0] = right;
2074                 path->slots[0] -= mid;
2075                 path->slots[1] += 1;
2076         } else
2077                 free_extent_buffer(right);
2078
2079         BUG_ON(path->slots[0] < 0);
2080
2081         if (double_split) {
2082                 BUG_ON(num_doubles != 0);
2083                 num_doubles++;
2084                 goto again;
2085         }
2086         return ret;
2087 }
2088
2089 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2090                         struct btrfs_root *root,
2091                         struct btrfs_path *path,
2092                         u32 new_size, int from_end)
2093 {
2094         int ret = 0;
2095         int slot;
2096         int slot_orig;
2097         struct extent_buffer *leaf;
2098         struct btrfs_item *item;
2099         u32 nritems;
2100         unsigned int data_end;
2101         unsigned int old_data_start;
2102         unsigned int old_size;
2103         unsigned int size_diff;
2104         int i;
2105
2106         slot_orig = path->slots[0];
2107         leaf = path->nodes[0];
2108         slot = path->slots[0];
2109
2110         old_size = btrfs_item_size_nr(leaf, slot);
2111         if (old_size == new_size)
2112                 return 0;
2113
2114         nritems = btrfs_header_nritems(leaf);
2115         data_end = leaf_data_end(root, leaf);
2116
2117         old_data_start = btrfs_item_offset_nr(leaf, slot);
2118
2119         size_diff = old_size - new_size;
2120
2121         BUG_ON(slot < 0);
2122         BUG_ON(slot >= nritems);
2123
2124         /*
2125          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2126          */
2127         /* first correct the data pointers */
2128         for (i = slot; i < nritems; i++) {
2129                 u32 ioff;
2130                 item = btrfs_item_nr(leaf, i);
2131                 ioff = btrfs_item_offset(leaf, item);
2132                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2133         }
2134
2135         /* shift the data */
2136         if (from_end) {
2137                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2138                               data_end + size_diff, btrfs_leaf_data(leaf) +
2139                               data_end, old_data_start + new_size - data_end);
2140         } else {
2141                 struct btrfs_disk_key disk_key;
2142                 u64 offset;
2143
2144                 btrfs_item_key(leaf, &disk_key, slot);
2145
2146                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2147                         unsigned long ptr;
2148                         struct btrfs_file_extent_item *fi;
2149
2150                         fi = btrfs_item_ptr(leaf, slot,
2151                                             struct btrfs_file_extent_item);
2152                         fi = (struct btrfs_file_extent_item *)(
2153                              (unsigned long)fi - size_diff);
2154
2155                         if (btrfs_file_extent_type(leaf, fi) ==
2156                             BTRFS_FILE_EXTENT_INLINE) {
2157                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2158                                 memmove_extent_buffer(leaf, ptr,
2159                                         (unsigned long)fi,
2160                                         offsetof(struct btrfs_file_extent_item,
2161                                                  disk_bytenr));
2162                         }
2163                 }
2164
2165                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2166                               data_end + size_diff, btrfs_leaf_data(leaf) +
2167                               data_end, old_data_start - data_end);
2168
2169                 offset = btrfs_disk_key_offset(&disk_key);
2170                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2171                 btrfs_set_item_key(leaf, &disk_key, slot);
2172                 if (slot == 0)
2173                         fixup_low_keys(trans, root, path, &disk_key, 1);
2174         }
2175
2176         item = btrfs_item_nr(leaf, slot);
2177         btrfs_set_item_size(leaf, item, new_size);
2178         btrfs_mark_buffer_dirty(leaf);
2179
2180         ret = 0;
2181         if (btrfs_leaf_free_space(root, leaf) < 0) {
2182                 btrfs_print_leaf(root, leaf);
2183                 BUG();
2184         }
2185         return ret;
2186 }
2187
2188 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2189                       struct btrfs_root *root, struct btrfs_path *path,
2190                       u32 data_size)
2191 {
2192         int ret = 0;
2193         int slot;
2194         int slot_orig;
2195         struct extent_buffer *leaf;
2196         struct btrfs_item *item;
2197         u32 nritems;
2198         unsigned int data_end;
2199         unsigned int old_data;
2200         unsigned int old_size;
2201         int i;
2202
2203         slot_orig = path->slots[0];
2204         leaf = path->nodes[0];
2205
2206         nritems = btrfs_header_nritems(leaf);
2207         data_end = leaf_data_end(root, leaf);
2208
2209         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2210                 btrfs_print_leaf(root, leaf);
2211                 BUG();
2212         }
2213         slot = path->slots[0];
2214         old_data = btrfs_item_end_nr(leaf, slot);
2215
2216         BUG_ON(slot < 0);
2217         if (slot >= nritems) {
2218                 btrfs_print_leaf(root, leaf);
2219                 printk("slot %d too large, nritems %d\n", slot, nritems);
2220                 BUG_ON(1);
2221         }
2222
2223         /*
2224          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2225          */
2226         /* first correct the data pointers */
2227         for (i = slot; i < nritems; i++) {
2228                 u32 ioff;
2229                 item = btrfs_item_nr(leaf, i);
2230                 ioff = btrfs_item_offset(leaf, item);
2231                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2232         }
2233
2234         /* shift the data */
2235         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2236                       data_end - data_size, btrfs_leaf_data(leaf) +
2237                       data_end, old_data - data_end);
2238
2239         data_end = old_data;
2240         old_size = btrfs_item_size_nr(leaf, slot);
2241         item = btrfs_item_nr(leaf, slot);
2242         btrfs_set_item_size(leaf, item, old_size + data_size);
2243         btrfs_mark_buffer_dirty(leaf);
2244
2245         ret = 0;
2246         if (btrfs_leaf_free_space(root, leaf) < 0) {
2247                 btrfs_print_leaf(root, leaf);
2248                 BUG();
2249         }
2250         return ret;
2251 }
2252
2253 /*
2254  * Given a key and some data, insert an item into the tree.
2255  * This does all the path init required, making room in the tree if needed.
2256  */
2257 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2258                             struct btrfs_root *root,
2259                             struct btrfs_path *path,
2260                             struct btrfs_key *cpu_key, u32 *data_size,
2261                             int nr)
2262 {
2263         struct extent_buffer *leaf;
2264         struct btrfs_item *item;
2265         int ret = 0;
2266         int slot;
2267         int slot_orig;
2268         int i;
2269         u32 nritems;
2270         u32 total_size = 0;
2271         u32 total_data = 0;
2272         unsigned int data_end;
2273         struct btrfs_disk_key disk_key;
2274
2275         for (i = 0; i < nr; i++) {
2276                 total_data += data_size[i];
2277         }
2278
2279         /* create a root if there isn't one */
2280         if (!root->node)
2281                 BUG();
2282
2283         total_size = total_data + (nr - 1) * sizeof(struct btrfs_item);
2284         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2285         if (ret == 0) {
2286                 return -EEXIST;
2287         }
2288         if (ret < 0)
2289                 goto out;
2290
2291         slot_orig = path->slots[0];
2292         leaf = path->nodes[0];
2293
2294         nritems = btrfs_header_nritems(leaf);
2295         data_end = leaf_data_end(root, leaf);
2296
2297         if (btrfs_leaf_free_space(root, leaf) <
2298             sizeof(struct btrfs_item) + total_size) {
2299                 btrfs_print_leaf(root, leaf);
2300                 printk("not enough freespace need %u have %d\n",
2301                        total_size, btrfs_leaf_free_space(root, leaf));
2302                 BUG();
2303         }
2304
2305         slot = path->slots[0];
2306         BUG_ON(slot < 0);
2307
2308         if (slot != nritems) {
2309                 int i;
2310                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2311
2312                 if (old_data < data_end) {
2313                         btrfs_print_leaf(root, leaf);
2314                         printk("slot %d old_data %d data_end %d\n",
2315                                slot, old_data, data_end);
2316                         BUG_ON(1);
2317                 }
2318                 /*
2319                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2320                  */
2321                 /* first correct the data pointers */
2322                 for (i = slot; i < nritems; i++) {
2323                         u32 ioff;
2324
2325                         item = btrfs_item_nr(leaf, i);
2326                         ioff = btrfs_item_offset(leaf, item);
2327                         btrfs_set_item_offset(leaf, item, ioff - total_data);
2328                 }
2329
2330                 /* shift the items */
2331                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2332                               btrfs_item_nr_offset(slot),
2333                               (nritems - slot) * sizeof(struct btrfs_item));
2334
2335                 /* shift the data */
2336                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2337                               data_end - total_data, btrfs_leaf_data(leaf) +
2338                               data_end, old_data - data_end);
2339                 data_end = old_data;
2340         }
2341
2342         /* setup the item for the new data */
2343         for (i = 0; i < nr; i++) {
2344                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
2345                 btrfs_set_item_key(leaf, &disk_key, slot + i);
2346                 item = btrfs_item_nr(leaf, slot + i);
2347                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
2348                 data_end -= data_size[i];
2349                 btrfs_set_item_size(leaf, item, data_size[i]);
2350         }
2351         btrfs_set_header_nritems(leaf, nritems + nr);
2352         btrfs_mark_buffer_dirty(leaf);
2353
2354         ret = 0;
2355         if (slot == 0) {
2356                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2357                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2358         }
2359
2360         if (btrfs_leaf_free_space(root, leaf) < 0) {
2361                 btrfs_print_leaf(root, leaf);
2362                 BUG();
2363         }
2364
2365 out:
2366         return ret;
2367 }
2368
2369 /*
2370  * Given a key and some data, insert an item into the tree.
2371  * This does all the path init required, making room in the tree if needed.
2372  */
2373 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2374                       *root, struct btrfs_key *cpu_key, void *data, u32
2375                       data_size)
2376 {
2377         int ret = 0;
2378         struct btrfs_path *path;
2379         struct extent_buffer *leaf;
2380         unsigned long ptr;
2381
2382         path = btrfs_alloc_path();
2383         BUG_ON(!path);
2384         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2385         if (!ret) {
2386                 leaf = path->nodes[0];
2387                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2388                 write_extent_buffer(leaf, data, ptr, data_size);
2389                 btrfs_mark_buffer_dirty(leaf);
2390         }
2391         btrfs_free_path(path);
2392         return ret;
2393 }
2394
2395 /*
2396  * delete the pointer from a given node.
2397  *
2398  * If the delete empties a node, the node is removed from the tree,
2399  * continuing all the way the root if required.  The root is converted into
2400  * a leaf if all the nodes are emptied.
2401  */
2402 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2403                    struct btrfs_path *path, int level, int slot)
2404 {
2405         struct extent_buffer *parent = path->nodes[level];
2406         u32 nritems;
2407         int ret = 0;
2408         int wret;
2409
2410         nritems = btrfs_header_nritems(parent);
2411         if (slot != nritems -1) {
2412                 memmove_extent_buffer(parent,
2413                               btrfs_node_key_ptr_offset(slot),
2414                               btrfs_node_key_ptr_offset(slot + 1),
2415                               sizeof(struct btrfs_key_ptr) *
2416                               (nritems - slot - 1));
2417         }
2418         nritems--;
2419         btrfs_set_header_nritems(parent, nritems);
2420         if (nritems == 0 && parent == root->node) {
2421                 BUG_ON(btrfs_header_level(root->node) != 1);
2422                 /* just turn the root into a leaf and break */
2423                 btrfs_set_header_level(root->node, 0);
2424         } else if (slot == 0) {
2425                 struct btrfs_disk_key disk_key;
2426
2427                 btrfs_node_key(parent, &disk_key, 0);
2428                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2429                 if (wret)
2430                         ret = wret;
2431         }
2432         btrfs_mark_buffer_dirty(parent);
2433         return ret;
2434 }
2435
2436 /*
2437  * delete the item at the leaf level in path.  If that empties
2438  * the leaf, remove it from the tree
2439  */
2440 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2441                     struct btrfs_path *path, int slot, int nr)
2442 {
2443         struct extent_buffer *leaf;
2444         struct btrfs_item *item;
2445         int last_off;
2446         int dsize = 0;
2447         int ret = 0;
2448         int wret;
2449         int i;
2450         u32 nritems;
2451
2452         leaf = path->nodes[0];
2453         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2454
2455         for (i = 0; i < nr; i++)
2456                 dsize += btrfs_item_size_nr(leaf, slot + i);
2457
2458         nritems = btrfs_header_nritems(leaf);
2459
2460         if (slot + nr != nritems) {
2461                 int i;
2462                 int data_end = leaf_data_end(root, leaf);
2463
2464                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2465                               data_end + dsize,
2466                               btrfs_leaf_data(leaf) + data_end,
2467                               last_off - data_end);
2468
2469                 for (i = slot + nr; i < nritems; i++) {
2470                         u32 ioff;
2471
2472                         item = btrfs_item_nr(leaf, i);
2473                         ioff = btrfs_item_offset(leaf, item);
2474                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2475                 }
2476
2477                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2478                               btrfs_item_nr_offset(slot + nr),
2479                               sizeof(struct btrfs_item) *
2480                               (nritems - slot - nr));
2481         }
2482         btrfs_set_header_nritems(leaf, nritems - nr);
2483         nritems -= nr;
2484
2485         /* delete the leaf if we've emptied it */
2486         if (nritems == 0) {
2487                 if (leaf == root->node) {
2488                         btrfs_set_header_level(leaf, 0);
2489                 } else {
2490                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2491                         clean_tree_block(trans, root, leaf);
2492                         wait_on_tree_block_writeback(root, leaf);
2493                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2494                         if (wret)
2495                                 ret = wret;
2496                         wret = btrfs_free_extent(trans, root,
2497                                          leaf->start, leaf->len,
2498                                          btrfs_header_owner(path->nodes[1]),
2499                                          root_gen, 0, 0, 1);
2500                         if (wret)
2501                                 ret = wret;
2502                 }
2503         } else {
2504                 int used = leaf_space_used(leaf, 0, nritems);
2505                 if (slot == 0) {
2506                         struct btrfs_disk_key disk_key;
2507
2508                         btrfs_item_key(leaf, &disk_key, 0);
2509                         wret = fixup_low_keys(trans, root, path,
2510                                               &disk_key, 1);
2511                         if (wret)
2512                                 ret = wret;
2513                 }
2514
2515                 /* delete the leaf if it is mostly empty */
2516                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2517                         /* push_leaf_left fixes the path.
2518                          * make sure the path still points to our leaf
2519                          * for possible call to del_ptr below
2520                          */
2521                         slot = path->slots[1];
2522                         extent_buffer_get(leaf);
2523
2524                         wret = push_leaf_left(trans, root, path, 1, 1);
2525                         if (wret < 0 && wret != -ENOSPC)
2526                                 ret = wret;
2527
2528                         if (path->nodes[0] == leaf &&
2529                             btrfs_header_nritems(leaf)) {
2530                                 wret = push_leaf_right(trans, root, path, 1, 1);
2531                                 if (wret < 0 && wret != -ENOSPC)
2532                                         ret = wret;
2533                         }
2534
2535                         if (btrfs_header_nritems(leaf) == 0) {
2536                                 u64 root_gen;
2537                                 u64 bytenr = leaf->start;
2538                                 u32 blocksize = leaf->len;
2539
2540                                 root_gen = btrfs_header_generation(
2541                                                            path->nodes[1]);
2542
2543                                 clean_tree_block(trans, root, leaf);
2544                                 wait_on_tree_block_writeback(root, leaf);
2545
2546                                 wret = del_ptr(trans, root, path, 1, slot);
2547                                 if (wret)
2548                                         ret = wret;
2549
2550                                 free_extent_buffer(leaf);
2551                                 wret = btrfs_free_extent(trans, root, bytenr,
2552                                              blocksize,
2553                                              btrfs_header_owner(path->nodes[1]),
2554                                              root_gen, 0, 0, 1);
2555                                 if (wret)
2556                                         ret = wret;
2557                         } else {
2558                                 btrfs_mark_buffer_dirty(leaf);
2559                                 free_extent_buffer(leaf);
2560                         }
2561                 } else {
2562                         btrfs_mark_buffer_dirty(leaf);
2563                 }
2564         }
2565         return ret;
2566 }
2567
2568 /*
2569  * walk up the tree as far as required to find the previous leaf.
2570  * returns 0 if it found something or 1 if there are no lesser leaves.
2571  * returns < 0 on io errors.
2572  */
2573 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2574 {
2575         int slot;
2576         int level = 1;
2577         struct extent_buffer *c;
2578         struct extent_buffer *next = NULL;
2579
2580         while(level < BTRFS_MAX_LEVEL) {
2581                 if (!path->nodes[level])
2582                         return 1;
2583
2584                 slot = path->slots[level];
2585                 c = path->nodes[level];
2586                 if (slot == 0) {
2587                         level++;
2588                         if (level == BTRFS_MAX_LEVEL)
2589                                 return 1;
2590                         continue;
2591                 }
2592                 slot--;
2593
2594                 if (next)
2595                         free_extent_buffer(next);
2596
2597                 next = read_node_slot(root, c, slot);
2598                 break;
2599         }
2600         path->slots[level] = slot;
2601         while(1) {
2602                 level--;
2603                 c = path->nodes[level];
2604                 free_extent_buffer(c);
2605                 slot = btrfs_header_nritems(next);
2606                 if (slot != 0)
2607                         slot--;
2608                 path->nodes[level] = next;
2609                 path->slots[level] = slot;
2610                 if (!level)
2611                         break;
2612                 next = read_node_slot(root, next, slot);
2613         }
2614         return 0;
2615 }
2616
2617 /*
2618  * walk up the tree as far as required to find the next leaf.
2619  * returns 0 if it found something or 1 if there are no greater leaves.
2620  * returns < 0 on io errors.
2621  */
2622 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2623 {
2624         int slot;
2625         int level = 1;
2626         struct extent_buffer *c;
2627         struct extent_buffer *next = NULL;
2628
2629         while(level < BTRFS_MAX_LEVEL) {
2630                 if (!path->nodes[level])
2631                         return 1;
2632
2633                 slot = path->slots[level] + 1;
2634                 c = path->nodes[level];
2635                 if (slot >= btrfs_header_nritems(c)) {
2636                         level++;
2637                         if (level == BTRFS_MAX_LEVEL)
2638                                 return 1;
2639                         continue;
2640                 }
2641
2642                 if (next)
2643                         free_extent_buffer(next);
2644
2645                 if (path->reada)
2646                         reada_for_search(root, path, level, slot, 0);
2647
2648                 next = read_node_slot(root, c, slot);
2649                 break;
2650         }
2651         path->slots[level] = slot;
2652         while(1) {
2653                 level--;
2654                 c = path->nodes[level];
2655                 free_extent_buffer(c);
2656                 path->nodes[level] = next;
2657                 path->slots[level] = 0;
2658                 if (!level)
2659                         break;
2660                 if (path->reada)
2661                         reada_for_search(root, path, level, 0, 0);
2662                 next = read_node_slot(root, next, 0);
2663         }
2664         return 0;
2665 }
2666
2667 int btrfs_previous_item(struct btrfs_root *root,
2668                         struct btrfs_path *path, u64 min_objectid,
2669                         int type)
2670 {
2671         struct btrfs_key found_key;
2672         struct extent_buffer *leaf;
2673         int ret;
2674
2675         while(1) {
2676                 if (path->slots[0] == 0) {
2677                         ret = btrfs_prev_leaf(root, path);
2678                         if (ret != 0)
2679                                 return ret;
2680                 } else {
2681                         path->slots[0]--;
2682                 }
2683                 leaf = path->nodes[0];
2684                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2685                 if (found_key.type == type)
2686                         return 0;
2687         }
2688         return 1;
2689 }
2690