Keep more pointers free in the nodes for double splits
[platform/upstream/btrfs-progs.git] / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include "ctree.h"
19 #include "disk-io.h"
20 #include "transaction.h"
21 #include "print-tree.h"
22
/*
 * Forward declarations of file-local (static) helpers, so that they can be
 * called before the point where their bodies appear.
 */
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_key *ins_key,
                      struct btrfs_path *path, int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct extent_buffer *dst,
                          struct extent_buffer *src);
static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *dst_buf,
                              struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_path *path, int level, int slot);
37
38 inline void btrfs_init_path(struct btrfs_path *p)
39 {
40         memset(p, 0, sizeof(*p));
41 }
42
43 struct btrfs_path *btrfs_alloc_path(void)
44 {
45         struct btrfs_path *path;
46         path = kmalloc(sizeof(struct btrfs_path), GFP_NOFS);
47         if (path) {
48                 btrfs_init_path(path);
49                 path->reada = 0;
50         }
51         return path;
52 }
53
/*
 * Release everything a path references and then free the path itself.
 *
 * A NULL path is accepted and ignored: btrfs_release_path() dereferences
 * its argument, so it must not be reached with NULL.
 */
void btrfs_free_path(struct btrfs_path *p)
{
        if (!p)
                return;
        btrfs_release_path(NULL, p);
        kfree(p);
}
59
60 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
61 {
62         int i;
63         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
64                 if (!p->nodes[i])
65                         break;
66                 free_extent_buffer(p->nodes[i]);
67         }
68         memset(p, 0, sizeof(*p));
69 }
70
71 static void add_root_to_dirty_list(struct btrfs_root *root)
72 {
73         if (root->track_dirty && list_empty(&root->dirty_list)) {
74                 list_add(&root->dirty_list,
75                          &root->fs_info->dirty_cowonly_roots);
76         }
77 }
78
79 int btrfs_copy_root(struct btrfs_trans_handle *trans,
80                       struct btrfs_root *root,
81                       struct extent_buffer *buf,
82                       struct extent_buffer **cow_ret, u64 new_root_objectid)
83 {
84         struct extent_buffer *cow;
85         u32 nritems;
86         int ret = 0;
87         int level;
88         struct btrfs_key first_key;
89         struct btrfs_root *new_root;
90
91         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
92         if (!new_root)
93                 return -ENOMEM;
94
95         memcpy(new_root, root, sizeof(*new_root));
96         new_root->root_key.objectid = new_root_objectid;
97
98         WARN_ON(root->ref_cows && trans->transid !=
99                 root->fs_info->running_transaction->transid);
100         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
101
102         level = btrfs_header_level(buf);
103         nritems = btrfs_header_nritems(buf);
104         if (nritems) {
105                 if (level == 0)
106                         btrfs_item_key_to_cpu(buf, &first_key, 0);
107                 else
108                         btrfs_node_key_to_cpu(buf, &first_key, 0);
109         } else {
110                 first_key.objectid = 0;
111         }
112         cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
113                                        new_root_objectid,
114                                        trans->transid, first_key.objectid,
115                                        level, buf->start, 0);
116         if (IS_ERR(cow)) {
117                 kfree(new_root);
118                 return PTR_ERR(cow);
119         }
120
121         copy_extent_buffer(cow, buf, 0, 0, cow->len);
122         btrfs_set_header_bytenr(cow, cow->start);
123         btrfs_set_header_generation(cow, trans->transid);
124         btrfs_set_header_owner(cow, new_root_objectid);
125         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
126
127         WARN_ON(btrfs_header_generation(buf) > trans->transid);
128         ret = btrfs_inc_ref(trans, new_root, buf);
129         kfree(new_root);
130
131         if (ret)
132                 return ret;
133
134         btrfs_mark_buffer_dirty(cow);
135         *cow_ret = cow;
136         return 0;
137 }
138
139 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
140                              struct btrfs_root *root,
141                              struct extent_buffer *buf,
142                              struct extent_buffer *parent, int parent_slot,
143                              struct extent_buffer **cow_ret,
144                              u64 search_start, u64 empty_size)
145 {
146         u64 root_gen;
147         struct extent_buffer *cow;
148         u32 nritems;
149         int ret = 0;
150         int different_trans = 0;
151         int level;
152         struct btrfs_key first_key;
153
154         if (root->ref_cows) {
155                 root_gen = trans->transid;
156         } else {
157                 root_gen = 0;
158         }
159
160         WARN_ON(root->ref_cows && trans->transid !=
161                 root->fs_info->running_transaction->transid);
162         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
163
164         level = btrfs_header_level(buf);
165         nritems = btrfs_header_nritems(buf);
166         if (nritems) {
167                 if (level == 0)
168                         btrfs_item_key_to_cpu(buf, &first_key, 0);
169                 else
170                         btrfs_node_key_to_cpu(buf, &first_key, 0);
171         } else {
172                 first_key.objectid = 0;
173         }
174         cow = __btrfs_alloc_free_block(trans, root, buf->len,
175                                      root->root_key.objectid,
176                                      root_gen, first_key.objectid, level,
177                                      search_start, empty_size);
178         if (IS_ERR(cow))
179                 return PTR_ERR(cow);
180
181         copy_extent_buffer(cow, buf, 0, 0, cow->len);
182         btrfs_set_header_bytenr(cow, cow->start);
183         btrfs_set_header_generation(cow, trans->transid);
184         btrfs_set_header_owner(cow, root->root_key.objectid);
185         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
186
187         WARN_ON(btrfs_header_generation(buf) > trans->transid);
188         if (btrfs_header_generation(buf) != trans->transid) {
189                 different_trans = 1;
190                 ret = btrfs_inc_ref(trans, root, buf);
191                 if (ret)
192                         return ret;
193         } else {
194                 clean_tree_block(trans, root, buf);
195         }
196
197         if (buf == root->node) {
198                 root_gen = btrfs_header_generation(buf);
199                 root->node = cow;
200                 extent_buffer_get(cow);
201                 if (buf != root->commit_root) {
202                         btrfs_free_extent(trans, root, buf->start,
203                                           buf->len, root->root_key.objectid,
204                                           root_gen, 0, 0, 1);
205                 }
206                 free_extent_buffer(buf);
207                 add_root_to_dirty_list(root);
208         } else {
209                 root_gen = btrfs_header_generation(parent);
210                 btrfs_set_node_blockptr(parent, parent_slot,
211                                         cow->start);
212                 WARN_ON(trans->transid == 0);
213                 btrfs_set_node_ptr_generation(parent, parent_slot,
214                                               trans->transid);
215                 btrfs_mark_buffer_dirty(parent);
216                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
217                 btrfs_free_extent(trans, root, buf->start, buf->len,
218                                   btrfs_header_owner(parent), root_gen,
219                                   0, 0, 1);
220         }
221         free_extent_buffer(buf);
222         btrfs_mark_buffer_dirty(cow);
223         *cow_ret = cow;
224         return 0;
225 }
226
227 int btrfs_cow_block(struct btrfs_trans_handle *trans,
228                     struct btrfs_root *root, struct extent_buffer *buf,
229                     struct extent_buffer *parent, int parent_slot,
230                     struct extent_buffer **cow_ret)
231 {
232         u64 search_start;
233         int ret;
234         /*
235         if (trans->transaction != root->fs_info->running_transaction) {
236                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
237                        root->fs_info->running_transaction->transid);
238                 WARN_ON(1);
239         }
240         */
241         if (trans->transid != root->fs_info->generation) {
242                 printk(KERN_CRIT "trans %llu running %llu\n",
243                         (unsigned long long)trans->transid,
244                         (unsigned long long)root->fs_info->generation);
245                 WARN_ON(1);
246         }
247         if (btrfs_header_generation(buf) == trans->transid &&
248             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
249                 *cow_ret = buf;
250                 return 0;
251         }
252
253         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
254         ret = __btrfs_cow_block(trans, root, buf, parent,
255                                  parent_slot, cow_ret, search_start, 0);
256         return ret;
257 }
258
259 /*
260 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
261 {
262         if (blocknr < other && other - (blocknr + blocksize) < 32768)
263                 return 1;
264         if (blocknr > other && blocknr - (other + blocksize) < 32768)
265                 return 1;
266         return 0;
267 }
268 */
269
270 /*
271  * compare two keys in a memcmp fashion
272  */
273 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
274 {
275         struct btrfs_key k1;
276
277         btrfs_disk_key_to_cpu(&k1, disk);
278
279         if (k1.objectid > k2->objectid)
280                 return 1;
281         if (k1.objectid < k2->objectid)
282                 return -1;
283         if (k1.type > k2->type)
284                 return 1;
285         if (k1.type < k2->type)
286                 return -1;
287         if (k1.offset > k2->offset)
288                 return 1;
289         if (k1.offset < k2->offset)
290                 return -1;
291         return 0;
292 }
293
294
#if 0
/*
 * NOTE: this whole function is compiled out by the surrounding #if 0.
 * It calls close_blocks() (only present as commented-out code above) and
 * comp_keys(), neither of which is active in the visible part of this
 * file, so it would need porting before it could be re-enabled.
 *
 * As written, it walks the child pointers of 'parent' starting at
 * start_slot.  Slots whose key sorts before 'progress' are skipped until
 * the first one that does not.  A child whose block number is "close"
 * (per close_blocks()) to its neighbours is left alone; every other child
 * is read if necessary and relocated with __btrfs_cow_block(), using
 * *last_ret / the last block seen as the allocation hint.  *last_ret is
 * updated to the start of each newly written block.
 *
 * With cache_only set, only level-1 parents are processed and children
 * that are not already cached and up to date are skipped.
 *
 * Returns 0, or the first error from __btrfs_cow_block().
 */
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct extent_buffer *parent,
                       int start_slot, int cache_only, u64 *last_ret,
                       struct btrfs_key *progress)
{
        struct extent_buffer *cur;
        struct extent_buffer *tmp;
        u64 blocknr;
        u64 search_start = *last_ret;
        u64 last_block = 0;
        u64 other;
        u32 parent_nritems;
        int end_slot;
        int i;
        int err = 0;
        int parent_level;
        int uptodate;
        u32 blocksize;
        int progress_passed = 0;
        struct btrfs_disk_key disk_key;

        parent_level = btrfs_header_level(parent);
        if (cache_only && parent_level != 1)
                return 0;

        if (trans->transaction != root->fs_info->running_transaction) {
                printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
                       root->fs_info->running_transaction->transid);
                WARN_ON(1);
        }
        if (trans->transid != root->fs_info->generation) {
                printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
                       root->fs_info->generation);
                WARN_ON(1);
        }

        parent_nritems = btrfs_header_nritems(parent);
        blocksize = btrfs_level_size(root, parent_level - 1);
        end_slot = parent_nritems;

        /* a parent with a single child has nothing to be placed next to */
        if (parent_nritems == 1)
                return 0;

        for (i = start_slot; i < end_slot; i++) {
                int close = 1;

                if (!parent->map_token) {
                        map_extent_buffer(parent,
                                        btrfs_node_key_ptr_offset(i),
                                        sizeof(struct btrfs_key_ptr),
                                        &parent->map_token, &parent->kaddr,
                                        &parent->map_start, &parent->map_len,
                                        KM_USER1);
                }
                btrfs_node_key(parent, &disk_key, i);
                if (!progress_passed && comp_keys(&disk_key, progress) < 0)
                        continue;

                progress_passed = 1;
                blocknr = btrfs_node_blockptr(parent, i);
                if (last_block == 0)
                        last_block = blocknr;

                /* compare against the previous and then the next sibling */
                if (i > 0) {
                        other = btrfs_node_blockptr(parent, i - 1);
                        close = close_blocks(blocknr, other, blocksize);
                }
                if (close && i < end_slot - 2) {
                        other = btrfs_node_blockptr(parent, i + 1);
                        close = close_blocks(blocknr, other, blocksize);
                }
                if (close) {
                        last_block = blocknr;
                        continue;
                }
                if (parent->map_token) {
                        unmap_extent_buffer(parent, parent->map_token,
                                            KM_USER1);
                        parent->map_token = NULL;
                }

                cur = btrfs_find_tree_block(root, blocknr, blocksize);
                if (cur)
                        uptodate = btrfs_buffer_uptodate(cur);
                else
                        uptodate = 0;
                if (!cur || !uptodate) {
                        if (cache_only) {
                                free_extent_buffer(cur);
                                continue;
                        }
                        if (!cur) {
                                cur = read_tree_block(root, blocknr,
                                                         blocksize);
                        } else if (!uptodate) {
                                btrfs_read_buffer(cur);
                        }
                }
                if (search_start == 0)
                        search_start = last_block;

                err = __btrfs_cow_block(trans, root, cur, parent, i,
                                        &tmp, search_start,
                                        min(16 * blocksize,
                                            (end_slot - i) * blocksize));
                if (err) {
                        free_extent_buffer(cur);
                        break;
                }
                search_start = tmp->start;
                last_block = tmp->start;
                *last_ret = search_start;
                if (parent_level == 1)
                        btrfs_clear_buffer_defrag(tmp);
                free_extent_buffer(tmp);
        }
        if (parent->map_token) {
                unmap_extent_buffer(parent, parent->map_token,
                                    KM_USER1);
                parent->map_token = NULL;
        }
        return err;
}
#endif
420
421 /*
422  * The leaf data grows from end-to-front in the node.
423  * this returns the address of the start of the last item,
424  * which is the stop of the leaf data stack
425  */
426 static inline unsigned int leaf_data_end(struct btrfs_root *root,
427                                          struct extent_buffer *leaf)
428 {
429         u32 nr = btrfs_header_nritems(leaf);
430         if (nr == 0)
431                 return BTRFS_LEAF_DATA_SIZE(root);
432         return btrfs_item_offset_nr(leaf, nr - 1);
433 }
434
435 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
436                       int level)
437 {
438         struct extent_buffer *parent = NULL;
439         struct extent_buffer *node = path->nodes[level];
440         struct btrfs_disk_key parent_key;
441         struct btrfs_disk_key node_key;
442         int parent_slot;
443         int slot;
444         struct btrfs_key cpukey;
445         u32 nritems = btrfs_header_nritems(node);
446
447         if (path->nodes[level + 1])
448                 parent = path->nodes[level + 1];
449
450         slot = path->slots[level];
451         BUG_ON(nritems == 0);
452         if (parent) {
453                 parent_slot = path->slots[level + 1];
454                 btrfs_node_key(parent, &parent_key, parent_slot);
455                 btrfs_node_key(node, &node_key, 0);
456                 BUG_ON(memcmp(&parent_key, &node_key,
457                               sizeof(struct btrfs_disk_key)));
458                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
459                        btrfs_header_bytenr(node));
460         }
461         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
462         if (slot != 0) {
463                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
464                 btrfs_node_key(node, &node_key, slot);
465                 BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0);
466         }
467         if (slot < nritems - 1) {
468                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
469                 btrfs_node_key(node, &node_key, slot);
470                 BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0);
471         }
472         return 0;
473 }
474
/*
 * Sanity-check the leaf at path->nodes[level].  An empty leaf passes
 * immediately; any violation trips a BUG_ON (some after printing the
 * leaf).  Returns 0 otherwise.
 *
 * NOTE(review): the slot is always taken from path->slots[0], while the
 * leaf and its parent are looked up with 'level'; the two only agree when
 * level is 0 -- confirm callers never pass anything else.
 */
static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
                      int level)
{
        struct extent_buffer *leaf = path->nodes[level];
        struct extent_buffer *parent = NULL;
        int parent_slot;
        struct btrfs_key cpukey;
        struct btrfs_disk_key parent_key;
        struct btrfs_disk_key leaf_key;
        int slot = path->slots[0];

        u32 nritems = btrfs_header_nritems(leaf);

        if (path->nodes[level + 1])
                parent = path->nodes[level + 1];

        if (nritems == 0)
                return 0;

        /* the parent must point at this leaf and carry its first key */
        if (parent) {
                parent_slot = path->slots[level + 1];
                btrfs_node_key(parent, &parent_key, parent_slot);
                btrfs_item_key(leaf, &leaf_key, 0);

                BUG_ON(memcmp(&parent_key, &leaf_key,
                       sizeof(struct btrfs_disk_key)));
                BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
                       btrfs_header_bytenr(leaf));
        }
#if 0
        /*
         * Disabled full scan over the items.  It uses 'i', which is not
         * declared in this function, and comp_keys(), so it does not
         * build as-is.
         */
        for (i = 0; nritems > 1 && i < nritems - 2; i++) {
                btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
                btrfs_item_key(leaf, &leaf_key, i);
                if (comp_keys(&leaf_key, &cpukey) >= 0) {
                        btrfs_print_leaf(root, leaf);
                        printk("slot %d offset bad key\n", i);
                        BUG_ON(1);
                }
                if (btrfs_item_offset_nr(leaf, i) !=
                        btrfs_item_end_nr(leaf, i + 1)) {
                        btrfs_print_leaf(root, leaf);
                        printk("slot %d offset bad\n", i);
                        BUG_ON(1);
                }
                if (i == 0) {
                        if (btrfs_item_offset_nr(leaf, i) +
                               btrfs_item_size_nr(leaf, i) !=
                               BTRFS_LEAF_DATA_SIZE(root)) {
                                btrfs_print_leaf(root, leaf);
                                printk("slot %d first offset bad\n", i);
                                BUG_ON(1);
                        }
                }
        }
        if (nritems > 0) {
                if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
                                btrfs_print_leaf(root, leaf);
                                printk("slot %d bad size \n", nritems - 1);
                                BUG_ON(1);
                }
        }
#endif
        /*
         * Compare the slot with its left neighbour: keys must increase
         * and the data of the two items must be adjacent.  Note that this
         * is only done when the slot also has a right neighbour.
         */
        if (slot != 0 && slot < nritems - 1) {
                btrfs_item_key(leaf, &leaf_key, slot);
                btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
                if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) {
                        btrfs_print_leaf(root, leaf);
                        printk("slot %d offset bad key\n", slot);
                        BUG_ON(1);
                }
                if (btrfs_item_offset_nr(leaf, slot - 1) !=
                       btrfs_item_end_nr(leaf, slot)) {
                        btrfs_print_leaf(root, leaf);
                        printk("slot %d offset bad\n", slot);
                        BUG_ON(1);
                }
        }
        /* same checks against the right neighbour */
        if (slot < nritems - 1) {
                btrfs_item_key(leaf, &leaf_key, slot);
                btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
                BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0);
                if (btrfs_item_offset_nr(leaf, slot) !=
                        btrfs_item_end_nr(leaf, slot + 1)) {
                        btrfs_print_leaf(root, leaf);
                        printk("slot %d offset bad\n", slot);
                        BUG_ON(1);
                }
        }
        /* the first item's data must end exactly at the end of the leaf */
        BUG_ON(btrfs_item_offset_nr(leaf, 0) +
               btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
        return 0;
}
567
/*
 * Entry point for the per-block sanity checks.
 *
 * As written this always returns 0: the unconditional return at the top
 * makes the dispatch to check_leaf()/check_node() below unreachable, and
 * the fsid comparison is additionally compiled out with #if 0.
 * NOTE(review): presumably the checks were switched off on purpose (e.g.
 * for speed) -- confirm before re-enabling them.
 */
static int noinline check_block(struct btrfs_root *root,
                                struct btrfs_path *path, int level)
{
        /* checks disabled: nothing after this line is executed */
        return 0;
#if 0
        struct extent_buffer *buf = path->nodes[level];

        if (memcmp_extent_buffer(buf, root->fs_info->fsid,
                                 (unsigned long)btrfs_header_fsid(buf),
                                 BTRFS_FSID_SIZE)) {
                printk("warning bad block %Lu\n", buf->start);
                return 1;
        }
#endif
        /* level 0 is a leaf, anything above is an interior node */
        if (level == 0)
                return check_leaf(root, path, level);
        return check_node(root, path, level);
}
586
587 /*
588  * search for key in the extent_buffer.  The items start at offset p,
589  * and they are item_size apart.  There are 'max' items in p.
590  *
591  * the slot in the array is returned via slot, and it points to
592  * the place where you would insert key if it is not found in
593  * the array.
594  *
595  * slot may point to max if the key is bigger than all of the keys
596  */
597 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
598                               int item_size, struct btrfs_key *key,
599                               int max, int *slot)
600 {
601         int low = 0;
602         int high = max;
603         int mid;
604         int ret;
605         unsigned long offset;
606         struct btrfs_disk_key *tmp;
607
608         while(low < high) {
609                 mid = (low + high) / 2;
610                 offset = p + mid * item_size;
611
612                 tmp = (struct btrfs_disk_key *)(eb->data + offset);
613                 ret = btrfs_comp_keys(tmp, key);
614
615                 if (ret < 0)
616                         low = mid + 1;
617                 else if (ret > 0)
618                         high = mid;
619                 else {
620                         *slot = mid;
621                         return 0;
622                 }
623         }
624         *slot = low;
625         return 1;
626 }
627
628 /*
629  * simple bin_search frontend that does the right thing for
630  * leaves vs nodes
631  */
632 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
633                       int level, int *slot)
634 {
635         if (level == 0) {
636                 return generic_bin_search(eb,
637                                           offsetof(struct btrfs_leaf, items),
638                                           sizeof(struct btrfs_item),
639                                           key, btrfs_header_nritems(eb),
640                                           slot);
641         } else {
642                 return generic_bin_search(eb,
643                                           offsetof(struct btrfs_node, ptrs),
644                                           sizeof(struct btrfs_key_ptr),
645                                           key, btrfs_header_nritems(eb),
646                                           slot);
647         }
648         return -1;
649 }
650
/*
 * Read the child block that 'parent' points to at 'slot'.
 *
 * Returns NULL when slot is negative or not below the parent's item
 * count; otherwise returns whatever read_tree_block() returns for the
 * child's block pointer, sized for one level below the parent.
 */
static struct extent_buffer *read_node_slot(struct btrfs_root *root,
                                   struct extent_buffer *parent, int slot)
{
        int child_level;

        if (slot < 0 || slot >= btrfs_header_nritems(parent))
                return NULL;

        child_level = btrfs_header_level(parent) - 1;
        return read_tree_block(root, btrfs_node_blockptr(parent, slot),
                               btrfs_level_size(root, child_level));
}
661
662 static int balance_level(struct btrfs_trans_handle *trans,
663                          struct btrfs_root *root,
664                          struct btrfs_path *path, int level)
665 {
666         struct extent_buffer *right = NULL;
667         struct extent_buffer *mid;
668         struct extent_buffer *left = NULL;
669         struct extent_buffer *parent = NULL;
670         int ret = 0;
671         int wret;
672         int pslot;
673         int orig_slot = path->slots[level];
674         int err_on_enospc = 0;
675         u64 orig_ptr;
676
677         if (level == 0)
678                 return 0;
679
680         mid = path->nodes[level];
681         WARN_ON(btrfs_header_generation(mid) != trans->transid);
682
683         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
684
685         if (level < BTRFS_MAX_LEVEL - 1)
686                 parent = path->nodes[level + 1];
687         pslot = path->slots[level + 1];
688
689         /*
690          * deal with the case where there is only one pointer in the root
691          * by promoting the node below to a root
692          */
693         if (!parent) {
694                 struct extent_buffer *child;
695
696                 if (btrfs_header_nritems(mid) != 1)
697                         return 0;
698
699                 /* promote the child to a root */
700                 child = read_node_slot(root, mid, 0);
701                 BUG_ON(!child);
702                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
703                 BUG_ON(ret);
704
705                 root->node = child;
706                 add_root_to_dirty_list(root);
707                 path->nodes[level] = NULL;
708                 clean_tree_block(trans, root, mid);
709                 wait_on_tree_block_writeback(root, mid);
710                 /* once for the path */
711                 free_extent_buffer(mid);
712                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
713                                         root->root_key.objectid,
714                                         btrfs_header_generation(mid), 0, 0, 1);
715                 /* once for the root ptr */
716                 free_extent_buffer(mid);
717                 return ret;
718         }
719         if (btrfs_header_nritems(mid) >
720             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
721                 return 0;
722
723         if (btrfs_header_nritems(mid) < 2)
724                 err_on_enospc = 1;
725
726         left = read_node_slot(root, parent, pslot - 1);
727         if (left) {
728                 wret = btrfs_cow_block(trans, root, left,
729                                        parent, pslot - 1, &left);
730                 if (wret) {
731                         ret = wret;
732                         goto enospc;
733                 }
734         }
735         right = read_node_slot(root, parent, pslot + 1);
736         if (right) {
737                 wret = btrfs_cow_block(trans, root, right,
738                                        parent, pslot + 1, &right);
739                 if (wret) {
740                         ret = wret;
741                         goto enospc;
742                 }
743         }
744
745         /* first, try to make some room in the middle buffer */
746         if (left) {
747                 orig_slot += btrfs_header_nritems(left);
748                 wret = push_node_left(trans, root, left, mid);
749                 if (wret < 0)
750                         ret = wret;
751                 if (btrfs_header_nritems(mid) < 2)
752                         err_on_enospc = 1;
753         }
754
755         /*
756          * then try to empty the right most buffer into the middle
757          */
758         if (right) {
759                 wret = push_node_left(trans, root, mid, right);
760                 if (wret < 0 && wret != -ENOSPC)
761                         ret = wret;
762                 if (btrfs_header_nritems(right) == 0) {
763                         u64 bytenr = right->start;
764                         u64 generation = btrfs_header_generation(parent);
765                         u32 blocksize = right->len;
766
767                         clean_tree_block(trans, root, right);
768                         wait_on_tree_block_writeback(root, right);
769                         free_extent_buffer(right);
770                         right = NULL;
771                         wret = del_ptr(trans, root, path, level + 1, pslot +
772                                        1);
773                         if (wret)
774                                 ret = wret;
775                         wret = btrfs_free_extent(trans, root, bytenr,
776                                                  blocksize,
777                                                  btrfs_header_owner(parent),
778                                                  generation, 0, 0, 1);
779                         if (wret)
780                                 ret = wret;
781                 } else {
782                         struct btrfs_disk_key right_key;
783                         btrfs_node_key(right, &right_key, 0);
784                         btrfs_set_node_key(parent, &right_key, pslot + 1);
785                         btrfs_mark_buffer_dirty(parent);
786                 }
787         }
788         if (btrfs_header_nritems(mid) == 1) {
789                 /*
790                  * we're not allowed to leave a node with one item in the
791                  * tree during a delete.  A deletion from lower in the tree
792                  * could try to delete the only pointer in this node.
793                  * So, pull some keys from the left.
794                  * There has to be a left pointer at this point because
795                  * otherwise we would have pulled some pointers from the
796                  * right
797                  */
798                 BUG_ON(!left);
799                 wret = balance_node_right(trans, root, mid, left);
800                 if (wret < 0) {
801                         ret = wret;
802                         goto enospc;
803                 }
804                 BUG_ON(wret == 1);
805         }
806         if (btrfs_header_nritems(mid) == 0) {
807                 /* we've managed to empty the middle node, drop it */
808                 u64 root_gen = btrfs_header_generation(parent);
809                 u64 bytenr = mid->start;
810                 u32 blocksize = mid->len;
811                 clean_tree_block(trans, root, mid);
812                 wait_on_tree_block_writeback(root, mid);
813                 free_extent_buffer(mid);
814                 mid = NULL;
815                 wret = del_ptr(trans, root, path, level + 1, pslot);
816                 if (wret)
817                         ret = wret;
818                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
819                                          btrfs_header_owner(parent),
820                                          root_gen, 0, 0, 1);
821                 if (wret)
822                         ret = wret;
823         } else {
824                 /* update the parent key to reflect our changes */
825                 struct btrfs_disk_key mid_key;
826                 btrfs_node_key(mid, &mid_key, 0);
827                 btrfs_set_node_key(parent, &mid_key, pslot);
828                 btrfs_mark_buffer_dirty(parent);
829         }
830
831         /* update the path */
832         if (left) {
833                 if (btrfs_header_nritems(left) > orig_slot) {
834                         extent_buffer_get(left);
835                         path->nodes[level] = left;
836                         path->slots[level + 1] -= 1;
837                         path->slots[level] = orig_slot;
838                         if (mid)
839                                 free_extent_buffer(mid);
840                 } else {
841                         orig_slot -= btrfs_header_nritems(left);
842                         path->slots[level] = orig_slot;
843                 }
844         }
845         /* double check we haven't messed things up */
846         check_block(root, path, level);
847         if (orig_ptr !=
848             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
849                 BUG();
850 enospc:
851         if (right)
852                 free_extent_buffer(right);
853         if (left)
854                 free_extent_buffer(left);
855         return ret;
856 }
857
858 /* make room in path->nodes[level] by pushing pointers into a sibling; returns zero if the push worked, non-zero otherwise */
859 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
860                                           struct btrfs_root *root,
861                                           struct btrfs_path *path, int level)
862 {
863         struct extent_buffer *right = NULL;
864         struct extent_buffer *mid;
865         struct extent_buffer *left = NULL;
866         struct extent_buffer *parent = NULL;
867         int ret = 0;
868         int wret;
869         int pslot;
870         int orig_slot = path->slots[level];
871         u64 orig_ptr;
872
873         if (level == 0) /* only interior nodes are handled here */
874                 return 1;
875
876         mid = path->nodes[level];
877         WARN_ON(btrfs_header_generation(mid) != trans->transid);
878         orig_ptr = btrfs_node_blockptr(mid, orig_slot); /* NOTE(review): never read afterwards in this function */
879
880         if (level < BTRFS_MAX_LEVEL - 1)
881                 parent = path->nodes[level + 1];
882         pslot = path->slots[level + 1];
883
884         if (!parent) /* no parent node in the path, so there are no siblings to try */
885                 return 1;
886
887         left = read_node_slot(root, parent, pslot - 1);
888
889         /* first, try to make some room in the middle buffer by pushing into the left one */
890         if (left) {
891                 u32 left_nr;
892                 left_nr = btrfs_header_nritems(left);
893                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { /* left has at most one free slot, skip it */
894                         wret = 1;
895                 } else {
896                         ret = btrfs_cow_block(trans, root, left, parent,
897                                               pslot - 1, &left);
898                         if (ret) /* cow failed: handled like "no room" */
899                                 wret = 1;
900                         else {
901                                 wret = push_node_left(trans, root,
902                                                       left, mid);
903                         }
904                 }
905                 if (wret < 0) /* NOTE(review): ret is never returned; every exit returns 0 or 1 */
906                         ret = wret;
907                 if (wret == 0) {
908                         struct btrfs_disk_key disk_key;
909                         orig_slot += left_nr; /* index as if left and mid were one array */
910                         btrfs_node_key(mid, &disk_key, 0);
911                         btrfs_set_node_key(parent, &disk_key, pslot); /* refresh the parent's key for mid from mid's first key */
912                         btrfs_mark_buffer_dirty(parent);
913                         if (btrfs_header_nritems(left) > orig_slot) { /* our slot now lives in left */
914                                 path->nodes[level] = left;
915                                 path->slots[level + 1] -= 1;
916                                 path->slots[level] = orig_slot;
917                                 free_extent_buffer(mid);
918                         } else { /* our slot stayed in mid, shifted down by what left now holds */
919                                 orig_slot -=
920                                         btrfs_header_nritems(left);
921                                 path->slots[level] = orig_slot;
922                                 free_extent_buffer(left);
923                         }
924                         return 0;
925                 }
926                 free_extent_buffer(left);
927         }
928         right= read_node_slot(root, parent, pslot + 1);
929
930         /*
931          * then try to push pointers from the middle buffer into the right one
932          */
933         if (right) {
934                 u32 right_nr;
935                 right_nr = btrfs_header_nritems(right);
936                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { /* right has at most one free slot, skip it */
937                         wret = 1;
938                 } else {
939                         ret = btrfs_cow_block(trans, root, right,
940                                               parent, pslot + 1,
941                                               &right);
942                         if (ret) /* cow failed: handled like "no room" */
943                                 wret = 1;
944                         else {
945                                 wret = balance_node_right(trans, root,
946                                                           right, mid);
947                         }
948                 }
949                 if (wret < 0)
950                         ret = wret;
951                 if (wret == 0) {
952                         struct btrfs_disk_key disk_key;
953
954                         btrfs_node_key(right, &disk_key, 0);
955                         btrfs_set_node_key(parent, &disk_key, pslot + 1); /* refresh the parent's key for right from right's first key */
956                         btrfs_mark_buffer_dirty(parent);
957
958                         if (btrfs_header_nritems(mid) <= orig_slot) { /* our slot now lives in right */
959                                 path->nodes[level] = right;
960                                 path->slots[level + 1] += 1;
961                                 path->slots[level] = orig_slot -
962                                         btrfs_header_nritems(mid);
963                                 free_extent_buffer(mid);
964                         } else {
965                                 free_extent_buffer(right);
966                         }
967                         return 0;
968                 }
969                 free_extent_buffer(right);
970         }
971         return 1; /* neither sibling took any pointers */
972 }
973
974 /*
975  * readahead leaves near 'slot' of the path's level 1 node; the sign of path->reada picks the walk direction
976  */
977 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
978                              int level, int slot, u64 objectid)
979 {
980         struct extent_buffer *node;
981         struct btrfs_disk_key disk_key;
982         u32 nritems;
983         u64 search;
984         u64 lowest_read;
985         u64 highest_read;
986         u64 nread = 0;
987         int direction = path->reada;
988         struct extent_buffer *eb;
989         u32 nr;
990         u32 blocksize;
991         u32 nscan = 0;
992
993         if (level != 1) /* only level 1 nodes are handled */
994                 return;
995
996         if (!path->nodes[level])
997                 return;
998
999         node = path->nodes[level];
1000         search = btrfs_node_blockptr(node, slot);
1001         blocksize = btrfs_level_size(root, level - 1);
1002         eb = btrfs_find_tree_block(root, search, blocksize);
1003         if (eb) { /* the block the search wants next was found, so no readahead is issued */
1004                 free_extent_buffer(eb);
1005                 return;
1006         }
1007
1008         highest_read = search; /* highest_read/lowest_read bound the block addresses seen so far */
1009         lowest_read = search;
1010
1011         nritems = btrfs_header_nritems(node);
1012         nr = slot;
1013         while(1) {
1014                 if (direction < 0) { /* walk towards slot 0 */
1015                         if (nr == 0)
1016                                 break;
1017                         nr--;
1018                 } else if (direction > 0) { /* walk towards the last slot */
1019                         nr++;
1020                         if (nr >= nritems)
1021                                 break;
1022                 }
1023                 if (path->reada < 0 && objectid) { /* walking backwards: stop at the first key of another objectid */
1024                         btrfs_node_key(node, &disk_key, nr);
1025                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1026                                 break;
1027                 }
1028                 search = btrfs_node_blockptr(node, nr);
1029                 if ((search >= lowest_read && search <= highest_read) || /* inside the range seen so far, */
1030                     (search < lowest_read && lowest_read - search <= 32768) || /* or at most 32768 below it, */
1031                     (search > highest_read && search - highest_read <= 32768)) { /* or at most 32768 above it */
1032                         readahead_tree_block(root, search, blocksize);
1033                         nread += blocksize;
1034                 }
1035                 nscan++;
1036                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32)) /* tighter limits unless path->reada >= 2 */
1037                         break;
1038                 if(nread > (1024 * 1024) || nscan > 128) /* hard limits on bytes requested and slots scanned */
1039                         break;
1040
1041                 if (search < lowest_read) /* widen the range with this block */
1042                         lowest_read = search;
1043                 if (search > highest_read)
1044                         highest_read = search;
1045         }
1046 }
1047
1048 /*
1049  * look for key in the tree.  path is filled in with nodes along the way
1050  * if key is found, we return zero and you can find the item in the leaf
1051  * level of the path (level 0)
1052  *
1053  * If the key isn't found, the path points to the slot where it should
1054  * be inserted, and 1 is returned.  If there are other errors during the
1055  * search a negative error number is returned (-EIO for an unreadable block).
1056  *
1057  * if ins_len > 0, nodes and leaves will be split as we walk down the
1058  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1059  * possible).  if cow is set, each block is cowed before it is examined.
1060  */
1061 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1062                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1063                       ins_len, int cow)
1064 {
1065         struct extent_buffer *b;
1066         u64 bytenr;
1067         u64 ptr_gen;
1068         int slot;
1069         int ret;
1070         int level;
1071         int should_reada = p->reada;
1072         u8 lowest_level = 0;
1073
1074         lowest_level = p->lowest_level;
1075         WARN_ON(lowest_level && ins_len);
1076         WARN_ON(p->nodes[0] != NULL);
1077 again:
1078         b = root->node;
1079         extent_buffer_get(b);
1080         while (b) {
1081                 level = btrfs_header_level(b);
1082                 if (cow) {
1083                         int wret;
1084                         wret = btrfs_cow_block(trans, root, b,
1085                                                p->nodes[level + 1],
1086                                                p->slots[level + 1],
1087                                                &b);
1088                         if (wret) {
1089                                 free_extent_buffer(b);
1090                                 return wret;
1091                         }
1092                 }
1093                 BUG_ON(!cow && ins_len);
1094                 if (level != btrfs_header_level(b))
1095                         WARN_ON(1);
1096                 level = btrfs_header_level(b);
1097                 p->nodes[level] = b;
1098                 ret = check_block(root, p, level);
1099                 if (ret)
1100                         return -1;
1101                 ret = bin_search(b, key, level, &slot);
1102                 if (level != 0) {
1103                         if (ret && slot > 0) /* no exact match: step back one slot (presumably to the pointer covering key) */
1104                                 slot -= 1;
1105                         p->slots[level] = slot;
1106                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1107                             BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1108                                 int sret = split_node(trans, root, p, level);
1109                                 BUG_ON(sret > 0);
1110                                 if (sret)
1111                                         return sret;
1112                                 b = p->nodes[level];
1113                                 slot = p->slots[level];
1114                         } else if (ins_len < 0) {
1115                                 int sret = balance_level(trans, root, p,
1116                                                          level);
1117                                 if (sret)
1118                                         return sret;
1119                                 b = p->nodes[level];
1120                                 if (!b) { /* no node left at this level after balancing: restart from the root */
1121                                         btrfs_release_path(NULL, p);
1122                                         goto again;
1123                                 }
1124                                 slot = p->slots[level];
1125                                 BUG_ON(btrfs_header_nritems(b) == 1);
1126                         }
1127                         /* this is only true while dropping a snapshot */
1128                         if (level == lowest_level)
1129                                 break;
1130                         bytenr = btrfs_node_blockptr(b, slot);
1131                         ptr_gen = btrfs_node_ptr_generation(b, slot);
1132                         if (should_reada)
1133                                 reada_for_search(root, p, level, slot,
1134                                                  key->objectid);
1135                         b = read_tree_block(root, bytenr,
1136                                             btrfs_level_size(root, level - 1));
1137                         /* unreadable child: fail instead of using NULL */
1138                         if (!b)
1139                                 return -EIO;
1140                         if (ptr_gen != btrfs_header_generation(b)) {
1141                                 printk("block %llu bad gen wanted %llu "
1142                                        "found %llu\n",
1143                                 (unsigned long long)b->start,
1144                                 (unsigned long long)ptr_gen,
1145                                 (unsigned long long)btrfs_header_generation(b));
1146                         }
1147                 } else {
1148                         p->slots[level] = slot;
1149                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1150                             sizeof(struct btrfs_item) + ins_len) {
1151                                 int sret = split_leaf(trans, root, key,
1152                                                       p, ins_len, ret == 0);
1153                                 BUG_ON(sret > 0);
1154                                 if (sret)
1155                                         return sret;
1156                         }
1157                         return ret;
1158                 }
1159         }
1160         return 1;
1161 }
1162
1163 /*
1164  * walk up the path from 'level' and store 'key' in the slot the path
1165  * uses at each level.  A node's key in its parent only mirrors the
1166  * node's first key, so the walk ends after the first level whose slot
1167  * is not 0, or when the path has no node at a level.
1168  *
1169  * This is meant to be used after shifting pointers to the left.
1170  *
1171  * Every visited node is marked dirty.  The return value is always 0.
1172  */
1173 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1174                           struct btrfs_root *root, struct btrfs_path *path,
1175                           struct btrfs_disk_key *key, int level)
1176 {
1177         struct extent_buffer *eb;
1178         int lvl = level;
1179         int slot;
1180
1181         while (lvl < BTRFS_MAX_LEVEL && path->nodes[lvl]) {
1182                 eb = path->nodes[lvl];
1183                 slot = path->slots[lvl];
1184                 btrfs_set_node_key(eb, key, slot);
1185                 btrfs_mark_buffer_dirty(eb);
1186                 /* higher levels only track the key found in slot 0 */
1187                 if (slot != 0)
1188                         break;
1189                 lvl++;
1190         }
1191         return 0;
1192 }
1193
1194 /*
1195  * move pointers from the front of src to the end of dst, the node to
1196  * its left, taking as many as dst has room for.
1197  *
1198  * returns 0 after moving and 1 if dst has no free slot.  src may end
1199  * up empty; it is left to the caller to deal with that.
1200  */
1201 static int push_node_left(struct btrfs_trans_handle *trans,
1202                           struct btrfs_root *root, struct extent_buffer *dst,
1203                           struct extent_buffer *src)
1204 {
1205         int src_count = btrfs_header_nritems(src);
1206         int dst_count = btrfs_header_nritems(dst);
1207         int room = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_count;
1208         int nr_move;
1209         int remaining;
1210
1211         WARN_ON(btrfs_header_generation(src) != trans->transid);
1212         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1213
1214         /* dst is already full */
1215         if (room <= 0)
1216                 return 1;
1217
1218         /* take what fits, but never more than src holds */
1219         nr_move = room > src_count ? src_count : room;
1220         remaining = src_count - nr_move;
1221
1222         /* append the head of src to the tail of dst */
1223         copy_extent_buffer(dst, src,
1224                            btrfs_node_key_ptr_offset(dst_count),
1225                            btrfs_node_key_ptr_offset(0),
1226                            nr_move * sizeof(struct btrfs_key_ptr));
1227
1228         /* slide what is left of src down to slot 0 */
1229         if (remaining > 0) {
1230                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1231                                       btrfs_node_key_ptr_offset(nr_move),
1232                                       remaining * sizeof(struct btrfs_key_ptr));
1233         }
1234         btrfs_set_header_nritems(src, remaining);
1235         btrfs_set_header_nritems(dst, dst_count + nr_move);
1236         btrfs_mark_buffer_dirty(src);
1237         btrfs_mark_buffer_dirty(dst);
1238         return 0;
1239 }
1240
1241 /*
1242  * move pointers from the tail of src (the left hand node) to the front
1243  * of dst (its right hand neighbour).
1244  *
1245  * returns 0 if pointers were moved and 1 if dst is full or if src is too
1246  * small to give anything up without risking being emptied.
1247  *
1248  * at most src_nritems / 2 + 1 pointers are moved in one call.
1249  */
1250 static int balance_node_right(struct btrfs_trans_handle *trans,
1251                               struct btrfs_root *root,
1252                               struct extent_buffer *dst,
1253                               struct extent_buffer *src)
1254 {
1255         int src_count;
1256         int dst_count;
1257         int room;
1258         int cap;
1259         int nr_move;
1260
1261         WARN_ON(btrfs_header_generation(src) != trans->transid);
1262         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1263
1264         src_count = btrfs_header_nritems(src);
1265         dst_count = btrfs_header_nritems(dst);
1266         /* free slots left in dst */
1267         room = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_count;
1268         if (room <= 0)
1269                 return 1;
1270
1271         /* don't try to empty the node */
1272         cap = src_count / 2 + 1;
1273         if (cap >= src_count)
1274                 return 1;
1275
1276         nr_move = cap < room ? cap : room;
1277
1278         /* open a gap of nr_move slots at the front of dst */
1279         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(nr_move),
1280                               btrfs_node_key_ptr_offset(0),
1281                               dst_count * sizeof(struct btrfs_key_ptr));
1282
1283         /* fill the gap with the last nr_move pointers of src */
1284         copy_extent_buffer(dst, src,
1285                            btrfs_node_key_ptr_offset(0),
1286                            btrfs_node_key_ptr_offset(src_count - nr_move),
1287                            nr_move * sizeof(struct btrfs_key_ptr));
1288
1289         btrfs_set_header_nritems(src, src_count - nr_move);
1290         btrfs_set_header_nritems(dst, dst_count + nr_move);
1291         btrfs_mark_buffer_dirty(src);
1292         btrfs_mark_buffer_dirty(dst);
1293         return 0;
1294 }
1295
1296 /*
1297  * helper function to insert a new root level in the tree.
1298  * A new node is allocated at 'level', and a single item is inserted to
1299  * point to the existing root, which must be path->nodes[level - 1]
1300  *
1301  * returns zero on success or < 0 if the new block cannot be allocated.
1302  */
1303 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1304                            struct btrfs_root *root,
1305                            struct btrfs_path *path, int level)
1306 {
1307         u64 root_gen;
1308         u64 lower_gen;
1309         struct extent_buffer *lower;
1310         struct extent_buffer *c;
1311         struct btrfs_disk_key lower_key;
1312
1313         BUG_ON(path->nodes[level]);
1314         BUG_ON(path->nodes[level-1] != root->node);
1315
1316         if (root->ref_cows)
1317                 root_gen = trans->transid;
1318         else
1319                 root_gen = 0;
1320
1321         lower = path->nodes[level-1];
1322         if (level == 1) /* the old root is a leaf, so its first key is an item key */
1323                 btrfs_item_key(lower, &lower_key, 0);
1324         else
1325                 btrfs_node_key(lower, &lower_key, 0);
1326
1327         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1328                                    root->root_key.objectid, root_gen,
1329                                    btrfs_disk_key_objectid(&lower_key), /* use the accessor, as split_node does, not the raw disk field */
1330                                    level, root->node->start, 0);
1331         if (IS_ERR(c))
1332                 return PTR_ERR(c);
1333         memset_extent_buffer(c, 0, 0, root->nodesize);
1334         btrfs_set_header_nritems(c, 1);
1335         btrfs_set_header_level(c, level);
1336         btrfs_set_header_bytenr(c, c->start);
1337         btrfs_set_header_generation(c, trans->transid);
1338         btrfs_set_header_owner(c, root->root_key.objectid);
1339
1340         write_extent_buffer(c, root->fs_info->fsid,
1341                             (unsigned long)btrfs_header_fsid(c),
1342                             BTRFS_FSID_SIZE);
1343
1344         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
1345                             (unsigned long)btrfs_header_chunk_tree_uuid(c),
1346                             BTRFS_UUID_SIZE);
1347
1348         btrfs_set_node_key(c, &lower_key, 0); /* slot 0 of the new root points at the old root */
1349         btrfs_set_node_blockptr(c, 0, lower->start);
1350         lower_gen = btrfs_header_generation(lower);
1351         WARN_ON(lower_gen == 0);
1352
1353         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1354
1355         btrfs_mark_buffer_dirty(c);
1356
1357         /* the super has an extra ref to root->node */
1358         free_extent_buffer(root->node);
1359         root->node = c;
1360         add_root_to_dirty_list(root);
1361         extent_buffer_get(c); /* second reference, held by the path */
1362         path->nodes[level] = c;
1363         path->slots[level] = 0;
1364
1365         if (root->ref_cows && lower_gen != trans->transid) { /* old root carries an older generation: add a backref under this transid */
1366                 struct btrfs_path *back_path = btrfs_alloc_path(); /* NOTE(review): allocated and freed but never used; the call below passes 'path' -- confirm intent */
1367                 int ret;
1368                 ret = btrfs_insert_extent_backref(trans,
1369                                                   root->fs_info->extent_root,
1370                                                   path, lower->start,
1371                                                   root->root_key.objectid,
1372                                                   trans->transid, 0, 0);
1373                 BUG_ON(ret);
1374                 btrfs_free_path(back_path);
1375         }
1376         return 0;
1377 }
1378
1379 /*
1380  * worker function to insert one key/block pointer pair into the node
1381  * found at path->nodes[level].  The node must already have a free slot.
1382  *
1383  * the pair is written at 'slot' (later entries shift up by one), pointing
1384  * at 'bytenr' and stamped with the generation trans->transid.
1385  *
1386  * always returns zero; a bad slot or a full node trips BUG()
1387  */
1388 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1389                       *root, struct btrfs_path *path, struct btrfs_disk_key
1390                       *key, u64 bytenr, int slot, int level)
1391 {
1392         struct extent_buffer *node = path->nodes[level];
1393         int count;
1394         int tail;
1395
1396         BUG_ON(!path->nodes[level]);
1397         count = btrfs_header_nritems(node);
1398         if (slot > count)
1399                 BUG();
1400         if (count == BTRFS_NODEPTRS_PER_BLOCK(root))
1401                 BUG();
1402         tail = count - slot; /* entries at or after 'slot' that must move up */
1403         if (tail != 0) {
1404                 memmove_extent_buffer(node, btrfs_node_key_ptr_offset(slot + 1),
1405                                       btrfs_node_key_ptr_offset(slot),
1406                                       tail * sizeof(struct btrfs_key_ptr));
1407         }
1408         btrfs_set_node_key(node, key, slot);
1409         btrfs_set_node_blockptr(node, slot, bytenr);
1410         WARN_ON(trans->transid == 0);
1411         btrfs_set_node_ptr_generation(node, slot, trans->transid);
1412         btrfs_set_header_nritems(node, count + 1);
1413         btrfs_mark_buffer_dirty(node);
1414         return 0;
1415 }
1416
1417 /*
1418  * split the node at the specified level in path in two.
1419  * The path is corrected to point to the appropriate node after the split
1420  *
1421  * Before splitting this tries to make some room in the node by pushing
1422  * left and right; if that frees enough slots, it returns right away.
1423  *
1424  * returns 0 on success and < 0 on failure
1425  */
1426 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1427                       *root, struct btrfs_path *path, int level)
1428 {
1429         u64 root_gen;
1430         struct extent_buffer *c;
1431         struct extent_buffer *split;
1432         struct btrfs_disk_key disk_key;
1433         int mid;
1434         int ret;
1435         int wret;
1436         u32 c_nritems;
1437
1438         c = path->nodes[level];
1439         WARN_ON(btrfs_header_generation(c) != trans->transid);
1440         if (c == root->node) {
1441                 /* trying to split the root, lets make a new one */
1442                 ret = insert_new_root(trans, root, path, level + 1);
1443                 if (ret)
1444                         return ret;
1445         } else {
1446                 ret = push_nodes_for_insert(trans, root, path, level);
1447                 c = path->nodes[level]; /* the push may have moved the path into a sibling */
1448                 if (!ret && btrfs_header_nritems(c) < /* the push only counts if it leaves more than 3 free slots */
1449                     BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
1450                         return 0;
1451                 if (ret < 0)
1452                         return ret;
1453         }
1454
1455         c_nritems = btrfs_header_nritems(c);
1456         if (root->ref_cows)
1457                 root_gen = trans->transid;
1458         else
1459                 root_gen = 0;
1460
1461         btrfs_node_key(c, &disk_key, 0);
1462         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1463                                          root->root_key.objectid,
1464                                          root_gen,
1465                                          btrfs_disk_key_objectid(&disk_key),
1466                                          level, c->start, 0);
1467         if (IS_ERR(split))
1468                 return PTR_ERR(split);
1469
1470         btrfs_set_header_flags(split, btrfs_header_flags(c)); /* NOTE(review): overwritten by the btrfs_set_header_flags(split, 0) call further down */
1471         btrfs_set_header_level(split, btrfs_header_level(c));
1472         btrfs_set_header_bytenr(split, split->start);
1473         btrfs_set_header_generation(split, trans->transid);
1474         btrfs_set_header_owner(split, root->root_key.objectid);
1475         btrfs_set_header_flags(split, 0);
1476         write_extent_buffer(split, root->fs_info->fsid,
1477                             (unsigned long)btrfs_header_fsid(split),
1478                             BTRFS_FSID_SIZE);
1479         write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
1480                             (unsigned long)btrfs_header_chunk_tree_uuid(split),
1481                             BTRFS_UUID_SIZE);
1482
1483         mid = (c_nritems + 1) / 2; /* c keeps the first 'mid' pointers, split gets the rest */
1484
1485         copy_extent_buffer(split, c,
1486                            btrfs_node_key_ptr_offset(0),
1487                            btrfs_node_key_ptr_offset(mid),
1488                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1489         btrfs_set_header_nritems(split, c_nritems - mid);
1490         btrfs_set_header_nritems(c, mid);
1491         ret = 0;
1492
1493         btrfs_mark_buffer_dirty(c);
1494         btrfs_mark_buffer_dirty(split);
1495
1496         btrfs_node_key(split, &disk_key, 0); /* first key of split becomes its key in the parent */
1497         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1498                           path->slots[level + 1] + 1, /* directly after c's slot in the parent */
1499                           level + 1);
1500         if (wret)
1501                 ret = wret;
1502
1503         if (path->slots[level] >= mid) { /* the slot we were after moved into split */
1504                 path->slots[level] -= mid;
1505                 free_extent_buffer(c);
1506                 path->nodes[level] = split;
1507                 path->slots[level + 1] += 1;
1508         } else { /* the path stays on c, drop our reference to split */
1509                 free_extent_buffer(split);
1510         }
1511         return ret;
1512 }
1513
1514 /*
1515  * bytes taken up by 'nr' items of leaf 'l' beginning at item 'start',
1516  * counting both the item headers and the item data.  The data range
1517  * is clipped to the items the leaf really has.
1518  */
1519 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1520 {
1521         int total = btrfs_header_nritems(l);
1522         int last = min(total, start + nr) - 1;
1523         int used;
1524
1525         if (nr == 0)
1526                 return 0;
1527         /* item data sits between the offset of 'last' and the end of 'start' */
1528         used = btrfs_item_end_nr(l, start) - btrfs_item_offset_nr(l, last);
1529         used += sizeof(struct btrfs_item) * nr;
1530         WARN_ON(used < 0);
1531         return used;
1532 }
1533
/*
 * Bytes still unused in 'leaf': the leaf data size minus everything the
 * current items occupy (headers plus data).  A negative result means the
 * items claim more space than the leaf has; that case is logged and the
 * negative value is still handed back to the caller.
 */
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
{
        int item_count = btrfs_header_nritems(leaf);
        int avail = BTRFS_LEAF_DATA_SIZE(root) -
                    leaf_space_used(leaf, 0, item_count);

        if (avail >= 0)
                return avail;

        printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
               avail, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
               leaf_space_used(leaf, 0, item_count), item_count);
        return avail;
}
1551
1552 /*
1553  * push some data in the path leaf to the right, trying to free up at
1554  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1555  *
1556  * returns 1 if the push failed because the other node didn't have enough
1557  * room, 0 if everything worked out and < 0 if there were major errors.
1558  */
1559 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1560                            *root, struct btrfs_path *path, int data_size,
1561                            int empty)
1562 {
1563         struct extent_buffer *left = path->nodes[0];
1564         struct extent_buffer *right;
1565         struct extent_buffer *upper;
1566         struct btrfs_disk_key disk_key;
1567         int slot;
1568         u32 i;
1569         int free_space;
1570         int push_space = 0;
1571         int push_items = 0;
1572         struct btrfs_item *item;
1573         u32 left_nritems;
1574         u32 nr;
1575         u32 right_nritems;
1576         u32 data_end;
1577         u32 this_item_size;
1578         int ret;
1579
1580         slot = path->slots[1];
1581         if (!path->nodes[1]) {
1582                 return 1;
1583         }
1584         upper = path->nodes[1];
1585         if (slot >= btrfs_header_nritems(upper) - 1)
1586                 return 1;
1587
1588         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1589                                 root->leafsize);
1590         free_space = btrfs_leaf_free_space(root, right);
1591         if (free_space < data_size + sizeof(struct btrfs_item)) {
1592                 free_extent_buffer(right);
1593                 return 1;
1594         }
1595
1596         /* cow and double check */
1597         ret = btrfs_cow_block(trans, root, right, upper,
1598                               slot + 1, &right);
1599         if (ret) {
1600                 free_extent_buffer(right);
1601                 return 1;
1602         }
1603         free_space = btrfs_leaf_free_space(root, right);
1604         if (free_space < data_size + sizeof(struct btrfs_item)) {
1605                 free_extent_buffer(right);
1606                 return 1;
1607         }
1608
1609         left_nritems = btrfs_header_nritems(left);
1610         if (left_nritems == 0) {
1611                 free_extent_buffer(right);
1612                 return 1;
1613         }
1614
1615         if (empty)
1616                 nr = 0;
1617         else
1618                 nr = 1;
1619
1620         i = left_nritems - 1;
1621         while (i >= nr) {
1622                 item = btrfs_item_nr(left, i);
1623
1624                 if (path->slots[0] == i)
1625                         push_space += data_size + sizeof(*item);
1626
1627                 this_item_size = btrfs_item_size(left, item);
1628                 if (this_item_size + sizeof(*item) + push_space > free_space)
1629                         break;
1630                 push_items++;
1631                 push_space += this_item_size + sizeof(*item);
1632                 if (i == 0)
1633                         break;
1634                 i--;
1635         }
1636
1637         if (push_items == 0) {
1638                 free_extent_buffer(right);
1639                 return 1;
1640         }
1641
1642         if (!empty && push_items == left_nritems)
1643                 WARN_ON(1);
1644
1645         /* push left to right */
1646         right_nritems = btrfs_header_nritems(right);
1647
1648         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1649         push_space -= leaf_data_end(root, left);
1650
1651         /* make room in the right data area */
1652         data_end = leaf_data_end(root, right);
1653         memmove_extent_buffer(right,
1654                               btrfs_leaf_data(right) + data_end - push_space,
1655                               btrfs_leaf_data(right) + data_end,
1656                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1657
1658         /* copy from the left data area */
1659         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1660                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1661                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1662                      push_space);
1663
1664         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1665                               btrfs_item_nr_offset(0),
1666                               right_nritems * sizeof(struct btrfs_item));
1667
1668         /* copy the items from left to right */
1669         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1670                    btrfs_item_nr_offset(left_nritems - push_items),
1671                    push_items * sizeof(struct btrfs_item));
1672
1673         /* update the item pointers */
1674         right_nritems += push_items;
1675         btrfs_set_header_nritems(right, right_nritems);
1676         push_space = BTRFS_LEAF_DATA_SIZE(root);
1677         for (i = 0; i < right_nritems; i++) {
1678                 item = btrfs_item_nr(right, i);
1679                 push_space -= btrfs_item_size(right, item);
1680                 btrfs_set_item_offset(right, item, push_space);
1681         }
1682
1683         left_nritems -= push_items;
1684         btrfs_set_header_nritems(left, left_nritems);
1685
1686         if (left_nritems)
1687                 btrfs_mark_buffer_dirty(left);
1688         btrfs_mark_buffer_dirty(right);
1689
1690         btrfs_item_key(right, &disk_key, 0);
1691         btrfs_set_node_key(upper, &disk_key, slot + 1);
1692         btrfs_mark_buffer_dirty(upper);
1693
1694         /* then fixup the leaf pointer in the path */
1695         if (path->slots[0] >= left_nritems) {
1696                 path->slots[0] -= left_nritems;
1697                 free_extent_buffer(path->nodes[0]);
1698                 path->nodes[0] = right;
1699                 path->slots[1] += 1;
1700         } else {
1701                 free_extent_buffer(right);
1702         }
1703         return 0;
1704 }
1705 /*
1706  * push some data in the path leaf to the left, trying to free up at
1707  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1708  */
1709 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1710                           *root, struct btrfs_path *path, int data_size,
1711                           int empty)
1712 {
1713         struct btrfs_disk_key disk_key;
1714         struct extent_buffer *right = path->nodes[0];
1715         struct extent_buffer *left;
1716         int slot;
1717         int i;
1718         int free_space;
1719         int push_space = 0;
1720         int push_items = 0;
1721         struct btrfs_item *item;
1722         u32 old_left_nritems;
1723         u32 right_nritems;
1724         u32 nr;
1725         int ret = 0;
1726         int wret;
1727         u32 this_item_size;
1728         u32 old_left_item_size;
1729
1730         slot = path->slots[1];
1731         if (slot == 0)
1732                 return 1;
1733         if (!path->nodes[1])
1734                 return 1;
1735
1736         right_nritems = btrfs_header_nritems(right);
1737         if (right_nritems == 0) {
1738                 return 1;
1739         }
1740
1741         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1742                                slot - 1), root->leafsize);
1743         free_space = btrfs_leaf_free_space(root, left);
1744         if (free_space < data_size + sizeof(struct btrfs_item)) {
1745                 free_extent_buffer(left);
1746                 return 1;
1747         }
1748
1749         /* cow and double check */
1750         ret = btrfs_cow_block(trans, root, left,
1751                               path->nodes[1], slot - 1, &left);
1752         if (ret) {
1753                 /* we hit -ENOSPC, but it isn't fatal here */
1754                 free_extent_buffer(left);
1755                 return 1;
1756         }
1757
1758         free_space = btrfs_leaf_free_space(root, left);
1759         if (free_space < data_size + sizeof(struct btrfs_item)) {
1760                 free_extent_buffer(left);
1761                 return 1;
1762         }
1763
1764         if (empty)
1765                 nr = right_nritems;
1766         else
1767                 nr = right_nritems - 1;
1768
1769         for (i = 0; i < nr; i++) {
1770                 item = btrfs_item_nr(right, i);
1771
1772                 if (path->slots[0] == i)
1773                         push_space += data_size + sizeof(*item);
1774
1775                 this_item_size = btrfs_item_size(right, item);
1776                 if (this_item_size + sizeof(*item) + push_space > free_space)
1777                         break;
1778
1779                 push_items++;
1780                 push_space += this_item_size + sizeof(*item);
1781         }
1782
1783         if (push_items == 0) {
1784                 free_extent_buffer(left);
1785                 return 1;
1786         }
1787         if (!empty && push_items == btrfs_header_nritems(right))
1788                 WARN_ON(1);
1789
1790         /* push data from right to left */
1791         copy_extent_buffer(left, right,
1792                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1793                            btrfs_item_nr_offset(0),
1794                            push_items * sizeof(struct btrfs_item));
1795
1796         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1797                      btrfs_item_offset_nr(right, push_items -1);
1798
1799         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1800                      leaf_data_end(root, left) - push_space,
1801                      btrfs_leaf_data(right) +
1802                      btrfs_item_offset_nr(right, push_items - 1),
1803                      push_space);
1804         old_left_nritems = btrfs_header_nritems(left);
1805         BUG_ON(old_left_nritems < 0);
1806
1807         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1808         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1809                 u32 ioff;
1810
1811                 item = btrfs_item_nr(left, i);
1812                 ioff = btrfs_item_offset(left, item);
1813                 btrfs_set_item_offset(left, item,
1814                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1815         }
1816         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1817
1818         /* fixup right node */
1819         if (push_items > right_nritems) {
1820                 printk("push items %d nr %u\n", push_items, right_nritems);
1821                 WARN_ON(1);
1822         }
1823
1824         if (push_items < right_nritems) {
1825                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1826                                                   leaf_data_end(root, right);
1827                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1828                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1829                                       btrfs_leaf_data(right) +
1830                                       leaf_data_end(root, right), push_space);
1831
1832                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1833                               btrfs_item_nr_offset(push_items),
1834                              (btrfs_header_nritems(right) - push_items) *
1835                              sizeof(struct btrfs_item));
1836         }
1837         right_nritems -= push_items;
1838         btrfs_set_header_nritems(right, right_nritems);
1839         push_space = BTRFS_LEAF_DATA_SIZE(root);
1840         for (i = 0; i < right_nritems; i++) {
1841                 item = btrfs_item_nr(right, i);
1842                 push_space = push_space - btrfs_item_size(right, item);
1843                 btrfs_set_item_offset(right, item, push_space);
1844         }
1845
1846         btrfs_mark_buffer_dirty(left);
1847         if (right_nritems)
1848                 btrfs_mark_buffer_dirty(right);
1849
1850         btrfs_item_key(right, &disk_key, 0);
1851         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1852         if (wret)
1853                 ret = wret;
1854
1855         /* then fixup the leaf pointer in the path */
1856         if (path->slots[0] < push_items) {
1857                 path->slots[0] += old_left_nritems;
1858                 free_extent_buffer(path->nodes[0]);
1859                 path->nodes[0] = left;
1860                 path->slots[1] -= 1;
1861         } else {
1862                 free_extent_buffer(left);
1863                 path->slots[0] -= push_items;
1864         }
1865         BUG_ON(path->slots[0] < 0);
1866         return ret;
1867 }
1868
1869 /*
1870  * split the path's leaf in two, making sure there is at least data_size
1871  * available for the resulting leaf level of the path.
1872  *
1873  * returns 0 if all went well and < 0 on failure.
1874  */
1875 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1876                       *root, struct btrfs_key *ins_key,
1877                       struct btrfs_path *path, int data_size, int extend)
1878 {
1879         u64 root_gen;
1880         struct extent_buffer *l;
1881         u32 nritems;
1882         int mid;
1883         int slot;
1884         struct extent_buffer *right;
1885         int space_needed = data_size + sizeof(struct btrfs_item);
1886         int data_copy_size;
1887         int rt_data_off;
1888         int i;
1889         int ret = 0;
1890         int wret;
1891         int double_split;
1892         int num_doubles = 0;
1893         struct btrfs_disk_key disk_key;
1894
1895         if (extend)
1896                 space_needed = data_size;
1897
1898         if (root->ref_cows)
1899                 root_gen = trans->transid;
1900         else
1901                 root_gen = 0;
1902
1903         /* first try to make some room by pushing left and right */
1904         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1905                 wret = push_leaf_right(trans, root, path, data_size, 0);
1906                 if (wret < 0) {
1907                         return wret;
1908                 }
1909                 if (wret) {
1910                         wret = push_leaf_left(trans, root, path, data_size, 0);
1911                         if (wret < 0)
1912                                 return wret;
1913                 }
1914                 l = path->nodes[0];
1915
1916                 /* did the pushes work? */
1917                 if (btrfs_leaf_free_space(root, l) >= space_needed)
1918                         return 0;
1919         }
1920
1921         if (!path->nodes[1]) {
1922                 ret = insert_new_root(trans, root, path, 1);
1923                 if (ret)
1924                         return ret;
1925         }
1926 again:
1927         double_split = 0;
1928         l = path->nodes[0];
1929         slot = path->slots[0];
1930         nritems = btrfs_header_nritems(l);
1931         mid = (nritems + 1)/ 2;
1932
1933         btrfs_item_key(l, &disk_key, 0);
1934
1935         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
1936                                          root->root_key.objectid,
1937                                          root_gen, disk_key.objectid, 0,
1938                                          l->start, 0);
1939         if (IS_ERR(right)) {
1940                 BUG_ON(1);
1941                 return PTR_ERR(right);
1942         }
1943
1944         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1945         btrfs_set_header_bytenr(right, right->start);
1946         btrfs_set_header_generation(right, trans->transid);
1947         btrfs_set_header_owner(right, root->root_key.objectid);
1948         btrfs_set_header_level(right, 0);
1949         write_extent_buffer(right, root->fs_info->fsid,
1950                             (unsigned long)btrfs_header_fsid(right),
1951                             BTRFS_FSID_SIZE);
1952
1953         write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
1954                             (unsigned long)btrfs_header_chunk_tree_uuid(right),
1955                             BTRFS_UUID_SIZE);
1956         if (mid <= slot) {
1957                 if (nritems == 1 ||
1958                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1959                         BTRFS_LEAF_DATA_SIZE(root)) {
1960                         if (slot >= nritems) {
1961                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1962                                 btrfs_set_header_nritems(right, 0);
1963                                 wret = insert_ptr(trans, root, path,
1964                                                   &disk_key, right->start,
1965                                                   path->slots[1] + 1, 1);
1966                                 if (wret)
1967                                         ret = wret;
1968                                 free_extent_buffer(path->nodes[0]);
1969                                 path->nodes[0] = right;
1970                                 path->slots[0] = 0;
1971                                 path->slots[1] += 1;
1972                                 return ret;
1973                         }
1974                         mid = slot;
1975                         if (mid != nritems &&
1976                             leaf_space_used(l, mid, nritems - mid) +
1977                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1978                                 double_split = 1;
1979                         }
1980                 }
1981         } else {
1982                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1983                         BTRFS_LEAF_DATA_SIZE(root)) {
1984                         if (!extend && slot == 0) {
1985                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1986                                 btrfs_set_header_nritems(right, 0);
1987                                 wret = insert_ptr(trans, root, path,
1988                                                   &disk_key,
1989                                                   right->start,
1990                                                   path->slots[1], 1);
1991                                 if (wret)
1992                                         ret = wret;
1993                                 free_extent_buffer(path->nodes[0]);
1994                                 path->nodes[0] = right;
1995                                 path->slots[0] = 0;
1996                                 if (path->slots[1] == 0) {
1997                                         wret = fixup_low_keys(trans, root,
1998                                                    path, &disk_key, 1);
1999                                         if (wret)
2000                                                 ret = wret;
2001                                 }
2002                                 return ret;
2003                         } else if (extend && slot == 0) {
2004                                 mid = 1;
2005                         } else {
2006                                 mid = slot;
2007                                 if (mid != nritems &&
2008                                     leaf_space_used(l, mid, nritems - mid) +
2009                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2010                                         double_split = 1;
2011                                 }
2012                         }
2013                 }
2014         }
2015         nritems = nritems - mid;
2016         btrfs_set_header_nritems(right, nritems);
2017         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2018
2019         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2020                            btrfs_item_nr_offset(mid),
2021                            nritems * sizeof(struct btrfs_item));
2022
2023         copy_extent_buffer(right, l,
2024                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2025                      data_copy_size, btrfs_leaf_data(l) +
2026                      leaf_data_end(root, l), data_copy_size);
2027
2028         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2029                       btrfs_item_end_nr(l, mid);
2030
2031         for (i = 0; i < nritems; i++) {
2032                 struct btrfs_item *item = btrfs_item_nr(right, i);
2033                 u32 ioff = btrfs_item_offset(right, item);
2034                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2035         }
2036
2037         btrfs_set_header_nritems(l, mid);
2038         ret = 0;
2039         btrfs_item_key(right, &disk_key, 0);
2040         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2041                           path->slots[1] + 1, 1);
2042         if (wret)
2043                 ret = wret;
2044
2045         btrfs_mark_buffer_dirty(right);
2046         btrfs_mark_buffer_dirty(l);
2047         BUG_ON(path->slots[0] != slot);
2048
2049         if (mid <= slot) {
2050                 free_extent_buffer(path->nodes[0]);
2051                 path->nodes[0] = right;
2052                 path->slots[0] -= mid;
2053                 path->slots[1] += 1;
2054         } else
2055                 free_extent_buffer(right);
2056
2057         BUG_ON(path->slots[0] < 0);
2058
2059         if (double_split) {
2060                 BUG_ON(num_doubles != 0);
2061                 num_doubles++;
2062                 goto again;
2063         }
2064         return ret;
2065 }
2066
2067 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2068                         struct btrfs_root *root,
2069                         struct btrfs_path *path,
2070                         u32 new_size, int from_end)
2071 {
2072         int ret = 0;
2073         int slot;
2074         int slot_orig;
2075         struct extent_buffer *leaf;
2076         struct btrfs_item *item;
2077         u32 nritems;
2078         unsigned int data_end;
2079         unsigned int old_data_start;
2080         unsigned int old_size;
2081         unsigned int size_diff;
2082         int i;
2083
2084         slot_orig = path->slots[0];
2085         leaf = path->nodes[0];
2086         slot = path->slots[0];
2087
2088         old_size = btrfs_item_size_nr(leaf, slot);
2089         if (old_size == new_size)
2090                 return 0;
2091
2092         nritems = btrfs_header_nritems(leaf);
2093         data_end = leaf_data_end(root, leaf);
2094
2095         old_data_start = btrfs_item_offset_nr(leaf, slot);
2096
2097         size_diff = old_size - new_size;
2098
2099         BUG_ON(slot < 0);
2100         BUG_ON(slot >= nritems);
2101
2102         /*
2103          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2104          */
2105         /* first correct the data pointers */
2106         for (i = slot; i < nritems; i++) {
2107                 u32 ioff;
2108                 item = btrfs_item_nr(leaf, i);
2109                 ioff = btrfs_item_offset(leaf, item);
2110                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2111         }
2112
2113         /* shift the data */
2114         if (from_end) {
2115                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2116                               data_end + size_diff, btrfs_leaf_data(leaf) +
2117                               data_end, old_data_start + new_size - data_end);
2118         } else {
2119                 struct btrfs_disk_key disk_key;
2120                 u64 offset;
2121
2122                 btrfs_item_key(leaf, &disk_key, slot);
2123
2124                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2125                         unsigned long ptr;
2126                         struct btrfs_file_extent_item *fi;
2127
2128                         fi = btrfs_item_ptr(leaf, slot,
2129                                             struct btrfs_file_extent_item);
2130                         fi = (struct btrfs_file_extent_item *)(
2131                              (unsigned long)fi - size_diff);
2132
2133                         if (btrfs_file_extent_type(leaf, fi) ==
2134                             BTRFS_FILE_EXTENT_INLINE) {
2135                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2136                                 memmove_extent_buffer(leaf, ptr,
2137                                         (unsigned long)fi,
2138                                         offsetof(struct btrfs_file_extent_item,
2139                                                  disk_bytenr));
2140                         }
2141                 }
2142
2143                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2144                               data_end + size_diff, btrfs_leaf_data(leaf) +
2145                               data_end, old_data_start - data_end);
2146
2147                 offset = btrfs_disk_key_offset(&disk_key);
2148                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2149                 btrfs_set_item_key(leaf, &disk_key, slot);
2150                 if (slot == 0)
2151                         fixup_low_keys(trans, root, path, &disk_key, 1);
2152         }
2153
2154         item = btrfs_item_nr(leaf, slot);
2155         btrfs_set_item_size(leaf, item, new_size);
2156         btrfs_mark_buffer_dirty(leaf);
2157
2158         ret = 0;
2159         if (btrfs_leaf_free_space(root, leaf) < 0) {
2160                 btrfs_print_leaf(root, leaf);
2161                 BUG();
2162         }
2163         return ret;
2164 }
2165
2166 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2167                       struct btrfs_root *root, struct btrfs_path *path,
2168                       u32 data_size)
2169 {
2170         int ret = 0;
2171         int slot;
2172         int slot_orig;
2173         struct extent_buffer *leaf;
2174         struct btrfs_item *item;
2175         u32 nritems;
2176         unsigned int data_end;
2177         unsigned int old_data;
2178         unsigned int old_size;
2179         int i;
2180
2181         slot_orig = path->slots[0];
2182         leaf = path->nodes[0];
2183
2184         nritems = btrfs_header_nritems(leaf);
2185         data_end = leaf_data_end(root, leaf);
2186
2187         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2188                 btrfs_print_leaf(root, leaf);
2189                 BUG();
2190         }
2191         slot = path->slots[0];
2192         old_data = btrfs_item_end_nr(leaf, slot);
2193
2194         BUG_ON(slot < 0);
2195         if (slot >= nritems) {
2196                 btrfs_print_leaf(root, leaf);
2197                 printk("slot %d too large, nritems %d\n", slot, nritems);
2198                 BUG_ON(1);
2199         }
2200
2201         /*
2202          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2203          */
2204         /* first correct the data pointers */
2205         for (i = slot; i < nritems; i++) {
2206                 u32 ioff;
2207                 item = btrfs_item_nr(leaf, i);
2208                 ioff = btrfs_item_offset(leaf, item);
2209                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2210         }
2211
2212         /* shift the data */
2213         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2214                       data_end - data_size, btrfs_leaf_data(leaf) +
2215                       data_end, old_data - data_end);
2216
2217         data_end = old_data;
2218         old_size = btrfs_item_size_nr(leaf, slot);
2219         item = btrfs_item_nr(leaf, slot);
2220         btrfs_set_item_size(leaf, item, old_size + data_size);
2221         btrfs_mark_buffer_dirty(leaf);
2222
2223         ret = 0;
2224         if (btrfs_leaf_free_space(root, leaf) < 0) {
2225                 btrfs_print_leaf(root, leaf);
2226                 BUG();
2227         }
2228         return ret;
2229 }
2230
2231 /*
2232  * Given a key and some data, insert an item into the tree.
2233  * This does all the path init required, making room in the tree if needed.
2234  */
2235 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2236                             struct btrfs_root *root,
2237                             struct btrfs_path *path,
2238                             struct btrfs_key *cpu_key, u32 *data_size,
2239                             int nr)
2240 {
2241         struct extent_buffer *leaf;
2242         struct btrfs_item *item;
2243         int ret = 0;
2244         int slot;
2245         int slot_orig;
2246         int i;
2247         u32 nritems;
2248         u32 total_size = 0;
2249         u32 total_data = 0;
2250         unsigned int data_end;
2251         struct btrfs_disk_key disk_key;
2252
2253         for (i = 0; i < nr; i++) {
2254                 total_data += data_size[i];
2255         }
2256
2257         /* create a root if there isn't one */
2258         if (!root->node)
2259                 BUG();
2260
2261         total_size = total_data + (nr - 1) * sizeof(struct btrfs_item);
2262         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2263         if (ret == 0) {
2264                 return -EEXIST;
2265         }
2266         if (ret < 0)
2267                 goto out;
2268
2269         slot_orig = path->slots[0];
2270         leaf = path->nodes[0];
2271
2272         nritems = btrfs_header_nritems(leaf);
2273         data_end = leaf_data_end(root, leaf);
2274
2275         if (btrfs_leaf_free_space(root, leaf) <
2276             sizeof(struct btrfs_item) + total_size) {
2277                 btrfs_print_leaf(root, leaf);
2278                 printk("not enough freespace need %u have %d\n",
2279                        total_size, btrfs_leaf_free_space(root, leaf));
2280                 BUG();
2281         }
2282
2283         slot = path->slots[0];
2284         BUG_ON(slot < 0);
2285
2286         if (slot != nritems) {
2287                 int i;
2288                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2289
2290                 if (old_data < data_end) {
2291                         btrfs_print_leaf(root, leaf);
2292                         printk("slot %d old_data %d data_end %d\n",
2293                                slot, old_data, data_end);
2294                         BUG_ON(1);
2295                 }
2296                 /*
2297                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2298                  */
2299                 /* first correct the data pointers */
2300                 for (i = slot; i < nritems; i++) {
2301                         u32 ioff;
2302
2303                         item = btrfs_item_nr(leaf, i);
2304                         ioff = btrfs_item_offset(leaf, item);
2305                         btrfs_set_item_offset(leaf, item, ioff - total_data);
2306                 }
2307
2308                 /* shift the items */
2309                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2310                               btrfs_item_nr_offset(slot),
2311                               (nritems - slot) * sizeof(struct btrfs_item));
2312
2313                 /* shift the data */
2314                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2315                               data_end - total_data, btrfs_leaf_data(leaf) +
2316                               data_end, old_data - data_end);
2317                 data_end = old_data;
2318         }
2319
2320         /* setup the item for the new data */
2321         for (i = 0; i < nr; i++) {
2322                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
2323                 btrfs_set_item_key(leaf, &disk_key, slot + i);
2324                 item = btrfs_item_nr(leaf, slot + i);
2325                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
2326                 data_end -= data_size[i];
2327                 btrfs_set_item_size(leaf, item, data_size[i]);
2328         }
2329         btrfs_set_header_nritems(leaf, nritems + nr);
2330         btrfs_mark_buffer_dirty(leaf);
2331
2332         ret = 0;
2333         if (slot == 0) {
2334                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2335                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2336         }
2337
2338         if (btrfs_leaf_free_space(root, leaf) < 0) {
2339                 btrfs_print_leaf(root, leaf);
2340                 BUG();
2341         }
2342
2343 out:
2344         return ret;
2345 }
2346
2347 /*
2348  * Given a key and some data, insert an item into the tree.
2349  * This does all the path init required, making room in the tree if needed.
2350  */
2351 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2352                       *root, struct btrfs_key *cpu_key, void *data, u32
2353                       data_size)
2354 {
2355         int ret = 0;
2356         struct btrfs_path *path;
2357         struct extent_buffer *leaf;
2358         unsigned long ptr;
2359
2360         path = btrfs_alloc_path();
2361         BUG_ON(!path);
2362         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2363         if (!ret) {
2364                 leaf = path->nodes[0];
2365                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2366                 write_extent_buffer(leaf, data, ptr, data_size);
2367                 btrfs_mark_buffer_dirty(leaf);
2368         }
2369         btrfs_free_path(path);
2370         return ret;
2371 }
2372
2373 /*
2374  * delete the pointer from a given node.
2375  *
2376  * If the delete empties a node, the node is removed from the tree,
2377  * continuing all the way the root if required.  The root is converted into
2378  * a leaf if all the nodes are emptied.
2379  */
2380 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2381                    struct btrfs_path *path, int level, int slot)
2382 {
2383         struct extent_buffer *parent = path->nodes[level];
2384         u32 nritems;
2385         int ret = 0;
2386         int wret;
2387
2388         nritems = btrfs_header_nritems(parent);
2389         if (slot != nritems -1) {
2390                 memmove_extent_buffer(parent,
2391                               btrfs_node_key_ptr_offset(slot),
2392                               btrfs_node_key_ptr_offset(slot + 1),
2393                               sizeof(struct btrfs_key_ptr) *
2394                               (nritems - slot - 1));
2395         }
2396         nritems--;
2397         btrfs_set_header_nritems(parent, nritems);
2398         if (nritems == 0 && parent == root->node) {
2399                 BUG_ON(btrfs_header_level(root->node) != 1);
2400                 /* just turn the root into a leaf and break */
2401                 btrfs_set_header_level(root->node, 0);
2402         } else if (slot == 0) {
2403                 struct btrfs_disk_key disk_key;
2404
2405                 btrfs_node_key(parent, &disk_key, 0);
2406                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2407                 if (wret)
2408                         ret = wret;
2409         }
2410         btrfs_mark_buffer_dirty(parent);
2411         return ret;
2412 }
2413
2414 /*
2415  * delete the item at the leaf level in path.  If that empties
2416  * the leaf, remove it from the tree
2417  */
2418 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2419                     struct btrfs_path *path, int slot, int nr)
2420 {
2421         struct extent_buffer *leaf;
2422         struct btrfs_item *item;
2423         int last_off;
2424         int dsize = 0;
2425         int ret = 0;
2426         int wret;
2427         int i;
2428         u32 nritems;
2429
2430         leaf = path->nodes[0];
2431         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2432
2433         for (i = 0; i < nr; i++)
2434                 dsize += btrfs_item_size_nr(leaf, slot + i);
2435
2436         nritems = btrfs_header_nritems(leaf);
2437
2438         if (slot + nr != nritems) {
2439                 int i;
2440                 int data_end = leaf_data_end(root, leaf);
2441
2442                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2443                               data_end + dsize,
2444                               btrfs_leaf_data(leaf) + data_end,
2445                               last_off - data_end);
2446
2447                 for (i = slot + nr; i < nritems; i++) {
2448                         u32 ioff;
2449
2450                         item = btrfs_item_nr(leaf, i);
2451                         ioff = btrfs_item_offset(leaf, item);
2452                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2453                 }
2454
2455                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2456                               btrfs_item_nr_offset(slot + nr),
2457                               sizeof(struct btrfs_item) *
2458                               (nritems - slot - nr));
2459         }
2460         btrfs_set_header_nritems(leaf, nritems - nr);
2461         nritems -= nr;
2462
2463         /* delete the leaf if we've emptied it */
2464         if (nritems == 0) {
2465                 if (leaf == root->node) {
2466                         btrfs_set_header_level(leaf, 0);
2467                 } else {
2468                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2469                         clean_tree_block(trans, root, leaf);
2470                         wait_on_tree_block_writeback(root, leaf);
2471                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2472                         if (wret)
2473                                 ret = wret;
2474                         wret = btrfs_free_extent(trans, root,
2475                                          leaf->start, leaf->len,
2476                                          btrfs_header_owner(path->nodes[1]),
2477                                          root_gen, 0, 0, 1);
2478                         if (wret)
2479                                 ret = wret;
2480                 }
2481         } else {
2482                 int used = leaf_space_used(leaf, 0, nritems);
2483                 if (slot == 0) {
2484                         struct btrfs_disk_key disk_key;
2485
2486                         btrfs_item_key(leaf, &disk_key, 0);
2487                         wret = fixup_low_keys(trans, root, path,
2488                                               &disk_key, 1);
2489                         if (wret)
2490                                 ret = wret;
2491                 }
2492
2493                 /* delete the leaf if it is mostly empty */
2494                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2495                         /* push_leaf_left fixes the path.
2496                          * make sure the path still points to our leaf
2497                          * for possible call to del_ptr below
2498                          */
2499                         slot = path->slots[1];
2500                         extent_buffer_get(leaf);
2501
2502                         wret = push_leaf_left(trans, root, path, 1, 1);
2503                         if (wret < 0 && wret != -ENOSPC)
2504                                 ret = wret;
2505
2506                         if (path->nodes[0] == leaf &&
2507                             btrfs_header_nritems(leaf)) {
2508                                 wret = push_leaf_right(trans, root, path, 1, 1);
2509                                 if (wret < 0 && wret != -ENOSPC)
2510                                         ret = wret;
2511                         }
2512
2513                         if (btrfs_header_nritems(leaf) == 0) {
2514                                 u64 root_gen;
2515                                 u64 bytenr = leaf->start;
2516                                 u32 blocksize = leaf->len;
2517
2518                                 root_gen = btrfs_header_generation(
2519                                                            path->nodes[1]);
2520
2521                                 clean_tree_block(trans, root, leaf);
2522                                 wait_on_tree_block_writeback(root, leaf);
2523
2524                                 wret = del_ptr(trans, root, path, 1, slot);
2525                                 if (wret)
2526                                         ret = wret;
2527
2528                                 free_extent_buffer(leaf);
2529                                 wret = btrfs_free_extent(trans, root, bytenr,
2530                                              blocksize,
2531                                              btrfs_header_owner(path->nodes[1]),
2532                                              root_gen, 0, 0, 1);
2533                                 if (wret)
2534                                         ret = wret;
2535                         } else {
2536                                 btrfs_mark_buffer_dirty(leaf);
2537                                 free_extent_buffer(leaf);
2538                         }
2539                 } else {
2540                         btrfs_mark_buffer_dirty(leaf);
2541                 }
2542         }
2543         return ret;
2544 }
2545
2546 /*
2547  * walk up the tree as far as required to find the previous leaf.
2548  * returns 0 if it found something or 1 if there are no lesser leaves.
2549  * returns < 0 on io errors.
2550  */
2551 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2552 {
2553         u64 bytenr;
2554         int slot;
2555         int level = 1;
2556         struct extent_buffer *c;
2557         struct extent_buffer *next = NULL;
2558
2559         while(level < BTRFS_MAX_LEVEL) {
2560                 if (!path->nodes[level])
2561                         return 1;
2562
2563                 slot = path->slots[level];
2564                 c = path->nodes[level];
2565                 if (slot == 0) {
2566                         level++;
2567                         if (level == BTRFS_MAX_LEVEL)
2568                                 return 1;
2569                         continue;
2570                 }
2571                 slot--;
2572
2573                 bytenr = btrfs_node_blockptr(c, slot);
2574                 if (next)
2575                         free_extent_buffer(next);
2576
2577                 next = read_tree_block(root, bytenr,
2578                                        btrfs_level_size(root, level - 1));
2579                 break;
2580         }
2581         path->slots[level] = slot;
2582         while(1) {
2583                 level--;
2584                 c = path->nodes[level];
2585                 free_extent_buffer(c);
2586                 slot = btrfs_header_nritems(next);
2587                 if (slot != 0)
2588                         slot--;
2589                 path->nodes[level] = next;
2590                 path->slots[level] = slot;
2591                 if (!level)
2592                         break;
2593                 next = read_tree_block(root, btrfs_node_blockptr(next, slot),
2594                                        btrfs_level_size(root, level - 1));
2595         }
2596         return 0;
2597 }
2598
2599 /*
2600  * walk up the tree as far as required to find the next leaf.
2601  * returns 0 if it found something or 1 if there are no greater leaves.
2602  * returns < 0 on io errors.
2603  */
2604 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2605 {
2606         int slot;
2607         int level = 1;
2608         u64 bytenr;
2609         struct extent_buffer *c;
2610         struct extent_buffer *next = NULL;
2611
2612         while(level < BTRFS_MAX_LEVEL) {
2613                 if (!path->nodes[level])
2614                         return 1;
2615
2616                 slot = path->slots[level] + 1;
2617                 c = path->nodes[level];
2618                 if (slot >= btrfs_header_nritems(c)) {
2619                         level++;
2620                         if (level == BTRFS_MAX_LEVEL)
2621                                 return 1;
2622                         continue;
2623                 }
2624
2625                 bytenr = btrfs_node_blockptr(c, slot);
2626                 if (next)
2627                         free_extent_buffer(next);
2628
2629                 if (path->reada)
2630                         reada_for_search(root, path, level, slot, 0);
2631
2632                 next = read_tree_block(root, bytenr,
2633                                        btrfs_level_size(root, level -1));
2634                 break;
2635         }
2636         path->slots[level] = slot;
2637         while(1) {
2638                 level--;
2639                 c = path->nodes[level];
2640                 free_extent_buffer(c);
2641                 path->nodes[level] = next;
2642                 path->slots[level] = 0;
2643                 if (!level)
2644                         break;
2645                 if (path->reada)
2646                         reada_for_search(root, path, level, 0, 0);
2647                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2648                                        btrfs_level_size(root, level - 1));
2649         }
2650         return 0;
2651 }
2652
2653 int btrfs_previous_item(struct btrfs_root *root,
2654                         struct btrfs_path *path, u64 min_objectid,
2655                         int type)
2656 {
2657         struct btrfs_key found_key;
2658         struct extent_buffer *leaf;
2659         int ret;
2660
2661         while(1) {
2662                 if (path->slots[0] == 0) {
2663                         ret = btrfs_prev_leaf(root, path);
2664                         if (ret != 0)
2665                                 return ret;
2666                 } else {
2667                         path->slots[0]--;
2668                 }
2669                 leaf = path->nodes[0];
2670                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2671                 if (found_key.type == type)
2672                         return 0;
2673         }
2674         return 1;
2675 }
2676