Add rollback support for the converter
[platform/upstream/btrfs-progs.git] / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24
25 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26                       *root, struct btrfs_path *path, int level);
27 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28                       *root, struct btrfs_key *ins_key,
29                       struct btrfs_path *path, int data_size, int extend);
30 static int push_node_left(struct btrfs_trans_handle *trans,
31                           struct btrfs_root *root, struct extent_buffer *dst,
32                           struct extent_buffer *src);
33 static int balance_node_right(struct btrfs_trans_handle *trans,
34                               struct btrfs_root *root,
35                               struct extent_buffer *dst_buf,
36                               struct extent_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38                    struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42         memset(p, 0, sizeof(*p));
43 }
44
45 struct btrfs_path *btrfs_alloc_path(void)
46 {
47         struct btrfs_path *path;
48         path = kmalloc(sizeof(struct btrfs_path), GFP_NOFS);
49         if (path) {
50                 btrfs_init_path(path);
51                 path->reada = 0;
52         }
53         return path;
54 }
55
56 void btrfs_free_path(struct btrfs_path *p)
57 {
58         btrfs_release_path(NULL, p);
59         kfree(p);
60 }
61
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 {
64         int i;
65         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66                 if (!p->nodes[i])
67                         break;
68                 free_extent_buffer(p->nodes[i]);
69         }
70         memset(p, 0, sizeof(*p));
71 }
72
73 int btrfs_copy_root(struct btrfs_trans_handle *trans,
74                       struct btrfs_root *root,
75                       struct extent_buffer *buf,
76                       struct extent_buffer **cow_ret, u64 new_root_objectid)
77 {
78         struct extent_buffer *cow;
79         u32 nritems;
80         int ret = 0;
81         int level;
82         struct btrfs_key first_key;
83         struct btrfs_root *new_root;
84
85         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
86         if (!new_root)
87                 return -ENOMEM;
88
89         memcpy(new_root, root, sizeof(*new_root));
90         new_root->root_key.objectid = new_root_objectid;
91
92         WARN_ON(root->ref_cows && trans->transid !=
93                 root->fs_info->running_transaction->transid);
94         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
95
96         level = btrfs_header_level(buf);
97         nritems = btrfs_header_nritems(buf);
98         if (nritems) {
99                 if (level == 0)
100                         btrfs_item_key_to_cpu(buf, &first_key, 0);
101                 else
102                         btrfs_node_key_to_cpu(buf, &first_key, 0);
103         } else {
104                 first_key.objectid = 0;
105         }
106         cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
107                                        new_root_objectid,
108                                        trans->transid, first_key.objectid,
109                                        level, buf->start, 0);
110         if (IS_ERR(cow)) {
111                 kfree(new_root);
112                 return PTR_ERR(cow);
113         }
114
115         copy_extent_buffer(cow, buf, 0, 0, cow->len);
116         btrfs_set_header_bytenr(cow, cow->start);
117         btrfs_set_header_generation(cow, trans->transid);
118         btrfs_set_header_owner(cow, new_root_objectid);
119
120         WARN_ON(btrfs_header_generation(buf) > trans->transid);
121         ret = btrfs_inc_ref(trans, new_root, buf);
122         kfree(new_root);
123
124         if (ret)
125                 return ret;
126
127         btrfs_mark_buffer_dirty(cow);
128         *cow_ret = cow;
129         return 0;
130 }
131
132 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
133                              struct btrfs_root *root,
134                              struct extent_buffer *buf,
135                              struct extent_buffer *parent, int parent_slot,
136                              struct extent_buffer **cow_ret,
137                              u64 search_start, u64 empty_size)
138 {
139         u64 root_gen;
140         struct extent_buffer *cow;
141         u32 nritems;
142         int ret = 0;
143         int different_trans = 0;
144         int level;
145         struct btrfs_key first_key;
146
147         if (root->ref_cows) {
148                 root_gen = trans->transid;
149         } else {
150                 root_gen = 0;
151         }
152
153         WARN_ON(root->ref_cows && trans->transid !=
154                 root->fs_info->running_transaction->transid);
155         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
156
157         level = btrfs_header_level(buf);
158         nritems = btrfs_header_nritems(buf);
159         if (nritems) {
160                 if (level == 0)
161                         btrfs_item_key_to_cpu(buf, &first_key, 0);
162                 else
163                         btrfs_node_key_to_cpu(buf, &first_key, 0);
164         } else {
165                 first_key.objectid = 0;
166         }
167         cow = __btrfs_alloc_free_block(trans, root, buf->len,
168                                      root->root_key.objectid,
169                                      root_gen, first_key.objectid, level,
170                                      search_start, empty_size);
171         if (IS_ERR(cow))
172                 return PTR_ERR(cow);
173
174         copy_extent_buffer(cow, buf, 0, 0, cow->len);
175         btrfs_set_header_bytenr(cow, cow->start);
176         btrfs_set_header_generation(cow, trans->transid);
177         btrfs_set_header_owner(cow, root->root_key.objectid);
178
179         WARN_ON(btrfs_header_generation(buf) > trans->transid);
180         if (btrfs_header_generation(buf) != trans->transid) {
181                 different_trans = 1;
182                 ret = btrfs_inc_ref(trans, root, buf);
183                 if (ret)
184                         return ret;
185         } else {
186                 clean_tree_block(trans, root, buf);
187         }
188
189         if (buf == root->node) {
190                 root_gen = btrfs_header_generation(buf);
191                 root->node = cow;
192                 extent_buffer_get(cow);
193                 if (buf != root->commit_root) {
194                         btrfs_free_extent(trans, root, buf->start,
195                                           buf->len, root->root_key.objectid,
196                                           root_gen, 0, 0, 1);
197                 }
198                 free_extent_buffer(buf);
199         } else {
200                 root_gen = btrfs_header_generation(parent);
201                 btrfs_set_node_blockptr(parent, parent_slot,
202                                         cow->start);
203                 WARN_ON(trans->transid == 0);
204                 btrfs_set_node_ptr_generation(parent, parent_slot,
205                                               trans->transid);
206                 btrfs_mark_buffer_dirty(parent);
207                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
208                 btrfs_free_extent(trans, root, buf->start, buf->len,
209                                   btrfs_header_owner(parent), root_gen,
210                                   0, 0, 1);
211         }
212         free_extent_buffer(buf);
213         btrfs_mark_buffer_dirty(cow);
214         *cow_ret = cow;
215         return 0;
216 }
217
218 int btrfs_cow_block(struct btrfs_trans_handle *trans,
219                     struct btrfs_root *root, struct extent_buffer *buf,
220                     struct extent_buffer *parent, int parent_slot,
221                     struct extent_buffer **cow_ret)
222 {
223         u64 search_start;
224         int ret;
225         /*
226         if (trans->transaction != root->fs_info->running_transaction) {
227                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
228                        root->fs_info->running_transaction->transid);
229                 WARN_ON(1);
230         }
231         */
232         if (trans->transid != root->fs_info->generation) {
233                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
234                        root->fs_info->generation);
235                 WARN_ON(1);
236         }
237         if (btrfs_header_generation(buf) == trans->transid) {
238                 *cow_ret = buf;
239                 return 0;
240         }
241
242         search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
243         ret = __btrfs_cow_block(trans, root, buf, parent,
244                                  parent_slot, cow_ret, search_start, 0);
245         return ret;
246 }
247
248 /*
249 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
250 {
251         if (blocknr < other && other - (blocknr + blocksize) < 32768)
252                 return 1;
253         if (blocknr > other && blocknr - (other + blocksize) < 32768)
254                 return 1;
255         return 0;
256 }
257 */
258
259 /*
260  * compare two keys in a memcmp fashion
261  */
262 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
263 {
264         struct btrfs_key k1;
265
266         btrfs_disk_key_to_cpu(&k1, disk);
267
268         if (k1.objectid > k2->objectid)
269                 return 1;
270         if (k1.objectid < k2->objectid)
271                 return -1;
272         if (k1.type > k2->type)
273                 return 1;
274         if (k1.type < k2->type)
275                 return -1;
276         if (k1.offset > k2->offset)
277                 return 1;
278         if (k1.offset < k2->offset)
279                 return -1;
280         return 0;
281 }
282
283
284 #if 0
285 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
286                        struct btrfs_root *root, struct extent_buffer *parent,
287                        int start_slot, int cache_only, u64 *last_ret,
288                        struct btrfs_key *progress)
289 {
290         struct extent_buffer *cur;
291         struct extent_buffer *tmp;
292         u64 blocknr;
293         u64 search_start = *last_ret;
294         u64 last_block = 0;
295         u64 other;
296         u32 parent_nritems;
297         int end_slot;
298         int i;
299         int err = 0;
300         int parent_level;
301         int uptodate;
302         u32 blocksize;
303         int progress_passed = 0;
304         struct btrfs_disk_key disk_key;
305
306         parent_level = btrfs_header_level(parent);
307         if (cache_only && parent_level != 1)
308                 return 0;
309
310         if (trans->transaction != root->fs_info->running_transaction) {
311                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
312                        root->fs_info->running_transaction->transid);
313                 WARN_ON(1);
314         }
315         if (trans->transid != root->fs_info->generation) {
316                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
317                        root->fs_info->generation);
318                 WARN_ON(1);
319         }
320
321         parent_nritems = btrfs_header_nritems(parent);
322         blocksize = btrfs_level_size(root, parent_level - 1);
323         end_slot = parent_nritems;
324
325         if (parent_nritems == 1)
326                 return 0;
327
328         for (i = start_slot; i < end_slot; i++) {
329                 int close = 1;
330
331                 if (!parent->map_token) {
332                         map_extent_buffer(parent,
333                                         btrfs_node_key_ptr_offset(i),
334                                         sizeof(struct btrfs_key_ptr),
335                                         &parent->map_token, &parent->kaddr,
336                                         &parent->map_start, &parent->map_len,
337                                         KM_USER1);
338                 }
339                 btrfs_node_key(parent, &disk_key, i);
340                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
341                         continue;
342
343                 progress_passed = 1;
344                 blocknr = btrfs_node_blockptr(parent, i);
345                 if (last_block == 0)
346                         last_block = blocknr;
347
348                 if (i > 0) {
349                         other = btrfs_node_blockptr(parent, i - 1);
350                         close = close_blocks(blocknr, other, blocksize);
351                 }
352                 if (close && i < end_slot - 2) {
353                         other = btrfs_node_blockptr(parent, i + 1);
354                         close = close_blocks(blocknr, other, blocksize);
355                 }
356                 if (close) {
357                         last_block = blocknr;
358                         continue;
359                 }
360                 if (parent->map_token) {
361                         unmap_extent_buffer(parent, parent->map_token,
362                                             KM_USER1);
363                         parent->map_token = NULL;
364                 }
365
366                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
367                 if (cur)
368                         uptodate = btrfs_buffer_uptodate(cur);
369                 else
370                         uptodate = 0;
371                 if (!cur || !uptodate) {
372                         if (cache_only) {
373                                 free_extent_buffer(cur);
374                                 continue;
375                         }
376                         if (!cur) {
377                                 cur = read_tree_block(root, blocknr,
378                                                          blocksize);
379                         } else if (!uptodate) {
380                                 btrfs_read_buffer(cur);
381                         }
382                 }
383                 if (search_start == 0)
384                         search_start = last_block;
385
386                 err = __btrfs_cow_block(trans, root, cur, parent, i,
387                                         &tmp, search_start,
388                                         min(16 * blocksize,
389                                             (end_slot - i) * blocksize));
390                 if (err) {
391                         free_extent_buffer(cur);
392                         break;
393                 }
394                 search_start = tmp->start;
395                 last_block = tmp->start;
396                 *last_ret = search_start;
397                 if (parent_level == 1)
398                         btrfs_clear_buffer_defrag(tmp);
399                 free_extent_buffer(tmp);
400         }
401         if (parent->map_token) {
402                 unmap_extent_buffer(parent, parent->map_token,
403                                     KM_USER1);
404                 parent->map_token = NULL;
405         }
406         return err;
407 }
408 #endif
409
410 /*
411  * The leaf data grows from end-to-front in the node.
412  * this returns the address of the start of the last item,
413  * which is the stop of the leaf data stack
414  */
415 static inline unsigned int leaf_data_end(struct btrfs_root *root,
416                                          struct extent_buffer *leaf)
417 {
418         u32 nr = btrfs_header_nritems(leaf);
419         if (nr == 0)
420                 return BTRFS_LEAF_DATA_SIZE(root);
421         return btrfs_item_offset_nr(leaf, nr - 1);
422 }
423
424 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
425                       int level)
426 {
427         struct extent_buffer *parent = NULL;
428         struct extent_buffer *node = path->nodes[level];
429         struct btrfs_disk_key parent_key;
430         struct btrfs_disk_key node_key;
431         int parent_slot;
432         int slot;
433         struct btrfs_key cpukey;
434         u32 nritems = btrfs_header_nritems(node);
435
436         if (path->nodes[level + 1])
437                 parent = path->nodes[level + 1];
438
439         slot = path->slots[level];
440         BUG_ON(nritems == 0);
441         if (parent) {
442                 parent_slot = path->slots[level + 1];
443                 btrfs_node_key(parent, &parent_key, parent_slot);
444                 btrfs_node_key(node, &node_key, 0);
445                 BUG_ON(memcmp(&parent_key, &node_key,
446                               sizeof(struct btrfs_disk_key)));
447                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
448                        btrfs_header_bytenr(node));
449         }
450         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
451         if (slot != 0) {
452                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
453                 btrfs_node_key(node, &node_key, slot);
454                 BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0);
455         }
456         if (slot < nritems - 1) {
457                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
458                 btrfs_node_key(node, &node_key, slot);
459                 BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0);
460         }
461         return 0;
462 }
463
464 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
465                       int level)
466 {
467         struct extent_buffer *leaf = path->nodes[level];
468         struct extent_buffer *parent = NULL;
469         int parent_slot;
470         struct btrfs_key cpukey;
471         struct btrfs_disk_key parent_key;
472         struct btrfs_disk_key leaf_key;
473         int slot = path->slots[0];
474
475         u32 nritems = btrfs_header_nritems(leaf);
476
477         if (path->nodes[level + 1])
478                 parent = path->nodes[level + 1];
479
480         if (nritems == 0)
481                 return 0;
482
483         if (parent) {
484                 parent_slot = path->slots[level + 1];
485                 btrfs_node_key(parent, &parent_key, parent_slot);
486                 btrfs_item_key(leaf, &leaf_key, 0);
487
488                 BUG_ON(memcmp(&parent_key, &leaf_key,
489                        sizeof(struct btrfs_disk_key)));
490                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
491                        btrfs_header_bytenr(leaf));
492         }
493 #if 0
494         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
495                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
496                 btrfs_item_key(leaf, &leaf_key, i);
497                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
498                         btrfs_print_leaf(root, leaf);
499                         printk("slot %d offset bad key\n", i);
500                         BUG_ON(1);
501                 }
502                 if (btrfs_item_offset_nr(leaf, i) !=
503                         btrfs_item_end_nr(leaf, i + 1)) {
504                         btrfs_print_leaf(root, leaf);
505                         printk("slot %d offset bad\n", i);
506                         BUG_ON(1);
507                 }
508                 if (i == 0) {
509                         if (btrfs_item_offset_nr(leaf, i) +
510                                btrfs_item_size_nr(leaf, i) !=
511                                BTRFS_LEAF_DATA_SIZE(root)) {
512                                 btrfs_print_leaf(root, leaf);
513                                 printk("slot %d first offset bad\n", i);
514                                 BUG_ON(1);
515                         }
516                 }
517         }
518         if (nritems > 0) {
519                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
520                                 btrfs_print_leaf(root, leaf);
521                                 printk("slot %d bad size \n", nritems - 1);
522                                 BUG_ON(1);
523                 }
524         }
525 #endif
526         if (slot != 0 && slot < nritems - 1) {
527                 btrfs_item_key(leaf, &leaf_key, slot);
528                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
529                 if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) {
530                         btrfs_print_leaf(root, leaf);
531                         printk("slot %d offset bad key\n", slot);
532                         BUG_ON(1);
533                 }
534                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
535                        btrfs_item_end_nr(leaf, slot)) {
536                         btrfs_print_leaf(root, leaf);
537                         printk("slot %d offset bad\n", slot);
538                         BUG_ON(1);
539                 }
540         }
541         if (slot < nritems - 1) {
542                 btrfs_item_key(leaf, &leaf_key, slot);
543                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
544                 BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0);
545                 if (btrfs_item_offset_nr(leaf, slot) !=
546                         btrfs_item_end_nr(leaf, slot + 1)) {
547                         btrfs_print_leaf(root, leaf);
548                         printk("slot %d offset bad\n", slot);
549                         BUG_ON(1);
550                 }
551         }
552         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
553                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
554         return 0;
555 }
556
557 static int noinline check_block(struct btrfs_root *root,
558                                 struct btrfs_path *path, int level)
559 {
560         return 0;
561 #if 0
562         struct extent_buffer *buf = path->nodes[level];
563
564         if (memcmp_extent_buffer(buf, root->fs_info->fsid,
565                                  (unsigned long)btrfs_header_fsid(buf),
566                                  BTRFS_FSID_SIZE)) {
567                 printk("warning bad block %Lu\n", buf->start);
568                 return 1;
569         }
570 #endif
571         if (level == 0)
572                 return check_leaf(root, path, level);
573         return check_node(root, path, level);
574 }
575
576 /*
577  * search for key in the extent_buffer.  The items start at offset p,
578  * and they are item_size apart.  There are 'max' items in p.
579  *
580  * the slot in the array is returned via slot, and it points to
581  * the place where you would insert key if it is not found in
582  * the array.
583  *
584  * slot may point to max if the key is bigger than all of the keys
585  */
586 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
587                               int item_size, struct btrfs_key *key,
588                               int max, int *slot)
589 {
590         int low = 0;
591         int high = max;
592         int mid;
593         int ret;
594         unsigned long offset;
595         struct btrfs_disk_key *tmp;
596
597         while(low < high) {
598                 mid = (low + high) / 2;
599                 offset = p + mid * item_size;
600
601                 tmp = (struct btrfs_disk_key *)(eb->data + offset);
602                 ret = btrfs_comp_keys(tmp, key);
603
604                 if (ret < 0)
605                         low = mid + 1;
606                 else if (ret > 0)
607                         high = mid;
608                 else {
609                         *slot = mid;
610                         return 0;
611                 }
612         }
613         *slot = low;
614         return 1;
615 }
616
617 /*
618  * simple bin_search frontend that does the right thing for
619  * leaves vs nodes
620  */
621 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
622                       int level, int *slot)
623 {
624         if (level == 0) {
625                 return generic_bin_search(eb,
626                                           offsetof(struct btrfs_leaf, items),
627                                           sizeof(struct btrfs_item),
628                                           key, btrfs_header_nritems(eb),
629                                           slot);
630         } else {
631                 return generic_bin_search(eb,
632                                           offsetof(struct btrfs_node, ptrs),
633                                           sizeof(struct btrfs_key_ptr),
634                                           key, btrfs_header_nritems(eb),
635                                           slot);
636         }
637         return -1;
638 }
639
640 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
641                                    struct extent_buffer *parent, int slot)
642 {
643         if (slot < 0)
644                 return NULL;
645         if (slot >= btrfs_header_nritems(parent))
646                 return NULL;
647         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
648                        btrfs_level_size(root, btrfs_header_level(parent) - 1));
649 }
650
651 static int balance_level(struct btrfs_trans_handle *trans,
652                          struct btrfs_root *root,
653                          struct btrfs_path *path, int level)
654 {
655         struct extent_buffer *right = NULL;
656         struct extent_buffer *mid;
657         struct extent_buffer *left = NULL;
658         struct extent_buffer *parent = NULL;
659         int ret = 0;
660         int wret;
661         int pslot;
662         int orig_slot = path->slots[level];
663         int err_on_enospc = 0;
664         u64 orig_ptr;
665
666         if (level == 0)
667                 return 0;
668
669         mid = path->nodes[level];
670         WARN_ON(btrfs_header_generation(mid) != trans->transid);
671
672         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
673
674         if (level < BTRFS_MAX_LEVEL - 1)
675                 parent = path->nodes[level + 1];
676         pslot = path->slots[level + 1];
677
678         /*
679          * deal with the case where there is only one pointer in the root
680          * by promoting the node below to a root
681          */
682         if (!parent) {
683                 struct extent_buffer *child;
684
685                 if (btrfs_header_nritems(mid) != 1)
686                         return 0;
687
688                 /* promote the child to a root */
689                 child = read_node_slot(root, mid, 0);
690                 BUG_ON(!child);
691                 root->node = child;
692                 path->nodes[level] = NULL;
693                 clean_tree_block(trans, root, mid);
694                 wait_on_tree_block_writeback(root, mid);
695                 /* once for the path */
696                 free_extent_buffer(mid);
697                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
698                                         root->root_key.objectid,
699                                         btrfs_header_generation(mid), 0, 0, 1);
700                 /* once for the root ptr */
701                 free_extent_buffer(mid);
702                 return ret;
703         }
704         if (btrfs_header_nritems(mid) >
705             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
706                 return 0;
707
708         if (btrfs_header_nritems(mid) < 2)
709                 err_on_enospc = 1;
710
711         left = read_node_slot(root, parent, pslot - 1);
712         if (left) {
713                 wret = btrfs_cow_block(trans, root, left,
714                                        parent, pslot - 1, &left);
715                 if (wret) {
716                         ret = wret;
717                         goto enospc;
718                 }
719         }
720         right = read_node_slot(root, parent, pslot + 1);
721         if (right) {
722                 wret = btrfs_cow_block(trans, root, right,
723                                        parent, pslot + 1, &right);
724                 if (wret) {
725                         ret = wret;
726                         goto enospc;
727                 }
728         }
729
730         /* first, try to make some room in the middle buffer */
731         if (left) {
732                 orig_slot += btrfs_header_nritems(left);
733                 wret = push_node_left(trans, root, left, mid);
734                 if (wret < 0)
735                         ret = wret;
736                 if (btrfs_header_nritems(mid) < 2)
737                         err_on_enospc = 1;
738         }
739
740         /*
741          * then try to empty the right most buffer into the middle
742          */
743         if (right) {
744                 wret = push_node_left(trans, root, mid, right);
745                 if (wret < 0 && wret != -ENOSPC)
746                         ret = wret;
747                 if (btrfs_header_nritems(right) == 0) {
748                         u64 bytenr = right->start;
749                         u64 generation = btrfs_header_generation(parent);
750                         u32 blocksize = right->len;
751
752                         clean_tree_block(trans, root, right);
753                         wait_on_tree_block_writeback(root, right);
754                         free_extent_buffer(right);
755                         right = NULL;
756                         wret = del_ptr(trans, root, path, level + 1, pslot +
757                                        1);
758                         if (wret)
759                                 ret = wret;
760                         wret = btrfs_free_extent(trans, root, bytenr,
761                                                  blocksize,
762                                                  btrfs_header_owner(parent),
763                                                  generation, 0, 0, 1);
764                         if (wret)
765                                 ret = wret;
766                 } else {
767                         struct btrfs_disk_key right_key;
768                         btrfs_node_key(right, &right_key, 0);
769                         btrfs_set_node_key(parent, &right_key, pslot + 1);
770                         btrfs_mark_buffer_dirty(parent);
771                 }
772         }
773         if (btrfs_header_nritems(mid) == 1) {
774                 /*
775                  * we're not allowed to leave a node with one item in the
776                  * tree during a delete.  A deletion from lower in the tree
777                  * could try to delete the only pointer in this node.
778                  * So, pull some keys from the left.
779                  * There has to be a left pointer at this point because
780                  * otherwise we would have pulled some pointers from the
781                  * right
782                  */
783                 BUG_ON(!left);
784                 wret = balance_node_right(trans, root, mid, left);
785                 if (wret < 0) {
786                         ret = wret;
787                         goto enospc;
788                 }
789                 BUG_ON(wret == 1);
790         }
791         if (btrfs_header_nritems(mid) == 0) {
792                 /* we've managed to empty the middle node, drop it */
793                 u64 root_gen = btrfs_header_generation(parent);
794                 u64 bytenr = mid->start;
795                 u32 blocksize = mid->len;
796                 clean_tree_block(trans, root, mid);
797                 wait_on_tree_block_writeback(root, mid);
798                 free_extent_buffer(mid);
799                 mid = NULL;
800                 wret = del_ptr(trans, root, path, level + 1, pslot);
801                 if (wret)
802                         ret = wret;
803                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
804                                          btrfs_header_owner(parent),
805                                          root_gen, 0, 0, 1);
806                 if (wret)
807                         ret = wret;
808         } else {
809                 /* update the parent key to reflect our changes */
810                 struct btrfs_disk_key mid_key;
811                 btrfs_node_key(mid, &mid_key, 0);
812                 btrfs_set_node_key(parent, &mid_key, pslot);
813                 btrfs_mark_buffer_dirty(parent);
814         }
815
816         /* update the path */
817         if (left) {
818                 if (btrfs_header_nritems(left) > orig_slot) {
819                         extent_buffer_get(left);
820                         path->nodes[level] = left;
821                         path->slots[level + 1] -= 1;
822                         path->slots[level] = orig_slot;
823                         if (mid)
824                                 free_extent_buffer(mid);
825                 } else {
826                         orig_slot -= btrfs_header_nritems(left);
827                         path->slots[level] = orig_slot;
828                 }
829         }
830         /* double check we haven't messed things up */
831         check_block(root, path, level);
832         if (orig_ptr !=
833             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
834                 BUG();
835 enospc:
836         if (right)
837                 free_extent_buffer(right);
838         if (left)
839                 free_extent_buffer(left);
840         return ret;
841 }
842
843 /* returns zero if the push worked, non-zero otherwise */
844 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
845                                           struct btrfs_root *root,
846                                           struct btrfs_path *path, int level)
847 {
848         struct extent_buffer *right = NULL;
849         struct extent_buffer *mid;
850         struct extent_buffer *left = NULL;
851         struct extent_buffer *parent = NULL;
852         int ret = 0;
853         int wret;
854         int pslot;
855         int orig_slot = path->slots[level];
856         u64 orig_ptr;
857
858         if (level == 0)
859                 return 1;
860
861         mid = path->nodes[level];
862         WARN_ON(btrfs_header_generation(mid) != trans->transid);
863         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
864
865         if (level < BTRFS_MAX_LEVEL - 1)
866                 parent = path->nodes[level + 1];
867         pslot = path->slots[level + 1];
868
869         if (!parent)
870                 return 1;
871
872         left = read_node_slot(root, parent, pslot - 1);
873
874         /* first, try to make some room in the middle buffer */
875         if (left) {
876                 u32 left_nr;
877                 left_nr = btrfs_header_nritems(left);
878                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
879                         wret = 1;
880                 } else {
881                         ret = btrfs_cow_block(trans, root, left, parent,
882                                               pslot - 1, &left);
883                         if (ret)
884                                 wret = 1;
885                         else {
886                                 wret = push_node_left(trans, root,
887                                                       left, mid);
888                         }
889                 }
890                 if (wret < 0)
891                         ret = wret;
892                 if (wret == 0) {
893                         struct btrfs_disk_key disk_key;
894                         orig_slot += left_nr;
895                         btrfs_node_key(mid, &disk_key, 0);
896                         btrfs_set_node_key(parent, &disk_key, pslot);
897                         btrfs_mark_buffer_dirty(parent);
898                         if (btrfs_header_nritems(left) > orig_slot) {
899                                 path->nodes[level] = left;
900                                 path->slots[level + 1] -= 1;
901                                 path->slots[level] = orig_slot;
902                                 free_extent_buffer(mid);
903                         } else {
904                                 orig_slot -=
905                                         btrfs_header_nritems(left);
906                                 path->slots[level] = orig_slot;
907                                 free_extent_buffer(left);
908                         }
909                         return 0;
910                 }
911                 free_extent_buffer(left);
912         }
913         right= read_node_slot(root, parent, pslot + 1);
914
915         /*
916          * then try to empty the right most buffer into the middle
917          */
918         if (right) {
919                 u32 right_nr;
920                 right_nr = btrfs_header_nritems(right);
921                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
922                         wret = 1;
923                 } else {
924                         ret = btrfs_cow_block(trans, root, right,
925                                               parent, pslot + 1,
926                                               &right);
927                         if (ret)
928                                 wret = 1;
929                         else {
930                                 wret = balance_node_right(trans, root,
931                                                           right, mid);
932                         }
933                 }
934                 if (wret < 0)
935                         ret = wret;
936                 if (wret == 0) {
937                         struct btrfs_disk_key disk_key;
938
939                         btrfs_node_key(right, &disk_key, 0);
940                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
941                         btrfs_mark_buffer_dirty(parent);
942
943                         if (btrfs_header_nritems(mid) <= orig_slot) {
944                                 path->nodes[level] = right;
945                                 path->slots[level + 1] += 1;
946                                 path->slots[level] = orig_slot -
947                                         btrfs_header_nritems(mid);
948                                 free_extent_buffer(mid);
949                         } else {
950                                 free_extent_buffer(right);
951                         }
952                         return 0;
953                 }
954                 free_extent_buffer(right);
955         }
956         return 1;
957 }
958
959 /*
960  * readahead one full node of leaves
961  */
962 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
963                              int level, int slot, u64 objectid)
964 {
965         struct extent_buffer *node;
966         struct btrfs_disk_key disk_key;
967         u32 nritems;
968         u64 search;
969         u64 lowest_read;
970         u64 highest_read;
971         u64 nread = 0;
972         int direction = path->reada;
973         struct extent_buffer *eb;
974         u32 nr;
975         u32 blocksize;
976         u32 nscan = 0;
977
978         if (level != 1)
979                 return;
980
981         if (!path->nodes[level])
982                 return;
983
984         node = path->nodes[level];
985         search = btrfs_node_blockptr(node, slot);
986         blocksize = btrfs_level_size(root, level - 1);
987         eb = btrfs_find_tree_block(root, search, blocksize);
988         if (eb) {
989                 free_extent_buffer(eb);
990                 return;
991         }
992
993         highest_read = search;
994         lowest_read = search;
995
996         nritems = btrfs_header_nritems(node);
997         nr = slot;
998         while(1) {
999                 if (direction < 0) {
1000                         if (nr == 0)
1001                                 break;
1002                         nr--;
1003                 } else if (direction > 0) {
1004                         nr++;
1005                         if (nr >= nritems)
1006                                 break;
1007                 }
1008                 if (path->reada < 0 && objectid) {
1009                         btrfs_node_key(node, &disk_key, nr);
1010                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1011                                 break;
1012                 }
1013                 search = btrfs_node_blockptr(node, nr);
1014                 if ((search >= lowest_read && search <= highest_read) ||
1015                     (search < lowest_read && lowest_read - search <= 32768) ||
1016                     (search > highest_read && search - highest_read <= 32768)) {
1017                         readahead_tree_block(root, search, blocksize);
1018                         nread += blocksize;
1019                 }
1020                 nscan++;
1021                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1022                         break;
1023                 if(nread > (1024 * 1024) || nscan > 128)
1024                         break;
1025
1026                 if (search < lowest_read)
1027                         lowest_read = search;
1028                 if (search > highest_read)
1029                         highest_read = search;
1030         }
1031 }
1032
1033 /*
1034  * look for key in the tree.  path is filled in with nodes along the way
1035  * if key is found, we return zero and you can find the item in the leaf
1036  * level of the path (level 0)
1037  *
1038  * If the key isn't found, the path points to the slot where it should
1039  * be inserted, and 1 is returned.  If there are other errors during the
1040  * search a negative error number is returned.
1041  *
1042  * if ins_len > 0, nodes and leaves will be split as we walk down the
1043  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1044  * possible)
1045  */
1046 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1047                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1048                       ins_len, int cow)
1049 {
1050         struct extent_buffer *b;
1051         u64 bytenr;
1052         u64 ptr_gen;
1053         int slot;
1054         int ret;
1055         int level;
1056         int should_reada = p->reada;
1057         u8 lowest_level = 0;
1058
1059         lowest_level = p->lowest_level;
1060         WARN_ON(lowest_level && ins_len);
1061         WARN_ON(p->nodes[0] != NULL);
1062         /*
1063         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1064         */
1065 again:
1066         b = root->node;
1067         extent_buffer_get(b);
1068         while (b) {
1069                 level = btrfs_header_level(b);
1070                 if (cow) {
1071                         int wret;
1072                         wret = btrfs_cow_block(trans, root, b,
1073                                                p->nodes[level + 1],
1074                                                p->slots[level + 1],
1075                                                &b);
1076                         if (wret) {
1077                                 free_extent_buffer(b);
1078                                 return wret;
1079                         }
1080                 }
1081                 BUG_ON(!cow && ins_len);
1082                 if (level != btrfs_header_level(b))
1083                         WARN_ON(1);
1084                 level = btrfs_header_level(b);
1085                 p->nodes[level] = b;
1086                 ret = check_block(root, p, level);
1087                 if (ret)
1088                         return -1;
1089                 ret = bin_search(b, key, level, &slot);
1090                 if (level != 0) {
1091                         if (ret && slot > 0)
1092                                 slot -= 1;
1093                         p->slots[level] = slot;
1094                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1095                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1096                                 int sret = split_node(trans, root, p, level);
1097                                 BUG_ON(sret > 0);
1098                                 if (sret)
1099                                         return sret;
1100                                 b = p->nodes[level];
1101                                 slot = p->slots[level];
1102                         } else if (ins_len < 0) {
1103                                 int sret = balance_level(trans, root, p,
1104                                                          level);
1105                                 if (sret)
1106                                         return sret;
1107                                 b = p->nodes[level];
1108                                 if (!b) {
1109                                         btrfs_release_path(NULL, p);
1110                                         goto again;
1111                                 }
1112                                 slot = p->slots[level];
1113                                 BUG_ON(btrfs_header_nritems(b) == 1);
1114                         }
1115                         /* this is only true while dropping a snapshot */
1116                         if (level == lowest_level)
1117                                 break;
1118                         bytenr = btrfs_node_blockptr(b, slot);
1119                         ptr_gen = btrfs_node_ptr_generation(b, slot);
1120                         if (should_reada)
1121                                 reada_for_search(root, p, level, slot,
1122                                                  key->objectid);
1123                         b = read_tree_block(root, bytenr,
1124                                             btrfs_level_size(root, level - 1));
1125                         if (ptr_gen != btrfs_header_generation(b)) {
1126                                 printk("block %llu bad gen wanted %llu "
1127                                        "found %llu\n",
1128                                 (unsigned long long)b->start,
1129                                 (unsigned long long)ptr_gen,
1130                                 (unsigned long long)btrfs_header_generation(b));
1131                         }
1132                 } else {
1133                         p->slots[level] = slot;
1134                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1135                             sizeof(struct btrfs_item) + ins_len) {
1136                                 int sret = split_leaf(trans, root, key,
1137                                                       p, ins_len, ret == 0);
1138                                 BUG_ON(sret > 0);
1139                                 if (sret)
1140                                         return sret;
1141                         }
1142                         return ret;
1143                 }
1144         }
1145         return 1;
1146 }
1147
1148 /*
1149  * adjust the pointers going up the tree, starting at level
1150  * making sure the right key of each node is points to 'key'.
1151  * This is used after shifting pointers to the left, so it stops
1152  * fixing up pointers when a given leaf/node is not in slot 0 of the
1153  * higher levels
1154  *
1155  * If this fails to write a tree block, it returns -1, but continues
1156  * fixing up the blocks in ram so the tree is consistent.
1157  */
1158 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1159                           struct btrfs_root *root, struct btrfs_path *path,
1160                           struct btrfs_disk_key *key, int level)
1161 {
1162         int i;
1163         int ret = 0;
1164         struct extent_buffer *t;
1165
1166         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1167                 int tslot = path->slots[i];
1168                 if (!path->nodes[i])
1169                         break;
1170                 t = path->nodes[i];
1171                 btrfs_set_node_key(t, key, tslot);
1172                 btrfs_mark_buffer_dirty(path->nodes[i]);
1173                 if (tslot != 0)
1174                         break;
1175         }
1176         return ret;
1177 }
1178
1179 /*
1180  * try to push data from one node into the next node left in the
1181  * tree.
1182  *
1183  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1184  * error, and > 0 if there was no room in the left hand block.
1185  */
1186 static int push_node_left(struct btrfs_trans_handle *trans,
1187                           struct btrfs_root *root, struct extent_buffer *dst,
1188                           struct extent_buffer *src)
1189 {
1190         int push_items = 0;
1191         int src_nritems;
1192         int dst_nritems;
1193         int ret = 0;
1194
1195         src_nritems = btrfs_header_nritems(src);
1196         dst_nritems = btrfs_header_nritems(dst);
1197         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1198         WARN_ON(btrfs_header_generation(src) != trans->transid);
1199         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1200
1201         if (push_items <= 0) {
1202                 return 1;
1203         }
1204
1205         if (src_nritems < push_items)
1206                 push_items = src_nritems;
1207
1208         copy_extent_buffer(dst, src,
1209                            btrfs_node_key_ptr_offset(dst_nritems),
1210                            btrfs_node_key_ptr_offset(0),
1211                            push_items * sizeof(struct btrfs_key_ptr));
1212
1213         if (push_items < src_nritems) {
1214                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1215                                       btrfs_node_key_ptr_offset(push_items),
1216                                       (src_nritems - push_items) *
1217                                       sizeof(struct btrfs_key_ptr));
1218         }
1219         btrfs_set_header_nritems(src, src_nritems - push_items);
1220         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1221         btrfs_mark_buffer_dirty(src);
1222         btrfs_mark_buffer_dirty(dst);
1223         return ret;
1224 }
1225
1226 /*
1227  * try to push data from one node into the next node right in the
1228  * tree.
1229  *
1230  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1231  * error, and > 0 if there was no room in the right hand block.
1232  *
1233  * this will  only push up to 1/2 the contents of the left node over
1234  */
1235 static int balance_node_right(struct btrfs_trans_handle *trans,
1236                               struct btrfs_root *root,
1237                               struct extent_buffer *dst,
1238                               struct extent_buffer *src)
1239 {
1240         int push_items = 0;
1241         int max_push;
1242         int src_nritems;
1243         int dst_nritems;
1244         int ret = 0;
1245
1246         WARN_ON(btrfs_header_generation(src) != trans->transid);
1247         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1248
1249         src_nritems = btrfs_header_nritems(src);
1250         dst_nritems = btrfs_header_nritems(dst);
1251         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1252         if (push_items <= 0)
1253                 return 1;
1254
1255         max_push = src_nritems / 2 + 1;
1256         /* don't try to empty the node */
1257         if (max_push >= src_nritems)
1258                 return 1;
1259
1260         if (max_push < push_items)
1261                 push_items = max_push;
1262
1263         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1264                                       btrfs_node_key_ptr_offset(0),
1265                                       (dst_nritems) *
1266                                       sizeof(struct btrfs_key_ptr));
1267
1268         copy_extent_buffer(dst, src,
1269                            btrfs_node_key_ptr_offset(0),
1270                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1271                            push_items * sizeof(struct btrfs_key_ptr));
1272
1273         btrfs_set_header_nritems(src, src_nritems - push_items);
1274         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1275
1276         btrfs_mark_buffer_dirty(src);
1277         btrfs_mark_buffer_dirty(dst);
1278         return ret;
1279 }
1280
1281 /*
1282  * helper function to insert a new root level in the tree.
1283  * A new node is allocated, and a single item is inserted to
1284  * point to the existing root
1285  *
1286  * returns zero on success or < 0 on failure.
1287  */
1288 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1289                            struct btrfs_root *root,
1290                            struct btrfs_path *path, int level)
1291 {
1292         u64 root_gen;
1293         u64 lower_gen;
1294         struct extent_buffer *lower;
1295         struct extent_buffer *c;
1296         struct btrfs_disk_key lower_key;
1297
1298         BUG_ON(path->nodes[level]);
1299         BUG_ON(path->nodes[level-1] != root->node);
1300
1301         if (root->ref_cows)
1302                 root_gen = trans->transid;
1303         else
1304                 root_gen = 0;
1305
1306         lower = path->nodes[level-1];
1307         if (level == 1)
1308                 btrfs_item_key(lower, &lower_key, 0);
1309         else
1310                 btrfs_node_key(lower, &lower_key, 0);
1311
1312         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1313                                    root->root_key.objectid,
1314                                    root_gen, lower_key.objectid, level,
1315                                    root->node->start, 0);
1316         if (IS_ERR(c))
1317                 return PTR_ERR(c);
1318         memset_extent_buffer(c, 0, 0, root->nodesize);
1319         btrfs_set_header_nritems(c, 1);
1320         btrfs_set_header_level(c, level);
1321         btrfs_set_header_bytenr(c, c->start);
1322         btrfs_set_header_generation(c, trans->transid);
1323         btrfs_set_header_owner(c, root->root_key.objectid);
1324
1325         write_extent_buffer(c, root->fs_info->fsid,
1326                             (unsigned long)btrfs_header_fsid(c),
1327                             BTRFS_FSID_SIZE);
1328         btrfs_set_node_key(c, &lower_key, 0);
1329         btrfs_set_node_blockptr(c, 0, lower->start);
1330         lower_gen = btrfs_header_generation(lower);
1331         WARN_ON(lower_gen == 0);
1332
1333         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1334
1335         btrfs_mark_buffer_dirty(c);
1336
1337         /* the super has an extra ref to root->node */
1338         free_extent_buffer(root->node);
1339         root->node = c;
1340         extent_buffer_get(c);
1341         path->nodes[level] = c;
1342         path->slots[level] = 0;
1343
1344         if (root->ref_cows && lower_gen != trans->transid) {
1345                 struct btrfs_path *back_path = btrfs_alloc_path();
1346                 int ret;
1347                 ret = btrfs_insert_extent_backref(trans,
1348                                                   root->fs_info->extent_root,
1349                                                   path, lower->start,
1350                                                   root->root_key.objectid,
1351                                                   trans->transid, 0, 0);
1352                 BUG_ON(ret);
1353                 btrfs_free_path(back_path);
1354         }
1355         return 0;
1356 }
1357
1358 /*
1359  * worker function to insert a single pointer in a node.
1360  * the node should have enough room for the pointer already
1361  *
1362  * slot and level indicate where you want the key to go, and
1363  * blocknr is the block the key points to.
1364  *
1365  * returns zero on success and < 0 on any error
1366  */
1367 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1368                       *root, struct btrfs_path *path, struct btrfs_disk_key
1369                       *key, u64 bytenr, int slot, int level)
1370 {
1371         struct extent_buffer *lower;
1372         int nritems;
1373
1374         BUG_ON(!path->nodes[level]);
1375         lower = path->nodes[level];
1376         nritems = btrfs_header_nritems(lower);
1377         if (slot > nritems)
1378                 BUG();
1379         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1380                 BUG();
1381         if (slot != nritems) {
1382                 memmove_extent_buffer(lower,
1383                               btrfs_node_key_ptr_offset(slot + 1),
1384                               btrfs_node_key_ptr_offset(slot),
1385                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1386         }
1387         btrfs_set_node_key(lower, key, slot);
1388         btrfs_set_node_blockptr(lower, slot, bytenr);
1389         WARN_ON(trans->transid == 0);
1390         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1391         btrfs_set_header_nritems(lower, nritems + 1);
1392         btrfs_mark_buffer_dirty(lower);
1393         return 0;
1394 }
1395
1396 /*
1397  * split the node at the specified level in path in two.
1398  * The path is corrected to point to the appropriate node after the split
1399  *
1400  * Before splitting this tries to make some room in the node by pushing
1401  * left and right, if either one works, it returns right away.
1402  *
1403  * returns 0 on success and < 0 on failure
1404  */
1405 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1406                       *root, struct btrfs_path *path, int level)
1407 {
1408         u64 root_gen;
1409         struct extent_buffer *c;
1410         struct extent_buffer *split;
1411         struct btrfs_disk_key disk_key;
1412         int mid;
1413         int ret;
1414         int wret;
1415         u32 c_nritems;
1416
1417         c = path->nodes[level];
1418         WARN_ON(btrfs_header_generation(c) != trans->transid);
1419         if (c == root->node) {
1420                 /* trying to split the root, lets make a new one */
1421                 ret = insert_new_root(trans, root, path, level + 1);
1422                 if (ret)
1423                         return ret;
1424         } else {
1425                 ret = push_nodes_for_insert(trans, root, path, level);
1426                 c = path->nodes[level];
1427                 if (!ret && btrfs_header_nritems(c) <
1428                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1429                         return 0;
1430                 if (ret < 0)
1431                         return ret;
1432         }
1433
1434         c_nritems = btrfs_header_nritems(c);
1435         if (root->ref_cows)
1436                 root_gen = trans->transid;
1437         else
1438                 root_gen = 0;
1439
1440         btrfs_node_key(c, &disk_key, 0);
1441         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1442                                          root->root_key.objectid,
1443                                          root_gen,
1444                                          btrfs_disk_key_objectid(&disk_key),
1445                                          level, c->start, 0);
1446         if (IS_ERR(split))
1447                 return PTR_ERR(split);
1448
1449         btrfs_set_header_flags(split, btrfs_header_flags(c));
1450         btrfs_set_header_level(split, btrfs_header_level(c));
1451         btrfs_set_header_bytenr(split, split->start);
1452         btrfs_set_header_generation(split, trans->transid);
1453         btrfs_set_header_owner(split, root->root_key.objectid);
1454         write_extent_buffer(split, root->fs_info->fsid,
1455                             (unsigned long)btrfs_header_fsid(split),
1456                             BTRFS_FSID_SIZE);
1457
1458         mid = (c_nritems + 1) / 2;
1459
1460         copy_extent_buffer(split, c,
1461                            btrfs_node_key_ptr_offset(0),
1462                            btrfs_node_key_ptr_offset(mid),
1463                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1464         btrfs_set_header_nritems(split, c_nritems - mid);
1465         btrfs_set_header_nritems(c, mid);
1466         ret = 0;
1467
1468         btrfs_mark_buffer_dirty(c);
1469         btrfs_mark_buffer_dirty(split);
1470
1471         btrfs_node_key(split, &disk_key, 0);
1472         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1473                           path->slots[level + 1] + 1,
1474                           level + 1);
1475         if (wret)
1476                 ret = wret;
1477
1478         if (path->slots[level] >= mid) {
1479                 path->slots[level] -= mid;
1480                 free_extent_buffer(c);
1481                 path->nodes[level] = split;
1482                 path->slots[level + 1] += 1;
1483         } else {
1484                 free_extent_buffer(split);
1485         }
1486         return ret;
1487 }
1488
1489 /*
1490  * how many bytes are required to store the items in a leaf.  start
1491  * and nr indicate which items in the leaf to check.  This totals up the
1492  * space used both by the item structs and the item data
1493  */
1494 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1495 {
1496         int data_len;
1497         int nritems = btrfs_header_nritems(l);
1498         int end = min(nritems, start + nr) - 1;
1499
1500         if (!nr)
1501                 return 0;
1502         data_len = btrfs_item_end_nr(l, start);
1503         data_len = data_len - btrfs_item_offset_nr(l, end);
1504         data_len += sizeof(struct btrfs_item) * nr;
1505         WARN_ON(data_len < 0);
1506         return data_len;
1507 }
1508
1509 /*
1510  * The space between the end of the leaf items and
1511  * the start of the leaf data.  IOW, how much room
1512  * the leaf has left for both items and data
1513  */
1514 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1515 {
1516         int nritems = btrfs_header_nritems(leaf);
1517         int ret;
1518         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
1519         if (ret < 0) {
1520                 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
1521                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1522                        leaf_space_used(leaf, 0, nritems), nritems);
1523         }
1524         return ret;
1525 }
1526
1527 /*
1528  * push some data in the path leaf to the right, trying to free up at
1529  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1530  *
1531  * returns 1 if the push failed because the other node didn't have enough
1532  * room, 0 if everything worked out and < 0 if there were major errors.
1533  */
1534 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1535                            *root, struct btrfs_path *path, int data_size,
1536                            int empty)
1537 {
1538         struct extent_buffer *left = path->nodes[0];
1539         struct extent_buffer *right;
1540         struct extent_buffer *upper;
1541         struct btrfs_disk_key disk_key;
1542         int slot;
1543         u32 i;
1544         int free_space;
1545         int push_space = 0;
1546         int push_items = 0;
1547         struct btrfs_item *item;
1548         u32 left_nritems;
1549         u32 nr;
1550         u32 right_nritems;
1551         u32 data_end;
1552         u32 this_item_size;
1553         int ret;
1554
1555         slot = path->slots[1];
1556         if (!path->nodes[1]) {
1557                 return 1;
1558         }
1559         upper = path->nodes[1];
1560         if (slot >= btrfs_header_nritems(upper) - 1)
1561                 return 1;
1562
1563         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1564                                 root->leafsize);
1565         free_space = btrfs_leaf_free_space(root, right);
1566         if (free_space < data_size + sizeof(struct btrfs_item)) {
1567                 free_extent_buffer(right);
1568                 return 1;
1569         }
1570
1571         /* cow and double check */
1572         ret = btrfs_cow_block(trans, root, right, upper,
1573                               slot + 1, &right);
1574         if (ret) {
1575                 free_extent_buffer(right);
1576                 return 1;
1577         }
1578         free_space = btrfs_leaf_free_space(root, right);
1579         if (free_space < data_size + sizeof(struct btrfs_item)) {
1580                 free_extent_buffer(right);
1581                 return 1;
1582         }
1583
1584         left_nritems = btrfs_header_nritems(left);
1585         if (left_nritems == 0) {
1586                 free_extent_buffer(right);
1587                 return 1;
1588         }
1589
1590         if (empty)
1591                 nr = 0;
1592         else
1593                 nr = 1;
1594
1595         i = left_nritems - 1;
1596         while (i >= nr) {
1597                 item = btrfs_item_nr(left, i);
1598
1599                 if (path->slots[0] == i)
1600                         push_space += data_size + sizeof(*item);
1601
1602                 this_item_size = btrfs_item_size(left, item);
1603                 if (this_item_size + sizeof(*item) + push_space > free_space)
1604                         break;
1605                 push_items++;
1606                 push_space += this_item_size + sizeof(*item);
1607                 if (i == 0)
1608                         break;
1609                 i--;
1610         }
1611
1612         if (push_items == 0) {
1613                 free_extent_buffer(right);
1614                 return 1;
1615         }
1616
1617         if (!empty && push_items == left_nritems)
1618                 WARN_ON(1);
1619
1620         /* push left to right */
1621         right_nritems = btrfs_header_nritems(right);
1622
1623         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1624         push_space -= leaf_data_end(root, left);
1625
1626         /* make room in the right data area */
1627         data_end = leaf_data_end(root, right);
1628         memmove_extent_buffer(right,
1629                               btrfs_leaf_data(right) + data_end - push_space,
1630                               btrfs_leaf_data(right) + data_end,
1631                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1632
1633         /* copy from the left data area */
1634         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1635                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1636                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1637                      push_space);
1638
1639         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1640                               btrfs_item_nr_offset(0),
1641                               right_nritems * sizeof(struct btrfs_item));
1642
1643         /* copy the items from left to right */
1644         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1645                    btrfs_item_nr_offset(left_nritems - push_items),
1646                    push_items * sizeof(struct btrfs_item));
1647
1648         /* update the item pointers */
1649         right_nritems += push_items;
1650         btrfs_set_header_nritems(right, right_nritems);
1651         push_space = BTRFS_LEAF_DATA_SIZE(root);
1652         for (i = 0; i < right_nritems; i++) {
1653                 item = btrfs_item_nr(right, i);
1654                 push_space -= btrfs_item_size(right, item);
1655                 btrfs_set_item_offset(right, item, push_space);
1656         }
1657
1658         left_nritems -= push_items;
1659         btrfs_set_header_nritems(left, left_nritems);
1660
1661         if (left_nritems)
1662                 btrfs_mark_buffer_dirty(left);
1663         btrfs_mark_buffer_dirty(right);
1664
1665         btrfs_item_key(right, &disk_key, 0);
1666         btrfs_set_node_key(upper, &disk_key, slot + 1);
1667         btrfs_mark_buffer_dirty(upper);
1668
1669         /* then fixup the leaf pointer in the path */
1670         if (path->slots[0] >= left_nritems) {
1671                 path->slots[0] -= left_nritems;
1672                 free_extent_buffer(path->nodes[0]);
1673                 path->nodes[0] = right;
1674                 path->slots[1] += 1;
1675         } else {
1676                 free_extent_buffer(right);
1677         }
1678         return 0;
1679 }
1680 /*
1681  * push some data in the path leaf to the left, trying to free up at
1682  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1683  */
1684 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1685                           *root, struct btrfs_path *path, int data_size,
1686                           int empty)
1687 {
1688         struct btrfs_disk_key disk_key;
1689         struct extent_buffer *right = path->nodes[0];
1690         struct extent_buffer *left;
1691         int slot;
1692         int i;
1693         int free_space;
1694         int push_space = 0;
1695         int push_items = 0;
1696         struct btrfs_item *item;
1697         u32 old_left_nritems;
1698         u32 right_nritems;
1699         u32 nr;
1700         int ret = 0;
1701         int wret;
1702         u32 this_item_size;
1703         u32 old_left_item_size;
1704
1705         slot = path->slots[1];
1706         if (slot == 0)
1707                 return 1;
1708         if (!path->nodes[1])
1709                 return 1;
1710
1711         right_nritems = btrfs_header_nritems(right);
1712         if (right_nritems == 0) {
1713                 return 1;
1714         }
1715
1716         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1717                                slot - 1), root->leafsize);
1718         free_space = btrfs_leaf_free_space(root, left);
1719         if (free_space < data_size + sizeof(struct btrfs_item)) {
1720                 free_extent_buffer(left);
1721                 return 1;
1722         }
1723
1724         /* cow and double check */
1725         ret = btrfs_cow_block(trans, root, left,
1726                               path->nodes[1], slot - 1, &left);
1727         if (ret) {
1728                 /* we hit -ENOSPC, but it isn't fatal here */
1729                 free_extent_buffer(left);
1730                 return 1;
1731         }
1732
1733         free_space = btrfs_leaf_free_space(root, left);
1734         if (free_space < data_size + sizeof(struct btrfs_item)) {
1735                 free_extent_buffer(left);
1736                 return 1;
1737         }
1738
1739         if (empty)
1740                 nr = right_nritems;
1741         else
1742                 nr = right_nritems - 1;
1743
1744         for (i = 0; i < nr; i++) {
1745                 item = btrfs_item_nr(right, i);
1746
1747                 if (path->slots[0] == i)
1748                         push_space += data_size + sizeof(*item);
1749
1750                 this_item_size = btrfs_item_size(right, item);
1751                 if (this_item_size + sizeof(*item) + push_space > free_space)
1752                         break;
1753
1754                 push_items++;
1755                 push_space += this_item_size + sizeof(*item);
1756         }
1757
1758         if (push_items == 0) {
1759                 free_extent_buffer(left);
1760                 return 1;
1761         }
1762         if (!empty && push_items == btrfs_header_nritems(right))
1763                 WARN_ON(1);
1764
1765         /* push data from right to left */
1766         copy_extent_buffer(left, right,
1767                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1768                            btrfs_item_nr_offset(0),
1769                            push_items * sizeof(struct btrfs_item));
1770
1771         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1772                      btrfs_item_offset_nr(right, push_items -1);
1773
1774         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1775                      leaf_data_end(root, left) - push_space,
1776                      btrfs_leaf_data(right) +
1777                      btrfs_item_offset_nr(right, push_items - 1),
1778                      push_space);
1779         old_left_nritems = btrfs_header_nritems(left);
1780         BUG_ON(old_left_nritems < 0);
1781
1782         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1783         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1784                 u32 ioff;
1785
1786                 item = btrfs_item_nr(left, i);
1787                 ioff = btrfs_item_offset(left, item);
1788                 btrfs_set_item_offset(left, item,
1789                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1790         }
1791         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1792
1793         /* fixup right node */
1794         if (push_items > right_nritems) {
1795                 printk("push items %d nr %u\n", push_items, right_nritems);
1796                 WARN_ON(1);
1797         }
1798
1799         if (push_items < right_nritems) {
1800                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1801                                                   leaf_data_end(root, right);
1802                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1803                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1804                                       btrfs_leaf_data(right) +
1805                                       leaf_data_end(root, right), push_space);
1806
1807                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1808                               btrfs_item_nr_offset(push_items),
1809                              (btrfs_header_nritems(right) - push_items) *
1810                              sizeof(struct btrfs_item));
1811         }
1812         right_nritems -= push_items;
1813         btrfs_set_header_nritems(right, right_nritems);
1814         push_space = BTRFS_LEAF_DATA_SIZE(root);
1815         for (i = 0; i < right_nritems; i++) {
1816                 item = btrfs_item_nr(right, i);
1817                 push_space = push_space - btrfs_item_size(right, item);
1818                 btrfs_set_item_offset(right, item, push_space);
1819         }
1820
1821         btrfs_mark_buffer_dirty(left);
1822         if (right_nritems)
1823                 btrfs_mark_buffer_dirty(right);
1824
1825         btrfs_item_key(right, &disk_key, 0);
1826         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1827         if (wret)
1828                 ret = wret;
1829
1830         /* then fixup the leaf pointer in the path */
1831         if (path->slots[0] < push_items) {
1832                 path->slots[0] += old_left_nritems;
1833                 free_extent_buffer(path->nodes[0]);
1834                 path->nodes[0] = left;
1835                 path->slots[1] -= 1;
1836         } else {
1837                 free_extent_buffer(left);
1838                 path->slots[0] -= push_items;
1839         }
1840         BUG_ON(path->slots[0] < 0);
1841         return ret;
1842 }
1843
1844 /*
1845  * split the path's leaf in two, making sure there is at least data_size
1846  * available for the resulting leaf level of the path.
1847  *
1848  * returns 0 if all went well and < 0 on failure.
1849  */
1850 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1851                       *root, struct btrfs_key *ins_key,
1852                       struct btrfs_path *path, int data_size, int extend)
1853 {
1854         u64 root_gen;
1855         struct extent_buffer *l;
1856         u32 nritems;
1857         int mid;
1858         int slot;
1859         struct extent_buffer *right;
1860         int space_needed = data_size + sizeof(struct btrfs_item);
1861         int data_copy_size;
1862         int rt_data_off;
1863         int i;
1864         int ret = 0;
1865         int wret;
1866         int double_split;
1867         int num_doubles = 0;
1868         struct btrfs_disk_key disk_key;
1869
1870         if (extend)
1871                 space_needed = data_size;
1872
1873         if (root->ref_cows)
1874                 root_gen = trans->transid;
1875         else
1876                 root_gen = 0;
1877
1878         /* first try to make some room by pushing left and right */
1879         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1880                 wret = push_leaf_right(trans, root, path, data_size, 0);
1881                 if (wret < 0) {
1882                         return wret;
1883                 }
1884                 if (wret) {
1885                         wret = push_leaf_left(trans, root, path, data_size, 0);
1886                         if (wret < 0)
1887                                 return wret;
1888                 }
1889                 l = path->nodes[0];
1890
1891                 /* did the pushes work? */
1892                 if (btrfs_leaf_free_space(root, l) >= space_needed)
1893                         return 0;
1894         }
1895
1896         if (!path->nodes[1]) {
1897                 ret = insert_new_root(trans, root, path, 1);
1898                 if (ret)
1899                         return ret;
1900         }
1901 again:
1902         double_split = 0;
1903         l = path->nodes[0];
1904         slot = path->slots[0];
1905         nritems = btrfs_header_nritems(l);
1906         mid = (nritems + 1)/ 2;
1907
1908         btrfs_item_key(l, &disk_key, 0);
1909
1910         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
1911                                          root->root_key.objectid,
1912                                          root_gen, disk_key.objectid, 0,
1913                                          l->start, 0);
1914         if (IS_ERR(right))
1915                 return PTR_ERR(right);
1916
1917         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1918         btrfs_set_header_bytenr(right, right->start);
1919         btrfs_set_header_generation(right, trans->transid);
1920         btrfs_set_header_owner(right, root->root_key.objectid);
1921         btrfs_set_header_level(right, 0);
1922         write_extent_buffer(right, root->fs_info->fsid,
1923                             (unsigned long)btrfs_header_fsid(right),
1924                             BTRFS_FSID_SIZE);
1925         if (mid <= slot) {
1926                 if (nritems == 1 ||
1927                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1928                         BTRFS_LEAF_DATA_SIZE(root)) {
1929                         if (slot >= nritems) {
1930                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1931                                 btrfs_set_header_nritems(right, 0);
1932                                 wret = insert_ptr(trans, root, path,
1933                                                   &disk_key, right->start,
1934                                                   path->slots[1] + 1, 1);
1935                                 if (wret)
1936                                         ret = wret;
1937                                 free_extent_buffer(path->nodes[0]);
1938                                 path->nodes[0] = right;
1939                                 path->slots[0] = 0;
1940                                 path->slots[1] += 1;
1941                                 return ret;
1942                         }
1943                         mid = slot;
1944                         if (mid != nritems &&
1945                             leaf_space_used(l, mid, nritems - mid) +
1946                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1947                                 double_split = 1;
1948                         }
1949                 }
1950         } else {
1951                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1952                         BTRFS_LEAF_DATA_SIZE(root)) {
1953                         if (!extend && slot == 0) {
1954                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1955                                 btrfs_set_header_nritems(right, 0);
1956                                 wret = insert_ptr(trans, root, path,
1957                                                   &disk_key,
1958                                                   right->start,
1959                                                   path->slots[1], 1);
1960                                 if (wret)
1961                                         ret = wret;
1962                                 free_extent_buffer(path->nodes[0]);
1963                                 path->nodes[0] = right;
1964                                 path->slots[0] = 0;
1965                                 if (path->slots[1] == 0) {
1966                                         wret = fixup_low_keys(trans, root,
1967                                                    path, &disk_key, 1);
1968                                         if (wret)
1969                                                 ret = wret;
1970                                 }
1971                                 return ret;
1972                         } else if (extend && slot == 0) {
1973                                 mid = 1;
1974                         } else {
1975                                 mid = slot;
1976                                 if (mid != nritems &&
1977                                     leaf_space_used(l, mid, nritems - mid) +
1978                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1979                                         double_split = 1;
1980                                 }
1981                         }
1982                 }
1983         }
1984         nritems = nritems - mid;
1985         btrfs_set_header_nritems(right, nritems);
1986         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
1987
1988         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1989                            btrfs_item_nr_offset(mid),
1990                            nritems * sizeof(struct btrfs_item));
1991
1992         copy_extent_buffer(right, l,
1993                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1994                      data_copy_size, btrfs_leaf_data(l) +
1995                      leaf_data_end(root, l), data_copy_size);
1996
1997         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1998                       btrfs_item_end_nr(l, mid);
1999
2000         for (i = 0; i < nritems; i++) {
2001                 struct btrfs_item *item = btrfs_item_nr(right, i);
2002                 u32 ioff = btrfs_item_offset(right, item);
2003                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2004         }
2005
2006         btrfs_set_header_nritems(l, mid);
2007         ret = 0;
2008         btrfs_item_key(right, &disk_key, 0);
2009         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2010                           path->slots[1] + 1, 1);
2011         if (wret)
2012                 ret = wret;
2013
2014         btrfs_mark_buffer_dirty(right);
2015         btrfs_mark_buffer_dirty(l);
2016         BUG_ON(path->slots[0] != slot);
2017
2018         if (mid <= slot) {
2019                 free_extent_buffer(path->nodes[0]);
2020                 path->nodes[0] = right;
2021                 path->slots[0] -= mid;
2022                 path->slots[1] += 1;
2023         } else
2024                 free_extent_buffer(right);
2025
2026         BUG_ON(path->slots[0] < 0);
2027
2028         if (double_split) {
2029                 BUG_ON(num_doubles != 0);
2030                 num_doubles++;
2031                 goto again;
2032         }
2033         return ret;
2034 }
2035
2036 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2037                         struct btrfs_root *root,
2038                         struct btrfs_path *path,
2039                         u32 new_size, int from_end)
2040 {
2041         int ret = 0;
2042         int slot;
2043         int slot_orig;
2044         struct extent_buffer *leaf;
2045         struct btrfs_item *item;
2046         u32 nritems;
2047         unsigned int data_end;
2048         unsigned int old_data_start;
2049         unsigned int old_size;
2050         unsigned int size_diff;
2051         int i;
2052
2053         slot_orig = path->slots[0];
2054         leaf = path->nodes[0];
2055         slot = path->slots[0];
2056
2057         old_size = btrfs_item_size_nr(leaf, slot);
2058         if (old_size == new_size)
2059                 return 0;
2060
2061         nritems = btrfs_header_nritems(leaf);
2062         data_end = leaf_data_end(root, leaf);
2063
2064         old_data_start = btrfs_item_offset_nr(leaf, slot);
2065
2066         size_diff = old_size - new_size;
2067
2068         BUG_ON(slot < 0);
2069         BUG_ON(slot >= nritems);
2070
2071         /*
2072          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2073          */
2074         /* first correct the data pointers */
2075         for (i = slot; i < nritems; i++) {
2076                 u32 ioff;
2077                 item = btrfs_item_nr(leaf, i);
2078                 ioff = btrfs_item_offset(leaf, item);
2079                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2080         }
2081
2082         /* shift the data */
2083         if (from_end) {
2084                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2085                               data_end + size_diff, btrfs_leaf_data(leaf) +
2086                               data_end, old_data_start + new_size - data_end);
2087         } else {
2088                 struct btrfs_disk_key disk_key;
2089                 u64 offset;
2090
2091                 btrfs_item_key(leaf, &disk_key, slot);
2092
2093                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2094                         unsigned long ptr;
2095                         struct btrfs_file_extent_item *fi;
2096
2097                         fi = btrfs_item_ptr(leaf, slot,
2098                                             struct btrfs_file_extent_item);
2099                         fi = (struct btrfs_file_extent_item *)(
2100                              (unsigned long)fi - size_diff);
2101
2102                         if (btrfs_file_extent_type(leaf, fi) ==
2103                             BTRFS_FILE_EXTENT_INLINE) {
2104                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2105                                 memmove_extent_buffer(leaf, ptr,
2106                                         (unsigned long)fi,
2107                                         offsetof(struct btrfs_file_extent_item,
2108                                                  disk_bytenr));
2109                         }
2110                 }
2111
2112                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2113                               data_end + size_diff, btrfs_leaf_data(leaf) +
2114                               data_end, old_data_start - data_end);
2115
2116                 offset = btrfs_disk_key_offset(&disk_key);
2117                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2118                 btrfs_set_item_key(leaf, &disk_key, slot);
2119                 if (slot == 0)
2120                         fixup_low_keys(trans, root, path, &disk_key, 1);
2121         }
2122
2123         item = btrfs_item_nr(leaf, slot);
2124         btrfs_set_item_size(leaf, item, new_size);
2125         btrfs_mark_buffer_dirty(leaf);
2126
2127         ret = 0;
2128         if (btrfs_leaf_free_space(root, leaf) < 0) {
2129                 btrfs_print_leaf(root, leaf);
2130                 BUG();
2131         }
2132         return ret;
2133 }
2134
2135 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2136                       struct btrfs_root *root, struct btrfs_path *path,
2137                       u32 data_size)
2138 {
2139         int ret = 0;
2140         int slot;
2141         int slot_orig;
2142         struct extent_buffer *leaf;
2143         struct btrfs_item *item;
2144         u32 nritems;
2145         unsigned int data_end;
2146         unsigned int old_data;
2147         unsigned int old_size;
2148         int i;
2149
2150         slot_orig = path->slots[0];
2151         leaf = path->nodes[0];
2152
2153         nritems = btrfs_header_nritems(leaf);
2154         data_end = leaf_data_end(root, leaf);
2155
2156         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2157                 btrfs_print_leaf(root, leaf);
2158                 BUG();
2159         }
2160         slot = path->slots[0];
2161         old_data = btrfs_item_end_nr(leaf, slot);
2162
2163         BUG_ON(slot < 0);
2164         if (slot >= nritems) {
2165                 btrfs_print_leaf(root, leaf);
2166                 printk("slot %d too large, nritems %d\n", slot, nritems);
2167                 BUG_ON(1);
2168         }
2169
2170         /*
2171          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2172          */
2173         /* first correct the data pointers */
2174         for (i = slot; i < nritems; i++) {
2175                 u32 ioff;
2176                 item = btrfs_item_nr(leaf, i);
2177                 ioff = btrfs_item_offset(leaf, item);
2178                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2179         }
2180
2181         /* shift the data */
2182         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2183                       data_end - data_size, btrfs_leaf_data(leaf) +
2184                       data_end, old_data - data_end);
2185
2186         data_end = old_data;
2187         old_size = btrfs_item_size_nr(leaf, slot);
2188         item = btrfs_item_nr(leaf, slot);
2189         btrfs_set_item_size(leaf, item, old_size + data_size);
2190         btrfs_mark_buffer_dirty(leaf);
2191
2192         ret = 0;
2193         if (btrfs_leaf_free_space(root, leaf) < 0) {
2194                 btrfs_print_leaf(root, leaf);
2195                 BUG();
2196         }
2197         return ret;
2198 }
2199
2200 /*
2201  * Given a key and some data, insert an item into the tree.
2202  * This does all the path init required, making room in the tree if needed.
2203  */
2204 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2205                             struct btrfs_root *root,
2206                             struct btrfs_path *path,
2207                             struct btrfs_key *cpu_key, u32 data_size)
2208 {
2209         struct extent_buffer *leaf;
2210         struct btrfs_item *item;
2211         int ret = 0;
2212         int slot;
2213         int slot_orig;
2214         u32 nritems;
2215         unsigned int data_end;
2216         struct btrfs_disk_key disk_key;
2217
2218         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2219
2220         /* create a root if there isn't one */
2221         if (!root->node)
2222                 BUG();
2223
2224         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2225         if (ret == 0) {
2226                 return -EEXIST;
2227         }
2228         if (ret < 0)
2229                 goto out;
2230
2231         slot_orig = path->slots[0];
2232         leaf = path->nodes[0];
2233
2234         nritems = btrfs_header_nritems(leaf);
2235         data_end = leaf_data_end(root, leaf);
2236
2237         if (btrfs_leaf_free_space(root, leaf) <
2238             sizeof(struct btrfs_item) + data_size) {
2239                 btrfs_print_leaf(root, leaf);
2240                 printk("not enough freespace need %u have %d\n",
2241                        data_size, btrfs_leaf_free_space(root, leaf));
2242                 BUG();
2243         }
2244
2245         slot = path->slots[0];
2246         BUG_ON(slot < 0);
2247
2248         if (slot != nritems) {
2249                 int i;
2250                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2251
2252                 if (old_data < data_end) {
2253                         btrfs_print_leaf(root, leaf);
2254                         printk("slot %d old_data %d data_end %d\n",
2255                                slot, old_data, data_end);
2256                         BUG_ON(1);
2257                 }
2258                 /*
2259                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2260                  */
2261                 /* first correct the data pointers */
2262                 for (i = slot; i < nritems; i++) {
2263                         u32 ioff;
2264
2265                         item = btrfs_item_nr(leaf, i);
2266                         ioff = btrfs_item_offset(leaf, item);
2267                         btrfs_set_item_offset(leaf, item, ioff - data_size);
2268                 }
2269
2270                 /* shift the items */
2271                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2272                               btrfs_item_nr_offset(slot),
2273                               (nritems - slot) * sizeof(struct btrfs_item));
2274
2275                 /* shift the data */
2276                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2277                               data_end - data_size, btrfs_leaf_data(leaf) +
2278                               data_end, old_data - data_end);
2279                 data_end = old_data;
2280         }
2281
2282         /* setup the item for the new data */
2283         btrfs_set_item_key(leaf, &disk_key, slot);
2284         item = btrfs_item_nr(leaf, slot);
2285         btrfs_set_item_offset(leaf, item, data_end - data_size);
2286         btrfs_set_item_size(leaf, item, data_size);
2287         btrfs_set_header_nritems(leaf, nritems + 1);
2288         btrfs_mark_buffer_dirty(leaf);
2289
2290         ret = 0;
2291         if (slot == 0)
2292                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2293
2294         if (btrfs_leaf_free_space(root, leaf) < 0) {
2295                 btrfs_print_leaf(root, leaf);
2296                 BUG();
2297         }
2298 out:
2299         return ret;
2300 }
2301
2302 /*
2303  * Given a key and some data, insert an item into the tree.
2304  * This does all the path init required, making room in the tree if needed.
2305  */
2306 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2307                       *root, struct btrfs_key *cpu_key, void *data, u32
2308                       data_size)
2309 {
2310         int ret = 0;
2311         struct btrfs_path *path;
2312         struct extent_buffer *leaf;
2313         unsigned long ptr;
2314
2315         path = btrfs_alloc_path();
2316         BUG_ON(!path);
2317         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2318         if (!ret) {
2319                 leaf = path->nodes[0];
2320                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2321                 write_extent_buffer(leaf, data, ptr, data_size);
2322                 btrfs_mark_buffer_dirty(leaf);
2323         }
2324         btrfs_free_path(path);
2325         return ret;
2326 }
2327
2328 /*
2329  * delete the pointer from a given node.
2330  *
2331  * If the delete empties a node, the node is removed from the tree,
2332  * continuing all the way the root if required.  The root is converted into
2333  * a leaf if all the nodes are emptied.
2334  */
2335 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2336                    struct btrfs_path *path, int level, int slot)
2337 {
2338         struct extent_buffer *parent = path->nodes[level];
2339         u32 nritems;
2340         int ret = 0;
2341         int wret;
2342
2343         nritems = btrfs_header_nritems(parent);
2344         if (slot != nritems -1) {
2345                 memmove_extent_buffer(parent,
2346                               btrfs_node_key_ptr_offset(slot),
2347                               btrfs_node_key_ptr_offset(slot + 1),
2348                               sizeof(struct btrfs_key_ptr) *
2349                               (nritems - slot - 1));
2350         }
2351         nritems--;
2352         btrfs_set_header_nritems(parent, nritems);
2353         if (nritems == 0 && parent == root->node) {
2354                 BUG_ON(btrfs_header_level(root->node) != 1);
2355                 /* just turn the root into a leaf and break */
2356                 btrfs_set_header_level(root->node, 0);
2357         } else if (slot == 0) {
2358                 struct btrfs_disk_key disk_key;
2359
2360                 btrfs_node_key(parent, &disk_key, 0);
2361                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2362                 if (wret)
2363                         ret = wret;
2364         }
2365         btrfs_mark_buffer_dirty(parent);
2366         return ret;
2367 }
2368
2369 /*
2370  * delete the item at the leaf level in path.  If that empties
2371  * the leaf, remove it from the tree
2372  */
2373 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2374                    struct btrfs_path *path)
2375 {
2376         int slot;
2377         struct extent_buffer *leaf;
2378         struct btrfs_item *item;
2379         int doff;
2380         int dsize;
2381         int ret = 0;
2382         int wret;
2383         u32 nritems;
2384
2385         leaf = path->nodes[0];
2386         slot = path->slots[0];
2387         doff = btrfs_item_offset_nr(leaf, slot);
2388         dsize = btrfs_item_size_nr(leaf, slot);
2389         nritems = btrfs_header_nritems(leaf);
2390
2391         if (slot != nritems - 1) {
2392                 int i;
2393                 int data_end = leaf_data_end(root, leaf);
2394
2395                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2396                               data_end + dsize,
2397                               btrfs_leaf_data(leaf) + data_end,
2398                               doff - data_end);
2399
2400                 for (i = slot + 1; i < nritems; i++) {
2401                         u32 ioff;
2402
2403                         item = btrfs_item_nr(leaf, i);
2404                         ioff = btrfs_item_offset(leaf, item);
2405                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2406                 }
2407
2408                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2409                               btrfs_item_nr_offset(slot + 1),
2410                               sizeof(struct btrfs_item) *
2411                               (nritems - slot - 1));
2412         }
2413         btrfs_set_header_nritems(leaf, nritems - 1);
2414         nritems--;
2415
2416         /* delete the leaf if we've emptied it */
2417         if (nritems == 0) {
2418                 if (leaf == root->node) {
2419                         btrfs_set_header_level(leaf, 0);
2420                 } else {
2421                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2422                         clean_tree_block(trans, root, leaf);
2423                         wait_on_tree_block_writeback(root, leaf);
2424                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2425                         if (wret)
2426                                 ret = wret;
2427                         wret = btrfs_free_extent(trans, root,
2428                                          leaf->start, leaf->len,
2429                                          btrfs_header_owner(path->nodes[1]),
2430                                          root_gen, 0, 0, 1);
2431                         if (wret)
2432                                 ret = wret;
2433                 }
2434         } else {
2435                 int used = leaf_space_used(leaf, 0, nritems);
2436                 if (slot == 0) {
2437                         struct btrfs_disk_key disk_key;
2438
2439                         btrfs_item_key(leaf, &disk_key, 0);
2440                         wret = fixup_low_keys(trans, root, path,
2441                                               &disk_key, 1);
2442                         if (wret)
2443                                 ret = wret;
2444                 }
2445
2446                 /* delete the leaf if it is mostly empty */
2447                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2448                         /* push_leaf_left fixes the path.
2449                          * make sure the path still points to our leaf
2450                          * for possible call to del_ptr below
2451                          */
2452                         slot = path->slots[1];
2453                         extent_buffer_get(leaf);
2454
2455                         wret = push_leaf_right(trans, root, path, 1, 1);
2456                         if (wret < 0 && wret != -ENOSPC)
2457                                 ret = wret;
2458
2459                         if (path->nodes[0] == leaf &&
2460                             btrfs_header_nritems(leaf)) {
2461                                 wret = push_leaf_left(trans, root, path, 1, 1);
2462                                 if (wret < 0 && wret != -ENOSPC)
2463                                         ret = wret;
2464                         }
2465
2466                         if (btrfs_header_nritems(leaf) == 0) {
2467                                 u64 root_gen;
2468                                 u64 bytenr = leaf->start;
2469                                 u32 blocksize = leaf->len;
2470
2471                                 root_gen = btrfs_header_generation(
2472                                                            path->nodes[1]);
2473
2474                                 clean_tree_block(trans, root, leaf);
2475                                 wait_on_tree_block_writeback(root, leaf);
2476
2477                                 wret = del_ptr(trans, root, path, 1, slot);
2478                                 if (wret)
2479                                         ret = wret;
2480
2481                                 free_extent_buffer(leaf);
2482                                 wret = btrfs_free_extent(trans, root, bytenr,
2483                                              blocksize,
2484                                              btrfs_header_owner(path->nodes[1]),
2485                                              root_gen, 0, 0, 1);
2486                                 if (wret)
2487                                         ret = wret;
2488                         } else {
2489                                 btrfs_mark_buffer_dirty(leaf);
2490                                 free_extent_buffer(leaf);
2491                         }
2492                 } else {
2493                         btrfs_mark_buffer_dirty(leaf);
2494                 }
2495         }
2496         return ret;
2497 }
2498
2499 /*
2500  * walk up the tree as far as required to find the previous leaf.
2501  * returns 0 if it found something or 1 if there are no lesser leaves.
2502  * returns < 0 on io errors.
2503  */
2504 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2505 {
2506         u64 bytenr;
2507         int slot;
2508         int level = 1;
2509         struct extent_buffer *c;
2510         struct extent_buffer *next = NULL;
2511
2512         while(level < BTRFS_MAX_LEVEL) {
2513                 if (!path->nodes[level])
2514                         return 1;
2515
2516                 slot = path->slots[level];
2517                 c = path->nodes[level];
2518                 if (slot == 0) {
2519                         level++;
2520                         if (level == BTRFS_MAX_LEVEL)
2521                                 return 1;
2522                         continue;
2523                 }
2524                 slot--;
2525
2526                 bytenr = btrfs_node_blockptr(c, slot);
2527                 if (next)
2528                         free_extent_buffer(next);
2529
2530                 next = read_tree_block(root, bytenr,
2531                                        btrfs_level_size(root, level - 1));
2532                 break;
2533         }
2534         path->slots[level] = slot;
2535         while(1) {
2536                 level--;
2537                 c = path->nodes[level];
2538                 free_extent_buffer(c);
2539                 slot = btrfs_header_nritems(next);
2540                 if (slot != 0)
2541                         slot--;
2542                 path->nodes[level] = next;
2543                 path->slots[level] = slot;
2544                 if (!level)
2545                         break;
2546                 next = read_tree_block(root, btrfs_node_blockptr(next, slot),
2547                                        btrfs_level_size(root, level - 1));
2548         }
2549         return 0;
2550 }
2551
2552 /*
2553  * walk up the tree as far as required to find the next leaf.
2554  * returns 0 if it found something or 1 if there are no greater leaves.
2555  * returns < 0 on io errors.
2556  */
2557 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2558 {
2559         int slot;
2560         int level = 1;
2561         u64 bytenr;
2562         struct extent_buffer *c;
2563         struct extent_buffer *next = NULL;
2564
2565         while(level < BTRFS_MAX_LEVEL) {
2566                 if (!path->nodes[level])
2567                         return 1;
2568
2569                 slot = path->slots[level] + 1;
2570                 c = path->nodes[level];
2571                 if (slot >= btrfs_header_nritems(c)) {
2572                         level++;
2573                         if (level == BTRFS_MAX_LEVEL)
2574                                 return 1;
2575                         continue;
2576                 }
2577
2578                 bytenr = btrfs_node_blockptr(c, slot);
2579                 if (next)
2580                         free_extent_buffer(next);
2581
2582                 if (path->reada)
2583                         reada_for_search(root, path, level, slot, 0);
2584
2585                 next = read_tree_block(root, bytenr,
2586                                        btrfs_level_size(root, level -1));
2587                 break;
2588         }
2589         path->slots[level] = slot;
2590         while(1) {
2591                 level--;
2592                 c = path->nodes[level];
2593                 free_extent_buffer(c);
2594                 path->nodes[level] = next;
2595                 path->slots[level] = 0;
2596                 if (!level)
2597                         break;
2598                 if (path->reada)
2599                         reada_for_search(root, path, level, 0, 0);
2600                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2601                                        btrfs_level_size(root, level - 1));
2602         }
2603         return 0;
2604 }