During deletes and truncate, remove many items at once from the tree
[platform/upstream/btrfs-progs.git] / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18 #include "ctree.h"
19 #include "disk-io.h"
20 #include "transaction.h"
21 #include "print-tree.h"
22
/*
 * Forward declarations for file-local (static) helpers that are referenced
 * before their definitions appear.  balance_level() below calls
 * push_node_left(), balance_node_right() and del_ptr().
 * NOTE(review): the definitions are presumably further down in this file;
 * confirm when touching these signatures.
 */
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_key *ins_key,
                      struct btrfs_path *path, int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct extent_buffer *dst,
                          struct extent_buffer *src);
static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *dst_buf,
                              struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_path *path, int level, int slot);
37
38 inline void btrfs_init_path(struct btrfs_path *p)
39 {
40         memset(p, 0, sizeof(*p));
41 }
42
43 struct btrfs_path *btrfs_alloc_path(void)
44 {
45         struct btrfs_path *path;
46         path = kmalloc(sizeof(struct btrfs_path), GFP_NOFS);
47         if (path) {
48                 btrfs_init_path(path);
49                 path->reada = 0;
50         }
51         return path;
52 }
53
54 void btrfs_free_path(struct btrfs_path *p)
55 {
56         btrfs_release_path(NULL, p);
57         kfree(p);
58 }
59
60 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
61 {
62         int i;
63         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
64                 if (!p->nodes[i])
65                         break;
66                 free_extent_buffer(p->nodes[i]);
67         }
68         memset(p, 0, sizeof(*p));
69 }
70
71 int btrfs_copy_root(struct btrfs_trans_handle *trans,
72                       struct btrfs_root *root,
73                       struct extent_buffer *buf,
74                       struct extent_buffer **cow_ret, u64 new_root_objectid)
75 {
76         struct extent_buffer *cow;
77         u32 nritems;
78         int ret = 0;
79         int level;
80         struct btrfs_key first_key;
81         struct btrfs_root *new_root;
82
83         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
84         if (!new_root)
85                 return -ENOMEM;
86
87         memcpy(new_root, root, sizeof(*new_root));
88         new_root->root_key.objectid = new_root_objectid;
89
90         WARN_ON(root->ref_cows && trans->transid !=
91                 root->fs_info->running_transaction->transid);
92         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
93
94         level = btrfs_header_level(buf);
95         nritems = btrfs_header_nritems(buf);
96         if (nritems) {
97                 if (level == 0)
98                         btrfs_item_key_to_cpu(buf, &first_key, 0);
99                 else
100                         btrfs_node_key_to_cpu(buf, &first_key, 0);
101         } else {
102                 first_key.objectid = 0;
103         }
104         cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
105                                        new_root_objectid,
106                                        trans->transid, first_key.objectid,
107                                        level, buf->start, 0);
108         if (IS_ERR(cow)) {
109                 kfree(new_root);
110                 return PTR_ERR(cow);
111         }
112
113         copy_extent_buffer(cow, buf, 0, 0, cow->len);
114         btrfs_set_header_bytenr(cow, cow->start);
115         btrfs_set_header_generation(cow, trans->transid);
116         btrfs_set_header_owner(cow, new_root_objectid);
117
118         WARN_ON(btrfs_header_generation(buf) > trans->transid);
119         ret = btrfs_inc_ref(trans, new_root, buf);
120         kfree(new_root);
121
122         if (ret)
123                 return ret;
124
125         btrfs_mark_buffer_dirty(cow);
126         *cow_ret = cow;
127         return 0;
128 }
129
130 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
131                              struct btrfs_root *root,
132                              struct extent_buffer *buf,
133                              struct extent_buffer *parent, int parent_slot,
134                              struct extent_buffer **cow_ret,
135                              u64 search_start, u64 empty_size)
136 {
137         u64 root_gen;
138         struct extent_buffer *cow;
139         u32 nritems;
140         int ret = 0;
141         int different_trans = 0;
142         int level;
143         struct btrfs_key first_key;
144
145         if (root->ref_cows) {
146                 root_gen = trans->transid;
147         } else {
148                 root_gen = 0;
149         }
150
151         WARN_ON(root->ref_cows && trans->transid !=
152                 root->fs_info->running_transaction->transid);
153         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
154
155         level = btrfs_header_level(buf);
156         nritems = btrfs_header_nritems(buf);
157         if (nritems) {
158                 if (level == 0)
159                         btrfs_item_key_to_cpu(buf, &first_key, 0);
160                 else
161                         btrfs_node_key_to_cpu(buf, &first_key, 0);
162         } else {
163                 first_key.objectid = 0;
164         }
165         cow = __btrfs_alloc_free_block(trans, root, buf->len,
166                                      root->root_key.objectid,
167                                      root_gen, first_key.objectid, level,
168                                      search_start, empty_size);
169         if (IS_ERR(cow))
170                 return PTR_ERR(cow);
171
172         copy_extent_buffer(cow, buf, 0, 0, cow->len);
173         btrfs_set_header_bytenr(cow, cow->start);
174         btrfs_set_header_generation(cow, trans->transid);
175         btrfs_set_header_owner(cow, root->root_key.objectid);
176
177         WARN_ON(btrfs_header_generation(buf) > trans->transid);
178         if (btrfs_header_generation(buf) != trans->transid) {
179                 different_trans = 1;
180                 ret = btrfs_inc_ref(trans, root, buf);
181                 if (ret)
182                         return ret;
183         } else {
184                 clean_tree_block(trans, root, buf);
185         }
186
187         if (buf == root->node) {
188                 root_gen = btrfs_header_generation(buf);
189                 root->node = cow;
190                 extent_buffer_get(cow);
191                 if (buf != root->commit_root) {
192                         btrfs_free_extent(trans, root, buf->start,
193                                           buf->len, root->root_key.objectid,
194                                           root_gen, 0, 0, 1);
195                 }
196                 free_extent_buffer(buf);
197         } else {
198                 root_gen = btrfs_header_generation(parent);
199                 btrfs_set_node_blockptr(parent, parent_slot,
200                                         cow->start);
201                 WARN_ON(trans->transid == 0);
202                 btrfs_set_node_ptr_generation(parent, parent_slot,
203                                               trans->transid);
204                 btrfs_mark_buffer_dirty(parent);
205                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
206                 btrfs_free_extent(trans, root, buf->start, buf->len,
207                                   btrfs_header_owner(parent), root_gen,
208                                   0, 0, 1);
209         }
210         free_extent_buffer(buf);
211         btrfs_mark_buffer_dirty(cow);
212         *cow_ret = cow;
213         return 0;
214 }
215
216 int btrfs_cow_block(struct btrfs_trans_handle *trans,
217                     struct btrfs_root *root, struct extent_buffer *buf,
218                     struct extent_buffer *parent, int parent_slot,
219                     struct extent_buffer **cow_ret)
220 {
221         u64 search_start;
222         int ret;
223         /*
224         if (trans->transaction != root->fs_info->running_transaction) {
225                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
226                        root->fs_info->running_transaction->transid);
227                 WARN_ON(1);
228         }
229         */
230         if (trans->transid != root->fs_info->generation) {
231                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
232                        root->fs_info->generation);
233                 WARN_ON(1);
234         }
235         if (btrfs_header_generation(buf) == trans->transid) {
236                 *cow_ret = buf;
237                 return 0;
238         }
239
240         search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
241         ret = __btrfs_cow_block(trans, root, buf, parent,
242                                  parent_slot, cow_ret, search_start, 0);
243         return ret;
244 }
245
246 /*
247 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
248 {
249         if (blocknr < other && other - (blocknr + blocksize) < 32768)
250                 return 1;
251         if (blocknr > other && blocknr - (other + blocksize) < 32768)
252                 return 1;
253         return 0;
254 }
255 */
256
257 /*
258  * compare two keys in a memcmp fashion
259  */
260 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
261 {
262         struct btrfs_key k1;
263
264         btrfs_disk_key_to_cpu(&k1, disk);
265
266         if (k1.objectid > k2->objectid)
267                 return 1;
268         if (k1.objectid < k2->objectid)
269                 return -1;
270         if (k1.type > k2->type)
271                 return 1;
272         if (k1.type < k2->type)
273                 return -1;
274         if (k1.offset > k2->offset)
275                 return 1;
276         if (k1.offset < k2->offset)
277                 return -1;
278         return 0;
279 }
280
281
#if 0
/*
 * DISABLED CODE: everything up to the matching #endif is compiled out.
 *
 * Walks the pointers of 'parent' starting at 'start_slot' and re-COWs
 * (via __btrfs_cow_block) each child whose block number is not "close"
 * to its neighbours, using the last block written as the allocation
 * hint.  Slots whose key sorts before 'progress' are skipped.  With
 * 'cache_only' set, only level-1 parents are processed and children that
 * are not already cached and up to date are skipped.  *last_ret is
 * updated with the start of the most recently written block.
 *
 * Returns 0 or the error from __btrfs_cow_block().
 *
 * NOTE(review): this relies on comp_keys() and close_blocks(); in the
 * visible part of the file close_blocks() only exists inside a comment
 * and the active comparison helper is named btrfs_comp_keys(), so this
 * body would need updating before it could be re-enabled.
 */
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct extent_buffer *parent,
                       int start_slot, int cache_only, u64 *last_ret,
                       struct btrfs_key *progress)
{
        struct extent_buffer *cur;
        struct extent_buffer *tmp;
        u64 blocknr;
        u64 search_start = *last_ret;
        u64 last_block = 0;
        u64 other;
        u32 parent_nritems;
        int end_slot;
        int i;
        int err = 0;
        int parent_level;
        int uptodate;
        u32 blocksize;
        int progress_passed = 0;
        struct btrfs_disk_key disk_key;

        parent_level = btrfs_header_level(parent);
        if (cache_only && parent_level != 1)
                return 0;

        if (trans->transaction != root->fs_info->running_transaction) {
                printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
                       root->fs_info->running_transaction->transid);
                WARN_ON(1);
        }
        if (trans->transid != root->fs_info->generation) {
                printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
                       root->fs_info->generation);
                WARN_ON(1);
        }

        parent_nritems = btrfs_header_nritems(parent);
        blocksize = btrfs_level_size(root, parent_level - 1);
        end_slot = parent_nritems;

        /* a single child has no neighbours to be placed next to */
        if (parent_nritems == 1)
                return 0;

        for (i = start_slot; i < end_slot; i++) {
                int close = 1;

                if (!parent->map_token) {
                        map_extent_buffer(parent,
                                        btrfs_node_key_ptr_offset(i),
                                        sizeof(struct btrfs_key_ptr),
                                        &parent->map_token, &parent->kaddr,
                                        &parent->map_start, &parent->map_len,
                                        KM_USER1);
                }
                /* skip slots until the key reaches 'progress' */
                btrfs_node_key(parent, &disk_key, i);
                if (!progress_passed && comp_keys(&disk_key, progress) < 0)
                        continue;

                progress_passed = 1;
                blocknr = btrfs_node_blockptr(parent, i);
                if (last_block == 0)
                        last_block = blocknr;

                /* compare placement against the previous and next child */
                if (i > 0) {
                        other = btrfs_node_blockptr(parent, i - 1);
                        close = close_blocks(blocknr, other, blocksize);
                }
                if (close && i < end_slot - 2) {
                        other = btrfs_node_blockptr(parent, i + 1);
                        close = close_blocks(blocknr, other, blocksize);
                }
                /* already well placed: leave this child alone */
                if (close) {
                        last_block = blocknr;
                        continue;
                }
                if (parent->map_token) {
                        unmap_extent_buffer(parent, parent->map_token,
                                            KM_USER1);
                        parent->map_token = NULL;
                }

                cur = btrfs_find_tree_block(root, blocknr, blocksize);
                if (cur)
                        uptodate = btrfs_buffer_uptodate(cur);
                else
                        uptodate = 0;
                if (!cur || !uptodate) {
                        /* cache_only: never read from disk */
                        if (cache_only) {
                                free_extent_buffer(cur);
                                continue;
                        }
                        if (!cur) {
                                cur = read_tree_block(root, blocknr,
                                                         blocksize);
                        } else if (!uptodate) {
                                btrfs_read_buffer(cur);
                        }
                }
                if (search_start == 0)
                        search_start = last_block;

                err = __btrfs_cow_block(trans, root, cur, parent, i,
                                        &tmp, search_start,
                                        min(16 * blocksize,
                                            (end_slot - i) * blocksize));
                if (err) {
                        free_extent_buffer(cur);
                        break;
                }
                /* the next allocation is hinted right after this one */
                search_start = tmp->start;
                last_block = tmp->start;
                *last_ret = search_start;
                if (parent_level == 1)
                        btrfs_clear_buffer_defrag(tmp);
                free_extent_buffer(tmp);
        }
        if (parent->map_token) {
                unmap_extent_buffer(parent, parent->map_token,
                                    KM_USER1);
                parent->map_token = NULL;
        }
        return err;
}
#endif
407
408 /*
409  * The leaf data grows from end-to-front in the node.
410  * this returns the address of the start of the last item,
411  * which is the stop of the leaf data stack
412  */
413 static inline unsigned int leaf_data_end(struct btrfs_root *root,
414                                          struct extent_buffer *leaf)
415 {
416         u32 nr = btrfs_header_nritems(leaf);
417         if (nr == 0)
418                 return BTRFS_LEAF_DATA_SIZE(root);
419         return btrfs_item_offset_nr(leaf, nr - 1);
420 }
421
422 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
423                       int level)
424 {
425         struct extent_buffer *parent = NULL;
426         struct extent_buffer *node = path->nodes[level];
427         struct btrfs_disk_key parent_key;
428         struct btrfs_disk_key node_key;
429         int parent_slot;
430         int slot;
431         struct btrfs_key cpukey;
432         u32 nritems = btrfs_header_nritems(node);
433
434         if (path->nodes[level + 1])
435                 parent = path->nodes[level + 1];
436
437         slot = path->slots[level];
438         BUG_ON(nritems == 0);
439         if (parent) {
440                 parent_slot = path->slots[level + 1];
441                 btrfs_node_key(parent, &parent_key, parent_slot);
442                 btrfs_node_key(node, &node_key, 0);
443                 BUG_ON(memcmp(&parent_key, &node_key,
444                               sizeof(struct btrfs_disk_key)));
445                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
446                        btrfs_header_bytenr(node));
447         }
448         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
449         if (slot != 0) {
450                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
451                 btrfs_node_key(node, &node_key, slot);
452                 BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0);
453         }
454         if (slot < nritems - 1) {
455                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
456                 btrfs_node_key(node, &node_key, slot);
457                 BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0);
458         }
459         return 0;
460 }
461
462 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
463                       int level)
464 {
465         struct extent_buffer *leaf = path->nodes[level];
466         struct extent_buffer *parent = NULL;
467         int parent_slot;
468         struct btrfs_key cpukey;
469         struct btrfs_disk_key parent_key;
470         struct btrfs_disk_key leaf_key;
471         int slot = path->slots[0];
472
473         u32 nritems = btrfs_header_nritems(leaf);
474
475         if (path->nodes[level + 1])
476                 parent = path->nodes[level + 1];
477
478         if (nritems == 0)
479                 return 0;
480
481         if (parent) {
482                 parent_slot = path->slots[level + 1];
483                 btrfs_node_key(parent, &parent_key, parent_slot);
484                 btrfs_item_key(leaf, &leaf_key, 0);
485
486                 BUG_ON(memcmp(&parent_key, &leaf_key,
487                        sizeof(struct btrfs_disk_key)));
488                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
489                        btrfs_header_bytenr(leaf));
490         }
491 #if 0
492         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
493                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
494                 btrfs_item_key(leaf, &leaf_key, i);
495                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
496                         btrfs_print_leaf(root, leaf);
497                         printk("slot %d offset bad key\n", i);
498                         BUG_ON(1);
499                 }
500                 if (btrfs_item_offset_nr(leaf, i) !=
501                         btrfs_item_end_nr(leaf, i + 1)) {
502                         btrfs_print_leaf(root, leaf);
503                         printk("slot %d offset bad\n", i);
504                         BUG_ON(1);
505                 }
506                 if (i == 0) {
507                         if (btrfs_item_offset_nr(leaf, i) +
508                                btrfs_item_size_nr(leaf, i) !=
509                                BTRFS_LEAF_DATA_SIZE(root)) {
510                                 btrfs_print_leaf(root, leaf);
511                                 printk("slot %d first offset bad\n", i);
512                                 BUG_ON(1);
513                         }
514                 }
515         }
516         if (nritems > 0) {
517                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
518                                 btrfs_print_leaf(root, leaf);
519                                 printk("slot %d bad size \n", nritems - 1);
520                                 BUG_ON(1);
521                 }
522         }
523 #endif
524         if (slot != 0 && slot < nritems - 1) {
525                 btrfs_item_key(leaf, &leaf_key, slot);
526                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
527                 if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) {
528                         btrfs_print_leaf(root, leaf);
529                         printk("slot %d offset bad key\n", slot);
530                         BUG_ON(1);
531                 }
532                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
533                        btrfs_item_end_nr(leaf, slot)) {
534                         btrfs_print_leaf(root, leaf);
535                         printk("slot %d offset bad\n", slot);
536                         BUG_ON(1);
537                 }
538         }
539         if (slot < nritems - 1) {
540                 btrfs_item_key(leaf, &leaf_key, slot);
541                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
542                 BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0);
543                 if (btrfs_item_offset_nr(leaf, slot) !=
544                         btrfs_item_end_nr(leaf, slot + 1)) {
545                         btrfs_print_leaf(root, leaf);
546                         printk("slot %d offset bad\n", slot);
547                         BUG_ON(1);
548                 }
549         }
550         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
551                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
552         return 0;
553 }
554
/*
 * Consistency-check hook for the block at path->nodes[level].
 *
 * The checks are currently switched off: the function returns 0 before
 * doing any work, so the dispatch to check_leaf()/check_node() at the
 * bottom is never reached.  The fsid comparison in between is also
 * compiled out with #if 0.
 */
static int noinline check_block(struct btrfs_root *root,
                                struct btrfs_path *path, int level)
{
        /* early exit: everything below is unreachable */
        return 0;
#if 0
        struct extent_buffer *buf = path->nodes[level];

        if (memcmp_extent_buffer(buf, root->fs_info->fsid,
                                 (unsigned long)btrfs_header_fsid(buf),
                                 BTRFS_FSID_SIZE)) {
                printk("warning bad block %Lu\n", buf->start);
                return 1;
        }
#endif
        /* level 0 is a leaf, anything above is an interior node */
        if (level == 0)
                return check_leaf(root, path, level);
        return check_node(root, path, level);
}
573
574 /*
575  * search for key in the extent_buffer.  The items start at offset p,
576  * and they are item_size apart.  There are 'max' items in p.
577  *
578  * the slot in the array is returned via slot, and it points to
579  * the place where you would insert key if it is not found in
580  * the array.
581  *
582  * slot may point to max if the key is bigger than all of the keys
583  */
584 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
585                               int item_size, struct btrfs_key *key,
586                               int max, int *slot)
587 {
588         int low = 0;
589         int high = max;
590         int mid;
591         int ret;
592         unsigned long offset;
593         struct btrfs_disk_key *tmp;
594
595         while(low < high) {
596                 mid = (low + high) / 2;
597                 offset = p + mid * item_size;
598
599                 tmp = (struct btrfs_disk_key *)(eb->data + offset);
600                 ret = btrfs_comp_keys(tmp, key);
601
602                 if (ret < 0)
603                         low = mid + 1;
604                 else if (ret > 0)
605                         high = mid;
606                 else {
607                         *slot = mid;
608                         return 0;
609                 }
610         }
611         *slot = low;
612         return 1;
613 }
614
615 /*
616  * simple bin_search frontend that does the right thing for
617  * leaves vs nodes
618  */
619 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
620                       int level, int *slot)
621 {
622         if (level == 0) {
623                 return generic_bin_search(eb,
624                                           offsetof(struct btrfs_leaf, items),
625                                           sizeof(struct btrfs_item),
626                                           key, btrfs_header_nritems(eb),
627                                           slot);
628         } else {
629                 return generic_bin_search(eb,
630                                           offsetof(struct btrfs_node, ptrs),
631                                           sizeof(struct btrfs_key_ptr),
632                                           key, btrfs_header_nritems(eb),
633                                           slot);
634         }
635         return -1;
636 }
637
638 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
639                                    struct extent_buffer *parent, int slot)
640 {
641         if (slot < 0)
642                 return NULL;
643         if (slot >= btrfs_header_nritems(parent))
644                 return NULL;
645         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
646                        btrfs_level_size(root, btrfs_header_level(parent) - 1));
647 }
648
649 static int balance_level(struct btrfs_trans_handle *trans,
650                          struct btrfs_root *root,
651                          struct btrfs_path *path, int level)
652 {
653         struct extent_buffer *right = NULL;
654         struct extent_buffer *mid;
655         struct extent_buffer *left = NULL;
656         struct extent_buffer *parent = NULL;
657         int ret = 0;
658         int wret;
659         int pslot;
660         int orig_slot = path->slots[level];
661         int err_on_enospc = 0;
662         u64 orig_ptr;
663
664         if (level == 0)
665                 return 0;
666
667         mid = path->nodes[level];
668         WARN_ON(btrfs_header_generation(mid) != trans->transid);
669
670         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
671
672         if (level < BTRFS_MAX_LEVEL - 1)
673                 parent = path->nodes[level + 1];
674         pslot = path->slots[level + 1];
675
676         /*
677          * deal with the case where there is only one pointer in the root
678          * by promoting the node below to a root
679          */
680         if (!parent) {
681                 struct extent_buffer *child;
682
683                 if (btrfs_header_nritems(mid) != 1)
684                         return 0;
685
686                 /* promote the child to a root */
687                 child = read_node_slot(root, mid, 0);
688                 BUG_ON(!child);
689                 root->node = child;
690                 path->nodes[level] = NULL;
691                 clean_tree_block(trans, root, mid);
692                 wait_on_tree_block_writeback(root, mid);
693                 /* once for the path */
694                 free_extent_buffer(mid);
695                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
696                                         root->root_key.objectid,
697                                         btrfs_header_generation(mid), 0, 0, 1);
698                 /* once for the root ptr */
699                 free_extent_buffer(mid);
700                 return ret;
701         }
702         if (btrfs_header_nritems(mid) >
703             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
704                 return 0;
705
706         if (btrfs_header_nritems(mid) < 2)
707                 err_on_enospc = 1;
708
709         left = read_node_slot(root, parent, pslot - 1);
710         if (left) {
711                 wret = btrfs_cow_block(trans, root, left,
712                                        parent, pslot - 1, &left);
713                 if (wret) {
714                         ret = wret;
715                         goto enospc;
716                 }
717         }
718         right = read_node_slot(root, parent, pslot + 1);
719         if (right) {
720                 wret = btrfs_cow_block(trans, root, right,
721                                        parent, pslot + 1, &right);
722                 if (wret) {
723                         ret = wret;
724                         goto enospc;
725                 }
726         }
727
728         /* first, try to make some room in the middle buffer */
729         if (left) {
730                 orig_slot += btrfs_header_nritems(left);
731                 wret = push_node_left(trans, root, left, mid);
732                 if (wret < 0)
733                         ret = wret;
734                 if (btrfs_header_nritems(mid) < 2)
735                         err_on_enospc = 1;
736         }
737
738         /*
739          * then try to empty the right most buffer into the middle
740          */
741         if (right) {
742                 wret = push_node_left(trans, root, mid, right);
743                 if (wret < 0 && wret != -ENOSPC)
744                         ret = wret;
745                 if (btrfs_header_nritems(right) == 0) {
746                         u64 bytenr = right->start;
747                         u64 generation = btrfs_header_generation(parent);
748                         u32 blocksize = right->len;
749
750                         clean_tree_block(trans, root, right);
751                         wait_on_tree_block_writeback(root, right);
752                         free_extent_buffer(right);
753                         right = NULL;
754                         wret = del_ptr(trans, root, path, level + 1, pslot +
755                                        1);
756                         if (wret)
757                                 ret = wret;
758                         wret = btrfs_free_extent(trans, root, bytenr,
759                                                  blocksize,
760                                                  btrfs_header_owner(parent),
761                                                  generation, 0, 0, 1);
762                         if (wret)
763                                 ret = wret;
764                 } else {
765                         struct btrfs_disk_key right_key;
766                         btrfs_node_key(right, &right_key, 0);
767                         btrfs_set_node_key(parent, &right_key, pslot + 1);
768                         btrfs_mark_buffer_dirty(parent);
769                 }
770         }
771         if (btrfs_header_nritems(mid) == 1) {
772                 /*
773                  * we're not allowed to leave a node with one item in the
774                  * tree during a delete.  A deletion from lower in the tree
775                  * could try to delete the only pointer in this node.
776                  * So, pull some keys from the left.
777                  * There has to be a left pointer at this point because
778                  * otherwise we would have pulled some pointers from the
779                  * right
780                  */
781                 BUG_ON(!left);
782                 wret = balance_node_right(trans, root, mid, left);
783                 if (wret < 0) {
784                         ret = wret;
785                         goto enospc;
786                 }
787                 BUG_ON(wret == 1);
788         }
789         if (btrfs_header_nritems(mid) == 0) {
790                 /* we've managed to empty the middle node, drop it */
791                 u64 root_gen = btrfs_header_generation(parent);
792                 u64 bytenr = mid->start;
793                 u32 blocksize = mid->len;
794                 clean_tree_block(trans, root, mid);
795                 wait_on_tree_block_writeback(root, mid);
796                 free_extent_buffer(mid);
797                 mid = NULL;
798                 wret = del_ptr(trans, root, path, level + 1, pslot);
799                 if (wret)
800                         ret = wret;
801                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
802                                          btrfs_header_owner(parent),
803                                          root_gen, 0, 0, 1);
804                 if (wret)
805                         ret = wret;
806         } else {
807                 /* update the parent key to reflect our changes */
808                 struct btrfs_disk_key mid_key;
809                 btrfs_node_key(mid, &mid_key, 0);
810                 btrfs_set_node_key(parent, &mid_key, pslot);
811                 btrfs_mark_buffer_dirty(parent);
812         }
813
814         /* update the path */
815         if (left) {
816                 if (btrfs_header_nritems(left) > orig_slot) {
817                         extent_buffer_get(left);
818                         path->nodes[level] = left;
819                         path->slots[level + 1] -= 1;
820                         path->slots[level] = orig_slot;
821                         if (mid)
822                                 free_extent_buffer(mid);
823                 } else {
824                         orig_slot -= btrfs_header_nritems(left);
825                         path->slots[level] = orig_slot;
826                 }
827         }
828         /* double check we haven't messed things up */
829         check_block(root, path, level);
830         if (orig_ptr !=
831             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
832                 BUG();
833 enospc:
834         if (right)
835                 free_extent_buffer(right);
836         if (left)
837                 free_extent_buffer(left);
838         return ret;
839 }
840
841 /* returns zero if the push worked, non-zero otherwise */
842 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
843                                           struct btrfs_root *root,
844                                           struct btrfs_path *path, int level)
845 {
846         struct extent_buffer *right = NULL;
847         struct extent_buffer *mid;
848         struct extent_buffer *left = NULL;
849         struct extent_buffer *parent = NULL;
850         int ret = 0;
851         int wret;
852         int pslot;
853         int orig_slot = path->slots[level];
854         u64 orig_ptr;
855
856         if (level == 0)
857                 return 1;
858
859         mid = path->nodes[level];
860         WARN_ON(btrfs_header_generation(mid) != trans->transid);
861         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
862
863         if (level < BTRFS_MAX_LEVEL - 1)
864                 parent = path->nodes[level + 1];
865         pslot = path->slots[level + 1];
866
867         if (!parent)
868                 return 1;
869
870         left = read_node_slot(root, parent, pslot - 1);
871
872         /* first, try to make some room in the middle buffer */
873         if (left) {
874                 u32 left_nr;
875                 left_nr = btrfs_header_nritems(left);
876                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
877                         wret = 1;
878                 } else {
879                         ret = btrfs_cow_block(trans, root, left, parent,
880                                               pslot - 1, &left);
881                         if (ret)
882                                 wret = 1;
883                         else {
884                                 wret = push_node_left(trans, root,
885                                                       left, mid);
886                         }
887                 }
888                 if (wret < 0)
889                         ret = wret;
890                 if (wret == 0) {
891                         struct btrfs_disk_key disk_key;
892                         orig_slot += left_nr;
893                         btrfs_node_key(mid, &disk_key, 0);
894                         btrfs_set_node_key(parent, &disk_key, pslot);
895                         btrfs_mark_buffer_dirty(parent);
896                         if (btrfs_header_nritems(left) > orig_slot) {
897                                 path->nodes[level] = left;
898                                 path->slots[level + 1] -= 1;
899                                 path->slots[level] = orig_slot;
900                                 free_extent_buffer(mid);
901                         } else {
902                                 orig_slot -=
903                                         btrfs_header_nritems(left);
904                                 path->slots[level] = orig_slot;
905                                 free_extent_buffer(left);
906                         }
907                         return 0;
908                 }
909                 free_extent_buffer(left);
910         }
911         right= read_node_slot(root, parent, pslot + 1);
912
913         /*
914          * then try to empty the right most buffer into the middle
915          */
916         if (right) {
917                 u32 right_nr;
918                 right_nr = btrfs_header_nritems(right);
919                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
920                         wret = 1;
921                 } else {
922                         ret = btrfs_cow_block(trans, root, right,
923                                               parent, pslot + 1,
924                                               &right);
925                         if (ret)
926                                 wret = 1;
927                         else {
928                                 wret = balance_node_right(trans, root,
929                                                           right, mid);
930                         }
931                 }
932                 if (wret < 0)
933                         ret = wret;
934                 if (wret == 0) {
935                         struct btrfs_disk_key disk_key;
936
937                         btrfs_node_key(right, &disk_key, 0);
938                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
939                         btrfs_mark_buffer_dirty(parent);
940
941                         if (btrfs_header_nritems(mid) <= orig_slot) {
942                                 path->nodes[level] = right;
943                                 path->slots[level + 1] += 1;
944                                 path->slots[level] = orig_slot -
945                                         btrfs_header_nritems(mid);
946                                 free_extent_buffer(mid);
947                         } else {
948                                 free_extent_buffer(right);
949                         }
950                         return 0;
951                 }
952                 free_extent_buffer(right);
953         }
954         return 1;
955 }
956
957 /*
958  * readahead one full node of leaves
959  */
960 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
961                              int level, int slot, u64 objectid)
962 {
963         struct extent_buffer *node;
964         struct btrfs_disk_key disk_key;
965         u32 nritems;
966         u64 search;
967         u64 lowest_read;
968         u64 highest_read;
969         u64 nread = 0;
970         int direction = path->reada;
971         struct extent_buffer *eb;
972         u32 nr;
973         u32 blocksize;
974         u32 nscan = 0;
975
976         if (level != 1)
977                 return;
978
979         if (!path->nodes[level])
980                 return;
981
982         node = path->nodes[level];
983         search = btrfs_node_blockptr(node, slot);
984         blocksize = btrfs_level_size(root, level - 1);
985         eb = btrfs_find_tree_block(root, search, blocksize);
986         if (eb) {
987                 free_extent_buffer(eb);
988                 return;
989         }
990
991         highest_read = search;
992         lowest_read = search;
993
994         nritems = btrfs_header_nritems(node);
995         nr = slot;
996         while(1) {
997                 if (direction < 0) {
998                         if (nr == 0)
999                                 break;
1000                         nr--;
1001                 } else if (direction > 0) {
1002                         nr++;
1003                         if (nr >= nritems)
1004                                 break;
1005                 }
1006                 if (path->reada < 0 && objectid) {
1007                         btrfs_node_key(node, &disk_key, nr);
1008                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1009                                 break;
1010                 }
1011                 search = btrfs_node_blockptr(node, nr);
1012                 if ((search >= lowest_read && search <= highest_read) ||
1013                     (search < lowest_read && lowest_read - search <= 32768) ||
1014                     (search > highest_read && search - highest_read <= 32768)) {
1015                         readahead_tree_block(root, search, blocksize);
1016                         nread += blocksize;
1017                 }
1018                 nscan++;
1019                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1020                         break;
1021                 if(nread > (1024 * 1024) || nscan > 128)
1022                         break;
1023
1024                 if (search < lowest_read)
1025                         lowest_read = search;
1026                 if (search > highest_read)
1027                         highest_read = search;
1028         }
1029 }
1030
1031 /*
1032  * look for key in the tree.  path is filled in with nodes along the way
1033  * if key is found, we return zero and you can find the item in the leaf
1034  * level of the path (level 0)
1035  *
1036  * If the key isn't found, the path points to the slot where it should
1037  * be inserted, and 1 is returned.  If there are other errors during the
1038  * search a negative error number is returned.
1039  *
1040  * if ins_len > 0, nodes and leaves will be split as we walk down the
1041  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1042  * possible)
1043  */
1044 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1045                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1046                       ins_len, int cow)
1047 {
1048         struct extent_buffer *b;
1049         u64 bytenr;
1050         u64 ptr_gen;
1051         int slot;
1052         int ret;
1053         int level;
1054         int should_reada = p->reada;
1055         u8 lowest_level = 0;
1056
1057         lowest_level = p->lowest_level;
1058         WARN_ON(lowest_level && ins_len);
1059         WARN_ON(p->nodes[0] != NULL);
1060         /*
1061         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1062         */
1063 again:
1064         b = root->node;
1065         extent_buffer_get(b);
1066         while (b) {
1067                 level = btrfs_header_level(b);
1068                 if (cow) {
1069                         int wret;
1070                         wret = btrfs_cow_block(trans, root, b,
1071                                                p->nodes[level + 1],
1072                                                p->slots[level + 1],
1073                                                &b);
1074                         if (wret) {
1075                                 free_extent_buffer(b);
1076                                 return wret;
1077                         }
1078                 }
1079                 BUG_ON(!cow && ins_len);
1080                 if (level != btrfs_header_level(b))
1081                         WARN_ON(1);
1082                 level = btrfs_header_level(b);
1083                 p->nodes[level] = b;
1084                 ret = check_block(root, p, level);
1085                 if (ret)
1086                         return -1;
1087                 ret = bin_search(b, key, level, &slot);
1088                 if (level != 0) {
1089                         if (ret && slot > 0)
1090                                 slot -= 1;
1091                         p->slots[level] = slot;
1092                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1093                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1094                                 int sret = split_node(trans, root, p, level);
1095                                 BUG_ON(sret > 0);
1096                                 if (sret)
1097                                         return sret;
1098                                 b = p->nodes[level];
1099                                 slot = p->slots[level];
1100                         } else if (ins_len < 0) {
1101                                 int sret = balance_level(trans, root, p,
1102                                                          level);
1103                                 if (sret)
1104                                         return sret;
1105                                 b = p->nodes[level];
1106                                 if (!b) {
1107                                         btrfs_release_path(NULL, p);
1108                                         goto again;
1109                                 }
1110                                 slot = p->slots[level];
1111                                 BUG_ON(btrfs_header_nritems(b) == 1);
1112                         }
1113                         /* this is only true while dropping a snapshot */
1114                         if (level == lowest_level)
1115                                 break;
1116                         bytenr = btrfs_node_blockptr(b, slot);
1117                         ptr_gen = btrfs_node_ptr_generation(b, slot);
1118                         if (should_reada)
1119                                 reada_for_search(root, p, level, slot,
1120                                                  key->objectid);
1121                         b = read_tree_block(root, bytenr,
1122                                             btrfs_level_size(root, level - 1));
1123                         if (ptr_gen != btrfs_header_generation(b)) {
1124                                 printk("block %llu bad gen wanted %llu "
1125                                        "found %llu\n",
1126                                 (unsigned long long)b->start,
1127                                 (unsigned long long)ptr_gen,
1128                                 (unsigned long long)btrfs_header_generation(b));
1129                         }
1130                 } else {
1131                         p->slots[level] = slot;
1132                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1133                             sizeof(struct btrfs_item) + ins_len) {
1134                                 int sret = split_leaf(trans, root, key,
1135                                                       p, ins_len, ret == 0);
1136                                 BUG_ON(sret > 0);
1137                                 if (sret)
1138                                         return sret;
1139                         }
1140                         return ret;
1141                 }
1142         }
1143         return 1;
1144 }
1145
1146 /*
1147  * adjust the pointers going up the tree, starting at level
1148  * making sure the right key of each node is points to 'key'.
1149  * This is used after shifting pointers to the left, so it stops
1150  * fixing up pointers when a given leaf/node is not in slot 0 of the
1151  * higher levels
1152  *
1153  * If this fails to write a tree block, it returns -1, but continues
1154  * fixing up the blocks in ram so the tree is consistent.
1155  */
1156 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1157                           struct btrfs_root *root, struct btrfs_path *path,
1158                           struct btrfs_disk_key *key, int level)
1159 {
1160         int i;
1161         int ret = 0;
1162         struct extent_buffer *t;
1163
1164         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1165                 int tslot = path->slots[i];
1166                 if (!path->nodes[i])
1167                         break;
1168                 t = path->nodes[i];
1169                 btrfs_set_node_key(t, key, tslot);
1170                 btrfs_mark_buffer_dirty(path->nodes[i]);
1171                 if (tslot != 0)
1172                         break;
1173         }
1174         return ret;
1175 }
1176
1177 /*
1178  * try to push data from one node into the next node left in the
1179  * tree.
1180  *
1181  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1182  * error, and > 0 if there was no room in the left hand block.
1183  */
1184 static int push_node_left(struct btrfs_trans_handle *trans,
1185                           struct btrfs_root *root, struct extent_buffer *dst,
1186                           struct extent_buffer *src)
1187 {
1188         int push_items = 0;
1189         int src_nritems;
1190         int dst_nritems;
1191         int ret = 0;
1192
1193         src_nritems = btrfs_header_nritems(src);
1194         dst_nritems = btrfs_header_nritems(dst);
1195         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1196         WARN_ON(btrfs_header_generation(src) != trans->transid);
1197         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1198
1199         if (push_items <= 0) {
1200                 return 1;
1201         }
1202
1203         if (src_nritems < push_items)
1204                 push_items = src_nritems;
1205
1206         copy_extent_buffer(dst, src,
1207                            btrfs_node_key_ptr_offset(dst_nritems),
1208                            btrfs_node_key_ptr_offset(0),
1209                            push_items * sizeof(struct btrfs_key_ptr));
1210
1211         if (push_items < src_nritems) {
1212                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1213                                       btrfs_node_key_ptr_offset(push_items),
1214                                       (src_nritems - push_items) *
1215                                       sizeof(struct btrfs_key_ptr));
1216         }
1217         btrfs_set_header_nritems(src, src_nritems - push_items);
1218         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1219         btrfs_mark_buffer_dirty(src);
1220         btrfs_mark_buffer_dirty(dst);
1221         return ret;
1222 }
1223
1224 /*
1225  * try to push data from one node into the next node right in the
1226  * tree.
1227  *
1228  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1229  * error, and > 0 if there was no room in the right hand block.
1230  *
1231  * this will  only push up to 1/2 the contents of the left node over
1232  */
1233 static int balance_node_right(struct btrfs_trans_handle *trans,
1234                               struct btrfs_root *root,
1235                               struct extent_buffer *dst,
1236                               struct extent_buffer *src)
1237 {
1238         int push_items = 0;
1239         int max_push;
1240         int src_nritems;
1241         int dst_nritems;
1242         int ret = 0;
1243
1244         WARN_ON(btrfs_header_generation(src) != trans->transid);
1245         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1246
1247         src_nritems = btrfs_header_nritems(src);
1248         dst_nritems = btrfs_header_nritems(dst);
1249         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1250         if (push_items <= 0)
1251                 return 1;
1252
1253         max_push = src_nritems / 2 + 1;
1254         /* don't try to empty the node */
1255         if (max_push >= src_nritems)
1256                 return 1;
1257
1258         if (max_push < push_items)
1259                 push_items = max_push;
1260
1261         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1262                                       btrfs_node_key_ptr_offset(0),
1263                                       (dst_nritems) *
1264                                       sizeof(struct btrfs_key_ptr));
1265
1266         copy_extent_buffer(dst, src,
1267                            btrfs_node_key_ptr_offset(0),
1268                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1269                            push_items * sizeof(struct btrfs_key_ptr));
1270
1271         btrfs_set_header_nritems(src, src_nritems - push_items);
1272         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1273
1274         btrfs_mark_buffer_dirty(src);
1275         btrfs_mark_buffer_dirty(dst);
1276         return ret;
1277 }
1278
1279 /*
1280  * helper function to insert a new root level in the tree.
1281  * A new node is allocated, and a single item is inserted to
1282  * point to the existing root
1283  *
1284  * returns zero on success or < 0 on failure.
1285  */
1286 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1287                            struct btrfs_root *root,
1288                            struct btrfs_path *path, int level)
1289 {
1290         u64 root_gen;
1291         u64 lower_gen;
1292         struct extent_buffer *lower;
1293         struct extent_buffer *c;
1294         struct btrfs_disk_key lower_key;
1295
1296         BUG_ON(path->nodes[level]);
1297         BUG_ON(path->nodes[level-1] != root->node);
1298
1299         if (root->ref_cows)
1300                 root_gen = trans->transid;
1301         else
1302                 root_gen = 0;
1303
1304         lower = path->nodes[level-1];
1305         if (level == 1)
1306                 btrfs_item_key(lower, &lower_key, 0);
1307         else
1308                 btrfs_node_key(lower, &lower_key, 0);
1309
1310         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1311                                    root->root_key.objectid,
1312                                    root_gen, lower_key.objectid, level,
1313                                    root->node->start, 0);
1314         if (IS_ERR(c))
1315                 return PTR_ERR(c);
1316         memset_extent_buffer(c, 0, 0, root->nodesize);
1317         btrfs_set_header_nritems(c, 1);
1318         btrfs_set_header_level(c, level);
1319         btrfs_set_header_bytenr(c, c->start);
1320         btrfs_set_header_generation(c, trans->transid);
1321         btrfs_set_header_owner(c, root->root_key.objectid);
1322
1323         write_extent_buffer(c, root->fs_info->fsid,
1324                             (unsigned long)btrfs_header_fsid(c),
1325                             BTRFS_FSID_SIZE);
1326         btrfs_set_node_key(c, &lower_key, 0);
1327         btrfs_set_node_blockptr(c, 0, lower->start);
1328         lower_gen = btrfs_header_generation(lower);
1329         WARN_ON(lower_gen == 0);
1330
1331         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1332
1333         btrfs_mark_buffer_dirty(c);
1334
1335         /* the super has an extra ref to root->node */
1336         free_extent_buffer(root->node);
1337         root->node = c;
1338         extent_buffer_get(c);
1339         path->nodes[level] = c;
1340         path->slots[level] = 0;
1341
1342         if (root->ref_cows && lower_gen != trans->transid) {
1343                 struct btrfs_path *back_path = btrfs_alloc_path();
1344                 int ret;
1345                 ret = btrfs_insert_extent_backref(trans,
1346                                                   root->fs_info->extent_root,
1347                                                   path, lower->start,
1348                                                   root->root_key.objectid,
1349                                                   trans->transid, 0, 0);
1350                 BUG_ON(ret);
1351                 btrfs_free_path(back_path);
1352         }
1353         return 0;
1354 }
1355
1356 /*
1357  * worker function to insert a single pointer in a node.
1358  * the node should have enough room for the pointer already
1359  *
1360  * slot and level indicate where you want the key to go, and
1361  * blocknr is the block the key points to.
1362  *
1363  * returns zero on success and < 0 on any error
1364  */
1365 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1366                       *root, struct btrfs_path *path, struct btrfs_disk_key
1367                       *key, u64 bytenr, int slot, int level)
1368 {
1369         struct extent_buffer *lower;
1370         int nritems;
1371
1372         BUG_ON(!path->nodes[level]);
1373         lower = path->nodes[level];
1374         nritems = btrfs_header_nritems(lower);
1375         if (slot > nritems)
1376                 BUG();
1377         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1378                 BUG();
1379         if (slot != nritems) {
1380                 memmove_extent_buffer(lower,
1381                               btrfs_node_key_ptr_offset(slot + 1),
1382                               btrfs_node_key_ptr_offset(slot),
1383                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1384         }
1385         btrfs_set_node_key(lower, key, slot);
1386         btrfs_set_node_blockptr(lower, slot, bytenr);
1387         WARN_ON(trans->transid == 0);
1388         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1389         btrfs_set_header_nritems(lower, nritems + 1);
1390         btrfs_mark_buffer_dirty(lower);
1391         return 0;
1392 }
1393
1394 /*
1395  * split the node at the specified level in path in two.
1396  * The path is corrected to point to the appropriate node after the split
1397  *
1398  * Before splitting this tries to make some room in the node by pushing
1399  * left and right, if either one works, it returns right away.
1400  *
1401  * returns 0 on success and < 0 on failure
1402  */
1403 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1404                       *root, struct btrfs_path *path, int level)
1405 {
1406         u64 root_gen;
1407         struct extent_buffer *c;
1408         struct extent_buffer *split;
1409         struct btrfs_disk_key disk_key;
1410         int mid;
1411         int ret;
1412         int wret;
1413         u32 c_nritems;
1414
1415         c = path->nodes[level];
1416         WARN_ON(btrfs_header_generation(c) != trans->transid);
1417         if (c == root->node) {
1418                 /* trying to split the root, lets make a new one */
1419                 ret = insert_new_root(trans, root, path, level + 1);
1420                 if (ret)
1421                         return ret;
1422         } else {
1423                 ret = push_nodes_for_insert(trans, root, path, level);
1424                 c = path->nodes[level];
1425                 if (!ret && btrfs_header_nritems(c) <
1426                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1427                         return 0;
1428                 if (ret < 0)
1429                         return ret;
1430         }
1431
1432         c_nritems = btrfs_header_nritems(c);
1433         if (root->ref_cows)
1434                 root_gen = trans->transid;
1435         else
1436                 root_gen = 0;
1437
1438         btrfs_node_key(c, &disk_key, 0);
1439         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1440                                          root->root_key.objectid,
1441                                          root_gen,
1442                                          btrfs_disk_key_objectid(&disk_key),
1443                                          level, c->start, 0);
1444         if (IS_ERR(split))
1445                 return PTR_ERR(split);
1446
1447         btrfs_set_header_flags(split, btrfs_header_flags(c));
1448         btrfs_set_header_level(split, btrfs_header_level(c));
1449         btrfs_set_header_bytenr(split, split->start);
1450         btrfs_set_header_generation(split, trans->transid);
1451         btrfs_set_header_owner(split, root->root_key.objectid);
1452         write_extent_buffer(split, root->fs_info->fsid,
1453                             (unsigned long)btrfs_header_fsid(split),
1454                             BTRFS_FSID_SIZE);
1455
1456         mid = (c_nritems + 1) / 2;
1457
1458         copy_extent_buffer(split, c,
1459                            btrfs_node_key_ptr_offset(0),
1460                            btrfs_node_key_ptr_offset(mid),
1461                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1462         btrfs_set_header_nritems(split, c_nritems - mid);
1463         btrfs_set_header_nritems(c, mid);
1464         ret = 0;
1465
1466         btrfs_mark_buffer_dirty(c);
1467         btrfs_mark_buffer_dirty(split);
1468
1469         btrfs_node_key(split, &disk_key, 0);
1470         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1471                           path->slots[level + 1] + 1,
1472                           level + 1);
1473         if (wret)
1474                 ret = wret;
1475
1476         if (path->slots[level] >= mid) {
1477                 path->slots[level] -= mid;
1478                 free_extent_buffer(c);
1479                 path->nodes[level] = split;
1480                 path->slots[level + 1] += 1;
1481         } else {
1482                 free_extent_buffer(split);
1483         }
1484         return ret;
1485 }
1486
1487 /*
1488  * how many bytes are required to store the items in a leaf.  start
1489  * and nr indicate which items in the leaf to check.  This totals up the
1490  * space used both by the item structs and the item data
1491  */
1492 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1493 {
1494         int data_len;
1495         int nritems = btrfs_header_nritems(l);
1496         int end = min(nritems, start + nr) - 1;
1497
1498         if (!nr)
1499                 return 0;
1500         data_len = btrfs_item_end_nr(l, start);
1501         data_len = data_len - btrfs_item_offset_nr(l, end);
1502         data_len += sizeof(struct btrfs_item) * nr;
1503         WARN_ON(data_len < 0);
1504         return data_len;
1505 }
1506
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data.
 *
 * A negative result means the leaf claims to use more than it can hold;
 * that is reported with a message and the negative value is still
 * returned.
 */
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
{
        int nritems = btrfs_header_nritems(leaf);
        int used = leaf_space_used(leaf, 0, nritems);
        int ret = BTRFS_LEAF_DATA_SIZE(root) - used;

        if (ret >= 0)
                return ret;

        printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
               ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
               used, nritems);
        return ret;
}
1524
1525 /*
1526  * push some data in the path leaf to the right, trying to free up at
1527  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1528  *
1529  * returns 1 if the push failed because the other node didn't have enough
1530  * room, 0 if everything worked out and < 0 if there were major errors.
1531  */
1532 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1533                            *root, struct btrfs_path *path, int data_size,
1534                            int empty)
1535 {
1536         struct extent_buffer *left = path->nodes[0];
1537         struct extent_buffer *right;
1538         struct extent_buffer *upper;
1539         struct btrfs_disk_key disk_key;
1540         int slot;
1541         u32 i;
1542         int free_space;
1543         int push_space = 0;
1544         int push_items = 0;
1545         struct btrfs_item *item;
1546         u32 left_nritems;
1547         u32 nr;
1548         u32 right_nritems;
1549         u32 data_end;
1550         u32 this_item_size;
1551         int ret;
1552
1553         slot = path->slots[1];
1554         if (!path->nodes[1]) {
1555                 return 1;
1556         }
1557         upper = path->nodes[1];
1558         if (slot >= btrfs_header_nritems(upper) - 1)
1559                 return 1;
1560
1561         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1562                                 root->leafsize);
1563         free_space = btrfs_leaf_free_space(root, right);
1564         if (free_space < data_size + sizeof(struct btrfs_item)) {
1565                 free_extent_buffer(right);
1566                 return 1;
1567         }
1568
1569         /* cow and double check */
1570         ret = btrfs_cow_block(trans, root, right, upper,
1571                               slot + 1, &right);
1572         if (ret) {
1573                 free_extent_buffer(right);
1574                 return 1;
1575         }
1576         free_space = btrfs_leaf_free_space(root, right);
1577         if (free_space < data_size + sizeof(struct btrfs_item)) {
1578                 free_extent_buffer(right);
1579                 return 1;
1580         }
1581
1582         left_nritems = btrfs_header_nritems(left);
1583         if (left_nritems == 0) {
1584                 free_extent_buffer(right);
1585                 return 1;
1586         }
1587
1588         if (empty)
1589                 nr = 0;
1590         else
1591                 nr = 1;
1592
1593         i = left_nritems - 1;
1594         while (i >= nr) {
1595                 item = btrfs_item_nr(left, i);
1596
1597                 if (path->slots[0] == i)
1598                         push_space += data_size + sizeof(*item);
1599
1600                 this_item_size = btrfs_item_size(left, item);
1601                 if (this_item_size + sizeof(*item) + push_space > free_space)
1602                         break;
1603                 push_items++;
1604                 push_space += this_item_size + sizeof(*item);
1605                 if (i == 0)
1606                         break;
1607                 i--;
1608         }
1609
1610         if (push_items == 0) {
1611                 free_extent_buffer(right);
1612                 return 1;
1613         }
1614
1615         if (!empty && push_items == left_nritems)
1616                 WARN_ON(1);
1617
1618         /* push left to right */
1619         right_nritems = btrfs_header_nritems(right);
1620
1621         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1622         push_space -= leaf_data_end(root, left);
1623
1624         /* make room in the right data area */
1625         data_end = leaf_data_end(root, right);
1626         memmove_extent_buffer(right,
1627                               btrfs_leaf_data(right) + data_end - push_space,
1628                               btrfs_leaf_data(right) + data_end,
1629                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1630
1631         /* copy from the left data area */
1632         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1633                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1634                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1635                      push_space);
1636
1637         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1638                               btrfs_item_nr_offset(0),
1639                               right_nritems * sizeof(struct btrfs_item));
1640
1641         /* copy the items from left to right */
1642         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1643                    btrfs_item_nr_offset(left_nritems - push_items),
1644                    push_items * sizeof(struct btrfs_item));
1645
1646         /* update the item pointers */
1647         right_nritems += push_items;
1648         btrfs_set_header_nritems(right, right_nritems);
1649         push_space = BTRFS_LEAF_DATA_SIZE(root);
1650         for (i = 0; i < right_nritems; i++) {
1651                 item = btrfs_item_nr(right, i);
1652                 push_space -= btrfs_item_size(right, item);
1653                 btrfs_set_item_offset(right, item, push_space);
1654         }
1655
1656         left_nritems -= push_items;
1657         btrfs_set_header_nritems(left, left_nritems);
1658
1659         if (left_nritems)
1660                 btrfs_mark_buffer_dirty(left);
1661         btrfs_mark_buffer_dirty(right);
1662
1663         btrfs_item_key(right, &disk_key, 0);
1664         btrfs_set_node_key(upper, &disk_key, slot + 1);
1665         btrfs_mark_buffer_dirty(upper);
1666
1667         /* then fixup the leaf pointer in the path */
1668         if (path->slots[0] >= left_nritems) {
1669                 path->slots[0] -= left_nritems;
1670                 free_extent_buffer(path->nodes[0]);
1671                 path->nodes[0] = right;
1672                 path->slots[1] += 1;
1673         } else {
1674                 free_extent_buffer(right);
1675         }
1676         return 0;
1677 }
1678 /*
1679  * push some data in the path leaf to the left, trying to free up at
1680  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1681  */
1682 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1683                           *root, struct btrfs_path *path, int data_size,
1684                           int empty)
1685 {
1686         struct btrfs_disk_key disk_key;
1687         struct extent_buffer *right = path->nodes[0];
1688         struct extent_buffer *left;
1689         int slot;
1690         int i;
1691         int free_space;
1692         int push_space = 0;
1693         int push_items = 0;
1694         struct btrfs_item *item;
1695         u32 old_left_nritems;
1696         u32 right_nritems;
1697         u32 nr;
1698         int ret = 0;
1699         int wret;
1700         u32 this_item_size;
1701         u32 old_left_item_size;
1702
1703         slot = path->slots[1];
1704         if (slot == 0)
1705                 return 1;
1706         if (!path->nodes[1])
1707                 return 1;
1708
1709         right_nritems = btrfs_header_nritems(right);
1710         if (right_nritems == 0) {
1711                 return 1;
1712         }
1713
1714         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1715                                slot - 1), root->leafsize);
1716         free_space = btrfs_leaf_free_space(root, left);
1717         if (free_space < data_size + sizeof(struct btrfs_item)) {
1718                 free_extent_buffer(left);
1719                 return 1;
1720         }
1721
1722         /* cow and double check */
1723         ret = btrfs_cow_block(trans, root, left,
1724                               path->nodes[1], slot - 1, &left);
1725         if (ret) {
1726                 /* we hit -ENOSPC, but it isn't fatal here */
1727                 free_extent_buffer(left);
1728                 return 1;
1729         }
1730
1731         free_space = btrfs_leaf_free_space(root, left);
1732         if (free_space < data_size + sizeof(struct btrfs_item)) {
1733                 free_extent_buffer(left);
1734                 return 1;
1735         }
1736
1737         if (empty)
1738                 nr = right_nritems;
1739         else
1740                 nr = right_nritems - 1;
1741
1742         for (i = 0; i < nr; i++) {
1743                 item = btrfs_item_nr(right, i);
1744
1745                 if (path->slots[0] == i)
1746                         push_space += data_size + sizeof(*item);
1747
1748                 this_item_size = btrfs_item_size(right, item);
1749                 if (this_item_size + sizeof(*item) + push_space > free_space)
1750                         break;
1751
1752                 push_items++;
1753                 push_space += this_item_size + sizeof(*item);
1754         }
1755
1756         if (push_items == 0) {
1757                 free_extent_buffer(left);
1758                 return 1;
1759         }
1760         if (!empty && push_items == btrfs_header_nritems(right))
1761                 WARN_ON(1);
1762
1763         /* push data from right to left */
1764         copy_extent_buffer(left, right,
1765                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1766                            btrfs_item_nr_offset(0),
1767                            push_items * sizeof(struct btrfs_item));
1768
1769         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1770                      btrfs_item_offset_nr(right, push_items -1);
1771
1772         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1773                      leaf_data_end(root, left) - push_space,
1774                      btrfs_leaf_data(right) +
1775                      btrfs_item_offset_nr(right, push_items - 1),
1776                      push_space);
1777         old_left_nritems = btrfs_header_nritems(left);
1778         BUG_ON(old_left_nritems < 0);
1779
1780         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1781         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1782                 u32 ioff;
1783
1784                 item = btrfs_item_nr(left, i);
1785                 ioff = btrfs_item_offset(left, item);
1786                 btrfs_set_item_offset(left, item,
1787                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1788         }
1789         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1790
1791         /* fixup right node */
1792         if (push_items > right_nritems) {
1793                 printk("push items %d nr %u\n", push_items, right_nritems);
1794                 WARN_ON(1);
1795         }
1796
1797         if (push_items < right_nritems) {
1798                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1799                                                   leaf_data_end(root, right);
1800                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1801                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1802                                       btrfs_leaf_data(right) +
1803                                       leaf_data_end(root, right), push_space);
1804
1805                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1806                               btrfs_item_nr_offset(push_items),
1807                              (btrfs_header_nritems(right) - push_items) *
1808                              sizeof(struct btrfs_item));
1809         }
1810         right_nritems -= push_items;
1811         btrfs_set_header_nritems(right, right_nritems);
1812         push_space = BTRFS_LEAF_DATA_SIZE(root);
1813         for (i = 0; i < right_nritems; i++) {
1814                 item = btrfs_item_nr(right, i);
1815                 push_space = push_space - btrfs_item_size(right, item);
1816                 btrfs_set_item_offset(right, item, push_space);
1817         }
1818
1819         btrfs_mark_buffer_dirty(left);
1820         if (right_nritems)
1821                 btrfs_mark_buffer_dirty(right);
1822
1823         btrfs_item_key(right, &disk_key, 0);
1824         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1825         if (wret)
1826                 ret = wret;
1827
1828         /* then fixup the leaf pointer in the path */
1829         if (path->slots[0] < push_items) {
1830                 path->slots[0] += old_left_nritems;
1831                 free_extent_buffer(path->nodes[0]);
1832                 path->nodes[0] = left;
1833                 path->slots[1] -= 1;
1834         } else {
1835                 free_extent_buffer(left);
1836                 path->slots[0] -= push_items;
1837         }
1838         BUG_ON(path->slots[0] < 0);
1839         return ret;
1840 }
1841
1842 /*
1843  * split the path's leaf in two, making sure there is at least data_size
1844  * available for the resulting leaf level of the path.
1845  *
1846  * returns 0 if all went well and < 0 on failure.
1847  */
1848 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1849                       *root, struct btrfs_key *ins_key,
1850                       struct btrfs_path *path, int data_size, int extend)
1851 {
1852         u64 root_gen;
1853         struct extent_buffer *l;
1854         u32 nritems;
1855         int mid;
1856         int slot;
1857         struct extent_buffer *right;
1858         int space_needed = data_size + sizeof(struct btrfs_item);
1859         int data_copy_size;
1860         int rt_data_off;
1861         int i;
1862         int ret = 0;
1863         int wret;
1864         int double_split;
1865         int num_doubles = 0;
1866         struct btrfs_disk_key disk_key;
1867
1868         if (extend)
1869                 space_needed = data_size;
1870
1871         if (root->ref_cows)
1872                 root_gen = trans->transid;
1873         else
1874                 root_gen = 0;
1875
1876         /* first try to make some room by pushing left and right */
1877         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1878                 wret = push_leaf_right(trans, root, path, data_size, 0);
1879                 if (wret < 0) {
1880                         return wret;
1881                 }
1882                 if (wret) {
1883                         wret = push_leaf_left(trans, root, path, data_size, 0);
1884                         if (wret < 0)
1885                                 return wret;
1886                 }
1887                 l = path->nodes[0];
1888
1889                 /* did the pushes work? */
1890                 if (btrfs_leaf_free_space(root, l) >= space_needed)
1891                         return 0;
1892         }
1893
1894         if (!path->nodes[1]) {
1895                 ret = insert_new_root(trans, root, path, 1);
1896                 if (ret)
1897                         return ret;
1898         }
1899 again:
1900         double_split = 0;
1901         l = path->nodes[0];
1902         slot = path->slots[0];
1903         nritems = btrfs_header_nritems(l);
1904         mid = (nritems + 1)/ 2;
1905
1906         btrfs_item_key(l, &disk_key, 0);
1907
1908         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
1909                                          root->root_key.objectid,
1910                                          root_gen, disk_key.objectid, 0,
1911                                          l->start, 0);
1912         if (IS_ERR(right))
1913                 return PTR_ERR(right);
1914
1915         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1916         btrfs_set_header_bytenr(right, right->start);
1917         btrfs_set_header_generation(right, trans->transid);
1918         btrfs_set_header_owner(right, root->root_key.objectid);
1919         btrfs_set_header_level(right, 0);
1920         write_extent_buffer(right, root->fs_info->fsid,
1921                             (unsigned long)btrfs_header_fsid(right),
1922                             BTRFS_FSID_SIZE);
1923         if (mid <= slot) {
1924                 if (nritems == 1 ||
1925                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1926                         BTRFS_LEAF_DATA_SIZE(root)) {
1927                         if (slot >= nritems) {
1928                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1929                                 btrfs_set_header_nritems(right, 0);
1930                                 wret = insert_ptr(trans, root, path,
1931                                                   &disk_key, right->start,
1932                                                   path->slots[1] + 1, 1);
1933                                 if (wret)
1934                                         ret = wret;
1935                                 free_extent_buffer(path->nodes[0]);
1936                                 path->nodes[0] = right;
1937                                 path->slots[0] = 0;
1938                                 path->slots[1] += 1;
1939                                 return ret;
1940                         }
1941                         mid = slot;
1942                         if (mid != nritems &&
1943                             leaf_space_used(l, mid, nritems - mid) +
1944                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1945                                 double_split = 1;
1946                         }
1947                 }
1948         } else {
1949                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1950                         BTRFS_LEAF_DATA_SIZE(root)) {
1951                         if (!extend && slot == 0) {
1952                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1953                                 btrfs_set_header_nritems(right, 0);
1954                                 wret = insert_ptr(trans, root, path,
1955                                                   &disk_key,
1956                                                   right->start,
1957                                                   path->slots[1], 1);
1958                                 if (wret)
1959                                         ret = wret;
1960                                 free_extent_buffer(path->nodes[0]);
1961                                 path->nodes[0] = right;
1962                                 path->slots[0] = 0;
1963                                 if (path->slots[1] == 0) {
1964                                         wret = fixup_low_keys(trans, root,
1965                                                    path, &disk_key, 1);
1966                                         if (wret)
1967                                                 ret = wret;
1968                                 }
1969                                 return ret;
1970                         } else if (extend && slot == 0) {
1971                                 mid = 1;
1972                         } else {
1973                                 mid = slot;
1974                                 if (mid != nritems &&
1975                                     leaf_space_used(l, mid, nritems - mid) +
1976                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1977                                         double_split = 1;
1978                                 }
1979                         }
1980                 }
1981         }
1982         nritems = nritems - mid;
1983         btrfs_set_header_nritems(right, nritems);
1984         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
1985
1986         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1987                            btrfs_item_nr_offset(mid),
1988                            nritems * sizeof(struct btrfs_item));
1989
1990         copy_extent_buffer(right, l,
1991                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1992                      data_copy_size, btrfs_leaf_data(l) +
1993                      leaf_data_end(root, l), data_copy_size);
1994
1995         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1996                       btrfs_item_end_nr(l, mid);
1997
1998         for (i = 0; i < nritems; i++) {
1999                 struct btrfs_item *item = btrfs_item_nr(right, i);
2000                 u32 ioff = btrfs_item_offset(right, item);
2001                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2002         }
2003
2004         btrfs_set_header_nritems(l, mid);
2005         ret = 0;
2006         btrfs_item_key(right, &disk_key, 0);
2007         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2008                           path->slots[1] + 1, 1);
2009         if (wret)
2010                 ret = wret;
2011
2012         btrfs_mark_buffer_dirty(right);
2013         btrfs_mark_buffer_dirty(l);
2014         BUG_ON(path->slots[0] != slot);
2015
2016         if (mid <= slot) {
2017                 free_extent_buffer(path->nodes[0]);
2018                 path->nodes[0] = right;
2019                 path->slots[0] -= mid;
2020                 path->slots[1] += 1;
2021         } else
2022                 free_extent_buffer(right);
2023
2024         BUG_ON(path->slots[0] < 0);
2025
2026         if (double_split) {
2027                 BUG_ON(num_doubles != 0);
2028                 num_doubles++;
2029                 goto again;
2030         }
2031         return ret;
2032 }
2033
2034 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2035                         struct btrfs_root *root,
2036                         struct btrfs_path *path,
2037                         u32 new_size, int from_end)
2038 {
2039         int ret = 0;
2040         int slot;
2041         int slot_orig;
2042         struct extent_buffer *leaf;
2043         struct btrfs_item *item;
2044         u32 nritems;
2045         unsigned int data_end;
2046         unsigned int old_data_start;
2047         unsigned int old_size;
2048         unsigned int size_diff;
2049         int i;
2050
2051         slot_orig = path->slots[0];
2052         leaf = path->nodes[0];
2053         slot = path->slots[0];
2054
2055         old_size = btrfs_item_size_nr(leaf, slot);
2056         if (old_size == new_size)
2057                 return 0;
2058
2059         nritems = btrfs_header_nritems(leaf);
2060         data_end = leaf_data_end(root, leaf);
2061
2062         old_data_start = btrfs_item_offset_nr(leaf, slot);
2063
2064         size_diff = old_size - new_size;
2065
2066         BUG_ON(slot < 0);
2067         BUG_ON(slot >= nritems);
2068
2069         /*
2070          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2071          */
2072         /* first correct the data pointers */
2073         for (i = slot; i < nritems; i++) {
2074                 u32 ioff;
2075                 item = btrfs_item_nr(leaf, i);
2076                 ioff = btrfs_item_offset(leaf, item);
2077                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2078         }
2079
2080         /* shift the data */
2081         if (from_end) {
2082                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2083                               data_end + size_diff, btrfs_leaf_data(leaf) +
2084                               data_end, old_data_start + new_size - data_end);
2085         } else {
2086                 struct btrfs_disk_key disk_key;
2087                 u64 offset;
2088
2089                 btrfs_item_key(leaf, &disk_key, slot);
2090
2091                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2092                         unsigned long ptr;
2093                         struct btrfs_file_extent_item *fi;
2094
2095                         fi = btrfs_item_ptr(leaf, slot,
2096                                             struct btrfs_file_extent_item);
2097                         fi = (struct btrfs_file_extent_item *)(
2098                              (unsigned long)fi - size_diff);
2099
2100                         if (btrfs_file_extent_type(leaf, fi) ==
2101                             BTRFS_FILE_EXTENT_INLINE) {
2102                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2103                                 memmove_extent_buffer(leaf, ptr,
2104                                         (unsigned long)fi,
2105                                         offsetof(struct btrfs_file_extent_item,
2106                                                  disk_bytenr));
2107                         }
2108                 }
2109
2110                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2111                               data_end + size_diff, btrfs_leaf_data(leaf) +
2112                               data_end, old_data_start - data_end);
2113
2114                 offset = btrfs_disk_key_offset(&disk_key);
2115                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2116                 btrfs_set_item_key(leaf, &disk_key, slot);
2117                 if (slot == 0)
2118                         fixup_low_keys(trans, root, path, &disk_key, 1);
2119         }
2120
2121         item = btrfs_item_nr(leaf, slot);
2122         btrfs_set_item_size(leaf, item, new_size);
2123         btrfs_mark_buffer_dirty(leaf);
2124
2125         ret = 0;
2126         if (btrfs_leaf_free_space(root, leaf) < 0) {
2127                 btrfs_print_leaf(root, leaf);
2128                 BUG();
2129         }
2130         return ret;
2131 }
2132
2133 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2134                       struct btrfs_root *root, struct btrfs_path *path,
2135                       u32 data_size)
2136 {
2137         int ret = 0;
2138         int slot;
2139         int slot_orig;
2140         struct extent_buffer *leaf;
2141         struct btrfs_item *item;
2142         u32 nritems;
2143         unsigned int data_end;
2144         unsigned int old_data;
2145         unsigned int old_size;
2146         int i;
2147
2148         slot_orig = path->slots[0];
2149         leaf = path->nodes[0];
2150
2151         nritems = btrfs_header_nritems(leaf);
2152         data_end = leaf_data_end(root, leaf);
2153
2154         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2155                 btrfs_print_leaf(root, leaf);
2156                 BUG();
2157         }
2158         slot = path->slots[0];
2159         old_data = btrfs_item_end_nr(leaf, slot);
2160
2161         BUG_ON(slot < 0);
2162         if (slot >= nritems) {
2163                 btrfs_print_leaf(root, leaf);
2164                 printk("slot %d too large, nritems %d\n", slot, nritems);
2165                 BUG_ON(1);
2166         }
2167
2168         /*
2169          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2170          */
2171         /* first correct the data pointers */
2172         for (i = slot; i < nritems; i++) {
2173                 u32 ioff;
2174                 item = btrfs_item_nr(leaf, i);
2175                 ioff = btrfs_item_offset(leaf, item);
2176                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2177         }
2178
2179         /* shift the data */
2180         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2181                       data_end - data_size, btrfs_leaf_data(leaf) +
2182                       data_end, old_data - data_end);
2183
2184         data_end = old_data;
2185         old_size = btrfs_item_size_nr(leaf, slot);
2186         item = btrfs_item_nr(leaf, slot);
2187         btrfs_set_item_size(leaf, item, old_size + data_size);
2188         btrfs_mark_buffer_dirty(leaf);
2189
2190         ret = 0;
2191         if (btrfs_leaf_free_space(root, leaf) < 0) {
2192                 btrfs_print_leaf(root, leaf);
2193                 BUG();
2194         }
2195         return ret;
2196 }
2197
2198 /*
2199  * Given a key and some data, insert an item into the tree.
2200  * This does all the path init required, making room in the tree if needed.
2201  */
2202 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2203                             struct btrfs_root *root,
2204                             struct btrfs_path *path,
2205                             struct btrfs_key *cpu_key, u32 data_size)
2206 {
2207         struct extent_buffer *leaf;
2208         struct btrfs_item *item;
2209         int ret = 0;
2210         int slot;
2211         int slot_orig;
2212         u32 nritems;
2213         unsigned int data_end;
2214         struct btrfs_disk_key disk_key;
2215
2216         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2217
2218         /* create a root if there isn't one */
2219         if (!root->node)
2220                 BUG();
2221
2222         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2223         if (ret == 0) {
2224                 return -EEXIST;
2225         }
2226         if (ret < 0)
2227                 goto out;
2228
2229         slot_orig = path->slots[0];
2230         leaf = path->nodes[0];
2231
2232         nritems = btrfs_header_nritems(leaf);
2233         data_end = leaf_data_end(root, leaf);
2234
2235         if (btrfs_leaf_free_space(root, leaf) <
2236             sizeof(struct btrfs_item) + data_size) {
2237                 btrfs_print_leaf(root, leaf);
2238                 printk("not enough freespace need %u have %d\n",
2239                        data_size, btrfs_leaf_free_space(root, leaf));
2240                 BUG();
2241         }
2242
2243         slot = path->slots[0];
2244         BUG_ON(slot < 0);
2245
2246         if (slot != nritems) {
2247                 int i;
2248                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2249
2250                 if (old_data < data_end) {
2251                         btrfs_print_leaf(root, leaf);
2252                         printk("slot %d old_data %d data_end %d\n",
2253                                slot, old_data, data_end);
2254                         BUG_ON(1);
2255                 }
2256                 /*
2257                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2258                  */
2259                 /* first correct the data pointers */
2260                 for (i = slot; i < nritems; i++) {
2261                         u32 ioff;
2262
2263                         item = btrfs_item_nr(leaf, i);
2264                         ioff = btrfs_item_offset(leaf, item);
2265                         btrfs_set_item_offset(leaf, item, ioff - data_size);
2266                 }
2267
2268                 /* shift the items */
2269                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2270                               btrfs_item_nr_offset(slot),
2271                               (nritems - slot) * sizeof(struct btrfs_item));
2272
2273                 /* shift the data */
2274                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2275                               data_end - data_size, btrfs_leaf_data(leaf) +
2276                               data_end, old_data - data_end);
2277                 data_end = old_data;
2278         }
2279
2280         /* setup the item for the new data */
2281         btrfs_set_item_key(leaf, &disk_key, slot);
2282         item = btrfs_item_nr(leaf, slot);
2283         btrfs_set_item_offset(leaf, item, data_end - data_size);
2284         btrfs_set_item_size(leaf, item, data_size);
2285         btrfs_set_header_nritems(leaf, nritems + 1);
2286         btrfs_mark_buffer_dirty(leaf);
2287
2288         ret = 0;
2289         if (slot == 0)
2290                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2291
2292         if (btrfs_leaf_free_space(root, leaf) < 0) {
2293                 btrfs_print_leaf(root, leaf);
2294                 BUG();
2295         }
2296 out:
2297         return ret;
2298 }
2299
2300 /*
2301  * Given a key and some data, insert an item into the tree.
2302  * This does all the path init required, making room in the tree if needed.
2303  */
2304 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2305                       *root, struct btrfs_key *cpu_key, void *data, u32
2306                       data_size)
2307 {
2308         int ret = 0;
2309         struct btrfs_path *path;
2310         struct extent_buffer *leaf;
2311         unsigned long ptr;
2312
2313         path = btrfs_alloc_path();
2314         BUG_ON(!path);
2315         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2316         if (!ret) {
2317                 leaf = path->nodes[0];
2318                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2319                 write_extent_buffer(leaf, data, ptr, data_size);
2320                 btrfs_mark_buffer_dirty(leaf);
2321         }
2322         btrfs_free_path(path);
2323         return ret;
2324 }
2325
2326 /*
2327  * delete the pointer from a given node.
2328  *
2329  * If the delete empties a node, the node is removed from the tree,
2330  * continuing all the way the root if required.  The root is converted into
2331  * a leaf if all the nodes are emptied.
2332  */
2333 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2334                    struct btrfs_path *path, int level, int slot)
2335 {
2336         struct extent_buffer *parent = path->nodes[level];
2337         u32 nritems;
2338         int ret = 0;
2339         int wret;
2340
2341         nritems = btrfs_header_nritems(parent);
2342         if (slot != nritems -1) {
2343                 memmove_extent_buffer(parent,
2344                               btrfs_node_key_ptr_offset(slot),
2345                               btrfs_node_key_ptr_offset(slot + 1),
2346                               sizeof(struct btrfs_key_ptr) *
2347                               (nritems - slot - 1));
2348         }
2349         nritems--;
2350         btrfs_set_header_nritems(parent, nritems);
2351         if (nritems == 0 && parent == root->node) {
2352                 BUG_ON(btrfs_header_level(root->node) != 1);
2353                 /* just turn the root into a leaf and break */
2354                 btrfs_set_header_level(root->node, 0);
2355         } else if (slot == 0) {
2356                 struct btrfs_disk_key disk_key;
2357
2358                 btrfs_node_key(parent, &disk_key, 0);
2359                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2360                 if (wret)
2361                         ret = wret;
2362         }
2363         btrfs_mark_buffer_dirty(parent);
2364         return ret;
2365 }
2366
2367 /*
2368  * delete the item at the leaf level in path.  If that empties
2369  * the leaf, remove it from the tree
2370  */
2371 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2372                     struct btrfs_path *path, int slot, int nr)
2373 {
2374         struct extent_buffer *leaf;
2375         struct btrfs_item *item;
2376         int last_off;
2377         int dsize = 0;
2378         int ret = 0;
2379         int wret;
2380         int i;
2381         u32 nritems;
2382
2383         leaf = path->nodes[0];
2384         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2385
2386         for (i = 0; i < nr; i++)
2387                 dsize += btrfs_item_size_nr(leaf, slot + i);
2388
2389         nritems = btrfs_header_nritems(leaf);
2390
2391         if (slot + nr != nritems) {
2392                 int i;
2393                 int data_end = leaf_data_end(root, leaf);
2394
2395                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2396                               data_end + dsize,
2397                               btrfs_leaf_data(leaf) + data_end,
2398                               last_off - data_end);
2399
2400                 for (i = slot + nr; i < nritems; i++) {
2401                         u32 ioff;
2402
2403                         item = btrfs_item_nr(leaf, i);
2404                         ioff = btrfs_item_offset(leaf, item);
2405                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2406                 }
2407
2408                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2409                               btrfs_item_nr_offset(slot + nr),
2410                               sizeof(struct btrfs_item) *
2411                               (nritems - slot - nr));
2412         }
2413         btrfs_set_header_nritems(leaf, nritems - nr);
2414         nritems -= nr;
2415
2416         /* delete the leaf if we've emptied it */
2417         if (nritems == 0) {
2418                 if (leaf == root->node) {
2419                         btrfs_set_header_level(leaf, 0);
2420                 } else {
2421                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2422                         clean_tree_block(trans, root, leaf);
2423                         wait_on_tree_block_writeback(root, leaf);
2424                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2425                         if (wret)
2426                                 ret = wret;
2427                         wret = btrfs_free_extent(trans, root,
2428                                          leaf->start, leaf->len,
2429                                          btrfs_header_owner(path->nodes[1]),
2430                                          root_gen, 0, 0, 1);
2431                         if (wret)
2432                                 ret = wret;
2433                 }
2434         } else {
2435                 int used = leaf_space_used(leaf, 0, nritems);
2436                 if (slot == 0) {
2437                         struct btrfs_disk_key disk_key;
2438
2439                         btrfs_item_key(leaf, &disk_key, 0);
2440                         wret = fixup_low_keys(trans, root, path,
2441                                               &disk_key, 1);
2442                         if (wret)
2443                                 ret = wret;
2444                 }
2445
2446                 /* delete the leaf if it is mostly empty */
2447                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2448                         /* push_leaf_left fixes the path.
2449                          * make sure the path still points to our leaf
2450                          * for possible call to del_ptr below
2451                          */
2452                         slot = path->slots[1];
2453                         extent_buffer_get(leaf);
2454
2455                         wret = push_leaf_left(trans, root, path, 1, 1);
2456                         if (wret < 0 && wret != -ENOSPC)
2457                                 ret = wret;
2458
2459                         if (path->nodes[0] == leaf &&
2460                             btrfs_header_nritems(leaf)) {
2461                                 wret = push_leaf_right(trans, root, path, 1, 1);
2462                                 if (wret < 0 && wret != -ENOSPC)
2463                                         ret = wret;
2464                         }
2465
2466                         if (btrfs_header_nritems(leaf) == 0) {
2467                                 u64 root_gen;
2468                                 u64 bytenr = leaf->start;
2469                                 u32 blocksize = leaf->len;
2470
2471                                 root_gen = btrfs_header_generation(
2472                                                            path->nodes[1]);
2473
2474                                 clean_tree_block(trans, root, leaf);
2475                                 wait_on_tree_block_writeback(root, leaf);
2476
2477                                 wret = del_ptr(trans, root, path, 1, slot);
2478                                 if (wret)
2479                                         ret = wret;
2480
2481                                 free_extent_buffer(leaf);
2482                                 wret = btrfs_free_extent(trans, root, bytenr,
2483                                              blocksize,
2484                                              btrfs_header_owner(path->nodes[1]),
2485                                              root_gen, 0, 0, 1);
2486                                 if (wret)
2487                                         ret = wret;
2488                         } else {
2489                                 btrfs_mark_buffer_dirty(leaf);
2490                                 free_extent_buffer(leaf);
2491                         }
2492                 } else {
2493                         btrfs_mark_buffer_dirty(leaf);
2494                 }
2495         }
2496         return ret;
2497 }
2498
2499 /*
2500  * walk up the tree as far as required to find the previous leaf.
2501  * returns 0 if it found something or 1 if there are no lesser leaves.
2502  * returns < 0 on io errors.
2503  */
2504 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2505 {
2506         u64 bytenr;
2507         int slot;
2508         int level = 1;
2509         struct extent_buffer *c;
2510         struct extent_buffer *next = NULL;
2511
2512         while(level < BTRFS_MAX_LEVEL) {
2513                 if (!path->nodes[level])
2514                         return 1;
2515
2516                 slot = path->slots[level];
2517                 c = path->nodes[level];
2518                 if (slot == 0) {
2519                         level++;
2520                         if (level == BTRFS_MAX_LEVEL)
2521                                 return 1;
2522                         continue;
2523                 }
2524                 slot--;
2525
2526                 bytenr = btrfs_node_blockptr(c, slot);
2527                 if (next)
2528                         free_extent_buffer(next);
2529
2530                 next = read_tree_block(root, bytenr,
2531                                        btrfs_level_size(root, level - 1));
2532                 break;
2533         }
2534         path->slots[level] = slot;
2535         while(1) {
2536                 level--;
2537                 c = path->nodes[level];
2538                 free_extent_buffer(c);
2539                 slot = btrfs_header_nritems(next);
2540                 if (slot != 0)
2541                         slot--;
2542                 path->nodes[level] = next;
2543                 path->slots[level] = slot;
2544                 if (!level)
2545                         break;
2546                 next = read_tree_block(root, btrfs_node_blockptr(next, slot),
2547                                        btrfs_level_size(root, level - 1));
2548         }
2549         return 0;
2550 }
2551
2552 /*
2553  * walk up the tree as far as required to find the next leaf.
2554  * returns 0 if it found something or 1 if there are no greater leaves.
2555  * returns < 0 on io errors.
2556  */
2557 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2558 {
2559         int slot;
2560         int level = 1;
2561         u64 bytenr;
2562         struct extent_buffer *c;
2563         struct extent_buffer *next = NULL;
2564
2565         while(level < BTRFS_MAX_LEVEL) {
2566                 if (!path->nodes[level])
2567                         return 1;
2568
2569                 slot = path->slots[level] + 1;
2570                 c = path->nodes[level];
2571                 if (slot >= btrfs_header_nritems(c)) {
2572                         level++;
2573                         if (level == BTRFS_MAX_LEVEL)
2574                                 return 1;
2575                         continue;
2576                 }
2577
2578                 bytenr = btrfs_node_blockptr(c, slot);
2579                 if (next)
2580                         free_extent_buffer(next);
2581
2582                 if (path->reada)
2583                         reada_for_search(root, path, level, slot, 0);
2584
2585                 next = read_tree_block(root, bytenr,
2586                                        btrfs_level_size(root, level -1));
2587                 break;
2588         }
2589         path->slots[level] = slot;
2590         while(1) {
2591                 level--;
2592                 c = path->nodes[level];
2593                 free_extent_buffer(c);
2594                 path->nodes[level] = next;
2595                 path->slots[level] = 0;
2596                 if (!level)
2597                         break;
2598                 if (path->reada)
2599                         reada_for_search(root, path, level, 0, 0);
2600                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2601                                        btrfs_level_size(root, level - 1));
2602         }
2603         return 0;
2604 }