Call btrfs_cow_block while lowering tree level.
[platform/upstream/btrfs-progs.git] / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18 #include "ctree.h"
19 #include "disk-io.h"
20 #include "transaction.h"
21 #include "print-tree.h"
22
23 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
24                       *root, struct btrfs_path *path, int level);
25 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
26                       *root, struct btrfs_key *ins_key,
27                       struct btrfs_path *path, int data_size, int extend);
28 static int push_node_left(struct btrfs_trans_handle *trans,
29                           struct btrfs_root *root, struct extent_buffer *dst,
30                           struct extent_buffer *src);
31 static int balance_node_right(struct btrfs_trans_handle *trans,
32                               struct btrfs_root *root,
33                               struct extent_buffer *dst_buf,
34                               struct extent_buffer *src_buf);
35 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
36                    struct btrfs_path *path, int level, int slot);
37
38 inline void btrfs_init_path(struct btrfs_path *p)
39 {
40         memset(p, 0, sizeof(*p));
41 }
42
43 struct btrfs_path *btrfs_alloc_path(void)
44 {
45         struct btrfs_path *path;
46         path = kmalloc(sizeof(struct btrfs_path), GFP_NOFS);
47         if (path) {
48                 btrfs_init_path(path);
49                 path->reada = 0;
50         }
51         return path;
52 }
53
54 void btrfs_free_path(struct btrfs_path *p)
55 {
56         btrfs_release_path(NULL, p);
57         kfree(p);
58 }
59
60 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
61 {
62         int i;
63         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
64                 if (!p->nodes[i])
65                         break;
66                 free_extent_buffer(p->nodes[i]);
67         }
68         memset(p, 0, sizeof(*p));
69 }
70
71 int btrfs_copy_root(struct btrfs_trans_handle *trans,
72                       struct btrfs_root *root,
73                       struct extent_buffer *buf,
74                       struct extent_buffer **cow_ret, u64 new_root_objectid)
75 {
76         struct extent_buffer *cow;
77         u32 nritems;
78         int ret = 0;
79         int level;
80         struct btrfs_key first_key;
81         struct btrfs_root *new_root;
82
83         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
84         if (!new_root)
85                 return -ENOMEM;
86
87         memcpy(new_root, root, sizeof(*new_root));
88         new_root->root_key.objectid = new_root_objectid;
89
90         WARN_ON(root->ref_cows && trans->transid !=
91                 root->fs_info->running_transaction->transid);
92         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
93
94         level = btrfs_header_level(buf);
95         nritems = btrfs_header_nritems(buf);
96         if (nritems) {
97                 if (level == 0)
98                         btrfs_item_key_to_cpu(buf, &first_key, 0);
99                 else
100                         btrfs_node_key_to_cpu(buf, &first_key, 0);
101         } else {
102                 first_key.objectid = 0;
103         }
104         cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
105                                        new_root_objectid,
106                                        trans->transid, first_key.objectid,
107                                        level, buf->start, 0);
108         if (IS_ERR(cow)) {
109                 kfree(new_root);
110                 return PTR_ERR(cow);
111         }
112
113         copy_extent_buffer(cow, buf, 0, 0, cow->len);
114         btrfs_set_header_bytenr(cow, cow->start);
115         btrfs_set_header_generation(cow, trans->transid);
116         btrfs_set_header_owner(cow, new_root_objectid);
117
118         WARN_ON(btrfs_header_generation(buf) > trans->transid);
119         ret = btrfs_inc_ref(trans, new_root, buf);
120         kfree(new_root);
121
122         if (ret)
123                 return ret;
124
125         btrfs_mark_buffer_dirty(cow);
126         *cow_ret = cow;
127         return 0;
128 }
129
130 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
131                              struct btrfs_root *root,
132                              struct extent_buffer *buf,
133                              struct extent_buffer *parent, int parent_slot,
134                              struct extent_buffer **cow_ret,
135                              u64 search_start, u64 empty_size)
136 {
137         u64 root_gen;
138         struct extent_buffer *cow;
139         u32 nritems;
140         int ret = 0;
141         int different_trans = 0;
142         int level;
143         struct btrfs_key first_key;
144
145         if (root->ref_cows) {
146                 root_gen = trans->transid;
147         } else {
148                 root_gen = 0;
149         }
150
151         WARN_ON(root->ref_cows && trans->transid !=
152                 root->fs_info->running_transaction->transid);
153         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
154
155         level = btrfs_header_level(buf);
156         nritems = btrfs_header_nritems(buf);
157         if (nritems) {
158                 if (level == 0)
159                         btrfs_item_key_to_cpu(buf, &first_key, 0);
160                 else
161                         btrfs_node_key_to_cpu(buf, &first_key, 0);
162         } else {
163                 first_key.objectid = 0;
164         }
165         cow = __btrfs_alloc_free_block(trans, root, buf->len,
166                                      root->root_key.objectid,
167                                      root_gen, first_key.objectid, level,
168                                      search_start, empty_size);
169         if (IS_ERR(cow))
170                 return PTR_ERR(cow);
171
172         copy_extent_buffer(cow, buf, 0, 0, cow->len);
173         btrfs_set_header_bytenr(cow, cow->start);
174         btrfs_set_header_generation(cow, trans->transid);
175         btrfs_set_header_owner(cow, root->root_key.objectid);
176
177         WARN_ON(btrfs_header_generation(buf) > trans->transid);
178         if (btrfs_header_generation(buf) != trans->transid) {
179                 different_trans = 1;
180                 ret = btrfs_inc_ref(trans, root, buf);
181                 if (ret)
182                         return ret;
183         } else {
184                 clean_tree_block(trans, root, buf);
185         }
186
187         if (buf == root->node) {
188                 root_gen = btrfs_header_generation(buf);
189                 root->node = cow;
190                 extent_buffer_get(cow);
191                 if (buf != root->commit_root) {
192                         btrfs_free_extent(trans, root, buf->start,
193                                           buf->len, root->root_key.objectid,
194                                           root_gen, 0, 0, 1);
195                 }
196                 free_extent_buffer(buf);
197         } else {
198                 root_gen = btrfs_header_generation(parent);
199                 btrfs_set_node_blockptr(parent, parent_slot,
200                                         cow->start);
201                 WARN_ON(trans->transid == 0);
202                 btrfs_set_node_ptr_generation(parent, parent_slot,
203                                               trans->transid);
204                 btrfs_mark_buffer_dirty(parent);
205                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
206                 btrfs_free_extent(trans, root, buf->start, buf->len,
207                                   btrfs_header_owner(parent), root_gen,
208                                   0, 0, 1);
209         }
210         free_extent_buffer(buf);
211         btrfs_mark_buffer_dirty(cow);
212         *cow_ret = cow;
213         return 0;
214 }
215
216 int btrfs_cow_block(struct btrfs_trans_handle *trans,
217                     struct btrfs_root *root, struct extent_buffer *buf,
218                     struct extent_buffer *parent, int parent_slot,
219                     struct extent_buffer **cow_ret)
220 {
221         u64 search_start;
222         int ret;
223         /*
224         if (trans->transaction != root->fs_info->running_transaction) {
225                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
226                        root->fs_info->running_transaction->transid);
227                 WARN_ON(1);
228         }
229         */
230         if (trans->transid != root->fs_info->generation) {
231                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
232                        root->fs_info->generation);
233                 WARN_ON(1);
234         }
235         if (btrfs_header_generation(buf) == trans->transid) {
236                 *cow_ret = buf;
237                 return 0;
238         }
239
240         search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
241         ret = __btrfs_cow_block(trans, root, buf, parent,
242                                  parent_slot, cow_ret, search_start, 0);
243         return ret;
244 }
245
246 /*
247 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
248 {
249         if (blocknr < other && other - (blocknr + blocksize) < 32768)
250                 return 1;
251         if (blocknr > other && blocknr - (other + blocksize) < 32768)
252                 return 1;
253         return 0;
254 }
255 */
256
257 /*
258  * compare two keys in a memcmp fashion
259  */
260 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
261 {
262         struct btrfs_key k1;
263
264         btrfs_disk_key_to_cpu(&k1, disk);
265
266         if (k1.objectid > k2->objectid)
267                 return 1;
268         if (k1.objectid < k2->objectid)
269                 return -1;
270         if (k1.type > k2->type)
271                 return 1;
272         if (k1.type < k2->type)
273                 return -1;
274         if (k1.offset > k2->offset)
275                 return 1;
276         if (k1.offset < k2->offset)
277                 return -1;
278         return 0;
279 }
280
281
#if 0
/*
 * Compiled out.  Walks the pointers of 'parent' from 'start_slot' onwards
 * and COWs every child whose block number is not "close" (as judged by
 * close_blocks()) to its neighbours, asking the allocator to place the
 * copies near the previously handled block.
 *
 * Slots whose key sorts before 'progress' are skipped.  With 'cache_only'
 * set, only parents at level 1 are processed and children that are not
 * already cached and up to date are skipped.  The start of the last block
 * written is stored in '*last_ret'.
 *
 * Returns 0 or the error from __btrfs_cow_block().
 */
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct extent_buffer *parent,
		       int start_slot, int cache_only, u64 *last_ret,
		       struct btrfs_key *progress)
{
	struct extent_buffer *child;
	struct extent_buffer *cow;
	struct btrfs_disk_key disk_key;
	u64 search_start = *last_ret;
	u64 last_block = 0;
	u64 blocknr;
	u64 neighbor;
	u32 parent_nritems;
	u32 blocksize;
	int parent_level;
	int end_slot;
	int uptodate;
	int progress_passed = 0;
	int err = 0;
	int slot;

	parent_level = btrfs_header_level(parent);
	if (cache_only && parent_level != 1)
		return 0;

	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}

	parent_nritems = btrfs_header_nritems(parent);
	blocksize = btrfs_level_size(root, parent_level - 1);
	end_slot = parent_nritems;

	/* a single child has no neighbours to be moved closer to */
	if (parent_nritems == 1)
		return 0;

	for (slot = start_slot; slot < end_slot; slot++) {
		int close = 1;

		if (!parent->map_token) {
			map_extent_buffer(parent,
					btrfs_node_key_ptr_offset(slot),
					sizeof(struct btrfs_key_ptr),
					&parent->map_token, &parent->kaddr,
					&parent->map_start, &parent->map_len,
					KM_USER1);
		}
		btrfs_node_key(parent, &disk_key, slot);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
		blocknr = btrfs_node_blockptr(parent, slot);
		if (last_block == 0)
			last_block = blocknr;

		/* compare against the previous and, if still close, next slot */
		if (slot > 0) {
			neighbor = btrfs_node_blockptr(parent, slot - 1);
			close = close_blocks(blocknr, neighbor, blocksize);
		}
		if (close && slot < end_slot - 2) {
			neighbor = btrfs_node_blockptr(parent, slot + 1);
			close = close_blocks(blocknr, neighbor, blocksize);
		}
		if (close) {
			last_block = blocknr;
			continue;
		}
		if (parent->map_token) {
			unmap_extent_buffer(parent, parent->map_token,
					    KM_USER1);
			parent->map_token = NULL;
		}

		child = btrfs_find_tree_block(root, blocknr, blocksize);
		uptodate = child ? btrfs_buffer_uptodate(child) : 0;
		if (!child || !uptodate) {
			if (cache_only) {
				free_extent_buffer(child);
				continue;
			}
			if (!child)
				child = read_tree_block(root, blocknr,
							blocksize);
			else
				btrfs_read_buffer(child);
		}
		if (search_start == 0)
			search_start = last_block;

		err = __btrfs_cow_block(trans, root, child, parent, slot,
					&cow, search_start,
					min(16 * blocksize,
					    (end_slot - slot) * blocksize));
		if (err) {
			free_extent_buffer(child);
			break;
		}
		search_start = cow->start;
		last_block = cow->start;
		*last_ret = search_start;
		if (parent_level == 1)
			btrfs_clear_buffer_defrag(cow);
		free_extent_buffer(cow);
	}
	if (parent->map_token) {
		unmap_extent_buffer(parent, parent->map_token,
				    KM_USER1);
		parent->map_token = NULL;
	}
	return err;
}
#endif
407
408 /*
409  * The leaf data grows from end-to-front in the node.
410  * this returns the address of the start of the last item,
411  * which is the stop of the leaf data stack
412  */
413 static inline unsigned int leaf_data_end(struct btrfs_root *root,
414                                          struct extent_buffer *leaf)
415 {
416         u32 nr = btrfs_header_nritems(leaf);
417         if (nr == 0)
418                 return BTRFS_LEAF_DATA_SIZE(root);
419         return btrfs_item_offset_nr(leaf, nr - 1);
420 }
421
422 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
423                       int level)
424 {
425         struct extent_buffer *parent = NULL;
426         struct extent_buffer *node = path->nodes[level];
427         struct btrfs_disk_key parent_key;
428         struct btrfs_disk_key node_key;
429         int parent_slot;
430         int slot;
431         struct btrfs_key cpukey;
432         u32 nritems = btrfs_header_nritems(node);
433
434         if (path->nodes[level + 1])
435                 parent = path->nodes[level + 1];
436
437         slot = path->slots[level];
438         BUG_ON(nritems == 0);
439         if (parent) {
440                 parent_slot = path->slots[level + 1];
441                 btrfs_node_key(parent, &parent_key, parent_slot);
442                 btrfs_node_key(node, &node_key, 0);
443                 BUG_ON(memcmp(&parent_key, &node_key,
444                               sizeof(struct btrfs_disk_key)));
445                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
446                        btrfs_header_bytenr(node));
447         }
448         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
449         if (slot != 0) {
450                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
451                 btrfs_node_key(node, &node_key, slot);
452                 BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0);
453         }
454         if (slot < nritems - 1) {
455                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
456                 btrfs_node_key(node, &node_key, slot);
457                 BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0);
458         }
459         return 0;
460 }
461
462 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
463                       int level)
464 {
465         struct extent_buffer *leaf = path->nodes[level];
466         struct extent_buffer *parent = NULL;
467         int parent_slot;
468         struct btrfs_key cpukey;
469         struct btrfs_disk_key parent_key;
470         struct btrfs_disk_key leaf_key;
471         int slot = path->slots[0];
472
473         u32 nritems = btrfs_header_nritems(leaf);
474
475         if (path->nodes[level + 1])
476                 parent = path->nodes[level + 1];
477
478         if (nritems == 0)
479                 return 0;
480
481         if (parent) {
482                 parent_slot = path->slots[level + 1];
483                 btrfs_node_key(parent, &parent_key, parent_slot);
484                 btrfs_item_key(leaf, &leaf_key, 0);
485
486                 BUG_ON(memcmp(&parent_key, &leaf_key,
487                        sizeof(struct btrfs_disk_key)));
488                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
489                        btrfs_header_bytenr(leaf));
490         }
491 #if 0
492         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
493                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
494                 btrfs_item_key(leaf, &leaf_key, i);
495                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
496                         btrfs_print_leaf(root, leaf);
497                         printk("slot %d offset bad key\n", i);
498                         BUG_ON(1);
499                 }
500                 if (btrfs_item_offset_nr(leaf, i) !=
501                         btrfs_item_end_nr(leaf, i + 1)) {
502                         btrfs_print_leaf(root, leaf);
503                         printk("slot %d offset bad\n", i);
504                         BUG_ON(1);
505                 }
506                 if (i == 0) {
507                         if (btrfs_item_offset_nr(leaf, i) +
508                                btrfs_item_size_nr(leaf, i) !=
509                                BTRFS_LEAF_DATA_SIZE(root)) {
510                                 btrfs_print_leaf(root, leaf);
511                                 printk("slot %d first offset bad\n", i);
512                                 BUG_ON(1);
513                         }
514                 }
515         }
516         if (nritems > 0) {
517                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
518                                 btrfs_print_leaf(root, leaf);
519                                 printk("slot %d bad size \n", nritems - 1);
520                                 BUG_ON(1);
521                 }
522         }
523 #endif
524         if (slot != 0 && slot < nritems - 1) {
525                 btrfs_item_key(leaf, &leaf_key, slot);
526                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
527                 if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) {
528                         btrfs_print_leaf(root, leaf);
529                         printk("slot %d offset bad key\n", slot);
530                         BUG_ON(1);
531                 }
532                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
533                        btrfs_item_end_nr(leaf, slot)) {
534                         btrfs_print_leaf(root, leaf);
535                         printk("slot %d offset bad\n", slot);
536                         BUG_ON(1);
537                 }
538         }
539         if (slot < nritems - 1) {
540                 btrfs_item_key(leaf, &leaf_key, slot);
541                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
542                 BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0);
543                 if (btrfs_item_offset_nr(leaf, slot) !=
544                         btrfs_item_end_nr(leaf, slot + 1)) {
545                         btrfs_print_leaf(root, leaf);
546                         printk("slot %d offset bad\n", slot);
547                         BUG_ON(1);
548                 }
549         }
550         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
551                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
552         return 0;
553 }
554
555 static int noinline check_block(struct btrfs_root *root,
556                                 struct btrfs_path *path, int level)
557 {
558         return 0;
559 #if 0
560         struct extent_buffer *buf = path->nodes[level];
561
562         if (memcmp_extent_buffer(buf, root->fs_info->fsid,
563                                  (unsigned long)btrfs_header_fsid(buf),
564                                  BTRFS_FSID_SIZE)) {
565                 printk("warning bad block %Lu\n", buf->start);
566                 return 1;
567         }
568 #endif
569         if (level == 0)
570                 return check_leaf(root, path, level);
571         return check_node(root, path, level);
572 }
573
574 /*
575  * search for key in the extent_buffer.  The items start at offset p,
576  * and they are item_size apart.  There are 'max' items in p.
577  *
578  * the slot in the array is returned via slot, and it points to
579  * the place where you would insert key if it is not found in
580  * the array.
581  *
582  * slot may point to max if the key is bigger than all of the keys
583  */
584 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
585                               int item_size, struct btrfs_key *key,
586                               int max, int *slot)
587 {
588         int low = 0;
589         int high = max;
590         int mid;
591         int ret;
592         unsigned long offset;
593         struct btrfs_disk_key *tmp;
594
595         while(low < high) {
596                 mid = (low + high) / 2;
597                 offset = p + mid * item_size;
598
599                 tmp = (struct btrfs_disk_key *)(eb->data + offset);
600                 ret = btrfs_comp_keys(tmp, key);
601
602                 if (ret < 0)
603                         low = mid + 1;
604                 else if (ret > 0)
605                         high = mid;
606                 else {
607                         *slot = mid;
608                         return 0;
609                 }
610         }
611         *slot = low;
612         return 1;
613 }
614
615 /*
616  * simple bin_search frontend that does the right thing for
617  * leaves vs nodes
618  */
619 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
620                       int level, int *slot)
621 {
622         if (level == 0) {
623                 return generic_bin_search(eb,
624                                           offsetof(struct btrfs_leaf, items),
625                                           sizeof(struct btrfs_item),
626                                           key, btrfs_header_nritems(eb),
627                                           slot);
628         } else {
629                 return generic_bin_search(eb,
630                                           offsetof(struct btrfs_node, ptrs),
631                                           sizeof(struct btrfs_key_ptr),
632                                           key, btrfs_header_nritems(eb),
633                                           slot);
634         }
635         return -1;
636 }
637
/*
 * Read the child block that slot 'slot' of 'parent' points to.
 * Returns NULL when the slot is negative or not below the parent's item
 * count; otherwise returns whatever read_tree_block() returns for the
 * child's block pointer, using the block size of the level below 'parent'.
 */
static struct extent_buffer *read_node_slot(struct btrfs_root *root,
				   struct extent_buffer *parent, int slot)
{
	if (slot < 0 || slot >= btrfs_header_nritems(parent))
		return NULL;

	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
		       btrfs_level_size(root, btrfs_header_level(parent) - 1));
}
648
649 static int balance_level(struct btrfs_trans_handle *trans,
650                          struct btrfs_root *root,
651                          struct btrfs_path *path, int level)
652 {
653         struct extent_buffer *right = NULL;
654         struct extent_buffer *mid;
655         struct extent_buffer *left = NULL;
656         struct extent_buffer *parent = NULL;
657         int ret = 0;
658         int wret;
659         int pslot;
660         int orig_slot = path->slots[level];
661         int err_on_enospc = 0;
662         u64 orig_ptr;
663
664         if (level == 0)
665                 return 0;
666
667         mid = path->nodes[level];
668         WARN_ON(btrfs_header_generation(mid) != trans->transid);
669
670         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
671
672         if (level < BTRFS_MAX_LEVEL - 1)
673                 parent = path->nodes[level + 1];
674         pslot = path->slots[level + 1];
675
676         /*
677          * deal with the case where there is only one pointer in the root
678          * by promoting the node below to a root
679          */
680         if (!parent) {
681                 struct extent_buffer *child;
682
683                 if (btrfs_header_nritems(mid) != 1)
684                         return 0;
685
686                 /* promote the child to a root */
687                 child = read_node_slot(root, mid, 0);
688                 BUG_ON(!child);
689                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
690                 BUG_ON(ret);
691
692                 root->node = child;
693                 path->nodes[level] = NULL;
694                 clean_tree_block(trans, root, mid);
695                 wait_on_tree_block_writeback(root, mid);
696                 /* once for the path */
697                 free_extent_buffer(mid);
698                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
699                                         root->root_key.objectid,
700                                         btrfs_header_generation(mid), 0, 0, 1);
701                 /* once for the root ptr */
702                 free_extent_buffer(mid);
703                 return ret;
704         }
705         if (btrfs_header_nritems(mid) >
706             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
707                 return 0;
708
709         if (btrfs_header_nritems(mid) < 2)
710                 err_on_enospc = 1;
711
712         left = read_node_slot(root, parent, pslot - 1);
713         if (left) {
714                 wret = btrfs_cow_block(trans, root, left,
715                                        parent, pslot - 1, &left);
716                 if (wret) {
717                         ret = wret;
718                         goto enospc;
719                 }
720         }
721         right = read_node_slot(root, parent, pslot + 1);
722         if (right) {
723                 wret = btrfs_cow_block(trans, root, right,
724                                        parent, pslot + 1, &right);
725                 if (wret) {
726                         ret = wret;
727                         goto enospc;
728                 }
729         }
730
731         /* first, try to make some room in the middle buffer */
732         if (left) {
733                 orig_slot += btrfs_header_nritems(left);
734                 wret = push_node_left(trans, root, left, mid);
735                 if (wret < 0)
736                         ret = wret;
737                 if (btrfs_header_nritems(mid) < 2)
738                         err_on_enospc = 1;
739         }
740
741         /*
742          * then try to empty the right most buffer into the middle
743          */
744         if (right) {
745                 wret = push_node_left(trans, root, mid, right);
746                 if (wret < 0 && wret != -ENOSPC)
747                         ret = wret;
748                 if (btrfs_header_nritems(right) == 0) {
749                         u64 bytenr = right->start;
750                         u64 generation = btrfs_header_generation(parent);
751                         u32 blocksize = right->len;
752
753                         clean_tree_block(trans, root, right);
754                         wait_on_tree_block_writeback(root, right);
755                         free_extent_buffer(right);
756                         right = NULL;
757                         wret = del_ptr(trans, root, path, level + 1, pslot +
758                                        1);
759                         if (wret)
760                                 ret = wret;
761                         wret = btrfs_free_extent(trans, root, bytenr,
762                                                  blocksize,
763                                                  btrfs_header_owner(parent),
764                                                  generation, 0, 0, 1);
765                         if (wret)
766                                 ret = wret;
767                 } else {
768                         struct btrfs_disk_key right_key;
769                         btrfs_node_key(right, &right_key, 0);
770                         btrfs_set_node_key(parent, &right_key, pslot + 1);
771                         btrfs_mark_buffer_dirty(parent);
772                 }
773         }
774         if (btrfs_header_nritems(mid) == 1) {
775                 /*
776                  * we're not allowed to leave a node with one item in the
777                  * tree during a delete.  A deletion from lower in the tree
778                  * could try to delete the only pointer in this node.
779                  * So, pull some keys from the left.
780                  * There has to be a left pointer at this point because
781                  * otherwise we would have pulled some pointers from the
782                  * right
783                  */
784                 BUG_ON(!left);
785                 wret = balance_node_right(trans, root, mid, left);
786                 if (wret < 0) {
787                         ret = wret;
788                         goto enospc;
789                 }
790                 BUG_ON(wret == 1);
791         }
792         if (btrfs_header_nritems(mid) == 0) {
793                 /* we've managed to empty the middle node, drop it */
794                 u64 root_gen = btrfs_header_generation(parent);
795                 u64 bytenr = mid->start;
796                 u32 blocksize = mid->len;
797                 clean_tree_block(trans, root, mid);
798                 wait_on_tree_block_writeback(root, mid);
799                 free_extent_buffer(mid);
800                 mid = NULL;
801                 wret = del_ptr(trans, root, path, level + 1, pslot);
802                 if (wret)
803                         ret = wret;
804                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
805                                          btrfs_header_owner(parent),
806                                          root_gen, 0, 0, 1);
807                 if (wret)
808                         ret = wret;
809         } else {
810                 /* update the parent key to reflect our changes */
811                 struct btrfs_disk_key mid_key;
812                 btrfs_node_key(mid, &mid_key, 0);
813                 btrfs_set_node_key(parent, &mid_key, pslot);
814                 btrfs_mark_buffer_dirty(parent);
815         }
816
817         /* update the path */
818         if (left) {
819                 if (btrfs_header_nritems(left) > orig_slot) {
820                         extent_buffer_get(left);
821                         path->nodes[level] = left;
822                         path->slots[level + 1] -= 1;
823                         path->slots[level] = orig_slot;
824                         if (mid)
825                                 free_extent_buffer(mid);
826                 } else {
827                         orig_slot -= btrfs_header_nritems(left);
828                         path->slots[level] = orig_slot;
829                 }
830         }
831         /* double check we haven't messed things up */
832         check_block(root, path, level);
833         if (orig_ptr !=
834             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
835                 BUG();
836 enospc:
837         if (right)
838                 free_extent_buffer(right);
839         if (left)
840                 free_extent_buffer(left);
841         return ret;
842 }
843
844 /* returns zero if the push worked, non-zero otherwise */
845 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
846                                           struct btrfs_root *root,
847                                           struct btrfs_path *path, int level)
848 {
849         struct extent_buffer *right = NULL;
850         struct extent_buffer *mid;
851         struct extent_buffer *left = NULL;
852         struct extent_buffer *parent = NULL;
853         int ret = 0;
854         int wret;
855         int pslot;
856         int orig_slot = path->slots[level];
857         u64 orig_ptr;
858
859         if (level == 0)
860                 return 1;
861
862         mid = path->nodes[level];
863         WARN_ON(btrfs_header_generation(mid) != trans->transid);
864         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
865
866         if (level < BTRFS_MAX_LEVEL - 1)
867                 parent = path->nodes[level + 1];
868         pslot = path->slots[level + 1];
869
870         if (!parent)
871                 return 1;
872
873         left = read_node_slot(root, parent, pslot - 1);
874
875         /* first, try to make some room in the middle buffer */
876         if (left) {
877                 u32 left_nr;
878                 left_nr = btrfs_header_nritems(left);
879                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
880                         wret = 1;
881                 } else {
882                         ret = btrfs_cow_block(trans, root, left, parent,
883                                               pslot - 1, &left);
884                         if (ret)
885                                 wret = 1;
886                         else {
887                                 wret = push_node_left(trans, root,
888                                                       left, mid);
889                         }
890                 }
891                 if (wret < 0)
892                         ret = wret;
893                 if (wret == 0) {
894                         struct btrfs_disk_key disk_key;
895                         orig_slot += left_nr;
896                         btrfs_node_key(mid, &disk_key, 0);
897                         btrfs_set_node_key(parent, &disk_key, pslot);
898                         btrfs_mark_buffer_dirty(parent);
899                         if (btrfs_header_nritems(left) > orig_slot) {
900                                 path->nodes[level] = left;
901                                 path->slots[level + 1] -= 1;
902                                 path->slots[level] = orig_slot;
903                                 free_extent_buffer(mid);
904                         } else {
905                                 orig_slot -=
906                                         btrfs_header_nritems(left);
907                                 path->slots[level] = orig_slot;
908                                 free_extent_buffer(left);
909                         }
910                         return 0;
911                 }
912                 free_extent_buffer(left);
913         }
914         right= read_node_slot(root, parent, pslot + 1);
915
916         /*
917          * then try to empty the right most buffer into the middle
918          */
919         if (right) {
920                 u32 right_nr;
921                 right_nr = btrfs_header_nritems(right);
922                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
923                         wret = 1;
924                 } else {
925                         ret = btrfs_cow_block(trans, root, right,
926                                               parent, pslot + 1,
927                                               &right);
928                         if (ret)
929                                 wret = 1;
930                         else {
931                                 wret = balance_node_right(trans, root,
932                                                           right, mid);
933                         }
934                 }
935                 if (wret < 0)
936                         ret = wret;
937                 if (wret == 0) {
938                         struct btrfs_disk_key disk_key;
939
940                         btrfs_node_key(right, &disk_key, 0);
941                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
942                         btrfs_mark_buffer_dirty(parent);
943
944                         if (btrfs_header_nritems(mid) <= orig_slot) {
945                                 path->nodes[level] = right;
946                                 path->slots[level + 1] += 1;
947                                 path->slots[level] = orig_slot -
948                                         btrfs_header_nritems(mid);
949                                 free_extent_buffer(mid);
950                         } else {
951                                 free_extent_buffer(right);
952                         }
953                         return 0;
954                 }
955                 free_extent_buffer(right);
956         }
957         return 1;
958 }
959
960 /*
961  * readahead one full node of leaves
962  */
963 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
964                              int level, int slot, u64 objectid)
965 {
966         struct extent_buffer *node;
967         struct btrfs_disk_key disk_key;
968         u32 nritems;
969         u64 search;
970         u64 lowest_read;
971         u64 highest_read;
972         u64 nread = 0;
973         int direction = path->reada;
974         struct extent_buffer *eb;
975         u32 nr;
976         u32 blocksize;
977         u32 nscan = 0;
978
979         if (level != 1)
980                 return;
981
982         if (!path->nodes[level])
983                 return;
984
985         node = path->nodes[level];
986         search = btrfs_node_blockptr(node, slot);
987         blocksize = btrfs_level_size(root, level - 1);
988         eb = btrfs_find_tree_block(root, search, blocksize);
989         if (eb) {
990                 free_extent_buffer(eb);
991                 return;
992         }
993
994         highest_read = search;
995         lowest_read = search;
996
997         nritems = btrfs_header_nritems(node);
998         nr = slot;
999         while(1) {
1000                 if (direction < 0) {
1001                         if (nr == 0)
1002                                 break;
1003                         nr--;
1004                 } else if (direction > 0) {
1005                         nr++;
1006                         if (nr >= nritems)
1007                                 break;
1008                 }
1009                 if (path->reada < 0 && objectid) {
1010                         btrfs_node_key(node, &disk_key, nr);
1011                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1012                                 break;
1013                 }
1014                 search = btrfs_node_blockptr(node, nr);
1015                 if ((search >= lowest_read && search <= highest_read) ||
1016                     (search < lowest_read && lowest_read - search <= 32768) ||
1017                     (search > highest_read && search - highest_read <= 32768)) {
1018                         readahead_tree_block(root, search, blocksize);
1019                         nread += blocksize;
1020                 }
1021                 nscan++;
1022                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1023                         break;
1024                 if(nread > (1024 * 1024) || nscan > 128)
1025                         break;
1026
1027                 if (search < lowest_read)
1028                         lowest_read = search;
1029                 if (search > highest_read)
1030                         highest_read = search;
1031         }
1032 }
1033
1034 /*
1035  * look for key in the tree.  path is filled in with nodes along the way
1036  * if key is found, we return zero and you can find the item in the leaf
1037  * level of the path (level 0)
1038  *
1039  * If the key isn't found, the path points to the slot where it should
1040  * be inserted, and 1 is returned.  If there are other errors during the
1041  * search a negative error number is returned.
1042  *
1043  * if ins_len > 0, nodes and leaves will be split as we walk down the
1044  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1045  * possible)
1046  */
1047 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1048                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1049                       ins_len, int cow)
1050 {
1051         struct extent_buffer *b;
1052         u64 bytenr;
1053         u64 ptr_gen;
1054         int slot;
1055         int ret;
1056         int level;
1057         int should_reada = p->reada;
1058         u8 lowest_level = 0;
1059
1060         lowest_level = p->lowest_level;
1061         WARN_ON(lowest_level && ins_len);
1062         WARN_ON(p->nodes[0] != NULL);
1063         /*
1064         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1065         */
1066 again:
1067         b = root->node;
1068         extent_buffer_get(b);
1069         while (b) {
1070                 level = btrfs_header_level(b);
1071                 if (cow) {
1072                         int wret;
1073                         wret = btrfs_cow_block(trans, root, b,
1074                                                p->nodes[level + 1],
1075                                                p->slots[level + 1],
1076                                                &b);
1077                         if (wret) {
1078                                 free_extent_buffer(b);
1079                                 return wret;
1080                         }
1081                 }
1082                 BUG_ON(!cow && ins_len);
1083                 if (level != btrfs_header_level(b))
1084                         WARN_ON(1);
1085                 level = btrfs_header_level(b);
1086                 p->nodes[level] = b;
1087                 ret = check_block(root, p, level);
1088                 if (ret)
1089                         return -1;
1090                 ret = bin_search(b, key, level, &slot);
1091                 if (level != 0) {
1092                         if (ret && slot > 0)
1093                                 slot -= 1;
1094                         p->slots[level] = slot;
1095                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1096                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1097                                 int sret = split_node(trans, root, p, level);
1098                                 BUG_ON(sret > 0);
1099                                 if (sret)
1100                                         return sret;
1101                                 b = p->nodes[level];
1102                                 slot = p->slots[level];
1103                         } else if (ins_len < 0) {
1104                                 int sret = balance_level(trans, root, p,
1105                                                          level);
1106                                 if (sret)
1107                                         return sret;
1108                                 b = p->nodes[level];
1109                                 if (!b) {
1110                                         btrfs_release_path(NULL, p);
1111                                         goto again;
1112                                 }
1113                                 slot = p->slots[level];
1114                                 BUG_ON(btrfs_header_nritems(b) == 1);
1115                         }
1116                         /* this is only true while dropping a snapshot */
1117                         if (level == lowest_level)
1118                                 break;
1119                         bytenr = btrfs_node_blockptr(b, slot);
1120                         ptr_gen = btrfs_node_ptr_generation(b, slot);
1121                         if (should_reada)
1122                                 reada_for_search(root, p, level, slot,
1123                                                  key->objectid);
1124                         b = read_tree_block(root, bytenr,
1125                                             btrfs_level_size(root, level - 1));
1126                         if (ptr_gen != btrfs_header_generation(b)) {
1127                                 printk("block %llu bad gen wanted %llu "
1128                                        "found %llu\n",
1129                                 (unsigned long long)b->start,
1130                                 (unsigned long long)ptr_gen,
1131                                 (unsigned long long)btrfs_header_generation(b));
1132                         }
1133                 } else {
1134                         p->slots[level] = slot;
1135                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1136                             sizeof(struct btrfs_item) + ins_len) {
1137                                 int sret = split_leaf(trans, root, key,
1138                                                       p, ins_len, ret == 0);
1139                                 BUG_ON(sret > 0);
1140                                 if (sret)
1141                                         return sret;
1142                         }
1143                         return ret;
1144                 }
1145         }
1146         return 1;
1147 }
1148
1149 /*
1150  * adjust the pointers going up the tree, starting at level
1151  * making sure the right key of each node is points to 'key'.
1152  * This is used after shifting pointers to the left, so it stops
1153  * fixing up pointers when a given leaf/node is not in slot 0 of the
1154  * higher levels
1155  *
1156  * If this fails to write a tree block, it returns -1, but continues
1157  * fixing up the blocks in ram so the tree is consistent.
1158  */
1159 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1160                           struct btrfs_root *root, struct btrfs_path *path,
1161                           struct btrfs_disk_key *key, int level)
1162 {
1163         int i;
1164         int ret = 0;
1165         struct extent_buffer *t;
1166
1167         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1168                 int tslot = path->slots[i];
1169                 if (!path->nodes[i])
1170                         break;
1171                 t = path->nodes[i];
1172                 btrfs_set_node_key(t, key, tslot);
1173                 btrfs_mark_buffer_dirty(path->nodes[i]);
1174                 if (tslot != 0)
1175                         break;
1176         }
1177         return ret;
1178 }
1179
1180 /*
1181  * try to push data from one node into the next node left in the
1182  * tree.
1183  *
1184  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1185  * error, and > 0 if there was no room in the left hand block.
1186  */
1187 static int push_node_left(struct btrfs_trans_handle *trans,
1188                           struct btrfs_root *root, struct extent_buffer *dst,
1189                           struct extent_buffer *src)
1190 {
1191         int push_items = 0;
1192         int src_nritems;
1193         int dst_nritems;
1194         int ret = 0;
1195
1196         src_nritems = btrfs_header_nritems(src);
1197         dst_nritems = btrfs_header_nritems(dst);
1198         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1199         WARN_ON(btrfs_header_generation(src) != trans->transid);
1200         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1201
1202         if (push_items <= 0) {
1203                 return 1;
1204         }
1205
1206         if (src_nritems < push_items)
1207                 push_items = src_nritems;
1208
1209         copy_extent_buffer(dst, src,
1210                            btrfs_node_key_ptr_offset(dst_nritems),
1211                            btrfs_node_key_ptr_offset(0),
1212                            push_items * sizeof(struct btrfs_key_ptr));
1213
1214         if (push_items < src_nritems) {
1215                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1216                                       btrfs_node_key_ptr_offset(push_items),
1217                                       (src_nritems - push_items) *
1218                                       sizeof(struct btrfs_key_ptr));
1219         }
1220         btrfs_set_header_nritems(src, src_nritems - push_items);
1221         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1222         btrfs_mark_buffer_dirty(src);
1223         btrfs_mark_buffer_dirty(dst);
1224         return ret;
1225 }
1226
1227 /*
1228  * try to push data from one node into the next node right in the
1229  * tree.
1230  *
1231  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1232  * error, and > 0 if there was no room in the right hand block.
1233  *
1234  * this will  only push up to 1/2 the contents of the left node over
1235  */
1236 static int balance_node_right(struct btrfs_trans_handle *trans,
1237                               struct btrfs_root *root,
1238                               struct extent_buffer *dst,
1239                               struct extent_buffer *src)
1240 {
1241         int push_items = 0;
1242         int max_push;
1243         int src_nritems;
1244         int dst_nritems;
1245         int ret = 0;
1246
1247         WARN_ON(btrfs_header_generation(src) != trans->transid);
1248         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1249
1250         src_nritems = btrfs_header_nritems(src);
1251         dst_nritems = btrfs_header_nritems(dst);
1252         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1253         if (push_items <= 0)
1254                 return 1;
1255
1256         max_push = src_nritems / 2 + 1;
1257         /* don't try to empty the node */
1258         if (max_push >= src_nritems)
1259                 return 1;
1260
1261         if (max_push < push_items)
1262                 push_items = max_push;
1263
1264         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1265                                       btrfs_node_key_ptr_offset(0),
1266                                       (dst_nritems) *
1267                                       sizeof(struct btrfs_key_ptr));
1268
1269         copy_extent_buffer(dst, src,
1270                            btrfs_node_key_ptr_offset(0),
1271                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1272                            push_items * sizeof(struct btrfs_key_ptr));
1273
1274         btrfs_set_header_nritems(src, src_nritems - push_items);
1275         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1276
1277         btrfs_mark_buffer_dirty(src);
1278         btrfs_mark_buffer_dirty(dst);
1279         return ret;
1280 }
1281
1282 /*
1283  * helper function to insert a new root level in the tree.
1284  * A new node is allocated, and a single item is inserted to
1285  * point to the existing root
1286  *
1287  * returns zero on success or < 0 on failure.
1288  */
1289 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1290                            struct btrfs_root *root,
1291                            struct btrfs_path *path, int level)
1292 {
1293         u64 root_gen;
1294         u64 lower_gen;
1295         struct extent_buffer *lower;
1296         struct extent_buffer *c;
1297         struct btrfs_disk_key lower_key;
1298
1299         BUG_ON(path->nodes[level]);
1300         BUG_ON(path->nodes[level-1] != root->node);
1301
1302         if (root->ref_cows)
1303                 root_gen = trans->transid;
1304         else
1305                 root_gen = 0;
1306
1307         lower = path->nodes[level-1];
1308         if (level == 1)
1309                 btrfs_item_key(lower, &lower_key, 0);
1310         else
1311                 btrfs_node_key(lower, &lower_key, 0);
1312
1313         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1314                                    root->root_key.objectid,
1315                                    root_gen, lower_key.objectid, level,
1316                                    root->node->start, 0);
1317         if (IS_ERR(c))
1318                 return PTR_ERR(c);
1319         memset_extent_buffer(c, 0, 0, root->nodesize);
1320         btrfs_set_header_nritems(c, 1);
1321         btrfs_set_header_level(c, level);
1322         btrfs_set_header_bytenr(c, c->start);
1323         btrfs_set_header_generation(c, trans->transid);
1324         btrfs_set_header_owner(c, root->root_key.objectid);
1325
1326         write_extent_buffer(c, root->fs_info->fsid,
1327                             (unsigned long)btrfs_header_fsid(c),
1328                             BTRFS_FSID_SIZE);
1329         btrfs_set_node_key(c, &lower_key, 0);
1330         btrfs_set_node_blockptr(c, 0, lower->start);
1331         lower_gen = btrfs_header_generation(lower);
1332         WARN_ON(lower_gen == 0);
1333
1334         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1335
1336         btrfs_mark_buffer_dirty(c);
1337
1338         /* the super has an extra ref to root->node */
1339         free_extent_buffer(root->node);
1340         root->node = c;
1341         extent_buffer_get(c);
1342         path->nodes[level] = c;
1343         path->slots[level] = 0;
1344
1345         if (root->ref_cows && lower_gen != trans->transid) {
1346                 struct btrfs_path *back_path = btrfs_alloc_path();
1347                 int ret;
1348                 ret = btrfs_insert_extent_backref(trans,
1349                                                   root->fs_info->extent_root,
1350                                                   path, lower->start,
1351                                                   root->root_key.objectid,
1352                                                   trans->transid, 0, 0);
1353                 BUG_ON(ret);
1354                 btrfs_free_path(back_path);
1355         }
1356         return 0;
1357 }
1358
1359 /*
1360  * worker function to insert a single pointer in a node.
1361  * the node should have enough room for the pointer already
1362  *
1363  * slot and level indicate where you want the key to go, and
1364  * blocknr is the block the key points to.
1365  *
1366  * returns zero on success and < 0 on any error
1367  */
1368 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1369                       *root, struct btrfs_path *path, struct btrfs_disk_key
1370                       *key, u64 bytenr, int slot, int level)
1371 {
1372         struct extent_buffer *lower;
1373         int nritems;
1374
1375         BUG_ON(!path->nodes[level]);
1376         lower = path->nodes[level];
1377         nritems = btrfs_header_nritems(lower);
1378         if (slot > nritems)
1379                 BUG();
1380         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1381                 BUG();
1382         if (slot != nritems) {
1383                 memmove_extent_buffer(lower,
1384                               btrfs_node_key_ptr_offset(slot + 1),
1385                               btrfs_node_key_ptr_offset(slot),
1386                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1387         }
1388         btrfs_set_node_key(lower, key, slot);
1389         btrfs_set_node_blockptr(lower, slot, bytenr);
1390         WARN_ON(trans->transid == 0);
1391         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1392         btrfs_set_header_nritems(lower, nritems + 1);
1393         btrfs_mark_buffer_dirty(lower);
1394         return 0;
1395 }
1396
1397 /*
1398  * split the node at the specified level in path in two.
1399  * The path is corrected to point to the appropriate node after the split
1400  *
1401  * Before splitting this tries to make some room in the node by pushing
1402  * left and right, if either one works, it returns right away.
1403  *
1404  * returns 0 on success and < 0 on failure
1405  */
1406 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1407                       *root, struct btrfs_path *path, int level)
1408 {
1409         u64 root_gen;
1410         struct extent_buffer *c;
1411         struct extent_buffer *split;
1412         struct btrfs_disk_key disk_key;
1413         int mid;
1414         int ret;
1415         int wret;
1416         u32 c_nritems;
1417
1418         c = path->nodes[level];
1419         WARN_ON(btrfs_header_generation(c) != trans->transid);
1420         if (c == root->node) {
1421                 /* trying to split the root, lets make a new one */
1422                 ret = insert_new_root(trans, root, path, level + 1);
1423                 if (ret)
1424                         return ret;
1425         } else {
1426                 ret = push_nodes_for_insert(trans, root, path, level);
1427                 c = path->nodes[level];
1428                 if (!ret && btrfs_header_nritems(c) <
1429                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1430                         return 0;
1431                 if (ret < 0)
1432                         return ret;
1433         }
1434
1435         c_nritems = btrfs_header_nritems(c);
1436         if (root->ref_cows)
1437                 root_gen = trans->transid;
1438         else
1439                 root_gen = 0;
1440
1441         btrfs_node_key(c, &disk_key, 0);
1442         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1443                                          root->root_key.objectid,
1444                                          root_gen,
1445                                          btrfs_disk_key_objectid(&disk_key),
1446                                          level, c->start, 0);
1447         if (IS_ERR(split))
1448                 return PTR_ERR(split);
1449
1450         btrfs_set_header_flags(split, btrfs_header_flags(c));
1451         btrfs_set_header_level(split, btrfs_header_level(c));
1452         btrfs_set_header_bytenr(split, split->start);
1453         btrfs_set_header_generation(split, trans->transid);
1454         btrfs_set_header_owner(split, root->root_key.objectid);
1455         write_extent_buffer(split, root->fs_info->fsid,
1456                             (unsigned long)btrfs_header_fsid(split),
1457                             BTRFS_FSID_SIZE);
1458
1459         mid = (c_nritems + 1) / 2;
1460
1461         copy_extent_buffer(split, c,
1462                            btrfs_node_key_ptr_offset(0),
1463                            btrfs_node_key_ptr_offset(mid),
1464                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1465         btrfs_set_header_nritems(split, c_nritems - mid);
1466         btrfs_set_header_nritems(c, mid);
1467         ret = 0;
1468
1469         btrfs_mark_buffer_dirty(c);
1470         btrfs_mark_buffer_dirty(split);
1471
1472         btrfs_node_key(split, &disk_key, 0);
1473         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1474                           path->slots[level + 1] + 1,
1475                           level + 1);
1476         if (wret)
1477                 ret = wret;
1478
1479         if (path->slots[level] >= mid) {
1480                 path->slots[level] -= mid;
1481                 free_extent_buffer(c);
1482                 path->nodes[level] = split;
1483                 path->slots[level + 1] += 1;
1484         } else {
1485                 free_extent_buffer(split);
1486         }
1487         return ret;
1488 }
1489
1490 /*
1491  * how many bytes are required to store the items in a leaf.  start
1492  * and nr indicate which items in the leaf to check.  This totals up the
1493  * space used both by the item structs and the item data
1494  */
1495 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1496 {
1497         int data_len;
1498         int nritems = btrfs_header_nritems(l);
1499         int end = min(nritems, start + nr) - 1;
1500
1501         if (!nr)
1502                 return 0;
1503         data_len = btrfs_item_end_nr(l, start);
1504         data_len = data_len - btrfs_item_offset_nr(l, end);
1505         data_len += sizeof(struct btrfs_item) * nr;
1506         WARN_ON(data_len < 0);
1507         return data_len;
1508 }
1509
/*
 * Free space left in a leaf: BTRFS_LEAF_DATA_SIZE(root) minus the bytes
 * used by all of the leaf's items (headers and data).  IOW, how much room
 * the leaf has left for both items and data.
 *
 * A negative result is reported with printk and still returned.
 */
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
{
        int nritems = btrfs_header_nritems(leaf);
        int ret = BTRFS_LEAF_DATA_SIZE(root) -
                  leaf_space_used(leaf, 0, nritems);

        if (ret >= 0)
                return ret;

        printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
               ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
               leaf_space_used(leaf, 0, nritems), nritems);
        return ret;
}
1527
1528 /*
1529  * push some data in the path leaf to the right, trying to free up at
1530  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1531  *
1532  * returns 1 if the push failed because the other node didn't have enough
1533  * room, 0 if everything worked out and < 0 if there were major errors.
1534  */
1535 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1536                            *root, struct btrfs_path *path, int data_size,
1537                            int empty)
1538 {
1539         struct extent_buffer *left = path->nodes[0];
1540         struct extent_buffer *right;
1541         struct extent_buffer *upper;
1542         struct btrfs_disk_key disk_key;
1543         int slot;
1544         u32 i;
1545         int free_space;
1546         int push_space = 0;
1547         int push_items = 0;
1548         struct btrfs_item *item;
1549         u32 left_nritems;
1550         u32 nr;
1551         u32 right_nritems;
1552         u32 data_end;
1553         u32 this_item_size;
1554         int ret;
1555
1556         slot = path->slots[1];
1557         if (!path->nodes[1]) {
1558                 return 1;
1559         }
1560         upper = path->nodes[1];
1561         if (slot >= btrfs_header_nritems(upper) - 1)
1562                 return 1;
1563
1564         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1565                                 root->leafsize);
1566         free_space = btrfs_leaf_free_space(root, right);
1567         if (free_space < data_size + sizeof(struct btrfs_item)) {
1568                 free_extent_buffer(right);
1569                 return 1;
1570         }
1571
1572         /* cow and double check */
1573         ret = btrfs_cow_block(trans, root, right, upper,
1574                               slot + 1, &right);
1575         if (ret) {
1576                 free_extent_buffer(right);
1577                 return 1;
1578         }
1579         free_space = btrfs_leaf_free_space(root, right);
1580         if (free_space < data_size + sizeof(struct btrfs_item)) {
1581                 free_extent_buffer(right);
1582                 return 1;
1583         }
1584
1585         left_nritems = btrfs_header_nritems(left);
1586         if (left_nritems == 0) {
1587                 free_extent_buffer(right);
1588                 return 1;
1589         }
1590
1591         if (empty)
1592                 nr = 0;
1593         else
1594                 nr = 1;
1595
1596         i = left_nritems - 1;
1597         while (i >= nr) {
1598                 item = btrfs_item_nr(left, i);
1599
1600                 if (path->slots[0] == i)
1601                         push_space += data_size + sizeof(*item);
1602
1603                 this_item_size = btrfs_item_size(left, item);
1604                 if (this_item_size + sizeof(*item) + push_space > free_space)
1605                         break;
1606                 push_items++;
1607                 push_space += this_item_size + sizeof(*item);
1608                 if (i == 0)
1609                         break;
1610                 i--;
1611         }
1612
1613         if (push_items == 0) {
1614                 free_extent_buffer(right);
1615                 return 1;
1616         }
1617
1618         if (!empty && push_items == left_nritems)
1619                 WARN_ON(1);
1620
1621         /* push left to right */
1622         right_nritems = btrfs_header_nritems(right);
1623
1624         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1625         push_space -= leaf_data_end(root, left);
1626
1627         /* make room in the right data area */
1628         data_end = leaf_data_end(root, right);
1629         memmove_extent_buffer(right,
1630                               btrfs_leaf_data(right) + data_end - push_space,
1631                               btrfs_leaf_data(right) + data_end,
1632                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1633
1634         /* copy from the left data area */
1635         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1636                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1637                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1638                      push_space);
1639
1640         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1641                               btrfs_item_nr_offset(0),
1642                               right_nritems * sizeof(struct btrfs_item));
1643
1644         /* copy the items from left to right */
1645         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1646                    btrfs_item_nr_offset(left_nritems - push_items),
1647                    push_items * sizeof(struct btrfs_item));
1648
1649         /* update the item pointers */
1650         right_nritems += push_items;
1651         btrfs_set_header_nritems(right, right_nritems);
1652         push_space = BTRFS_LEAF_DATA_SIZE(root);
1653         for (i = 0; i < right_nritems; i++) {
1654                 item = btrfs_item_nr(right, i);
1655                 push_space -= btrfs_item_size(right, item);
1656                 btrfs_set_item_offset(right, item, push_space);
1657         }
1658
1659         left_nritems -= push_items;
1660         btrfs_set_header_nritems(left, left_nritems);
1661
1662         if (left_nritems)
1663                 btrfs_mark_buffer_dirty(left);
1664         btrfs_mark_buffer_dirty(right);
1665
1666         btrfs_item_key(right, &disk_key, 0);
1667         btrfs_set_node_key(upper, &disk_key, slot + 1);
1668         btrfs_mark_buffer_dirty(upper);
1669
1670         /* then fixup the leaf pointer in the path */
1671         if (path->slots[0] >= left_nritems) {
1672                 path->slots[0] -= left_nritems;
1673                 free_extent_buffer(path->nodes[0]);
1674                 path->nodes[0] = right;
1675                 path->slots[1] += 1;
1676         } else {
1677                 free_extent_buffer(right);
1678         }
1679         return 0;
1680 }
1681 /*
1682  * push some data in the path leaf to the left, trying to free up at
1683  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1684  */
1685 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1686                           *root, struct btrfs_path *path, int data_size,
1687                           int empty)
1688 {
1689         struct btrfs_disk_key disk_key;
1690         struct extent_buffer *right = path->nodes[0];
1691         struct extent_buffer *left;
1692         int slot;
1693         int i;
1694         int free_space;
1695         int push_space = 0;
1696         int push_items = 0;
1697         struct btrfs_item *item;
1698         u32 old_left_nritems;
1699         u32 right_nritems;
1700         u32 nr;
1701         int ret = 0;
1702         int wret;
1703         u32 this_item_size;
1704         u32 old_left_item_size;
1705
1706         slot = path->slots[1];
1707         if (slot == 0)
1708                 return 1;
1709         if (!path->nodes[1])
1710                 return 1;
1711
1712         right_nritems = btrfs_header_nritems(right);
1713         if (right_nritems == 0) {
1714                 return 1;
1715         }
1716
1717         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1718                                slot - 1), root->leafsize);
1719         free_space = btrfs_leaf_free_space(root, left);
1720         if (free_space < data_size + sizeof(struct btrfs_item)) {
1721                 free_extent_buffer(left);
1722                 return 1;
1723         }
1724
1725         /* cow and double check */
1726         ret = btrfs_cow_block(trans, root, left,
1727                               path->nodes[1], slot - 1, &left);
1728         if (ret) {
1729                 /* we hit -ENOSPC, but it isn't fatal here */
1730                 free_extent_buffer(left);
1731                 return 1;
1732         }
1733
1734         free_space = btrfs_leaf_free_space(root, left);
1735         if (free_space < data_size + sizeof(struct btrfs_item)) {
1736                 free_extent_buffer(left);
1737                 return 1;
1738         }
1739
1740         if (empty)
1741                 nr = right_nritems;
1742         else
1743                 nr = right_nritems - 1;
1744
1745         for (i = 0; i < nr; i++) {
1746                 item = btrfs_item_nr(right, i);
1747
1748                 if (path->slots[0] == i)
1749                         push_space += data_size + sizeof(*item);
1750
1751                 this_item_size = btrfs_item_size(right, item);
1752                 if (this_item_size + sizeof(*item) + push_space > free_space)
1753                         break;
1754
1755                 push_items++;
1756                 push_space += this_item_size + sizeof(*item);
1757         }
1758
1759         if (push_items == 0) {
1760                 free_extent_buffer(left);
1761                 return 1;
1762         }
1763         if (!empty && push_items == btrfs_header_nritems(right))
1764                 WARN_ON(1);
1765
1766         /* push data from right to left */
1767         copy_extent_buffer(left, right,
1768                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1769                            btrfs_item_nr_offset(0),
1770                            push_items * sizeof(struct btrfs_item));
1771
1772         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1773                      btrfs_item_offset_nr(right, push_items -1);
1774
1775         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1776                      leaf_data_end(root, left) - push_space,
1777                      btrfs_leaf_data(right) +
1778                      btrfs_item_offset_nr(right, push_items - 1),
1779                      push_space);
1780         old_left_nritems = btrfs_header_nritems(left);
1781         BUG_ON(old_left_nritems < 0);
1782
1783         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1784         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1785                 u32 ioff;
1786
1787                 item = btrfs_item_nr(left, i);
1788                 ioff = btrfs_item_offset(left, item);
1789                 btrfs_set_item_offset(left, item,
1790                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1791         }
1792         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1793
1794         /* fixup right node */
1795         if (push_items > right_nritems) {
1796                 printk("push items %d nr %u\n", push_items, right_nritems);
1797                 WARN_ON(1);
1798         }
1799
1800         if (push_items < right_nritems) {
1801                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1802                                                   leaf_data_end(root, right);
1803                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1804                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1805                                       btrfs_leaf_data(right) +
1806                                       leaf_data_end(root, right), push_space);
1807
1808                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1809                               btrfs_item_nr_offset(push_items),
1810                              (btrfs_header_nritems(right) - push_items) *
1811                              sizeof(struct btrfs_item));
1812         }
1813         right_nritems -= push_items;
1814         btrfs_set_header_nritems(right, right_nritems);
1815         push_space = BTRFS_LEAF_DATA_SIZE(root);
1816         for (i = 0; i < right_nritems; i++) {
1817                 item = btrfs_item_nr(right, i);
1818                 push_space = push_space - btrfs_item_size(right, item);
1819                 btrfs_set_item_offset(right, item, push_space);
1820         }
1821
1822         btrfs_mark_buffer_dirty(left);
1823         if (right_nritems)
1824                 btrfs_mark_buffer_dirty(right);
1825
1826         btrfs_item_key(right, &disk_key, 0);
1827         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1828         if (wret)
1829                 ret = wret;
1830
1831         /* then fixup the leaf pointer in the path */
1832         if (path->slots[0] < push_items) {
1833                 path->slots[0] += old_left_nritems;
1834                 free_extent_buffer(path->nodes[0]);
1835                 path->nodes[0] = left;
1836                 path->slots[1] -= 1;
1837         } else {
1838                 free_extent_buffer(left);
1839                 path->slots[0] -= push_items;
1840         }
1841         BUG_ON(path->slots[0] < 0);
1842         return ret;
1843 }
1844
1845 /*
1846  * split the path's leaf in two, making sure there is at least data_size
1847  * available for the resulting leaf level of the path.
1848  *
1849  * returns 0 if all went well and < 0 on failure.
1850  */
1851 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1852                       *root, struct btrfs_key *ins_key,
1853                       struct btrfs_path *path, int data_size, int extend)
1854 {
1855         u64 root_gen;
1856         struct extent_buffer *l;
1857         u32 nritems;
1858         int mid;
1859         int slot;
1860         struct extent_buffer *right;
1861         int space_needed = data_size + sizeof(struct btrfs_item);
1862         int data_copy_size;
1863         int rt_data_off;
1864         int i;
1865         int ret = 0;
1866         int wret;
1867         int double_split;
1868         int num_doubles = 0;
1869         struct btrfs_disk_key disk_key;
1870
1871         if (extend)
1872                 space_needed = data_size;
1873
1874         if (root->ref_cows)
1875                 root_gen = trans->transid;
1876         else
1877                 root_gen = 0;
1878
1879         /* first try to make some room by pushing left and right */
1880         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1881                 wret = push_leaf_right(trans, root, path, data_size, 0);
1882                 if (wret < 0) {
1883                         return wret;
1884                 }
1885                 if (wret) {
1886                         wret = push_leaf_left(trans, root, path, data_size, 0);
1887                         if (wret < 0)
1888                                 return wret;
1889                 }
1890                 l = path->nodes[0];
1891
1892                 /* did the pushes work? */
1893                 if (btrfs_leaf_free_space(root, l) >= space_needed)
1894                         return 0;
1895         }
1896
1897         if (!path->nodes[1]) {
1898                 ret = insert_new_root(trans, root, path, 1);
1899                 if (ret)
1900                         return ret;
1901         }
1902 again:
1903         double_split = 0;
1904         l = path->nodes[0];
1905         slot = path->slots[0];
1906         nritems = btrfs_header_nritems(l);
1907         mid = (nritems + 1)/ 2;
1908
1909         btrfs_item_key(l, &disk_key, 0);
1910
1911         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
1912                                          root->root_key.objectid,
1913                                          root_gen, disk_key.objectid, 0,
1914                                          l->start, 0);
1915         if (IS_ERR(right))
1916                 return PTR_ERR(right);
1917
1918         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
1919         btrfs_set_header_bytenr(right, right->start);
1920         btrfs_set_header_generation(right, trans->transid);
1921         btrfs_set_header_owner(right, root->root_key.objectid);
1922         btrfs_set_header_level(right, 0);
1923         write_extent_buffer(right, root->fs_info->fsid,
1924                             (unsigned long)btrfs_header_fsid(right),
1925                             BTRFS_FSID_SIZE);
1926         if (mid <= slot) {
1927                 if (nritems == 1 ||
1928                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1929                         BTRFS_LEAF_DATA_SIZE(root)) {
1930                         if (slot >= nritems) {
1931                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1932                                 btrfs_set_header_nritems(right, 0);
1933                                 wret = insert_ptr(trans, root, path,
1934                                                   &disk_key, right->start,
1935                                                   path->slots[1] + 1, 1);
1936                                 if (wret)
1937                                         ret = wret;
1938                                 free_extent_buffer(path->nodes[0]);
1939                                 path->nodes[0] = right;
1940                                 path->slots[0] = 0;
1941                                 path->slots[1] += 1;
1942                                 return ret;
1943                         }
1944                         mid = slot;
1945                         if (mid != nritems &&
1946                             leaf_space_used(l, mid, nritems - mid) +
1947                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1948                                 double_split = 1;
1949                         }
1950                 }
1951         } else {
1952                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1953                         BTRFS_LEAF_DATA_SIZE(root)) {
1954                         if (!extend && slot == 0) {
1955                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1956                                 btrfs_set_header_nritems(right, 0);
1957                                 wret = insert_ptr(trans, root, path,
1958                                                   &disk_key,
1959                                                   right->start,
1960                                                   path->slots[1], 1);
1961                                 if (wret)
1962                                         ret = wret;
1963                                 free_extent_buffer(path->nodes[0]);
1964                                 path->nodes[0] = right;
1965                                 path->slots[0] = 0;
1966                                 if (path->slots[1] == 0) {
1967                                         wret = fixup_low_keys(trans, root,
1968                                                    path, &disk_key, 1);
1969                                         if (wret)
1970                                                 ret = wret;
1971                                 }
1972                                 return ret;
1973                         } else if (extend && slot == 0) {
1974                                 mid = 1;
1975                         } else {
1976                                 mid = slot;
1977                                 if (mid != nritems &&
1978                                     leaf_space_used(l, mid, nritems - mid) +
1979                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1980                                         double_split = 1;
1981                                 }
1982                         }
1983                 }
1984         }
1985         nritems = nritems - mid;
1986         btrfs_set_header_nritems(right, nritems);
1987         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
1988
1989         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
1990                            btrfs_item_nr_offset(mid),
1991                            nritems * sizeof(struct btrfs_item));
1992
1993         copy_extent_buffer(right, l,
1994                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1995                      data_copy_size, btrfs_leaf_data(l) +
1996                      leaf_data_end(root, l), data_copy_size);
1997
1998         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1999                       btrfs_item_end_nr(l, mid);
2000
2001         for (i = 0; i < nritems; i++) {
2002                 struct btrfs_item *item = btrfs_item_nr(right, i);
2003                 u32 ioff = btrfs_item_offset(right, item);
2004                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2005         }
2006
2007         btrfs_set_header_nritems(l, mid);
2008         ret = 0;
2009         btrfs_item_key(right, &disk_key, 0);
2010         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2011                           path->slots[1] + 1, 1);
2012         if (wret)
2013                 ret = wret;
2014
2015         btrfs_mark_buffer_dirty(right);
2016         btrfs_mark_buffer_dirty(l);
2017         BUG_ON(path->slots[0] != slot);
2018
2019         if (mid <= slot) {
2020                 free_extent_buffer(path->nodes[0]);
2021                 path->nodes[0] = right;
2022                 path->slots[0] -= mid;
2023                 path->slots[1] += 1;
2024         } else
2025                 free_extent_buffer(right);
2026
2027         BUG_ON(path->slots[0] < 0);
2028
2029         if (double_split) {
2030                 BUG_ON(num_doubles != 0);
2031                 num_doubles++;
2032                 goto again;
2033         }
2034         return ret;
2035 }
2036
2037 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2038                         struct btrfs_root *root,
2039                         struct btrfs_path *path,
2040                         u32 new_size, int from_end)
2041 {
2042         int ret = 0;
2043         int slot;
2044         int slot_orig;
2045         struct extent_buffer *leaf;
2046         struct btrfs_item *item;
2047         u32 nritems;
2048         unsigned int data_end;
2049         unsigned int old_data_start;
2050         unsigned int old_size;
2051         unsigned int size_diff;
2052         int i;
2053
2054         slot_orig = path->slots[0];
2055         leaf = path->nodes[0];
2056         slot = path->slots[0];
2057
2058         old_size = btrfs_item_size_nr(leaf, slot);
2059         if (old_size == new_size)
2060                 return 0;
2061
2062         nritems = btrfs_header_nritems(leaf);
2063         data_end = leaf_data_end(root, leaf);
2064
2065         old_data_start = btrfs_item_offset_nr(leaf, slot);
2066
2067         size_diff = old_size - new_size;
2068
2069         BUG_ON(slot < 0);
2070         BUG_ON(slot >= nritems);
2071
2072         /*
2073          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2074          */
2075         /* first correct the data pointers */
2076         for (i = slot; i < nritems; i++) {
2077                 u32 ioff;
2078                 item = btrfs_item_nr(leaf, i);
2079                 ioff = btrfs_item_offset(leaf, item);
2080                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2081         }
2082
2083         /* shift the data */
2084         if (from_end) {
2085                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2086                               data_end + size_diff, btrfs_leaf_data(leaf) +
2087                               data_end, old_data_start + new_size - data_end);
2088         } else {
2089                 struct btrfs_disk_key disk_key;
2090                 u64 offset;
2091
2092                 btrfs_item_key(leaf, &disk_key, slot);
2093
2094                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2095                         unsigned long ptr;
2096                         struct btrfs_file_extent_item *fi;
2097
2098                         fi = btrfs_item_ptr(leaf, slot,
2099                                             struct btrfs_file_extent_item);
2100                         fi = (struct btrfs_file_extent_item *)(
2101                              (unsigned long)fi - size_diff);
2102
2103                         if (btrfs_file_extent_type(leaf, fi) ==
2104                             BTRFS_FILE_EXTENT_INLINE) {
2105                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2106                                 memmove_extent_buffer(leaf, ptr,
2107                                         (unsigned long)fi,
2108                                         offsetof(struct btrfs_file_extent_item,
2109                                                  disk_bytenr));
2110                         }
2111                 }
2112
2113                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2114                               data_end + size_diff, btrfs_leaf_data(leaf) +
2115                               data_end, old_data_start - data_end);
2116
2117                 offset = btrfs_disk_key_offset(&disk_key);
2118                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2119                 btrfs_set_item_key(leaf, &disk_key, slot);
2120                 if (slot == 0)
2121                         fixup_low_keys(trans, root, path, &disk_key, 1);
2122         }
2123
2124         item = btrfs_item_nr(leaf, slot);
2125         btrfs_set_item_size(leaf, item, new_size);
2126         btrfs_mark_buffer_dirty(leaf);
2127
2128         ret = 0;
2129         if (btrfs_leaf_free_space(root, leaf) < 0) {
2130                 btrfs_print_leaf(root, leaf);
2131                 BUG();
2132         }
2133         return ret;
2134 }
2135
2136 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2137                       struct btrfs_root *root, struct btrfs_path *path,
2138                       u32 data_size)
2139 {
2140         int ret = 0;
2141         int slot;
2142         int slot_orig;
2143         struct extent_buffer *leaf;
2144         struct btrfs_item *item;
2145         u32 nritems;
2146         unsigned int data_end;
2147         unsigned int old_data;
2148         unsigned int old_size;
2149         int i;
2150
2151         slot_orig = path->slots[0];
2152         leaf = path->nodes[0];
2153
2154         nritems = btrfs_header_nritems(leaf);
2155         data_end = leaf_data_end(root, leaf);
2156
2157         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2158                 btrfs_print_leaf(root, leaf);
2159                 BUG();
2160         }
2161         slot = path->slots[0];
2162         old_data = btrfs_item_end_nr(leaf, slot);
2163
2164         BUG_ON(slot < 0);
2165         if (slot >= nritems) {
2166                 btrfs_print_leaf(root, leaf);
2167                 printk("slot %d too large, nritems %d\n", slot, nritems);
2168                 BUG_ON(1);
2169         }
2170
2171         /*
2172          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2173          */
2174         /* first correct the data pointers */
2175         for (i = slot; i < nritems; i++) {
2176                 u32 ioff;
2177                 item = btrfs_item_nr(leaf, i);
2178                 ioff = btrfs_item_offset(leaf, item);
2179                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2180         }
2181
2182         /* shift the data */
2183         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2184                       data_end - data_size, btrfs_leaf_data(leaf) +
2185                       data_end, old_data - data_end);
2186
2187         data_end = old_data;
2188         old_size = btrfs_item_size_nr(leaf, slot);
2189         item = btrfs_item_nr(leaf, slot);
2190         btrfs_set_item_size(leaf, item, old_size + data_size);
2191         btrfs_mark_buffer_dirty(leaf);
2192
2193         ret = 0;
2194         if (btrfs_leaf_free_space(root, leaf) < 0) {
2195                 btrfs_print_leaf(root, leaf);
2196                 BUG();
2197         }
2198         return ret;
2199 }
2200
2201 /*
2202  * Given a key and some data, insert an item into the tree.
2203  * This does all the path init required, making room in the tree if needed.
2204  */
2205 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2206                             struct btrfs_root *root,
2207                             struct btrfs_path *path,
2208                             struct btrfs_key *cpu_key, u32 *data_size,
2209                             int nr)
2210 {
2211         struct extent_buffer *leaf;
2212         struct btrfs_item *item;
2213         int ret = 0;
2214         int slot;
2215         int slot_orig;
2216         int i;
2217         u32 nritems;
2218         u32 total_size = 0;
2219         u32 total_data = 0;
2220         unsigned int data_end;
2221         struct btrfs_disk_key disk_key;
2222
2223         for (i = 0; i < nr; i++) {
2224                 total_data += data_size[i];
2225         }
2226
2227         /* create a root if there isn't one */
2228         if (!root->node)
2229                 BUG();
2230
2231         total_size = total_data + (nr - 1) * sizeof(struct btrfs_item);
2232         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2233         if (ret == 0) {
2234                 return -EEXIST;
2235         }
2236         if (ret < 0)
2237                 goto out;
2238
2239         slot_orig = path->slots[0];
2240         leaf = path->nodes[0];
2241
2242         nritems = btrfs_header_nritems(leaf);
2243         data_end = leaf_data_end(root, leaf);
2244
2245         if (btrfs_leaf_free_space(root, leaf) <
2246             sizeof(struct btrfs_item) + total_size) {
2247                 btrfs_print_leaf(root, leaf);
2248                 printk("not enough freespace need %u have %d\n",
2249                        total_size, btrfs_leaf_free_space(root, leaf));
2250                 BUG();
2251         }
2252
2253         slot = path->slots[0];
2254         BUG_ON(slot < 0);
2255
2256         if (slot != nritems) {
2257                 int i;
2258                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2259
2260                 if (old_data < data_end) {
2261                         btrfs_print_leaf(root, leaf);
2262                         printk("slot %d old_data %d data_end %d\n",
2263                                slot, old_data, data_end);
2264                         BUG_ON(1);
2265                 }
2266                 /*
2267                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2268                  */
2269                 /* first correct the data pointers */
2270                 for (i = slot; i < nritems; i++) {
2271                         u32 ioff;
2272
2273                         item = btrfs_item_nr(leaf, i);
2274                         ioff = btrfs_item_offset(leaf, item);
2275                         btrfs_set_item_offset(leaf, item, ioff - total_data);
2276                 }
2277
2278                 /* shift the items */
2279                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2280                               btrfs_item_nr_offset(slot),
2281                               (nritems - slot) * sizeof(struct btrfs_item));
2282
2283                 /* shift the data */
2284                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2285                               data_end - total_data, btrfs_leaf_data(leaf) +
2286                               data_end, old_data - data_end);
2287                 data_end = old_data;
2288         }
2289
2290         /* setup the item for the new data */
2291         for (i = 0; i < nr; i++) {
2292                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
2293                 btrfs_set_item_key(leaf, &disk_key, slot + i);
2294                 item = btrfs_item_nr(leaf, slot + i);
2295                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
2296                 data_end -= data_size[i];
2297                 btrfs_set_item_size(leaf, item, data_size[i]);
2298         }
2299         btrfs_set_header_nritems(leaf, nritems + nr);
2300         btrfs_mark_buffer_dirty(leaf);
2301
2302         ret = 0;
2303         if (slot == 0) {
2304                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2305                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2306         }
2307
2308         if (btrfs_leaf_free_space(root, leaf) < 0) {
2309                 btrfs_print_leaf(root, leaf);
2310                 BUG();
2311         }
2312
2313 out:
2314         return ret;
2315 }
2316
2317 /*
2318  * Given a key and some data, insert an item into the tree.
2319  * This does all the path init required, making room in the tree if needed.
2320  */
2321 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2322                       *root, struct btrfs_key *cpu_key, void *data, u32
2323                       data_size)
2324 {
2325         int ret = 0;
2326         struct btrfs_path *path;
2327         struct extent_buffer *leaf;
2328         unsigned long ptr;
2329
2330         path = btrfs_alloc_path();
2331         BUG_ON(!path);
2332         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2333         if (!ret) {
2334                 leaf = path->nodes[0];
2335                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2336                 write_extent_buffer(leaf, data, ptr, data_size);
2337                 btrfs_mark_buffer_dirty(leaf);
2338         }
2339         btrfs_free_path(path);
2340         return ret;
2341 }
2342
2343 /*
2344  * delete the pointer from a given node.
2345  *
2346  * If the delete empties a node, the node is removed from the tree,
2347  * continuing all the way the root if required.  The root is converted into
2348  * a leaf if all the nodes are emptied.
2349  */
2350 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2351                    struct btrfs_path *path, int level, int slot)
2352 {
2353         struct extent_buffer *parent = path->nodes[level];
2354         u32 nritems;
2355         int ret = 0;
2356         int wret;
2357
2358         nritems = btrfs_header_nritems(parent);
2359         if (slot != nritems -1) {
2360                 memmove_extent_buffer(parent,
2361                               btrfs_node_key_ptr_offset(slot),
2362                               btrfs_node_key_ptr_offset(slot + 1),
2363                               sizeof(struct btrfs_key_ptr) *
2364                               (nritems - slot - 1));
2365         }
2366         nritems--;
2367         btrfs_set_header_nritems(parent, nritems);
2368         if (nritems == 0 && parent == root->node) {
2369                 BUG_ON(btrfs_header_level(root->node) != 1);
2370                 /* just turn the root into a leaf and break */
2371                 btrfs_set_header_level(root->node, 0);
2372         } else if (slot == 0) {
2373                 struct btrfs_disk_key disk_key;
2374
2375                 btrfs_node_key(parent, &disk_key, 0);
2376                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2377                 if (wret)
2378                         ret = wret;
2379         }
2380         btrfs_mark_buffer_dirty(parent);
2381         return ret;
2382 }
2383
2384 /*
2385  * delete the item at the leaf level in path.  If that empties
2386  * the leaf, remove it from the tree
2387  */
2388 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2389                     struct btrfs_path *path, int slot, int nr)
2390 {
2391         struct extent_buffer *leaf;
2392         struct btrfs_item *item;
2393         int last_off;
2394         int dsize = 0;
2395         int ret = 0;
2396         int wret;
2397         int i;
2398         u32 nritems;
2399
2400         leaf = path->nodes[0];
2401         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2402
2403         for (i = 0; i < nr; i++)
2404                 dsize += btrfs_item_size_nr(leaf, slot + i);
2405
2406         nritems = btrfs_header_nritems(leaf);
2407
2408         if (slot + nr != nritems) {
2409                 int i;
2410                 int data_end = leaf_data_end(root, leaf);
2411
2412                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2413                               data_end + dsize,
2414                               btrfs_leaf_data(leaf) + data_end,
2415                               last_off - data_end);
2416
2417                 for (i = slot + nr; i < nritems; i++) {
2418                         u32 ioff;
2419
2420                         item = btrfs_item_nr(leaf, i);
2421                         ioff = btrfs_item_offset(leaf, item);
2422                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2423                 }
2424
2425                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2426                               btrfs_item_nr_offset(slot + nr),
2427                               sizeof(struct btrfs_item) *
2428                               (nritems - slot - nr));
2429         }
2430         btrfs_set_header_nritems(leaf, nritems - nr);
2431         nritems -= nr;
2432
2433         /* delete the leaf if we've emptied it */
2434         if (nritems == 0) {
2435                 if (leaf == root->node) {
2436                         btrfs_set_header_level(leaf, 0);
2437                 } else {
2438                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2439                         clean_tree_block(trans, root, leaf);
2440                         wait_on_tree_block_writeback(root, leaf);
2441                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2442                         if (wret)
2443                                 ret = wret;
2444                         wret = btrfs_free_extent(trans, root,
2445                                          leaf->start, leaf->len,
2446                                          btrfs_header_owner(path->nodes[1]),
2447                                          root_gen, 0, 0, 1);
2448                         if (wret)
2449                                 ret = wret;
2450                 }
2451         } else {
2452                 int used = leaf_space_used(leaf, 0, nritems);
2453                 if (slot == 0) {
2454                         struct btrfs_disk_key disk_key;
2455
2456                         btrfs_item_key(leaf, &disk_key, 0);
2457                         wret = fixup_low_keys(trans, root, path,
2458                                               &disk_key, 1);
2459                         if (wret)
2460                                 ret = wret;
2461                 }
2462
2463                 /* delete the leaf if it is mostly empty */
2464                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2465                         /* push_leaf_left fixes the path.
2466                          * make sure the path still points to our leaf
2467                          * for possible call to del_ptr below
2468                          */
2469                         slot = path->slots[1];
2470                         extent_buffer_get(leaf);
2471
2472                         wret = push_leaf_left(trans, root, path, 1, 1);
2473                         if (wret < 0 && wret != -ENOSPC)
2474                                 ret = wret;
2475
2476                         if (path->nodes[0] == leaf &&
2477                             btrfs_header_nritems(leaf)) {
2478                                 wret = push_leaf_right(trans, root, path, 1, 1);
2479                                 if (wret < 0 && wret != -ENOSPC)
2480                                         ret = wret;
2481                         }
2482
2483                         if (btrfs_header_nritems(leaf) == 0) {
2484                                 u64 root_gen;
2485                                 u64 bytenr = leaf->start;
2486                                 u32 blocksize = leaf->len;
2487
2488                                 root_gen = btrfs_header_generation(
2489                                                            path->nodes[1]);
2490
2491                                 clean_tree_block(trans, root, leaf);
2492                                 wait_on_tree_block_writeback(root, leaf);
2493
2494                                 wret = del_ptr(trans, root, path, 1, slot);
2495                                 if (wret)
2496                                         ret = wret;
2497
2498                                 free_extent_buffer(leaf);
2499                                 wret = btrfs_free_extent(trans, root, bytenr,
2500                                              blocksize,
2501                                              btrfs_header_owner(path->nodes[1]),
2502                                              root_gen, 0, 0, 1);
2503                                 if (wret)
2504                                         ret = wret;
2505                         } else {
2506                                 btrfs_mark_buffer_dirty(leaf);
2507                                 free_extent_buffer(leaf);
2508                         }
2509                 } else {
2510                         btrfs_mark_buffer_dirty(leaf);
2511                 }
2512         }
2513         return ret;
2514 }
2515
2516 /*
2517  * walk up the tree as far as required to find the previous leaf.
2518  * returns 0 if it found something or 1 if there are no lesser leaves.
2519  * returns < 0 on io errors.
2520  */
2521 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2522 {
2523         u64 bytenr;
2524         int slot;
2525         int level = 1;
2526         struct extent_buffer *c;
2527         struct extent_buffer *next = NULL;
2528
2529         while(level < BTRFS_MAX_LEVEL) {
2530                 if (!path->nodes[level])
2531                         return 1;
2532
2533                 slot = path->slots[level];
2534                 c = path->nodes[level];
2535                 if (slot == 0) {
2536                         level++;
2537                         if (level == BTRFS_MAX_LEVEL)
2538                                 return 1;
2539                         continue;
2540                 }
2541                 slot--;
2542
2543                 bytenr = btrfs_node_blockptr(c, slot);
2544                 if (next)
2545                         free_extent_buffer(next);
2546
2547                 next = read_tree_block(root, bytenr,
2548                                        btrfs_level_size(root, level - 1));
2549                 break;
2550         }
2551         path->slots[level] = slot;
2552         while(1) {
2553                 level--;
2554                 c = path->nodes[level];
2555                 free_extent_buffer(c);
2556                 slot = btrfs_header_nritems(next);
2557                 if (slot != 0)
2558                         slot--;
2559                 path->nodes[level] = next;
2560                 path->slots[level] = slot;
2561                 if (!level)
2562                         break;
2563                 next = read_tree_block(root, btrfs_node_blockptr(next, slot),
2564                                        btrfs_level_size(root, level - 1));
2565         }
2566         return 0;
2567 }
2568
2569 /*
2570  * walk up the tree as far as required to find the next leaf.
2571  * returns 0 if it found something or 1 if there are no greater leaves.
2572  * returns < 0 on io errors.
2573  */
2574 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2575 {
2576         int slot;
2577         int level = 1;
2578         u64 bytenr;
2579         struct extent_buffer *c;
2580         struct extent_buffer *next = NULL;
2581
2582         while(level < BTRFS_MAX_LEVEL) {
2583                 if (!path->nodes[level])
2584                         return 1;
2585
2586                 slot = path->slots[level] + 1;
2587                 c = path->nodes[level];
2588                 if (slot >= btrfs_header_nritems(c)) {
2589                         level++;
2590                         if (level == BTRFS_MAX_LEVEL)
2591                                 return 1;
2592                         continue;
2593                 }
2594
2595                 bytenr = btrfs_node_blockptr(c, slot);
2596                 if (next)
2597                         free_extent_buffer(next);
2598
2599                 if (path->reada)
2600                         reada_for_search(root, path, level, slot, 0);
2601
2602                 next = read_tree_block(root, bytenr,
2603                                        btrfs_level_size(root, level -1));
2604                 break;
2605         }
2606         path->slots[level] = slot;
2607         while(1) {
2608                 level--;
2609                 c = path->nodes[level];
2610                 free_extent_buffer(c);
2611                 path->nodes[level] = next;
2612                 path->slots[level] = 0;
2613                 if (!level)
2614                         break;
2615                 if (path->reada)
2616                         reada_for_search(root, path, level, 0, 0);
2617                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2618                                        btrfs_level_size(root, level - 1));
2619         }
2620         return 0;
2621 }