Update btrfs-progs to better match the kernel
[platform/upstream/btrfs-progs.git] / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "ctree.h"
23 #include "disk-io.h"
24 #include "transaction.h"
25 #include "print-tree.h"
26 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
27                       *root, struct btrfs_path *path, int level);
28 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
29                       *root, struct btrfs_key *ins_key,
30                       struct btrfs_path *path, int data_size, int extend);
31 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
32                           *root, struct btrfs_buffer *dst, struct btrfs_buffer
33                           *src);
34 static int balance_node_right(struct btrfs_trans_handle *trans, struct
35                               btrfs_root *root, struct btrfs_buffer *dst_buf,
36                               struct btrfs_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38                    struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42         memset(p, 0, sizeof(*p));
43 }
44
45 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
46 {
47         int i;
48         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
49                 if (!p->nodes[i])
50                         break;
51                 btrfs_block_release(root, p->nodes[i]);
52         }
53         memset(p, 0, sizeof(*p));
54 }
55 int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
56                            *root, struct btrfs_buffer *buf, struct btrfs_buffer
57                            *parent, int parent_slot, struct btrfs_buffer
58                            **cow_ret)
59 {
60         struct btrfs_buffer *cow;
61
62         if (!list_empty(&buf->dirty)) {
63                 *cow_ret = buf;
64                 return 0;
65         }
66         cow = btrfs_alloc_free_block(trans, root, buf->size);
67         memcpy(&cow->node, &buf->node, buf->size);
68         btrfs_set_header_bytenr(&cow->node.header, cow->bytenr);
69         btrfs_set_header_generation(&cow->node.header, trans->transid);
70         btrfs_set_header_owner(&cow->node.header, root->root_key.objectid);
71         *cow_ret = cow;
72         btrfs_inc_ref(trans, root, buf);
73         if (buf == root->node) {
74                 root->node = cow;
75                 cow->count++;
76                 if (buf != root->commit_root)
77                         btrfs_free_extent(trans, root, buf->bytenr,
78                                           buf->size, 1);
79                 btrfs_block_release(root, buf);
80         } else {
81                 btrfs_set_node_blockptr(&parent->node, parent_slot,
82                                         cow->bytenr);
83                 BUG_ON(list_empty(&parent->dirty));
84                 btrfs_free_extent(trans, root, buf->bytenr, buf->size, 1);
85         }
86         btrfs_block_release(root, buf);
87         return 0;
88 }
89
90 /*
91  * The leaf data grows from end-to-front in the node.
92  * this returns the address of the start of the last item,
93  * which is the stop of the leaf data stack
94  */
95 static inline unsigned int leaf_data_end(struct btrfs_root *root,
96                                          struct btrfs_leaf *leaf)
97 {
98         u32 nr = btrfs_header_nritems(&leaf->header);
99         if (nr == 0)
100                 return BTRFS_LEAF_DATA_SIZE(root);
101         return btrfs_item_offset(leaf->items + nr - 1);
102 }
103
104 /*
105  * how many bytes are required to store the items in a leaf.  start
106  * and nr indicate which items in the leaf to check.  This totals up the
107  * space used both by the item structs and the item data
108  */
109 static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
110 {
111         int data_len;
112         int nritems = btrfs_header_nritems(&l->header);
113         int end;
114
115         if (nritems < start + nr)
116                 end = nritems - 1;
117         else
118                 end = start + nr - 1;
119
120         if (!nr)
121                 return 0;
122         data_len = btrfs_item_end(l->items + start);
123         data_len = data_len - btrfs_item_offset(l->items + end);
124         data_len += sizeof(struct btrfs_item) * nr;
125         return data_len;
126 }
127
128 /*
129  * The space between the end of the leaf items and
130  * the start of the leaf data.  IOW, how much room
131  * the leaf has left for both items and data
132  */
133 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
134 {
135         int nritems = btrfs_header_nritems(&leaf->header);
136         return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
137 }
138
139 /*
140  * compare two keys in a memcmp fashion
141  */
142 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
143 {
144         struct btrfs_key k1;
145
146         btrfs_disk_key_to_cpu(&k1, disk);
147
148         if (k1.objectid > k2->objectid)
149                 return 1;
150         if (k1.objectid < k2->objectid)
151                 return -1;
152         if (k1.type > k2->type)
153                 return 1;
154         if (k1.type < k2->type)
155                 return -1;
156         if (k1.offset > k2->offset)
157                 return 1;
158         if (k1.offset < k2->offset)
159                 return -1;
160         return 0;
161 }
162
163 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
164                       int level)
165 {
166         int i;
167         struct btrfs_node *parent = NULL;
168         struct btrfs_node *node = &path->nodes[level]->node;
169         int parent_slot;
170         u32 nritems = btrfs_header_nritems(&node->header);
171
172         if (path->nodes[level + 1])
173                 parent = &path->nodes[level + 1]->node;
174         parent_slot = path->slots[level + 1];
175         BUG_ON(nritems == 0);
176         if (parent) {
177                 struct btrfs_disk_key *parent_key;
178                 parent_key = &parent->ptrs[parent_slot].key;
179                 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
180                               sizeof(struct btrfs_disk_key)));
181                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
182                        btrfs_header_bytenr(&node->header));
183         }
184         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
185         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
186                 struct btrfs_key cpukey;
187                 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
188                 BUG_ON(btrfs_comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
189         }
190         return 0;
191 }
192
193 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
194                       int level)
195 {
196         int i;
197         struct btrfs_leaf *leaf = &path->nodes[level]->leaf;
198         struct btrfs_node *parent = NULL;
199         int parent_slot;
200         u32 nritems = btrfs_header_nritems(&leaf->header);
201
202         if (path->nodes[level + 1])
203                 parent = &path->nodes[level + 1]->node;
204         parent_slot = path->slots[level + 1];
205         BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
206
207         if (nritems == 0)
208                 return 0;
209
210         if (parent) {
211                 struct btrfs_disk_key *parent_key;
212                 parent_key = &parent->ptrs[parent_slot].key;
213                 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
214                        sizeof(struct btrfs_disk_key)));
215                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
216                        btrfs_header_bytenr(&leaf->header));
217         }
218         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
219                 struct btrfs_key cpukey;
220                 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
221                 BUG_ON(btrfs_comp_keys(&leaf->items[i].key,
222                                  &cpukey) >= 0);
223                 BUG_ON(btrfs_item_offset(leaf->items + i) !=
224                         btrfs_item_end(leaf->items + i + 1));
225                 if (i == 0) {
226                         BUG_ON(btrfs_item_offset(leaf->items + i) +
227                                btrfs_item_size(leaf->items + i) !=
228                                BTRFS_LEAF_DATA_SIZE(root));
229                 }
230         }
231         return 0;
232 }
233
234 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
235                         int level)
236 {
237         if (level == 0)
238                 return check_leaf(root, path, level);
239         return check_node(root, path, level);
240 }
241
242 /*
243  * search for key in the array p.  items p are item_size apart
244  * and there are 'max' items in p
245  * the slot in the array is returned via slot, and it points to
246  * the place where you would insert key if it is not found in
247  * the array.
248  *
249  * slot may point to max if the key is bigger than all of the keys
250  */
251 static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
252                        int max, int *slot)
253 {
254         int low = 0;
255         int high = max;
256         int mid;
257         int ret;
258         struct btrfs_disk_key *tmp;
259
260         while(low < high) {
261                 mid = (low + high) / 2;
262                 tmp = (struct btrfs_disk_key *)(p + mid * item_size);
263                 ret = btrfs_comp_keys(tmp, key);
264
265                 if (ret < 0)
266                         low = mid + 1;
267                 else if (ret > 0)
268                         high = mid;
269                 else {
270                         *slot = mid;
271                         return 0;
272                 }
273         }
274         *slot = low;
275         return 1;
276 }
277
278 /*
279  * simple bin_search frontend that does the right thing for
280  * leaves vs nodes
281  */
282 static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
283 {
284         if (btrfs_is_leaf(c)) {
285                 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
286                 return generic_bin_search((void *)l->items,
287                                           sizeof(struct btrfs_item),
288                                           key, btrfs_header_nritems(&c->header),
289                                           slot);
290         } else {
291                 return generic_bin_search((void *)c->ptrs,
292                                           sizeof(struct btrfs_key_ptr),
293                                           key, btrfs_header_nritems(&c->header),
294                                           slot);
295         }
296         return -1;
297 }
298
299 static struct btrfs_buffer *read_node_slot(struct btrfs_root *root,
300                                    struct btrfs_buffer *parent_buf,
301                                    int slot)
302 {
303         struct btrfs_node *node = &parent_buf->node;
304         int level = btrfs_header_level(&node->header);
305         if (slot < 0)
306                 return NULL;
307         if (slot >= btrfs_header_nritems(&node->header))
308                 return NULL;
309         return read_tree_block(root, btrfs_node_blockptr(node, slot),
310                                btrfs_level_size(root, level - 1));
311 }
312
313 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
314                          *root, struct btrfs_path *path, int level)
315 {
316         struct btrfs_buffer *right_buf;
317         struct btrfs_buffer *mid_buf;
318         struct btrfs_buffer *left_buf;
319         struct btrfs_buffer *parent_buf = NULL;
320         struct btrfs_node *right = NULL;
321         struct btrfs_node *mid;
322         struct btrfs_node *left = NULL;
323         struct btrfs_node *parent = NULL;
324         int ret = 0;
325         int wret;
326         int pslot;
327         int orig_slot = path->slots[level];
328         u64 orig_ptr;
329
330         if (level == 0)
331                 return 0;
332
333         mid_buf = path->nodes[level];
334         mid = &mid_buf->node;
335         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
336
337         if (level < BTRFS_MAX_LEVEL - 1)
338                 parent_buf = path->nodes[level + 1];
339         pslot = path->slots[level + 1];
340
341         /*
342          * deal with the case where there is only one pointer in the root
343          * by promoting the node below to a root
344          */
345         if (!parent_buf) {
346                 struct btrfs_buffer *child;
347                 u64 bytenr = mid_buf->bytenr;
348
349                 if (btrfs_header_nritems(&mid->header) != 1)
350                         return 0;
351
352                 /* promote the child to a root */
353                 child = read_node_slot(root, mid_buf, 0);
354                 BUG_ON(!child);
355                 root->node = child;
356                 path->nodes[level] = NULL;
357                 /* once for the path */
358                 btrfs_block_release(root, mid_buf);
359                 /* once for the root ptr */
360                 btrfs_block_release(root, mid_buf);
361                 clean_tree_block(trans, root, mid_buf);
362                 return btrfs_free_extent(trans, root, bytenr,
363                                          root->nodesize, 1);
364         }
365         parent = &parent_buf->node;
366
367         if (btrfs_header_nritems(&mid->header) >
368             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
369                 return 0;
370
371         left_buf = read_node_slot(root, parent_buf, pslot - 1);
372         right_buf = read_node_slot(root, parent_buf, pslot + 1);
373
374         /* first, try to make some room in the middle buffer */
375         if (left_buf) {
376                 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
377                                 &left_buf);
378                 left = &left_buf->node;
379                 orig_slot += btrfs_header_nritems(&left->header);
380                 wret = push_node_left(trans, root, left_buf, mid_buf);
381                 if (wret < 0)
382                         ret = wret;
383         }
384
385         /*
386          * then try to empty the right most buffer into the middle
387          */
388         if (right_buf) {
389                 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
390                                 &right_buf);
391                 right = &right_buf->node;
392                 wret = push_node_left(trans, root, mid_buf, right_buf);
393                 if (wret < 0)
394                         ret = wret;
395                 if (btrfs_header_nritems(&right->header) == 0) {
396                         u64 bytenr = right_buf->bytenr;
397                         btrfs_block_release(root, right_buf);
398                         clean_tree_block(trans, root, right_buf);
399                         right_buf = NULL;
400                         right = NULL;
401                         wret = del_ptr(trans, root, path, level + 1, pslot +
402                                        1);
403                         if (wret)
404                                 ret = wret;
405                         wret = btrfs_free_extent(trans, root, bytenr,
406                                                  root->nodesize, 1);
407                         if (wret)
408                                 ret = wret;
409                 } else {
410                         memcpy(&parent->ptrs[pslot + 1].key,
411                                 &right->ptrs[0].key,
412                                 sizeof(struct btrfs_disk_key));
413                         BUG_ON(list_empty(&parent_buf->dirty));
414                 }
415         }
416         if (btrfs_header_nritems(&mid->header) == 1) {
417                 /*
418                  * we're not allowed to leave a node with one item in the
419                  * tree during a delete.  A deletion from lower in the tree
420                  * could try to delete the only pointer in this node.
421                  * So, pull some keys from the left.
422                  * There has to be a left pointer at this point because
423                  * otherwise we would have pulled some pointers from the
424                  * right
425                  */
426                 BUG_ON(!left_buf);
427                 wret = balance_node_right(trans, root, mid_buf, left_buf);
428                 if (wret < 0)
429                         ret = wret;
430                 BUG_ON(wret == 1);
431         }
432         if (btrfs_header_nritems(&mid->header) == 0) {
433                 /* we've managed to empty the middle node, drop it */
434                 u64 bytenr = mid_buf->bytenr;
435                 btrfs_block_release(root, mid_buf);
436                 clean_tree_block(trans, root, mid_buf);
437                 mid_buf = NULL;
438                 mid = NULL;
439                 wret = del_ptr(trans, root, path, level + 1, pslot);
440                 if (wret)
441                         ret = wret;
442                 wret = btrfs_free_extent(trans, root, bytenr,
443                                          root->nodesize, 1);
444                 if (wret)
445                         ret = wret;
446         } else {
447                 /* update the parent key to reflect our changes */
448                 memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key,
449                        sizeof(struct btrfs_disk_key));
450                 BUG_ON(list_empty(&parent_buf->dirty));
451         }
452
453         /* update the path */
454         if (left_buf) {
455                 if (btrfs_header_nritems(&left->header) > orig_slot) {
456                         left_buf->count++; // released below
457                         path->nodes[level] = left_buf;
458                         path->slots[level + 1] -= 1;
459                         path->slots[level] = orig_slot;
460                         if (mid_buf)
461                                 btrfs_block_release(root, mid_buf);
462                 } else {
463                         orig_slot -= btrfs_header_nritems(&left->header);
464                         path->slots[level] = orig_slot;
465                 }
466         }
467         /* double check we haven't messed things up */
468         check_block(root, path, level);
469         if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node,
470                                             path->slots[level]))
471                 BUG();
472
473         if (right_buf)
474                 btrfs_block_release(root, right_buf);
475         if (left_buf)
476                 btrfs_block_release(root, left_buf);
477         return ret;
478 }
479 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
480                                 struct btrfs_root *root,
481                                 struct btrfs_path *path, int level)
482 {
483         struct btrfs_node *right;
484         struct btrfs_node *mid;
485         struct btrfs_node *left;
486         struct btrfs_node *parent;
487         struct btrfs_buffer *right_buf;
488         struct btrfs_buffer *mid_buf;
489         struct btrfs_buffer *left_buf;
490         struct btrfs_buffer *parent_buf = NULL;
491         int ret = 0;
492         int wret;
493         int pslot;
494         int orig_slot = path->slots[level];
495         u64 orig_ptr;
496
497         if (level == 0)
498                 return 1;
499
500         mid_buf = path->nodes[level];
501         mid = &mid_buf->node;
502         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
503
504         if (level < BTRFS_MAX_LEVEL - 1)
505                 parent_buf = path->nodes[level + 1];
506         pslot = path->slots[level + 1];
507
508         if (!parent_buf)
509                 return 1;
510         parent = &parent_buf->node;
511
512         left_buf = read_node_slot(root, parent_buf, pslot - 1);
513         left = &left_buf->node;
514
515         /* first, try to make some room in the middle buffer */
516         if (left_buf) {
517                 u32 left_nr;
518                 left_nr = btrfs_header_nritems(&left->header);
519                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
520                         wret = 1;
521                 } else {
522                         ret = btrfs_cow_block(trans, root, left_buf,
523                                               parent_buf, pslot - 1,
524                                               &left_buf);
525                         left = &left_buf->node;
526                         if (ret)
527                                 wret = 1;
528                         else {
529                                 wret = push_node_left(trans, root,
530                                                       left_buf, mid_buf);
531                         }
532                 }
533                 if (wret < 0)
534                         ret = wret;
535                 if (wret == 0) {
536                         orig_slot += left_nr;
537                         memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key,
538                                 sizeof(struct btrfs_disk_key));
539                         BUG_ON(list_empty(&parent_buf->dirty));
540                         if (btrfs_header_nritems(&left->header) > orig_slot) {
541                                 path->nodes[level] = left_buf;
542                                 path->slots[level + 1] -= 1;
543                                 path->slots[level] = orig_slot;
544                                 btrfs_block_release(root, mid_buf);
545                         } else {
546                                 orig_slot -=
547                                         btrfs_header_nritems(&left->header);
548                                 path->slots[level] = orig_slot;
549                                 btrfs_block_release(root, left_buf);
550                         }
551                         return 0;
552                 }
553                 btrfs_block_release(root, left_buf);
554         }
555
556         right_buf = read_node_slot(root, parent_buf, pslot + 1);
557         right = &right_buf->node;
558
559         /*
560          * then try to empty the right most buffer into the middle
561          */
562         if (right_buf) {
563                 u32 right_nr;
564                 right_nr = btrfs_header_nritems(&right->header);
565                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
566                         wret = 1;
567                 } else {
568                         ret = btrfs_cow_block(trans, root, right_buf,
569                                               parent_buf, pslot + 1,
570                                               &right_buf);
571                         right = &right_buf->node;
572                         if (ret)
573                                 wret = 1;
574                         else {
575                                 wret = balance_node_right(trans, root,
576                                                 right_buf, mid_buf);
577                         }
578                 }
579                 if (wret < 0)
580                         ret = wret;
581                 if (wret == 0) {
582                         memcpy(&parent->ptrs[pslot + 1].key,
583                                &right->ptrs[0].key,
584                                sizeof(struct btrfs_disk_key));
585                         BUG_ON(list_empty(&parent_buf->dirty));
586                         if (btrfs_header_nritems(&mid->header) <= orig_slot) {
587                                 path->nodes[level] = right_buf;
588                                 path->slots[level + 1] += 1;
589                                 path->slots[level] = orig_slot -
590                                         btrfs_header_nritems(&mid->header);
591                                 btrfs_block_release(root, mid_buf);
592                         } else {
593                                 btrfs_block_release(root, right_buf);
594                         }
595                         return 0;
596                 }
597                 btrfs_block_release(root, right_buf);
598         }
599         return 1;
600 }
601
602 /*
603  * look for key in the tree.  path is filled in with nodes along the way
604  * if key is found, we return zero and you can find the item in the leaf
605  * level of the path (level 0)
606  *
607  * If the key isn't found, the path points to the slot where it should
608  * be inserted, and 1 is returned.  If there are other errors during the
609  * search a negative error number is returned.
610  *
611  * if ins_len > 0, nodes and leaves will be split as we walk down the
612  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
613  * possible)
614  */
615 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
616                       *root, struct btrfs_key *key, struct btrfs_path *p, int
617                       ins_len, int cow)
618 {
619         struct btrfs_buffer *b;
620         struct btrfs_node *c;
621         int slot;
622         int ret;
623         int level;
624
625 again:
626         b = root->node;
627         b->count++;
628         while (b) {
629                 level = btrfs_header_level(&b->node.header);
630                 if (cow) {
631                         int wret;
632                         wret = btrfs_cow_block(trans, root, b,
633                                               p->nodes[level + 1],
634                                               p->slots[level + 1],
635                                               &b);
636                         if (wret) {
637                                 btrfs_block_release(root, b);
638                                 return wret;
639                         }
640                 }
641                 BUG_ON(!cow && ins_len);
642                 c = &b->node;
643                 p->nodes[level] = b;
644                 ret = check_block(root, p, level);
645                 if (ret)
646                         return -1;
647                 ret = bin_search(c, key, &slot);
648                 if (!btrfs_is_leaf(c)) {
649                         if (ret && slot > 0)
650                                 slot -= 1;
651                         p->slots[level] = slot;
652                         if (ins_len > 0 && btrfs_header_nritems(&c->header) >=
653                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
654                                 int sret = split_node(trans, root, p, level);
655                                 BUG_ON(sret > 0);
656                                 if (sret)
657                                         return sret;
658                                 b = p->nodes[level];
659                                 c = &b->node;
660                                 slot = p->slots[level];
661                         } else if (ins_len < 0) {
662                                 int sret = balance_level(trans, root, p,
663                                                          level);
664                                 if (sret)
665                                         return sret;
666                                 b = p->nodes[level];
667                                 if (!b) {
668                                         btrfs_release_path(NULL, p);
669                                         goto again;
670                                 }
671                                 c = &b->node;
672                                 slot = p->slots[level];
673                                 BUG_ON(btrfs_header_nritems(&c->header) == 1);
674                         }
675                         b = read_tree_block(root,
676                                             btrfs_node_blockptr(c, slot),
677                                             btrfs_level_size(root, level - 1));
678                 } else {
679                         struct btrfs_leaf *l = (struct btrfs_leaf *)c;
680                         p->slots[level] = slot;
681                         if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
682                             sizeof(struct btrfs_item) + ins_len) {
683                                 int sret = split_leaf(trans, root, key,
684                                                       p, ins_len, ret == 0);
685                                 BUG_ON(sret > 0);
686                                 if (sret)
687                                         return sret;
688                         }
689                         BUG_ON(root->node->count == 1);
690                         return ret;
691                 }
692         }
693         BUG_ON(root->node->count == 1);
694         return 1;
695 }
696
697 /*
698  * adjust the pointers going up the tree, starting at level
699  * making sure the right key of each node is points to 'key'.
700  * This is used after shifting pointers to the left, so it stops
701  * fixing up pointers when a given leaf/node is not in slot 0 of the
702  * higher levels
703  *
704  * If this fails to write a tree block, it returns -1, but continues
705  * fixing up the blocks in ram so the tree is consistent.
706  */
707 static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
708                           *root, struct btrfs_path *path, struct btrfs_disk_key
709                           *key, int level)
710 {
711         int i;
712         int ret = 0;
713         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
714                 struct btrfs_node *t;
715                 int tslot = path->slots[i];
716                 if (!path->nodes[i])
717                         break;
718                 t = &path->nodes[i]->node;
719                 memcpy(&t->ptrs[tslot].key, key, sizeof(*key));
720                 BUG_ON(list_empty(&path->nodes[i]->dirty));
721                 if (tslot != 0)
722                         break;
723         }
724         return ret;
725 }
726
727 /*
728  * try to push data from one node into the next node left in the
729  * tree.
730  *
731  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
732  * error, and > 0 if there was no room in the left hand block.
733  */
734 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
735                           *root, struct btrfs_buffer *dst_buf, struct
736                           btrfs_buffer *src_buf)
737 {
738         struct btrfs_node *src = &src_buf->node;
739         struct btrfs_node *dst = &dst_buf->node;
740         int push_items = 0;
741         int src_nritems;
742         int dst_nritems;
743         int ret = 0;
744
745         src_nritems = btrfs_header_nritems(&src->header);
746         dst_nritems = btrfs_header_nritems(&dst->header);
747         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
748         if (push_items <= 0) {
749                 return 1;
750         }
751
752         if (src_nritems < push_items)
753                 push_items = src_nritems;
754
755         memcpy(dst->ptrs + dst_nritems, src->ptrs,
756                 push_items * sizeof(struct btrfs_key_ptr));
757         if (push_items < src_nritems) {
758                 memmove(src->ptrs, src->ptrs + push_items,
759                         (src_nritems - push_items) *
760                         sizeof(struct btrfs_key_ptr));
761         }
762         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
763         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
764         BUG_ON(list_empty(&src_buf->dirty));
765         BUG_ON(list_empty(&dst_buf->dirty));
766         return ret;
767 }
768
769 /*
770  * try to push data from one node into the next node right in the
771  * tree.
772  *
773  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
774  * error, and > 0 if there was no room in the right hand block.
775  *
776  * this will  only push up to 1/2 the contents of the left node over
777  */
778 static int balance_node_right(struct btrfs_trans_handle *trans, struct
779                               btrfs_root *root, struct btrfs_buffer *dst_buf,
780                               struct btrfs_buffer *src_buf)
781 {
782         struct btrfs_node *src = &src_buf->node;
783         struct btrfs_node *dst = &dst_buf->node;
784         int push_items = 0;
785         int max_push;
786         int src_nritems;
787         int dst_nritems;
788         int ret = 0;
789
790         src_nritems = btrfs_header_nritems(&src->header);
791         dst_nritems = btrfs_header_nritems(&dst->header);
792         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
793         if (push_items <= 0) {
794                 return 1;
795         }
796         max_push = src_nritems / 2 + 1;
797         /* don't try to empty the node */
798         if (max_push >= src_nritems)
799                 return 1;
800         if (max_push < push_items)
801                 push_items = max_push;
802
803         memmove(dst->ptrs + push_items, dst->ptrs,
804                 dst_nritems * sizeof(struct btrfs_key_ptr));
805         memcpy(dst->ptrs, src->ptrs + src_nritems - push_items,
806                 push_items * sizeof(struct btrfs_key_ptr));
807
808         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
809         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
810
811         BUG_ON(list_empty(&src_buf->dirty));
812         BUG_ON(list_empty(&dst_buf->dirty));
813         return ret;
814 }
815
816 /*
817  * helper function to insert a new root level in the tree.
818  * A new node is allocated, and a single item is inserted to
819  * point to the existing root
820  *
821  * returns zero on success or < 0 on failure.
822  */
823 static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
824                            *root, struct btrfs_path *path, int level)
825 {
826         struct btrfs_buffer *t;
827         struct btrfs_node *lower;
828         struct btrfs_node *c;
829         struct btrfs_disk_key *lower_key;
830
831         BUG_ON(path->nodes[level]);
832         BUG_ON(path->nodes[level-1] != root->node);
833         t = btrfs_alloc_free_block(trans, root, root->nodesize);
834         c = &t->node;
835         memset(&c->header, 0, sizeof(c->header));
836         btrfs_set_header_nritems(&c->header, 1);
837         btrfs_set_header_level(&c->header, level);
838         btrfs_set_header_bytenr(&c->header, t->bytenr);
839         btrfs_set_header_generation(&c->header, trans->transid);
840         btrfs_set_header_owner(&c->header, root->root_key.objectid);
841         memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
842                sizeof(c->header.fsid));
843         lower = &path->nodes[level-1]->node;
844
845         if (btrfs_is_leaf(lower))
846                 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
847         else
848                 lower_key = &lower->ptrs[0].key;
849         memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key));
850         btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->bytenr);
851         BUG_ON(list_empty(&t->dirty));
852         /* the super has an extra ref to root->node */
853         btrfs_block_release(root, root->node);
854         root->node = t;
855         t->count++;
856         path->nodes[level] = t;
857         path->slots[level] = 0;
858         return 0;
859 }
860
861 /*
862  * worker function to insert a single pointer in a node.
863  * the node should have enough room for the pointer already
864  *
865  * slot and level indicate where you want the key to go, and
866  * bytenr is the block the key points to.
867  *
868  * returns zero on success and < 0 on any error
869  */
870 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
871                       *root, struct btrfs_path *path, struct btrfs_disk_key
872                       *key, u64 bytenr, int slot, int level)
873 {
874         struct btrfs_node *lower;
875         int nritems;
876
877         BUG_ON(!path->nodes[level]);
878         lower = &path->nodes[level]->node;
879         nritems = btrfs_header_nritems(&lower->header);
880         if (slot > nritems)
881                 BUG();
882         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
883                 BUG();
884         if (slot != nritems) {
885                 memmove(lower->ptrs + slot + 1, lower->ptrs + slot,
886                         (nritems - slot) * sizeof(struct btrfs_key_ptr));
887         }
888         memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key));
889         btrfs_set_node_blockptr(lower, slot, bytenr);
890         btrfs_set_header_nritems(&lower->header, nritems + 1);
891         BUG_ON(list_empty(&path->nodes[level]->dirty));
892         return 0;
893 }
894
895 /*
896  * split the node at the specified level in path in two.
897  * The path is corrected to point to the appropriate node after the split
898  *
899  * Before splitting this tries to make some room in the node by pushing
900  * left and right, if either one works, it returns right away.
901  *
902  * returns 0 on success and < 0 on failure
903  */
904 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
905                       *root, struct btrfs_path *path, int level)
906 {
907         struct btrfs_buffer *t;
908         struct btrfs_node *c;
909         struct btrfs_buffer *split_buffer;
910         struct btrfs_node *split;
911         int mid;
912         int ret;
913         int wret;
914         u32 c_nritems;
915
916         t = path->nodes[level];
917         c = &t->node;
918         if (t == root->node) {
919                 /* trying to split the root, lets make a new one */
920                 ret = insert_new_root(trans, root, path, level + 1);
921                 if (ret)
922                         return ret;
923         } else {
924                 ret = push_nodes_for_insert(trans, root, path, level);
925                 t = path->nodes[level];
926                 c = &t->node;
927                 if (!ret && btrfs_header_nritems(&c->header) <
928                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
929                         return 0;
930                 if (ret < 0)
931                         return ret;
932         }
933         c_nritems = btrfs_header_nritems(&c->header);
934         split_buffer = btrfs_alloc_free_block(trans, root, root->nodesize);
935         split = &split_buffer->node;
936         btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
937         btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
938         btrfs_set_header_bytenr(&split->header, split_buffer->bytenr);
939         btrfs_set_header_generation(&split->header, trans->transid);
940         btrfs_set_header_owner(&split->header, root->root_key.objectid);
941         memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
942                sizeof(split->header.fsid));
943         mid = (c_nritems + 1) / 2;
944         memcpy(split->ptrs, c->ptrs + mid,
945                 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
946         btrfs_set_header_nritems(&split->header, c_nritems - mid);
947         btrfs_set_header_nritems(&c->header, mid);
948         ret = 0;
949
950         BUG_ON(list_empty(&t->dirty));
951         wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
952                           split_buffer->bytenr, path->slots[level + 1] + 1,
953                           level + 1);
954         if (wret)
955                 ret = wret;
956
957         if (path->slots[level] >= mid) {
958                 path->slots[level] -= mid;
959                 btrfs_block_release(root, t);
960                 path->nodes[level] = split_buffer;
961                 path->slots[level + 1] += 1;
962         } else {
963                 btrfs_block_release(root, split_buffer);
964         }
965         return ret;
966 }
967
968 /*
969  * push some data in the path leaf to the right, trying to free up at
970  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
971  *
972  * returns 1 if the push failed because the other node didn't have enough
973  * room, 0 if everything worked out and < 0 if there were major errors.
974  */
975 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
976                            *root, struct btrfs_path *path, int data_size,
977                            int empty)
978 {
979         struct btrfs_buffer *left_buf = path->nodes[0];
980         struct btrfs_leaf *left = &left_buf->leaf;
981         struct btrfs_leaf *right;
982         struct btrfs_buffer *right_buf;
983         struct btrfs_buffer *upper;
984         int slot;
985         u32 i;
986         int free_space;
987         int push_space = 0;
988         int push_items = 0;
989         struct btrfs_item *item;
990         u32 left_nritems;
991         u32 nr;
992         u32 right_nritems;
993         slot = path->slots[1];
994         if (!path->nodes[1]) {
995                 return 1;
996         }
997         upper = path->nodes[1];
998         if (slot >= btrfs_header_nritems(&upper->node.header) - 1) {
999                 return 1;
1000         }
1001         right_buf = read_tree_block(root,
1002                             btrfs_node_blockptr(&upper->node, slot + 1),
1003                             root->leafsize);
1004         right = &right_buf->leaf;
1005         free_space = btrfs_leaf_free_space(root, right);
1006         if (free_space < data_size + sizeof(struct btrfs_item)) {
1007                 btrfs_block_release(root, right_buf);
1008                 return 1;
1009         }
1010         /* cow and double check */
1011         btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
1012         right = &right_buf->leaf;
1013         free_space = btrfs_leaf_free_space(root, right);
1014         if (free_space < data_size + sizeof(struct btrfs_item)) {
1015                 btrfs_block_release(root, right_buf);
1016                 return 1;
1017         }
1018         left_nritems = btrfs_header_nritems(&left->header);
1019         if (left_nritems == 0) {
1020                 btrfs_block_release(root, right_buf);
1021                 return 1;
1022         }
1023
1024         if (empty)
1025                 nr = 0;
1026         else
1027                 nr = 1;
1028
1029         i = left_nritems - 1;
1030         while (i >= nr) {
1031                 item = left->items + i;
1032                 if (path->slots[0] == i)
1033                         push_space += data_size + sizeof(*item);
1034                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1035                     free_space)
1036                         break;
1037                 push_items++;
1038                 push_space += btrfs_item_size(item) + sizeof(*item);
1039                 if (i == 0)
1040                         break;
1041                 i--;
1042         }
1043         if (push_items == 0) {
1044                 btrfs_block_release(root, right_buf);
1045                 return 1;
1046         }
1047         right_nritems = btrfs_header_nritems(&right->header);
1048         /* push left to right */
1049         push_space = btrfs_item_end(left->items + left_nritems - push_items);
1050         push_space -= leaf_data_end(root, left);
1051         /* make room in the right data area */
1052         memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) -
1053                 push_space, btrfs_leaf_data(right) + leaf_data_end(root, right),
1054                 BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right));
1055         /* copy from the left data area */
1056         memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space,
1057                 btrfs_leaf_data(left) + leaf_data_end(root, left), push_space);
1058         memmove(right->items + push_items, right->items,
1059                 right_nritems * sizeof(struct btrfs_item));
1060         /* copy the items from left to right */
1061         memcpy(right->items, left->items + left_nritems - push_items,
1062                 push_items * sizeof(struct btrfs_item));
1063
1064         /* update the item pointers */
1065         right_nritems += push_items;
1066         btrfs_set_header_nritems(&right->header, right_nritems);
1067         push_space = BTRFS_LEAF_DATA_SIZE(root);
1068         for (i = 0; i < right_nritems; i++) {
1069                 btrfs_set_item_offset(right->items + i, push_space -
1070                                       btrfs_item_size(right->items + i));
1071                 push_space = btrfs_item_offset(right->items + i);
1072         }
1073         left_nritems -= push_items;
1074         btrfs_set_header_nritems(&left->header, left_nritems);
1075
1076         BUG_ON(list_empty(&left_buf->dirty));
1077         BUG_ON(list_empty(&right_buf->dirty));
1078         memcpy(&upper->node.ptrs[slot + 1].key,
1079                 &right->items[0].key, sizeof(struct btrfs_disk_key));
1080         BUG_ON(list_empty(&upper->dirty));
1081
1082         /* then fixup the leaf pointer in the path */
1083         if (path->slots[0] >= left_nritems) {
1084                 path->slots[0] -= left_nritems;
1085                 btrfs_block_release(root, path->nodes[0]);
1086                 path->nodes[0] = right_buf;
1087                 path->slots[1] += 1;
1088         } else {
1089                 btrfs_block_release(root, right_buf);
1090         }
1091         return 0;
1092 }
1093 /*
1094  * push some data in the path leaf to the left, trying to free up at
1095  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1096  */
1097 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1098                           *root, struct btrfs_path *path, int data_size,
1099                           int empty)
1100 {
1101         struct btrfs_buffer *right_buf = path->nodes[0];
1102         struct btrfs_leaf *right = &right_buf->leaf;
1103         struct btrfs_buffer *t;
1104         struct btrfs_leaf *left;
1105         int slot;
1106         int i;
1107         int free_space;
1108         int push_space = 0;
1109         int push_items = 0;
1110         struct btrfs_item *item;
1111         u32 old_left_nritems;
1112         u32 right_nritems;
1113         u32 nr;
1114         int ret = 0;
1115         int wret;
1116         slot = path->slots[1];
1117         if (slot == 0) {
1118                 return 1;
1119         }
1120         if (!path->nodes[1]) {
1121                 return 1;
1122         }
1123         right_nritems = btrfs_header_nritems(&right->header);
1124         if (right_nritems == 0) {
1125                 return 1;
1126         }
1127
1128         t = read_tree_block(root,
1129                     btrfs_node_blockptr(&path->nodes[1]->node, slot - 1),
1130                     root->leafsize);
1131         left = &t->leaf;
1132         free_space = btrfs_leaf_free_space(root, left);
1133         if (free_space < data_size + sizeof(struct btrfs_item)) {
1134                 btrfs_block_release(root, t);
1135                 return 1;
1136         }
1137
1138         /* cow and double check */
1139         btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
1140         left = &t->leaf;
1141         free_space = btrfs_leaf_free_space(root, left);
1142         if (free_space < data_size + sizeof(struct btrfs_item)) {
1143                 btrfs_block_release(root, t);
1144                 return 1;
1145         }
1146         if (empty)
1147                 nr = right_nritems;
1148         else
1149                 nr = right_nritems - 1;
1150
1151         for (i = 0; i < nr; i++) {
1152                 item = right->items + i;
1153                 if (path->slots[0] == i)
1154                         push_space += data_size + sizeof(*item);
1155                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1156                     free_space)
1157                         break;
1158                 push_items++;
1159                 push_space += btrfs_item_size(item) + sizeof(*item);
1160         }
1161         if (push_items == 0) {
1162                 btrfs_block_release(root, t);
1163                 return 1;
1164         }
1165         /* push data from right to left */
1166         memcpy(left->items + btrfs_header_nritems(&left->header),
1167                 right->items, push_items * sizeof(struct btrfs_item));
1168         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1169                      btrfs_item_offset(right->items + push_items -1);
1170         memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space,
1171                 btrfs_leaf_data(right) +
1172                 btrfs_item_offset(right->items + push_items - 1),
1173                 push_space);
1174         old_left_nritems = btrfs_header_nritems(&left->header);
1175         BUG_ON(old_left_nritems < 0);
1176
1177         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1178                 u32 ioff = btrfs_item_offset(left->items + i);
1179                 btrfs_set_item_offset(left->items + i, ioff -
1180                                      (BTRFS_LEAF_DATA_SIZE(root) -
1181                                       btrfs_item_offset(left->items +
1182                                                         old_left_nritems - 1)));
1183         }
1184         btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
1185         /* fixup right node */
1186         if (push_items < right_nritems) {
1187                 push_space = btrfs_item_offset(right->items + push_items - 1) -
1188                                                leaf_data_end(root, right);
1189                 memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1190                         push_space, btrfs_leaf_data(right) +
1191                         leaf_data_end(root, right), push_space);
1192                 memmove(right->items, right->items + push_items,
1193                         (right_nritems - push_items) *
1194                         sizeof(struct btrfs_item));
1195         }
1196         right_nritems -= push_items;
1197         btrfs_set_header_nritems(&right->header, right_nritems);
1198         push_space = BTRFS_LEAF_DATA_SIZE(root);
1199         for (i = 0; i < right_nritems; i++) {
1200                 btrfs_set_item_offset(right->items + i, push_space -
1201                                       btrfs_item_size(right->items + i));
1202                 push_space = btrfs_item_offset(right->items + i);
1203         }
1204
1205         BUG_ON(list_empty(&t->dirty));
1206         BUG_ON(list_empty(&right_buf->dirty));
1207
1208         wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
1209         if (wret)
1210                 ret = wret;
1211
1212         /* then fixup the leaf pointer in the path */
1213         if (path->slots[0] < push_items) {
1214                 path->slots[0] += old_left_nritems;
1215                 btrfs_block_release(root, path->nodes[0]);
1216                 path->nodes[0] = t;
1217                 path->slots[1] -= 1;
1218         } else {
1219                 btrfs_block_release(root, t);
1220                 path->slots[0] -= push_items;
1221         }
1222         BUG_ON(path->slots[0] < 0);
1223         return ret;
1224 }
1225
1226 /*
1227  * split the path's leaf in two, making sure there is at least data_size
1228  * available for the resulting leaf level of the path.
1229  *
1230  * returns 0 if all went well and < 0 on failure.
1231  */
1232 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1233                       *root, struct btrfs_key *ins_key,
1234                       struct btrfs_path *path, int data_size, int extend)
1235 {
1236         struct btrfs_buffer *l_buf;
1237         struct btrfs_leaf *l;
1238         u32 nritems;
1239         int mid;
1240         int slot;
1241         struct btrfs_leaf *right;
1242         struct btrfs_buffer *right_buffer;
1243         int space_needed = data_size + sizeof(struct btrfs_item);
1244         int data_copy_size;
1245         int rt_data_off;
1246         int i;
1247         int ret = 0;
1248         int wret;
1249         int double_split;
1250         int num_doubles = 0;
1251         struct btrfs_disk_key disk_key;
1252
1253         if (extend)
1254                 space_needed = data_size;
1255         /* first try to make some room by pushing left and right */
1256         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1257                 wret = push_leaf_right(trans, root, path, data_size, 0);
1258                 if (wret < 0) {
1259                         return wret;
1260                 }
1261                 if (wret) {
1262                         wret = push_leaf_left(trans, root, path, data_size, 0);
1263                         if (wret < 0)
1264                                 return wret;
1265                 }
1266                 l_buf = path->nodes[0];
1267                 l = &l_buf->leaf;
1268
1269                 /* did the pushes work? */
1270                 if (btrfs_leaf_free_space(root, l) >= space_needed)
1271                         return 0;
1272         }
1273         if (!path->nodes[1]) {
1274                 ret = insert_new_root(trans, root, path, 1);
1275                 if (ret)
1276                         return ret;
1277         }
1278 again:
1279         double_split = 0;
1280         l_buf = path->nodes[0];
1281         l = &l_buf->leaf;
1282         slot = path->slots[0];
1283         nritems = btrfs_header_nritems(&l->header);
1284         mid = (nritems + 1)/ 2;
1285
1286         right_buffer = btrfs_alloc_free_block(trans, root, root->leafsize);
1287         right = &right_buffer->leaf;
1288         memset(&right->header, 0, sizeof(right->header));
1289         btrfs_set_header_bytenr(&right->header, right_buffer->bytenr);
1290         btrfs_set_header_generation(&right->header, trans->transid);
1291         btrfs_set_header_level(&right->header, 0);
1292         btrfs_set_header_owner(&right->header, root->root_key.objectid);
1293         memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1294                sizeof(right->header.fsid));
1295         if (mid <= slot) {
1296                 if (nritems == 1 ||
1297                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1298                         BTRFS_LEAF_DATA_SIZE(root)) {
1299                         if (slot >= nritems) {
1300                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1301                                 btrfs_set_header_nritems(&right->header, 0);
1302                                 wret = insert_ptr(trans, root, path,
1303                                                   &disk_key, right_buffer->bytenr,
1304                                                   path->slots[1] + 1, 1);
1305                                 if (wret)
1306                                         ret = wret;
1307                                 btrfs_block_release(root, path->nodes[0]);
1308                                 path->nodes[0] = right_buffer;
1309                                 path->slots[0] = 0;
1310                                 path->slots[1] += 1;
1311                                 return ret;
1312                         }
1313                         mid = slot;
1314                         if (mid != nritems &&
1315                             leaf_space_used(l, mid, nritems - mid) +
1316                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1317                                 double_split = 1;
1318                         }
1319                 }
1320         } else {
1321                 if (leaf_space_used(l, 0, mid) + space_needed >
1322                         BTRFS_LEAF_DATA_SIZE(root)) {
1323                         if (!extend && slot == 0) {
1324                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1325                                 btrfs_set_header_nritems(&right->header, 0);
1326                                 wret = insert_ptr(trans, root, path,
1327                                                   &disk_key,
1328                                                   right_buffer->bytenr,
1329                                                   path->slots[1], 1);
1330                                 if (wret)
1331                                         ret = wret;
1332                                 btrfs_block_release(root, path->nodes[0]);
1333                                 path->nodes[0] = right_buffer;
1334                                 path->slots[0] = 0;
1335                                 if (path->slots[1] == 0) {
1336                                         wret = fixup_low_keys(trans, root,
1337                                                    path, &disk_key, 1);
1338                                         if (wret)
1339                                                 ret = wret;
1340                                 }
1341                                 return ret;
1342                         } else if (extend && slot == 0) {
1343                                 mid = 1;
1344                         } else {
1345                                 mid = slot;
1346                                 if (mid != nritems &&
1347                                     leaf_space_used(l, mid, nritems - mid) +
1348                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1349                                         double_split = 1;
1350                                 }
1351                         }
1352                 }
1353         }
1354         nritems = nritems - mid;
1355         btrfs_set_header_nritems(&right->header, nritems);
1356         data_copy_size = btrfs_item_end(l->items +  mid) -
1357                          leaf_data_end(root, l);
1358         memcpy(right->items, l->items + mid,
1359                nritems * sizeof(struct btrfs_item));
1360         memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1361                 data_copy_size, btrfs_leaf_data(l) +
1362                 leaf_data_end(root, l), data_copy_size);
1363         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1364                       btrfs_item_end(l->items + mid);
1365         for (i = 0; i < nritems; i++) {
1366                 u32 ioff = btrfs_item_offset(right->items + i);
1367                 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1368         }
1369
1370         btrfs_set_header_nritems(&l->header, mid);
1371         ret = 0;
1372         wret = insert_ptr(trans, root, path, &right->items[0].key,
1373                           right_buffer->bytenr, path->slots[1] + 1, 1);
1374         if (wret)
1375                 ret = wret;
1376
1377         BUG_ON(list_empty(&right_buffer->dirty));
1378         BUG_ON(list_empty(&l_buf->dirty));
1379         BUG_ON(path->slots[0] != slot);
1380         if (mid <= slot) {
1381                 btrfs_block_release(root, path->nodes[0]);
1382                 path->nodes[0] = right_buffer;
1383                 path->slots[0] -= mid;
1384                 path->slots[1] += 1;
1385         } else
1386                 btrfs_block_release(root, right_buffer);
1387
1388         BUG_ON(path->slots[0] < 0);
1389         if (double_split) {
1390                 BUG_ON(num_doubles != 0);
1391                 num_doubles++;
1392                 goto again;
1393         }
1394         return ret;
1395 }
1396 /*
1397  * Given a key and some data, insert an item into the tree.
1398  * This does all the path init required, making room in the tree if needed.
1399  */
1400 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1401                             *root, struct btrfs_path *path, struct btrfs_key
1402                             *cpu_key, u32 data_size)
1403 {
1404         int ret = 0;
1405         int slot;
1406         int slot_orig;
1407         struct btrfs_leaf *leaf;
1408         struct btrfs_buffer *leaf_buf;
1409         u32 nritems;
1410         unsigned int data_end;
1411         struct btrfs_disk_key disk_key;
1412
1413         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
1414
1415         /* create a root if there isn't one */
1416         if (!root->node)
1417                 BUG();
1418         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
1419         if (ret == 0) {
1420                 return -EEXIST;
1421         }
1422         if (ret < 0)
1423                 goto out;
1424
1425         slot_orig = path->slots[0];
1426         leaf_buf = path->nodes[0];
1427         leaf = &leaf_buf->leaf;
1428
1429         nritems = btrfs_header_nritems(&leaf->header);
1430         data_end = leaf_data_end(root, leaf);
1431
1432         if (btrfs_leaf_free_space(root, leaf) <
1433             sizeof(struct btrfs_item) + data_size)
1434                 BUG();
1435
1436         slot = path->slots[0];
1437         BUG_ON(slot < 0);
1438         if (slot != nritems) {
1439                 int i;
1440                 unsigned int old_data = btrfs_item_end(leaf->items + slot);
1441
1442                 /*
1443                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
1444                  */
1445                 /* first correct the data pointers */
1446                 for (i = slot; i < nritems; i++) {
1447                         u32 ioff = btrfs_item_offset(leaf->items + i);
1448                         btrfs_set_item_offset(leaf->items + i,
1449                                               ioff - data_size);
1450                 }
1451
1452                 /* shift the items */
1453                 memmove(leaf->items + slot + 1, leaf->items + slot,
1454                         (nritems - slot) * sizeof(struct btrfs_item));
1455
1456                 /* shift the data */
1457                 memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1458                         btrfs_leaf_data(leaf) +
1459                         data_end, old_data - data_end);
1460                 data_end = old_data;
1461         }
1462         /* setup the item for the new data */
1463         memcpy(&leaf->items[slot].key, &disk_key,
1464                 sizeof(struct btrfs_disk_key));
1465         btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1466         btrfs_set_item_size(leaf->items + slot, data_size);
1467         btrfs_set_header_nritems(&leaf->header, nritems + 1);
1468
1469         ret = 0;
1470         if (slot == 0)
1471                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
1472
1473         BUG_ON(list_empty(&leaf_buf->dirty));
1474         if (btrfs_leaf_free_space(root, leaf) < 0)
1475                 BUG();
1476         check_leaf(root, path, 0);
1477 out:
1478         return ret;
1479 }
1480
1481 /*
1482  * Given a key and some data, insert an item into the tree.
1483  * This does all the path init required, making room in the tree if needed.
1484  */
1485 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1486                       *root, struct btrfs_key *cpu_key, void *data, u32
1487                       data_size)
1488 {
1489         int ret = 0;
1490         struct btrfs_path path;
1491         u8 *ptr;
1492
1493         btrfs_init_path(&path);
1494         ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size);
1495         if (!ret) {
1496                 ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8);
1497                 memcpy(ptr, data, data_size);
1498         }
1499         btrfs_release_path(root, &path);
1500         return ret;
1501 }
1502
1503 /*
1504  * delete the pointer from a given node.
1505  *
1506  * If the delete empties a node, the node is removed from the tree,
1507  * continuing all the way the root if required.  The root is converted into
1508  * a leaf if all the nodes are emptied.
1509  */
1510 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1511                    struct btrfs_path *path, int level, int slot)
1512 {
1513         struct btrfs_node *node;
1514         struct btrfs_buffer *parent = path->nodes[level];
1515         u32 nritems;
1516         int ret = 0;
1517         int wret;
1518
1519         node = &parent->node;
1520         nritems = btrfs_header_nritems(&node->header);
1521         if (slot != nritems -1) {
1522                 memmove(node->ptrs + slot, node->ptrs + slot + 1,
1523                         sizeof(struct btrfs_key_ptr) * (nritems - slot - 1));
1524         }
1525         nritems--;
1526         btrfs_set_header_nritems(&node->header, nritems);
1527         if (nritems == 0 && parent == root->node) {
1528                 BUG_ON(btrfs_header_level(&root->node->node.header) != 1);
1529                 /* just turn the root into a leaf and break */
1530                 btrfs_set_header_level(&root->node->node.header, 0);
1531         } else if (slot == 0) {
1532                 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
1533                                       level + 1);
1534                 if (wret)
1535                         ret = wret;
1536         }
1537         BUG_ON(list_empty(&parent->dirty));
1538         return ret;
1539 }
1540
1541 /*
1542  * delete the item at the leaf level in path.  If that empties
1543  * the leaf, remove it from the tree
1544  */
1545 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1546                    struct btrfs_path *path)
1547 {
1548         int slot;
1549         struct btrfs_leaf *leaf;
1550         struct btrfs_buffer *leaf_buf;
1551         int doff;
1552         int dsize;
1553         int ret = 0;
1554         int wret;
1555         u32 nritems;
1556
1557         leaf_buf = path->nodes[0];
1558         leaf = &leaf_buf->leaf;
1559         slot = path->slots[0];
1560         doff = btrfs_item_offset(leaf->items + slot);
1561         dsize = btrfs_item_size(leaf->items + slot);
1562         nritems = btrfs_header_nritems(&leaf->header);
1563
1564         if (slot != nritems - 1) {
1565                 int i;
1566                 int data_end = leaf_data_end(root, leaf);
1567                 memmove(btrfs_leaf_data(leaf) + data_end + dsize,
1568                         btrfs_leaf_data(leaf) + data_end,
1569                         doff - data_end);
1570                 for (i = slot + 1; i < nritems; i++) {
1571                         u32 ioff = btrfs_item_offset(leaf->items + i);
1572                         btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1573                 }
1574                 memmove(leaf->items + slot, leaf->items + slot + 1,
1575                         sizeof(struct btrfs_item) *
1576                         (nritems - slot - 1));
1577         }
1578         btrfs_set_header_nritems(&leaf->header, nritems - 1);
1579         nritems--;
1580         /* delete the leaf if we've emptied it */
1581         if (nritems == 0) {
1582                 if (leaf_buf == root->node) {
1583                         btrfs_set_header_level(&leaf->header, 0);
1584                         BUG_ON(list_empty(&leaf_buf->dirty));
1585                 } else {
1586                         clean_tree_block(trans, root, leaf_buf);
1587                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
1588                         if (wret)
1589                                 ret = wret;
1590                         wret = btrfs_free_extent(trans, root,
1591                                                  leaf_buf->bytenr,
1592                                                  leaf_buf->size, 1);
1593                         if (wret)
1594                                 ret = wret;
1595                 }
1596         } else {
1597                 int used = leaf_space_used(leaf, 0, nritems);
1598                 if (slot == 0) {
1599                         wret = fixup_low_keys(trans, root, path,
1600                                               &leaf->items[0].key, 1);
1601                         if (wret)
1602                                 ret = wret;
1603                 }
1604                 BUG_ON(list_empty(&leaf_buf->dirty));
1605
1606                 /* delete the leaf if it is mostly empty */
1607                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
1608                         /* push_leaf_left fixes the path.
1609                          * make sure the path still points to our leaf
1610                          * for possible call to del_ptr below
1611                          */
1612                         slot = path->slots[1];
1613                         leaf_buf->count++;
1614                         wret = push_leaf_right(trans, root, path, 1, 1);
1615                         if (wret < 0)
1616                                 ret = wret;
1617                         if (path->nodes[0] == leaf_buf &&
1618                             btrfs_header_nritems(&leaf->header)) {
1619                                 wret = push_leaf_left(trans, root, path, 1, 1);
1620                                 if (wret < 0)
1621                                         ret = wret;
1622                         }
1623                         if (btrfs_header_nritems(&leaf->header) == 0) {
1624                                 u64 bytenr = leaf_buf->bytenr;
1625                                 clean_tree_block(trans, root, leaf_buf);
1626                                 wret = del_ptr(trans, root, path, 1, slot);
1627                                 if (wret)
1628                                         ret = wret;
1629                                 wret = btrfs_free_extent(trans, root, bytenr,
1630                                                  leaf_buf->size, 1);
1631                                 btrfs_block_release(root, leaf_buf);
1632                                 if (wret)
1633                                         ret = wret;
1634                         } else {
1635                                 btrfs_block_release(root, leaf_buf);
1636                         }
1637                 }
1638         }
1639         return ret;
1640 }
1641 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1642                         struct btrfs_root *root,
1643                         struct btrfs_path *path,
1644                         u32 new_size, int from_end)
1645 {
1646         int ret = 0;
1647         int slot;
1648         int slot_orig;
1649         struct btrfs_leaf *leaf;
1650         struct btrfs_item *item;
1651         u32 nritems;
1652         unsigned int data_end;
1653         unsigned int old_data_start;
1654         unsigned int old_size;
1655         unsigned int size_diff;
1656         int i;
1657
1658         slot_orig = path->slots[0];
1659         leaf = &path->nodes[0]->leaf;
1660         slot = path->slots[0];
1661
1662         old_size = btrfs_item_size(leaf->items + slot);
1663         if (old_size == new_size)
1664                 return 0;
1665
1666         nritems = btrfs_header_nritems(&leaf->header);
1667         data_end = leaf_data_end(root, leaf);
1668
1669         old_data_start = btrfs_item_offset(leaf->items + slot);
1670
1671         size_diff = old_size - new_size;
1672
1673         BUG_ON(slot < 0);
1674         BUG_ON(slot >= nritems);
1675
1676         /*
1677          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1678          */
1679         /* first correct the data pointers */
1680         for (i = slot; i < nritems; i++) {
1681                 u32 ioff;
1682                 item = leaf->items + i;
1683                 ioff = btrfs_item_offset(item);
1684                 btrfs_set_item_offset(item, ioff + size_diff);
1685         }
1686
1687         /* shift the data */
1688         if (from_end) {
1689                 memmove(btrfs_leaf_data(leaf) + data_end + size_diff,
1690                         btrfs_leaf_data(leaf) + data_end,
1691                         old_data_start + new_size - data_end);
1692         } else {
1693                 struct btrfs_disk_key *disk_key;
1694                 u64 offset;
1695
1696                 disk_key = &leaf->items[slot].key;
1697                 if (btrfs_disk_key_type(disk_key) == BTRFS_EXTENT_DATA_KEY) {
1698                         char *ptr;
1699                         struct btrfs_file_extent_item *fi;
1700
1701                         fi = btrfs_item_ptr(leaf, slot,
1702                                             struct btrfs_file_extent_item);
1703                         fi = (struct btrfs_file_extent_item *)(
1704                              (unsigned long)fi - size_diff);
1705
1706                         if (btrfs_file_extent_type(fi) ==
1707                             BTRFS_FILE_EXTENT_INLINE) {
1708                                 ptr = btrfs_item_ptr(leaf, slot, char);
1709                                 memmove(ptr, (char *)fi,
1710                                         offsetof(struct btrfs_file_extent_item,
1711                                                  disk_bytenr));
1712                         }
1713                 }
1714
1715                 memmove(btrfs_leaf_data(leaf) + data_end + size_diff,
1716                         btrfs_leaf_data(leaf) + data_end,
1717                         old_data_start - data_end);
1718
1719                 offset = btrfs_disk_key_offset(disk_key);
1720                 btrfs_set_disk_key_offset(disk_key, offset + size_diff);
1721                 if (slot == 0)
1722                         fixup_low_keys(trans, root, path, disk_key, 1);
1723         }
1724
1725         item = leaf->items + slot;
1726         btrfs_set_item_size(item, new_size);
1727         BUG_ON(list_empty(&path->nodes[0]->dirty));
1728
1729         ret = 0;
1730         if (btrfs_leaf_free_space(root, leaf) < 0) {
1731                 btrfs_print_leaf(root, leaf);
1732                 BUG();
1733         }
1734         return ret;
1735 }
1736
1737 int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1738                       *root, struct btrfs_path *path, u32 data_size)
1739 {
1740         int ret = 0;
1741         int slot;
1742         int slot_orig;
1743         struct btrfs_leaf *leaf;
1744         struct btrfs_buffer *leaf_buf;
1745         u32 nritems;
1746         unsigned int data_end;
1747         unsigned int old_data;
1748         unsigned int old_size;
1749         int i;
1750
1751         slot_orig = path->slots[0];
1752         leaf_buf = path->nodes[0];
1753         leaf = &leaf_buf->leaf;
1754
1755         nritems = btrfs_header_nritems(&leaf->header);
1756         data_end = leaf_data_end(root, leaf);
1757
1758         if (btrfs_leaf_free_space(root, leaf) < data_size)
1759                 BUG();
1760         slot = path->slots[0];
1761         old_data = btrfs_item_end(leaf->items + slot);
1762
1763         BUG_ON(slot < 0);
1764         BUG_ON(slot >= nritems);
1765
1766         /*
1767          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1768          */
1769         /* first correct the data pointers */
1770         for (i = slot; i < nritems; i++) {
1771                 u32 ioff = btrfs_item_offset(leaf->items + i);
1772                 btrfs_set_item_offset(leaf->items + i,
1773                                       ioff - data_size);
1774         }
1775         /* shift the data */
1776         memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1777                 btrfs_leaf_data(leaf) + data_end, old_data - data_end);
1778         data_end = old_data;
1779         old_size = btrfs_item_size(leaf->items + slot);
1780         btrfs_set_item_size(leaf->items + slot, old_size + data_size);
1781
1782         ret = 0;
1783         if (btrfs_leaf_free_space(root, leaf) < 0)
1784                 BUG();
1785         check_leaf(root, path, 0);
1786         return ret;
1787 }
1788
1789 /*
1790  * walk up the tree as far as required to find the next leaf.
1791  * returns 0 if it found something or 1 if there are no greater leaves.
1792  * returns < 0 on io errors.
1793  */
1794 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1795 {
1796         int slot;
1797         int level = 1;
1798         u64 bytenr;
1799         struct btrfs_buffer *c;
1800         struct btrfs_buffer *next = NULL;
1801
1802         while(level < BTRFS_MAX_LEVEL) {
1803                 if (!path->nodes[level])
1804                         return 1;
1805                 slot = path->slots[level] + 1;
1806                 c = path->nodes[level];
1807                 if (slot >= btrfs_header_nritems(&c->node.header)) {
1808                         level++;
1809                         continue;
1810                 }
1811                 bytenr = btrfs_node_blockptr(&c->node, slot);
1812                 if (next)
1813                         btrfs_block_release(root, next);
1814                 next = read_tree_block(root, bytenr,
1815                                        btrfs_level_size(root, level - 1));
1816                 break;
1817         }
1818         path->slots[level] = slot;
1819         while(1) {
1820                 level--;
1821                 c = path->nodes[level];
1822                 btrfs_block_release(root, c);
1823                 path->nodes[level] = next;
1824                 path->slots[level] = 0;
1825                 if (!level)
1826                         break;
1827                 next = read_tree_block(root,
1828                                        btrfs_node_blockptr(&next->node, 0),
1829                                        btrfs_level_size(root, level - 1));
1830         }
1831         check_leaf(root, path, 0);
1832         return 0;
1833 }