2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
21 #include "kerncompat.h"
22 #include "radix-tree.h"
25 #include "print-tree.h"
/*
 * Forward declarations for static helpers that are called earlier in this
 * file than the point where they are defined.
 * NOTE(review): this listing skips source line 33, so the end of the
 * push_node_left() prototype is not visible below.
 */
27 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
28 *root, struct btrfs_path *path, int level);
29 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
30 *root, struct btrfs_path *path, int data_size);
31 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
32 *root, struct btrfs_buffer *dst, struct btrfs_buffer
34 static int balance_node_right(struct btrfs_trans_handle *trans, struct
35 btrfs_root *root, struct btrfs_buffer *dst_buf,
36 struct btrfs_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38 struct btrfs_path *path, int level, int slot);
/*
 * Put a path into its empty state by zero-filling the whole structure.
 * p: the path to initialise; every byte of *p is overwritten.
 */
40 inline void btrfs_init_path(struct btrfs_path *p)
42 memset(p, 0, sizeof(*p));
/*
 * Drop the buffers referenced by a path and reset the path to all zeroes.
 * Each level up to BTRFS_MAX_LEVEL is visited and its node is handed to
 * btrfs_block_release(); afterwards the path is cleared with memset so it
 * can be reused.
 * NOTE(review): source lines 49-50 are missing from this listing;
 * presumably they stop the walk at the first empty level -- confirm.
 */
45 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
48 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
51 btrfs_block_release(root, p->nodes[i]);
/* leave the path in the same state btrfs_init_path() produces */
53 memset(p, 0, sizeof(*p));
/*
 * Copy-on-write helper for a tree block (going by the function name).
 * From the visible lines: a new block of buf->size bytes is allocated, the
 * old node contents are copied into it, and the copy's header is restamped
 * with the new block's bytenr and this root's objectid.  When buf is the
 * root node the old extent is freed unless buf is also the commit root;
 * otherwise the parent's block pointer at parent_slot is rewritten and the
 * old extent is freed.  In both cases the reference on buf is released.
 *
 * NOTE(review): several source lines are missing from this listing,
 * including the last parameter, the body of the "already dirty" branch,
 * the root replacement statements and the return statement.
 * NOTE(review): the result of btrfs_alloc_free_block() is used without a
 * visible NULL check -- confirm whether the allocator can fail.
 */
56 static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
57 *root, struct btrfs_buffer *buf, struct btrfs_buffer
58 *parent, int parent_slot, struct btrfs_buffer
61 struct btrfs_buffer *cow;
/* buf is already on a dirty list; the branch body is not visible here */
63 if (!list_empty(&buf->dirty)) {
/* make the private copy and stamp it with its own location and owner */
67 cow = btrfs_alloc_free_block(trans, root, buf->size);
68 memcpy(&cow->node, &buf->node, buf->size);
69 btrfs_set_header_bytenr(&cow->node.header, cow->bytenr);
70 btrfs_set_header_owner(&cow->node.header, root->root_key.objectid);
72 btrfs_inc_ref(trans, root, buf);
/* case 1: the block being copied is the current root of this tree */
73 if (buf == root->node) {
/* the commit root keeps its extent; anything else is freed */
76 if (buf != root->commit_root)
77 btrfs_free_extent(trans, root, buf->bytenr,
79 btrfs_block_release(root, buf);
/* case 2: an inner block; repoint the parent slot (argument not visible) */
81 btrfs_set_node_blockptr(&parent->node, parent_slot,
/* the parent must already be dirty since it is modified in place */
83 BUG_ON(list_empty(&parent->dirty));
84 btrfs_free_extent(trans, root, buf->bytenr, buf->size, 1);
86 btrfs_block_release(root, buf);
91 * The leaf data grows from end-to-front in the node.
92 * this returns the address of the start of the last item,
93 * which is the stop of the leaf data stack
/*
 * Return the offset, within the leaf data area, at which the data of the
 * last item begins (item data is laid out from the end towards the front).
 * root: supplies BTRFS_LEAF_DATA_SIZE(); leaf: the leaf to inspect.
 * NOTE(review): source line 99 is missing from this listing; presumably it
 * guards the first return for an empty leaf (nr == 0) -- confirm.
 */
95 static inline unsigned int leaf_data_end(struct btrfs_root *root,
96 struct btrfs_leaf *leaf)
98 u32 nr = btrfs_header_nritems(&leaf->header);
/* no item data stored: the data area ends where it starts, at full size */
100 return BTRFS_LEAF_DATA_SIZE(root);
/* otherwise the last item header records the lowest data offset */
101 return btrfs_item_offset(leaf->items + nr - 1);
105 * how many bytes are required to store the items in a leaf. start
106 * and nr indicate which items in the leaf to check. This totals up the
107 * space used both by the item structs and the item data
/*
 * Count the bytes consumed by items [start, start + nr) of leaf l: the
 * span of their data plus one struct btrfs_item header per item.
 * NOTE(review): the early-exit statements (source lines 113-121) and the
 * return statement are not fully visible in this listing.
 */
109 static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
112 int nritems = btrfs_header_nritems(&l->header);
/* requested range extends past the items present (body not visible) */
115 if (nritems < start + nr)
118 end = start + nr - 1;
/* data span: end of the first item in range down to the last item offset */
122 data_len = btrfs_item_end(l->items + start);
123 data_len = data_len - btrfs_item_offset(l->items + end);
/* plus the fixed-size item headers themselves */
124 data_len += sizeof(struct btrfs_item) * nr;
129 * The space between the end of the leaf items and
130 * the start of the leaf data. IOW, how much room
131 * the leaf has left for both items and data
/*
 * Return the number of unused bytes in leaf: the total leaf data size
 * minus the space taken by all of its items (headers and data).  The
 * result is a signed int; callers in this file test it against zero.
 */
133 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
135 int nritems = btrfs_header_nritems(&leaf->header);
136 return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
140 * compare two keys in a memcmp fashion
/*
 * Three-way comparison of an on-disk key against a CPU-format key.
 * The disk key is first converted to CPU format, then the fields are
 * compared in priority order: objectid, then type, then offset.
 * NOTE(review): the return statements are missing from this listing.
 * check_node() in this file asserts a negative result when the first key
 * sorts before the second, so presumably 1 / -1 / 0 are returned.
 */
142 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
146 btrfs_disk_key_to_cpu(&k1, disk);
/* most significant field */
148 if (k1.objectid > k2->objectid)
150 if (k1.objectid < k2->objectid)
/* objectids equal: fall back to the type */
152 if (k1.type > k2->type)
154 if (k1.type < k2->type)
/* types equal too: the offset decides */
156 if (k1.offset > k2->offset)
158 if (k1.offset < k2->offset)
/*
 * Sanity-check the interior node at path->nodes[level]; every violation
 * trips BUG_ON.  Checks visible here:
 *   - the node is not empty and does not exceed the per-block pointer limit
 *   - the key stored in the parent slot equals the first key of the node
 *   - the parent block pointer equals the bytenr in the node header
 *   - adjacent keys are in strictly increasing order
 * NOTE(review): the loop bound is "i < nritems - 2", so the final adjacent
 * pair (nritems - 2, nritems - 1) is never compared, and a two-item node
 * gets no ordering check at all -- confirm whether that is intended.
 * NOTE(review): the condition guarding the parent checks (source line 176)
 * and the return statement are missing from this listing.
 */
163 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
167 struct btrfs_node *parent = NULL;
168 struct btrfs_node *node = &path->nodes[level]->node;
170 u32 nritems = btrfs_header_nritems(&node->header);
/* pick up the parent only if the path has one at level + 1 */
172 if (path->nodes[level + 1])
173 parent = &path->nodes[level + 1]->node;
174 parent_slot = path->slots[level + 1];
175 BUG_ON(nritems == 0);
177 struct btrfs_disk_key *parent_key;
178 parent_key = &parent->ptrs[parent_slot].key;
/* raw byte comparison of the two on-disk keys */
179 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
180 sizeof(struct btrfs_disk_key)));
181 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
182 btrfs_header_bytenr(&node->header));
184 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
/* "nritems > 1" keeps the unsigned subtraction from wrapping */
185 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
186 struct btrfs_key cpukey;
187 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
188 BUG_ON(btrfs_comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
/*
 * Sanity-check the leaf at path->nodes[level]; every violation trips
 * BUG_ON.  Checks visible here:
 *   - free space in the leaf is not negative
 *   - the key in the parent slot equals the first item key, and the parent
 *     block pointer equals the bytenr in the leaf header
 *   - adjacent item keys are ordered (comparison operand not visible)
 *   - the data of item i starts exactly where the data of item i + 1 ends
 *   - an item offset plus size equals BTRFS_LEAF_DATA_SIZE (the guarding
 *     condition on source line 225 is not visible; presumably i == 0)
 * NOTE(review): the loop bound is "i < nritems - 2", so the final adjacent
 * pair is never compared -- confirm whether that is intended.
 * NOTE(review): source lines 206-210, 217 and the return are missing.
 */
193 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
197 struct btrfs_leaf *leaf = &path->nodes[level]->leaf;
198 struct btrfs_node *parent = NULL;
200 u32 nritems = btrfs_header_nritems(&leaf->header);
/* pick up the parent only if the path has one at level + 1 */
202 if (path->nodes[level + 1])
203 parent = &path->nodes[level + 1]->node;
204 parent_slot = path->slots[level + 1];
205 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
211 struct btrfs_disk_key *parent_key;
212 parent_key = &parent->ptrs[parent_slot].key;
213 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
214 sizeof(struct btrfs_disk_key)));
215 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
216 btrfs_header_bytenr(&leaf->header));
/* "nritems > 1" keeps the unsigned subtraction from wrapping */
218 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
219 struct btrfs_key cpukey;
220 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
221 BUG_ON(btrfs_comp_keys(&leaf->items[i].key,
/* item data must be contiguous: item i begins where item i + 1 ends */
223 BUG_ON(btrfs_item_offset(leaf->items + i) !=
224 btrfs_item_end(leaf->items + i + 1));
226 BUG_ON(btrfs_item_offset(leaf->items + i) +
227 btrfs_item_size(leaf->items + i) !=
228 BTRFS_LEAF_DATA_SIZE(root));
/*
 * Run the consistency checks for the block at path->nodes[level],
 * dispatching to check_leaf() or check_node().
 * NOTE(review): the condition selecting between the two (source line 237)
 * is missing from this listing; presumably it tests for level 0 -- confirm.
 */
234 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
238 return check_leaf(root, path, level);
239 return check_node(root, path, level);
243 * search for key in the array p. items in p are item_size bytes apart
244 * and there are 'max' items in p
245 * the slot in the array is returned via slot, and it points to
246 * the place where you would insert key if it is not found in
249 * slot may point to max if the key is bigger than all of the keys
/*
 * Binary search over an array of fixed-stride records that each begin
 * with a struct btrfs_disk_key.
 * p:         start of the array, addressed in bytes
 * item_size: distance in bytes from one record to the next
 * key:       CPU-format key being looked for
 * NOTE(review): the remaining parameters, the loop header, the branch on
 * the comparison result and the return statements (source lines 252-257,
 * 259-260, 264-278) are missing from this listing.
 */
251 static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
258 struct btrfs_disk_key *tmp;
/* probe the middle of the current [low, high) window */
261 mid = (low + high) / 2;
/* the key sits at the start of record number mid */
262 tmp = (struct btrfs_disk_key *)(p + mid * item_size);
263 ret = btrfs_comp_keys(tmp, key);
279 * simple bin_search frontend that does the right thing for
/*
 * Search block c for key, choosing the record layout by block kind: a
 * leaf is searched as an array of struct btrfs_item, an interior node as
 * an array of struct btrfs_key_ptr.  The number of records comes from the
 * block header.  The result of generic_bin_search() is returned unchanged.
 * NOTE(review): the last argument of both calls (source lines 289, 294)
 * is missing from this listing; presumably it is slot -- confirm.
 */
282 static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
284 if (btrfs_is_leaf(c)) {
/* same block, viewed through the leaf layout */
285 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
286 return generic_bin_search((void *)l->items,
287 sizeof(struct btrfs_item),
288 key, btrfs_header_nritems(&c->header),
/* interior node: search the key/pointer array */
291 return generic_bin_search((void *)c->ptrs,
292 sizeof(struct btrfs_key_ptr),
293 key, btrfs_header_nritems(&c->header),
/*
 * Read the child block referenced by a slot of the interior node held in
 * parent_buf.  The child is read with the block size of the level directly
 * below the parent.
 * NOTE(review): the slot parameter declaration and the statements for an
 * out-of-range slot (source lines 301, 305-306, 308) are missing from this
 * listing; presumably NULL is returned in that case -- confirm.
 */
299 static struct btrfs_buffer *read_node_slot(struct btrfs_root *root,
300 struct btrfs_buffer *parent_buf,
303 struct btrfs_node *node = &parent_buf->node;
304 int level = btrfs_header_level(&node->header);
/* slot at or past the number of pointers in the parent */
307 if (slot >= btrfs_header_nritems(&node->header))
309 return read_tree_block(root, btrfs_node_blockptr(node, slot),
310 btrfs_level_size(root, level - 1));
/*
 * Rebalance the interior node at path->nodes[level] (called "mid" below)
 * against its left and right siblings.  Steps visible in this listing:
 *   1. If mid has a single pointer in the root-promotion branch, its only
 *      child is read, mid is released and cleaned, and its extent freed.
 *   2. If mid holds more than a quarter of the maximum pointers, the
 *      function takes an early exit (body not visible).
 *   3. The left sibling is COWed and pointers are pushed from mid into it.
 *   4. The right sibling is COWed and its pointers are pushed into mid; an
 *      emptied right sibling is removed from the parent and freed,
 *      otherwise the parent key at pslot + 1 is refreshed.
 *   5. If mid is left with one pointer, pointers are pulled from the left
 *      sibling via balance_node_right().
 *   6. An emptied mid is removed from the parent and freed, otherwise the
 *      parent key at pslot is refreshed.
 *   7. The path is repointed at whichever buffer now holds the original
 *      slot, the block is re-checked, and the sibling buffers are released.
 *
 * NOTE(review): in three places a buffer is passed to clean_tree_block()
 * right after btrfs_block_release() on the same buffer.  If the release
 * can drop the last reference this touches a freed buffer -- confirm the
 * reference counts.
 * NOTE(review): many source lines (conditions, error handling, returns)
 * are missing from this listing, so the exact control flow is not visible.
 */
313 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
314 *root, struct btrfs_path *path, int level)
316 struct btrfs_buffer *right_buf;
317 struct btrfs_buffer *mid_buf;
318 struct btrfs_buffer *left_buf;
319 struct btrfs_buffer *parent_buf = NULL;
320 struct btrfs_node *right = NULL;
321 struct btrfs_node *mid;
322 struct btrfs_node *left = NULL;
323 struct btrfs_node *parent = NULL;
327 int orig_slot = path->slots[level];
333 mid_buf = path->nodes[level];
334 mid = &mid_buf->node;
/* remember which child the path pointed at, verified again at the end */
335 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
/* only look one level up when such a level can exist */
337 if (level < BTRFS_MAX_LEVEL - 1)
338 parent_buf = path->nodes[level + 1];
339 pslot = path->slots[level + 1];
342 * deal with the case where there is only one pointer in the root
343 * by promoting the node below to a root
346 struct btrfs_buffer *child;
347 u64 bytenr = mid_buf->bytenr;
349 if (btrfs_header_nritems(&mid->header) != 1)
352 /* promote the child to a root */
353 child = read_node_slot(root, mid_buf, 0);
356 path->nodes[level] = NULL;
357 /* once for the path */
358 btrfs_block_release(root, mid_buf);
359 /* once for the root ptr */
360 btrfs_block_release(root, mid_buf);
361 clean_tree_block(trans, root, mid_buf);
362 return btrfs_free_extent(trans, root, bytenr,
365 parent = &parent_buf->node;
/* mid is more than a quarter full: early exit (body not visible) */
367 if (btrfs_header_nritems(&mid->header) >
368 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
/* siblings sit in the parent slots on either side of pslot */
371 left_buf = read_node_slot(root, parent_buf, pslot - 1);
372 right_buf = read_node_slot(root, parent_buf, pslot + 1);
374 /* first, try to make some room in the middle buffer */
376 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
378 left = &left_buf->node;
/* express the slot relative to the combined left + mid sequence */
379 orig_slot += btrfs_header_nritems(&left->header);
380 wret = push_node_left(trans, root, left_buf, mid_buf);
386 * then try to empty the right most buffer into the middle
389 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
391 right = &right_buf->node;
392 wret = push_node_left(trans, root, mid_buf, right_buf);
/* right sibling fully drained: unlink it from the parent and free it */
395 if (btrfs_header_nritems(&right->header) == 0) {
396 u64 bytenr = right_buf->bytenr;
397 btrfs_block_release(root, right_buf);
398 clean_tree_block(trans, root, right_buf);
401 wret = del_ptr(trans, root, path, level + 1, pslot +
405 wret = btrfs_free_extent(trans, root, bytenr,
/* right sibling survived: refresh its key in the parent */
410 memcpy(&parent->ptrs[pslot + 1].key,
412 sizeof(struct btrfs_disk_key));
413 BUG_ON(list_empty(&parent_buf->dirty));
416 if (btrfs_header_nritems(&mid->header) == 1) {
418 * we're not allowed to leave a node with one item in the
419 * tree during a delete. A deletion from lower in the tree
420 * could try to delete the only pointer in this node.
421 * So, pull some keys from the left.
422 * There has to be a left pointer at this point because
423 * otherwise we would have pulled some pointers from the
427 wret = balance_node_right(trans, root, mid_buf, left_buf);
432 if (btrfs_header_nritems(&mid->header) == 0) {
433 /* we've managed to empty the middle node, drop it */
434 u64 bytenr = mid_buf->bytenr;
435 btrfs_block_release(root, mid_buf);
436 clean_tree_block(trans, root, mid_buf);
439 wret = del_ptr(trans, root, path, level + 1, pslot);
442 wret = btrfs_free_extent(trans, root, bytenr,
447 /* update the parent key to reflect our changes */
448 memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key,
449 sizeof(struct btrfs_disk_key));
450 BUG_ON(list_empty(&parent_buf->dirty));
453 /* update the path */
455 if (btrfs_header_nritems(&left->header) > orig_slot) {
456 left_buf->count++; // released below
457 path->nodes[level] = left_buf;
458 path->slots[level + 1] -= 1;
459 path->slots[level] = orig_slot;
461 btrfs_block_release(root, mid_buf);
463 orig_slot -= btrfs_header_nritems(&left->header);
464 path->slots[level] = orig_slot;
467 /* double check we haven't messed things up */
468 check_block(root, path, level);
469 if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node,
474 btrfs_block_release(root, right_buf);
476 btrfs_block_release(root, left_buf);
481 * look for key in the tree. path is filled in with nodes along the way
482 * if key is found, we return zero and you can find the item in the leaf
483 * level of the path (level 0)
485 * If the key isn't found, the path points to the slot where it should
486 * be inserted, and 1 is returned. If there are other errors during the
487 * search a negative error number is returned.
489 * if ins_len > 0, nodes and leaves will be split as we walk down the
490 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
/*
 * Descend the tree looking for key and record the route in path p.
 * At each level visible here the function:
 *   - optionally COWs the current block, passing the parent and parent
 *     slot taken from p at level + 1
 *   - runs check_block() on the level
 *   - binary-searches the block for key
 *   - on interior nodes: splits a full node when ins_len > 0, rebalances
 *     when ins_len < 0, then reads the child selected by slot
 *   - on the leaf: splits it when ins_len > 0 and free space is too small
 * NOTE(review): the trailing parameters, the loop structure, the error
 * handling and the return statements are missing from this listing.
 * NOTE(review): the leaf free-space test compares the int result of
 * btrfs_leaf_free_space() against an expression containing sizeof, so the
 * comparison is presumably done in an unsigned type; a negative free space
 * would then compare as a very large value -- confirm.
 */
493 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
494 *root, struct btrfs_key *key, struct btrfs_path *p, int
497 struct btrfs_buffer *b;
498 struct btrfs_buffer *cow_buf;
499 struct btrfs_node *c;
508 level = btrfs_header_level(&b->node.header);
/* COW the block before it may be modified; parent info comes from p */
511 wret = btrfs_cow_block(trans, root, b, p->nodes[level +
512 1], p->slots[level + 1],
/* a caller that inserts or deletes (ins_len != 0) must also ask for cow */
516 BUG_ON(!cow && ins_len);
519 ret = check_block(root, p, level);
522 ret = bin_search(c, key, &slot);
/* interior node: fix up the slot, make room or rebalance, then descend */
523 if (!btrfs_is_leaf(c)) {
526 p->slots[level] = slot;
/* inserting and the node is completely full: split before descending */
527 if (ins_len > 0 && btrfs_header_nritems(&c->header) ==
528 BTRFS_NODEPTRS_PER_BLOCK(root)) {
529 int sret = split_node(trans, root, p, level);
/* the split may have moved the path; reload the slot from it */
535 slot = p->slots[level];
/* deleting: try to merge or rebalance this level on the way down */
536 } else if (ins_len < 0) {
537 int sret = balance_level(trans, root, p,
545 slot = p->slots[level];
546 BUG_ON(btrfs_header_nritems(&c->header) == 1);
/* read the child block selected by slot, one level further down */
548 b = read_tree_block(root,
549 btrfs_node_blockptr(c, slot),
550 btrfs_level_size(root, level - 1));
/* leaf level: record the slot and split if the new item will not fit */
552 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
553 p->slots[level] = slot;
554 if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
555 sizeof(struct btrfs_item) + ins_len) {
556 int sret = split_leaf(trans, root, p, ins_len);
561 BUG_ON(root->node->count == 1);
565 BUG_ON(root->node->count == 1);
570 * adjust the pointers going up the tree, starting at level
571 * making sure the right key of each node points to 'key'.
572 * This is used after shifting pointers to the left, so it stops
573 * fixing up pointers when a given leaf/node is not in slot 0 of the
576 * If this fails to write a tree block, it returns -1, but continues
577 * fixing up the blocks in ram so the tree is consistent.
/*
 * Starting at level and moving towards the root, overwrite the key stored
 * at the slot recorded in the path for each level with *key.  Every block
 * that is modified must already be on a dirty list (enforced by BUG_ON).
 * NOTE(review): the last parameter, the loop exit conditions (source
 * lines 588-589, 593-595) and the return statement are missing from this
 * listing; presumably the walk stops at an empty level or at the first
 * level whose slot is not 0 -- confirm.
 */
579 static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
580 *root, struct btrfs_path *path, struct btrfs_disk_key
585 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
586 struct btrfs_node *t;
587 int tslot = path->slots[i];
590 t = &path->nodes[i]->node;
/* replace the key in the slot the path goes through at this level */
591 memcpy(&t->ptrs[tslot].key, key, sizeof(*key));
592 BUG_ON(list_empty(&path->nodes[i]->dirty));
600 * try to push data from one node into the next node left in the
603 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
604 * error, and > 0 if there was no room in the left hand block.
/*
 * Move key pointers from the front of src_buf to the end of dst_buf.
 * The number moved is the free room in dst, capped at the number of
 * pointers src holds.  Any pointers remaining in src are shifted to the
 * front, and both headers get their new item counts.  Both buffers must
 * already be on a dirty list.
 * NOTE(review): the "no room" branch body and the return statements are
 * missing from this listing.
 */
606 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
607 *root, struct btrfs_buffer *dst_buf, struct
608 btrfs_buffer *src_buf)
610 struct btrfs_node *src = &src_buf->node;
611 struct btrfs_node *dst = &dst_buf->node;
617 src_nritems = btrfs_header_nritems(&src->header);
618 dst_nritems = btrfs_header_nritems(&dst->header);
/* free pointer slots left in the destination */
619 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
620 if (push_items <= 0) {
/* cannot move more than the source actually holds */
624 if (src_nritems < push_items)
625 push_items = src_nritems;
/* append to dst; distinct blocks, so memcpy is sufficient */
627 memcpy(dst->ptrs + dst_nritems, src->ptrs,
628 push_items * sizeof(struct btrfs_key_ptr));
/* close the gap in src; the ranges overlap, hence memmove */
629 if (push_items < src_nritems) {
630 memmove(src->ptrs, src->ptrs + push_items,
631 (src_nritems - push_items) *
632 sizeof(struct btrfs_key_ptr));
634 btrfs_set_header_nritems(&src->header, src_nritems - push_items);
635 btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
636 BUG_ON(list_empty(&src_buf->dirty));
637 BUG_ON(list_empty(&dst_buf->dirty));
642 * try to push data from one node into the next node right in the
645 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
646 * error, and > 0 if there was no room in the right hand block.
648 * this will only push up to 1/2 the contents of the left node over
/*
 * Move key pointers from the tail of src_buf to the front of dst_buf.
 * The count is the free room in dst, capped at src_nritems / 2 + 1.
 * Existing dst pointers are shifted up to make room, the tail of src is
 * copied in front of them, and both headers get their new item counts.
 * Both buffers must already be on a dirty list.
 * NOTE(review): the bodies of the "no room" and "would empty the node"
 * branches and the return statements are missing from this listing.
 */
650 static int balance_node_right(struct btrfs_trans_handle *trans, struct
651 btrfs_root *root, struct btrfs_buffer *dst_buf,
652 struct btrfs_buffer *src_buf)
654 struct btrfs_node *src = &src_buf->node;
655 struct btrfs_node *dst = &dst_buf->node;
662 src_nritems = btrfs_header_nritems(&src->header);
663 dst_nritems = btrfs_header_nritems(&dst->header);
/* free pointer slots left in the destination */
664 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
665 if (push_items <= 0) {
/* upper bound on how much of the source may be taken */
669 max_push = src_nritems / 2 + 1;
670 /* don't try to empty the node */
671 if (max_push > src_nritems)
673 if (max_push < push_items)
674 push_items = max_push;
/* open a gap at the front of dst; overlapping ranges, hence memmove */
676 memmove(dst->ptrs + push_items, dst->ptrs,
677 dst_nritems * sizeof(struct btrfs_key_ptr));
/* fill the gap with the last push_items pointers of src */
678 memcpy(dst->ptrs, src->ptrs + src_nritems - push_items,
679 push_items * sizeof(struct btrfs_key_ptr));
681 btrfs_set_header_nritems(&src->header, src_nritems - push_items);
682 btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
684 BUG_ON(list_empty(&src_buf->dirty));
685 BUG_ON(list_empty(&dst_buf->dirty));
690 * helper function to insert a new root level in the tree.
691 * A new node is allocated, and a single item is inserted to
692 * point to the existing root
694 * returns zero on success or < 0 on failure.
/*
 * Add a new level on top of the tree.  A fresh block of root->nodesize is
 * allocated and zeroed, its header is set up with one item, the requested
 * level, its own bytenr, this root's objectid and the filesystem fsid.
 * Its single pointer gets the first key of the old root (item key for a
 * leaf, pointer key for a node) and the old root's bytenr.  The path is
 * then extended with the new block at slot 0.
 * Preconditions (BUG_ON): the path has no node at level yet, and the node
 * at level - 1 is the current root.
 * NOTE(review): source lines 708, 727-728 and the return are missing from
 * this listing; presumably they set c = &t->node and install t as
 * root->node -- confirm.
 * NOTE(review): the result of btrfs_alloc_free_block() is used without a
 * visible NULL check.
 */
696 static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
697 *root, struct btrfs_path *path, int level)
699 struct btrfs_buffer *t;
700 struct btrfs_node *lower;
701 struct btrfs_node *c;
702 struct btrfs_disk_key *lower_key;
704 BUG_ON(path->nodes[level]);
705 BUG_ON(path->nodes[level-1] != root->node);
707 t = btrfs_alloc_free_block(trans, root, root->nodesize);
/* start from an all-zero block, then fill in the header fields */
709 memset(c, 0, root->nodesize);
710 btrfs_set_header_nritems(&c->header, 1);
711 btrfs_set_header_level(&c->header, level);
712 btrfs_set_header_bytenr(&c->header, t->bytenr);
713 btrfs_set_header_owner(&c->header, root->root_key.objectid);
714 memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
715 sizeof(c->header.fsid));
716 lower = &path->nodes[level-1]->node;
/* the first key lives in a different place for leaves and nodes */
718 if (btrfs_is_leaf(lower))
719 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
721 lower_key = &lower->ptrs[0].key;
/* the single pointer of the new root refers to the old root */
723 memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key));
724 btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->bytenr);
725 /* the super has an extra ref to root->node */
726 btrfs_block_release(root, root->node);
729 path->nodes[level] = t;
730 path->slots[level] = 0;
735 * worker function to insert a single pointer in a node.
736 * the node should have enough room for the pointer already
738 * slot and level indicate where you want the key to go, and
739 * bytenr is the block the key points to.
741 * returns zero on success and < 0 on any error
/*
 * Insert one key/block-pointer pair into the node at path->nodes[level].
 * key:    on-disk key to store in the new slot
 * bytenr: block number the new pointer refers to
 * slot:   position at which the pair is inserted; later pointers are
 *         shifted up by one
 * The node must exist in the path and must already be on a dirty list.
 * NOTE(review): source lines 753-754, 756 and the return are missing from
 * this listing, including what happens when the node is already full.
 */
743 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
744 *root, struct btrfs_path *path, struct btrfs_disk_key
745 *key, u64 bytenr, int slot, int level)
747 struct btrfs_node *lower;
750 BUG_ON(!path->nodes[level]);
751 lower = &path->nodes[level]->node;
752 nritems = btrfs_header_nritems(&lower->header);
/* node already holds the maximum number of pointers (body not visible) */
755 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
/* inserting in the middle: open a gap; appending needs no shift */
757 if (slot != nritems) {
758 memmove(lower->ptrs + slot + 1, lower->ptrs + slot,
759 (nritems - slot) * sizeof(struct btrfs_key_ptr));
761 memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key));
762 btrfs_set_node_blockptr(lower, slot, bytenr);
763 btrfs_set_header_nritems(&lower->header, nritems + 1);
764 BUG_ON(list_empty(&path->nodes[level]->dirty));
769 * split the node at the specified level in path in two.
770 * The path is corrected to point to the appropriate node after the split
772 * Before splitting this tries to make some room in the node by pushing
773 * left and right, if either one works, it returns right away.
775 * returns 0 on success and < 0 on failure
/*
 * Split the interior node at path->nodes[level] into two nodes.
 * Visible steps: if the node is the tree root, a new root is inserted
 * above it first.  A new block of root->nodesize is allocated and given
 * the flags and level of the original plus its own bytenr, this root's
 * objectid and the filesystem fsid.  The pointers from index mid onwards
 * (mid = (c_nritems + 1) / 2) are copied into the new block, both item
 * counts are updated, and a pointer to the new block is inserted into the
 * parent just after the slot of the original.  The path then follows
 * whichever half contains the original slot and the other buffer is
 * released.
 * NOTE(review): source lines 790, 794-796, 811-812, 816-819 and the
 * return are missing from this listing, including the assignment of c.
 * NOTE(review): the result of btrfs_alloc_free_block() is used without a
 * visible NULL check.
 */
777 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
778 *root, struct btrfs_path *path, int level)
780 struct btrfs_buffer *t;
781 struct btrfs_node *c;
782 struct btrfs_buffer *split_buffer;
783 struct btrfs_node *split;
789 t = path->nodes[level];
791 if (t == root->node) {
792 /* trying to split the root, lets make a new one */
793 ret = insert_new_root(trans, root, path, level + 1);
797 c_nritems = btrfs_header_nritems(&c->header);
798 split_buffer = btrfs_alloc_free_block(trans, root, root->nodesize);
799 split = &split_buffer->node;
/* the new node inherits flags and level, but has its own location */
800 btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
801 btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
802 btrfs_set_header_bytenr(&split->header, split_buffer->bytenr);
803 btrfs_set_header_owner(&split->header, root->root_key.objectid);
804 memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
805 sizeof(split->header.fsid));
/* the lower half (rounded up) stays, the rest moves to the new node */
806 mid = (c_nritems + 1) / 2;
807 memcpy(split->ptrs, c->ptrs + mid,
808 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
809 btrfs_set_header_nritems(&split->header, c_nritems - mid);
810 btrfs_set_header_nritems(&c->header, mid);
813 BUG_ON(list_empty(&t->dirty));
/* link the new node into the parent, right after the original */
814 wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
815 split_buffer->bytenr, path->slots[level + 1] + 1,
/* the slot moved to the new node: switch the path over to it */
820 if (path->slots[level] >= mid) {
821 path->slots[level] -= mid;
822 btrfs_block_release(root, t);
823 path->nodes[level] = split_buffer;
824 path->slots[level + 1] += 1;
/* the slot stayed in the original node: drop the new buffer reference */
826 btrfs_block_release(root, split_buffer);
832 * push some data in the path leaf to the right, trying to free up at
833 * least data_size bytes. returns zero if the push worked, nonzero otherwise
835 * returns 1 if the push failed because the other node didn't have enough
836 * room, 0 if everything worked out and < 0 if there were major errors.
/*
 * Move items from the end of the path leaf (path->nodes[0], "left") into
 * the front of its right sibling.  Visible steps:
 *   - bail out when there is no parent or the leaf is the last child
 *   - read the right sibling and give up if it lacks room for data_size
 *     plus one item header; COW it and check the room again
 *   - walk the left items from last to first, accumulating the space the
 *     moved items would need (the slot being inserted at also reserves
 *     data_size plus one header)
 *   - shift the right leaf data and item headers, copy the chosen items
 *     and their data across, and recompute every item offset in right
 *   - shrink left, refresh the parent key for the right leaf, and move the
 *     path to the right leaf if the slot went with the pushed items
 * NOTE(review): many source lines (declarations, branch bodies, the
 * push_items counter update, returns) are missing from this listing.
 */
838 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
839 *root, struct btrfs_path *path, int data_size)
841 struct btrfs_buffer *left_buf = path->nodes[0];
842 struct btrfs_leaf *left = &left_buf->leaf;
843 struct btrfs_leaf *right;
844 struct btrfs_buffer *right_buf;
845 struct btrfs_buffer *upper;
851 struct btrfs_item *item;
855 slot = path->slots[1];
/* the leaf is the root: it has no siblings (body not visible) */
856 if (!path->nodes[1]) {
859 upper = path->nodes[1];
/* the leaf is the last child of its parent (body not visible) */
860 if (slot >= btrfs_header_nritems(&upper->node.header) - 1) {
863 right_buf = read_tree_block(root,
864 btrfs_node_blockptr(&upper->node, slot + 1),
866 right = &right_buf->leaf;
867 free_space = btrfs_leaf_free_space(root, right);
868 if (free_space < data_size + sizeof(struct btrfs_item)) {
869 btrfs_block_release(root, right_buf);
872 /* cow and double check */
873 btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
874 right = &right_buf->leaf;
875 free_space = btrfs_leaf_free_space(root, right);
876 if (free_space < data_size + sizeof(struct btrfs_item)) {
877 btrfs_block_release(root, right_buf);
881 left_nritems = btrfs_header_nritems(&left->header);
/* decide how many trailing items of left can move */
882 for (i = left_nritems - 1; i >= 0; i--) {
883 item = left->items + i;
/* reserve room for the pending insert if it travels with this item */
884 if (path->slots[0] == i)
885 push_space += data_size + sizeof(*item);
886 if (btrfs_item_size(item) + sizeof(*item) + push_space >
890 push_space += btrfs_item_size(item) + sizeof(*item);
892 if (push_items == 0) {
893 btrfs_block_release(root, right_buf);
896 right_nritems = btrfs_header_nritems(&right->header);
897 /* push left to right */
/* from here on push_space is the byte count of the data being moved */
898 push_space = btrfs_item_end(left->items + left_nritems - push_items);
899 push_space -= leaf_data_end(root, left);
900 /* make room in the right data area */
901 memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) -
902 push_space, btrfs_leaf_data(right) + leaf_data_end(root, right),
903 BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right));
904 /* copy from the left data area */
905 memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space,
906 btrfs_leaf_data(left) + leaf_data_end(root, left), push_space);
/* open a gap at the front of the right item header array */
907 memmove(right->items + push_items, right->items,
908 right_nritems * sizeof(struct btrfs_item));
909 /* copy the items from left to right */
910 memcpy(right->items, left->items + left_nritems - push_items,
911 push_items * sizeof(struct btrfs_item));
913 /* update the item pointers */
914 right_nritems += push_items;
915 btrfs_set_header_nritems(&right->header, right_nritems);
/* rebuild offsets top-down: each item ends where the previous begins */
916 push_space = BTRFS_LEAF_DATA_SIZE(root);
917 for (i = 0; i < right_nritems; i++) {
918 btrfs_set_item_offset(right->items + i, push_space -
919 btrfs_item_size(right->items + i));
920 push_space = btrfs_item_offset(right->items + i);
922 left_nritems -= push_items;
923 btrfs_set_header_nritems(&left->header, left_nritems);
925 BUG_ON(list_empty(&left_buf->dirty));
926 BUG_ON(list_empty(&right_buf->dirty));
/* the first key of right changed, so the parent key must follow */
927 memcpy(&upper->node.ptrs[slot + 1].key,
928 &right->items[0].key, sizeof(struct btrfs_disk_key));
929 BUG_ON(list_empty(&upper->dirty));
931 /* then fixup the leaf pointer in the path */
932 if (path->slots[0] >= left_nritems) {
933 path->slots[0] -= left_nritems;
934 btrfs_block_release(root, path->nodes[0]);
935 path->nodes[0] = right_buf;
938 btrfs_block_release(root, right_buf);
943 * push some data in the path leaf to the left, trying to free up at
944 * least data_size bytes. returns zero if the push worked, nonzero otherwise
/*
 * Move items from the front of the path leaf (path->nodes[0], "right")
 * to the end of its left sibling.  Visible steps:
 *   - bail out when there is no parent
 *   - read the left sibling and give up if it lacks room for data_size
 *     plus one item header; COW it and check the room again
 *   - walk the right items from first to last, accumulating the space the
 *     moved items would need (the slot being inserted at also reserves
 *     data_size plus one header)
 *   - append the chosen item headers and data to left and rebase the
 *     offsets of the appended items
 *   - compact the remaining data and headers in right and recompute all
 *     of its item offsets
 *   - propagate the new first key of right upwards via fixup_low_keys()
 *   - repoint the path at left if the slot went with the pushed items
 * NOTE(review): old_left_nritems is a u32, so
 * BUG_ON(old_left_nritems < 0) can never fire.
 * NOTE(review): many source lines (declarations, the assignment of left,
 * branch bodies, the push_items counter update, returns) are missing from
 * this listing.
 */
946 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
947 *root, struct btrfs_path *path, int data_size)
949 struct btrfs_buffer *right_buf = path->nodes[0];
950 struct btrfs_leaf *right = &right_buf->leaf;
951 struct btrfs_buffer *t;
952 struct btrfs_leaf *left;
958 struct btrfs_item *item;
959 u32 old_left_nritems;
963 slot = path->slots[1];
/* the leaf is the root: it has no siblings (body not visible) */
967 if (!path->nodes[1]) {
970 t = read_tree_block(root,
971 btrfs_node_blockptr(&path->nodes[1]->node, slot - 1),
974 free_space = btrfs_leaf_free_space(root, left);
975 if (free_space < data_size + sizeof(struct btrfs_item)) {
976 btrfs_block_release(root, t);
980 /* cow and double check */
981 btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
983 free_space = btrfs_leaf_free_space(root, left);
984 if (free_space < data_size + sizeof(struct btrfs_item)) {
985 btrfs_block_release(root, t);
/* decide how many leading items of right can move */
989 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
990 item = right->items + i;
/* reserve room for the pending insert if it travels with this item */
991 if (path->slots[0] == i)
992 push_space += data_size + sizeof(*item);
993 if (btrfs_item_size(item) + sizeof(*item) + push_space >
997 push_space += btrfs_item_size(item) + sizeof(*item);
999 if (push_items == 0) {
1000 btrfs_block_release(root, t);
1003 /* push data from right to left */
1004 memcpy(left->items + btrfs_header_nritems(&left->header),
1005 right->items, push_items * sizeof(struct btrfs_item));
/* byte count of the data owned by the first push_items items of right */
1006 push_space = BTRFS_LEAF_DATA_SIZE(root) -
1007 btrfs_item_offset(right->items + push_items -1);
1008 memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space,
1009 btrfs_leaf_data(right) +
1010 btrfs_item_offset(right->items + push_items - 1),
1012 old_left_nritems = btrfs_header_nritems(&left->header);
1013 BUG_ON(old_left_nritems < 0);
/* appended items keep right-relative offsets; shift them below left data */
1015 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1016 u32 ioff = btrfs_item_offset(left->items + i);
1017 btrfs_set_item_offset(left->items + i, ioff -
1018 (BTRFS_LEAF_DATA_SIZE(root) -
1019 btrfs_item_offset(left->items +
1020 old_left_nritems - 1)));
1022 btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
1024 /* fixup right node */
/* byte count of the data that stays behind in right */
1025 push_space = btrfs_item_offset(right->items + push_items - 1) -
1026 leaf_data_end(root, right);
1027 memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1028 push_space, btrfs_leaf_data(right) +
1029 leaf_data_end(root, right), push_space);
1030 memmove(right->items, right->items + push_items,
1031 (btrfs_header_nritems(&right->header) - push_items) *
1032 sizeof(struct btrfs_item));
1033 btrfs_set_header_nritems(&right->header,
1034 btrfs_header_nritems(&right->header) -
/* rebuild offsets top-down: each item ends where the previous begins */
1036 push_space = BTRFS_LEAF_DATA_SIZE(root);
1038 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1039 btrfs_set_item_offset(right->items + i, push_space -
1040 btrfs_item_size(right->items + i));
1041 push_space = btrfs_item_offset(right->items + i);
1044 BUG_ON(list_empty(&t->dirty));
1045 BUG_ON(list_empty(&right_buf->dirty));
/* right has a new first key; update the keys above it */
1047 wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
1051 /* then fixup the leaf pointer in the path */
1052 if (path->slots[0] < push_items) {
1053 path->slots[0] += old_left_nritems;
1054 btrfs_block_release(root, path->nodes[0]);
1056 path->slots[1] -= 1;
1058 btrfs_block_release(root, t);
1059 path->slots[0] -= push_items;
1061 BUG_ON(path->slots[0] < 0);
1066 * split the path's leaf in two, making sure there is at least data_size
1067 * available for the resulting leaf level of the path.
1069 * returns 0 if all went well and < 0 on failure.
/*
 * Make room for data_size more bytes (plus one item header) in the path
 * leaf.  Visible steps:
 *   - try push_leaf_left() and then push_leaf_right(); if the leaf now has
 *     enough free space, take the early exit (body not visible)
 *   - if the leaf is the root, insert a new root above it
 *   - allocate a new leaf of root->leafsize, set up its header, and copy
 *     the items from index mid onwards (mid = (nritems + 1) / 2) together
 *     with their data into it, rebasing the copied item offsets
 *   - truncate the original leaf to mid items and insert a pointer to the
 *     new leaf into the parent right after the original
 *   - switch the path to the new leaf if the slot moved there, otherwise
 *     release the new buffer
 * NOTE(review): many source lines are missing from this listing,
 * including the assignment of l, the bodies of the two FIXME branches,
 * the condition around the path switch, and the returns.
 */
1071 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1072 *root, struct btrfs_path *path, int data_size)
1074 struct btrfs_buffer *l_buf;
1075 struct btrfs_leaf *l;
1079 struct btrfs_leaf *right;
1080 struct btrfs_buffer *right_buffer;
/* total bytes the pending insert needs: data plus its item header */
1081 int space_needed = data_size + sizeof(struct btrfs_item);
1088 /* first try to make some room by pushing left and right */
1089 wret = push_leaf_left(trans, root, path, data_size);
1093 wret = push_leaf_right(trans, root, path, data_size);
/* the pushes may have changed which leaf the path points at */
1097 l_buf = path->nodes[0];
1100 /* did the pushes work? */
1101 if (btrfs_leaf_free_space(root, l) >=
1102 sizeof(struct btrfs_item) + data_size)
/* splitting the root leaf: it needs a parent to hold two pointers */
1105 if (!path->nodes[1]) {
1106 ret = insert_new_root(trans, root, path, 1);
1110 slot = path->slots[0];
1111 nritems = btrfs_header_nritems(&l->header);
1112 mid = (nritems + 1)/ 2;
1113 right_buffer = btrfs_alloc_free_block(trans, root, root->leafsize);
1114 BUG_ON(!right_buffer);
1115 BUG_ON(mid == nritems);
1116 right = &right_buffer->leaf;
1117 memset(&right->header, 0, sizeof(right->header));
1119 /* FIXME, just alloc a new leaf here */
1120 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
1121 BTRFS_LEAF_DATA_SIZE(root))
1124 /* FIXME, just alloc a new leaf here */
1125 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1126 BTRFS_LEAF_DATA_SIZE(root))
1129 btrfs_set_header_nritems(&right->header, nritems - mid);
1130 btrfs_set_header_bytenr(&right->header, right_buffer->bytenr);
1131 btrfs_set_header_level(&right->header, 0);
1132 btrfs_set_header_owner(&right->header, root->root_key.objectid);
1133 memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1134 sizeof(right->header.fsid));
/* bytes of item data belonging to items mid .. nritems - 1 */
1135 data_copy_size = btrfs_item_end(l->items + mid) -
1136 leaf_data_end(root, l);
1137 memcpy(right->items, l->items + mid,
1138 (nritems - mid) * sizeof(struct btrfs_item));
1139 memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1140 data_copy_size, btrfs_leaf_data(l) +
1141 leaf_data_end(root, l), data_copy_size);
/* the copied data now ends at the top of the new leaf; rebase offsets */
1142 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1143 btrfs_item_end(l->items + mid);
1145 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1146 u32 ioff = btrfs_item_offset(right->items + i);
1147 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1150 btrfs_set_header_nritems(&l->header, mid);
/* link the new leaf into the parent, right after the original */
1152 wret = insert_ptr(trans, root, path, &right->items[0].key,
1153 right_buffer->bytenr, path->slots[1] + 1, 1);
1156 BUG_ON(list_empty(&right_buffer->dirty));
1157 BUG_ON(list_empty(&l_buf->dirty));
1158 BUG_ON(path->slots[0] != slot);
1160 btrfs_block_release(root, path->nodes[0]);
1161 path->nodes[0] = right_buffer;
1162 path->slots[0] -= mid;
1163 path->slots[1] += 1;
1165 btrfs_block_release(root, right_buffer);
1166 BUG_ON(path->slots[0] < 0);
1171 * Given a key and a data size, insert an empty (data not yet written) item into the tree.
1172 * This does all the path init required, making room in the tree if needed.
/*
 * Insert an item header for cpu_key with data_size bytes reserved for its
 * data; the data itself is not written here.  Visible steps:
 *   - convert the key to on-disk format
 *   - search for the key with ins_len = data_size and cow = 1, which
 *     leaves the path at the insertion slot
 *   - when inserting before existing items: lower the data offsets of the
 *     items from slot onwards, shift their headers up by one and move
 *     their data down by data_size
 *   - fill in the key, offset and size of the new item and bump nritems
 *   - call fixup_low_keys() with the new key (guarding condition not
 *     visible; presumably only when slot is 0 -- confirm)
 * On return the path refers to the new item, so the caller can write it.
 * NOTE(review): the free-space test compares an int against an expression
 * containing sizeof and a u32, so it is presumably evaluated unsigned; a
 * negative free space would not be caught by it -- confirm.
 * NOTE(review): many source lines (error handling after the search,
 * branch bodies, the return) are missing from this listing.
 */
1174 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1175 *root, struct btrfs_path *path, struct btrfs_key
1176 *cpu_key, u32 data_size)
1181 struct btrfs_leaf *leaf;
1182 struct btrfs_buffer *leaf_buf;
1184 unsigned int data_end;
1185 struct btrfs_disk_key disk_key;
1187 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
1189 /* create a root if there isn't one */
1192 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
1199 slot_orig = path->slots[0];
1200 leaf_buf = path->nodes[0];
1201 leaf = &leaf_buf->leaf;
1203 nritems = btrfs_header_nritems(&leaf->header);
1204 data_end = leaf_data_end(root, leaf);
/* the search should have made room already; this re-checks it */
1206 if (btrfs_leaf_free_space(root, leaf) <
1207 sizeof(struct btrfs_item) + data_size)
1210 slot = path->slots[0];
/* not appending: existing items from slot onwards have to move */
1212 if (slot != nritems) {
/* end of the data of the item currently at slot */
1214 unsigned int old_data = btrfs_item_end(leaf->items + slot);
1217 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1219 /* first correct the data pointers */
1220 for (i = slot; i < nritems; i++) {
1221 u32 ioff = btrfs_item_offset(leaf->items + i);
1222 btrfs_set_item_offset(leaf->items + i,
1226 /* shift the items */
1227 memmove(leaf->items + slot + 1, leaf->items + slot,
1228 (nritems - slot) * sizeof(struct btrfs_item));
1230 /* shift the data */
1231 memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1232 btrfs_leaf_data(leaf) +
1233 data_end, old_data - data_end);
/* the new item data will sit just below old_data */
1234 data_end = old_data;
1236 /* setup the item for the new data */
1237 memcpy(&leaf->items[slot].key, &disk_key,
1238 sizeof(struct btrfs_disk_key));
1239 btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1240 btrfs_set_item_size(leaf->items + slot, data_size);
1241 btrfs_set_header_nritems(&leaf->header, nritems + 1);
1245 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
1247 BUG_ON(list_empty(&leaf_buf->dirty));
1248 if (btrfs_leaf_free_space(root, leaf) < 0)
1250 check_leaf(root, path, 0);
1256 * Given a key and some data, insert an item into the tree.
1257 * This does all the path init required, making room in the tree if needed.
/*
 * Insert an item for cpu_key using a path local to this function: the item
 * is created with btrfs_insert_empty_item, data_size bytes are copied from
 * data into it, and the path is released.
 *
 * NOTE(review): the end of the parameter list, the remaining local
 * declarations, any check of ret around the copy, and the return are on
 * lines missing from this listing.
 */
1259 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1260 *root, struct btrfs_key *cpu_key, void *data, u32
1264 struct btrfs_path path;
1267 btrfs_init_path(&path);
1268 ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size);
/* byte pointer to the data area of the item at path.slots[0] */
1270 ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8);
1271 memcpy(ptr, data, data_size);
1273 btrfs_release_path(root, &path);
1278 * delete the pointer from a given node.
1280 * If the delete empties a node, the node is removed from the tree,
1281 * continuing all the way to the root if required. The root is converted into
1282 * a leaf if all the nodes are emptied.
/*
 * Remove the key pointer at slot from the node at path->nodes[level].
 *
 * NOTE(review): this listing has lost lines (braces, some declarations, the
 * handling of wret and the return), so the return value cannot be described
 * from here.
 */
1284 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1285 struct btrfs_path *path, int level, int slot)
1287 struct btrfs_node *node;
1288 struct btrfs_buffer *parent = path->nodes[level];
1293 node = &parent->node;
1294 nritems = btrfs_header_nritems(&node->header);
/* removing the last pointer needs no move; otherwise close the gap */
1295 if (slot != nritems -1) {
1296 memmove(node->ptrs + slot, node->ptrs + slot + 1,
1297 sizeof(struct btrfs_key_ptr) * (nritems - slot - 1));
/*
 * NOTE(review): as shown, nritems is written back unchanged; a decrement
 * presumably sits on a line missing from this listing - confirm.
 */
1300 btrfs_set_header_nritems(&node->header, nritems);
/* an emptied root node must be at level 1; it is relabelled as level 0 */
1301 if (nritems == 0 && parent == root->node) {
1302 BUG_ON(btrfs_header_level(&root->node->node.header) != 1);
1303 /* just turn the root into a leaf and break */
1304 btrfs_set_header_level(&root->node->node.header, 0);
1305 } else if (slot == 0) {
/*
 * slot 0 was removed, so the node's first key changed; the new ptrs[0].key
 * is handed to fixup_low_keys (remaining arguments are on a missing line).
 */
1306 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
/* the modified node's buffer is expected to be on a dirty list */
1311 BUG_ON(list_empty(&parent->dirty));
1316 * delete the item at the leaf level in path. If that empties
1317 * the leaf, remove it from the tree
/*
 * Delete the item at path->slots[0] from the leaf at path->nodes[0],
 * closing the gaps in both the data area and the item array.
 *
 * NOTE(review): this listing has lost lines (braces, else branches, some
 * declarations, the handling of wret/ret and the return), so branch
 * structure below is described only as far as the visible lines show it.
 */
1319 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1320 struct btrfs_path *path)
1323 struct btrfs_leaf *leaf;
1324 struct btrfs_buffer *leaf_buf;
1331 leaf_buf = path->nodes[0];
1332 leaf = &leaf_buf->leaf;
1333 slot = path->slots[0];
/* offset and size of the data belonging to the item being removed */
1334 doff = btrfs_item_offset(leaf->items + slot);
1335 dsize = btrfs_item_size(leaf->items + slot);
1336 nritems = btrfs_header_nritems(&leaf->header);
/* when the last item is removed there is nothing after it to move */
1338 if (slot != nritems - 1) {
1340 int data_end = leaf_data_end(root, leaf);
/*
 * Data starting at data_end is moved up by dsize bytes; the length
 * argument is on a line missing from this listing.
 */
1341 memmove(btrfs_leaf_data(leaf) + data_end + dsize,
1342 btrfs_leaf_data(leaf) + data_end,
/* items after slot had their data moved, so their offsets grow by dsize */
1344 for (i = slot + 1; i < nritems; i++) {
1345 u32 ioff = btrfs_item_offset(leaf->items + i);
1346 btrfs_set_item_offset(leaf->items + i, ioff + dsize);
/* slide the following item headers down over the removed one */
1348 memmove(leaf->items + slot, leaf->items + slot + 1,
1349 sizeof(struct btrfs_item) *
1350 (nritems - slot - 1));
1352 btrfs_set_header_nritems(&leaf->header, nritems - 1);
1354 /* delete the leaf if we've emptied it */
/*
 * A leaf that is also the root only has its header level set to 0.  The
 * other visible path cleans the block, removes its pointer from level 1
 * and frees its extent.
 * NOTE(review): the enclosing condition and the else are on missing lines.
 */
1356 if (leaf_buf == root->node) {
1357 btrfs_set_header_level(&leaf->header, 0);
1358 BUG_ON(list_empty(&leaf_buf->dirty));
1360 clean_tree_block(trans, root, leaf_buf);
1361 wret = del_ptr(trans, root, path, 1, path->slots[1]);
1364 wret = btrfs_free_extent(trans, root,
1371 int used = leaf_space_used(leaf, 0, nritems);
/*
 * The leaf's first key is passed up via fixup_low_keys.
 * NOTE(review): any condition guarding this call is not visible in this
 * listing.
 */
1373 wret = fixup_low_keys(trans, root, path,
1374 &leaf->items[0].key, 1);
1378 BUG_ON(list_empty(&leaf_buf->dirty));
/*
 * Below: when less than a third of the leaf data size is used, items are
 * pushed left, then right if the path still points at this leaf and it
 * still has items.  If the leaf ends up empty, its parent pointer is
 * deleted and its extent freed.
 */
1380 /* delete the leaf if it is mostly empty */
1381 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
1382 /* push_leaf_left fixes the path.
1383 * make sure the path still points to our leaf
1384 * for possible call to del_ptr below
1386 slot = path->slots[1];
1388 wret = push_leaf_left(trans, root, path, 1);
1391 if (path->nodes[0] == leaf_buf &&
1392 btrfs_header_nritems(&leaf->header)) {
1393 wret = push_leaf_right(trans, root, path, 1);
1397 if (btrfs_header_nritems(&leaf->header) == 0) {
1398 u64 bytenr = leaf_buf->bytenr;
1399 clean_tree_block(trans, root, leaf_buf);
1400 wret = del_ptr(trans, root, path, 1, slot);
1403 wret = btrfs_free_extent(trans, root, bytenr,
1405 btrfs_block_release(root, leaf_buf);
1409 btrfs_block_release(root, leaf_buf);
/*
 * Grow the item at path->slots[0] in the leaf at path->nodes[0] by
 * data_size bytes: item offsets from that slot onward are adjusted, the
 * data is shifted to open the extra room, and the item's size is raised by
 * data_size.
 *
 * NOTE(review): this listing has lost lines (braces, some declarations, the
 * statements guarded by the checks below, and the return), so those parts
 * cannot be described from here.
 */
1416 int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1417 *root, struct btrfs_path *path, u32 data_size)
1422 struct btrfs_leaf *leaf;
1423 struct btrfs_buffer *leaf_buf;
1425 unsigned int data_end;
1426 unsigned int old_data;
1427 unsigned int old_size;
1430 slot_orig = path->slots[0];
1431 leaf_buf = path->nodes[0];
1432 leaf = &leaf_buf->leaf;
1434 nritems = btrfs_header_nritems(&leaf->header);
1435 data_end = leaf_data_end(root, leaf);
/*
 * Only data_size extra bytes are needed; no new item header is added.
 * NOTE(review): the later "< 0" check on the same helper suggests a signed
 * return; if so, comparing it with the u32 data_size converts a negative
 * value to a large unsigned one - confirm.
 */
1437 if (btrfs_leaf_free_space(root, leaf) < data_size)
1439 slot = path->slots[0];
/* end of the data belonging to the item being extended */
1440 old_data = btrfs_item_end(leaf->items + slot);
/* the slot must refer to an existing item */
1443 BUG_ON(slot >= nritems);
1446 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1448 /* first correct the data pointers */
/*
 * The loop starts at slot, so the extended item's own offset is updated
 * too; the new offset expression is on a line missing from this listing.
 */
1449 for (i = slot; i < nritems; i++) {
1450 u32 ioff = btrfs_item_offset(leaf->items + i);
1451 btrfs_set_item_offset(leaf->items + i,
1454 /* shift the data */
/* moves the bytes in [data_end, old_data) down by data_size */
1455 memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1456 btrfs_leaf_data(leaf) + data_end, old_data - data_end);
1457 data_end = old_data;
1458 old_size = btrfs_item_size(leaf->items + slot);
1459 btrfs_set_item_size(leaf->items + slot, old_size + data_size);
1462 if (btrfs_leaf_free_space(root, leaf) < 0)
1464 check_leaf(root, path, 0);
1469 * walk up the tree as far as required to find the next leaf.
1470 * returns 0 if it found something or 1 if there are no greater leaves.
1471 * returns < 0 on io errors.
1473 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1478 struct btrfs_buffer *c;
1479 struct btrfs_buffer *next = NULL;
1481 while(level < BTRFS_MAX_LEVEL) {
1482 if (!path->nodes[level])
1484 slot = path->slots[level] + 1;
1485 c = path->nodes[level];
1486 if (slot >= btrfs_header_nritems(&c->node.header)) {
1490 bytenr = btrfs_node_blockptr(&c->node, slot);
1492 btrfs_block_release(root, next);
1493 next = read_tree_block(root, bytenr,
1494 btrfs_level_size(root, level - 1));
1497 path->slots[level] = slot;
1500 c = path->nodes[level];
1501 btrfs_block_release(root, c);
1502 path->nodes[level] = next;
1503 path->slots[level] = 0;
1506 next = read_tree_block(root,
1507 btrfs_node_blockptr(&next->node, 0),
1508 btrfs_level_size(root, level - 1));