Take out the merge-during-search-on-delete code, it is buggy.
[platform/upstream/btrfs-progs.git] / ctree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7 #include "print-tree.h"
8
9 int split_node(struct ctree_root *root, struct ctree_path *path, int level);
10 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
11 int push_node_left(struct ctree_root *root, struct ctree_path *path, int level);
12 int push_node_right(struct ctree_root *root,
13                     struct ctree_path *path, int level);
14 int del_ptr(struct ctree_root *root, struct ctree_path *path, int level);
15
16 inline void init_path(struct ctree_path *p)
17 {
18         memset(p, 0, sizeof(*p));
19 }
20
21 void release_path(struct ctree_root *root, struct ctree_path *p)
22 {
23         int i;
24         for (i = 0; i < MAX_LEVEL; i++) {
25                 if (!p->nodes[i])
26                         break;
27                 tree_block_release(root, p->nodes[i]);
28         }
29 }
30
31 /*
32  * The leaf data grows from end-to-front in the node.
33  * this returns the address of the start of the last item,
34  * which is the stop of the leaf data stack
35  */
36 static inline unsigned int leaf_data_end(struct leaf *leaf)
37 {
38         unsigned int nr = leaf->header.nritems;
39         if (nr == 0)
40                 return sizeof(leaf->data);
41         return leaf->items[nr-1].offset;
42 }
43
44 /*
45  * The space between the end of the leaf items and
46  * the start of the leaf data.  IOW, how much room
47  * the leaf has left for both items and data
48  */
49 int leaf_free_space(struct leaf *leaf)
50 {
51         int data_end = leaf_data_end(leaf);
52         int nritems = leaf->header.nritems;
53         char *items_end = (char *)(leaf->items + nritems + 1);
54         return (char *)(leaf->data + data_end) - (char *)items_end;
55 }
56
57 /*
58  * compare two keys in a memcmp fashion
59  */
60 int comp_keys(struct key *k1, struct key *k2)
61 {
62         if (k1->objectid > k2->objectid)
63                 return 1;
64         if (k1->objectid < k2->objectid)
65                 return -1;
66         if (k1->flags > k2->flags)
67                 return 1;
68         if (k1->flags < k2->flags)
69                 return -1;
70         if (k1->offset > k2->offset)
71                 return 1;
72         if (k1->offset < k2->offset)
73                 return -1;
74         return 0;
75 }
76
77 /*
78  * search for key in the array p.  items p are item_size apart
79  * and there are 'max' items in p
80  * the slot in the array is returned via slot, and it points to
81  * the place where you would insert key if it is not found in
82  * the array.
83  *
84  * slot may point to max if the key is bigger than all of the keys
85  */
86 int generic_bin_search(char *p, int item_size, struct key *key,
87                        int max, int *slot)
88 {
89         int low = 0;
90         int high = max;
91         int mid;
92         int ret;
93         struct key *tmp;
94
95         while(low < high) {
96                 mid = (low + high) / 2;
97                 tmp = (struct key *)(p + mid * item_size);
98                 ret = comp_keys(tmp, key);
99
100                 if (ret < 0)
101                         low = mid + 1;
102                 else if (ret > 0)
103                         high = mid;
104                 else {
105                         *slot = mid;
106                         return 0;
107                 }
108         }
109         *slot = low;
110         return 1;
111 }
112
113 /*
114  * simple bin_search frontend that does the right thing for
115  * leaves vs nodes
116  */
117 int bin_search(struct node *c, struct key *key, int *slot)
118 {
119         if (is_leaf(c->header.flags)) {
120                 struct leaf *l = (struct leaf *)c;
121                 return generic_bin_search((void *)l->items, sizeof(struct item),
122                                           key, c->header.nritems, slot);
123         } else {
124                 return generic_bin_search((void *)c->keys, sizeof(struct key),
125                                           key, c->header.nritems, slot);
126         }
127         return -1;
128 }
129
130 /*
131  * look for key in the tree.  path is filled in with nodes along the way
132  * if key is found, we return zero and you can find the item in the leaf
133  * level of the path (level 0)
134  *
135  * If the key isn't found, the path points to the slot where it should
136  * be inserted.
137  *
138  * if ins_len > 0, nodes and leaves will be split as we walk down the
139  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
140  * possible)
141  */
142 int search_slot(struct ctree_root *root, struct key *key,
143                 struct ctree_path *p, int ins_len)
144 {
145         struct tree_buffer *b = root->node;
146         struct node *c;
147         int slot;
148         int ret;
149         int level;
150
151         b->count++;
152         while (b) {
153                 c = &b->node;
154                 level = node_level(c->header.flags);
155                 p->nodes[level] = b;
156                 ret = bin_search(c, key, &slot);
157                 if (!is_leaf(c->header.flags)) {
158                         if (ret && slot > 0)
159                                 slot -= 1;
160                         p->slots[level] = slot;
161                         if (ins_len > 0 &&
162                             c->header.nritems == NODEPTRS_PER_BLOCK) {
163                                 int sret = split_node(root, p, level);
164                                 BUG_ON(sret > 0);
165                                 if (sret)
166                                         return sret;
167                                 b = p->nodes[level];
168                                 c = &b->node;
169                                 slot = p->slots[level];
170                         }
171                         b = read_tree_block(root, c->blockptrs[slot]);
172                         continue;
173                 } else {
174                         struct leaf *l = (struct leaf *)c;
175                         p->slots[level] = slot;
176                         if (ins_len > 0 && leaf_free_space(l) <
177                             sizeof(struct item) + ins_len) {
178                                 int sret = split_leaf(root, p, ins_len);
179                                 BUG_ON(sret > 0);
180                                 if (sret)
181                                         return sret;
182                         }
183                         return ret;
184                 }
185         }
186         return -1;
187 }
188
189 /*
190  * adjust the pointers going up the tree, starting at level
191  * making sure the right key of each node is points to 'key'.
192  * This is used after shifting pointers to the left, so it stops
193  * fixing up pointers when a given leaf/node is not in slot 0 of the
194  * higher levels
195  */
196 static void fixup_low_keys(struct ctree_root *root,
197                            struct ctree_path *path, struct key *key,
198                            int level)
199 {
200         int i;
201         for (i = level; i < MAX_LEVEL; i++) {
202                 struct node *t;
203                 int tslot = path->slots[i];
204                 if (!path->nodes[i])
205                         break;
206                 t = &path->nodes[i]->node;
207                 memcpy(t->keys + tslot, key, sizeof(*key));
208                 write_tree_block(root, path->nodes[i]);
209                 if (tslot != 0)
210                         break;
211         }
212 }
213
214 /*
215  * try to push data from one node into the next node left in the
216  * tree.  The src node is found at specified level in the path.
217  * If some bytes were pushed, return 0, otherwise return 1.
218  *
219  * Lower nodes/leaves in the path are not touched, higher nodes may
220  * be modified to reflect the push.
221  *
222  * The path is altered to reflect the push.
223  */
224 int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
225 {
226         int slot;
227         struct node *left;
228         struct node *right;
229         int push_items = 0;
230         int left_nritems;
231         int right_nritems;
232         struct tree_buffer *t;
233         struct tree_buffer *right_buf;
234
235         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
236                 return 1;
237         slot = path->slots[level + 1];
238         if (slot == 0)
239                 return 1;
240
241         t = read_tree_block(root,
242                             path->nodes[level + 1]->node.blockptrs[slot - 1]);
243         left = &t->node;
244         right_buf = path->nodes[level];
245         right = &right_buf->node;
246         left_nritems = left->header.nritems;
247         right_nritems = right->header.nritems;
248         push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
249         if (push_items <= 0) {
250                 tree_block_release(root, t);
251                 return 1;
252         }
253
254         if (right_nritems < push_items)
255                 push_items = right_nritems;
256         memcpy(left->keys + left_nritems, right->keys,
257                 push_items * sizeof(struct key));
258         memcpy(left->blockptrs + left_nritems, right->blockptrs,
259                 push_items * sizeof(u64));
260         memmove(right->keys, right->keys + push_items,
261                 (right_nritems - push_items) * sizeof(struct key));
262         memmove(right->blockptrs, right->blockptrs + push_items,
263                 (right_nritems - push_items) * sizeof(u64));
264         right->header.nritems -= push_items;
265         left->header.nritems += push_items;
266
267         /* adjust the pointers going up the tree */
268         fixup_low_keys(root, path, right->keys, level + 1);
269
270         write_tree_block(root, t);
271         write_tree_block(root, right_buf);
272
273         /* then fixup the leaf pointer in the path */
274         if (path->slots[level] < push_items) {
275                 path->slots[level] += left_nritems;
276                 tree_block_release(root, path->nodes[level]);
277                 path->nodes[level] = t;
278                 path->slots[level + 1] -= 1;
279         } else {
280                 path->slots[level] -= push_items;
281                 tree_block_release(root, t);
282         }
283         return 0;
284 }
285
286 /*
287  * try to push data from one node into the next node right in the
288  * tree.  The src node is found at specified level in the path.
289  * If some bytes were pushed, return 0, otherwise return 1.
290  *
291  * Lower nodes/leaves in the path are not touched, higher nodes may
292  * be modified to reflect the push.
293  *
294  * The path is altered to reflect the push.
295  */
296 int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
297 {
298         int slot;
299         struct tree_buffer *t;
300         struct tree_buffer *src_buffer;
301         struct node *dst;
302         struct node *src;
303         int push_items = 0;
304         int dst_nritems;
305         int src_nritems;
306
307         /* can't push from the root */
308         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
309                 return 1;
310
311         /* only try to push inside the node higher up */
312         slot = path->slots[level + 1];
313         if (slot == NODEPTRS_PER_BLOCK - 1)
314                 return 1;
315
316         if (slot >= path->nodes[level + 1]->node.header.nritems -1)
317                 return 1;
318
319         t = read_tree_block(root,
320                             path->nodes[level + 1]->node.blockptrs[slot + 1]);
321         dst = &t->node;
322         src_buffer = path->nodes[level];
323         src = &src_buffer->node;
324         dst_nritems = dst->header.nritems;
325         src_nritems = src->header.nritems;
326         push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
327         if (push_items <= 0) {
328                 tree_block_release(root, t);
329                 return 1;
330         }
331
332         if (src_nritems < push_items)
333                 push_items = src_nritems;
334         memmove(dst->keys + push_items, dst->keys,
335                 dst_nritems * sizeof(struct key));
336         memcpy(dst->keys, src->keys + src_nritems - push_items,
337                 push_items * sizeof(struct key));
338
339         memmove(dst->blockptrs + push_items, dst->blockptrs,
340                 dst_nritems * sizeof(u64));
341         memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
342                 push_items * sizeof(u64));
343
344         src->header.nritems -= push_items;
345         dst->header.nritems += push_items;
346
347         /* adjust the pointers going up the tree */
348         memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
349                 dst->keys, sizeof(struct key));
350
351         write_tree_block(root, path->nodes[level + 1]);
352         write_tree_block(root, t);
353         write_tree_block(root, src_buffer);
354
355         /* then fixup the pointers in the path */
356         if (path->slots[level] >= src->header.nritems) {
357                 path->slots[level] -= src->header.nritems;
358                 tree_block_release(root, path->nodes[level]);
359                 path->nodes[level] = t;
360                 path->slots[level + 1] += 1;
361         } else {
362                 tree_block_release(root, t);
363         }
364         return 0;
365 }
366
367 /*
368  * helper function to insert a new root level in the tree.
369  * A new node is allocated, and a single item is inserted to
370  * point to the existing root
371  */
372 static int insert_new_root(struct ctree_root *root,
373                            struct ctree_path *path, int level)
374 {
375         struct tree_buffer *t;
376         struct node *lower;
377         struct node *c;
378         struct key *lower_key;
379
380         BUG_ON(path->nodes[level]);
381         BUG_ON(path->nodes[level-1] != root->node);
382
383         t = alloc_free_block(root);
384         c = &t->node;
385         memset(c, 0, sizeof(c));
386         c->header.nritems = 1;
387         c->header.flags = node_level(level);
388         c->header.blocknr = t->blocknr;
389         c->header.parentid = root->node->node.header.parentid;
390         lower = &path->nodes[level-1]->node;
391         if (is_leaf(lower->header.flags))
392                 lower_key = &((struct leaf *)lower)->items[0].key;
393         else
394                 lower_key = lower->keys;
395         memcpy(c->keys, lower_key, sizeof(struct key));
396         c->blockptrs[0] = path->nodes[level-1]->blocknr;
397         /* the super has an extra ref to root->node */
398         tree_block_release(root, root->node);
399         root->node = t;
400         t->count++;
401         write_tree_block(root, t);
402         path->nodes[level] = t;
403         path->slots[level] = 0;
404         return 0;
405 }
406
407 /*
408  * worker function to insert a single pointer in a node.
409  * the node should have enough room for the pointer already
410  *
411  * slot and level indicate where you want the key to go, and
412  * blocknr is the block the key points to.
413  */
414 int insert_ptr(struct ctree_root *root,
415                 struct ctree_path *path, struct key *key,
416                 u64 blocknr, int slot, int level)
417 {
418         struct node *lower;
419         int nritems;
420
421         BUG_ON(!path->nodes[level]);
422         lower = &path->nodes[level]->node;
423         nritems = lower->header.nritems;
424         if (slot > nritems)
425                 BUG();
426         if (nritems == NODEPTRS_PER_BLOCK)
427                 BUG();
428         if (slot != nritems) {
429                 memmove(lower->keys + slot + 1, lower->keys + slot,
430                         (nritems - slot) * sizeof(struct key));
431                 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
432                         (nritems - slot) * sizeof(u64));
433         }
434         memcpy(lower->keys + slot, key, sizeof(struct key));
435         lower->blockptrs[slot] = blocknr;
436         lower->header.nritems++;
437         if (lower->keys[1].objectid == 0)
438                         BUG();
439         write_tree_block(root, path->nodes[level]);
440         return 0;
441 }
442
443 /*
444  * split the node at the specified level in path in two.
445  * The path is corrected to point to the appropriate node after the split
446  *
447  * Before splitting this tries to make some room in the node by pushing
448  * left and right, if either one works, it returns right away.
449  */
450 int split_node(struct ctree_root *root, struct ctree_path *path, int level)
451 {
452         struct tree_buffer *t;
453         struct node *c;
454         struct tree_buffer *split_buffer;
455         struct node *split;
456         int mid;
457         int ret;
458
459         ret = push_node_left(root, path, level);
460         if (!ret)
461                 return 0;
462         ret = push_node_right(root, path, level);
463         if (!ret)
464                 return 0;
465         t = path->nodes[level];
466         c = &t->node;
467         if (t == root->node) {
468                 /* trying to split the root, lets make a new one */
469                 ret = insert_new_root(root, path, level + 1);
470                 if (ret)
471                         return ret;
472         }
473         split_buffer = alloc_free_block(root);
474         split = &split_buffer->node;
475         split->header.flags = c->header.flags;
476         split->header.blocknr = split_buffer->blocknr;
477         split->header.parentid = root->node->node.header.parentid;
478         mid = (c->header.nritems + 1) / 2;
479         memcpy(split->keys, c->keys + mid,
480                 (c->header.nritems - mid) * sizeof(struct key));
481         memcpy(split->blockptrs, c->blockptrs + mid,
482                 (c->header.nritems - mid) * sizeof(u64));
483         split->header.nritems = c->header.nritems - mid;
484         c->header.nritems = mid;
485         write_tree_block(root, t);
486         write_tree_block(root, split_buffer);
487         insert_ptr(root, path, split->keys, split_buffer->blocknr,
488                      path->slots[level + 1] + 1, level + 1);
489         if (path->slots[level] >= mid) {
490                 path->slots[level] -= mid;
491                 tree_block_release(root, t);
492                 path->nodes[level] = split_buffer;
493                 path->slots[level + 1] += 1;
494         } else {
495                 tree_block_release(root, split_buffer);
496         }
497         return 0;
498 }
499
500 /*
501  * how many bytes are required to store the items in a leaf.  start
502  * and nr indicate which items in the leaf to check.  This totals up the
503  * space used both by the item structs and the item data
504  */
505 int leaf_space_used(struct leaf *l, int start, int nr)
506 {
507         int data_len;
508         int end = start + nr - 1;
509
510         if (!nr)
511                 return 0;
512         data_len = l->items[start].offset + l->items[start].size;
513         data_len = data_len - l->items[end].offset;
514         data_len += sizeof(struct item) * nr;
515         return data_len;
516 }
517
518 /*
519  * push some data in the path leaf to the right, trying to free up at
520  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
521  */
522 int push_leaf_right(struct ctree_root *root, struct ctree_path *path,
523                    int data_size)
524 {
525         struct tree_buffer *left_buf = path->nodes[0];
526         struct leaf *left = &left_buf->leaf;
527         struct leaf *right;
528         struct tree_buffer *right_buf;
529         struct tree_buffer *upper;
530         int slot;
531         int i;
532         int free_space;
533         int push_space = 0;
534         int push_items = 0;
535         struct item *item;
536
537         slot = path->slots[1];
538         if (!path->nodes[1]) {
539                 return 1;
540         }
541         upper = path->nodes[1];
542         if (slot >= upper->node.header.nritems - 1) {
543                 return 1;
544         }
545         right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]);
546         right = &right_buf->leaf;
547         free_space = leaf_free_space(right);
548         if (free_space < data_size + sizeof(struct item)) {
549                 tree_block_release(root, right_buf);
550                 return 1;
551         }
552         for (i = left->header.nritems - 1; i >= 0; i--) {
553                 item = left->items + i;
554                 if (path->slots[0] == i)
555                         push_space += data_size + sizeof(*item);
556                 if (item->size + sizeof(*item) + push_space > free_space)
557                         break;
558                 push_items++;
559                 push_space += item->size + sizeof(*item);
560         }
561         if (push_items == 0) {
562                 tree_block_release(root, right_buf);
563                 return 1;
564         }
565         /* push left to right */
566         push_space = left->items[left->header.nritems - push_items].offset +
567                      left->items[left->header.nritems - push_items].size;
568         push_space -= leaf_data_end(left);
569         /* make room in the right data area */
570         memmove(right->data + leaf_data_end(right) - push_space,
571                 right->data + leaf_data_end(right),
572                 LEAF_DATA_SIZE - leaf_data_end(right));
573         /* copy from the left data area */
574         memcpy(right->data + LEAF_DATA_SIZE - push_space,
575                 left->data + leaf_data_end(left),
576                 push_space);
577         memmove(right->items + push_items, right->items,
578                 right->header.nritems * sizeof(struct item));
579         /* copy the items from left to right */
580         memcpy(right->items, left->items + left->header.nritems - push_items,
581                 push_items * sizeof(struct item));
582
583         /* update the item pointers */
584         right->header.nritems += push_items;
585         push_space = LEAF_DATA_SIZE;
586         for (i = 0; i < right->header.nritems; i++) {
587                 right->items[i].offset = push_space - right->items[i].size;
588                 push_space = right->items[i].offset;
589         }
590         left->header.nritems -= push_items;
591
592         write_tree_block(root, left_buf);
593         write_tree_block(root, right_buf);
594         memcpy(upper->node.keys + slot + 1,
595                 &right->items[0].key, sizeof(struct key));
596         write_tree_block(root, upper);
597         /* then fixup the leaf pointer in the path */
598         if (path->slots[0] >= left->header.nritems) {
599                 path->slots[0] -= left->header.nritems;
600                 tree_block_release(root, path->nodes[0]);
601                 path->nodes[0] = right_buf;
602                 path->slots[1] += 1;
603         } else {
604                 tree_block_release(root, right_buf);
605         }
606         return 0;
607 }
608 /*
609  * push some data in the path leaf to the left, trying to free up at
610  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
611  */
612 int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
613                    int data_size)
614 {
615         struct tree_buffer *right_buf = path->nodes[0];
616         struct leaf *right = &right_buf->leaf;
617         struct tree_buffer *t;
618         struct leaf *left;
619         int slot;
620         int i;
621         int free_space;
622         int push_space = 0;
623         int push_items = 0;
624         struct item *item;
625         int old_left_nritems;
626
627         slot = path->slots[1];
628         if (slot == 0) {
629                 return 1;
630         }
631         if (!path->nodes[1]) {
632                 return 1;
633         }
634         t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
635         left = &t->leaf;
636         free_space = leaf_free_space(left);
637         if (free_space < data_size + sizeof(struct item)) {
638                 tree_block_release(root, t);
639                 return 1;
640         }
641         for (i = 0; i < right->header.nritems; i++) {
642                 item = right->items + i;
643                 if (path->slots[0] == i)
644                         push_space += data_size + sizeof(*item);
645                 if (item->size + sizeof(*item) + push_space > free_space)
646                         break;
647                 push_items++;
648                 push_space += item->size + sizeof(*item);
649         }
650         if (push_items == 0) {
651                 tree_block_release(root, t);
652                 return 1;
653         }
654         /* push data from right to left */
655         memcpy(left->items + left->header.nritems,
656                 right->items, push_items * sizeof(struct item));
657         push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
658         memcpy(left->data + leaf_data_end(left) - push_space,
659                 right->data + right->items[push_items - 1].offset,
660                 push_space);
661         old_left_nritems = left->header.nritems;
662         BUG_ON(old_left_nritems < 0);
663
664         for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
665                 left->items[i].offset -= LEAF_DATA_SIZE -
666                         left->items[old_left_nritems -1].offset;
667         }
668         left->header.nritems += push_items;
669
670         /* fixup right node */
671         push_space = right->items[push_items-1].offset - leaf_data_end(right);
672         memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
673                 leaf_data_end(right), push_space);
674         memmove(right->items, right->items + push_items,
675                 (right->header.nritems - push_items) * sizeof(struct item));
676         right->header.nritems -= push_items;
677         push_space = LEAF_DATA_SIZE;
678
679         for (i = 0; i < right->header.nritems; i++) {
680                 right->items[i].offset = push_space - right->items[i].size;
681                 push_space = right->items[i].offset;
682         }
683
684         write_tree_block(root, t);
685         write_tree_block(root, right_buf);
686
687         fixup_low_keys(root, path, &right->items[0].key, 1);
688
689         /* then fixup the leaf pointer in the path */
690         if (path->slots[0] < push_items) {
691                 path->slots[0] += old_left_nritems;
692                 tree_block_release(root, path->nodes[0]);
693                 path->nodes[0] = t;
694                 path->slots[1] -= 1;
695         } else {
696                 tree_block_release(root, t);
697                 path->slots[0] -= push_items;
698         }
699         BUG_ON(path->slots[0] < 0);
700         return 0;
701 }
702
703 /*
704  * split the path's leaf in two, making sure there is at least data_size
705  * available for the resulting leaf level of the path.
706  */
707 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
708 {
709         struct tree_buffer *l_buf = path->nodes[0];
710         struct leaf *l = &l_buf->leaf;
711         int nritems;
712         int mid;
713         int slot;
714         struct leaf *right;
715         struct tree_buffer *right_buffer;
716         int space_needed = data_size + sizeof(struct item);
717         int data_copy_size;
718         int rt_data_off;
719         int i;
720         int ret;
721
722         if (push_leaf_left(root, path, data_size) == 0 ||
723             push_leaf_right(root, path, data_size) == 0) {
724                 l_buf = path->nodes[0];
725                 l = &l_buf->leaf;
726                 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
727                         return 0;
728         }
729         if (!path->nodes[1]) {
730                 ret = insert_new_root(root, path, 1);
731                 if (ret)
732                         return ret;
733         }
734         slot = path->slots[0];
735         nritems = l->header.nritems;
736         mid = (nritems + 1)/ 2;
737
738         right_buffer = alloc_free_block(root);
739         BUG_ON(!right_buffer);
740         BUG_ON(mid == nritems);
741         right = &right_buffer->leaf;
742         memset(right, 0, sizeof(*right));
743         if (mid <= slot) {
744                 /* FIXME, just alloc a new leaf here */
745                 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
746                         LEAF_DATA_SIZE)
747                         BUG();
748         } else {
749                 /* FIXME, just alloc a new leaf here */
750                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
751                         LEAF_DATA_SIZE)
752                         BUG();
753         }
754         right->header.nritems = nritems - mid;
755         right->header.blocknr = right_buffer->blocknr;
756         right->header.flags = node_level(0);
757         right->header.parentid = root->node->node.header.parentid;
758         data_copy_size = l->items[mid].offset + l->items[mid].size -
759                          leaf_data_end(l);
760         memcpy(right->items, l->items + mid,
761                (nritems - mid) * sizeof(struct item));
762         memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
763                l->data + leaf_data_end(l), data_copy_size);
764         rt_data_off = LEAF_DATA_SIZE -
765                      (l->items[mid].offset + l->items[mid].size);
766
767         for (i = 0; i < right->header.nritems; i++)
768                 right->items[i].offset += rt_data_off;
769
770         l->header.nritems = mid;
771         ret = insert_ptr(root, path, &right->items[0].key,
772                           right_buffer->blocknr, path->slots[1] + 1, 1);
773         write_tree_block(root, right_buffer);
774         write_tree_block(root, l_buf);
775
776         BUG_ON(path->slots[0] != slot);
777         if (mid <= slot) {
778                 tree_block_release(root, path->nodes[0]);
779                 path->nodes[0] = right_buffer;
780                 path->slots[0] -= mid;
781                 path->slots[1] += 1;
782         } else
783                 tree_block_release(root, right_buffer);
784         BUG_ON(path->slots[0] < 0);
785         return ret;
786 }
787
788 /*
789  * Given a key and some data, insert an item into the tree.
790  * This does all the path init required, making room in the tree if needed.
791  */
792 int insert_item(struct ctree_root *root, struct key *key,
793                           void *data, int data_size)
794 {
795         int ret;
796         int slot;
797         int slot_orig;
798         struct leaf *leaf;
799         struct tree_buffer *leaf_buf;
800         unsigned int nritems;
801         unsigned int data_end;
802         struct ctree_path path;
803
804         /* create a root if there isn't one */
805         if (!root->node)
806                 BUG();
807         init_path(&path);
808         ret = search_slot(root, key, &path, data_size);
809         if (ret == 0) {
810                 release_path(root, &path);
811                 return -EEXIST;
812         }
813
814         slot_orig = path.slots[0];
815         leaf_buf = path.nodes[0];
816         leaf = &leaf_buf->leaf;
817
818         nritems = leaf->header.nritems;
819         data_end = leaf_data_end(leaf);
820
821         if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
822                 BUG();
823
824         slot = path.slots[0];
825         BUG_ON(slot < 0);
826         if (slot != nritems) {
827                 int i;
828                 unsigned int old_data = leaf->items[slot].offset +
829                                         leaf->items[slot].size;
830
831                 /*
832                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
833                  */
834                 /* first correct the data pointers */
835                 for (i = slot; i < nritems; i++)
836                         leaf->items[i].offset -= data_size;
837
838                 /* shift the items */
839                 memmove(leaf->items + slot + 1, leaf->items + slot,
840                         (nritems - slot) * sizeof(struct item));
841
842                 /* shift the data */
843                 memmove(leaf->data + data_end - data_size, leaf->data +
844                         data_end, old_data - data_end);
845                 data_end = old_data;
846         }
847         /* copy the new data in */
848         memcpy(&leaf->items[slot].key, key, sizeof(struct key));
849         leaf->items[slot].offset = data_end - data_size;
850         leaf->items[slot].size = data_size;
851         memcpy(leaf->data + data_end - data_size, data, data_size);
852         leaf->header.nritems += 1;
853         write_tree_block(root, leaf_buf);
854         if (slot == 0)
855                 fixup_low_keys(root, &path, key, 1);
856         if (leaf_free_space(leaf) < 0)
857                 BUG();
858         release_path(root, &path);
859         return 0;
860 }
861
862 /*
863  * delete the pointer from a given node.
864  *
865  * If the delete empties a node, the node is removed from the tree,
866  * continuing all the way the root if required.  The root is converted into
867  * a leaf if all the nodes are emptied.
868  */
869 int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
870 {
871         int slot;
872         struct tree_buffer *t;
873         struct node *node;
874         int nritems;
875         u64 blocknr;
876
877         while(1) {
878                 t = path->nodes[level];
879                 if (!t)
880                         break;
881                 node = &t->node;
882                 slot = path->slots[level];
883                 nritems = node->header.nritems;
884
885                 if (slot != nritems -1) {
886                         memmove(node->keys + slot, node->keys + slot + 1,
887                                 sizeof(struct key) * (nritems - slot - 1));
888                         memmove(node->blockptrs + slot,
889                                 node->blockptrs + slot + 1,
890                                 sizeof(u64) * (nritems - slot - 1));
891                 }
892                 node->header.nritems--;
893                 blocknr = t->blocknr;
894                 write_tree_block(root, t);
895                 if (node->header.nritems != 0) {
896                         int tslot;
897                         if (slot == 0)
898                                 fixup_low_keys(root, path, node->keys,
899                                                level + 1);
900                         tslot = path->slots[level + 1];
901                         t->count++;
902                         if (push_node_left(root, path, level))
903                                 push_node_right(root, path, level);
904                         path->slots[level + 1] = tslot;
905                         if (node->header.nritems != 0) {
906                                 tree_block_release(root, t);
907                                 break;
908                         }
909                         tree_block_release(root, t);
910                 }
911                 if (t == root->node) {
912                         /* just turn the root into a leaf and break */
913                         root->node->node.header.flags = node_level(0);
914                         write_tree_block(root, t);
915                         break;
916                 }
917                 level++;
918                 free_extent(root, blocknr, 1);
919                 if (!path->nodes[level])
920                         BUG();
921         }
922         return 0;
923 }
924
925 /*
926  * delete the item at the leaf level in path.  If that empties
927  * the leaf, remove it from the tree
928  */
929 int del_item(struct ctree_root *root, struct ctree_path *path)
930 {
931         int slot;
932         struct leaf *leaf;
933         struct tree_buffer *leaf_buf;
934         int doff;
935         int dsize;
936
937         leaf_buf = path->nodes[0];
938         leaf = &leaf_buf->leaf;
939         slot = path->slots[0];
940         doff = leaf->items[slot].offset;
941         dsize = leaf->items[slot].size;
942
943         if (slot != leaf->header.nritems - 1) {
944                 int i;
945                 int data_end = leaf_data_end(leaf);
946                 memmove(leaf->data + data_end + dsize,
947                         leaf->data + data_end,
948                         doff - data_end);
949                 for (i = slot + 1; i < leaf->header.nritems; i++)
950                         leaf->items[i].offset += dsize;
951                 memmove(leaf->items + slot, leaf->items + slot + 1,
952                         sizeof(struct item) *
953                         (leaf->header.nritems - slot - 1));
954         }
955         leaf->header.nritems -= 1;
956         /* delete the leaf if we've emptied it */
957         if (leaf->header.nritems == 0) {
958                 if (leaf_buf == root->node) {
959                         leaf->header.flags = node_level(0);
960                         write_tree_block(root, leaf_buf);
961                 } else {
962                         del_ptr(root, path, 1);
963                         free_extent(root, leaf_buf->blocknr, 1);
964                 }
965         } else {
966                 int used = leaf_space_used(leaf, 0, leaf->header.nritems);
967                 if (slot == 0)
968                         fixup_low_keys(root, path, &leaf->items[0].key, 1);
969                 write_tree_block(root, leaf_buf);
970                 /* delete the leaf if it is mostly empty */
971                 if (used < LEAF_DATA_SIZE / 3) {
972                         /* push_leaf_left fixes the path.
973                          * make sure the path still points to our leaf
974                          * for possible call to del_ptr below
975                          */
976                         slot = path->slots[1];
977                         leaf_buf->count++;
978                         push_leaf_left(root, path, 1);
979                         if (leaf->header.nritems)
980                                 push_leaf_right(root, path, 1);
981                         if (leaf->header.nritems == 0) {
982                                 u64 blocknr = leaf_buf->blocknr;
983                                 path->slots[1] = slot;
984                                 del_ptr(root, path, 1);
985                                 tree_block_release(root, leaf_buf);
986                                 free_extent(root, blocknr, 1);
987                         } else {
988                                 tree_block_release(root, leaf_buf);
989                         }
990                 }
991         }
992         return 0;
993 }
994
995 /*
996  * walk up the tree as far as required to find the next leaf.
997  * returns 0 if it found something or -1 if there are no greater leaves.
998  */
999 int next_leaf(struct ctree_root *root, struct ctree_path *path)
1000 {
1001         int slot;
1002         int level = 1;
1003         u64 blocknr;
1004         struct tree_buffer *c;
1005         struct tree_buffer *next = NULL;
1006
1007         while(level < MAX_LEVEL) {
1008                 if (!path->nodes[level])
1009                         return -1;
1010                 slot = path->slots[level] + 1;
1011                 c = path->nodes[level];
1012                 if (slot >= c->node.header.nritems) {
1013                         level++;
1014                         continue;
1015                 }
1016                 blocknr = c->node.blockptrs[slot];
1017                 if (next)
1018                         tree_block_release(root, next);
1019                 next = read_tree_block(root, blocknr);
1020                 break;
1021         }
1022         path->slots[level] = slot;
1023         while(1) {
1024                 level--;
1025                 c = path->nodes[level];
1026                 tree_block_release(root, c);
1027                 path->nodes[level] = next;
1028                 path->slots[level] = 0;
1029                 if (!level)
1030                         break;
1031                 next = read_tree_block(root, next->node.blockptrs[0]);
1032         }
1033         return 0;
1034 }
1035
1036 /* some sample code to insert,search & delete items */
1037 #if 0
1038 /* for testing only */
1039 int next_key(int i, int max_key) {
1040         return rand() % max_key;
1041         //return i;
1042 }
1043 int main() {
1044         struct key ins;
1045         struct key last = { (u64)-1, 0, 0};
1046         char *buf;
1047         int i;
1048         int num;
1049         int ret;
1050         int run_size = 20000000;
1051         int max_key =  100000000;
1052         int tree_size = 0;
1053         struct ctree_path path;
1054         struct ctree_super_block super;
1055         struct ctree_root *root;
1056
1057         radix_tree_init();
1058
1059
1060         root = open_ctree("dbfile", &super);
1061         srand(55);
1062         for (i = 0; i < run_size; i++) {
1063                 buf = malloc(64);
1064                 num = next_key(i, max_key);
1065                 // num = i;
1066                 sprintf(buf, "string-%d", num);
1067                 if (i % 10000 == 0)
1068                         fprintf(stderr, "insert %d:%d\n", num, i);
1069                 ins.objectid = num;
1070                 ins.offset = 0;
1071                 ins.flags = 0;
1072                 ret = insert_item(root, &ins, buf, strlen(buf));
1073                 if (!ret)
1074                         tree_size++;
1075                 free(buf);
1076         }
1077         write_ctree_super(root, &super);
1078         close_ctree(root);
1079
1080         root = open_ctree("dbfile", &super);
1081         printf("starting search\n");
1082         srand(55);
1083         for (i = 0; i < run_size; i++) {
1084                 num = next_key(i, max_key);
1085                 ins.objectid = num;
1086                 init_path(&path);
1087                 if (i % 10000 == 0)
1088                         fprintf(stderr, "search %d:%d\n", num, i);
1089                 ret = search_slot(root, &ins, &path, 0);
1090                 if (ret) {
1091                         print_tree(root, root->node);
1092                         printf("unable to find %d\n", num);
1093                         exit(1);
1094                 }
1095                 release_path(root, &path);
1096         }
1097         write_ctree_super(root, &super);
1098         close_ctree(root);
1099         root = open_ctree("dbfile", &super);
1100         printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
1101                 node_level(root->node->node.header.flags),
1102                 root->node->node.header.nritems,
1103                 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
1104         printf("all searches good, deleting some items\n");
1105         i = 0;
1106         srand(55);
1107         for (i = 0 ; i < run_size/4; i++) {
1108                 num = next_key(i, max_key);
1109                 ins.objectid = num;
1110                 init_path(&path);
1111                 ret = search_slot(root, &ins, &path, -1);
1112                 if (!ret) {
1113                         if (i % 10000 == 0)
1114                                 fprintf(stderr, "del %d:%d\n", num, i);
1115                         ret = del_item(root, &path);
1116                         if (ret != 0)
1117                                 BUG();
1118                         tree_size--;
1119                 }
1120                 release_path(root, &path);
1121         }
1122         write_ctree_super(root, &super);
1123         close_ctree(root);
1124         root = open_ctree("dbfile", &super);
1125         srand(128);
1126         for (i = 0; i < run_size; i++) {
1127                 buf = malloc(64);
1128                 num = next_key(i, max_key);
1129                 sprintf(buf, "string-%d", num);
1130                 ins.objectid = num;
1131                 if (i % 10000 == 0)
1132                         fprintf(stderr, "insert %d:%d\n", num, i);
1133                 ret = insert_item(root, &ins, buf, strlen(buf));
1134                 if (!ret)
1135                         tree_size++;
1136                 free(buf);
1137         }
1138         write_ctree_super(root, &super);
1139         close_ctree(root);
1140         root = open_ctree("dbfile", &super);
1141         srand(128);
1142         printf("starting search2\n");
1143         for (i = 0; i < run_size; i++) {
1144                 num = next_key(i, max_key);
1145                 ins.objectid = num;
1146                 init_path(&path);
1147                 if (i % 10000 == 0)
1148                         fprintf(stderr, "search %d:%d\n", num, i);
1149                 ret = search_slot(root, &ins, &path, 0);
1150                 if (ret) {
1151                         print_tree(root, root->node);
1152                         printf("unable to find %d\n", num);
1153                         exit(1);
1154                 }
1155                 release_path(root, &path);
1156         }
1157         printf("starting big long delete run\n");
1158         while(root->node && root->node->node.header.nritems > 0) {
1159                 struct leaf *leaf;
1160                 int slot;
1161                 ins.objectid = (u64)-1;
1162                 init_path(&path);
1163                 ret = search_slot(root, &ins, &path, -1);
1164                 if (ret == 0)
1165                         BUG();
1166
1167                 leaf = &path.nodes[0]->leaf;
1168                 slot = path.slots[0];
1169                 if (slot != leaf->header.nritems)
1170                         BUG();
1171                 while(path.slots[0] > 0) {
1172                         path.slots[0] -= 1;
1173                         slot = path.slots[0];
1174                         leaf = &path.nodes[0]->leaf;
1175
1176                         if (comp_keys(&last, &leaf->items[slot].key) <= 0)
1177                                 BUG();
1178                         memcpy(&last, &leaf->items[slot].key, sizeof(last));
1179                         if (tree_size % 10000 == 0)
1180                                 printf("big del %d:%d\n", tree_size, i);
1181                         ret = del_item(root, &path);
1182                         if (ret != 0) {
1183                                 printf("del_item returned %d\n", ret);
1184                                 BUG();
1185                         }
1186                         tree_size--;
1187                 }
1188                 release_path(root, &path);
1189         }
1190         printf("tree size is now %d\n", tree_size);
1191         printf("map tree\n");
1192         print_tree(root->extent_root, root->extent_root->node);
1193         write_ctree_super(root, &super);
1194         close_ctree(root);
1195         return 0;
1196 }
1197 #endif