push_leaf_right
[platform/upstream/btrfs-progs.git] / ctree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7 #include "print-tree.h"
8
9 int split_node(struct ctree_root *root, struct ctree_path *path, int level);
10 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
11 int push_node_left(struct ctree_root *root, struct ctree_path *path, int level);
12 int push_node_right(struct ctree_root *root,
13                     struct ctree_path *path, int level);
14 int del_ptr(struct ctree_root *root, struct ctree_path *path, int level);
15
16 inline void init_path(struct ctree_path *p)
17 {
18         memset(p, 0, sizeof(*p));
19 }
20
21 void release_path(struct ctree_root *root, struct ctree_path *p)
22 {
23         int i;
24         for (i = 0; i < MAX_LEVEL; i++) {
25                 if (!p->nodes[i])
26                         break;
27                 tree_block_release(root, p->nodes[i]);
28         }
29 }
30
31 /*
32  * The leaf data grows from end-to-front in the node.
33  * this returns the address of the start of the last item,
34  * which is the stop of the leaf data stack
35  */
36 static inline unsigned int leaf_data_end(struct leaf *leaf)
37 {
38         unsigned int nr = leaf->header.nritems;
39         if (nr == 0)
40                 return sizeof(leaf->data);
41         return leaf->items[nr-1].offset;
42 }
43
44 /*
45  * The space between the end of the leaf items and
46  * the start of the leaf data.  IOW, how much room
47  * the leaf has left for both items and data
48  */
49 int leaf_free_space(struct leaf *leaf)
50 {
51         int data_end = leaf_data_end(leaf);
52         int nritems = leaf->header.nritems;
53         char *items_end = (char *)(leaf->items + nritems + 1);
54         return (char *)(leaf->data + data_end) - (char *)items_end;
55 }
56
57 /*
58  * compare two keys in a memcmp fashion
59  */
60 int comp_keys(struct key *k1, struct key *k2)
61 {
62         if (k1->objectid > k2->objectid)
63                 return 1;
64         if (k1->objectid < k2->objectid)
65                 return -1;
66         if (k1->flags > k2->flags)
67                 return 1;
68         if (k1->flags < k2->flags)
69                 return -1;
70         if (k1->offset > k2->offset)
71                 return 1;
72         if (k1->offset < k2->offset)
73                 return -1;
74         return 0;
75 }
76
/*
 * Binary search for key in the array starting at p.  The array holds 'max'
 * entries spaced item_size bytes apart, each beginning with a struct key.
 *
 * On a match, *slot is the index of the matching entry and 0 is returned.
 * Otherwise *slot is the index where key would have to be inserted (this
 * is 'max' when key is bigger than every entry) and 1 is returned.
 */
int generic_bin_search(char *p, int item_size, struct key *key,
                       int max, int *slot)
{
        int lo = 0;
        int hi = max;

        while (lo < hi) {
                int probe = (lo + hi) / 2;
                int cmp = comp_keys((struct key *)(p + probe * item_size),
                                    key);

                if (cmp == 0) {
                        *slot = probe;
                        return 0;
                }
                if (cmp < 0)
                        lo = probe + 1;
                else
                        hi = probe;
        }
        *slot = lo;
        return 1;
}
112
113 int bin_search(struct node *c, struct key *key, int *slot)
114 {
115         if (is_leaf(c->header.flags)) {
116                 struct leaf *l = (struct leaf *)c;
117                 return generic_bin_search((void *)l->items, sizeof(struct item),
118                                           key, c->header.nritems, slot);
119         } else {
120                 return generic_bin_search((void *)c->keys, sizeof(struct key),
121                                           key, c->header.nritems, slot);
122         }
123         return -1;
124 }
125
126 /*
127  * look for key in the tree.  path is filled in with nodes along the way
128  * if key is found, we return zero and you can find the item in the leaf
129  * level of the path (level 0)
130  *
131  * If the key isn't found, the path points to the slot where it should
132  * be inserted.
133  */
134 int search_slot(struct ctree_root *root, struct key *key,
135                 struct ctree_path *p, int ins_len)
136 {
137         struct tree_buffer *b = root->node;
138         struct node *c;
139         int slot;
140         int ret;
141         int level;
142
143         b->count++;
144         while (b) {
145                 c = &b->node;
146                 level = node_level(c->header.flags);
147                 p->nodes[level] = b;
148                 ret = bin_search(c, key, &slot);
149                 if (!is_leaf(c->header.flags)) {
150                         if (ret && slot > 0)
151                                 slot -= 1;
152                         p->slots[level] = slot;
153                         if (ins_len > 0 &&
154                             c->header.nritems == NODEPTRS_PER_BLOCK) {
155                                 int sret = split_node(root, p, level);
156                                 BUG_ON(sret > 0);
157                                 if (sret)
158                                         return sret;
159                                 b = p->nodes[level];
160                                 c = &b->node;
161                                 slot = p->slots[level];
162                         } else if (ins_len < 0 &&
163                                    c->header.nritems <= NODEPTRS_PER_BLOCK/4) {
164                                 u64 blocknr = b->blocknr;
165                                 slot = p->slots[level +1];
166                                 b->count++;
167                                 if (push_node_left(root, p, level))
168                                         push_node_right(root, p, level);
169                                 if (c->header.nritems == 0 &&
170                                     level < MAX_LEVEL - 1 &&
171                                     p->nodes[level + 1]) {
172                                         int tslot = p->slots[level + 1];
173
174                                         p->slots[level + 1] = slot;
175                                         del_ptr(root, p, level + 1);
176                                         p->slots[level + 1] = tslot;
177                                         tree_block_release(root, b);
178                                         free_extent(root, blocknr, 1);
179                                 } else {
180                                         tree_block_release(root, b);
181                                 }
182                                 b = p->nodes[level];
183                                 c = &b->node;
184                                 slot = p->slots[level];
185                         }
186                         b = read_tree_block(root, c->blockptrs[slot]);
187                         continue;
188                 } else {
189                         struct leaf *l = (struct leaf *)c;
190                         p->slots[level] = slot;
191                         if (ins_len > 0 && leaf_free_space(l) <
192                             sizeof(struct item) + ins_len) {
193                                 int sret = split_leaf(root, p, ins_len);
194                                 BUG_ON(sret > 0);
195                                 if (sret)
196                                         return sret;
197                         }
198                         return ret;
199                 }
200         }
201         return -1;
202 }
203
204 /*
205  * adjust the pointers going up the tree, starting at level
206  * making sure the right key of each node is points to 'key'.
207  * This is used after shifting pointers to the left, so it stops
208  * fixing up pointers when a given leaf/node is not in slot 0 of the
209  * higher levels
210  */
211 static void fixup_low_keys(struct ctree_root *root,
212                            struct ctree_path *path, struct key *key,
213                            int level)
214 {
215         int i;
216         for (i = level; i < MAX_LEVEL; i++) {
217                 struct node *t;
218                 int tslot = path->slots[i];
219                 if (!path->nodes[i])
220                         break;
221                 t = &path->nodes[i]->node;
222                 memcpy(t->keys + tslot, key, sizeof(*key));
223                 write_tree_block(root, path->nodes[i]);
224                 if (tslot != 0)
225                         break;
226         }
227 }
228
229 /*
230  * try to push data from one node into the next node left in the
231  * tree.  The src node is found at specified level in the path.
232  * If some bytes were pushed, return 0, otherwise return 1.
233  *
234  * Lower nodes/leaves in the path are not touched, higher nodes may
235  * be modified to reflect the push.
236  *
237  * The path is altered to reflect the push.
238  */
239 int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
240 {
241         int slot;
242         struct node *left;
243         struct node *right;
244         int push_items = 0;
245         int left_nritems;
246         int right_nritems;
247         struct tree_buffer *t;
248         struct tree_buffer *right_buf;
249
250         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
251                 return 1;
252         slot = path->slots[level + 1];
253         if (slot == 0)
254                 return 1;
255
256         t = read_tree_block(root,
257                             path->nodes[level + 1]->node.blockptrs[slot - 1]);
258         left = &t->node;
259         right_buf = path->nodes[level];
260         right = &right_buf->node;
261         left_nritems = left->header.nritems;
262         right_nritems = right->header.nritems;
263         push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
264         if (push_items <= 0) {
265                 tree_block_release(root, t);
266                 return 1;
267         }
268
269         if (right_nritems < push_items)
270                 push_items = right_nritems;
271         memcpy(left->keys + left_nritems, right->keys,
272                 push_items * sizeof(struct key));
273         memcpy(left->blockptrs + left_nritems, right->blockptrs,
274                 push_items * sizeof(u64));
275         memmove(right->keys, right->keys + push_items,
276                 (right_nritems - push_items) * sizeof(struct key));
277         memmove(right->blockptrs, right->blockptrs + push_items,
278                 (right_nritems - push_items) * sizeof(u64));
279         right->header.nritems -= push_items;
280         left->header.nritems += push_items;
281
282         /* adjust the pointers going up the tree */
283         fixup_low_keys(root, path, right->keys, level + 1);
284
285         write_tree_block(root, t);
286         write_tree_block(root, right_buf);
287
288         /* then fixup the leaf pointer in the path */
289         if (path->slots[level] < push_items) {
290                 path->slots[level] += left_nritems;
291                 tree_block_release(root, path->nodes[level]);
292                 path->nodes[level] = t;
293                 path->slots[level + 1] -= 1;
294         } else {
295                 path->slots[level] -= push_items;
296                 tree_block_release(root, t);
297         }
298         return 0;
299 }
300
301 /*
302  * try to push data from one node into the next node right in the
303  * tree.  The src node is found at specified level in the path.
304  * If some bytes were pushed, return 0, otherwise return 1.
305  *
306  * Lower nodes/leaves in the path are not touched, higher nodes may
307  * be modified to reflect the push.
308  *
309  * The path is altered to reflect the push.
310  */
311 int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
312 {
313         int slot;
314         struct tree_buffer *t;
315         struct tree_buffer *src_buffer;
316         struct node *dst;
317         struct node *src;
318         int push_items = 0;
319         int dst_nritems;
320         int src_nritems;
321
322         /* can't push from the root */
323         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
324                 return 1;
325
326         /* only try to push inside the node higher up */
327         slot = path->slots[level + 1];
328         if (slot == NODEPTRS_PER_BLOCK - 1)
329                 return 1;
330
331         if (slot >= path->nodes[level + 1]->node.header.nritems -1)
332                 return 1;
333
334         t = read_tree_block(root,
335                             path->nodes[level + 1]->node.blockptrs[slot + 1]);
336         dst = &t->node;
337         src_buffer = path->nodes[level];
338         src = &src_buffer->node;
339         dst_nritems = dst->header.nritems;
340         src_nritems = src->header.nritems;
341         push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
342         if (push_items <= 0) {
343                 tree_block_release(root, t);
344                 return 1;
345         }
346
347         if (src_nritems < push_items)
348                 push_items = src_nritems;
349         memmove(dst->keys + push_items, dst->keys,
350                 dst_nritems * sizeof(struct key));
351         memcpy(dst->keys, src->keys + src_nritems - push_items,
352                 push_items * sizeof(struct key));
353
354         memmove(dst->blockptrs + push_items, dst->blockptrs,
355                 dst_nritems * sizeof(u64));
356         memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
357                 push_items * sizeof(u64));
358
359         src->header.nritems -= push_items;
360         dst->header.nritems += push_items;
361
362         /* adjust the pointers going up the tree */
363         memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
364                 dst->keys, sizeof(struct key));
365
366         write_tree_block(root, path->nodes[level + 1]);
367         write_tree_block(root, t);
368         write_tree_block(root, src_buffer);
369
370         /* then fixup the pointers in the path */
371         if (path->slots[level] >= src->header.nritems) {
372                 path->slots[level] -= src->header.nritems;
373                 tree_block_release(root, path->nodes[level]);
374                 path->nodes[level] = t;
375                 path->slots[level + 1] += 1;
376         } else {
377                 tree_block_release(root, t);
378         }
379         return 0;
380 }
381
382 static int insert_new_root(struct ctree_root *root,
383                            struct ctree_path *path, int level)
384 {
385         struct tree_buffer *t;
386         struct node *lower;
387         struct node *c;
388         struct key *lower_key;
389
390         BUG_ON(path->nodes[level]);
391         BUG_ON(path->nodes[level-1] != root->node);
392
393         t = alloc_free_block(root);
394         c = &t->node;
395         memset(c, 0, sizeof(c));
396         c->header.nritems = 1;
397         c->header.flags = node_level(level);
398         c->header.blocknr = t->blocknr;
399         c->header.parentid = root->node->node.header.parentid;
400         lower = &path->nodes[level-1]->node;
401         if (is_leaf(lower->header.flags))
402                 lower_key = &((struct leaf *)lower)->items[0].key;
403         else
404                 lower_key = lower->keys;
405         memcpy(c->keys, lower_key, sizeof(struct key));
406         c->blockptrs[0] = path->nodes[level-1]->blocknr;
407         /* the super has an extra ref to root->node */
408         tree_block_release(root, root->node);
409         root->node = t;
410         t->count++;
411         write_tree_block(root, t);
412         path->nodes[level] = t;
413         path->slots[level] = 0;
414         return 0;
415 }
416
417 /*
418  * worker function to insert a single pointer in a node.
419  * the node should have enough room for the pointer already
420  * slot and level indicate where you want the key to go, and
421  * blocknr is the block the key points to.
422  */
423 int insert_ptr(struct ctree_root *root,
424                 struct ctree_path *path, struct key *key,
425                 u64 blocknr, int slot, int level)
426 {
427         struct node *lower;
428         int nritems;
429
430         BUG_ON(!path->nodes[level]);
431         lower = &path->nodes[level]->node;
432         nritems = lower->header.nritems;
433         if (slot > nritems)
434                 BUG();
435         if (nritems == NODEPTRS_PER_BLOCK)
436                 BUG();
437         if (slot != nritems) {
438                 memmove(lower->keys + slot + 1, lower->keys + slot,
439                         (nritems - slot) * sizeof(struct key));
440                 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
441                         (nritems - slot) * sizeof(u64));
442         }
443         memcpy(lower->keys + slot, key, sizeof(struct key));
444         lower->blockptrs[slot] = blocknr;
445         lower->header.nritems++;
446         if (lower->keys[1].objectid == 0)
447                         BUG();
448         write_tree_block(root, path->nodes[level]);
449         return 0;
450 }
451
452 int split_node(struct ctree_root *root, struct ctree_path *path, int level)
453 {
454         struct tree_buffer *t;
455         struct node *c;
456         struct tree_buffer *split_buffer;
457         struct node *split;
458         int mid;
459         int ret;
460
461         ret = push_node_left(root, path, level);
462         if (!ret)
463                 return 0;
464         ret = push_node_right(root, path, level);
465         if (!ret)
466                 return 0;
467         t = path->nodes[level];
468         c = &t->node;
469         if (t == root->node) {
470                 /* trying to split the root, lets make a new one */
471                 ret = insert_new_root(root, path, level + 1);
472                 if (ret)
473                         return ret;
474         }
475         split_buffer = alloc_free_block(root);
476         split = &split_buffer->node;
477         split->header.flags = c->header.flags;
478         split->header.blocknr = split_buffer->blocknr;
479         split->header.parentid = root->node->node.header.parentid;
480         mid = (c->header.nritems + 1) / 2;
481         memcpy(split->keys, c->keys + mid,
482                 (c->header.nritems - mid) * sizeof(struct key));
483         memcpy(split->blockptrs, c->blockptrs + mid,
484                 (c->header.nritems - mid) * sizeof(u64));
485         split->header.nritems = c->header.nritems - mid;
486         c->header.nritems = mid;
487         write_tree_block(root, t);
488         write_tree_block(root, split_buffer);
489         insert_ptr(root, path, split->keys, split_buffer->blocknr,
490                      path->slots[level + 1] + 1, level + 1);
491         if (path->slots[level] >= mid) {
492                 path->slots[level] -= mid;
493                 tree_block_release(root, t);
494                 path->nodes[level] = split_buffer;
495                 path->slots[level + 1] += 1;
496         } else {
497                 tree_block_release(root, split_buffer);
498         }
499         return 0;
500 }
501
502 /*
503  * how many bytes are required to store the items in a leaf.  start
504  * and nr indicate which items in the leaf to check.  This totals up the
505  * space used both by the item structs and the item data
506  */
507 int leaf_space_used(struct leaf *l, int start, int nr)
508 {
509         int data_len;
510         int end = start + nr - 1;
511
512         if (!nr)
513                 return 0;
514         data_len = l->items[start].offset + l->items[start].size;
515         data_len = data_len - l->items[end].offset;
516         data_len += sizeof(struct item) * nr;
517         return data_len;
518 }
519
520 /*
521  * push some data in the path leaf to the right, trying to free up at
522  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
523  */
524 int push_leaf_right(struct ctree_root *root, struct ctree_path *path,
525                    int data_size)
526 {
527         struct tree_buffer *left_buf = path->nodes[0];
528         struct leaf *left = &left_buf->leaf;
529         struct leaf *right;
530         struct tree_buffer *right_buf;
531         struct tree_buffer *upper;
532         int slot;
533         int i;
534         int free_space;
535         int push_space = 0;
536         int push_items = 0;
537         struct item *item;
538
539         slot = path->slots[1];
540         if (!path->nodes[1]) {
541                 return 1;
542         }
543         upper = path->nodes[1];
544         if (slot >= upper->node.header.nritems - 1) {
545                 return 1;
546         }
547         right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]);
548         right = &right_buf->leaf;
549         free_space = leaf_free_space(right);
550         if (free_space < data_size + sizeof(struct item)) {
551                 tree_block_release(root, right_buf);
552                 return 1;
553         }
554         for (i = left->header.nritems - 1; i >= 0; i--) {
555                 item = left->items + i;
556                 if (path->slots[0] == i)
557                         push_space += data_size + sizeof(*item);
558                 if (item->size + sizeof(*item) + push_space > free_space)
559                         break;
560                 push_items++;
561                 push_space += item->size + sizeof(*item);
562         }
563         if (push_items == 0) {
564                 tree_block_release(root, right_buf);
565                 return 1;
566         }
567         /* push left to right */
568         push_space = left->items[left->header.nritems - push_items].offset +
569                      left->items[left->header.nritems - push_items].size;
570         push_space -= leaf_data_end(left);
571         /* make room in the right data area */
572         memmove(right->data + leaf_data_end(right) - push_space,
573                 right->data + leaf_data_end(right),
574                 LEAF_DATA_SIZE - leaf_data_end(right));
575         /* copy from the left data area */
576         memcpy(right->data + LEAF_DATA_SIZE - push_space,
577                 left->data + leaf_data_end(left),
578                 push_space);
579         memmove(right->items + push_items, right->items,
580                 right->header.nritems * sizeof(struct item));
581         /* copy the items from left to right */
582         memcpy(right->items, left->items + left->header.nritems - push_items,
583                 push_items * sizeof(struct item));
584
585         /* update the item pointers */
586         right->header.nritems += push_items;
587         push_space = LEAF_DATA_SIZE;
588         for (i = 0; i < right->header.nritems; i++) {
589                 right->items[i].offset = push_space - right->items[i].size;
590                 push_space = right->items[i].offset;
591         }
592         left->header.nritems -= push_items;
593
594         write_tree_block(root, left_buf);
595         write_tree_block(root, right_buf);
596         memcpy(upper->node.keys + slot + 1,
597                 &right->items[0].key, sizeof(struct key));
598         write_tree_block(root, upper);
599         /* then fixup the leaf pointer in the path */
600         // FIXME use nritems in here somehow
601         if (path->slots[0] >= left->header.nritems) {
602                 path->slots[0] -= left->header.nritems;
603                 tree_block_release(root, path->nodes[0]);
604                 path->nodes[0] = right_buf;
605                 path->slots[1] += 1;
606         } else {
607                 tree_block_release(root, right_buf);
608         }
609         return 0;
610 }
611 /*
612  * push some data in the path leaf to the left, trying to free up at
613  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
614  */
615 int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
616                    int data_size)
617 {
618         struct tree_buffer *right_buf = path->nodes[0];
619         struct leaf *right = &right_buf->leaf;
620         struct tree_buffer *t;
621         struct leaf *left;
622         int slot;
623         int i;
624         int free_space;
625         int push_space = 0;
626         int push_items = 0;
627         struct item *item;
628         int old_left_nritems;
629
630         slot = path->slots[1];
631         if (slot == 0) {
632                 return 1;
633         }
634         if (!path->nodes[1]) {
635                 return 1;
636         }
637         t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
638         left = &t->leaf;
639         free_space = leaf_free_space(left);
640         if (free_space < data_size + sizeof(struct item)) {
641                 tree_block_release(root, t);
642                 return 1;
643         }
644         for (i = 0; i < right->header.nritems; i++) {
645                 item = right->items + i;
646                 if (path->slots[0] == i)
647                         push_space += data_size + sizeof(*item);
648                 if (item->size + sizeof(*item) + push_space > free_space)
649                         break;
650                 push_items++;
651                 push_space += item->size + sizeof(*item);
652         }
653         if (push_items == 0) {
654                 tree_block_release(root, t);
655                 return 1;
656         }
657         /* push data from right to left */
658         memcpy(left->items + left->header.nritems,
659                 right->items, push_items * sizeof(struct item));
660         push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
661         memcpy(left->data + leaf_data_end(left) - push_space,
662                 right->data + right->items[push_items - 1].offset,
663                 push_space);
664         old_left_nritems = left->header.nritems;
665         BUG_ON(old_left_nritems < 0);
666
667         for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
668                 left->items[i].offset -= LEAF_DATA_SIZE -
669                         left->items[old_left_nritems -1].offset;
670         }
671         left->header.nritems += push_items;
672
673         /* fixup right node */
674         push_space = right->items[push_items-1].offset - leaf_data_end(right);
675         memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
676                 leaf_data_end(right), push_space);
677         memmove(right->items, right->items + push_items,
678                 (right->header.nritems - push_items) * sizeof(struct item));
679         right->header.nritems -= push_items;
680         push_space = LEAF_DATA_SIZE;
681
682         for (i = 0; i < right->header.nritems; i++) {
683                 right->items[i].offset = push_space - right->items[i].size;
684                 push_space = right->items[i].offset;
685         }
686
687         write_tree_block(root, t);
688         write_tree_block(root, right_buf);
689
690         fixup_low_keys(root, path, &right->items[0].key, 1);
691
692         /* then fixup the leaf pointer in the path */
693         if (path->slots[0] < push_items) {
694                 path->slots[0] += old_left_nritems;
695                 tree_block_release(root, path->nodes[0]);
696                 path->nodes[0] = t;
697                 path->slots[1] -= 1;
698         } else {
699                 tree_block_release(root, t);
700                 path->slots[0] -= push_items;
701         }
702         BUG_ON(path->slots[0] < 0);
703         return 0;
704 }
705
706 /*
707  * split the path's leaf in two, making sure there is at least data_size
708  * available for the resulting leaf level of the path.
709  */
710 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
711 {
712         struct tree_buffer *l_buf = path->nodes[0];
713         struct leaf *l = &l_buf->leaf;
714         int nritems;
715         int mid;
716         int slot;
717         struct leaf *right;
718         struct tree_buffer *right_buffer;
719         int space_needed = data_size + sizeof(struct item);
720         int data_copy_size;
721         int rt_data_off;
722         int i;
723         int ret;
724
725         if (push_leaf_left(root, path, data_size) == 0 ||
726             push_leaf_right(root, path, data_size) == 0) {
727                 l_buf = path->nodes[0];
728                 l = &l_buf->leaf;
729                 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
730                         return 0;
731         }
732         if (!path->nodes[1]) {
733                 ret = insert_new_root(root, path, 1);
734                 if (ret)
735                         return ret;
736         }
737         slot = path->slots[0];
738         nritems = l->header.nritems;
739         mid = (nritems + 1)/ 2;
740
741         right_buffer = alloc_free_block(root);
742         BUG_ON(!right_buffer);
743         BUG_ON(mid == nritems);
744         right = &right_buffer->leaf;
745         memset(right, 0, sizeof(*right));
746         if (mid <= slot) {
747                 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
748                         LEAF_DATA_SIZE)
749                         BUG();
750         } else {
751                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
752                         LEAF_DATA_SIZE)
753                         BUG();
754         }
755         right->header.nritems = nritems - mid;
756         right->header.blocknr = right_buffer->blocknr;
757         right->header.flags = node_level(0);
758         right->header.parentid = root->node->node.header.parentid;
759         data_copy_size = l->items[mid].offset + l->items[mid].size -
760                          leaf_data_end(l);
761         memcpy(right->items, l->items + mid,
762                (nritems - mid) * sizeof(struct item));
763         memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
764                l->data + leaf_data_end(l), data_copy_size);
765         rt_data_off = LEAF_DATA_SIZE -
766                      (l->items[mid].offset + l->items[mid].size);
767
768         for (i = 0; i < right->header.nritems; i++)
769                 right->items[i].offset += rt_data_off;
770
771         l->header.nritems = mid;
772         ret = insert_ptr(root, path, &right->items[0].key,
773                           right_buffer->blocknr, path->slots[1] + 1, 1);
774         write_tree_block(root, right_buffer);
775         write_tree_block(root, l_buf);
776
777         BUG_ON(path->slots[0] != slot);
778         if (mid <= slot) {
779                 tree_block_release(root, path->nodes[0]);
780                 path->nodes[0] = right_buffer;
781                 path->slots[0] -= mid;
782                 path->slots[1] += 1;
783         } else
784                 tree_block_release(root, right_buffer);
785         BUG_ON(path->slots[0] < 0);
786         return ret;
787 }
788
789 /*
790  * Given a key and some data, insert an item into the tree.
791  * This does all the path init required, making room in the tree if needed.
792  */
793 int insert_item(struct ctree_root *root, struct key *key,
794                           void *data, int data_size)
795 {
796         int ret;
797         int slot;
798         int slot_orig;
799         struct leaf *leaf;
800         struct tree_buffer *leaf_buf;
801         unsigned int nritems;
802         unsigned int data_end;
803         struct ctree_path path;
804
805         /* create a root if there isn't one */
806         if (!root->node)
807                 BUG();
808         init_path(&path);
809         ret = search_slot(root, key, &path, data_size);
810         if (ret == 0) {
811                 release_path(root, &path);
812                 return -EEXIST;
813         }
814
815         slot_orig = path.slots[0];
816         leaf_buf = path.nodes[0];
817         leaf = &leaf_buf->leaf;
818
819         nritems = leaf->header.nritems;
820         data_end = leaf_data_end(leaf);
821
822         if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
823                 BUG();
824
825         slot = path.slots[0];
826         BUG_ON(slot < 0);
827         if (slot == 0)
828                 fixup_low_keys(root, &path, key, 1);
829         if (slot != nritems) {
830                 int i;
831                 unsigned int old_data = leaf->items[slot].offset +
832                                         leaf->items[slot].size;
833
834                 /*
835                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
836                  */
837                 /* first correct the data pointers */
838                 for (i = slot; i < nritems; i++)
839                         leaf->items[i].offset -= data_size;
840
841                 /* shift the items */
842                 memmove(leaf->items + slot + 1, leaf->items + slot,
843                         (nritems - slot) * sizeof(struct item));
844
845                 /* shift the data */
846                 memmove(leaf->data + data_end - data_size, leaf->data +
847                         data_end, old_data - data_end);
848                 data_end = old_data;
849         }
850         /* copy the new data in */
851         memcpy(&leaf->items[slot].key, key, sizeof(struct key));
852         leaf->items[slot].offset = data_end - data_size;
853         leaf->items[slot].size = data_size;
854         memcpy(leaf->data + data_end - data_size, data, data_size);
855         leaf->header.nritems += 1;
856         write_tree_block(root, leaf_buf);
857         if (leaf_free_space(leaf) < 0)
858                 BUG();
859         release_path(root, &path);
860         return 0;
861 }
862
863 /*
864  * delete the pointer from a given node.
865  *
866  * If the delete empties a node, the node is removed from the tree,
867  * continuing all the way the root if required.  The root is converted into
868  * a leaf if all the nodes are emptied.
869  */
870 int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
871 {
872         int slot;
873         struct tree_buffer *t;
874         struct node *node;
875         int nritems;
876         u64 blocknr;
877
878         while(1) {
879                 t = path->nodes[level];
880                 if (!t)
881                         break;
882                 node = &t->node;
883                 slot = path->slots[level];
884                 nritems = node->header.nritems;
885
886                 if (slot != nritems -1) {
887                         memmove(node->keys + slot, node->keys + slot + 1,
888                                 sizeof(struct key) * (nritems - slot - 1));
889                         memmove(node->blockptrs + slot,
890                                 node->blockptrs + slot + 1,
891                                 sizeof(u64) * (nritems - slot - 1));
892                 }
893                 node->header.nritems--;
894                 write_tree_block(root, t);
895                 blocknr = t->blocknr;
896                 if (node->header.nritems != 0) {
897                         if (slot == 0)
898                                 fixup_low_keys(root, path, node->keys,
899                                                level + 1);
900                         break;
901                 }
902                 if (t == root->node) {
903                         /* just turn the root into a leaf and break */
904                         root->node->node.header.flags = node_level(0);
905                         write_tree_block(root, t);
906                         break;
907                 }
908                 level++;
909                 free_extent(root, blocknr, 1);
910                 if (!path->nodes[level])
911                         BUG();
912         }
913         return 0;
914 }
915
916 /*
917  * delete the item at the leaf level in path.  If that empties
918  * the leaf, remove it from the tree
919  */
920 int del_item(struct ctree_root *root, struct ctree_path *path)
921 {
922         int slot;
923         struct leaf *leaf;
924         struct tree_buffer *leaf_buf;
925         int doff;
926         int dsize;
927
928         leaf_buf = path->nodes[0];
929         leaf = &leaf_buf->leaf;
930         slot = path->slots[0];
931         doff = leaf->items[slot].offset;
932         dsize = leaf->items[slot].size;
933
934         if (slot != leaf->header.nritems - 1) {
935                 int i;
936                 int data_end = leaf_data_end(leaf);
937                 memmove(leaf->data + data_end + dsize,
938                         leaf->data + data_end,
939                         doff - data_end);
940                 for (i = slot + 1; i < leaf->header.nritems; i++)
941                         leaf->items[i].offset += dsize;
942                 memmove(leaf->items + slot, leaf->items + slot + 1,
943                         sizeof(struct item) *
944                         (leaf->header.nritems - slot - 1));
945         }
946         leaf->header.nritems -= 1;
947         /* delete the leaf if we've emptied it */
948         if (leaf->header.nritems == 0) {
949                 if (leaf_buf == root->node) {
950                         leaf->header.flags = node_level(0);
951                         write_tree_block(root, leaf_buf);
952                 } else {
953                         del_ptr(root, path, 1);
954                         free_extent(root, leaf_buf->blocknr, 1);
955                 }
956         } else {
957                 int used = leaf_space_used(leaf, 0, leaf->header.nritems);
958                 if (slot == 0)
959                         fixup_low_keys(root, path, &leaf->items[0].key, 1);
960                 write_tree_block(root, leaf_buf);
961                 /* delete the leaf if it is mostly empty */
962                 if (used < LEAF_DATA_SIZE / 3) {
963                         /* push_leaf_left fixes the path.
964                          * make sure the path still points to our leaf
965                          * for possible call to del_ptr below
966                          */
967                         slot = path->slots[1];
968                         leaf_buf->count++;
969                         push_leaf_left(root, path, 1);
970                         if (leaf->header.nritems)
971                                 push_leaf_right(root, path, 1);
972                         if (leaf->header.nritems == 0) {
973                                 u64 blocknr = leaf_buf->blocknr;
974                                 path->slots[1] = slot;
975                                 del_ptr(root, path, 1);
976                                 tree_block_release(root, leaf_buf);
977                                 free_extent(root, blocknr, 1);
978                         } else {
979                                 tree_block_release(root, leaf_buf);
980                         }
981                 }
982         }
983         return 0;
984 }
985
986 int next_leaf(struct ctree_root *root, struct ctree_path *path)
987 {
988         int slot;
989         int level = 1;
990         u64 blocknr;
991         struct tree_buffer *c;
992         struct tree_buffer *next = NULL;
993
994         while(level < MAX_LEVEL) {
995                 if (!path->nodes[level])
996                         return -1;
997                 slot = path->slots[level] + 1;
998                 c = path->nodes[level];
999                 if (slot >= c->node.header.nritems) {
1000                         level++;
1001                         continue;
1002                 }
1003                 blocknr = c->node.blockptrs[slot];
1004                 if (next)
1005                         tree_block_release(root, next);
1006                 next = read_tree_block(root, blocknr);
1007                 break;
1008         }
1009         path->slots[level] = slot;
1010         while(1) {
1011                 level--;
1012                 c = path->nodes[level];
1013                 tree_block_release(root, c);
1014                 path->nodes[level] = next;
1015                 path->slots[level] = 0;
1016                 if (!level)
1017                         break;
1018                 next = read_tree_block(root, next->node.blockptrs[0]);
1019         }
1020         return 0;
1021 }
1022
/*
 * for testing only
 *
 * Returns rand() % max_key, i.e. a pseudo-random key in [0, max_key) for a
 * positive max_key.  The iteration counter i is currently unused.
 * max_key == 0 yields 0 instead of performing a modulo by zero, which is
 * undefined behaviour in C.
 */
int next_key(int i, int max_key) {
        (void)i;
        if (max_key == 0)
                return 0;
        return rand() % max_key;
}
1028
1029 int main() {
1030         struct ctree_root *root;
1031         struct key ins;
1032         struct key last = { (u64)-1, 0, 0};
1033         char *buf;
1034         int i;
1035         int num;
1036         int ret;
1037         int run_size = 20000000;
1038         int max_key =  100000000;
1039         int tree_size = 0;
1040         struct ctree_path path;
1041         struct ctree_super_block super;
1042
1043         radix_tree_init();
1044
1045
1046         root = open_ctree("dbfile", &super);
1047
1048         srand(55);
1049         for (i = 0; i < run_size; i++) {
1050                 buf = malloc(64);
1051                 num = next_key(i, max_key);
1052                 // num = i;
1053                 sprintf(buf, "string-%d", num);
1054                 if (i % 10000 == 0)
1055                         fprintf(stderr, "insert %d:%d\n", num, i);
1056                 ins.objectid = num;
1057                 ins.offset = 0;
1058                 ins.flags = 0;
1059                 ret = insert_item(root, &ins, buf, strlen(buf));
1060                 if (!ret)
1061                         tree_size++;
1062                 free(buf);
1063         }
1064         write_ctree_super(root, &super);
1065         close_ctree(root);
1066
1067         root = open_ctree("dbfile", &super);
1068         printf("starting search\n");
1069         srand(55);
1070         for (i = 0; i < run_size; i++) {
1071                 num = next_key(i, max_key);
1072                 ins.objectid = num;
1073                 init_path(&path);
1074                 if (i % 10000 == 0)
1075                         fprintf(stderr, "search %d:%d\n", num, i);
1076                 ret = search_slot(root, &ins, &path, 0);
1077                 if (ret) {
1078                         print_tree(root, root->node);
1079                         printf("unable to find %d\n", num);
1080                         exit(1);
1081                 }
1082                 release_path(root, &path);
1083         }
1084         write_ctree_super(root, &super);
1085         close_ctree(root);
1086         root = open_ctree("dbfile", &super);
1087         printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
1088                 node_level(root->node->node.header.flags),
1089                 root->node->node.header.nritems,
1090                 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
1091         printf("all searches good, deleting some items\n");
1092         i = 0;
1093         srand(55);
1094         for (i = 0 ; i < run_size/4; i++) {
1095                 num = next_key(i, max_key);
1096                 ins.objectid = num;
1097                 init_path(&path);
1098                 ret = search_slot(root, &ins, &path, -1);
1099                 if (!ret) {
1100                         if (i % 10000 == 0)
1101                                 fprintf(stderr, "del %d:%d\n", num, i);
1102                         ret = del_item(root, &path);
1103                         if (ret != 0)
1104                                 BUG();
1105                         tree_size--;
1106                 }
1107                 release_path(root, &path);
1108         }
1109         write_ctree_super(root, &super);
1110         close_ctree(root);
1111         root = open_ctree("dbfile", &super);
1112         srand(128);
1113         for (i = 0; i < run_size; i++) {
1114                 buf = malloc(64);
1115                 num = next_key(i, max_key);
1116                 sprintf(buf, "string-%d", num);
1117                 ins.objectid = num;
1118                 if (i % 10000 == 0)
1119                         fprintf(stderr, "insert %d:%d\n", num, i);
1120                 ret = insert_item(root, &ins, buf, strlen(buf));
1121                 if (!ret)
1122                         tree_size++;
1123                 free(buf);
1124         }
1125         write_ctree_super(root, &super);
1126         close_ctree(root);
1127         root = open_ctree("dbfile", &super);
1128         srand(128);
1129         printf("starting search2\n");
1130         for (i = 0; i < run_size; i++) {
1131                 num = next_key(i, max_key);
1132                 ins.objectid = num;
1133                 init_path(&path);
1134                 if (i % 10000 == 0)
1135                         fprintf(stderr, "search %d:%d\n", num, i);
1136                 ret = search_slot(root, &ins, &path, 0);
1137                 if (ret) {
1138                         print_tree(root, root->node);
1139                         printf("unable to find %d\n", num);
1140                         exit(1);
1141                 }
1142                 release_path(root, &path);
1143         }
1144         printf("starting big long delete run\n");
1145         while(root->node && root->node->node.header.nritems > 0) {
1146                 struct leaf *leaf;
1147                 int slot;
1148                 ins.objectid = (u64)-1;
1149                 init_path(&path);
1150                 ret = search_slot(root, &ins, &path, -1);
1151                 if (ret == 0)
1152                         BUG();
1153
1154                 leaf = &path.nodes[0]->leaf;
1155                 slot = path.slots[0];
1156                 if (slot != leaf->header.nritems)
1157                         BUG();
1158                 while(path.slots[0] > 0) {
1159                         path.slots[0] -= 1;
1160                         slot = path.slots[0];
1161                         leaf = &path.nodes[0]->leaf;
1162
1163                         if (comp_keys(&last, &leaf->items[slot].key) <= 0)
1164                                 BUG();
1165                         memcpy(&last, &leaf->items[slot].key, sizeof(last));
1166                         if (tree_size % 10000 == 0)
1167                                 printf("big del %d:%d\n", tree_size, i);
1168                         ret = del_item(root, &path);
1169                         if (ret != 0) {
1170                                 printf("del_item returned %d\n", ret);
1171                                 BUG();
1172                         }
1173                         tree_size--;
1174                 }
1175                 release_path(root, &path);
1176         }
1177         printf("tree size is now %d\n", tree_size);
1178         printf("map tree\n");
1179         print_tree(root->extent_root, root->extent_root->node);
1180         write_ctree_super(root, &super);
1181         close_ctree(root);
1182         return 0;
1183 }