switch to early splits
[platform/upstream/btrfs-progs.git] / ctree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7
8 #define SEARCH_READ 0
9 #define SEARCH_WRITE 1
10
11 static int refill_alloc_extent(struct ctree_root *root);
12 int split_node(struct ctree_root *root, struct ctree_path *path, int level);
13 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
14
15 static inline void init_path(struct ctree_path *p)
16 {
17         memset(p, 0, sizeof(*p));
18 }
19
20 static void release_path(struct ctree_root *root, struct ctree_path *p)
21 {
22         int i;
23         for (i = 0; i < MAX_LEVEL; i++) {
24                 if (!p->nodes[i])
25                         break;
26                 tree_block_release(root, p->nodes[i]);
27         }
28 }
29
30 /*
31  * The leaf data grows from end-to-front in the node.
32  * this returns the address of the start of the last item,
33  * which is the stop of the leaf data stack
34  */
35 static inline unsigned int leaf_data_end(struct leaf *leaf)
36 {
37         unsigned int nr = leaf->header.nritems;
38         if (nr == 0)
39                 return sizeof(leaf->data);
40         return leaf->items[nr-1].offset;
41 }
42
43 /*
44  * The space between the end of the leaf items and
45  * the start of the leaf data.  IOW, how much room
46  * the leaf has left for both items and data
47  */
48 static inline int leaf_free_space(struct leaf *leaf)
49 {
50         int data_end = leaf_data_end(leaf);
51         int nritems = leaf->header.nritems;
52         char *items_end = (char *)(leaf->items + nritems + 1);
53         return (char *)(leaf->data + data_end) - (char *)items_end;
54 }
55
56 /*
57  * compare two keys in a memcmp fashion
58  */
59 int comp_keys(struct key *k1, struct key *k2)
60 {
61         if (k1->objectid > k2->objectid)
62                 return 1;
63         if (k1->objectid < k2->objectid)
64                 return -1;
65         if (k1->flags > k2->flags)
66                 return 1;
67         if (k1->flags < k2->flags)
68                 return -1;
69         if (k1->offset > k2->offset)
70                 return 1;
71         if (k1->offset < k2->offset)
72                 return -1;
73         return 0;
74 }
75
/*
 * Binary search for key in the array starting at p.  The array holds
 * 'max' entries that are item_size bytes apart, each one beginning
 * with a struct key.
 *
 * Returns 0 with *slot set to the matching index when the key exists.
 * Otherwise returns 1 with *slot set to the index where the key would
 * have to be inserted; that index equals max when the key is larger
 * than every key in the array.
 */
int generic_bin_search(char *p, int item_size, struct key *key,
                       int max, int *slot)
{
        int lo = 0;
        int hi = max;

        while (lo < hi) {
                int probe = (lo + hi) / 2;
                struct key *cur = (struct key *)(p + probe * item_size);
                int cmp = comp_keys(cur, key);

                if (cmp == 0) {
                        *slot = probe;
                        return 0;
                }
                if (cmp < 0)
                        lo = probe + 1;
                else
                        hi = probe;
        }
        *slot = lo;
        return 1;
}
111
112 int bin_search(struct node *c, struct key *key, int *slot)
113 {
114         if (is_leaf(c->header.flags)) {
115                 struct leaf *l = (struct leaf *)c;
116                 return generic_bin_search((void *)l->items, sizeof(struct item),
117                                           key, c->header.nritems, slot);
118         } else {
119                 return generic_bin_search((void *)c->keys, sizeof(struct key),
120                                           key, c->header.nritems, slot);
121         }
122         return -1;
123 }
124
125 /*
126  * look for key in the tree.  path is filled in with nodes along the way
127  * if key is found, we return zero and you can find the item in the leaf
128  * level of the path (level 0)
129  *
130  * If the key isn't found, the path points to the slot where it should
131  * be inserted.
132  */
133 int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len)
134 {
135         struct tree_buffer *b = root->node;
136         struct node *c;
137         int slot;
138         int ret;
139         int level;
140
141         b->count++;
142         while (b) {
143                 c = &b->node;
144                 level = node_level(c->header.flags);
145                 p->nodes[level] = b;
146                 ret = bin_search(c, key, &slot);
147                 if (!is_leaf(c->header.flags)) {
148                         if (ret && slot > 0)
149                                 slot -= 1;
150                         p->slots[level] = slot;
151                         if (ins_len && c->header.nritems == NODEPTRS_PER_BLOCK) {
152                                 int sret = split_node(root, p, level);
153                                 BUG_ON(sret > 0);
154                                 if (sret)
155                                         return sret;
156                                 b = p->nodes[level];
157                                 c = &b->node;
158                                 slot = p->slots[level];
159                         }
160                         b = read_tree_block(root, c->blockptrs[slot]);
161                         continue;
162                 } else {
163                         struct leaf *l = (struct leaf *)c;
164                         p->slots[level] = slot;
165                         if (ins_len && leaf_free_space(l) <  sizeof(struct item) + ins_len) {
166                                 int sret = split_leaf(root, p, ins_len);
167                                 BUG_ON(sret > 0);
168                                 if (sret)
169                                         return sret;
170                         }
171                         return ret;
172                 }
173         }
174         return -1;
175 }
176
177 /*
178  * adjust the pointers going up the tree, starting at level
179  * making sure the right key of each node is points to 'key'.
180  * This is used after shifting pointers to the left, so it stops
181  * fixing up pointers when a given leaf/node is not in slot 0 of the
182  * higher levels
183  */
184 static void fixup_low_keys(struct ctree_root *root,
185                            struct ctree_path *path, struct key *key,
186                            int level)
187 {
188         int i;
189         for (i = level; i < MAX_LEVEL; i++) {
190                 struct node *t;
191                 int tslot = path->slots[i];
192                 if (!path->nodes[i])
193                         break;
194                 t = &path->nodes[i]->node;
195                 memcpy(t->keys + tslot, key, sizeof(*key));
196                 write_tree_block(root, path->nodes[i]);
197                 if (tslot != 0)
198                         break;
199         }
200 }
201
202 /*
203  * try to push data from one node into the next node left in the
204  * tree.  The src node is found at specified level in the path.
205  * If some bytes were pushed, return 0, otherwise return 1.
206  *
207  * Lower nodes/leaves in the path are not touched, higher nodes may
208  * be modified to reflect the push.
209  *
210  * The path is altered to reflect the push.
211  */
212 int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
213 {
214         int slot;
215         struct node *left;
216         struct node *right;
217         int push_items = 0;
218         int left_nritems;
219         int right_nritems;
220         struct tree_buffer *t;
221         struct tree_buffer *right_buf;
222
223         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
224                 return 1;
225         slot = path->slots[level + 1];
226         if (slot == 0)
227                 return 1;
228
229         t = read_tree_block(root,
230                             path->nodes[level + 1]->node.blockptrs[slot - 1]);
231         left = &t->node;
232         right_buf = path->nodes[level];
233         right = &right_buf->node;
234         left_nritems = left->header.nritems;
235         right_nritems = right->header.nritems;
236         push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
237         if (push_items <= 0) {
238                 tree_block_release(root, t);
239                 return 1;
240         }
241
242         if (right_nritems < push_items)
243                 push_items = right_nritems;
244         memcpy(left->keys + left_nritems, right->keys,
245                 push_items * sizeof(struct key));
246         memcpy(left->blockptrs + left_nritems, right->blockptrs,
247                 push_items * sizeof(u64));
248         memmove(right->keys, right->keys + push_items,
249                 (right_nritems - push_items) * sizeof(struct key));
250         memmove(right->blockptrs, right->blockptrs + push_items,
251                 (right_nritems - push_items) * sizeof(u64));
252         right->header.nritems -= push_items;
253         left->header.nritems += push_items;
254
255         /* adjust the pointers going up the tree */
256         fixup_low_keys(root, path, right->keys, level + 1);
257
258         write_tree_block(root, t);
259         write_tree_block(root, right_buf);
260
261         /* then fixup the leaf pointer in the path */
262         if (path->slots[level] < push_items) {
263                 path->slots[level] += left_nritems;
264                 tree_block_release(root, path->nodes[level]);
265                 path->nodes[level] = t;
266                 path->slots[level + 1] -= 1;
267         } else {
268                 path->slots[level] -= push_items;
269                 tree_block_release(root, t);
270         }
271         return 0;
272 }
273
274 /*
275  * try to push data from one node into the next node right in the
276  * tree.  The src node is found at specified level in the path.
277  * If some bytes were pushed, return 0, otherwise return 1.
278  *
279  * Lower nodes/leaves in the path are not touched, higher nodes may
280  * be modified to reflect the push.
281  *
282  * The path is altered to reflect the push.
283  */
284 int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
285 {
286         int slot;
287         struct tree_buffer *t;
288         struct tree_buffer *src_buffer;
289         struct node *dst;
290         struct node *src;
291         int push_items = 0;
292         int dst_nritems;
293         int src_nritems;
294
295         /* can't push from the root */
296         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
297                 return 1;
298
299         /* only try to push inside the node higher up */
300         slot = path->slots[level + 1];
301         if (slot == NODEPTRS_PER_BLOCK - 1)
302                 return 1;
303
304         if (slot >= path->nodes[level + 1]->node.header.nritems -1)
305                 return 1;
306
307         t = read_tree_block(root,
308                             path->nodes[level + 1]->node.blockptrs[slot + 1]);
309         dst = &t->node;
310         src_buffer = path->nodes[level];
311         src = &src_buffer->node;
312         dst_nritems = dst->header.nritems;
313         src_nritems = src->header.nritems;
314         push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
315         if (push_items <= 0) {
316                 tree_block_release(root, t);
317                 return 1;
318         }
319
320         if (src_nritems < push_items)
321                 push_items = src_nritems;
322         memmove(dst->keys + push_items, dst->keys,
323                 dst_nritems * sizeof(struct key));
324         memcpy(dst->keys, src->keys + src_nritems - push_items,
325                 push_items * sizeof(struct key));
326
327         memmove(dst->blockptrs + push_items, dst->blockptrs,
328                 dst_nritems * sizeof(u64));
329         memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
330                 push_items * sizeof(u64));
331
332         src->header.nritems -= push_items;
333         dst->header.nritems += push_items;
334
335         /* adjust the pointers going up the tree */
336         memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
337                 dst->keys, sizeof(struct key));
338
339         write_tree_block(root, path->nodes[level + 1]);
340         write_tree_block(root, t);
341         write_tree_block(root, src_buffer);
342
343         /* then fixup the pointers in the path */
344         if (path->slots[level] >= src->header.nritems) {
345                 path->slots[level] -= src->header.nritems;
346                 tree_block_release(root, path->nodes[level]);
347                 path->nodes[level] = t;
348                 path->slots[level + 1] += 1;
349         } else {
350                 tree_block_release(root, t);
351         }
352         return 0;
353 }
354
355 static int insert_new_root(struct ctree_root *root, struct ctree_path *path, int level)
356 {
357         struct tree_buffer *t;
358         struct node *lower;
359         struct node *c;
360         struct key *lower_key;
361
362         BUG_ON(path->nodes[level]);
363         BUG_ON(path->nodes[level-1] != root->node);
364
365         t = alloc_free_block(root);
366         c = &t->node;
367         memset(c, 0, sizeof(c));
368         c->header.nritems = 1;
369         c->header.flags = node_level(level);
370         c->header.blocknr = t->blocknr;
371         c->header.parentid = root->node->node.header.parentid;
372         lower = &path->nodes[level-1]->node;
373         if (is_leaf(lower->header.flags))
374                 lower_key = &((struct leaf *)lower)->items[0].key;
375         else
376                 lower_key = lower->keys;
377         memcpy(c->keys, lower_key, sizeof(struct key));
378         c->blockptrs[0] = path->nodes[level-1]->blocknr;
379         /* the super has an extra ref to root->node */
380         tree_block_release(root, root->node);
381         root->node = t;
382         t->count++;
383         write_tree_block(root, t);
384         path->nodes[level] = t;
385         path->slots[level] = 0;
386         return 0;
387 }
388
389 /*
390  * worker function to insert a single pointer in a node.
391  * the node should have enough room for the pointer already
392  * slot and level indicate where you want the key to go, and
393  * blocknr is the block the key points to.
394  */
395 int insert_ptr(struct ctree_root *root,
396                 struct ctree_path *path, struct key *key,
397                 u64 blocknr, int slot, int level)
398 {
399         struct node *lower;
400         int nritems;
401
402         BUG_ON(!path->nodes[level]);
403         lower = &path->nodes[level]->node;
404         nritems = lower->header.nritems;
405         if (slot > nritems)
406                 BUG();
407         if (nritems == NODEPTRS_PER_BLOCK)
408                 BUG();
409         if (slot != nritems) {
410                 memmove(lower->keys + slot + 1, lower->keys + slot,
411                         (nritems - slot) * sizeof(struct key));
412                 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
413                         (nritems - slot) * sizeof(u64));
414         }
415         memcpy(lower->keys + slot, key, sizeof(struct key));
416         lower->blockptrs[slot] = blocknr;
417         lower->header.nritems++;
418         if (lower->keys[1].objectid == 0)
419                         BUG();
420         write_tree_block(root, path->nodes[level]);
421         return 0;
422 }
423
424 int split_node(struct ctree_root *root, struct ctree_path *path, int level)
425 {
426         struct tree_buffer *t;
427         struct node *c;
428         struct tree_buffer *split_buffer;
429         struct node *split;
430         int mid;
431         int ret;
432
433         ret = push_node_left(root, path, level);
434         if (!ret)
435                 return 0;
436         ret = push_node_right(root, path, level);
437         if (!ret)
438                 return 0;
439         t = path->nodes[level];
440         c = &t->node;
441         if (t == root->node) {
442                 /* trying to split the root, lets make a new one */
443                 ret = insert_new_root(root, path, level + 1);
444                 if (ret)
445                         return ret;
446         }
447         split_buffer = alloc_free_block(root);
448         split = &split_buffer->node;
449         split->header.flags = c->header.flags;
450         split->header.blocknr = split_buffer->blocknr;
451         split->header.parentid = root->node->node.header.parentid;
452         mid = (c->header.nritems + 1) / 2;
453         memcpy(split->keys, c->keys + mid,
454                 (c->header.nritems - mid) * sizeof(struct key));
455         memcpy(split->blockptrs, c->blockptrs + mid,
456                 (c->header.nritems - mid) * sizeof(u64));
457         split->header.nritems = c->header.nritems - mid;
458         c->header.nritems = mid;
459         write_tree_block(root, t);
460         write_tree_block(root, split_buffer);
461         insert_ptr(root, path, split->keys, split_buffer->blocknr,
462                      path->slots[level + 1] + 1, level + 1);
463         if (path->slots[level] > mid) {
464                 path->slots[level] -= mid;
465                 tree_block_release(root, t);
466                 path->nodes[level] = split_buffer;
467                 path->slots[level + 1] += 1;
468         } else {
469                 tree_block_release(root, split_buffer);
470         }
471         return 0;
472 }
473
474 /*
475  * how many bytes are required to store the items in a leaf.  start
476  * and nr indicate which items in the leaf to check.  This totals up the
477  * space used both by the item structs and the item data
478  */
479 int leaf_space_used(struct leaf *l, int start, int nr)
480 {
481         int data_len;
482         int end = start + nr - 1;
483
484         if (!nr)
485                 return 0;
486         data_len = l->items[start].offset + l->items[start].size;
487         data_len = data_len - l->items[end].offset;
488         data_len += sizeof(struct item) * nr;
489         return data_len;
490 }
491
492 /*
493  * push some data in the path leaf to the left, trying to free up at
494  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
495  */
496 int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
497                    int data_size)
498 {
499         struct tree_buffer *right_buf = path->nodes[0];
500         struct leaf *right = &right_buf->leaf;
501         struct tree_buffer *t;
502         struct leaf *left;
503         int slot;
504         int i;
505         int free_space;
506         int push_space = 0;
507         int push_items = 0;
508         struct item *item;
509         int old_left_nritems;
510
511         slot = path->slots[1];
512         if (slot == 0) {
513                 return 1;
514         }
515         if (!path->nodes[1]) {
516                 return 1;
517         }
518         t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
519         left = &t->leaf;
520         free_space = leaf_free_space(left);
521         if (free_space < data_size + sizeof(struct item)) {
522                 tree_block_release(root, t);
523                 return 1;
524         }
525         for (i = 0; i < right->header.nritems; i++) {
526                 item = right->items + i;
527                 if (path->slots[0] == i)
528                         push_space += data_size + sizeof(*item);
529                 if (item->size + sizeof(*item) + push_space > free_space)
530                         break;
531                 push_items++;
532                 push_space += item->size + sizeof(*item);
533         }
534         if (push_items == 0) {
535                 tree_block_release(root, t);
536                 return 1;
537         }
538         /* push data from right to left */
539         memcpy(left->items + left->header.nritems,
540                 right->items, push_items * sizeof(struct item));
541         push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
542         memcpy(left->data + leaf_data_end(left) - push_space,
543                 right->data + right->items[push_items - 1].offset,
544                 push_space);
545         old_left_nritems = left->header.nritems;
546         BUG_ON(old_left_nritems < 0);
547
548         for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
549                 left->items[i].offset -= LEAF_DATA_SIZE -
550                         left->items[old_left_nritems -1].offset;
551         }
552         left->header.nritems += push_items;
553
554         /* fixup right node */
555         push_space = right->items[push_items-1].offset - leaf_data_end(right);
556         memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
557                 leaf_data_end(right), push_space);
558         memmove(right->items, right->items + push_items,
559                 (right->header.nritems - push_items) * sizeof(struct item));
560         right->header.nritems -= push_items;
561         push_space = LEAF_DATA_SIZE;
562
563         for (i = 0; i < right->header.nritems; i++) {
564                 right->items[i].offset = push_space - right->items[i].size;
565                 push_space = right->items[i].offset;
566         }
567
568         write_tree_block(root, t);
569         write_tree_block(root, right_buf);
570
571         fixup_low_keys(root, path, &right->items[0].key, 1);
572
573         /* then fixup the leaf pointer in the path */
574         if (path->slots[0] < push_items) {
575                 path->slots[0] += old_left_nritems;
576                 tree_block_release(root, path->nodes[0]);
577                 path->nodes[0] = t;
578                 path->slots[1] -= 1;
579         } else {
580                 tree_block_release(root, t);
581                 path->slots[0] -= push_items;
582         }
583         BUG_ON(path->slots[0] < 0);
584         return 0;
585 }
586
587 /*
588  * split the path's leaf in two, making sure there is at least data_size
589  * available for the resulting leaf level of the path.
590  */
591 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
592 {
593         struct tree_buffer *l_buf = path->nodes[0];
594         struct leaf *l = &l_buf->leaf;
595         int nritems;
596         int mid;
597         int slot;
598         struct leaf *right;
599         struct tree_buffer *right_buffer;
600         int space_needed = data_size + sizeof(struct item);
601         int data_copy_size;
602         int rt_data_off;
603         int i;
604         int ret;
605
606         if (push_leaf_left(root, path, data_size) == 0) {
607                 l_buf = path->nodes[0];
608                 l = &l_buf->leaf;
609                 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
610                         return 0;
611         }
612         if (!path->nodes[1]) {
613                 ret = insert_new_root(root, path, 1);
614                 if (ret)
615                         return ret;
616         }
617         slot = path->slots[0];
618         nritems = l->header.nritems;
619         mid = (nritems + 1)/ 2;
620
621         right_buffer = alloc_free_block(root);
622         BUG_ON(!right_buffer);
623         BUG_ON(mid == nritems);
624         right = &right_buffer->leaf;
625         memset(right, 0, sizeof(*right));
626         if (mid <= slot) {
627                 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
628                         LEAF_DATA_SIZE)
629                         BUG();
630         } else {
631                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
632                         LEAF_DATA_SIZE)
633                         BUG();
634         }
635         right->header.nritems = nritems - mid;
636         right->header.blocknr = right_buffer->blocknr;
637         right->header.flags = node_level(0);
638         right->header.parentid = root->node->node.header.parentid;
639         data_copy_size = l->items[mid].offset + l->items[mid].size -
640                          leaf_data_end(l);
641         memcpy(right->items, l->items + mid,
642                (nritems - mid) * sizeof(struct item));
643         memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
644                l->data + leaf_data_end(l), data_copy_size);
645         rt_data_off = LEAF_DATA_SIZE -
646                      (l->items[mid].offset + l->items[mid].size);
647
648         for (i = 0; i < right->header.nritems; i++)
649                 right->items[i].offset += rt_data_off;
650
651         l->header.nritems = mid;
652         ret = insert_ptr(root, path, &right->items[0].key,
653                           right_buffer->blocknr, path->slots[1] + 1, 1);
654         write_tree_block(root, right_buffer);
655         write_tree_block(root, l_buf);
656
657         BUG_ON(path->slots[0] != slot);
658         if (mid <= slot) {
659                 tree_block_release(root, path->nodes[0]);
660                 path->nodes[0] = right_buffer;
661                 path->slots[0] -= mid;
662                 path->slots[1] += 1;
663         } else
664                 tree_block_release(root, right_buffer);
665         BUG_ON(path->slots[0] < 0);
666         return ret;
667 }
668
669 /*
670  * Given a key and some data, insert an item into the tree.
671  * This does all the path init required, making room in the tree if needed.
672  */
673 int insert_item(struct ctree_root *root, struct key *key,
674                           void *data, int data_size)
675 {
676         int ret;
677         int slot;
678         int slot_orig;
679         struct leaf *leaf;
680         struct tree_buffer *leaf_buf;
681         unsigned int nritems;
682         unsigned int data_end;
683         struct ctree_path path;
684
685         refill_alloc_extent(root);
686
687         /* create a root if there isn't one */
688         if (!root->node)
689                 BUG();
690         init_path(&path);
691         ret = search_slot(root, key, &path, data_size);
692         if (ret == 0) {
693                 release_path(root, &path);
694                 return -EEXIST;
695         }
696
697         slot_orig = path.slots[0];
698         leaf_buf = path.nodes[0];
699         leaf = &leaf_buf->leaf;
700
701         nritems = leaf->header.nritems;
702         data_end = leaf_data_end(leaf);
703
704         if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
705                 BUG();
706
707         slot = path.slots[0];
708         BUG_ON(slot < 0);
709         if (slot == 0)
710                 fixup_low_keys(root, &path, key, 1);
711         if (slot != nritems) {
712                 int i;
713                 unsigned int old_data = leaf->items[slot].offset +
714                                         leaf->items[slot].size;
715
716                 /*
717                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
718                  */
719                 /* first correct the data pointers */
720                 for (i = slot; i < nritems; i++)
721                         leaf->items[i].offset -= data_size;
722
723                 /* shift the items */
724                 memmove(leaf->items + slot + 1, leaf->items + slot,
725                         (nritems - slot) * sizeof(struct item));
726
727                 /* shift the data */
728                 memmove(leaf->data + data_end - data_size, leaf->data +
729                         data_end, old_data - data_end);
730                 data_end = old_data;
731         }
732         /* copy the new data in */
733         memcpy(&leaf->items[slot].key, key, sizeof(struct key));
734         leaf->items[slot].offset = data_end - data_size;
735         leaf->items[slot].size = data_size;
736         memcpy(leaf->data + data_end - data_size, data, data_size);
737         leaf->header.nritems += 1;
738         write_tree_block(root, leaf_buf);
739         if (leaf_free_space(leaf) < 0)
740                 BUG();
741         release_path(root, &path);
742         return 0;
743 }
744
745 /*
746  * delete the pointer from a given level in the path.  The path is not
747  * fixed up, so after calling this it is not valid at that level.
748  *
749  * If the delete empties a node, the node is removed from the tree,
750  * continuing all the way the root if required.  The root is converted into
751  * a leaf if all the nodes are emptied.
752  */
753 int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
754 {
755         int slot;
756         struct tree_buffer *t;
757         struct node *node;
758         int nritems;
759
760         while(1) {
761                 t = path->nodes[level];
762                 if (!t)
763                         break;
764                 node = &t->node;
765                 slot = path->slots[level];
766                 nritems = node->header.nritems;
767
768                 if (slot != nritems -1) {
769                         memmove(node->keys + slot, node->keys + slot + 1,
770                                 sizeof(struct key) * (nritems - slot - 1));
771                         memmove(node->blockptrs + slot,
772                                 node->blockptrs + slot + 1,
773                                 sizeof(u64) * (nritems - slot - 1));
774                 }
775                 node->header.nritems--;
776                 write_tree_block(root, t);
777                 if (node->header.nritems != 0) {
778                         int tslot;
779                         if (slot == 0)
780                                 fixup_low_keys(root, path, node->keys,
781                                                level + 1);
782                         tslot = path->slots[level+1];
783                         t->count++;
784                         push_node_left(root, path, level);
785                         if (node->header.nritems) {
786                                 push_node_right(root, path, level);
787                         }
788                         if (node->header.nritems) {
789                                 tree_block_release(root, t);
790                                 break;
791                         }
792                         tree_block_release(root, t);
793                         path->slots[level+1] = tslot;
794                 }
795                 if (t == root->node) {
796                         /* just turn the root into a leaf and break */
797                         root->node->node.header.flags = node_level(0);
798                         write_tree_block(root, t);
799                         break;
800                 }
801                 level++;
802                 if (!path->nodes[level])
803                         BUG();
804         }
805         return 0;
806 }
807
808 /*
809  * delete the item at the leaf level in path.  If that empties
810  * the leaf, remove it from the tree
811  */
812 int del_item(struct ctree_root *root, struct ctree_path *path)
813 {
814         int slot;
815         struct leaf *leaf;
816         struct tree_buffer *leaf_buf;
817         int doff;
818         int dsize;
819
820         leaf_buf = path->nodes[0];
821         leaf = &leaf_buf->leaf;
822         slot = path->slots[0];
823         doff = leaf->items[slot].offset;
824         dsize = leaf->items[slot].size;
825
826         if (slot != leaf->header.nritems - 1) {
827                 int i;
828                 int data_end = leaf_data_end(leaf);
829                 memmove(leaf->data + data_end + dsize,
830                         leaf->data + data_end,
831                         doff - data_end);
832                 for (i = slot + 1; i < leaf->header.nritems; i++)
833                         leaf->items[i].offset += dsize;
834                 memmove(leaf->items + slot, leaf->items + slot + 1,
835                         sizeof(struct item) *
836                         (leaf->header.nritems - slot - 1));
837         }
838         leaf->header.nritems -= 1;
839         /* delete the leaf if we've emptied it */
840         if (leaf->header.nritems == 0) {
841                 if (leaf_buf == root->node) {
842                         leaf->header.flags = node_level(0);
843                         write_tree_block(root, leaf_buf);
844                 } else
845                         del_ptr(root, path, 1);
846         } else {
847                 if (slot == 0)
848                         fixup_low_keys(root, path, &leaf->items[0].key, 1);
849                 write_tree_block(root, leaf_buf);
850                 /* delete the leaf if it is mostly empty */
851                 if (leaf_space_used(leaf, 0, leaf->header.nritems) <
852                     LEAF_DATA_SIZE / 4) {
853                         /* push_leaf_left fixes the path.
854                          * make sure the path still points to our leaf
855                          * for possible call to del_ptr below
856                          */
857                         slot = path->slots[1];
858                         leaf_buf->count++;
859                         push_leaf_left(root, path, 1);
860                         if (leaf->header.nritems == 0) {
861                                 path->slots[1] = slot;
862                                 del_ptr(root, path, 1);
863                         }
864                         tree_block_release(root, leaf_buf);
865                 }
866         }
867         return 0;
868 }
869
870 int next_leaf(struct ctree_root *root, struct ctree_path *path)
871 {
872         int slot;
873         int level = 1;
874         u64 blocknr;
875         struct tree_buffer *c;
876         struct tree_buffer *next = NULL;
877
878         while(level < MAX_LEVEL) {
879                 if (!path->nodes[level])
880                         return -1;
881                 slot = path->slots[level] + 1;
882                 c = path->nodes[level];
883                 if (slot >= c->node.header.nritems) {
884                         level++;
885                         continue;
886                 }
887                 blocknr = c->node.blockptrs[slot];
888                 if (next)
889                         tree_block_release(root, next);
890                 next = read_tree_block(root, blocknr);
891                 break;
892         }
893         path->slots[level] = slot;
894         while(1) {
895                 level--;
896                 c = path->nodes[level];
897                 tree_block_release(root, c);
898                 path->nodes[level] = next;
899                 path->slots[level] = 0;
900                 if (!level)
901                         break;
902                 next = read_tree_block(root, next->node.blockptrs[0]);
903         }
904         return 0;
905 }
906
907 int alloc_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start,
908                  u64 search_end, u64 owner, struct key *ins)
909 {
910         struct ctree_path path;
911         struct key *key;
912         int ret;
913         u64 hole_size = 0;
914         int slot = 0;
915         u64 last_block;
916         int start_found = 0;
917         struct leaf *l;
918         struct extent_item extent_item;
919         struct ctree_root * root = orig_root->extent_root;
920
921         init_path(&path);
922         ins->objectid = search_start;
923         ins->offset = 0;
924         ins->flags = 0;
925
926         ret = search_slot(root, ins, &path, sizeof(struct extent_item));
927         while (1) {
928                 l = &path.nodes[0]->leaf;
929                 slot = path.slots[0];
930                 if (!l) {
931                         // FIXME allocate root
932                 }
933                 if (slot >= l->header.nritems) {
934                         ret = next_leaf(root, &path);
935                         if (ret == 0)
936                                 continue;
937                         if (!start_found) {
938                                 ins->objectid = search_start;
939                                 ins->offset = num_blocks;
940                                 hole_size = search_end - search_start;
941                                 goto insert;
942                         }
943                         ins->objectid = last_block;
944                         ins->offset = num_blocks;
945                         hole_size = search_end - last_block;
946                         goto insert;
947                 }
948                 key = &l->items[slot].key;
949                 if (start_found) {
950                         hole_size = key->objectid - last_block;
951                         if (hole_size > num_blocks) {
952                                 ins->objectid = last_block;
953                                 ins->offset = num_blocks;
954                                 goto insert;
955                         }
956                 } else
957                         start_found = 1;
958                 last_block = key->objectid + key->offset;
959                 path.slots[0]++;
960         }
961         // FIXME -ENOSPC
962 insert:
963         release_path(root, &path);
964         extent_item.refs = 1;
965         extent_item.owner = owner;
966         if (root == orig_root && root->reserve_extent->num_blocks == 0) {
967                 root->reserve_extent->blocknr = ins->objectid;
968                 root->reserve_extent->num_blocks = ins->offset;
969                 root->reserve_extent->num_used = 0;
970         }
971         ret = insert_item(root->extent_root, ins, &extent_item, sizeof(extent_item));
972         return ret;
973 }
974
975 static int refill_alloc_extent(struct ctree_root *root)
976 {
977         struct alloc_extent *ae = root->alloc_extent;
978         struct key key;
979         int ret;
980         int min_blocks = MAX_LEVEL * 2;
981
982         if (ae->num_blocks > ae->num_used && ae->num_blocks - ae->num_used >
983             min_blocks)
984                 return 0;
985         ae = root->reserve_extent;
986         if (ae->num_blocks > ae->num_used) {
987                 if (root->alloc_extent->num_blocks == 0) {
988                         /* we should swap reserve/alloc_extent when alloc
989                          * fills up
990                          */
991                         BUG();
992                 }
993                 if (ae->num_blocks - ae->num_used < min_blocks)
994                         BUG();
995                 return 0;
996         }
997         ret = alloc_extent(root,
998                            min_blocks * 2, 0, (unsigned long)-1,
999                            root->node->node.header.parentid, &key);
1000         ae->blocknr = key.objectid;
1001         ae->num_blocks = key.offset;
1002         ae->num_used = 0;
1003         return ret;
1004 }
1005
1006 void print_leaf(struct leaf *l)
1007 {
1008         int i;
1009         int nr = l->header.nritems;
1010         struct item *item;
1011         struct extent_item *ei;
1012         printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
1013                leaf_free_space(l));
1014         fflush(stdout);
1015         for (i = 0 ; i < nr ; i++) {
1016                 item = l->items + i;
1017                 printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n",
1018                         i,
1019                         item->key.objectid, item->key.flags, item->key.offset,
1020                         item->offset, item->size);
1021                 fflush(stdout);
1022                 printf("\t\titem data %.*s\n", item->size, l->data+item->offset);
1023                 ei = (struct extent_item *)(l->data + item->offset);
1024                 printf("\t\textent data %u %lu\n", ei->refs, ei->owner);
1025                 fflush(stdout);
1026         }
1027 }
1028 void print_tree(struct ctree_root *root, struct tree_buffer *t)
1029 {
1030         int i;
1031         int nr;
1032         struct node *c;
1033
1034         if (!t)
1035                 return;
1036         c = &t->node;
1037         nr = c->header.nritems;
1038         if (c->header.blocknr != t->blocknr)
1039                 BUG();
1040         if (is_leaf(c->header.flags)) {
1041                 print_leaf((struct leaf *)c);
1042                 return;
1043         }
1044         printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
1045                 node_level(c->header.flags), c->header.nritems,
1046                 NODEPTRS_PER_BLOCK - c->header.nritems);
1047         fflush(stdout);
1048         for (i = 0; i < nr; i++) {
1049                 printf("\tkey %d (%lu %u %lu) block %lu\n",
1050                        i,
1051                        c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
1052                        c->blockptrs[i]);
1053                 fflush(stdout);
1054         }
1055         for (i = 0; i < nr; i++) {
1056                 struct tree_buffer *next_buf = read_tree_block(root,
1057                                                             c->blockptrs[i]);
1058                 struct node *next = &next_buf->node;
1059                 if (is_leaf(next->header.flags) &&
1060                     node_level(c->header.flags) != 1)
1061                         BUG();
1062                 if (node_level(next->header.flags) !=
1063                         node_level(c->header.flags) - 1)
1064                         BUG();
1065                 print_tree(root, next_buf);
1066                 tree_block_release(root, next_buf);
1067         }
1068
1069 }
1070
/*
 * for testing only
 *
 * Produces the key for iteration i of the test loops.  Keys are
 * currently sequential, so max_key is ignored; a random variant would
 * be rand() % max_key.
 */
int next_key(int i, int max_key)
{
	(void)max_key;
	return i;
}
1076
1077 int main() {
1078         struct ctree_root *root;
1079         struct key ins;
1080         struct key last = { (u64)-1, 0, 0};
1081         char *buf;
1082         int i;
1083         int num;
1084         int ret;
1085         int run_size = 10000;
1086         int max_key = 100000000;
1087         int tree_size = 0;
1088         struct ctree_path path;
1089         struct ctree_super_block super;
1090
1091         radix_tree_init();
1092
1093
1094         root = open_ctree("dbfile", &super);
1095         printf("root tree\n");
1096         print_tree(root, root->node);
1097         printf("map tree\n");
1098         print_tree(root->extent_root, root->extent_root->node);
1099
1100         srand(55);
1101         for (i = 0; i < run_size; i++) {
1102                 buf = malloc(64);
1103                 num = next_key(i, max_key);
1104                 // num = i;
1105                 sprintf(buf, "string-%d", num);
1106                 // printf("insert %d\n", num);
1107                 ins.objectid = num;
1108                 ins.offset = 0;
1109                 ins.flags = 0;
1110                 ret = insert_item(root, &ins, buf, strlen(buf));
1111                 if (!ret)
1112                         tree_size++;
1113         }
1114         printf("root used: %lu\n", root->alloc_extent->num_used);
1115         printf("root tree\n");
1116         // print_tree(root, root->node);
1117         printf("map tree\n");
1118         printf("map used: %lu\n", root->extent_root->alloc_extent->num_used);
1119         // print_tree(root->extent_root, root->extent_root->node);
1120         write_ctree_super(root, &super);
1121         close_ctree(root);
1122
1123         root = open_ctree("dbfile", &super);
1124         printf("starting search\n");
1125         srand(55);
1126         for (i = 0; i < run_size; i++) {
1127                 num = next_key(i, max_key);
1128                 ins.objectid = num;
1129                 init_path(&path);
1130                 ret = search_slot(root, &ins, &path, 0);
1131                 if (ret) {
1132                         print_tree(root, root->node);
1133                         printf("unable to find %d\n", num);
1134                         exit(1);
1135                 }
1136                 release_path(root, &path);
1137         }
1138         write_ctree_super(root, &super);
1139         close_ctree(root);
1140         root = open_ctree("dbfile", &super);
1141         printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
1142                 node_level(root->node->node.header.flags),
1143                 root->node->node.header.nritems,
1144                 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
1145         printf("all searches good, deleting some items\n");
1146         i = 0;
1147         srand(55);
1148         for (i = 0 ; i < run_size/4; i++) {
1149                 num = next_key(i, max_key);
1150                 ins.objectid = num;
1151                 init_path(&path);
1152                 ret = search_slot(root, &ins, &path, 0);
1153                 if (ret)
1154                         continue;
1155                 ret = del_item(root, &path);
1156                 if (ret != 0)
1157                         BUG();
1158                 release_path(root, &path);
1159                 tree_size--;
1160         }
1161         srand(128);
1162         for (i = 0; i < run_size; i++) {
1163                 buf = malloc(64);
1164                 num = next_key(i, max_key);
1165                 sprintf(buf, "string-%d", num);
1166                 ins.objectid = num;
1167                 ret = insert_item(root, &ins, buf, strlen(buf));
1168                 if (!ret)
1169                         tree_size++;
1170         }
1171         write_ctree_super(root, &super);
1172         close_ctree(root);
1173         root = open_ctree("dbfile", &super);
1174         printf("starting search2\n");
1175         srand(128);
1176         for (i = 0; i < run_size; i++) {
1177                 num = next_key(i, max_key);
1178                 ins.objectid = num;
1179                 init_path(&path);
1180                 ret = search_slot(root, &ins, &path, 0);
1181                 if (ret) {
1182                         print_tree(root, root->node);
1183                         printf("unable to find %d\n", num);
1184                         exit(1);
1185                 }
1186                 release_path(root, &path);
1187         }
1188         printf("starting big long delete run\n");
1189         while(root->node && root->node->node.header.nritems > 0) {
1190                 struct leaf *leaf;
1191                 int slot;
1192                 ins.objectid = (u64)-1;
1193                 init_path(&path);
1194                 ret = search_slot(root, &ins, &path, 0);
1195                 if (ret == 0)
1196                         BUG();
1197
1198                 leaf = &path.nodes[0]->leaf;
1199                 slot = path.slots[0];
1200                 if (slot != leaf->header.nritems)
1201                         BUG();
1202                 while(path.slots[0] > 0) {
1203                         path.slots[0] -= 1;
1204                         slot = path.slots[0];
1205                         leaf = &path.nodes[0]->leaf;
1206
1207                         if (comp_keys(&last, &leaf->items[slot].key) <= 0)
1208                                 BUG();
1209                         memcpy(&last, &leaf->items[slot].key, sizeof(last));
1210                         ret = del_item(root, &path);
1211                         if (ret != 0) {
1212                                 printf("del_item returned %d\n", ret);
1213                                 BUG();
1214                         }
1215                         tree_size--;
1216                 }
1217                 release_path(root, &path);
1218         }
1219         write_ctree_super(root, &super);
1220         close_ctree(root);
1221         printf("tree size is now %d\n", tree_size);
1222         return 0;
1223 }