Commenting/cleanup
[platform/upstream/btrfs-progs.git] / ctree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7
8 static inline void init_path(struct ctree_path *p)
9 {
10         memset(p, 0, sizeof(*p));
11 }
12
13 static void release_path(struct ctree_root *root, struct ctree_path *p)
14 {
15         int i;
16         for (i = 0; i < MAX_LEVEL; i++) {
17                 if (!p->nodes[i])
18                         break;
19                 tree_block_release(root, p->nodes[i]);
20         }
21 }
22
23 /*
24  * The leaf data grows from end-to-front in the node.
25  * this returns the address of the start of the last item,
26  * which is the stop of the leaf data stack
27  */
28 static inline unsigned int leaf_data_end(struct leaf *leaf)
29 {
30         unsigned int nr = leaf->header.nritems;
31         if (nr == 0)
32                 return ARRAY_SIZE(leaf->data);
33         return leaf->items[nr-1].offset;
34 }
35
36 /*
37  * The space between the end of the leaf items and
38  * the start of the leaf data.  IOW, how much room
39  * the leaf has left for both items and data
40  */
41 static inline int leaf_free_space(struct leaf *leaf)
42 {
43         int data_end = leaf_data_end(leaf);
44         int nritems = leaf->header.nritems;
45         char *items_end = (char *)(leaf->items + nritems + 1);
46         return (char *)(leaf->data + data_end) - (char *)items_end;
47 }
48
49 /*
50  * compare two keys in a memcmp fashion
51  */
52 int comp_keys(struct key *k1, struct key *k2)
53 {
54         if (k1->objectid > k2->objectid)
55                 return 1;
56         if (k1->objectid < k2->objectid)
57                 return -1;
58         if (k1->flags > k2->flags)
59                 return 1;
60         if (k1->flags < k2->flags)
61                 return -1;
62         if (k1->offset > k2->offset)
63                 return 1;
64         if (k1->offset < k2->offset)
65                 return -1;
66         return 0;
67 }
68
/*
 * binary search for key in the array p.  The elements of p are
 * item_size bytes apart, each one starts with a struct key, and there
 * are 'max' of them.
 *
 * Returns 0 if the key was found and 1 if it was not.  Either way the
 * slot is returned via 'slot'; when the key is not found it is the
 * place where the key would have to be inserted.
 *
 * slot may point to max if the key is bigger than all of the keys
 */
int generic_bin_search(char *p, int item_size, struct key *key,
		       int max, int *slot)
{
	int lo = 0;
	int hi = max;

	while (lo < hi) {
		int probe = (lo + hi) / 2;
		int cmp = comp_keys((struct key *)(p + probe * item_size), key);

		if (cmp == 0) {
			*slot = probe;
			return 0;
		}
		if (cmp < 0)
			lo = probe + 1;
		else
			hi = probe;
	}
	*slot = lo;
	return 1;
}
104
105 int bin_search(struct node *c, struct key *key, int *slot)
106 {
107         if (is_leaf(c->header.flags)) {
108                 struct leaf *l = (struct leaf *)c;
109                 return generic_bin_search((void *)l->items, sizeof(struct item),
110                                           key, c->header.nritems, slot);
111         } else {
112                 return generic_bin_search((void *)c->keys, sizeof(struct key),
113                                           key, c->header.nritems, slot);
114         }
115         return -1;
116 }
117
118 /*
119  * look for key in the tree.  path is filled in with nodes along the way
120  * if key is found, we return zero and you can find the item in the leaf
121  * level of the path (level 0)
122  *
123  * If the key isn't found, the path points to the slot where it should
124  * be inserted.
125  */
126 int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
127 {
128         struct tree_buffer *b = root->node;
129         struct node *c;
130
131         int slot;
132         int ret;
133         int level;
134         b->count++;
135         while (b) {
136                 c = &b->node;
137                 level = node_level(c->header.flags);
138                 p->nodes[level] = b;
139                 ret = bin_search(c, key, &slot);
140                 if (!is_leaf(c->header.flags)) {
141                         if (ret && slot > 0)
142                                 slot -= 1;
143                         p->slots[level] = slot;
144                         b = read_tree_block(root, c->blockptrs[slot]);
145                         continue;
146                 } else {
147                         p->slots[level] = slot;
148                         return ret;
149                 }
150         }
151         return -1;
152 }
153
154 /*
155  * adjust the pointers going up the tree, starting at level
156  * making sure the right key of each node is points to 'key'.
157  * This is used after shifting pointers to the left, so it stops
158  * fixing up pointers when a given leaf/node is not in slot 0 of the
159  * higher levels
160  */
161 static void fixup_low_keys(struct ctree_root *root,
162                            struct ctree_path *path, struct key *key,
163                            int level)
164 {
165         int i;
166         for (i = level; i < MAX_LEVEL; i++) {
167                 struct node *t;
168                 int tslot = path->slots[i];
169                 if (!path->nodes[i])
170                         break;
171                 t = &path->nodes[i]->node;
172                 memcpy(t->keys + tslot, key, sizeof(*key));
173                 write_tree_block(root, path->nodes[i]);
174                 if (tslot != 0)
175                         break;
176         }
177 }
178
179 /*
180  * try to push data from one node into the next node left in the
181  * tree.  The src node is found at specified level in the path.
182  * If some bytes were pushed, return 0, otherwise return 1.
183  *
184  * Lower nodes/leaves in the path are not touched, higher nodes may
185  * be modified to reflect the push.
186  *
187  * The path is altered to reflect the push.
188  */
189 int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
190 {
191         int slot;
192         struct node *left;
193         struct node *right;
194         int push_items = 0;
195         int left_nritems;
196         int right_nritems;
197         struct tree_buffer *t;
198         struct tree_buffer *right_buf;
199
200         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
201                 return 1;
202         slot = path->slots[level + 1];
203         if (slot == 0)
204                 return 1;
205
206         t = read_tree_block(root,
207                             path->nodes[level + 1]->node.blockptrs[slot - 1]);
208         left = &t->node;
209         right_buf = path->nodes[level];
210         right = &right_buf->node;
211         left_nritems = left->header.nritems;
212         right_nritems = right->header.nritems;
213         push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
214         if (push_items <= 0) {
215                 tree_block_release(root, t);
216                 return 1;
217         }
218
219         if (right_nritems < push_items)
220                 push_items = right_nritems;
221         memcpy(left->keys + left_nritems, right->keys,
222                 push_items * sizeof(struct key));
223         memcpy(left->blockptrs + left_nritems, right->blockptrs,
224                 push_items * sizeof(u64));
225         memmove(right->keys, right->keys + push_items,
226                 (right_nritems - push_items) * sizeof(struct key));
227         memmove(right->blockptrs, right->blockptrs + push_items,
228                 (right_nritems - push_items) * sizeof(u64));
229         right->header.nritems -= push_items;
230         left->header.nritems += push_items;
231
232         /* adjust the pointers going up the tree */
233         fixup_low_keys(root, path, right->keys, level + 1);
234
235         write_tree_block(root, t);
236         write_tree_block(root, right_buf);
237
238         /* then fixup the leaf pointer in the path */
239         if (path->slots[level] < push_items) {
240                 path->slots[level] += left_nritems;
241                 tree_block_release(root, path->nodes[level]);
242                 path->nodes[level] = t;
243                 path->slots[level + 1] -= 1;
244         } else {
245                 path->slots[level] -= push_items;
246                 tree_block_release(root, t);
247         }
248         return 0;
249 }
250
251 /*
252  * try to push data from one node into the next node right in the
253  * tree.  The src node is found at specified level in the path.
254  * If some bytes were pushed, return 0, otherwise return 1.
255  *
256  * Lower nodes/leaves in the path are not touched, higher nodes may
257  * be modified to reflect the push.
258  *
259  * The path is altered to reflect the push.
260  */
261 int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
262 {
263         int slot;
264         struct tree_buffer *t;
265         struct tree_buffer *src_buffer;
266         struct node *dst;
267         struct node *src;
268         int push_items = 0;
269         int dst_nritems;
270         int src_nritems;
271
272         /* can't push from the root */
273         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
274                 return 1;
275
276         /* only try to push inside the node higher up */
277         slot = path->slots[level + 1];
278         if (slot == NODEPTRS_PER_BLOCK - 1)
279                 return 1;
280
281         if (slot >= path->nodes[level + 1]->node.header.nritems -1)
282                 return 1;
283
284         t = read_tree_block(root,
285                             path->nodes[level + 1]->node.blockptrs[slot + 1]);
286         dst = &t->node;
287         src_buffer = path->nodes[level];
288         src = &src_buffer->node;
289         dst_nritems = dst->header.nritems;
290         src_nritems = src->header.nritems;
291         push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
292         if (push_items <= 0) {
293                 tree_block_release(root, t);
294                 return 1;
295         }
296
297         if (src_nritems < push_items)
298                 push_items = src_nritems;
299         memmove(dst->keys + push_items, dst->keys,
300                 dst_nritems * sizeof(struct key));
301         memcpy(dst->keys, src->keys + src_nritems - push_items,
302                 push_items * sizeof(struct key));
303
304         memmove(dst->blockptrs + push_items, dst->blockptrs,
305                 dst_nritems * sizeof(u64));
306         memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
307                 push_items * sizeof(u64));
308
309         src->header.nritems -= push_items;
310         dst->header.nritems += push_items;
311
312         /* adjust the pointers going up the tree */
313         memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
314                 dst->keys, sizeof(struct key));
315
316         write_tree_block(root, path->nodes[level + 1]);
317         write_tree_block(root, t);
318         write_tree_block(root, src_buffer);
319
320         /* then fixup the pointers in the path */
321         if (path->slots[level] >= src->header.nritems) {
322                 path->slots[level] -= src->header.nritems;
323                 tree_block_release(root, path->nodes[level]);
324                 path->nodes[level] = t;
325                 path->slots[level + 1] += 1;
326         } else {
327                 tree_block_release(root, t);
328         }
329         return 0;
330 }
331
332 /*
333  * worker function to insert a single pointer in a node.
334  * the node should have enough room for the pointer already
335  * slot and level indicate where you want the key to go, and
336  * blocknr is the block the key points to.
337  */
338 int __insert_ptr(struct ctree_root *root,
339                 struct ctree_path *path, struct key *key,
340                 u64 blocknr, int slot, int level)
341 {
342         struct node *c;
343         struct node *lower;
344         struct key *lower_key;
345         int nritems;
346         /* need a new root */
347         if (!path->nodes[level]) {
348                 struct tree_buffer *t;
349                 t = alloc_free_block(root);
350                 c = &t->node;
351                 memset(c, 0, sizeof(c));
352                 c->header.nritems = 2;
353                 c->header.flags = node_level(level);
354                 c->header.blocknr = t->blocknr;
355                 lower = &path->nodes[level-1]->node;
356                 if (is_leaf(lower->header.flags))
357                         lower_key = &((struct leaf *)lower)->items[0].key;
358                 else
359                         lower_key = lower->keys;
360                 memcpy(c->keys, lower_key, sizeof(struct key));
361                 memcpy(c->keys + 1, key, sizeof(struct key));
362                 c->blockptrs[0] = path->nodes[level-1]->blocknr;
363                 c->blockptrs[1] = blocknr;
364                 /* the path has an extra ref to root->node */
365                 tree_block_release(root, root->node);
366                 root->node = t;
367                 t->count++;
368                 write_tree_block(root, t);
369                 path->nodes[level] = t;
370                 path->slots[level] = 0;
371                 if (c->keys[1].objectid == 0)
372                         BUG();
373                 return 0;
374         }
375         lower = &path->nodes[level]->node;
376         nritems = lower->header.nritems;
377         if (slot > nritems)
378                 BUG();
379         if (nritems == NODEPTRS_PER_BLOCK)
380                 BUG();
381         if (slot != nritems) {
382                 memmove(lower->keys + slot + 1, lower->keys + slot,
383                         (nritems - slot) * sizeof(struct key));
384                 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
385                         (nritems - slot) * sizeof(u64));
386         }
387         memcpy(lower->keys + slot, key, sizeof(struct key));
388         lower->blockptrs[slot] = blocknr;
389         lower->header.nritems++;
390         if (lower->keys[1].objectid == 0)
391                         BUG();
392         write_tree_block(root, path->nodes[level]);
393         return 0;
394 }
395
396
397 /*
398  * insert a key,blocknr pair into the tree at a given level
399  * If the node at that level in the path doesn't have room,
400  * it is split or shifted as appropriate.
401  */
402 int insert_ptr(struct ctree_root *root,
403                 struct ctree_path *path, struct key *key,
404                 u64 blocknr, int level)
405 {
406         struct tree_buffer *t = path->nodes[level];
407         struct node *c = &path->nodes[level]->node;
408         struct node *b;
409         struct tree_buffer *b_buffer;
410         struct tree_buffer *bal[MAX_LEVEL];
411         int bal_level = level;
412         int mid;
413         int bal_start = -1;
414
415         /*
416          * check to see if we need to make room in the node for this
417          * pointer.  If we do, keep walking the tree, making sure there
418          * is enough room in each level for the required insertions.
419          *
420          * The bal array is filled in with any nodes to be inserted
421          * due to splitting.  Once we've done all the splitting required
422          * do the inserts based on the data in the bal array.
423          */
424         memset(bal, 0, ARRAY_SIZE(bal));
425         while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
426                 c = &t->node;
427                 if (push_node_left(root, path,
428                    node_level(c->header.flags)) == 0)
429                         break;
430                 if (push_node_right(root, path,
431                    node_level(c->header.flags)) == 0)
432                         break;
433                 bal_start = bal_level;
434                 if (bal_level == MAX_LEVEL - 1)
435                         BUG();
436                 b_buffer = alloc_free_block(root);
437                 b = &b_buffer->node;
438                 b->header.flags = c->header.flags;
439                 b->header.blocknr = b_buffer->blocknr;
440                 mid = (c->header.nritems + 1) / 2;
441                 memcpy(b->keys, c->keys + mid,
442                         (c->header.nritems - mid) * sizeof(struct key));
443                 memcpy(b->blockptrs, c->blockptrs + mid,
444                         (c->header.nritems - mid) * sizeof(u64));
445                 b->header.nritems = c->header.nritems - mid;
446                 c->header.nritems = mid;
447
448                 write_tree_block(root, t);
449                 write_tree_block(root, b_buffer);
450
451                 bal[bal_level] = b_buffer;
452                 if (bal_level == MAX_LEVEL - 1)
453                         break;
454                 bal_level += 1;
455                 t = path->nodes[bal_level];
456         }
457         /*
458          * bal_start tells us the first level in the tree that needed to
459          * be split.  Go through the bal array inserting the new nodes
460          * as needed.  The path is fixed as we go.
461          */
462         while(bal_start > 0) {
463                 b_buffer = bal[bal_start];
464                 c = &path->nodes[bal_start]->node;
465                 __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr,
466                                 path->slots[bal_start + 1] + 1, bal_start + 1);
467                 if (path->slots[bal_start] >= c->header.nritems) {
468                         path->slots[bal_start] -= c->header.nritems;
469                         tree_block_release(root, path->nodes[bal_start]);
470                         path->nodes[bal_start] = b_buffer;
471                         path->slots[bal_start + 1] += 1;
472                 } else {
473                         tree_block_release(root, b_buffer);
474                 }
475                 bal_start--;
476                 if (!bal[bal_start])
477                         break;
478         }
479         /* Now that the tree has room, insert the requested pointer */
480         return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1,
481                             level);
482 }
483
484 /*
485  * how many bytes are required to store the items in a leaf.  start
486  * and nr indicate which items in the leaf to check.  This totals up the
487  * space used both by the item structs and the item data
488  */
489 int leaf_space_used(struct leaf *l, int start, int nr)
490 {
491         int data_len;
492         int end = start + nr - 1;
493
494         if (!nr)
495                 return 0;
496         data_len = l->items[start].offset + l->items[start].size;
497         data_len = data_len - l->items[end].offset;
498         data_len += sizeof(struct item) * nr;
499         return data_len;
500 }
501
502 /*
503  * push some data in the path leaf to the left, trying to free up at
504  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
505  */
506 int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
507                    int data_size)
508 {
509         struct tree_buffer *right_buf = path->nodes[0];
510         struct leaf *right = &right_buf->leaf;
511         struct tree_buffer *t;
512         struct leaf *left;
513         int slot;
514         int i;
515         int free_space;
516         int push_space = 0;
517         int push_items = 0;
518         struct item *item;
519         int old_left_nritems;
520
521         slot = path->slots[1];
522         if (slot == 0) {
523                 return 1;
524         }
525         if (!path->nodes[1]) {
526                 return 1;
527         }
528         t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
529         left = &t->leaf;
530         free_space = leaf_free_space(left);
531         if (free_space < data_size + sizeof(struct item)) {
532                 tree_block_release(root, t);
533                 return 1;
534         }
535         for (i = 0; i < right->header.nritems; i++) {
536                 item = right->items + i;
537                 if (path->slots[0] == i)
538                         push_space += data_size + sizeof(*item);
539                 if (item->size + sizeof(*item) + push_space > free_space)
540                         break;
541                 push_items++;
542                 push_space += item->size + sizeof(*item);
543         }
544         if (push_items == 0) {
545                 tree_block_release(root, t);
546                 return 1;
547         }
548         /* push data from right to left */
549         memcpy(left->items + left->header.nritems,
550                 right->items, push_items * sizeof(struct item));
551         push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
552         memcpy(left->data + leaf_data_end(left) - push_space,
553                 right->data + right->items[push_items - 1].offset,
554                 push_space);
555         old_left_nritems = left->header.nritems;
556         BUG_ON(old_left_nritems < 0);
557
558         for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
559                 left->items[i].offset -= LEAF_DATA_SIZE -
560                         left->items[old_left_nritems -1].offset;
561         }
562         left->header.nritems += push_items;
563
564         /* fixup right node */
565         push_space = right->items[push_items-1].offset - leaf_data_end(right);
566         memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
567                 leaf_data_end(right), push_space);
568         memmove(right->items, right->items + push_items,
569                 (right->header.nritems - push_items) * sizeof(struct item));
570         right->header.nritems -= push_items;
571         push_space = LEAF_DATA_SIZE;
572
573         for (i = 0; i < right->header.nritems; i++) {
574                 right->items[i].offset = push_space - right->items[i].size;
575                 push_space = right->items[i].offset;
576         }
577
578         write_tree_block(root, t);
579         write_tree_block(root, right_buf);
580
581         fixup_low_keys(root, path, &right->items[0].key, 1);
582
583         /* then fixup the leaf pointer in the path */
584         if (path->slots[0] < push_items) {
585                 path->slots[0] += old_left_nritems;
586                 tree_block_release(root, path->nodes[0]);
587                 path->nodes[0] = t;
588                 path->slots[1] -= 1;
589         } else {
590                 tree_block_release(root, t);
591                 path->slots[0] -= push_items;
592         }
593         BUG_ON(path->slots[0] < 0);
594         return 0;
595 }
596
597 /*
598  * split the path's leaf in two, making sure there is at least data_size
599  * available for the resulting leaf level of the path.
600  */
601 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
602 {
603         struct tree_buffer *l_buf = path->nodes[0];
604         struct leaf *l = &l_buf->leaf;
605         int nritems;
606         int mid;
607         int slot;
608         struct leaf *right;
609         struct tree_buffer *right_buffer;
610         int space_needed = data_size + sizeof(struct item);
611         int data_copy_size;
612         int rt_data_off;
613         int i;
614         int ret;
615
616         if (push_leaf_left(root, path, data_size) == 0) {
617                 l_buf = path->nodes[0];
618                 l = &l_buf->leaf;
619                 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
620                         return 0;
621         }
622         slot = path->slots[0];
623         nritems = l->header.nritems;
624         mid = (nritems + 1)/ 2;
625
626         right_buffer = alloc_free_block(root);
627         BUG_ON(!right_buffer);
628         BUG_ON(mid == nritems);
629         right = &right_buffer->leaf;
630         memset(right, 0, sizeof(*right));
631         if (mid <= slot) {
632                 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
633                         LEAF_DATA_SIZE)
634                         BUG();
635         } else {
636                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
637                         LEAF_DATA_SIZE)
638                         BUG();
639         }
640         right->header.nritems = nritems - mid;
641         right->header.blocknr = right_buffer->blocknr;
642         right->header.flags = node_level(0);
643         data_copy_size = l->items[mid].offset + l->items[mid].size -
644                          leaf_data_end(l);
645         memcpy(right->items, l->items + mid,
646                (nritems - mid) * sizeof(struct item));
647         memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
648                l->data + leaf_data_end(l), data_copy_size);
649         rt_data_off = LEAF_DATA_SIZE -
650                      (l->items[mid].offset + l->items[mid].size);
651
652         for (i = 0; i < right->header.nritems; i++)
653                 right->items[i].offset += rt_data_off;
654
655         l->header.nritems = mid;
656         ret = insert_ptr(root, path, &right->items[0].key,
657                           right_buffer->blocknr, 1);
658
659         write_tree_block(root, right_buffer);
660         write_tree_block(root, l_buf);
661
662         BUG_ON(path->slots[0] != slot);
663         if (mid <= slot) {
664                 tree_block_release(root, path->nodes[0]);
665                 path->nodes[0] = right_buffer;
666                 path->slots[0] -= mid;
667                 path->slots[1] += 1;
668         } else
669                 tree_block_release(root, right_buffer);
670         BUG_ON(path->slots[0] < 0);
671         return ret;
672 }
673
674 /*
675  * Given a key and some data, insert an item into the tree.
676  * This does all the path init required, making room in the tree if needed.
677  */
678 int insert_item(struct ctree_root *root, struct key *key,
679                           void *data, int data_size)
680 {
681         int ret;
682         int slot;
683         int slot_orig;
684         struct leaf *leaf;
685         struct tree_buffer *leaf_buf;
686         unsigned int nritems;
687         unsigned int data_end;
688         struct ctree_path path;
689
690         /* create a root if there isn't one */
691         if (!root->node) {
692                 struct tree_buffer *t;
693                 t = alloc_free_block(root);
694                 BUG_ON(!t);
695                 t->node.header.nritems = 0;
696                 t->node.header.flags = node_level(0);
697                 t->node.header.blocknr = t->blocknr;
698                 root->node = t;
699                 write_tree_block(root, t);
700         }
701         init_path(&path);
702         ret = search_slot(root, key, &path);
703         if (ret == 0) {
704                 release_path(root, &path);
705                 return -EEXIST;
706         }
707
708         slot_orig = path.slots[0];
709         leaf_buf = path.nodes[0];
710         leaf = &leaf_buf->leaf;
711
712         /* make room if needed */
713         if (leaf_free_space(leaf) <  sizeof(struct item) + data_size) {
714                 split_leaf(root, &path, data_size);
715                 leaf_buf = path.nodes[0];
716                 leaf = &path.nodes[0]->leaf;
717         }
718         nritems = leaf->header.nritems;
719         data_end = leaf_data_end(leaf);
720
721         if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
722                 BUG();
723
724         slot = path.slots[0];
725         BUG_ON(slot < 0);
726         if (slot == 0)
727                 fixup_low_keys(root, &path, key, 1);
728         if (slot != nritems) {
729                 int i;
730                 unsigned int old_data = leaf->items[slot].offset +
731                                         leaf->items[slot].size;
732
733                 /*
734                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
735                  */
736                 /* first correct the data pointers */
737                 for (i = slot; i < nritems; i++)
738                         leaf->items[i].offset -= data_size;
739
740                 /* shift the items */
741                 memmove(leaf->items + slot + 1, leaf->items + slot,
742                         (nritems - slot) * sizeof(struct item));
743
744                 /* shift the data */
745                 memmove(leaf->data + data_end - data_size, leaf->data +
746                         data_end, old_data - data_end);
747                 data_end = old_data;
748         }
749         /* copy the new data in */
750         memcpy(&leaf->items[slot].key, key, sizeof(struct key));
751         leaf->items[slot].offset = data_end - data_size;
752         leaf->items[slot].size = data_size;
753         memcpy(leaf->data + data_end - data_size, data, data_size);
754         leaf->header.nritems += 1;
755         write_tree_block(root, leaf_buf);
756         if (leaf_free_space(leaf) < 0)
757                 BUG();
758         release_path(root, &path);
759         return 0;
760 }
761
762 /*
763  * delete the pointer from a given level in the path.  The path is not
764  * fixed up, so after calling this it is not valid at that level.
765  *
766  * If the delete empties a node, the node is removed from the tree,
767  * continuing all the way the root if required.  The root is converted into
768  * a leaf if all the nodes are emptied.
769  */
770 int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
771 {
772         int slot;
773         struct tree_buffer *t;
774         struct node *node;
775         int nritems;
776
777         while(1) {
778                 t = path->nodes[level];
779                 if (!t)
780                         break;
781                 node = &t->node;
782                 slot = path->slots[level];
783                 nritems = node->header.nritems;
784
785                 if (slot != nritems -1) {
786                         memmove(node->keys + slot, node->keys + slot + 1,
787                                 sizeof(struct key) * (nritems - slot - 1));
788                         memmove(node->blockptrs + slot,
789                                 node->blockptrs + slot + 1,
790                                 sizeof(u64) * (nritems - slot - 1));
791                 }
792                 node->header.nritems--;
793                 write_tree_block(root, t);
794                 if (node->header.nritems != 0) {
795                         int tslot;
796                         if (slot == 0)
797                                 fixup_low_keys(root, path, node->keys,
798                                                level + 1);
799                         tslot = path->slots[level+1];
800                         t->count++;
801                         push_node_left(root, path, level);
802                         if (node->header.nritems) {
803                                 push_node_right(root, path, level);
804                         }
805                         if (node->header.nritems) {
806                                 tree_block_release(root, t);
807                                 break;
808                         }
809                         tree_block_release(root, t);
810                         path->slots[level+1] = tslot;
811                 }
812                 if (t == root->node) {
813                         /* just turn the root into a leaf and break */
814                         root->node->node.header.flags = node_level(0);
815                         write_tree_block(root, t);
816                         break;
817                 }
818                 level++;
819                 if (!path->nodes[level])
820                         BUG();
821         }
822         return 0;
823 }
824
825 /*
826  * delete the item at the leaf level in path.  If that empties
827  * the leaf, remove it from the tree
828  */
829 int del_item(struct ctree_root *root, struct ctree_path *path)
830 {
831         int slot;
832         struct leaf *leaf;
833         struct tree_buffer *leaf_buf;
834         int doff;
835         int dsize;
836
837         leaf_buf = path->nodes[0];
838         leaf = &leaf_buf->leaf;
839         slot = path->slots[0];
840         doff = leaf->items[slot].offset;
841         dsize = leaf->items[slot].size;
842
843         if (slot != leaf->header.nritems - 1) {
844                 int i;
845                 int data_end = leaf_data_end(leaf);
846                 memmove(leaf->data + data_end + dsize,
847                         leaf->data + data_end,
848                         doff - data_end);
849                 for (i = slot + 1; i < leaf->header.nritems; i++)
850                         leaf->items[i].offset += dsize;
851                 memmove(leaf->items + slot, leaf->items + slot + 1,
852                         sizeof(struct item) *
853                         (leaf->header.nritems - slot - 1));
854         }
855         leaf->header.nritems -= 1;
856         /* delete the leaf if we've emptied it */
857         if (leaf->header.nritems == 0) {
858                 if (leaf_buf == root->node) {
859                         leaf->header.flags = node_level(0);
860                         write_tree_block(root, leaf_buf);
861                 } else
862                         del_ptr(root, path, 1);
863         } else {
864                 if (slot == 0)
865                         fixup_low_keys(root, path, &leaf->items[0].key, 1);
866                 write_tree_block(root, leaf_buf);
867                 /* delete the leaf if it is mostly empty */
868                 if (leaf_space_used(leaf, 0, leaf->header.nritems) <
869                     LEAF_DATA_SIZE / 4) {
870                         /* push_leaf_left fixes the path.
871                          * make sure the path still points to our leaf
872                          * for possible call to del_ptr below
873                          */
874                         slot = path->slots[1];
875                         leaf_buf->count++;
876                         push_leaf_left(root, path, 1);
877                         if (leaf->header.nritems == 0) {
878                                 path->slots[1] = slot;
879                                 del_ptr(root, path, 1);
880                         }
881                         tree_block_release(root, leaf_buf);
882                 }
883         }
884         return 0;
885 }
886
887 void print_leaf(struct leaf *l)
888 {
889         int i;
890         int nr = l->header.nritems;
891         struct item *item;
892         printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
893                leaf_free_space(l));
894         fflush(stdout);
895         for (i = 0 ; i < nr ; i++) {
896                 item = l->items + i;
897                 printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n",
898                         i,
899                         item->key.objectid, item->key.flags, item->key.offset,
900                         item->offset, item->size);
901                 fflush(stdout);
902                 printf("\t\titem data %.*s\n", item->size, l->data+item->offset);
903                 fflush(stdout);
904         }
905 }
906 void print_tree(struct ctree_root *root, struct tree_buffer *t)
907 {
908         int i;
909         int nr;
910         struct node *c;
911
912         if (!t)
913                 return;
914         c = &t->node;
915         nr = c->header.nritems;
916         if (c->header.blocknr != t->blocknr)
917                 BUG();
918         if (is_leaf(c->header.flags)) {
919                 print_leaf((struct leaf *)c);
920                 return;
921         }
922         printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
923                 node_level(c->header.flags), c->header.nritems,
924                 NODEPTRS_PER_BLOCK - c->header.nritems);
925         fflush(stdout);
926         for (i = 0; i < nr; i++) {
927                 printf("\tkey %d (%lu %u %lu) block %lu\n",
928                        i,
929                        c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
930                        c->blockptrs[i]);
931                 fflush(stdout);
932         }
933         for (i = 0; i < nr; i++) {
934                 struct tree_buffer *next_buf = read_tree_block(root,
935                                                             c->blockptrs[i]);
936                 struct node *next = &next_buf->node;
937                 if (is_leaf(next->header.flags) &&
938                     node_level(c->header.flags) != 1)
939                         BUG();
940                 if (node_level(next->header.flags) !=
941                         node_level(c->header.flags) - 1)
942                         BUG();
943                 print_tree(root, next_buf);
944                 tree_block_release(root, next_buf);
945         }
946
947 }
948
/*
 * For testing only: return the next rand() value reduced modulo max_key.
 * The index i is accepted but not used.
 */
int next_key(int i, int max_key)
{
        int draw = rand();

        (void)i;
        return draw % max_key;
}
954
955 int main() {
956         struct ctree_root *root;
957         struct key ins;
958         struct key last = { (u64)-1, 0, 0};
959         char *buf;
960         int i;
961         int num;
962         int ret;
963         int run_size = 25000;
964         int max_key = 100000000;
965         int tree_size = 0;
966         struct ctree_path path;
967
968         radix_tree_init();
969
970
971         root = open_ctree("dbfile");
972
973         srand(55);
974         for (i = 0; i < run_size; i++) {
975                 buf = malloc(64);
976                 num = next_key(i, max_key);
977                 // num = i;
978                 sprintf(buf, "string-%d", num);
979                 // printf("insert %d\n", num);
980                 ins.objectid = num;
981                 ins.offset = 0;
982                 ins.flags = 0;
983                 ret = insert_item(root, &ins, buf, strlen(buf));
984                 if (!ret)
985                         tree_size++;
986         }
987         close_ctree(root);
988         root = open_ctree("dbfile");
989         printf("starting search\n");
990         srand(55);
991         for (i = 0; i < run_size; i++) {
992                 num = next_key(i, max_key);
993                 ins.objectid = num;
994                 init_path(&path);
995                 ret = search_slot(root, &ins, &path);
996                 if (ret) {
997                         print_tree(root, root->node);
998                         printf("unable to find %d\n", num);
999                         exit(1);
1000                 }
1001                 release_path(root, &path);
1002         }
1003         close_ctree(root);
1004         root = open_ctree("dbfile");
1005         printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
1006                 node_level(root->node->node.header.flags),
1007                 root->node->node.header.nritems,
1008                 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
1009         printf("all searches good, deleting some items\n");
1010         i = 0;
1011         srand(55);
1012         for (i = 0 ; i < run_size/4; i++) {
1013                 num = next_key(i, max_key);
1014                 ins.objectid = num;
1015                 init_path(&path);
1016                 ret = search_slot(root, &ins, &path);
1017                 if (ret)
1018                         continue;
1019                 ret = del_item(root, &path);
1020                 if (ret != 0)
1021                         BUG();
1022                 release_path(root, &path);
1023                 tree_size--;
1024         }
1025         srand(128);
1026         for (i = 0; i < run_size; i++) {
1027                 buf = malloc(64);
1028                 num = next_key(i, max_key);
1029                 sprintf(buf, "string-%d", num);
1030                 ins.objectid = num;
1031                 ret = insert_item(root, &ins, buf, strlen(buf));
1032                 if (!ret)
1033                         tree_size++;
1034         }
1035         close_ctree(root);
1036         root = open_ctree("dbfile");
1037         printf("starting search2\n");
1038         srand(128);
1039         for (i = 0; i < run_size; i++) {
1040                 num = next_key(i, max_key);
1041                 ins.objectid = num;
1042                 init_path(&path);
1043                 ret = search_slot(root, &ins, &path);
1044                 if (ret) {
1045                         print_tree(root, root->node);
1046                         printf("unable to find %d\n", num);
1047                         exit(1);
1048                 }
1049                 release_path(root, &path);
1050         }
1051         printf("starting big long delete run\n");
1052         while(root->node && root->node->node.header.nritems > 0) {
1053                 struct leaf *leaf;
1054                 int slot;
1055                 ins.objectid = (u64)-1;
1056                 init_path(&path);
1057                 ret = search_slot(root, &ins, &path);
1058                 if (ret == 0)
1059                         BUG();
1060
1061                 leaf = &path.nodes[0]->leaf;
1062                 slot = path.slots[0];
1063                 if (slot != leaf->header.nritems)
1064                         BUG();
1065                 while(path.slots[0] > 0) {
1066                         path.slots[0] -= 1;
1067                         slot = path.slots[0];
1068                         leaf = &path.nodes[0]->leaf;
1069
1070                         if (comp_keys(&last, &leaf->items[slot].key) <= 0)
1071                                 BUG();
1072                         memcpy(&last, &leaf->items[slot].key, sizeof(last));
1073                         ret = del_item(root, &path);
1074                         if (ret != 0) {
1075                                 printf("del_item returned %d\n", ret);
1076                                 BUG();
1077                         }
1078                         tree_size--;
1079                 }
1080                 release_path(root, &path);
1081         }
1082         close_ctree(root);
1083         printf("tree size is now %d\n", tree_size);
1084         return 0;
1085 }