Add generation numbers to block pointers
[platform/upstream/btrfs-progs.git] / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "ctree.h"
23 #include "disk-io.h"
24 #include "transaction.h"
25 #include "print-tree.h"
26
27 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
28                       *root, struct btrfs_path *path, int level);
29 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
30                       *root, struct btrfs_key *ins_key,
31                       struct btrfs_path *path, int data_size, int extend);
32 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
33                           *root, struct btrfs_buffer *dst, struct btrfs_buffer
34                           *src);
35 static int balance_node_right(struct btrfs_trans_handle *trans, struct
36                               btrfs_root *root, struct btrfs_buffer *dst_buf,
37                               struct btrfs_buffer *src_buf);
38 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39                    struct btrfs_path *path, int level, int slot);
40
41 inline void btrfs_init_path(struct btrfs_path *p)
42 {
43         memset(p, 0, sizeof(*p));
44 }
45
46 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
47 {
48         int i;
49         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
50                 if (!p->nodes[i])
51                         break;
52                 btrfs_block_release(root, p->nodes[i]);
53         }
54         memset(p, 0, sizeof(*p));
55 }
56 int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
57                            *root, struct btrfs_buffer *buf, struct btrfs_buffer
58                            *parent, int parent_slot, struct btrfs_buffer
59                            **cow_ret)
60 {
61         struct btrfs_buffer *cow;
62
63         if (!list_empty(&buf->dirty)) {
64                 *cow_ret = buf;
65                 return 0;
66         }
67         cow = btrfs_alloc_free_block(trans, root, buf->size);
68         memcpy(&cow->node, &buf->node, buf->size);
69         btrfs_set_header_bytenr(&cow->node.header, cow->bytenr);
70         btrfs_set_header_generation(&cow->node.header, trans->transid);
71         btrfs_set_header_owner(&cow->node.header, root->root_key.objectid);
72         *cow_ret = cow;
73         btrfs_inc_ref(trans, root, buf);
74         if (buf == root->node) {
75                 root->node = cow;
76                 cow->count++;
77                 if (buf != root->commit_root)
78                         btrfs_free_extent(trans, root, buf->bytenr,
79                                           buf->size, 1);
80                 btrfs_block_release(root, buf);
81         } else {
82                 btrfs_set_node_blockptr(&parent->node, parent_slot,
83                                         cow->bytenr);
84                 btrfs_set_node_ptr_generation(&parent->node, parent_slot,
85                                               trans->transid);
86                 BUG_ON(list_empty(&parent->dirty));
87                 btrfs_free_extent(trans, root, buf->bytenr, buf->size, 1);
88         }
89         btrfs_block_release(root, buf);
90         return 0;
91 }
92
93 /*
94  * The leaf data grows from end-to-front in the node.
95  * this returns the address of the start of the last item,
96  * which is the stop of the leaf data stack
97  */
98 static inline unsigned int leaf_data_end(struct btrfs_root *root,
99                                          struct btrfs_leaf *leaf)
100 {
101         u32 nr = btrfs_header_nritems(&leaf->header);
102         if (nr == 0)
103                 return BTRFS_LEAF_DATA_SIZE(root);
104         return btrfs_item_offset(leaf->items + nr - 1);
105 }
106
107 /*
108  * how many bytes are required to store the items in a leaf.  start
109  * and nr indicate which items in the leaf to check.  This totals up the
110  * space used both by the item structs and the item data
111  */
112 static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
113 {
114         int data_len;
115         int nritems = btrfs_header_nritems(&l->header);
116         int end;
117
118         if (nritems < start + nr)
119                 end = nritems - 1;
120         else
121                 end = start + nr - 1;
122
123         if (!nr)
124                 return 0;
125         data_len = btrfs_item_end(l->items + start);
126         data_len = data_len - btrfs_item_offset(l->items + end);
127         data_len += sizeof(struct btrfs_item) * nr;
128         return data_len;
129 }
130
131 /*
132  * The space between the end of the leaf items and
133  * the start of the leaf data.  IOW, how much room
134  * the leaf has left for both items and data
135  */
136 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
137 {
138         int nritems = btrfs_header_nritems(&leaf->header);
139         return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
140 }
141
142 /*
143  * compare two keys in a memcmp fashion
144  */
145 int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
146 {
147         struct btrfs_key k1;
148
149         btrfs_disk_key_to_cpu(&k1, disk);
150
151         if (k1.objectid > k2->objectid)
152                 return 1;
153         if (k1.objectid < k2->objectid)
154                 return -1;
155         if (k1.type > k2->type)
156                 return 1;
157         if (k1.type < k2->type)
158                 return -1;
159         if (k1.offset > k2->offset)
160                 return 1;
161         if (k1.offset < k2->offset)
162                 return -1;
163         return 0;
164 }
165
166 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
167                       int level)
168 {
169         int i;
170         struct btrfs_node *parent = NULL;
171         struct btrfs_node *node = &path->nodes[level]->node;
172         int parent_slot;
173         u32 nritems = btrfs_header_nritems(&node->header);
174
175         if (path->nodes[level + 1])
176                 parent = &path->nodes[level + 1]->node;
177         parent_slot = path->slots[level + 1];
178         BUG_ON(nritems == 0);
179         if (parent) {
180                 struct btrfs_disk_key *parent_key;
181                 parent_key = &parent->ptrs[parent_slot].key;
182                 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
183                               sizeof(struct btrfs_disk_key)));
184                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
185                        btrfs_header_bytenr(&node->header));
186         }
187         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
188         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
189                 struct btrfs_key cpukey;
190                 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
191                 BUG_ON(btrfs_comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
192         }
193         return 0;
194 }
195
196 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
197                       int level)
198 {
199         int i;
200         struct btrfs_leaf *leaf = &path->nodes[level]->leaf;
201         struct btrfs_node *parent = NULL;
202         int parent_slot;
203         u32 nritems = btrfs_header_nritems(&leaf->header);
204
205         if (path->nodes[level + 1])
206                 parent = &path->nodes[level + 1]->node;
207         parent_slot = path->slots[level + 1];
208         BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
209
210         if (nritems == 0)
211                 return 0;
212
213         if (parent) {
214                 struct btrfs_disk_key *parent_key;
215                 parent_key = &parent->ptrs[parent_slot].key;
216                 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
217                        sizeof(struct btrfs_disk_key)));
218                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
219                        btrfs_header_bytenr(&leaf->header));
220         }
221         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
222                 struct btrfs_key cpukey;
223                 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
224                 BUG_ON(btrfs_comp_keys(&leaf->items[i].key,
225                                  &cpukey) >= 0);
226                 BUG_ON(btrfs_item_offset(leaf->items + i) !=
227                         btrfs_item_end(leaf->items + i + 1));
228                 if (i == 0) {
229                         BUG_ON(btrfs_item_offset(leaf->items + i) +
230                                btrfs_item_size(leaf->items + i) !=
231                                BTRFS_LEAF_DATA_SIZE(root));
232                 }
233         }
234         return 0;
235 }
236
237 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
238                         int level)
239 {
240         if (level == 0)
241                 return check_leaf(root, path, level);
242         return check_node(root, path, level);
243 }
244
245 /*
246  * search for key in the array p.  items p are item_size apart
247  * and there are 'max' items in p
248  * the slot in the array is returned via slot, and it points to
249  * the place where you would insert key if it is not found in
250  * the array.
251  *
252  * slot may point to max if the key is bigger than all of the keys
253  */
254 static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
255                        int max, int *slot)
256 {
257         int low = 0;
258         int high = max;
259         int mid;
260         int ret;
261         struct btrfs_disk_key *tmp;
262
263         while(low < high) {
264                 mid = (low + high) / 2;
265                 tmp = (struct btrfs_disk_key *)(p + mid * item_size);
266                 ret = btrfs_comp_keys(tmp, key);
267
268                 if (ret < 0)
269                         low = mid + 1;
270                 else if (ret > 0)
271                         high = mid;
272                 else {
273                         *slot = mid;
274                         return 0;
275                 }
276         }
277         *slot = low;
278         return 1;
279 }
280
281 /*
282  * simple bin_search frontend that does the right thing for
283  * leaves vs nodes
284  */
285 static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
286 {
287         if (btrfs_is_leaf(c)) {
288                 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
289                 return generic_bin_search((void *)l->items,
290                                           sizeof(struct btrfs_item),
291                                           key, btrfs_header_nritems(&c->header),
292                                           slot);
293         } else {
294                 return generic_bin_search((void *)c->ptrs,
295                                           sizeof(struct btrfs_key_ptr),
296                                           key, btrfs_header_nritems(&c->header),
297                                           slot);
298         }
299         return -1;
300 }
301
302 static struct btrfs_buffer *read_node_slot(struct btrfs_root *root,
303                                    struct btrfs_buffer *parent_buf,
304                                    int slot)
305 {
306         struct btrfs_node *node = &parent_buf->node;
307         int level = btrfs_header_level(&node->header);
308         if (slot < 0)
309                 return NULL;
310         if (slot >= btrfs_header_nritems(&node->header))
311                 return NULL;
312         return read_tree_block(root, btrfs_node_blockptr(node, slot),
313                                btrfs_level_size(root, level - 1));
314 }
315
316 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
317                          *root, struct btrfs_path *path, int level)
318 {
319         struct btrfs_buffer *right_buf;
320         struct btrfs_buffer *mid_buf;
321         struct btrfs_buffer *left_buf;
322         struct btrfs_buffer *parent_buf = NULL;
323         struct btrfs_node *right = NULL;
324         struct btrfs_node *mid;
325         struct btrfs_node *left = NULL;
326         struct btrfs_node *parent = NULL;
327         int ret = 0;
328         int wret;
329         int pslot;
330         int orig_slot = path->slots[level];
331         u64 orig_ptr;
332
333         if (level == 0)
334                 return 0;
335
336         mid_buf = path->nodes[level];
337         mid = &mid_buf->node;
338         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
339
340         if (level < BTRFS_MAX_LEVEL - 1)
341                 parent_buf = path->nodes[level + 1];
342         pslot = path->slots[level + 1];
343
344         /*
345          * deal with the case where there is only one pointer in the root
346          * by promoting the node below to a root
347          */
348         if (!parent_buf) {
349                 struct btrfs_buffer *child;
350                 u64 bytenr = mid_buf->bytenr;
351
352                 if (btrfs_header_nritems(&mid->header) != 1)
353                         return 0;
354
355                 /* promote the child to a root */
356                 child = read_node_slot(root, mid_buf, 0);
357                 BUG_ON(!child);
358                 root->node = child;
359                 path->nodes[level] = NULL;
360                 /* once for the path */
361                 btrfs_block_release(root, mid_buf);
362                 /* once for the root ptr */
363                 btrfs_block_release(root, mid_buf);
364                 clean_tree_block(trans, root, mid_buf);
365                 return btrfs_free_extent(trans, root, bytenr,
366                                          root->nodesize, 1);
367         }
368         parent = &parent_buf->node;
369
370         if (btrfs_header_nritems(&mid->header) >
371             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
372                 return 0;
373
374         left_buf = read_node_slot(root, parent_buf, pslot - 1);
375         right_buf = read_node_slot(root, parent_buf, pslot + 1);
376
377         /* first, try to make some room in the middle buffer */
378         if (left_buf) {
379                 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
380                                 &left_buf);
381                 left = &left_buf->node;
382                 orig_slot += btrfs_header_nritems(&left->header);
383                 wret = push_node_left(trans, root, left_buf, mid_buf);
384                 if (wret < 0)
385                         ret = wret;
386         }
387
388         /*
389          * then try to empty the right most buffer into the middle
390          */
391         if (right_buf) {
392                 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
393                                 &right_buf);
394                 right = &right_buf->node;
395                 wret = push_node_left(trans, root, mid_buf, right_buf);
396                 if (wret < 0)
397                         ret = wret;
398                 if (btrfs_header_nritems(&right->header) == 0) {
399                         u64 bytenr = right_buf->bytenr;
400                         btrfs_block_release(root, right_buf);
401                         clean_tree_block(trans, root, right_buf);
402                         right_buf = NULL;
403                         right = NULL;
404                         wret = del_ptr(trans, root, path, level + 1, pslot +
405                                        1);
406                         if (wret)
407                                 ret = wret;
408                         wret = btrfs_free_extent(trans, root, bytenr,
409                                                  root->nodesize, 1);
410                         if (wret)
411                                 ret = wret;
412                 } else {
413                         memcpy(&parent->ptrs[pslot + 1].key,
414                                 &right->ptrs[0].key,
415                                 sizeof(struct btrfs_disk_key));
416                         BUG_ON(list_empty(&parent_buf->dirty));
417                 }
418         }
419         if (btrfs_header_nritems(&mid->header) == 1) {
420                 /*
421                  * we're not allowed to leave a node with one item in the
422                  * tree during a delete.  A deletion from lower in the tree
423                  * could try to delete the only pointer in this node.
424                  * So, pull some keys from the left.
425                  * There has to be a left pointer at this point because
426                  * otherwise we would have pulled some pointers from the
427                  * right
428                  */
429                 BUG_ON(!left_buf);
430                 wret = balance_node_right(trans, root, mid_buf, left_buf);
431                 if (wret < 0)
432                         ret = wret;
433                 BUG_ON(wret == 1);
434         }
435         if (btrfs_header_nritems(&mid->header) == 0) {
436                 /* we've managed to empty the middle node, drop it */
437                 u64 bytenr = mid_buf->bytenr;
438                 btrfs_block_release(root, mid_buf);
439                 clean_tree_block(trans, root, mid_buf);
440                 mid_buf = NULL;
441                 mid = NULL;
442                 wret = del_ptr(trans, root, path, level + 1, pslot);
443                 if (wret)
444                         ret = wret;
445                 wret = btrfs_free_extent(trans, root, bytenr,
446                                          root->nodesize, 1);
447                 if (wret)
448                         ret = wret;
449         } else {
450                 /* update the parent key to reflect our changes */
451                 memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key,
452                        sizeof(struct btrfs_disk_key));
453                 BUG_ON(list_empty(&parent_buf->dirty));
454         }
455
456         /* update the path */
457         if (left_buf) {
458                 if (btrfs_header_nritems(&left->header) > orig_slot) {
459                         left_buf->count++; // released below
460                         path->nodes[level] = left_buf;
461                         path->slots[level + 1] -= 1;
462                         path->slots[level] = orig_slot;
463                         if (mid_buf)
464                                 btrfs_block_release(root, mid_buf);
465                 } else {
466                         orig_slot -= btrfs_header_nritems(&left->header);
467                         path->slots[level] = orig_slot;
468                 }
469         }
470         /* double check we haven't messed things up */
471         check_block(root, path, level);
472         if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node,
473                                             path->slots[level]))
474                 BUG();
475
476         if (right_buf)
477                 btrfs_block_release(root, right_buf);
478         if (left_buf)
479                 btrfs_block_release(root, left_buf);
480         return ret;
481 }
482 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
483                                 struct btrfs_root *root,
484                                 struct btrfs_path *path, int level)
485 {
486         struct btrfs_node *right;
487         struct btrfs_node *mid;
488         struct btrfs_node *left;
489         struct btrfs_node *parent;
490         struct btrfs_buffer *right_buf;
491         struct btrfs_buffer *mid_buf;
492         struct btrfs_buffer *left_buf;
493         struct btrfs_buffer *parent_buf = NULL;
494         int ret = 0;
495         int wret;
496         int pslot;
497         int orig_slot = path->slots[level];
498         u64 orig_ptr;
499
500         if (level == 0)
501                 return 1;
502
503         mid_buf = path->nodes[level];
504         mid = &mid_buf->node;
505         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
506
507         if (level < BTRFS_MAX_LEVEL - 1)
508                 parent_buf = path->nodes[level + 1];
509         pslot = path->slots[level + 1];
510
511         if (!parent_buf)
512                 return 1;
513         parent = &parent_buf->node;
514
515         left_buf = read_node_slot(root, parent_buf, pslot - 1);
516         left = &left_buf->node;
517
518         /* first, try to make some room in the middle buffer */
519         if (left_buf) {
520                 u32 left_nr;
521                 left_nr = btrfs_header_nritems(&left->header);
522                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
523                         wret = 1;
524                 } else {
525                         ret = btrfs_cow_block(trans, root, left_buf,
526                                               parent_buf, pslot - 1,
527                                               &left_buf);
528                         left = &left_buf->node;
529                         if (ret)
530                                 wret = 1;
531                         else {
532                                 wret = push_node_left(trans, root,
533                                                       left_buf, mid_buf);
534                         }
535                 }
536                 if (wret < 0)
537                         ret = wret;
538                 if (wret == 0) {
539                         orig_slot += left_nr;
540                         memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key,
541                                 sizeof(struct btrfs_disk_key));
542                         BUG_ON(list_empty(&parent_buf->dirty));
543                         if (btrfs_header_nritems(&left->header) > orig_slot) {
544                                 path->nodes[level] = left_buf;
545                                 path->slots[level + 1] -= 1;
546                                 path->slots[level] = orig_slot;
547                                 btrfs_block_release(root, mid_buf);
548                         } else {
549                                 orig_slot -=
550                                         btrfs_header_nritems(&left->header);
551                                 path->slots[level] = orig_slot;
552                                 btrfs_block_release(root, left_buf);
553                         }
554                         return 0;
555                 }
556                 btrfs_block_release(root, left_buf);
557         }
558
559         right_buf = read_node_slot(root, parent_buf, pslot + 1);
560         right = &right_buf->node;
561
562         /*
563          * then try to empty the right most buffer into the middle
564          */
565         if (right_buf) {
566                 u32 right_nr;
567                 right_nr = btrfs_header_nritems(&right->header);
568                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
569                         wret = 1;
570                 } else {
571                         ret = btrfs_cow_block(trans, root, right_buf,
572                                               parent_buf, pslot + 1,
573                                               &right_buf);
574                         right = &right_buf->node;
575                         if (ret)
576                                 wret = 1;
577                         else {
578                                 wret = balance_node_right(trans, root,
579                                                 right_buf, mid_buf);
580                         }
581                 }
582                 if (wret < 0)
583                         ret = wret;
584                 if (wret == 0) {
585                         memcpy(&parent->ptrs[pslot + 1].key,
586                                &right->ptrs[0].key,
587                                sizeof(struct btrfs_disk_key));
588                         BUG_ON(list_empty(&parent_buf->dirty));
589                         if (btrfs_header_nritems(&mid->header) <= orig_slot) {
590                                 path->nodes[level] = right_buf;
591                                 path->slots[level + 1] += 1;
592                                 path->slots[level] = orig_slot -
593                                         btrfs_header_nritems(&mid->header);
594                                 btrfs_block_release(root, mid_buf);
595                         } else {
596                                 btrfs_block_release(root, right_buf);
597                         }
598                         return 0;
599                 }
600                 btrfs_block_release(root, right_buf);
601         }
602         return 1;
603 }
604
605 /*
606  * look for key in the tree.  path is filled in with nodes along the way
607  * if key is found, we return zero and you can find the item in the leaf
608  * level of the path (level 0)
609  *
610  * If the key isn't found, the path points to the slot where it should
611  * be inserted, and 1 is returned.  If there are other errors during the
612  * search a negative error number is returned.
613  *
614  * if ins_len > 0, nodes and leaves will be split as we walk down the
615  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
616  * possible)
617  */
618 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
619                       *root, struct btrfs_key *key, struct btrfs_path *p, int
620                       ins_len, int cow)
621 {
622         struct btrfs_buffer *b;
623         struct btrfs_node *c;
624         int slot;
625         int ret;
626         int level;
627
628 again:
629         b = root->node;
630         b->count++;
631         while (b) {
632                 level = btrfs_header_level(&b->node.header);
633                 if (cow) {
634                         int wret;
635                         wret = btrfs_cow_block(trans, root, b,
636                                               p->nodes[level + 1],
637                                               p->slots[level + 1],
638                                               &b);
639                         if (wret) {
640                                 btrfs_block_release(root, b);
641                                 return wret;
642                         }
643                 }
644                 BUG_ON(!cow && ins_len);
645                 c = &b->node;
646                 p->nodes[level] = b;
647                 ret = check_block(root, p, level);
648                 if (ret)
649                         return -1;
650                 ret = bin_search(c, key, &slot);
651                 if (!btrfs_is_leaf(c)) {
652                         if (ret && slot > 0)
653                                 slot -= 1;
654                         p->slots[level] = slot;
655                         if (ins_len > 0 && btrfs_header_nritems(&c->header) >=
656                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
657                                 int sret = split_node(trans, root, p, level);
658                                 BUG_ON(sret > 0);
659                                 if (sret)
660                                         return sret;
661                                 b = p->nodes[level];
662                                 c = &b->node;
663                                 slot = p->slots[level];
664                         } else if (ins_len < 0) {
665                                 int sret = balance_level(trans, root, p,
666                                                          level);
667                                 if (sret)
668                                         return sret;
669                                 b = p->nodes[level];
670                                 if (!b) {
671                                         btrfs_release_path(NULL, p);
672                                         goto again;
673                                 }
674                                 c = &b->node;
675                                 slot = p->slots[level];
676                                 BUG_ON(btrfs_header_nritems(&c->header) == 1);
677                         }
678                         b = read_tree_block(root,
679                                             btrfs_node_blockptr(c, slot),
680                                             btrfs_level_size(root, level - 1));
681                 } else {
682                         struct btrfs_leaf *l = (struct btrfs_leaf *)c;
683                         p->slots[level] = slot;
684                         if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
685                             sizeof(struct btrfs_item) + ins_len) {
686                                 int sret = split_leaf(trans, root, key,
687                                                       p, ins_len, ret == 0);
688                                 BUG_ON(sret > 0);
689                                 if (sret)
690                                         return sret;
691                         }
692                         BUG_ON(root->node->count == 1);
693                         return ret;
694                 }
695         }
696         BUG_ON(root->node->count == 1);
697         return 1;
698 }
699
700 /*
701  * adjust the pointers going up the tree, starting at level
702  * making sure the right key of each node is points to 'key'.
703  * This is used after shifting pointers to the left, so it stops
704  * fixing up pointers when a given leaf/node is not in slot 0 of the
705  * higher levels
706  *
707  * If this fails to write a tree block, it returns -1, but continues
708  * fixing up the blocks in ram so the tree is consistent.
709  */
710 static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
711                           *root, struct btrfs_path *path, struct btrfs_disk_key
712                           *key, int level)
713 {
714         int i;
715         int ret = 0;
716         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
717                 struct btrfs_node *t;
718                 int tslot = path->slots[i];
719                 if (!path->nodes[i])
720                         break;
721                 t = &path->nodes[i]->node;
722                 memcpy(&t->ptrs[tslot].key, key, sizeof(*key));
723                 BUG_ON(list_empty(&path->nodes[i]->dirty));
724                 if (tslot != 0)
725                         break;
726         }
727         return ret;
728 }
729
730 /*
731  * try to push data from one node into the next node left in the
732  * tree.
733  *
734  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
735  * error, and > 0 if there was no room in the left hand block.
736  */
737 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
738                           *root, struct btrfs_buffer *dst_buf, struct
739                           btrfs_buffer *src_buf)
740 {
741         struct btrfs_node *src = &src_buf->node;
742         struct btrfs_node *dst = &dst_buf->node;
743         int push_items = 0;
744         int src_nritems;
745         int dst_nritems;
746         int ret = 0;
747
748         src_nritems = btrfs_header_nritems(&src->header);
749         dst_nritems = btrfs_header_nritems(&dst->header);
750         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
751         if (push_items <= 0) {
752                 return 1;
753         }
754
755         if (src_nritems < push_items)
756                 push_items = src_nritems;
757
758         memcpy(dst->ptrs + dst_nritems, src->ptrs,
759                 push_items * sizeof(struct btrfs_key_ptr));
760         if (push_items < src_nritems) {
761                 memmove(src->ptrs, src->ptrs + push_items,
762                         (src_nritems - push_items) *
763                         sizeof(struct btrfs_key_ptr));
764         }
765         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
766         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
767         BUG_ON(list_empty(&src_buf->dirty));
768         BUG_ON(list_empty(&dst_buf->dirty));
769         return ret;
770 }
771
772 /*
773  * try to push data from one node into the next node right in the
774  * tree.
775  *
776  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
777  * error, and > 0 if there was no room in the right hand block.
778  *
779  * this will  only push up to 1/2 the contents of the left node over
780  */
781 static int balance_node_right(struct btrfs_trans_handle *trans, struct
782                               btrfs_root *root, struct btrfs_buffer *dst_buf,
783                               struct btrfs_buffer *src_buf)
784 {
785         struct btrfs_node *src = &src_buf->node;
786         struct btrfs_node *dst = &dst_buf->node;
787         int push_items = 0;
788         int max_push;
789         int src_nritems;
790         int dst_nritems;
791         int ret = 0;
792
793         src_nritems = btrfs_header_nritems(&src->header);
794         dst_nritems = btrfs_header_nritems(&dst->header);
795         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
796         if (push_items <= 0) {
797                 return 1;
798         }
799         max_push = src_nritems / 2 + 1;
800         /* don't try to empty the node */
801         if (max_push >= src_nritems)
802                 return 1;
803         if (max_push < push_items)
804                 push_items = max_push;
805
806         memmove(dst->ptrs + push_items, dst->ptrs,
807                 dst_nritems * sizeof(struct btrfs_key_ptr));
808         memcpy(dst->ptrs, src->ptrs + src_nritems - push_items,
809                 push_items * sizeof(struct btrfs_key_ptr));
810
811         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
812         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
813
814         BUG_ON(list_empty(&src_buf->dirty));
815         BUG_ON(list_empty(&dst_buf->dirty));
816         return ret;
817 }
818
819 /*
820  * helper function to insert a new root level in the tree.
821  * A new node is allocated, and a single item is inserted to
822  * point to the existing root
823  *
824  * returns zero on success or < 0 on failure.
825  */
826 static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
827                            *root, struct btrfs_path *path, int level)
828 {
829         struct btrfs_buffer *t;
830         struct btrfs_node *lower;
831         struct btrfs_node *c;
832         struct btrfs_disk_key *lower_key;
833
834         BUG_ON(path->nodes[level]);
835         BUG_ON(path->nodes[level-1] != root->node);
836         t = btrfs_alloc_free_block(trans, root, root->nodesize);
837         c = &t->node;
838         memset(&c->header, 0, sizeof(c->header));
839         btrfs_set_header_nritems(&c->header, 1);
840         btrfs_set_header_level(&c->header, level);
841         btrfs_set_header_bytenr(&c->header, t->bytenr);
842         btrfs_set_header_generation(&c->header, trans->transid);
843         btrfs_set_header_owner(&c->header, root->root_key.objectid);
844         memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
845                sizeof(c->header.fsid));
846         lower = &path->nodes[level-1]->node;
847
848         if (btrfs_is_leaf(lower))
849                 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
850         else
851                 lower_key = &lower->ptrs[0].key;
852         memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key));
853         btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->bytenr);
854         BUG_ON(list_empty(&t->dirty));
855         btrfs_set_node_ptr_generation(c, 0,
856                  btrfs_header_generation(&path->nodes[level - 1]->node.header));
857         /* the super has an extra ref to root->node */
858         btrfs_block_release(root, root->node);
859         root->node = t;
860         t->count++;
861         path->nodes[level] = t;
862         path->slots[level] = 0;
863         return 0;
864 }
865
866 /*
867  * worker function to insert a single pointer in a node.
868  * the node should have enough room for the pointer already
869  *
870  * slot and level indicate where you want the key to go, and
871  * bytenr is the block the key points to.
872  *
873  * returns zero on success and < 0 on any error
874  */
875 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
876                       *root, struct btrfs_path *path, struct btrfs_disk_key
877                       *key, u64 bytenr, int slot, int level)
878 {
879         struct btrfs_node *lower;
880         int nritems;
881
882         BUG_ON(!path->nodes[level]);
883         lower = &path->nodes[level]->node;
884         nritems = btrfs_header_nritems(&lower->header);
885         if (slot > nritems)
886                 BUG();
887         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
888                 BUG();
889         if (slot != nritems) {
890                 memmove(lower->ptrs + slot + 1, lower->ptrs + slot,
891                         (nritems - slot) * sizeof(struct btrfs_key_ptr));
892         }
893         memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key));
894         btrfs_set_node_blockptr(lower, slot, bytenr);
895         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
896         btrfs_set_header_nritems(&lower->header, nritems + 1);
897         BUG_ON(list_empty(&path->nodes[level]->dirty));
898         return 0;
899 }
900
901 /*
902  * split the node at the specified level in path in two.
903  * The path is corrected to point to the appropriate node after the split
904  *
905  * Before splitting this tries to make some room in the node by pushing
906  * left and right, if either one works, it returns right away.
907  *
908  * returns 0 on success and < 0 on failure
909  */
910 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
911                       *root, struct btrfs_path *path, int level)
912 {
913         struct btrfs_buffer *t;
914         struct btrfs_node *c;
915         struct btrfs_buffer *split_buffer;
916         struct btrfs_node *split;
917         int mid;
918         int ret;
919         int wret;
920         u32 c_nritems;
921
922         t = path->nodes[level];
923         c = &t->node;
924         if (t == root->node) {
925                 /* trying to split the root, lets make a new one */
926                 ret = insert_new_root(trans, root, path, level + 1);
927                 if (ret)
928                         return ret;
929         } else {
930                 ret = push_nodes_for_insert(trans, root, path, level);
931                 t = path->nodes[level];
932                 c = &t->node;
933                 if (!ret && btrfs_header_nritems(&c->header) <
934                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
935                         return 0;
936                 if (ret < 0)
937                         return ret;
938         }
939         c_nritems = btrfs_header_nritems(&c->header);
940         split_buffer = btrfs_alloc_free_block(trans, root, root->nodesize);
941         split = &split_buffer->node;
942         btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
943         btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
944         btrfs_set_header_bytenr(&split->header, split_buffer->bytenr);
945         btrfs_set_header_generation(&split->header, trans->transid);
946         btrfs_set_header_owner(&split->header, root->root_key.objectid);
947         memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
948                sizeof(split->header.fsid));
949         mid = (c_nritems + 1) / 2;
950         memcpy(split->ptrs, c->ptrs + mid,
951                 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
952         btrfs_set_header_nritems(&split->header, c_nritems - mid);
953         btrfs_set_header_nritems(&c->header, mid);
954         ret = 0;
955
956         BUG_ON(list_empty(&t->dirty));
957         wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
958                           split_buffer->bytenr, path->slots[level + 1] + 1,
959                           level + 1);
960         if (wret)
961                 ret = wret;
962
963         if (path->slots[level] >= mid) {
964                 path->slots[level] -= mid;
965                 btrfs_block_release(root, t);
966                 path->nodes[level] = split_buffer;
967                 path->slots[level + 1] += 1;
968         } else {
969                 btrfs_block_release(root, split_buffer);
970         }
971         return ret;
972 }
973
974 /*
975  * push some data in the path leaf to the right, trying to free up at
976  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
977  *
978  * returns 1 if the push failed because the other node didn't have enough
979  * room, 0 if everything worked out and < 0 if there were major errors.
980  */
981 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
982                            *root, struct btrfs_path *path, int data_size,
983                            int empty)
984 {
985         struct btrfs_buffer *left_buf = path->nodes[0];
986         struct btrfs_leaf *left = &left_buf->leaf;
987         struct btrfs_leaf *right;
988         struct btrfs_buffer *right_buf;
989         struct btrfs_buffer *upper;
990         int slot;
991         u32 i;
992         int free_space;
993         int push_space = 0;
994         int push_items = 0;
995         struct btrfs_item *item;
996         u32 left_nritems;
997         u32 nr;
998         u32 right_nritems;
999         slot = path->slots[1];
1000         if (!path->nodes[1]) {
1001                 return 1;
1002         }
1003         upper = path->nodes[1];
1004         if (slot >= btrfs_header_nritems(&upper->node.header) - 1) {
1005                 return 1;
1006         }
1007         right_buf = read_tree_block(root,
1008                             btrfs_node_blockptr(&upper->node, slot + 1),
1009                             root->leafsize);
1010         right = &right_buf->leaf;
1011         free_space = btrfs_leaf_free_space(root, right);
1012         if (free_space < data_size + sizeof(struct btrfs_item)) {
1013                 btrfs_block_release(root, right_buf);
1014                 return 1;
1015         }
1016         /* cow and double check */
1017         btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
1018         right = &right_buf->leaf;
1019         free_space = btrfs_leaf_free_space(root, right);
1020         if (free_space < data_size + sizeof(struct btrfs_item)) {
1021                 btrfs_block_release(root, right_buf);
1022                 return 1;
1023         }
1024         left_nritems = btrfs_header_nritems(&left->header);
1025         if (left_nritems == 0) {
1026                 btrfs_block_release(root, right_buf);
1027                 return 1;
1028         }
1029
1030         if (empty)
1031                 nr = 0;
1032         else
1033                 nr = 1;
1034
1035         i = left_nritems - 1;
1036         while (i >= nr) {
1037                 item = left->items + i;
1038                 if (path->slots[0] == i)
1039                         push_space += data_size + sizeof(*item);
1040                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1041                     free_space)
1042                         break;
1043                 push_items++;
1044                 push_space += btrfs_item_size(item) + sizeof(*item);
1045                 if (i == 0)
1046                         break;
1047                 i--;
1048         }
1049         if (push_items == 0) {
1050                 btrfs_block_release(root, right_buf);
1051                 return 1;
1052         }
1053         right_nritems = btrfs_header_nritems(&right->header);
1054         /* push left to right */
1055         push_space = btrfs_item_end(left->items + left_nritems - push_items);
1056         push_space -= leaf_data_end(root, left);
1057         /* make room in the right data area */
1058         memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) -
1059                 push_space, btrfs_leaf_data(right) + leaf_data_end(root, right),
1060                 BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right));
1061         /* copy from the left data area */
1062         memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space,
1063                 btrfs_leaf_data(left) + leaf_data_end(root, left), push_space);
1064         memmove(right->items + push_items, right->items,
1065                 right_nritems * sizeof(struct btrfs_item));
1066         /* copy the items from left to right */
1067         memcpy(right->items, left->items + left_nritems - push_items,
1068                 push_items * sizeof(struct btrfs_item));
1069
1070         /* update the item pointers */
1071         right_nritems += push_items;
1072         btrfs_set_header_nritems(&right->header, right_nritems);
1073         push_space = BTRFS_LEAF_DATA_SIZE(root);
1074         for (i = 0; i < right_nritems; i++) {
1075                 btrfs_set_item_offset(right->items + i, push_space -
1076                                       btrfs_item_size(right->items + i));
1077                 push_space = btrfs_item_offset(right->items + i);
1078         }
1079         left_nritems -= push_items;
1080         btrfs_set_header_nritems(&left->header, left_nritems);
1081
1082         BUG_ON(list_empty(&left_buf->dirty));
1083         BUG_ON(list_empty(&right_buf->dirty));
1084         memcpy(&upper->node.ptrs[slot + 1].key,
1085                 &right->items[0].key, sizeof(struct btrfs_disk_key));
1086         BUG_ON(list_empty(&upper->dirty));
1087
1088         /* then fixup the leaf pointer in the path */
1089         if (path->slots[0] >= left_nritems) {
1090                 path->slots[0] -= left_nritems;
1091                 btrfs_block_release(root, path->nodes[0]);
1092                 path->nodes[0] = right_buf;
1093                 path->slots[1] += 1;
1094         } else {
1095                 btrfs_block_release(root, right_buf);
1096         }
1097         return 0;
1098 }
1099 /*
1100  * push some data in the path leaf to the left, trying to free up at
1101  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1102  */
1103 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1104                           *root, struct btrfs_path *path, int data_size,
1105                           int empty)
1106 {
1107         struct btrfs_buffer *right_buf = path->nodes[0];
1108         struct btrfs_leaf *right = &right_buf->leaf;
1109         struct btrfs_buffer *t;
1110         struct btrfs_leaf *left;
1111         int slot;
1112         int i;
1113         int free_space;
1114         int push_space = 0;
1115         int push_items = 0;
1116         struct btrfs_item *item;
1117         u32 old_left_nritems;
1118         u32 right_nritems;
1119         u32 nr;
1120         int ret = 0;
1121         int wret;
1122         slot = path->slots[1];
1123         if (slot == 0) {
1124                 return 1;
1125         }
1126         if (!path->nodes[1]) {
1127                 return 1;
1128         }
1129         right_nritems = btrfs_header_nritems(&right->header);
1130         if (right_nritems == 0) {
1131                 return 1;
1132         }
1133
1134         t = read_tree_block(root,
1135                     btrfs_node_blockptr(&path->nodes[1]->node, slot - 1),
1136                     root->leafsize);
1137         left = &t->leaf;
1138         free_space = btrfs_leaf_free_space(root, left);
1139         if (free_space < data_size + sizeof(struct btrfs_item)) {
1140                 btrfs_block_release(root, t);
1141                 return 1;
1142         }
1143
1144         /* cow and double check */
1145         btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
1146         left = &t->leaf;
1147         free_space = btrfs_leaf_free_space(root, left);
1148         if (free_space < data_size + sizeof(struct btrfs_item)) {
1149                 btrfs_block_release(root, t);
1150                 return 1;
1151         }
1152         if (empty)
1153                 nr = right_nritems;
1154         else
1155                 nr = right_nritems - 1;
1156
1157         for (i = 0; i < nr; i++) {
1158                 item = right->items + i;
1159                 if (path->slots[0] == i)
1160                         push_space += data_size + sizeof(*item);
1161                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1162                     free_space)
1163                         break;
1164                 push_items++;
1165                 push_space += btrfs_item_size(item) + sizeof(*item);
1166         }
1167         if (push_items == 0) {
1168                 btrfs_block_release(root, t);
1169                 return 1;
1170         }
1171         /* push data from right to left */
1172         memcpy(left->items + btrfs_header_nritems(&left->header),
1173                 right->items, push_items * sizeof(struct btrfs_item));
1174         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1175                      btrfs_item_offset(right->items + push_items -1);
1176         memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space,
1177                 btrfs_leaf_data(right) +
1178                 btrfs_item_offset(right->items + push_items - 1),
1179                 push_space);
1180         old_left_nritems = btrfs_header_nritems(&left->header);
1181         BUG_ON(old_left_nritems < 0);
1182
1183         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1184                 u32 ioff = btrfs_item_offset(left->items + i);
1185                 btrfs_set_item_offset(left->items + i, ioff -
1186                                      (BTRFS_LEAF_DATA_SIZE(root) -
1187                                       btrfs_item_offset(left->items +
1188                                                         old_left_nritems - 1)));
1189         }
1190         btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
1191         /* fixup right node */
1192         if (push_items < right_nritems) {
1193                 push_space = btrfs_item_offset(right->items + push_items - 1) -
1194                                                leaf_data_end(root, right);
1195                 memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1196                         push_space, btrfs_leaf_data(right) +
1197                         leaf_data_end(root, right), push_space);
1198                 memmove(right->items, right->items + push_items,
1199                         (right_nritems - push_items) *
1200                         sizeof(struct btrfs_item));
1201         }
1202         right_nritems -= push_items;
1203         btrfs_set_header_nritems(&right->header, right_nritems);
1204         push_space = BTRFS_LEAF_DATA_SIZE(root);
1205         for (i = 0; i < right_nritems; i++) {
1206                 btrfs_set_item_offset(right->items + i, push_space -
1207                                       btrfs_item_size(right->items + i));
1208                 push_space = btrfs_item_offset(right->items + i);
1209         }
1210
1211         BUG_ON(list_empty(&t->dirty));
1212         BUG_ON(list_empty(&right_buf->dirty));
1213
1214         wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
1215         if (wret)
1216                 ret = wret;
1217
1218         /* then fixup the leaf pointer in the path */
1219         if (path->slots[0] < push_items) {
1220                 path->slots[0] += old_left_nritems;
1221                 btrfs_block_release(root, path->nodes[0]);
1222                 path->nodes[0] = t;
1223                 path->slots[1] -= 1;
1224         } else {
1225                 btrfs_block_release(root, t);
1226                 path->slots[0] -= push_items;
1227         }
1228         BUG_ON(path->slots[0] < 0);
1229         return ret;
1230 }
1231
1232 /*
1233  * split the path's leaf in two, making sure there is at least data_size
1234  * available for the resulting leaf level of the path.
1235  *
1236  * returns 0 if all went well and < 0 on failure.
1237  */
1238 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1239                       *root, struct btrfs_key *ins_key,
1240                       struct btrfs_path *path, int data_size, int extend)
1241 {
1242         struct btrfs_buffer *l_buf;
1243         struct btrfs_leaf *l;
1244         u32 nritems;
1245         int mid;
1246         int slot;
1247         struct btrfs_leaf *right;
1248         struct btrfs_buffer *right_buffer;
1249         int space_needed = data_size + sizeof(struct btrfs_item);
1250         int data_copy_size;
1251         int rt_data_off;
1252         int i;
1253         int ret = 0;
1254         int wret;
1255         int double_split;
1256         int num_doubles = 0;
1257         struct btrfs_disk_key disk_key;
1258
1259         if (extend)
1260                 space_needed = data_size;
1261         /* first try to make some room by pushing left and right */
1262         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1263                 wret = push_leaf_right(trans, root, path, data_size, 0);
1264                 if (wret < 0) {
1265                         return wret;
1266                 }
1267                 if (wret) {
1268                         wret = push_leaf_left(trans, root, path, data_size, 0);
1269                         if (wret < 0)
1270                                 return wret;
1271                 }
1272                 l_buf = path->nodes[0];
1273                 l = &l_buf->leaf;
1274
1275                 /* did the pushes work? */
1276                 if (btrfs_leaf_free_space(root, l) >= space_needed)
1277                         return 0;
1278         }
1279         if (!path->nodes[1]) {
1280                 ret = insert_new_root(trans, root, path, 1);
1281                 if (ret)
1282                         return ret;
1283         }
1284 again:
1285         double_split = 0;
1286         l_buf = path->nodes[0];
1287         l = &l_buf->leaf;
1288         slot = path->slots[0];
1289         nritems = btrfs_header_nritems(&l->header);
1290         mid = (nritems + 1)/ 2;
1291
1292         right_buffer = btrfs_alloc_free_block(trans, root, root->leafsize);
1293         right = &right_buffer->leaf;
1294         memset(&right->header, 0, sizeof(right->header));
1295         btrfs_set_header_bytenr(&right->header, right_buffer->bytenr);
1296         btrfs_set_header_level(&right->header, 0);
1297         btrfs_set_header_owner(&right->header, root->root_key.objectid);
1298         btrfs_set_header_generation(&right->header, trans->transid);
1299         memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1300                sizeof(right->header.fsid));
1301         if (mid <= slot) {
1302                 if (nritems == 1 ||
1303                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1304                         BTRFS_LEAF_DATA_SIZE(root)) {
1305                         if (slot >= nritems) {
1306                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1307                                 btrfs_set_header_nritems(&right->header, 0);
1308                                 wret = insert_ptr(trans, root, path,
1309                                                   &disk_key, right_buffer->bytenr,
1310                                                   path->slots[1] + 1, 1);
1311                                 if (wret)
1312                                         ret = wret;
1313                                 btrfs_block_release(root, path->nodes[0]);
1314                                 path->nodes[0] = right_buffer;
1315                                 path->slots[0] = 0;
1316                                 path->slots[1] += 1;
1317                                 return ret;
1318                         }
1319                         mid = slot;
1320                         if (mid != nritems &&
1321                             leaf_space_used(l, mid, nritems - mid) +
1322                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1323                                 double_split = 1;
1324                         }
1325                 }
1326         } else {
1327                 if (leaf_space_used(l, 0, mid) + space_needed >
1328                         BTRFS_LEAF_DATA_SIZE(root)) {
1329                         if (!extend && slot == 0) {
1330                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1331                                 btrfs_set_header_nritems(&right->header, 0);
1332                                 wret = insert_ptr(trans, root, path,
1333                                                   &disk_key,
1334                                                   right_buffer->bytenr,
1335                                                   path->slots[1], 1);
1336                                 if (wret)
1337                                         ret = wret;
1338                                 btrfs_block_release(root, path->nodes[0]);
1339                                 path->nodes[0] = right_buffer;
1340                                 path->slots[0] = 0;
1341                                 if (path->slots[1] == 0) {
1342                                         wret = fixup_low_keys(trans, root,
1343                                                    path, &disk_key, 1);
1344                                         if (wret)
1345                                                 ret = wret;
1346                                 }
1347                                 return ret;
1348                         } else if (extend && slot == 0) {
1349                                 mid = 1;
1350                         } else {
1351                                 mid = slot;
1352                                 if (mid != nritems &&
1353                                     leaf_space_used(l, mid, nritems - mid) +
1354                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
1355                                         double_split = 1;
1356                                 }
1357                         }
1358                 }
1359         }
1360         nritems = nritems - mid;
1361         btrfs_set_header_nritems(&right->header, nritems);
1362         data_copy_size = btrfs_item_end(l->items +  mid) -
1363                          leaf_data_end(root, l);
1364         memcpy(right->items, l->items + mid,
1365                nritems * sizeof(struct btrfs_item));
1366         memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1367                 data_copy_size, btrfs_leaf_data(l) +
1368                 leaf_data_end(root, l), data_copy_size);
1369         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1370                       btrfs_item_end(l->items + mid);
1371         for (i = 0; i < nritems; i++) {
1372                 u32 ioff = btrfs_item_offset(right->items + i);
1373                 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1374         }
1375
1376         btrfs_set_header_nritems(&l->header, mid);
1377         ret = 0;
1378         wret = insert_ptr(trans, root, path, &right->items[0].key,
1379                           right_buffer->bytenr, path->slots[1] + 1, 1);
1380         if (wret)
1381                 ret = wret;
1382
1383         BUG_ON(list_empty(&right_buffer->dirty));
1384         BUG_ON(list_empty(&l_buf->dirty));
1385         BUG_ON(path->slots[0] != slot);
1386         if (mid <= slot) {
1387                 btrfs_block_release(root, path->nodes[0]);
1388                 path->nodes[0] = right_buffer;
1389                 path->slots[0] -= mid;
1390                 path->slots[1] += 1;
1391         } else
1392                 btrfs_block_release(root, right_buffer);
1393
1394         BUG_ON(path->slots[0] < 0);
1395         if (double_split) {
1396                 BUG_ON(num_doubles != 0);
1397                 num_doubles++;
1398                 goto again;
1399         }
1400         return ret;
1401 }
1402 /*
1403  * Given a key and some data, insert an item into the tree.
1404  * This does all the path init required, making room in the tree if needed.
1405  */
1406 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1407                             *root, struct btrfs_path *path, struct btrfs_key
1408                             *cpu_key, u32 data_size)
1409 {
1410         int ret = 0;
1411         int slot;
1412         int slot_orig;
1413         struct btrfs_leaf *leaf;
1414         struct btrfs_buffer *leaf_buf;
1415         u32 nritems;
1416         unsigned int data_end;
1417         struct btrfs_disk_key disk_key;
1418
1419         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
1420
1421         /* create a root if there isn't one */
1422         if (!root->node)
1423                 BUG();
1424         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
1425         if (ret == 0) {
1426                 return -EEXIST;
1427         }
1428         if (ret < 0)
1429                 goto out;
1430
1431         slot_orig = path->slots[0];
1432         leaf_buf = path->nodes[0];
1433         leaf = &leaf_buf->leaf;
1434
1435         nritems = btrfs_header_nritems(&leaf->header);
1436         data_end = leaf_data_end(root, leaf);
1437
1438         if (btrfs_leaf_free_space(root, leaf) <
1439             sizeof(struct btrfs_item) + data_size)
1440                 BUG();
1441
1442         slot = path->slots[0];
1443         BUG_ON(slot < 0);
1444         if (slot != nritems) {
1445                 int i;
1446                 unsigned int old_data = btrfs_item_end(leaf->items + slot);
1447
1448                 /*
1449                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
1450                  */
1451                 /* first correct the data pointers */
1452                 for (i = slot; i < nritems; i++) {
1453                         u32 ioff = btrfs_item_offset(leaf->items + i);
1454                         btrfs_set_item_offset(leaf->items + i,
1455                                               ioff - data_size);
1456                 }
1457
1458                 /* shift the items */
1459                 memmove(leaf->items + slot + 1, leaf->items + slot,
1460                         (nritems - slot) * sizeof(struct btrfs_item));
1461
1462                 /* shift the data */
1463                 memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1464                         btrfs_leaf_data(leaf) +
1465                         data_end, old_data - data_end);
1466                 data_end = old_data;
1467         }
1468         /* setup the item for the new data */
1469         memcpy(&leaf->items[slot].key, &disk_key,
1470                 sizeof(struct btrfs_disk_key));
1471         btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1472         btrfs_set_item_size(leaf->items + slot, data_size);
1473         btrfs_set_header_nritems(&leaf->header, nritems + 1);
1474
1475         ret = 0;
1476         if (slot == 0)
1477                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
1478
1479         BUG_ON(list_empty(&leaf_buf->dirty));
1480         if (btrfs_leaf_free_space(root, leaf) < 0)
1481                 BUG();
1482         check_leaf(root, path, 0);
1483 out:
1484         return ret;
1485 }
1486
1487 /*
1488  * Given a key and some data, insert an item into the tree.
1489  * This does all the path init required, making room in the tree if needed.
1490  */
1491 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1492                       *root, struct btrfs_key *cpu_key, void *data, u32
1493                       data_size)
1494 {
1495         int ret = 0;
1496         struct btrfs_path path;
1497         u8 *ptr;
1498
1499         btrfs_init_path(&path);
1500         ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size);
1501         if (!ret) {
1502                 ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8);
1503                 memcpy(ptr, data, data_size);
1504         }
1505         btrfs_release_path(root, &path);
1506         return ret;
1507 }
1508
1509 /*
1510  * delete the pointer from a given node.
1511  *
1512  * If the delete empties a node, the node is removed from the tree,
1513  * continuing all the way the root if required.  The root is converted into
1514  * a leaf if all the nodes are emptied.
1515  */
1516 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1517                    struct btrfs_path *path, int level, int slot)
1518 {
1519         struct btrfs_node *node;
1520         struct btrfs_buffer *parent = path->nodes[level];
1521         u32 nritems;
1522         int ret = 0;
1523         int wret;
1524
1525         node = &parent->node;
1526         nritems = btrfs_header_nritems(&node->header);
1527         if (slot != nritems -1) {
1528                 memmove(node->ptrs + slot, node->ptrs + slot + 1,
1529                         sizeof(struct btrfs_key_ptr) * (nritems - slot - 1));
1530         }
1531         nritems--;
1532         btrfs_set_header_nritems(&node->header, nritems);
1533         if (nritems == 0 && parent == root->node) {
1534                 BUG_ON(btrfs_header_level(&root->node->node.header) != 1);
1535                 /* just turn the root into a leaf and break */
1536                 btrfs_set_header_level(&root->node->node.header, 0);
1537         } else if (slot == 0) {
1538                 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
1539                                       level + 1);
1540                 if (wret)
1541                         ret = wret;
1542         }
1543         BUG_ON(list_empty(&parent->dirty));
1544         return ret;
1545 }
1546
1547 /*
1548  * delete the item at the leaf level in path.  If that empties
1549  * the leaf, remove it from the tree
1550  */
1551 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1552                    struct btrfs_path *path)
1553 {
1554         int slot;
1555         struct btrfs_leaf *leaf;
1556         struct btrfs_buffer *leaf_buf;
1557         int doff;
1558         int dsize;
1559         int ret = 0;
1560         int wret;
1561         u32 nritems;
1562
1563         leaf_buf = path->nodes[0];
1564         leaf = &leaf_buf->leaf;
1565         slot = path->slots[0];
1566         doff = btrfs_item_offset(leaf->items + slot);
1567         dsize = btrfs_item_size(leaf->items + slot);
1568         nritems = btrfs_header_nritems(&leaf->header);
1569
1570         if (slot != nritems - 1) {
1571                 int i;
1572                 int data_end = leaf_data_end(root, leaf);
1573                 memmove(btrfs_leaf_data(leaf) + data_end + dsize,
1574                         btrfs_leaf_data(leaf) + data_end,
1575                         doff - data_end);
1576                 for (i = slot + 1; i < nritems; i++) {
1577                         u32 ioff = btrfs_item_offset(leaf->items + i);
1578                         btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1579                 }
1580                 memmove(leaf->items + slot, leaf->items + slot + 1,
1581                         sizeof(struct btrfs_item) *
1582                         (nritems - slot - 1));
1583         }
1584         btrfs_set_header_nritems(&leaf->header, nritems - 1);
1585         nritems--;
1586         /* delete the leaf if we've emptied it */
1587         if (nritems == 0) {
1588                 if (leaf_buf == root->node) {
1589                         btrfs_set_header_level(&leaf->header, 0);
1590                         BUG_ON(list_empty(&leaf_buf->dirty));
1591                 } else {
1592                         clean_tree_block(trans, root, leaf_buf);
1593                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
1594                         if (wret)
1595                                 ret = wret;
1596                         wret = btrfs_free_extent(trans, root,
1597                                                  leaf_buf->bytenr,
1598                                                  leaf_buf->size, 1);
1599                         if (wret)
1600                                 ret = wret;
1601                 }
1602         } else {
1603                 int used = leaf_space_used(leaf, 0, nritems);
1604                 if (slot == 0) {
1605                         wret = fixup_low_keys(trans, root, path,
1606                                               &leaf->items[0].key, 1);
1607                         if (wret)
1608                                 ret = wret;
1609                 }
1610                 BUG_ON(list_empty(&leaf_buf->dirty));
1611
1612                 /* delete the leaf if it is mostly empty */
1613                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
1614                         /* push_leaf_left fixes the path.
1615                          * make sure the path still points to our leaf
1616                          * for possible call to del_ptr below
1617                          */
1618                         slot = path->slots[1];
1619                         leaf_buf->count++;
1620                         wret = push_leaf_right(trans, root, path, 1, 1);
1621                         if (wret < 0)
1622                                 ret = wret;
1623                         if (path->nodes[0] == leaf_buf &&
1624                             btrfs_header_nritems(&leaf->header)) {
1625                                 wret = push_leaf_left(trans, root, path, 1, 1);
1626                                 if (wret < 0)
1627                                         ret = wret;
1628                         }
1629                         if (btrfs_header_nritems(&leaf->header) == 0) {
1630                                 u64 bytenr = leaf_buf->bytenr;
1631                                 clean_tree_block(trans, root, leaf_buf);
1632                                 wret = del_ptr(trans, root, path, 1, slot);
1633                                 if (wret)
1634                                         ret = wret;
1635                                 wret = btrfs_free_extent(trans, root, bytenr,
1636                                                  leaf_buf->size, 1);
1637                                 btrfs_block_release(root, leaf_buf);
1638                                 if (wret)
1639                                         ret = wret;
1640                         } else {
1641                                 btrfs_block_release(root, leaf_buf);
1642                         }
1643                 }
1644         }
1645         return ret;
1646 }
1647 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1648                         struct btrfs_root *root,
1649                         struct btrfs_path *path,
1650                         u32 new_size, int from_end)
1651 {
1652         int ret = 0;
1653         int slot;
1654         int slot_orig;
1655         struct btrfs_leaf *leaf;
1656         struct btrfs_item *item;
1657         u32 nritems;
1658         unsigned int data_end;
1659         unsigned int old_data_start;
1660         unsigned int old_size;
1661         unsigned int size_diff;
1662         int i;
1663
1664         slot_orig = path->slots[0];
1665         leaf = &path->nodes[0]->leaf;
1666         slot = path->slots[0];
1667
1668         old_size = btrfs_item_size(leaf->items + slot);
1669         if (old_size == new_size)
1670                 return 0;
1671
1672         nritems = btrfs_header_nritems(&leaf->header);
1673         data_end = leaf_data_end(root, leaf);
1674
1675         old_data_start = btrfs_item_offset(leaf->items + slot);
1676
1677         size_diff = old_size - new_size;
1678
1679         BUG_ON(slot < 0);
1680         BUG_ON(slot >= nritems);
1681
1682         /*
1683          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1684          */
1685         /* first correct the data pointers */
1686         for (i = slot; i < nritems; i++) {
1687                 u32 ioff;
1688                 item = leaf->items + i;
1689                 ioff = btrfs_item_offset(item);
1690                 btrfs_set_item_offset(item, ioff + size_diff);
1691         }
1692
1693         /* shift the data */
1694         if (from_end) {
1695                 memmove(btrfs_leaf_data(leaf) + data_end + size_diff,
1696                         btrfs_leaf_data(leaf) + data_end,
1697                         old_data_start + new_size - data_end);
1698         } else {
1699                 struct btrfs_disk_key *disk_key;
1700                 u64 offset;
1701
1702                 disk_key = &leaf->items[slot].key;
1703                 if (btrfs_disk_key_type(disk_key) == BTRFS_EXTENT_DATA_KEY) {
1704                         char *ptr;
1705                         struct btrfs_file_extent_item *fi;
1706
1707                         fi = btrfs_item_ptr(leaf, slot,
1708                                             struct btrfs_file_extent_item);
1709                         fi = (struct btrfs_file_extent_item *)(
1710                              (unsigned long)fi - size_diff);
1711
1712                         if (btrfs_file_extent_type(fi) ==
1713                             BTRFS_FILE_EXTENT_INLINE) {
1714                                 ptr = btrfs_item_ptr(leaf, slot, char);
1715                                 memmove(ptr, (char *)fi,
1716                                         offsetof(struct btrfs_file_extent_item,
1717                                                  disk_bytenr));
1718                         }
1719                 }
1720
1721                 memmove(btrfs_leaf_data(leaf) + data_end + size_diff,
1722                         btrfs_leaf_data(leaf) + data_end,
1723                         old_data_start - data_end);
1724
1725                 offset = btrfs_disk_key_offset(disk_key);
1726                 btrfs_set_disk_key_offset(disk_key, offset + size_diff);
1727                 if (slot == 0)
1728                         fixup_low_keys(trans, root, path, disk_key, 1);
1729         }
1730
1731         item = leaf->items + slot;
1732         btrfs_set_item_size(item, new_size);
1733         BUG_ON(list_empty(&path->nodes[0]->dirty));
1734
1735         ret = 0;
1736         if (btrfs_leaf_free_space(root, leaf) < 0) {
1737                 btrfs_print_leaf(root, leaf);
1738                 BUG();
1739         }
1740         return ret;
1741 }
1742
1743 int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1744                       *root, struct btrfs_path *path, u32 data_size)
1745 {
1746         int ret = 0;
1747         int slot;
1748         int slot_orig;
1749         struct btrfs_leaf *leaf;
1750         struct btrfs_buffer *leaf_buf;
1751         u32 nritems;
1752         unsigned int data_end;
1753         unsigned int old_data;
1754         unsigned int old_size;
1755         int i;
1756
1757         slot_orig = path->slots[0];
1758         leaf_buf = path->nodes[0];
1759         leaf = &leaf_buf->leaf;
1760
1761         nritems = btrfs_header_nritems(&leaf->header);
1762         data_end = leaf_data_end(root, leaf);
1763
1764         if (btrfs_leaf_free_space(root, leaf) < data_size)
1765                 BUG();
1766         slot = path->slots[0];
1767         old_data = btrfs_item_end(leaf->items + slot);
1768
1769         BUG_ON(slot < 0);
1770         BUG_ON(slot >= nritems);
1771
1772         /*
1773          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1774          */
1775         /* first correct the data pointers */
1776         for (i = slot; i < nritems; i++) {
1777                 u32 ioff = btrfs_item_offset(leaf->items + i);
1778                 btrfs_set_item_offset(leaf->items + i,
1779                                       ioff - data_size);
1780         }
1781         /* shift the data */
1782         memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1783                 btrfs_leaf_data(leaf) + data_end, old_data - data_end);
1784         data_end = old_data;
1785         old_size = btrfs_item_size(leaf->items + slot);
1786         btrfs_set_item_size(leaf->items + slot, old_size + data_size);
1787
1788         ret = 0;
1789         if (btrfs_leaf_free_space(root, leaf) < 0)
1790                 BUG();
1791         check_leaf(root, path, 0);
1792         return ret;
1793 }
1794
1795 /*
1796  * walk up the tree as far as required to find the next leaf.
1797  * returns 0 if it found something or 1 if there are no greater leaves.
1798  * returns < 0 on io errors.
1799  */
1800 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1801 {
1802         int slot;
1803         int level = 1;
1804         u64 bytenr;
1805         struct btrfs_buffer *c;
1806         struct btrfs_buffer *next = NULL;
1807
1808         while(level < BTRFS_MAX_LEVEL) {
1809                 if (!path->nodes[level])
1810                         return 1;
1811                 slot = path->slots[level] + 1;
1812                 c = path->nodes[level];
1813                 if (slot >= btrfs_header_nritems(&c->node.header)) {
1814                         level++;
1815                         continue;
1816                 }
1817                 bytenr = btrfs_node_blockptr(&c->node, slot);
1818                 if (next)
1819                         btrfs_block_release(root, next);
1820                 next = read_tree_block(root, bytenr,
1821                                        btrfs_level_size(root, level - 1));
1822                 break;
1823         }
1824         path->slots[level] = slot;
1825         while(1) {
1826                 level--;
1827                 c = path->nodes[level];
1828                 btrfs_block_release(root, c);
1829                 path->nodes[level] = next;
1830                 path->slots[level] = 0;
1831                 if (!level)
1832                         break;
1833                 next = read_tree_block(root,
1834                                        btrfs_node_blockptr(&next->node, 0),
1835                                        btrfs_level_size(root, level - 1));
1836         }
1837         check_leaf(root, path, 0);
1838         return 0;
1839 }