transaction handles everywhere
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7 #include "print-tree.h"
8 #include "transaction.h"
9
10 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
11                             *orig_root, u64 num_blocks, u64 search_start, u64
12                             search_end, struct btrfs_key *ins);
13 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
14                                  btrfs_root *extent_root);
15 static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root
16                        *extent_root);
17
18 /*
19  * pending extents are blocks that we're trying to allocate in the extent
20  * map while trying to grow the map because of other allocations.  To avoid
21  * recursing, they are tagged in the radix tree and cleaned up after
22  * other allocations are done.  The pending tag is also used in the same
23  * manner for deletes.
24  */
25 #define CTREE_EXTENT_PENDING_DEL 0
26
27 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
28                          *root, u64 blocknr)
29 {
30         struct btrfs_path path;
31         int ret;
32         struct btrfs_key key;
33         struct btrfs_leaf *l;
34         struct btrfs_extent_item *item;
35         struct btrfs_key ins;
36         u32 refs;
37
38         find_free_extent(trans, root->extent_root, 0, 0, (u64)-1, &ins);
39         btrfs_init_path(&path);
40         key.objectid = blocknr;
41         key.flags = 0;
42         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
43         key.offset = 1;
44         ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 1);
45         if (ret != 0)
46                 BUG();
47         BUG_ON(ret != 0);
48         l = &path.nodes[0]->leaf;
49         item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
50         refs = btrfs_extent_refs(item);
51         btrfs_set_extent_refs(item, refs + 1);
52
53         BUG_ON(list_empty(&path.nodes[0]->dirty));
54         btrfs_release_path(root->extent_root, &path);
55         finish_current_insert(trans, root->extent_root);
56         run_pending(trans, root->extent_root);
57         return 0;
58 }
59
60 static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
61                             *root, u64 blocknr, u32 *refs)
62 {
63         struct btrfs_path path;
64         int ret;
65         struct btrfs_key key;
66         struct btrfs_leaf *l;
67         struct btrfs_extent_item *item;
68         btrfs_init_path(&path);
69         key.objectid = blocknr;
70         key.offset = 1;
71         key.flags = 0;
72         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
73         ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 0);
74         if (ret != 0)
75                 BUG();
76         l = &path.nodes[0]->leaf;
77         item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
78         *refs = btrfs_extent_refs(item);
79         btrfs_release_path(root->extent_root, &path);
80         return 0;
81 }
82
83 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
84                   struct btrfs_buffer *buf)
85 {
86         u64 blocknr;
87         int i;
88
89         if (!root->ref_cows)
90                 return 0;
91         if (btrfs_is_leaf(&buf->node))
92                 return 0;
93
94         for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) {
95                 blocknr = btrfs_node_blockptr(&buf->node, i);
96                 inc_block_ref(trans, root, blocknr);
97         }
98         return 0;
99 }
100
101 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
102                                btrfs_root *root)
103 {
104         unsigned long gang[8];
105         u64 first = 0;
106         int ret;
107         int i;
108
109         while(1) {
110                 ret = radix_tree_gang_lookup(&root->pinned_radix,
111                                                  (void **)gang, 0,
112                                                  ARRAY_SIZE(gang));
113                 if (!ret)
114                         break;
115                 if (!first)
116                         first = gang[0];
117                 for (i = 0; i < ret; i++) {
118                         radix_tree_delete(&root->pinned_radix, gang[i]);
119                 }
120         }
121         root->last_insert.objectid = first;
122         root->last_insert.offset = 0;
123         return 0;
124 }
125
126 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
127                                  btrfs_root *extent_root)
128 {
129         struct btrfs_key ins;
130         struct btrfs_extent_item extent_item;
131         int i;
132         int ret;
133
134         btrfs_set_extent_refs(&extent_item, 1);
135         btrfs_set_extent_owner(&extent_item,
136                 btrfs_header_parentid(&extent_root->node->node.header));
137         ins.offset = 1;
138         ins.flags = 0;
139         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
140
141         for (i = 0; i < extent_root->current_insert.flags; i++) {
142                 ins.objectid = extent_root->current_insert.objectid + i;
143                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
144                                         sizeof(extent_item));
145                 BUG_ON(ret);
146         }
147         extent_root->current_insert.offset = 0;
148         return 0;
149 }
150
151 /*
152  * remove an extent from the root, returns 0 on success
153  */
154 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
155                          *root, u64 blocknr, u64 num_blocks, int pin)
156 {
157         struct btrfs_path path;
158         struct btrfs_key key;
159         struct btrfs_root *extent_root = root->extent_root;
160         int ret;
161         struct btrfs_extent_item *ei;
162         struct btrfs_key ins;
163         u32 refs;
164
165         BUG_ON(pin && num_blocks != 1);
166         key.objectid = blocknr;
167         key.flags = 0;
168         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
169         key.offset = num_blocks;
170
171         find_free_extent(trans, root, 0, 0, (u64)-1, &ins);
172         btrfs_init_path(&path);
173         ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1);
174         if (ret) {
175                 printf("failed to find %Lu\n", key.objectid);
176                 btrfs_print_tree(extent_root, extent_root->node);
177                 printf("failed to find %Lu\n", key.objectid);
178                 BUG();
179         }
180         ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0],
181                             struct btrfs_extent_item);
182         BUG_ON(ei->refs == 0);
183         refs = btrfs_extent_refs(ei) - 1;
184         btrfs_set_extent_refs(ei, refs);
185         if (refs == 0) {
186                 if (pin) {
187                         int err;
188                         radix_tree_preload(GFP_KERNEL);
189                         err = radix_tree_insert(&extent_root->pinned_radix,
190                                           blocknr, (void *)blocknr);
191                         BUG_ON(err);
192                         radix_tree_preload_end();
193                 }
194                 ret = btrfs_del_item(trans, extent_root, &path);
195                 if (!pin && extent_root->last_insert.objectid > blocknr)
196                         extent_root->last_insert.objectid = blocknr;
197                 if (ret)
198                         BUG();
199         }
200         btrfs_release_path(extent_root, &path);
201         finish_current_insert(trans, extent_root);
202         return ret;
203 }
204
205 /*
206  * find all the blocks marked as pending in the radix tree and remove
207  * them from the extent map
208  */
209 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
210                                btrfs_root *extent_root)
211 {
212         int ret;
213         struct btrfs_buffer *gang[4];
214         int i;
215
216         while(1) {
217                 ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix,
218                                                  (void **)gang, 0,
219                                                  ARRAY_SIZE(gang),
220                                                  CTREE_EXTENT_PENDING_DEL);
221                 if (!ret)
222                         break;
223                 for (i = 0; i < ret; i++) {
224                         ret = __free_extent(trans, extent_root,
225                                             gang[i]->blocknr, 1, 1);
226                         radix_tree_tag_clear(&extent_root->cache_radix,
227                                                 gang[i]->blocknr,
228                                                 CTREE_EXTENT_PENDING_DEL);
229                         btrfs_block_release(extent_root, gang[i]);
230                 }
231         }
232         return 0;
233 }
234
235 static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root
236                        *extent_root)
237 {
238         while(radix_tree_tagged(&extent_root->cache_radix,
239                                 CTREE_EXTENT_PENDING_DEL))
240                 del_pending_extents(trans, extent_root);
241         return 0;
242 }
243
244
245 /*
246  * remove an extent from the root, returns 0 on success
247  */
248 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
249                       *root, u64 blocknr, u64 num_blocks, int pin)
250 {
251         struct btrfs_root *extent_root = root->extent_root;
252         struct btrfs_buffer *t;
253         int pending_ret;
254         int ret;
255
256         if (root == extent_root) {
257                 t = find_tree_block(root, blocknr);
258                 radix_tree_tag_set(&root->cache_radix, blocknr,
259                                    CTREE_EXTENT_PENDING_DEL);
260                 return 0;
261         }
262         ret = __free_extent(trans, root, blocknr, num_blocks, pin);
263         pending_ret = run_pending(trans, root->extent_root);
264         return ret ? ret : pending_ret;
265 }
266
267 /*
268  * walks the btree of allocated extents and find a hole of a given size.
269  * The key ins is changed to record the hole:
270  * ins->objectid == block start
271  * ins->flags = BTRFS_EXTENT_ITEM_KEY
272  * ins->offset == number of blocks
273  * Any available blocks before search_start are skipped.
274  */
275 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
276                             *orig_root, u64 num_blocks, u64 search_start, u64
277                             search_end, struct btrfs_key *ins)
278 {
279         struct btrfs_path path;
280         struct btrfs_key key;
281         int ret;
282         u64 hole_size = 0;
283         int slot = 0;
284         u64 last_block;
285         u64 test_block;
286         int start_found;
287         struct btrfs_leaf *l;
288         struct btrfs_root * root = orig_root->extent_root;
289         int total_needed = num_blocks;
290
291         total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3;
292         if (root->last_insert.objectid > search_start)
293                 search_start = root->last_insert.objectid;
294
295         ins->flags = 0;
296         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
297
298 check_failed:
299         btrfs_init_path(&path);
300         ins->objectid = search_start;
301         ins->offset = 0;
302         start_found = 0;
303         ret = btrfs_search_slot(trans, root, ins, &path, 0, 0);
304         if (ret < 0)
305                 goto error;
306
307         if (path.slots[0] > 0)
308                 path.slots[0]--;
309
310         while (1) {
311                 l = &path.nodes[0]->leaf;
312                 slot = path.slots[0];
313                 if (slot >= btrfs_header_nritems(&l->header)) {
314                         ret = btrfs_next_leaf(root, &path);
315                         if (ret == 0)
316                                 continue;
317                         if (ret < 0)
318                                 goto error;
319                         if (!start_found) {
320                                 ins->objectid = search_start;
321                                 ins->offset = (u64)-1;
322                                 start_found = 1;
323                                 goto check_pending;
324                         }
325                         ins->objectid = last_block > search_start ?
326                                         last_block : search_start;
327                         ins->offset = (u64)-1;
328                         goto check_pending;
329                 }
330                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
331                 if (key.objectid >= search_start) {
332                         if (start_found) {
333                                 if (last_block < search_start)
334                                         last_block = search_start;
335                                 hole_size = key.objectid - last_block;
336                                 if (hole_size > total_needed) {
337                                         ins->objectid = last_block;
338                                         ins->offset = hole_size;
339                                         goto check_pending;
340                                 }
341                         }
342                 }
343                 start_found = 1;
344                 last_block = key.objectid + key.offset;
345                 path.slots[0]++;
346         }
347         // FIXME -ENOSPC
348 check_pending:
349         /* we have to make sure we didn't find an extent that has already
350          * been allocated by the map tree or the original allocation
351          */
352         btrfs_release_path(root, &path);
353         BUG_ON(ins->objectid < search_start);
354         for (test_block = ins->objectid;
355              test_block < ins->objectid + total_needed; test_block++) {
356                 if (radix_tree_lookup(&root->pinned_radix, test_block)) {
357                         search_start = test_block + 1;
358                         goto check_failed;
359                 }
360         }
361         BUG_ON(root->current_insert.offset);
362         root->current_insert.offset = total_needed - num_blocks;
363         root->current_insert.objectid = ins->objectid + num_blocks;
364         root->current_insert.flags = 0;
365         root->last_insert.objectid = ins->objectid;
366         ins->offset = num_blocks;
367         return 0;
368 error:
369         btrfs_release_path(root, &path);
370         return ret;
371 }
372
373 /*
374  * finds a free extent and does all the dirty work required for allocation
375  * returns the key for the extent through ins, and a tree buffer for
376  * the first block of the extent through buf.
377  *
378  * returns 0 if everything worked, non-zero otherwise.
379  */
380 static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root
381                         *root, u64 num_blocks, u64 search_start, u64
382                         search_end, u64 owner, struct btrfs_key *ins)
383 {
384         int ret;
385         int pending_ret;
386         struct btrfs_root *extent_root = root->extent_root;
387         struct btrfs_extent_item extent_item;
388
389         btrfs_set_extent_refs(&extent_item, 1);
390         btrfs_set_extent_owner(&extent_item, owner);
391
392         if (root == extent_root) {
393                 BUG_ON(extent_root->current_insert.offset == 0);
394                 BUG_ON(num_blocks != 1);
395                 BUG_ON(extent_root->current_insert.flags ==
396                        extent_root->current_insert.offset);
397                 ins->offset = 1;
398                 ins->objectid = extent_root->current_insert.objectid +
399                                 extent_root->current_insert.flags++;
400                 return 0;
401         }
402         ret = find_free_extent(trans, root, num_blocks, search_start,
403                                search_end, ins);
404         if (ret)
405                 return ret;
406
407         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
408                                 sizeof(extent_item));
409
410         finish_current_insert(trans, extent_root);
411         pending_ret = run_pending(trans, extent_root);
412         if (ret)
413                 return ret;
414         if (pending_ret)
415                 return pending_ret;
416         return 0;
417 }
418
419 /*
420  * helper function to allocate a block for a given tree
421  * returns the tree buffer or NULL.
422  */
423 struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
424                                             struct btrfs_root *root)
425 {
426         struct btrfs_key ins;
427         int ret;
428         struct btrfs_buffer *buf;
429
430         ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1,
431                            btrfs_header_parentid(&root->node->node.header),
432                            &ins);
433         if (ret) {
434                 BUG();
435                 return NULL;
436         }
437         buf = find_tree_block(root, ins.objectid);
438         dirty_tree_block(trans, root, buf);
439         return buf;
440 }
441
442 /*
443  * helper function for drop_snapshot, this walks down the tree dropping ref
444  * counts as it goes.
445  */
446 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
447                           *root, struct btrfs_path *path, int *level)
448 {
449         struct btrfs_buffer *next;
450         struct btrfs_buffer *cur;
451         u64 blocknr;
452         int ret;
453         u32 refs;
454
455         ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr,
456                                &refs);
457         BUG_ON(ret);
458         if (refs > 1)
459                 goto out;
460         /*
461          * walk down to the last node level and free all the leaves
462          */
463         while(*level > 0) {
464                 cur = path->nodes[*level];
465                 if (path->slots[*level] >=
466                     btrfs_header_nritems(&cur->node.header))
467                         break;
468                 blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]);
469                 ret = lookup_block_ref(trans, root, blocknr, &refs);
470                 if (refs != 1 || *level == 1) {
471                         path->slots[*level]++;
472                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
473                         BUG_ON(ret);
474                         continue;
475                 }
476                 BUG_ON(ret);
477                 next = read_tree_block(root, blocknr);
478                 if (path->nodes[*level-1])
479                         btrfs_block_release(root, path->nodes[*level-1]);
480                 path->nodes[*level-1] = next;
481                 *level = btrfs_header_level(&next->node.header);
482                 path->slots[*level] = 0;
483         }
484 out:
485         ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1,
486                                 1);
487         btrfs_block_release(root, path->nodes[*level]);
488         path->nodes[*level] = NULL;
489         *level += 1;
490         BUG_ON(ret);
491         return 0;
492 }
493
494 /*
495  * helper for dropping snapshots.  This walks back up the tree in the path
496  * to find the first node higher up where we haven't yet gone through
497  * all the slots
498  */
499 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
500                         *root, struct btrfs_path *path, int *level)
501 {
502         int i;
503         int slot;
504         int ret;
505         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
506                 slot = path->slots[i];
507                 if (slot <
508                     btrfs_header_nritems(&path->nodes[i]->node.header)- 1) {
509                         path->slots[i]++;
510                         *level = i;
511                         return 0;
512                 } else {
513                         ret = btrfs_free_extent(trans, root,
514                                                 path->nodes[*level]->blocknr,
515                                                 1, 1);
516                         btrfs_block_release(root, path->nodes[*level]);
517                         path->nodes[*level] = NULL;
518                         *level = i + 1;
519                         BUG_ON(ret);
520                 }
521         }
522         return 1;
523 }
524
525 /*
526  * drop the reference count on the tree rooted at 'snap'.  This traverses
527  * the tree freeing any blocks that have a ref count of zero after being
528  * decremented.
529  */
530 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
531                         *root, struct btrfs_buffer *snap)
532 {
533         int ret = 0;
534         int wret;
535         int level;
536         struct btrfs_path path;
537         int i;
538         int orig_level;
539
540         btrfs_init_path(&path);
541
542         level = btrfs_header_level(&snap->node.header);
543         orig_level = level;
544         path.nodes[level] = snap;
545         path.slots[level] = 0;
546         while(1) {
547                 wret = walk_down_tree(trans, root, &path, &level);
548                 if (wret > 0)
549                         break;
550                 if (wret < 0)
551                         ret = wret;
552
553                 wret = walk_up_tree(trans, root, &path, &level);
554                 if (wret > 0)
555                         break;
556                 if (wret < 0)
557                         ret = wret;
558         }
559         for (i = 0; i <= orig_level; i++) {
560                 if (path.nodes[i]) {
561                         btrfs_block_release(root, path.nodes[i]);
562                 }
563         }
564         return ret;
565 }