7f156d9b7089f505deb688a1ffeb4ff77fe22108
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7 #include "print-tree.h"
8
9 static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks,
10                             u64 search_start, u64 search_end,
11                             struct btrfs_key *ins);
12 static int finish_current_insert(struct btrfs_root *extent_root);
13 static int run_pending(struct btrfs_root *extent_root);
14
15 /*
16  * pending extents are blocks that we're trying to allocate in the extent
17  * map while trying to grow the map because of other allocations.  To avoid
18  * recursing, they are tagged in the radix tree and cleaned up after
19  * other allocations are done.  The pending tag is also used in the same
20  * manner for deletes.
21  */
22 #define CTREE_EXTENT_PENDING_DEL 0
23
24 static int inc_block_ref(struct btrfs_root *root, u64 blocknr)
25 {
26         struct btrfs_path path;
27         int ret;
28         struct btrfs_key key;
29         struct btrfs_leaf *l;
30         struct btrfs_extent_item *item;
31         struct btrfs_key ins;
32         u32 refs;
33
34         find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins);
35         btrfs_init_path(&path);
36         key.objectid = blocknr;
37         key.flags = 0;
38         key.offset = 1;
39         ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 1);
40         if (ret != 0)
41                 BUG();
42         BUG_ON(ret != 0);
43         l = &path.nodes[0]->leaf;
44         item = (struct btrfs_extent_item *)(l->data +
45                                             btrfs_item_offset(l->items +
46                                                               path.slots[0]));
47         refs = btrfs_extent_refs(item);
48         btrfs_set_extent_refs(item, refs + 1);
49
50         BUG_ON(list_empty(&path.nodes[0]->dirty));
51         btrfs_release_path(root->extent_root, &path);
52         finish_current_insert(root->extent_root);
53         run_pending(root->extent_root);
54         return 0;
55 }
56
57 static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs)
58 {
59         struct btrfs_path path;
60         int ret;
61         struct btrfs_key key;
62         struct btrfs_leaf *l;
63         struct btrfs_extent_item *item;
64         btrfs_init_path(&path);
65         key.objectid = blocknr;
66         key.flags = 0;
67         key.offset = 1;
68         ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 0);
69         if (ret != 0)
70                 BUG();
71         l = &path.nodes[0]->leaf;
72         item = (struct btrfs_extent_item *)(l->data +
73                                       btrfs_item_offset(l->items +
74                                                         path.slots[0]));
75         *refs = btrfs_extent_refs(item);
76         btrfs_release_path(root->extent_root, &path);
77         return 0;
78 }
79
80 int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf)
81 {
82         u64 blocknr;
83         int i;
84
85         if (!root->ref_cows)
86                 return 0;
87         if (btrfs_is_leaf(&buf->node))
88                 return 0;
89
90         for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) {
91                 blocknr = btrfs_node_blockptr(&buf->node, i);
92                 inc_block_ref(root, blocknr);
93         }
94         return 0;
95 }
96
97 int btrfs_finish_extent_commit(struct btrfs_root *root)
98 {
99         unsigned long gang[8];
100         int ret;
101         int i;
102
103         while(1) {
104                 ret = radix_tree_gang_lookup(&root->pinned_radix,
105                                                  (void **)gang, 0,
106                                                  ARRAY_SIZE(gang));
107                 if (!ret)
108                         break;
109                 for (i = 0; i < ret; i++) {
110                         radix_tree_delete(&root->pinned_radix, gang[i]);
111                 }
112         }
113         root->last_insert.objectid = 0;
114         root->last_insert.offset = 0;
115         return 0;
116 }
117
/*
 * Insert extent items for the blocks alloc_extent() handed out of the
 * reserved range (current_insert) while the extent tree itself was being
 * modified.  current_insert.objectid is the first reserved block and
 * current_insert.flags counts how many of them were actually consumed.
 */
static int finish_current_insert(struct btrfs_root *extent_root)
{
        struct btrfs_key ins;
        struct btrfs_extent_item extent_item;
        int i;
        int ret;

        btrfs_set_extent_refs(&extent_item, 1);
        /* blocks reserved this way always belong to the extent tree itself */
        btrfs_set_extent_owner(&extent_item,
                btrfs_header_parentid(&extent_root->node->node.header));
        ins.offset = 1;
        ins.flags = 0;

        for (i = 0; i < extent_root->current_insert.flags; i++) {
                ins.objectid = extent_root->current_insert.objectid + i;
                ret = btrfs_insert_item(extent_root, &ins, &extent_item,
                                  sizeof(extent_item));
                BUG_ON(ret);
        }
        /*
         * mark the reservation empty; find_free_extent() BUGs if a new
         * reservation is made while one is outstanding
         */
        extent_root->current_insert.offset = 0;
        return 0;
}
140
141 /*
142  * remove an extent from the root, returns 0 on success
143  */
144 static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks)
145 {
146         struct btrfs_path path;
147         struct btrfs_key key;
148         struct btrfs_root *extent_root = root->extent_root;
149         int ret;
150         struct btrfs_item *item;
151         struct btrfs_extent_item *ei;
152         struct btrfs_key ins;
153         u32 refs;
154
155         key.objectid = blocknr;
156         key.flags = 0;
157         key.offset = num_blocks;
158
159         find_free_extent(root, 0, 0, (u64)-1, &ins);
160         btrfs_init_path(&path);
161         ret = btrfs_search_slot(extent_root, &key, &path, -1, 1);
162         if (ret) {
163                 printf("failed to find %Lu\n", key.objectid);
164                 btrfs_print_tree(extent_root, extent_root->node);
165                 printf("failed to find %Lu\n", key.objectid);
166                 BUG();
167         }
168         item = path.nodes[0]->leaf.items + path.slots[0];
169         ei = (struct btrfs_extent_item *)(path.nodes[0]->leaf.data +
170                                     btrfs_item_offset(item));
171         BUG_ON(ei->refs == 0);
172         refs = btrfs_extent_refs(ei) - 1;
173         btrfs_set_extent_refs(ei, refs);
174         if (refs == 0) {
175                 if (!root->ref_cows) {
176                         int err;
177                         radix_tree_preload(GFP_KERNEL);
178                         err = radix_tree_insert(&extent_root->pinned_radix,
179                                           blocknr, (void *)blocknr);
180                         BUG_ON(err);
181                         radix_tree_preload_end();
182                 }
183                 ret = btrfs_del_item(extent_root, &path);
184                 if (root != extent_root &&
185                     extent_root->last_insert.objectid > blocknr)
186                         extent_root->last_insert.objectid = blocknr;
187                 if (ret)
188                         BUG();
189         }
190         btrfs_release_path(extent_root, &path);
191         finish_current_insert(extent_root);
192         return ret;
193 }
194
195 /*
196  * find all the blocks marked as pending in the radix tree and remove
197  * them from the extent map
198  */
199 static int del_pending_extents(struct btrfs_root *extent_root)
200 {
201         int ret;
202         struct btrfs_buffer *gang[4];
203         int i;
204
205         while(1) {
206                 ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix,
207                                                  (void **)gang, 0,
208                                                  ARRAY_SIZE(gang),
209                                                  CTREE_EXTENT_PENDING_DEL);
210                 if (!ret)
211                         break;
212                 for (i = 0; i < ret; i++) {
213                         ret = __free_extent(extent_root, gang[i]->blocknr, 1);
214                         radix_tree_tag_clear(&extent_root->cache_radix,
215                                                 gang[i]->blocknr,
216                                                 CTREE_EXTENT_PENDING_DEL);
217                         btrfs_block_release(extent_root, gang[i]);
218                 }
219         }
220         return 0;
221 }
222
223 static int run_pending(struct btrfs_root *extent_root)
224 {
225         while(radix_tree_tagged(&extent_root->cache_radix,
226                                 CTREE_EXTENT_PENDING_DEL))
227                 del_pending_extents(extent_root);
228         return 0;
229 }
230
231
232 /*
233  * remove an extent from the root, returns 0 on success
234  */
235 int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks)
236 {
237         struct btrfs_key key;
238         struct btrfs_root *extent_root = root->extent_root;
239         struct btrfs_buffer *t;
240         int pending_ret;
241         int ret;
242
243         if (root == extent_root) {
244                 t = find_tree_block(root, blocknr);
245                 radix_tree_tag_set(&root->cache_radix, blocknr,
246                                    CTREE_EXTENT_PENDING_DEL);
247                 return 0;
248         }
249         key.objectid = blocknr;
250         key.flags = 0;
251         key.offset = num_blocks;
252         ret = __free_extent(root, blocknr, num_blocks);
253         pending_ret = run_pending(root->extent_root);
254         return ret ? ret : pending_ret;
255 }
256
/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
 * ins->flags = 0
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 *
 * NOTE(review): search_end is accepted but never consulted by the walk
 * below — confirm whether an upper bound was intended.
 */
static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks,
                            u64 search_start, u64 search_end,
                            struct btrfs_key *ins)
{
        struct btrfs_path path;
        struct btrfs_key key;
        int ret;
        u64 hole_size = 0;
        int slot = 0;
        u64 last_block;
        u64 test_block;
        int start_found;
        struct btrfs_leaf *l;
        struct btrfs_root * root = orig_root->extent_root;
        int total_needed = num_blocks;

        /*
         * over-reserve: growing the extent tree itself may need up to 3
         * blocks per level while this allocation is in flight
         */
        total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3;
        /* resume from the last successful allocation point */
        if (root->last_insert.objectid > search_start)
                search_start = root->last_insert.objectid;
check_failed:
        btrfs_init_path(&path);
        ins->objectid = search_start;
        ins->offset = 0;
        ins->flags = 0;
        start_found = 0;
        ret = btrfs_search_slot(root, ins, &path, 0, 0);
        if (ret < 0)
                goto error;

        /* step back one slot to see the extent ending at/after search_start */
        if (path.slots[0] > 0)
                path.slots[0]--;

        while (1) {
                l = &path.nodes[0]->leaf;
                slot = path.slots[0];
                if (slot >= btrfs_header_nritems(&l->header)) {
                        ret = btrfs_next_leaf(root, &path);
                        if (ret == 0)
                                continue;
                        if (ret < 0)
                                goto error;
                        /* past the last extent item: all space after is free */
                        if (!start_found) {
                                ins->objectid = search_start;
                                ins->offset = (u64)-1;
                                start_found = 1;
                                goto check_pending;
                        }
                        ins->objectid = last_block > search_start ?
                                        last_block : search_start;
                        ins->offset = (u64)-1;
                        goto check_pending;
                }
                btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
                if (key.objectid >= search_start) {
                        if (start_found) {
                                if (last_block < search_start)
                                        last_block = search_start;
                                /* gap between the previous extent and this one */
                                hole_size = key.objectid - last_block;
                                if (hole_size > total_needed) {
                                        ins->objectid = last_block;
                                        ins->offset = hole_size;
                                        goto check_pending;
                                }
                        }
                }
                start_found = 1;
                last_block = key.objectid + key.offset;
                path.slots[0]++;
        }
        // FIXME -ENOSPC
check_pending:
        /* we have to make sure we didn't find an extent that has already
         * been allocated by the map tree or the original allocation
         */
        btrfs_release_path(root, &path);
        BUG_ON(ins->objectid < search_start);
        for (test_block = ins->objectid;
             test_block < ins->objectid + total_needed; test_block++) {
                /* pinned blocks are still live in the committed tree */
                if (radix_tree_lookup(&root->pinned_radix, test_block)) {
                        search_start = test_block + 1;
                        goto check_failed;
                }
        }
        /* only one reservation may be outstanding at a time */
        BUG_ON(root->current_insert.offset);
        /* blocks beyond num_blocks stay reserved for extent-tree growth */
        root->current_insert.offset = total_needed - num_blocks;
        root->current_insert.objectid = ins->objectid + num_blocks;
        root->current_insert.flags = 0;
        root->last_insert.objectid = ins->objectid;
        ins->offset = num_blocks;
        return 0;
error:
        btrfs_release_path(root, &path);
        return ret;
}
359
/*
 * finds a free extent and does all the dirty work required for allocation.
 * The key for the new extent is returned through ins.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
static int alloc_extent(struct btrfs_root *root, u64 num_blocks,
                        u64 search_start, u64 search_end, u64 owner,
                        struct btrfs_key *ins)
{
        int ret;
        int pending_ret;
        struct btrfs_root *extent_root = root->extent_root;
        struct btrfs_extent_item extent_item;

        btrfs_set_extent_refs(&extent_item, 1);
        btrfs_set_extent_owner(&extent_item, owner);

        /*
         * allocations for the extent tree itself must come out of the
         * range reserved by the last find_free_extent() call; inserting
         * an extent item here would recurse into the allocator.
         * current_insert.flags counts the reserved blocks consumed, and
         * finish_current_insert() inserts their items afterwards.
         */
        if (root == extent_root) {
                BUG_ON(extent_root->current_insert.offset == 0);
                BUG_ON(num_blocks != 1);
                BUG_ON(extent_root->current_insert.flags ==
                       extent_root->current_insert.offset);
                ins->offset = 1;
                ins->objectid = extent_root->current_insert.objectid +
                                extent_root->current_insert.flags++;
                return 0;
        }
        ret = find_free_extent(root, num_blocks, search_start,
                               search_end, ins);
        if (ret)
                return ret;

        ret = btrfs_insert_item(extent_root, ins, &extent_item,
                          sizeof(extent_item));

        /* flush reserved-block items and pending deletes before returning */
        finish_current_insert(extent_root);
        pending_ret = run_pending(extent_root);
        if (ret)
                return ret;
        if (pending_ret)
                return pending_ret;
        return 0;
}
405
406 /*
407  * helper function to allocate a block for a given tree
408  * returns the tree buffer or NULL.
409  */
410 struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root)
411 {
412         struct btrfs_key ins;
413         int ret;
414         struct btrfs_buffer *buf;
415
416         ret = alloc_extent(root, 1, 0, (unsigned long)-1,
417                            btrfs_header_parentid(&root->node->node.header),
418                            &ins);
419         if (ret) {
420                 BUG();
421                 return NULL;
422         }
423         buf = find_tree_block(root, ins.objectid);
424         dirty_tree_block(root, buf);
425         return buf;
426 }
427
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 *
 * Returns 0; on return *level points one above the node that was freed,
 * with path->nodes[] updated to hold the blocks still referenced.
 */
static int walk_down_tree(struct btrfs_root *root,
                          struct btrfs_path *path, int *level)
{
        struct btrfs_buffer *next;
        struct btrfs_buffer *cur;
        u64 blocknr;
        int ret;
        u32 refs;

        ret = lookup_block_ref(root, path->nodes[*level]->blocknr, &refs);
        BUG_ON(ret);
        /* shared subtree: just drop our reference on its root (at out:) */
        if (refs > 1)
                goto out;
        /*
         * walk down to the last node level and free all the leaves
         */
        while(*level > 0) {
                cur = path->nodes[*level];
                if (path->slots[*level] >=
                    btrfs_header_nritems(&cur->node.header))
                        break;
                blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]);
                ret = lookup_block_ref(root, blocknr, &refs);
                /*
                 * NOTE(review): refs is consumed before ret is checked;
                 * lookup_block_ref BUGs internally on failure, but the
                 * ordering looks accidental — confirm.
                 */
                if (refs != 1 || *level == 1) {
                        /* shared child, or a leaf: drop ref, don't descend */
                        path->slots[*level]++;
                        ret = btrfs_free_extent(root, blocknr, 1);
                        BUG_ON(ret);
                        continue;
                }
                BUG_ON(ret);
                /* exclusively-owned interior child: descend into it */
                next = read_tree_block(root, blocknr);
                if (path->nodes[*level-1])
                        btrfs_block_release(root, path->nodes[*level-1]);
                path->nodes[*level-1] = next;
                *level = btrfs_header_level(&next->node.header);
                path->slots[*level] = 0;
        }
out:
        /* free the block at the current level and pop it off the path */
        ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1);
        btrfs_block_release(root, path->nodes[*level]);
        path->nodes[*level] = NULL;
        *level += 1;
        BUG_ON(ret);
        return 0;
}
477
478 /*
479  * helper for dropping snapshots.  This walks back up the tree in the path
480  * to find the first node higher up where we haven't yet gone through
481  * all the slots
482  */
483 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
484                         int *level)
485 {
486         int i;
487         int slot;
488         int ret;
489         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
490                 slot = path->slots[i];
491                 if (slot <
492                     btrfs_header_nritems(&path->nodes[i]->node.header)- 1) {
493                         path->slots[i]++;
494                         *level = i;
495                         return 0;
496                 } else {
497                         ret = btrfs_free_extent(root,
498                                           path->nodes[*level]->blocknr, 1);
499                         btrfs_block_release(root, path->nodes[*level]);
500                         path->nodes[*level] = NULL;
501                         *level = i + 1;
502                         BUG_ON(ret);
503                 }
504         }
505         return 1;
506 }
507
508 /*
509  * drop the reference count on the tree rooted at 'snap'.  This traverses
510  * the tree freeing any blocks that have a ref count of zero after being
511  * decremented.
512  */
513 int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap)
514 {
515         int ret = 0;
516         int wret;
517         int level;
518         struct btrfs_path path;
519         int i;
520         int orig_level;
521
522         btrfs_init_path(&path);
523
524         level = btrfs_header_level(&snap->node.header);
525         orig_level = level;
526         path.nodes[level] = snap;
527         path.slots[level] = 0;
528         while(1) {
529                 wret = walk_down_tree(root, &path, &level);
530                 if (wret > 0)
531                         break;
532                 if (wret < 0)
533                         ret = wret;
534
535                 wret = walk_up_tree(root, &path, &level);
536                 if (wret > 0)
537                         break;
538                 if (wret < 0)
539                         ret = wret;
540         }
541         for (i = 0; i <= orig_level; i++) {
542                 if (path.nodes[i]) {
543                         btrfs_block_release(root, path.nodes[i]);
544                 }
545         }
546         return ret;
547 }