Fixup reference counting on cows
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7 #include "print-tree.h"
8
9 /*
10  * pending extents are blocks that we're trying to allocate in the extent
11  * map while trying to grow the map because of other allocations.  To avoid
12  * recursing, they are tagged in the radix tree and cleaned up after
13  * other allocations are done.  The pending tag is also used in the same
14  * manner for deletes.
15  */
16 #define CTREE_EXTENT_PENDING_ADD 0
17 #define CTREE_EXTENT_PENDING_DEL 1
18
19 static int inc_block_ref(struct ctree_root *root, u64 blocknr)
20 {
21         struct ctree_path path;
22         int ret;
23         struct key key;
24         struct leaf *l;
25         struct extent_item *item;
26         init_path(&path);
27         key.objectid = blocknr;
28         key.flags = 0;
29         key.offset = 1;
30         ret = search_slot(root->extent_root, &key, &path, 0, 1);
31         if (ret != 0)
32                 BUG();
33         BUG_ON(ret != 0);
34         l = &path.nodes[0]->leaf;
35         item = (struct extent_item *)(l->data +
36                                       l->items[path.slots[0]].offset);
37         item->refs++;
38
39         BUG_ON(list_empty(&path.nodes[0]->dirty));
40         release_path(root->extent_root, &path);
41         return 0;
42 }
43
44 static int lookup_block_ref(struct ctree_root *root, u64 blocknr, int *refs)
45 {
46         struct ctree_path path;
47         int ret;
48         struct key key;
49         struct leaf *l;
50         struct extent_item *item;
51         init_path(&path);
52         key.objectid = blocknr;
53         key.flags = 0;
54         key.offset = 1;
55         ret = search_slot(root->extent_root, &key, &path, 0, 0);
56         if (ret != 0)
57                 BUG();
58         l = &path.nodes[0]->leaf;
59         item = (struct extent_item *)(l->data +
60                                       l->items[path.slots[0]].offset);
61         *refs = item->refs;
62         release_path(root->extent_root, &path);
63         return 0;
64 }
65
66 int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf)
67 {
68         u64 blocknr;
69         int i;
70
71         if (root == root->extent_root)
72                 return 0;
73         if (is_leaf(buf->node.header.flags))
74                 return 0;
75
76         for (i = 0; i < buf->node.header.nritems; i++) {
77                 blocknr = buf->node.blockptrs[i];
78                 inc_block_ref(root, blocknr);
79         }
80         return 0;
81 }
82
83 int btrfs_finish_extent_commit(struct ctree_root *root)
84 {
85         struct ctree_root *extent_root = root->extent_root;
86         unsigned long gang[8];
87         int ret;
88         int i;
89
90         while(1) {
91                 ret = radix_tree_gang_lookup(&extent_root->pinned_radix,
92                                                  (void **)gang, 0,
93                                                  ARRAY_SIZE(gang));
94                 if (!ret)
95                         break;
96                 for (i = 0; i < ret; i++)
97                         radix_tree_delete(&extent_root->pinned_radix, gang[i]);
98         }
99         return 0;
100 }
101
102 /*
103  * remove an extent from the root, returns 0 on success
104  */
105 int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks)
106 {
107         struct ctree_path path;
108         struct key key;
109         struct ctree_root *extent_root = root->extent_root;
110         int ret;
111         struct item *item;
112         struct extent_item *ei;
113         key.objectid = blocknr;
114         key.flags = 0;
115         key.offset = num_blocks;
116
117         init_path(&path);
118         ret = search_slot(extent_root, &key, &path, -1, 1);
119         if (ret) {
120                 printf("failed to find %Lu\n", key.objectid);
121                 print_tree(extent_root, extent_root->node);
122                 printf("failed to find %Lu\n", key.objectid);
123                 BUG();
124         }
125         item = path.nodes[0]->leaf.items + path.slots[0];
126         ei = (struct extent_item *)(path.nodes[0]->leaf.data + item->offset);
127         BUG_ON(ei->refs == 0);
128         ei->refs--;
129         if (ei->refs == 0) {
130                 if (root == extent_root) {
131                         int err;
132                         radix_tree_preload(GFP_KERNEL);
133                         err = radix_tree_insert(&extent_root->pinned_radix,
134                                           blocknr, (void *)blocknr);
135                         BUG_ON(err);
136                         radix_tree_preload_end();
137                 }
138                 ret = del_item(extent_root, &path);
139                 if (ret)
140                         BUG();
141         }
142         release_path(extent_root, &path);
143         return ret;
144 }
145
146 /*
147  * insert all of the pending extents reserved during the original
148  * allocation.  (CTREE_EXTENT_PENDING).  Returns zero if it all worked out
149  */
150 static int insert_pending_extents(struct ctree_root *extent_root)
151 {
152         int ret;
153         struct key key;
154         struct extent_item item;
155         struct tree_buffer *gang[4];
156         int i;
157
158         // FIXME -ENOSPC
159         item.owner = extent_root->node->node.header.parentid;
160         item.refs = 1;
161         while(1) {
162                 ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix,
163                                                  (void **)gang, 0,
164                                                  ARRAY_SIZE(gang),
165                                                  CTREE_EXTENT_PENDING_ADD);
166                 if (!ret)
167                         break;
168                 for (i = 0; i < ret; i++) {
169                         key.objectid = gang[i]->blocknr;
170                         key.flags = 0;
171                         key.offset = 1;
172                         ret = insert_item(extent_root, &key, &item,
173                                           sizeof(item));
174                         if (ret) {
175                                 printf("%Lu already in tree\n", key.objectid);
176                                 print_tree(extent_root, extent_root->node);
177                                 BUG();
178                                 // FIXME undo it and return sane
179                                 return ret;
180                         }
181                         radix_tree_tag_clear(&extent_root->cache_radix,
182                                              gang[i]->blocknr,
183                                              CTREE_EXTENT_PENDING_ADD);
184                         tree_block_release(extent_root, gang[i]);
185                 }
186         }
187         return 0;
188 }
189
190 /*
191  * find all the blocks marked as pending in the radix tree and remove
192  * them from the extent map
193  */
194 static int del_pending_extents(struct ctree_root *extent_root)
195 {
196         int ret;
197         struct tree_buffer *gang[4];
198         int i;
199
200         while(1) {
201                 ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix,
202                                                  (void **)gang, 0,
203                                                  ARRAY_SIZE(gang),
204                                                  CTREE_EXTENT_PENDING_DEL);
205                 if (!ret)
206                         break;
207                 for (i = 0; i < ret; i++) {
208                         ret = __free_extent(extent_root, gang[i]->blocknr, 1);
209                         radix_tree_tag_clear(&extent_root->cache_radix,
210                                                 gang[i]->blocknr,
211                                                 CTREE_EXTENT_PENDING_DEL);
212                         tree_block_release(extent_root, gang[i]);
213                 }
214         }
215         return 0;
216 }
217
218 static int run_pending(struct ctree_root *extent_root)
219 {
220         while(radix_tree_tagged(&extent_root->cache_radix,
221                                 CTREE_EXTENT_PENDING_DEL) ||
222               radix_tree_tagged(&extent_root->cache_radix,
223                                 CTREE_EXTENT_PENDING_ADD)) {
224                 insert_pending_extents(extent_root);
225                 del_pending_extents(extent_root);
226         }
227         return 0;
228 }
229
230
231 /*
232  * remove an extent from the root, returns 0 on success
233  */
234 int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks)
235 {
236         struct key key;
237         struct ctree_root *extent_root = root->extent_root;
238         struct tree_buffer *t;
239         int pending_ret;
240         int ret;
241
242         if (root == extent_root) {
243                 t = find_tree_block(root, blocknr);
244                 if (radix_tree_tag_get(&root->cache_radix, blocknr,
245                                       CTREE_EXTENT_PENDING_ADD)) {
246                         radix_tree_tag_clear(&root->cache_radix,
247                                              blocknr,
248                                              CTREE_EXTENT_PENDING_ADD);
249                         /* once for us */
250                         tree_block_release(root, t);
251                         /* once for the pending add */
252                         tree_block_release(root, t);
253                 } else {
254                         radix_tree_tag_set(&root->cache_radix, blocknr,
255                                    CTREE_EXTENT_PENDING_DEL);
256                 }
257                 return 0;
258         }
259         key.objectid = blocknr;
260         key.flags = 0;
261         key.offset = num_blocks;
262         ret = __free_extent(root, blocknr, num_blocks);
263         pending_ret = run_pending(root->extent_root);
264         return ret ? ret : pending_ret;
265 }
266
267 /*
268  * walks the btree of allocated extents and find a hole of a given size.
269  * The key ins is changed to record the hole:
270  * ins->objectid == block start
271  * ins->flags = 0
272  * ins->offset == number of blocks
273  * Any available blocks before search_start are skipped.
274  */
275 static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks,
276                             u64 search_start, u64 search_end, struct key *ins)
277 {
278         struct ctree_path path;
279         struct key *key;
280         int ret;
281         u64 hole_size = 0;
282         int slot = 0;
283         u64 last_block;
284         int start_found;
285         struct leaf *l;
286         struct ctree_root * root = orig_root->extent_root;
287
288 check_failed:
289         init_path(&path);
290         ins->objectid = search_start;
291         ins->offset = 0;
292         ins->flags = 0;
293         start_found = 0;
294         ret = search_slot(root, ins, &path, 0, 0);
295         if (ret < 0)
296                 goto error;
297
298         while (1) {
299                 l = &path.nodes[0]->leaf;
300                 slot = path.slots[0];
301                 if (slot >= l->header.nritems) {
302                         ret = next_leaf(root, &path);
303                         if (ret == 0)
304                                 continue;
305                         if (ret < 0)
306                                 goto error;
307                         if (!start_found) {
308                                 ins->objectid = search_start;
309                                 ins->offset = num_blocks;
310                                 start_found = 1;
311                                 goto check_pending;
312                         }
313                         ins->objectid = last_block > search_start ?
314                                         last_block : search_start;
315                         ins->offset = num_blocks;
316                         goto check_pending;
317                 }
318                 key = &l->items[slot].key;
319                 if (key->objectid >= search_start) {
320                         if (start_found) {
321                                 hole_size = key->objectid - last_block;
322                                 if (hole_size > num_blocks) {
323                                         ins->objectid = last_block;
324                                         ins->offset = num_blocks;
325                                         goto check_pending;
326                                 }
327                         } else
328                                 start_found = 1;
329                         last_block = key->objectid + key->offset;
330                 }
331                 path.slots[0]++;
332         }
333         // FIXME -ENOSPC
334 check_pending:
335         /* we have to make sure we didn't find an extent that has already
336          * been allocated by the map tree or the original allocation
337          */
338         release_path(root, &path);
339         BUG_ON(ins->objectid < search_start);
340         if (1 || orig_root->extent_root == orig_root) {
341                 BUG_ON(num_blocks != 1);
342                 if ((root->current_insert.objectid <= ins->objectid &&
343                     root->current_insert.objectid +
344                     root->current_insert.offset > ins->objectid) ||
345                    (root->current_insert.objectid > ins->objectid &&
346                     root->current_insert.objectid <= ins->objectid +
347                     ins->offset) ||
348                    radix_tree_lookup(&root->pinned_radix, ins->objectid) ||
349                    radix_tree_tag_get(&root->cache_radix, ins->objectid,
350                                       CTREE_EXTENT_PENDING_ADD)) {
351                         search_start = ins->objectid + 1;
352                         goto check_failed;
353                 }
354         }
355         if (ins->offset != 1)
356                 BUG();
357         return 0;
358 error:
359         release_path(root, &path);
360         return ret;
361 }
362
363 /*
364  * finds a free extent and does all the dirty work required for allocation
365  * returns the key for the extent through ins, and a tree buffer for
366  * the first block of the extent through buf.
367  *
368  * returns 0 if everything worked, non-zero otherwise.
369  */
370 int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start,
371                          u64 search_end, u64 owner, struct key *ins,
372                          struct tree_buffer **buf)
373 {
374         int ret;
375         int pending_ret;
376         struct extent_item extent_item;
377         extent_item.refs = 1;
378         extent_item.owner = owner;
379
380         ret = find_free_extent(root, num_blocks, search_start, search_end, ins);
381         if (ret)
382                 return ret;
383         if (root != root->extent_root) {
384                 memcpy(&root->extent_root->current_insert, ins, sizeof(*ins));
385                 ret = insert_item(root->extent_root, ins, &extent_item,
386                                   sizeof(extent_item));
387                 memset(&root->extent_root->current_insert, 0,
388                        sizeof(struct key));
389                 pending_ret = run_pending(root->extent_root);
390                 if (ret)
391                         return ret;
392                 if (pending_ret)
393                         return pending_ret;
394                 *buf = find_tree_block(root, ins->objectid);
395                 dirty_tree_block(root, *buf);
396                 return 0;
397         }
398         /* we're allocating an extent for the extent tree, don't recurse */
399         BUG_ON(ins->offset != 1);
400         *buf = find_tree_block(root, ins->objectid);
401         BUG_ON(!*buf);
402         radix_tree_tag_set(&root->cache_radix, ins->objectid,
403                            CTREE_EXTENT_PENDING_ADD);
404         (*buf)->count++;
405         dirty_tree_block(root, *buf);
406         return 0;
407
408 }
409
410 /*
411  * helper function to allocate a block for a given tree
412  * returns the tree buffer or NULL.
413  */
414 struct tree_buffer *alloc_free_block(struct ctree_root *root)
415 {
416         struct key ins;
417         int ret;
418         struct tree_buffer *buf = NULL;
419
420         ret = alloc_extent(root, 1, 0, (unsigned long)-1,
421                            root->node->node.header.parentid,
422                            &ins, &buf);
423         if (ret) {
424                 BUG();
425                 return NULL;
426         }
427         if (root != root->extent_root)
428                 BUG_ON(radix_tree_tag_get(&root->extent_root->cache_radix,
429                                           buf->blocknr,
430                                           CTREE_EXTENT_PENDING_ADD));
431         return buf;
432 }
433
434 int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap)
435 {
436         int ret;
437         int level;
438         int refs;
439         u64 blocknr = snap->blocknr;
440
441         level = node_level(snap->node.header.flags);
442         ret = lookup_block_ref(root, snap->blocknr, &refs);
443         BUG_ON(ret);
444         if (refs == 1 && level != 0) {
445                 struct node *n = &snap->node;
446                 struct tree_buffer *b;
447                 int i;
448                 for (i = 0; i < n->header.nritems; i++) {
449                         b = read_tree_block(root, n->blockptrs[i]);
450                         /* FIXME, don't recurse here */
451                         ret = btrfs_drop_snapshot(root, b);
452                         BUG_ON(ret);
453                         tree_block_release(root, b);
454                 }
455         }
456         ret = free_extent(root, blocknr, 1);
457         BUG_ON(ret);
458         return 0;
459 }
460