767867f98fb76d326ba6d0c83365d562d66f3f13
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "radix-tree.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "crc32c.h"
28 #include "volumes.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40
41 static int cache_block_group(struct btrfs_root *root,
42                              struct btrfs_block_group_cache *block_group)
43 {
44         struct btrfs_path *path;
45         int ret;
46         struct btrfs_key key;
47         struct extent_buffer *leaf;
48         struct extent_io_tree *free_space_cache;
49         int slot;
50         u64 last = 0;
51         u64 hole_size;
52         u64 first_free;
53         int found = 0;
54
55         if (!block_group)
56                 return 0;
57
58         root = root->fs_info->extent_root;
59         free_space_cache = &root->fs_info->free_space_cache;
60
61         if (block_group->cached)
62                 return 0;
63
64         path = btrfs_alloc_path();
65         if (!path)
66                 return -ENOMEM;
67
68         path->reada = 2;
69         first_free = block_group->key.objectid;
70         key.objectid = block_group->key.objectid;
71         key.offset = 0;
72         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
73         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
74         if (ret < 0)
75                 return ret;
76         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
77         if (ret < 0)
78                 return ret;
79         if (ret == 0) {
80                 leaf = path->nodes[0];
81                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
82                 if (key.objectid + key.offset > first_free)
83                         first_free = key.objectid + key.offset;
84         }
85         while(1) {
86                 leaf = path->nodes[0];
87                 slot = path->slots[0];
88                 if (slot >= btrfs_header_nritems(leaf)) {
89                         ret = btrfs_next_leaf(root, path);
90                         if (ret < 0)
91                                 goto err;
92                         if (ret == 0) {
93                                 continue;
94                         } else {
95                                 break;
96                         }
97                 }
98                 btrfs_item_key_to_cpu(leaf, &key, slot);
99                 if (key.objectid < block_group->key.objectid) {
100                         goto next;
101                 }
102                 if (key.objectid >= block_group->key.objectid +
103                     block_group->key.offset) {
104                         break;
105                 }
106
107                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
108                         if (!found) {
109                                 last = first_free;
110                                 found = 1;
111                         }
112                         if (key.objectid > last) {
113                                 hole_size = key.objectid - last;
114                                 set_extent_dirty(free_space_cache, last,
115                                                  last + hole_size - 1,
116                                                  GFP_NOFS);
117                         }
118                         last = key.objectid + key.offset;
119                 }
120 next:
121                 path->slots[0]++;
122         }
123
124         if (!found)
125                 last = first_free;
126         if (block_group->key.objectid +
127             block_group->key.offset > last) {
128                 hole_size = block_group->key.objectid +
129                         block_group->key.offset - last;
130                 set_extent_dirty(free_space_cache, last,
131                                  last + hole_size - 1, GFP_NOFS);
132         }
133         block_group->cached = 1;
134 err:
135         btrfs_free_path(path);
136         return 0;
137 }
138
139 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
140                                                          btrfs_fs_info *info,
141                                                          u64 bytenr)
142 {
143         struct extent_io_tree *block_group_cache;
144         struct btrfs_block_group_cache *block_group = NULL;
145         u64 ptr;
146         u64 start;
147         u64 end;
148         int ret;
149
150         block_group_cache = &info->block_group_cache;
151         ret = find_first_extent_bit(block_group_cache,
152                                     bytenr, &start, &end,
153                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
154                                     BLOCK_GROUP_SYSTEM);
155         if (ret) {
156                 return NULL;
157         }
158         ret = get_state_private(block_group_cache, start, &ptr);
159         if (ret)
160                 return NULL;
161
162         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
163         if (block_group->key.objectid <= bytenr && bytenr <
164             block_group->key.objectid + block_group->key.offset)
165                 return block_group;
166         return NULL;
167 }
168
169 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
170 {
171         return (cache->flags & bits) == bits;
172 }
173
174 static int noinline find_search_start(struct btrfs_root *root,
175                               struct btrfs_block_group_cache **cache_ret,
176                               u64 *start_ret, int num, int data)
177 {
178         int ret;
179         struct btrfs_block_group_cache *cache = *cache_ret;
180         u64 last;
181         u64 start = 0;
182         u64 end = 0;
183         u64 cache_miss = 0;
184         u64 search_start = *start_ret;
185         int wrapped = 0;
186
187         if (!cache) {
188                 goto out;
189         }
190 again:
191         ret = cache_block_group(root, cache);
192         if (ret)
193                 goto out;
194
195         last = max(search_start, cache->key.objectid);
196         if (!block_group_bits(cache, data)) {
197                 goto new_group;
198         }
199
200         while(1) {
201                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
202                                             last, &start, &end, EXTENT_DIRTY);
203                 if (ret) {
204                         if (!cache_miss)
205                                 cache_miss = last;
206                         goto new_group;
207                 }
208
209                 start = max(last, start);
210                 last = end + 1;
211                 if (last - start < num) {
212                         if (last == cache->key.objectid + cache->key.offset)
213                                 cache_miss = start;
214                         continue;
215                 }
216                 if (start + num > cache->key.objectid + cache->key.offset)
217                         goto new_group;
218                 *start_ret = start;
219                 return 0;
220         }
221 out:
222         cache = btrfs_lookup_block_group(root->fs_info, search_start);
223         if (!cache) {
224                 printk("Unable to find block group for %llu\n",
225                         (unsigned long long)search_start);
226                 WARN_ON(1);
227         }
228         return -ENOSPC;
229
230 new_group:
231         last = cache->key.objectid + cache->key.offset;
232 wrapped:
233         cache = btrfs_lookup_block_group(root->fs_info, last);
234         if (!cache) {
235 no_cache:
236                 if (!wrapped) {
237                         wrapped = 1;
238                         last = search_start;
239                         goto wrapped;
240                 }
241                 goto out;
242         }
243         if (cache_miss && !cache->cached) {
244                 cache_block_group(root, cache);
245                 last = cache_miss;
246                 cache = btrfs_lookup_block_group(root->fs_info, last);
247         }
248         cache = btrfs_find_block_group(root, cache, last, data, 0);
249         if (!cache)
250                 goto no_cache;
251         *cache_ret = cache;
252         cache_miss = 0;
253         goto again;
254 }
255
256 static u64 div_factor(u64 num, int factor)
257 {
258         if (factor == 10)
259                 return num;
260         num *= factor;
261         num /= 10;
262         return num;
263 }
264
265 static int block_group_state_bits(u64 flags)
266 {
267         int bits = 0;
268         if (flags & BTRFS_BLOCK_GROUP_DATA)
269                 bits |= BLOCK_GROUP_DATA;
270         if (flags & BTRFS_BLOCK_GROUP_METADATA)
271                 bits |= BLOCK_GROUP_METADATA;
272         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
273                 bits |= BLOCK_GROUP_SYSTEM;
274         return bits;
275 }
276
277 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
278                                                  struct btrfs_block_group_cache
279                                                  *hint, u64 search_start,
280                                                  int data, int owner)
281 {
282         struct btrfs_block_group_cache *cache;
283         struct extent_io_tree *block_group_cache;
284         struct btrfs_block_group_cache *found_group = NULL;
285         struct btrfs_fs_info *info = root->fs_info;
286         u64 used;
287         u64 last = 0;
288         u64 hint_last;
289         u64 start;
290         u64 end;
291         u64 free_check;
292         u64 ptr;
293         int bit;
294         int ret;
295         int full_search = 0;
296         int factor = 10;
297
298         block_group_cache = &info->block_group_cache;
299
300         if (!owner)
301                 factor = 10;
302
303         bit = block_group_state_bits(data);
304
305         if (search_start) {
306                 struct btrfs_block_group_cache *shint;
307                 shint = btrfs_lookup_block_group(info, search_start);
308                 if (shint && block_group_bits(shint, data)) {
309                         used = btrfs_block_group_used(&shint->item);
310                         if (used + shint->pinned <
311                             div_factor(shint->key.offset, factor)) {
312                                 return shint;
313                         }
314                 }
315         }
316         if (hint && block_group_bits(hint, data)) {
317                 used = btrfs_block_group_used(&hint->item);
318                 if (used + hint->pinned <
319                     div_factor(hint->key.offset, factor)) {
320                         return hint;
321                 }
322                 last = hint->key.objectid + hint->key.offset;
323                 hint_last = last;
324         } else {
325                 if (hint)
326                         hint_last = max(hint->key.objectid, search_start);
327                 else
328                         hint_last = search_start;
329
330                 last = hint_last;
331         }
332 again:
333         while(1) {
334                 ret = find_first_extent_bit(block_group_cache, last,
335                                             &start, &end, bit);
336                 if (ret)
337                         break;
338
339                 ret = get_state_private(block_group_cache, start, &ptr);
340                 if (ret)
341                         break;
342
343                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
344                 last = cache->key.objectid + cache->key.offset;
345                 used = btrfs_block_group_used(&cache->item);
346
347                 if (block_group_bits(cache, data)) {
348                         if (full_search)
349                                 free_check = cache->key.offset;
350                         else
351                                 free_check = div_factor(cache->key.offset,
352                                                         factor);
353
354                         if (used + cache->pinned < free_check) {
355                                 found_group = cache;
356                                 goto found;
357                         }
358                 }
359                 cond_resched();
360         }
361         if (!full_search) {
362                 last = search_start;
363                 full_search = 1;
364                 goto again;
365         }
366 found:
367         return found_group;
368 }
369
370 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
371                            u64 owner, u64 owner_offset)
372 {
373         u32 high_crc = ~(u32)0;
374         u32 low_crc = ~(u32)0;
375         __le64 lenum;
376
377         lenum = cpu_to_le64(root_objectid);
378         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
379         lenum = cpu_to_le64(ref_generation);
380         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
381         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
382                 lenum = cpu_to_le64(owner);
383                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
384                 lenum = cpu_to_le64(owner_offset);
385                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
386         }
387         return ((u64)high_crc << 32) | (u64)low_crc;
388 }
389
390 static int match_extent_ref(struct extent_buffer *leaf,
391                             struct btrfs_extent_ref *disk_ref,
392                             struct btrfs_extent_ref *cpu_ref)
393 {
394         int ret;
395         int len;
396
397         if (cpu_ref->objectid)
398                 len = sizeof(*cpu_ref);
399         else
400                 len = 2 * sizeof(u64);
401         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
402                                    len);
403         return ret == 0;
404 }
405
406 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
407                                           struct btrfs_root *root,
408                                           struct btrfs_path *path, u64 bytenr,
409                                           u64 root_objectid,
410                                           u64 ref_generation, u64 owner,
411                                           u64 owner_offset, int del)
412 {
413         u64 hash;
414         struct btrfs_key key;
415         struct btrfs_key found_key;
416         struct btrfs_extent_ref ref;
417         struct extent_buffer *leaf;
418         struct btrfs_extent_ref *disk_ref;
419         int ret;
420         int ret2;
421
422         btrfs_set_stack_ref_root(&ref, root_objectid);
423         btrfs_set_stack_ref_generation(&ref, ref_generation);
424         btrfs_set_stack_ref_objectid(&ref, owner);
425         btrfs_set_stack_ref_offset(&ref, owner_offset);
426
427         hash = hash_extent_ref(root_objectid, ref_generation, owner,
428                                owner_offset);
429         key.offset = hash;
430         key.objectid = bytenr;
431         key.type = BTRFS_EXTENT_REF_KEY;
432
433         while (1) {
434                 ret = btrfs_search_slot(trans, root, &key, path,
435                                         del ? -1 : 0, del);
436                 if (ret < 0)
437                         goto out;
438                 leaf = path->nodes[0];
439                 if (ret != 0) {
440                         u32 nritems = btrfs_header_nritems(leaf);
441                         if (path->slots[0] >= nritems) {
442                                 ret2 = btrfs_next_leaf(root, path);
443                                 if (ret2)
444                                         goto out;
445                                 leaf = path->nodes[0];
446                         }
447                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
448                         if (found_key.objectid != bytenr ||
449                             found_key.type != BTRFS_EXTENT_REF_KEY)
450                                 goto out;
451                         key.offset = found_key.offset;
452                         if (del) {
453                                 btrfs_release_path(root, path);
454                                 continue;
455                         }
456                 }
457                 disk_ref = btrfs_item_ptr(path->nodes[0],
458                                           path->slots[0],
459                                           struct btrfs_extent_ref);
460                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
461                         ret = 0;
462                         goto out;
463                 }
464                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
465                 key.offset = found_key.offset + 1;
466                 btrfs_release_path(root, path);
467         }
468 out:
469         return ret;
470 }
471
472 /*
473  * Back reference rules.  Back refs have three main goals:
474  *
475  * 1) differentiate between all holders of references to an extent so that
476  *    when a reference is dropped we can make sure it was a valid reference
477  *    before freeing the extent.
478  *
479  * 2) Provide enough information to quickly find the holders of an extent
480  *    if we notice a given block is corrupted or bad.
481  *
482  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
483  *    maintenance.  This is actually the same as #2, but with a slightly
484  *    different use case.
485  *
486  * File extents can be referenced by:
487  *
488  * - multiple snapshots, subvolumes, or different generations in one subvol
489  * - different files inside a single subvolume (in theory, not implemented yet)
490  * - different offsets inside a file (bookend extents in file.c)
491  *
492  * The extent ref structure has fields for:
493  *
494  * - Objectid of the subvolume root
495  * - Generation number of the tree holding the reference
496  * - objectid of the file holding the reference
497  * - offset in the file corresponding to the key holding the reference
498  *
499  * When a file extent is allocated the fields are filled in:
500  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
501  *
502  * When a leaf is cow'd new references are added for every file extent found
503  * in the leaf.  It looks the same as the create case, but trans->transid
504  * will be different when the block is cow'd.
505  *
506  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
507  *
508  * When a file extent is removed either during snapshot deletion or file
509  * truncation, the corresponding back reference is found
510  * by searching for:
511  *
512  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
513  *      inode objectid, offset in file)
514  *
515  * Btree extents can be referenced by:
516  *
517  * - Different subvolumes
518  * - Different generations of the same subvolume
519  *
520  * Storing sufficient information for a full reverse mapping of a btree
521  * block would require storing the lowest key of the block in the backref,
522  * and it would require updating that lowest key either before write out or
523  * every time it changed.  Instead, the objectid of the lowest key is stored
524  * along with the level of the tree block.  This provides a hint
525  * about where in the btree the block can be found.  Searches through the
526  * btree only need to look for a pointer to that block, so they stop one
527  * level higher than the level recorded in the backref.
528  *
529  * Some btrees do not do reference counting on their extents.  These
530  * include the extent tree and the tree of tree roots.  Backrefs for these
531  * trees always have a generation of zero.
532  *
533  * When a tree block is created, back references are inserted:
534  *
535  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
536  *
537  * When a tree block is cow'd in a reference counted root,
538  * new back references are added for all the blocks it points to.
539  * These are of the form (trans->transid will have increased since creation):
540  *
541  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
542  *
543  * Because the lowest_key_objectid and the level are just hints
544  * they are not used when backrefs are deleted.  When a backref is deleted:
545  *
546  * if backref was for a tree root:
547  *     root_objectid = root->root_key.objectid
548  * else
549  *     root_objectid = btrfs_header_owner(parent)
550  *
551  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
552  *
553  * Back Reference Key hashing:
554  *
555  * Back references have four fields, each 64 bits long.  Unfortunately,
556  * This is hashed into a single 64 bit number and placed into the key offset.
557  * The key objectid corresponds to the first byte in the extent, and the
558  * key type is set to BTRFS_EXTENT_REF_KEY
559  */
560 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
561                                  struct btrfs_root *root,
562                                  struct btrfs_path *path, u64 bytenr,
563                                  u64 root_objectid, u64 ref_generation,
564                                  u64 owner, u64 owner_offset)
565 {
566         u64 hash;
567         struct btrfs_key key;
568         struct btrfs_extent_ref ref;
569         struct btrfs_extent_ref *disk_ref;
570         int ret;
571
572         btrfs_set_stack_ref_root(&ref, root_objectid);
573         btrfs_set_stack_ref_generation(&ref, ref_generation);
574         btrfs_set_stack_ref_objectid(&ref, owner);
575         btrfs_set_stack_ref_offset(&ref, owner_offset);
576
577         hash = hash_extent_ref(root_objectid, ref_generation, owner,
578                                owner_offset);
579         key.offset = hash;
580         key.objectid = bytenr;
581         key.type = BTRFS_EXTENT_REF_KEY;
582
583         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
584         while (ret == -EEXIST) {
585                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
586                                           struct btrfs_extent_ref);
587                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
588                         goto out;
589                 key.offset++;
590                 btrfs_release_path(root, path);
591                 ret = btrfs_insert_empty_item(trans, root, path, &key,
592                                               sizeof(ref));
593         }
594         if (ret)
595                 goto out;
596         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
597                                   struct btrfs_extent_ref);
598         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
599                             sizeof(ref));
600         btrfs_mark_buffer_dirty(path->nodes[0]);
601 out:
602         btrfs_release_path(root, path);
603         return ret;
604 }
605
606 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
607                                 struct btrfs_root *root,
608                                 u64 bytenr, u64 num_bytes,
609                                 u64 root_objectid, u64 ref_generation,
610                                 u64 owner, u64 owner_offset)
611 {
612         struct btrfs_path *path;
613         int ret;
614         struct btrfs_key key;
615         struct extent_buffer *l;
616         struct btrfs_extent_item *item;
617         u32 refs;
618
619         WARN_ON(num_bytes < root->sectorsize);
620         path = btrfs_alloc_path();
621         if (!path)
622                 return -ENOMEM;
623
624         key.objectid = bytenr;
625         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
626         key.offset = num_bytes;
627         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
628                                 0, 1);
629         if (ret < 0)
630                 return ret;
631         if (ret != 0) {
632                 BUG();
633         }
634         BUG_ON(ret != 0);
635         l = path->nodes[0];
636         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
637         refs = btrfs_extent_refs(l, item);
638         btrfs_set_extent_refs(l, item, refs + 1);
639         btrfs_mark_buffer_dirty(path->nodes[0]);
640
641         btrfs_release_path(root->fs_info->extent_root, path);
642
643         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
644                                           path, bytenr, root_objectid,
645                                           ref_generation, owner, owner_offset);
646         BUG_ON(ret);
647         finish_current_insert(trans, root->fs_info->extent_root);
648         del_pending_extents(trans, root->fs_info->extent_root);
649
650         btrfs_free_path(path);
651         return 0;
652 }
653
654 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
655                          struct btrfs_root *root)
656 {
657         finish_current_insert(trans, root->fs_info->extent_root);
658         del_pending_extents(trans, root->fs_info->extent_root);
659         return 0;
660 }
661
662 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
663                              struct btrfs_root *root, u64 bytenr,
664                              u64 num_bytes, u32 *refs)
665 {
666         struct btrfs_path *path;
667         int ret;
668         struct btrfs_key key;
669         struct extent_buffer *l;
670         struct btrfs_extent_item *item;
671
672         WARN_ON(num_bytes < root->sectorsize);
673         path = btrfs_alloc_path();
674         key.objectid = bytenr;
675         key.offset = num_bytes;
676         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
677         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
678                                 0, 0);
679         if (ret < 0)
680                 goto out;
681         if (ret != 0) {
682                 btrfs_print_leaf(root, path->nodes[0]);
683                 printk("failed to find block number %llu\n",
684                         (unsigned long long)bytenr);
685                 BUG();
686         }
687         l = path->nodes[0];
688         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
689         *refs = btrfs_extent_refs(l, item);
690 out:
691         btrfs_free_path(path);
692         return 0;
693 }
694
695 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
696                                   struct btrfs_path *count_path,
697                                   u64 first_extent)
698 {
699         struct btrfs_root *extent_root = root->fs_info->extent_root;
700         struct btrfs_path *path;
701         u64 bytenr;
702         u64 found_objectid;
703         u64 root_objectid = root->root_key.objectid;
704         u32 total_count = 0;
705         u32 cur_count;
706         u32 refs;
707         u32 nritems;
708         int ret;
709         struct btrfs_key key;
710         struct btrfs_key found_key;
711         struct extent_buffer *l;
712         struct btrfs_extent_item *item;
713         struct btrfs_extent_ref *ref_item;
714         int level = -1;
715
716         path = btrfs_alloc_path();
717 again:
718         if (level == -1)
719                 bytenr = first_extent;
720         else
721                 bytenr = count_path->nodes[level]->start;
722
723         cur_count = 0;
724         key.objectid = bytenr;
725         key.offset = 0;
726
727         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
728         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
729         if (ret < 0)
730                 goto out;
731         BUG_ON(ret == 0);
732
733         l = path->nodes[0];
734         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
735
736         if (found_key.objectid != bytenr ||
737             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
738                 goto out;
739         }
740
741         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
742         refs = btrfs_extent_refs(l, item);
743         while (1) {
744                 nritems = btrfs_header_nritems(l);
745                 if (path->slots[0] >= nritems) {
746                         ret = btrfs_next_leaf(extent_root, path);
747                         if (ret == 0)
748                                 continue;
749                         break;
750                 }
751                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
752                 if (found_key.objectid != bytenr)
753                         break;
754                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
755                         path->slots[0]++;
756                         continue;
757                 }
758
759                 cur_count++;
760                 ref_item = btrfs_item_ptr(l, path->slots[0],
761                                           struct btrfs_extent_ref);
762                 found_objectid = btrfs_ref_root(l, ref_item);
763
764                 if (found_objectid != root_objectid) {
765                         total_count = 2;
766                         goto out;
767                 }
768                 total_count = 1;
769                 path->slots[0]++;
770         }
771         if (cur_count == 0) {
772                 total_count = 0;
773                 goto out;
774         }
775         if (level >= 0 && root->node == count_path->nodes[level])
776                 goto out;
777         level++;
778         btrfs_release_path(root, path);
779         goto again;
780
781 out:
782         btrfs_free_path(path);
783         return total_count;
784 }
785 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
786                        struct btrfs_root *root, u64 owner_objectid)
787 {
788         u64 generation;
789         u64 key_objectid;
790         u64 level;
791         u32 nritems;
792         struct btrfs_disk_key disk_key;
793
794         level = btrfs_header_level(root->node);
795         generation = trans->transid;
796         nritems = btrfs_header_nritems(root->node);
797         if (nritems > 0) {
798                 if (level == 0)
799                         btrfs_item_key(root->node, &disk_key, 0);
800                 else
801                         btrfs_node_key(root->node, &disk_key, 0);
802                 key_objectid = btrfs_disk_key_objectid(&disk_key);
803         } else {
804                 key_objectid = 0;
805         }
806         return btrfs_inc_extent_ref(trans, root, root->node->start,
807                                     root->node->len, owner_objectid,
808                                     generation, level, key_objectid);
809 }
810
811 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
812                   struct extent_buffer *buf)
813 {
814         u64 bytenr;
815         u32 nritems;
816         struct btrfs_key key;
817         struct btrfs_file_extent_item *fi;
818         int i;
819         int level;
820         int ret;
821         int faili;
822
823         if (!root->ref_cows)
824                 return 0;
825
826         level = btrfs_header_level(buf);
827         nritems = btrfs_header_nritems(buf);
828         for (i = 0; i < nritems; i++) {
829                 if (level == 0) {
830                         u64 disk_bytenr;
831                         btrfs_item_key_to_cpu(buf, &key, i);
832                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
833                                 continue;
834                         fi = btrfs_item_ptr(buf, i,
835                                             struct btrfs_file_extent_item);
836                         if (btrfs_file_extent_type(buf, fi) ==
837                             BTRFS_FILE_EXTENT_INLINE)
838                                 continue;
839                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
840                         if (disk_bytenr == 0)
841                                 continue;
842                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
843                                     btrfs_file_extent_disk_num_bytes(buf, fi),
844                                     root->root_key.objectid, trans->transid,
845                                     key.objectid, key.offset);
846                         if (ret) {
847                                 faili = i;
848                                 goto fail;
849                         }
850                 } else {
851                         bytenr = btrfs_node_blockptr(buf, i);
852                         btrfs_node_key_to_cpu(buf, &key, i);
853                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
854                                            btrfs_level_size(root, level - 1),
855                                            root->root_key.objectid,
856                                            trans->transid,
857                                            level - 1, key.objectid);
858                         if (ret) {
859                                 faili = i;
860                                 goto fail;
861                         }
862                 }
863         }
864         return 0;
865 fail:
866         WARN_ON(1);
867 #if 0
868         for (i =0; i < faili; i++) {
869                 if (level == 0) {
870                         u64 disk_bytenr;
871                         btrfs_item_key_to_cpu(buf, &key, i);
872                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
873                                 continue;
874                         fi = btrfs_item_ptr(buf, i,
875                                             struct btrfs_file_extent_item);
876                         if (btrfs_file_extent_type(buf, fi) ==
877                             BTRFS_FILE_EXTENT_INLINE)
878                                 continue;
879                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
880                         if (disk_bytenr == 0)
881                                 continue;
882                         err = btrfs_free_extent(trans, root, disk_bytenr,
883                                     btrfs_file_extent_disk_num_bytes(buf,
884                                                                       fi), 0);
885                         BUG_ON(err);
886                 } else {
887                         bytenr = btrfs_node_blockptr(buf, i);
888                         err = btrfs_free_extent(trans, root, bytenr,
889                                         btrfs_level_size(root, level - 1), 0);
890                         BUG_ON(err);
891                 }
892         }
893 #endif
894         return ret;
895 }
896
897 static int write_one_cache_group(struct btrfs_trans_handle *trans,
898                                  struct btrfs_root *root,
899                                  struct btrfs_path *path,
900                                  struct btrfs_block_group_cache *cache)
901 {
902         int ret;
903         int pending_ret;
904         struct btrfs_root *extent_root = root->fs_info->extent_root;
905         unsigned long bi;
906         struct extent_buffer *leaf;
907
908         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
909         if (ret < 0)
910                 goto fail;
911         BUG_ON(ret);
912
913         leaf = path->nodes[0];
914         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
915         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
916         btrfs_mark_buffer_dirty(leaf);
917         btrfs_release_path(extent_root, path);
918 fail:
919         finish_current_insert(trans, extent_root);
920         pending_ret = del_pending_extents(trans, extent_root);
921         if (ret)
922                 return ret;
923         if (pending_ret)
924                 return pending_ret;
925         return 0;
926
927 }
928
929 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
930                                    struct btrfs_root *root)
931 {
932         struct extent_io_tree *block_group_cache;
933         struct btrfs_block_group_cache *cache;
934         int ret;
935         int err = 0;
936         int werr = 0;
937         struct btrfs_path *path;
938         u64 last = 0;
939         u64 start;
940         u64 end;
941         u64 ptr;
942
943         block_group_cache = &root->fs_info->block_group_cache;
944         path = btrfs_alloc_path();
945         if (!path)
946                 return -ENOMEM;
947
948         while(1) {
949                 ret = find_first_extent_bit(block_group_cache, last,
950                                             &start, &end, BLOCK_GROUP_DIRTY);
951                 if (ret)
952                         break;
953
954                 last = end + 1;
955                 ret = get_state_private(block_group_cache, start, &ptr);
956                 if (ret)
957                         break;
958                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
959                 err = write_one_cache_group(trans, root,
960                                             path, cache);
961                 /*
962                  * if we fail to write the cache group, we want
963                  * to keep it marked dirty in hopes that a later
964                  * write will work
965                  */
966                 if (err) {
967                         werr = err;
968                         continue;
969                 }
970                 clear_extent_bits(block_group_cache, start, end,
971                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
972         }
973         btrfs_free_path(path);
974         return werr;
975 }
976
977 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
978                                                   u64 flags)
979 {
980         struct list_head *head = &info->space_info;
981         struct list_head *cur;
982         struct btrfs_space_info *found;
983         list_for_each(cur, head) {
984                 found = list_entry(cur, struct btrfs_space_info, list);
985                 if (found->flags == flags)
986                         return found;
987         }
988         return NULL;
989
990 }
991
992 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
993                              u64 total_bytes, u64 bytes_used,
994                              struct btrfs_space_info **space_info)
995 {
996         struct btrfs_space_info *found;
997
998         found = __find_space_info(info, flags);
999         if (found) {
1000                 found->total_bytes += total_bytes;
1001                 found->bytes_used += bytes_used;
1002                 WARN_ON(found->total_bytes < found->bytes_used);
1003                 *space_info = found;
1004                 return 0;
1005         }
1006         found = kmalloc(sizeof(*found), GFP_NOFS);
1007         if (!found)
1008                 return -ENOMEM;
1009
1010         list_add(&found->list, &info->space_info);
1011         found->flags = flags;
1012         found->total_bytes = total_bytes;
1013         found->bytes_used = bytes_used;
1014         found->bytes_pinned = 0;
1015         found->full = 0;
1016         *space_info = found;
1017         return 0;
1018 }
1019
1020
1021 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1022 {
1023         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1024                                    BTRFS_BLOCK_GROUP_RAID1 |
1025                                    BTRFS_BLOCK_GROUP_DUP);
1026         if (extra_flags) {
1027                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1028                         fs_info->avail_data_alloc_bits |= extra_flags;
1029                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1030                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1031                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1032                         fs_info->avail_system_alloc_bits |= extra_flags;
1033         }
1034 }
1035
1036 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1037                           struct btrfs_root *extent_root, u64 alloc_bytes,
1038                           u64 flags)
1039 {
1040         struct btrfs_space_info *space_info;
1041         u64 thresh;
1042         u64 start;
1043         u64 num_bytes;
1044         int ret;
1045
1046         space_info = __find_space_info(extent_root->fs_info, flags);
1047         if (!space_info) {
1048                 ret = update_space_info(extent_root->fs_info, flags,
1049                                         0, 0, &space_info);
1050                 BUG_ON(ret);
1051         }
1052         BUG_ON(!space_info);
1053
1054         if (space_info->full)
1055                 return 0;
1056
1057         thresh = div_factor(space_info->total_bytes, 7);
1058         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1059             thresh)
1060                 return 0;
1061
1062         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1063         if (ret == -ENOSPC) {
1064                 space_info->full = 1;
1065                 return 0;
1066         }
1067
1068         BUG_ON(ret);
1069
1070         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1071                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1072         BUG_ON(ret);
1073         return 0;
1074 }
1075
1076 static int update_block_group(struct btrfs_trans_handle *trans,
1077                               struct btrfs_root *root,
1078                               u64 bytenr, u64 num_bytes, int alloc,
1079                               int mark_free)
1080 {
1081         struct btrfs_block_group_cache *cache;
1082         struct btrfs_fs_info *info = root->fs_info;
1083         u64 total = num_bytes;
1084         u64 old_val;
1085         u64 byte_in_group;
1086         u64 start;
1087         u64 end;
1088
1089         while(total) {
1090                 cache = btrfs_lookup_block_group(info, bytenr);
1091                 if (!cache) {
1092                         return -1;
1093                 }
1094                 byte_in_group = bytenr - cache->key.objectid;
1095                 WARN_ON(byte_in_group > cache->key.offset);
1096                 start = cache->key.objectid;
1097                 end = start + cache->key.offset - 1;
1098                 set_extent_bits(&info->block_group_cache, start, end,
1099                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1100
1101                 old_val = btrfs_block_group_used(&cache->item);
1102                 num_bytes = min(total, cache->key.offset - byte_in_group);
1103                 if (alloc) {
1104                         old_val += num_bytes;
1105                         cache->space_info->bytes_used += num_bytes;
1106                 } else {
1107                         old_val -= num_bytes;
1108                         cache->space_info->bytes_used -= num_bytes;
1109                         if (mark_free) {
1110                                 set_extent_dirty(&info->free_space_cache,
1111                                                  bytenr, bytenr + num_bytes - 1,
1112                                                  GFP_NOFS);
1113                         }
1114                 }
1115                 btrfs_set_block_group_used(&cache->item, old_val);
1116                 total -= num_bytes;
1117                 bytenr += num_bytes;
1118         }
1119         return 0;
1120 }
1121
1122 static int update_pinned_extents(struct btrfs_root *root,
1123                                 u64 bytenr, u64 num, int pin)
1124 {
1125         u64 len;
1126         struct btrfs_block_group_cache *cache;
1127         struct btrfs_fs_info *fs_info = root->fs_info;
1128
1129         if (pin) {
1130                 set_extent_dirty(&fs_info->pinned_extents,
1131                                 bytenr, bytenr + num - 1, GFP_NOFS);
1132         } else {
1133                 clear_extent_dirty(&fs_info->pinned_extents,
1134                                 bytenr, bytenr + num - 1, GFP_NOFS);
1135         }
1136         while (num > 0) {
1137                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1138                 WARN_ON(!cache);
1139                 len = min(num, cache->key.offset -
1140                           (bytenr - cache->key.objectid));
1141                 if (pin) {
1142                         cache->pinned += len;
1143                         cache->space_info->bytes_pinned += len;
1144                         fs_info->total_pinned += len;
1145                 } else {
1146                         cache->pinned -= len;
1147                         cache->space_info->bytes_pinned -= len;
1148                         fs_info->total_pinned -= len;
1149                 }
1150                 bytenr += len;
1151                 num -= len;
1152         }
1153         return 0;
1154 }
1155
1156 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1157 {
1158         u64 last = 0;
1159         u64 start;
1160         u64 end;
1161         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1162         int ret;
1163
1164         while(1) {
1165                 ret = find_first_extent_bit(pinned_extents, last,
1166                                             &start, &end, EXTENT_DIRTY);
1167                 if (ret)
1168                         break;
1169                 set_extent_dirty(copy, start, end, GFP_NOFS);
1170                 last = end + 1;
1171         }
1172         return 0;
1173 }
1174
1175 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1176                                struct btrfs_root *root,
1177                                struct extent_io_tree *unpin)
1178 {
1179         u64 start;
1180         u64 end;
1181         int ret;
1182         struct extent_io_tree *free_space_cache;
1183         free_space_cache = &root->fs_info->free_space_cache;
1184
1185         while(1) {
1186                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1187                                             EXTENT_DIRTY);
1188                 if (ret)
1189                         break;
1190                 update_pinned_extents(root, start, end + 1 - start, 0);
1191                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1192                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1193         }
1194         return 0;
1195 }
1196
1197 static int finish_current_insert(struct btrfs_trans_handle *trans,
1198                                  struct btrfs_root *extent_root)
1199 {
1200         u64 start;
1201         u64 end;
1202         struct btrfs_fs_info *info = extent_root->fs_info;
1203         struct extent_buffer *eb;
1204         struct btrfs_path *path;
1205         struct btrfs_key ins;
1206         struct btrfs_disk_key first;
1207         struct btrfs_extent_item extent_item;
1208         int ret;
1209         int level;
1210         int err = 0;
1211
1212         btrfs_set_stack_extent_refs(&extent_item, 1);
1213         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1214         path = btrfs_alloc_path();
1215
1216         while(1) {
1217                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1218                                             &end, EXTENT_LOCKED);
1219                 if (ret)
1220                         break;
1221
1222                 ins.objectid = start;
1223                 ins.offset = end + 1 - start;
1224                 err = btrfs_insert_item(trans, extent_root, &ins,
1225                                         &extent_item, sizeof(extent_item));
1226                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1227                                   GFP_NOFS);
1228                 eb = read_tree_block(extent_root, ins.objectid, ins.offset,
1229                                      trans->transid);
1230                 level = btrfs_header_level(eb);
1231                 if (level == 0) {
1232                         btrfs_item_key(eb, &first, 0);
1233                 } else {
1234                         btrfs_node_key(eb, &first, 0);
1235                 }
1236                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1237                                           start, extent_root->root_key.objectid,
1238                                           0, level,
1239                                           btrfs_disk_key_objectid(&first));
1240                 BUG_ON(err);
1241                 free_extent_buffer(eb);
1242         }
1243         btrfs_free_path(path);
1244         return 0;
1245 }
1246
1247 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1248                           int pending)
1249 {
1250         int err = 0;
1251         struct extent_buffer *buf;
1252
1253         if (!pending) {
1254                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1255                 if (buf) {
1256                         if (btrfs_buffer_uptodate(buf, 0)) {
1257                                 u64 transid =
1258                                     root->fs_info->running_transaction->transid;
1259                                 if (btrfs_header_generation(buf) ==
1260                                     transid && !btrfs_header_flag(buf,
1261                                                BTRFS_HEADER_FLAG_WRITTEN)) {
1262                                         free_extent_buffer(buf);
1263                                         return 1;
1264                                 }
1265                         }
1266                         free_extent_buffer(buf);
1267                 }
1268                 update_pinned_extents(root, bytenr, num_bytes, 1);
1269         } else {
1270                 set_extent_bits(&root->fs_info->pending_del,
1271                                 bytenr, bytenr + num_bytes - 1,
1272                                 EXTENT_LOCKED, GFP_NOFS);
1273         }
1274         BUG_ON(err < 0);
1275         return 0;
1276 }
1277
1278 /*
1279  * remove an extent from the root, returns 0 on success
1280  */
1281 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1282                          *root, u64 bytenr, u64 num_bytes,
1283                          u64 root_objectid, u64 ref_generation,
1284                          u64 owner_objectid, u64 owner_offset, int pin,
1285                          int mark_free)
1286 {
1287         struct btrfs_path *path;
1288         struct btrfs_key key;
1289         struct btrfs_fs_info *info = root->fs_info;
1290         struct btrfs_extent_ops *ops = info->extent_ops;
1291         struct btrfs_root *extent_root = info->extent_root;
1292         struct extent_buffer *leaf;
1293         int ret;
1294         int extent_slot = 0;
1295         int found_extent = 0;
1296         int num_to_del = 1;
1297         struct btrfs_extent_item *ei;
1298         u32 refs;
1299
1300         key.objectid = bytenr;
1301         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1302         key.offset = num_bytes;
1303
1304         path = btrfs_alloc_path();
1305         if (!path)
1306                 return -ENOMEM;
1307
1308         ret = lookup_extent_backref(trans, extent_root, path,
1309                                     bytenr, root_objectid,
1310                                     ref_generation,
1311                                     owner_objectid, owner_offset, 1);
1312         if (ret == 0) {
1313                 struct btrfs_key found_key;
1314                 extent_slot = path->slots[0];
1315                 while(extent_slot > 0) {
1316                         extent_slot--;
1317                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1318                                               extent_slot);
1319                         if (found_key.objectid != bytenr)
1320                                 break;
1321                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1322                             found_key.offset == num_bytes) {
1323                                 found_extent = 1;
1324                                 break;
1325                         }
1326                         if (path->slots[0] - extent_slot > 5)
1327                                 break;
1328                 }
1329                 if (!found_extent)
1330                         ret = btrfs_del_item(trans, extent_root, path);
1331         } else {
1332                 btrfs_print_leaf(extent_root, path->nodes[0]);
1333                 WARN_ON(1);
1334                 printk("Unable to find ref byte nr %llu root %llu "
1335                        " gen %llu owner %llu offset %llu\n",
1336                        (unsigned long long)bytenr,
1337                        (unsigned long long)root_objectid,
1338                        (unsigned long long)ref_generation,
1339                        (unsigned long long)owner_objectid,
1340                        (unsigned long long)owner_offset);
1341         }
1342         if (!found_extent) {
1343                 btrfs_release_path(extent_root, path);
1344                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1345                 if (ret < 0)
1346                         return ret;
1347                 BUG_ON(ret);
1348                 extent_slot = path->slots[0];
1349         }
1350
1351         leaf = path->nodes[0];
1352         ei = btrfs_item_ptr(leaf, extent_slot,
1353                             struct btrfs_extent_item);
1354         refs = btrfs_extent_refs(leaf, ei);
1355         BUG_ON(refs == 0);
1356         refs -= 1;
1357         btrfs_set_extent_refs(leaf, ei, refs);
1358
1359         btrfs_mark_buffer_dirty(leaf);
1360
1361         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1362                 /* if the back ref and the extent are next to each other
1363                  * they get deleted below in one shot
1364                  */
1365                 path->slots[0] = extent_slot;
1366                 num_to_del = 2;
1367         } else if (found_extent) {
1368                 /* otherwise delete the extent back ref */
1369                 ret = btrfs_del_item(trans, extent_root, path);
1370                 BUG_ON(ret);
1371                 /* if refs are 0, we need to setup the path for deletion */
1372                 if (refs == 0) {
1373                         btrfs_release_path(extent_root, path);
1374                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1375                                                 -1, 1);
1376                         if (ret < 0)
1377                                 return ret;
1378                         BUG_ON(ret);
1379                 }
1380         }
1381
1382         if (refs == 0) {
1383                 u64 super_used;
1384                 u64 root_used;
1385
1386                 if (pin) {
1387                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1388                         if (ret > 0)
1389                                 mark_free = 1;
1390                         BUG_ON(ret < 0);
1391                 }
1392
1393                 /* block accounting for super block */
1394                 super_used = btrfs_super_bytes_used(&info->super_copy);
1395                 btrfs_set_super_bytes_used(&info->super_copy,
1396                                            super_used - num_bytes);
1397
1398                 /* block accounting for root item */
1399                 root_used = btrfs_root_used(&root->root_item);
1400                 btrfs_set_root_used(&root->root_item,
1401                                            root_used - num_bytes);
1402                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1403                                       num_to_del);
1404                 if (ret)
1405                         return ret;
1406
1407                 if (ops && ops->free_extent)
1408                         ops->free_extent(root, bytenr, num_bytes);
1409
1410                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1411                                          mark_free);
1412                 BUG_ON(ret);
1413         }
1414         btrfs_free_path(path);
1415         finish_current_insert(trans, extent_root);
1416         return ret;
1417 }
1418
1419 /*
1420  * find all the blocks marked as pending in the radix tree and remove
1421  * them from the extent map
1422  */
1423 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1424                                btrfs_root *extent_root)
1425 {
1426         int ret;
1427         int err = 0;
1428         u64 start;
1429         u64 end;
1430         struct extent_io_tree *pending_del;
1431         struct extent_io_tree *pinned_extents;
1432
1433         pending_del = &extent_root->fs_info->pending_del;
1434         pinned_extents = &extent_root->fs_info->pinned_extents;
1435
1436         while(1) {
1437                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1438                                             EXTENT_LOCKED);
1439                 if (ret)
1440                         break;
1441                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1442                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1443                                   GFP_NOFS);
1444                 ret = __free_extent(trans, extent_root,
1445                                      start, end + 1 - start,
1446                                      extent_root->root_key.objectid,
1447                                      0, 0, 0, 0, 0);
1448                 if (ret)
1449                         err = ret;
1450         }
1451         return err;
1452 }
1453
1454 /*
1455  * remove an extent from the root, returns 0 on success
1456  */
1457 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1458                       *root, u64 bytenr, u64 num_bytes,
1459                       u64 root_objectid, u64 ref_generation,
1460                       u64 owner_objectid, u64 owner_offset, int pin)
1461 {
1462         struct btrfs_root *extent_root = root->fs_info->extent_root;
1463         int pending_ret;
1464         int ret;
1465
1466         WARN_ON(num_bytes < root->sectorsize);
1467         if (!root->ref_cows)
1468                 ref_generation = 0;
1469
1470         if (root == extent_root) {
1471                 pin_down_bytes(root, bytenr, num_bytes, 1);
1472                 return 0;
1473         }
1474         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1475                             ref_generation, owner_objectid, owner_offset,
1476                             pin, pin == 0);
1477         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1478         return ret ? ret : pending_ret;
1479 }
1480
1481 static u64 stripe_align(struct btrfs_root *root, u64 val)
1482 {
1483         u64 mask = ((u64)root->stripesize - 1);
1484         u64 ret = (val + mask) & ~mask;
1485         return ret;
1486 }
1487
1488 /*
1489  * walks the btree of allocated extents and find a hole of a given size.
1490  * The key ins is changed to record the hole:
1491  * ins->objectid == block start
1492  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1493  * ins->offset == number of blocks
1494  * Any available blocks before search_start are skipped.
1495  */
1496 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1497                                      struct btrfs_root *orig_root,
1498                                      u64 num_bytes, u64 empty_size,
1499                                      u64 search_start, u64 search_end,
1500                                      u64 hint_byte, struct btrfs_key *ins,
1501                                      u64 exclude_start, u64 exclude_nr,
1502                                      int data)
1503 {
1504         int ret;
1505         u64 orig_search_start = search_start;
1506         struct btrfs_root * root = orig_root->fs_info->extent_root;
1507         struct btrfs_fs_info *info = root->fs_info;
1508         u64 total_needed = num_bytes;
1509         struct btrfs_block_group_cache *block_group;
1510         int full_scan = 0;
1511         int wrapped = 0;
1512
1513         WARN_ON(num_bytes < root->sectorsize);
1514         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1515
1516         if (search_end == (u64)-1)
1517                 search_end = btrfs_super_total_bytes(&info->super_copy);
1518
1519         if (hint_byte) {
1520                 block_group = btrfs_lookup_block_group(info, hint_byte);
1521                 if (!block_group)
1522                         hint_byte = search_start;
1523                 block_group = btrfs_find_block_group(root, block_group,
1524                                                      hint_byte, data, 1);
1525         } else {
1526                 block_group = btrfs_find_block_group(root,
1527                                                      trans->block_group,
1528                                                      search_start, data, 1);
1529         }
1530
1531         total_needed += empty_size;
1532
1533 check_failed:
1534         if (!block_group) {
1535                 block_group = btrfs_lookup_block_group(info, search_start);
1536                 if (!block_group)
1537                         block_group = btrfs_lookup_block_group(info,
1538                                                        orig_search_start);
1539         }
1540         ret = find_search_start(root, &block_group, &search_start,
1541                                 total_needed, data);
1542         if (ret)
1543                 goto error;
1544
1545         search_start = stripe_align(root, search_start);
1546         ins->objectid = search_start;
1547         ins->offset = num_bytes;
1548
1549         if (ins->objectid + num_bytes >= search_end)
1550                 goto enospc;
1551
1552         if (ins->objectid + num_bytes >
1553             block_group->key.objectid + block_group->key.offset) {
1554                 search_start = block_group->key.objectid +
1555                         block_group->key.offset;
1556                 goto new_group;
1557         }
1558
1559         if (test_range_bit(&info->extent_ins, ins->objectid,
1560                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1561                 search_start = ins->objectid + num_bytes;
1562                 goto new_group;
1563         }
1564
1565         if (test_range_bit(&info->pinned_extents, ins->objectid,
1566                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1567                 search_start = ins->objectid + num_bytes;
1568                 goto new_group;
1569         }
1570
1571         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1572             ins->objectid < exclude_start + exclude_nr)) {
1573                 search_start = exclude_start + exclude_nr;
1574                 goto new_group;
1575         }
1576
1577         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1578                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1579                 if (block_group)
1580                         trans->block_group = block_group;
1581         }
1582         ins->offset = num_bytes;
1583         return 0;
1584
1585 new_group:
1586         if (search_start + num_bytes >= search_end) {
1587 enospc:
1588                 search_start = orig_search_start;
1589                 if (full_scan) {
1590                         ret = -ENOSPC;
1591                         goto error;
1592                 }
1593                 if (wrapped) {
1594                         if (!full_scan)
1595                                 total_needed -= empty_size;
1596                         full_scan = 1;
1597                 } else
1598                         wrapped = 1;
1599         }
1600         block_group = btrfs_lookup_block_group(info, search_start);
1601         cond_resched();
1602         block_group = btrfs_find_block_group(root, block_group,
1603                                              search_start, data, 0);
1604         goto check_failed;
1605
1606 error:
1607         return ret;
1608 }
1609 /*
1610  * finds a free extent and does all the dirty work required for allocation
1611  * returns the key for the extent through ins, and a tree buffer for
1612  * the first block of the extent through buf.
1613  *
1614  * returns 0 if everything worked, non-zero otherwise.
1615  */
1616 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1617                        struct btrfs_root *root,
1618                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1619                        u64 owner, u64 owner_offset,
1620                        u64 empty_size, u64 hint_byte,
1621                        u64 search_end, struct btrfs_key *ins, int data)
1622 {
1623         int ret;
1624         int pending_ret;
1625         u64 super_used, root_used;
1626         u64 search_start = 0;
1627         u64 alloc_profile;
1628         u32 sizes[2];
1629         struct btrfs_fs_info *info = root->fs_info;
1630         struct btrfs_root *extent_root = info->extent_root;
1631         struct btrfs_path *path;
1632         struct btrfs_extent_item *extent_item;
1633         struct btrfs_extent_ref *ref;
1634         struct btrfs_key keys[2];
1635
1636         if (info->extent_ops) {
1637                 struct btrfs_extent_ops *ops = info->extent_ops;
1638                 ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
1639                 BUG_ON(ret);
1640                 goto found;
1641         }
1642
1643         if (data) {
1644                 alloc_profile = info->avail_data_alloc_bits &
1645                                 info->data_alloc_profile;
1646                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1647         } else if ((info->system_allocs > 0 || root == info->chunk_root) &&
1648                    info->system_allocs >= 0) {
1649                 alloc_profile = info->avail_system_alloc_bits &
1650                                 info->system_alloc_profile;
1651                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1652         } else {
1653                 alloc_profile = info->avail_metadata_alloc_bits &
1654                                 info->metadata_alloc_profile;
1655                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1656         }
1657
1658         if (root->ref_cows) {
1659                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1660                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1661                                              num_bytes,
1662                                              BTRFS_BLOCK_GROUP_METADATA);
1663                         BUG_ON(ret);
1664                 }
1665                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1666                                      num_bytes + 2 * 1024 * 1024, data);
1667                 BUG_ON(ret);
1668         }
1669
1670         WARN_ON(num_bytes < root->sectorsize);
1671         ret = find_free_extent(trans, root, num_bytes, empty_size,
1672                                search_start, search_end, hint_byte, ins,
1673                                trans->alloc_exclude_start,
1674                                trans->alloc_exclude_nr, data);
1675         BUG_ON(ret);
1676 found:
1677         if (ret)
1678                 return ret;
1679
1680         /* block accounting for super block */
1681         super_used = btrfs_super_bytes_used(&info->super_copy);
1682         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1683
1684         /* block accounting for root item */
1685         root_used = btrfs_root_used(&root->root_item);
1686         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1687
1688         clear_extent_dirty(&root->fs_info->free_space_cache,
1689                            ins->objectid, ins->objectid + ins->offset - 1,
1690                            GFP_NOFS);
1691
1692         if (root == extent_root) {
1693                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1694                                 ins->objectid + ins->offset - 1,
1695                                 EXTENT_LOCKED, GFP_NOFS);
1696                 goto update_block;
1697         }
1698
1699         WARN_ON(trans->alloc_exclude_nr);
1700         trans->alloc_exclude_start = ins->objectid;
1701         trans->alloc_exclude_nr = ins->offset;
1702
1703         memcpy(&keys[0], ins, sizeof(*ins));
1704         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1705                                          owner, owner_offset);
1706         keys[1].objectid = ins->objectid;
1707         keys[1].type = BTRFS_EXTENT_REF_KEY;
1708         sizes[0] = sizeof(*extent_item);
1709         sizes[1] = sizeof(*ref);
1710
1711         path = btrfs_alloc_path();
1712         BUG_ON(!path);
1713
1714         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1715                                        sizes, 2);
1716
1717         BUG_ON(ret);
1718         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1719                                      struct btrfs_extent_item);
1720         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1721         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1722                              struct btrfs_extent_ref);
1723
1724         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1725         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1726         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1727         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1728
1729         btrfs_mark_buffer_dirty(path->nodes[0]);
1730
1731         trans->alloc_exclude_start = 0;
1732         trans->alloc_exclude_nr = 0;
1733         btrfs_free_path(path);
1734         finish_current_insert(trans, extent_root);
1735         pending_ret = del_pending_extents(trans, extent_root);
1736
1737         if (ret) {
1738                 return ret;
1739         }
1740         if (pending_ret) {
1741                 return pending_ret;
1742         }
1743
1744 update_block:
1745         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1746         if (ret) {
1747                 printk("update block group failed for %llu %llu\n",
1748                        (unsigned long long)ins->objectid,
1749                        (unsigned long long)ins->offset);
1750                 BUG();
1751         }
1752         return 0;
1753 }
1754
1755 /*
1756  * helper function to allocate a block for a given tree
1757  * returns the tree buffer or NULL.
1758  */
1759 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1760                                              struct btrfs_root *root,
1761                                              u32 blocksize,
1762                                              u64 root_objectid, u64 hint,
1763                                              u64 empty_size)
1764 {
1765         u64 ref_generation;
1766
1767         if (root->ref_cows)
1768                 ref_generation = trans->transid;
1769         else
1770                 ref_generation = 0;
1771
1772
1773         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1774                                         ref_generation, 0, 0, hint, empty_size);
1775 }
1776
1777 /*
1778  * helper function to allocate a block for a given tree
1779  * returns the tree buffer or NULL.
1780  */
1781 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1782                                              struct btrfs_root *root,
1783                                              u32 blocksize,
1784                                              u64 root_objectid,
1785                                              u64 ref_generation,
1786                                              u64 first_objectid,
1787                                              int level,
1788                                              u64 hint,
1789                                              u64 empty_size)
1790 {
1791         struct btrfs_key ins;
1792         int ret;
1793         struct extent_buffer *buf;
1794
1795         ret = btrfs_alloc_extent(trans, root, blocksize,
1796                                  root_objectid, ref_generation,
1797                                  level, first_objectid, empty_size, hint,
1798                                  (u64)-1, &ins, 0);
1799         if (ret) {
1800                 BUG_ON(ret > 0);
1801                 return ERR_PTR(ret);
1802         }
1803         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1804         if (!buf) {
1805                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1806                                   root->root_key.objectid, ref_generation,
1807                                   0, 0, 0);
1808                 BUG_ON(1);
1809                 return ERR_PTR(-ENOMEM);
1810         }
1811         btrfs_set_buffer_uptodate(buf);
1812         trans->blocks_used++;
1813         return buf;
1814 }
1815
1816 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1817                                   struct btrfs_root *root,
1818                                   struct extent_buffer *leaf)
1819 {
1820         u64 leaf_owner;
1821         u64 leaf_generation;
1822         struct btrfs_key key;
1823         struct btrfs_file_extent_item *fi;
1824         int i;
1825         int nritems;
1826         int ret;
1827
1828         BUG_ON(!btrfs_is_leaf(leaf));
1829         nritems = btrfs_header_nritems(leaf);
1830         leaf_owner = btrfs_header_owner(leaf);
1831         leaf_generation = btrfs_header_generation(leaf);
1832
1833         for (i = 0; i < nritems; i++) {
1834                 u64 disk_bytenr;
1835
1836                 btrfs_item_key_to_cpu(leaf, &key, i);
1837                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1838                         continue;
1839                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1840                 if (btrfs_file_extent_type(leaf, fi) ==
1841                     BTRFS_FILE_EXTENT_INLINE)
1842                         continue;
1843                 /*
1844                  * FIXME make sure to insert a trans record that
1845                  * repeats the snapshot del on crash
1846                  */
1847                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1848                 if (disk_bytenr == 0)
1849                         continue;
1850                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1851                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1852                                 leaf_owner, leaf_generation,
1853                                 key.objectid, key.offset, 0);
1854                 BUG_ON(ret);
1855         }
1856         return 0;
1857 }
1858
1859 static void noinline reada_walk_down(struct btrfs_root *root,
1860                                      struct extent_buffer *node,
1861                                      int slot)
1862 {
1863         u64 bytenr;
1864         u64 last = 0;
1865         u32 nritems;
1866         u32 refs;
1867         u32 blocksize;
1868         int ret;
1869         int i;
1870         int level;
1871         int skipped = 0;
1872
1873         nritems = btrfs_header_nritems(node);
1874         level = btrfs_header_level(node);
1875         if (level)
1876                 return;
1877
1878         for (i = slot; i < nritems && skipped < 32; i++) {
1879                 bytenr = btrfs_node_blockptr(node, i);
1880                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1881                              (last > bytenr && last - bytenr > 32 * 1024))) {
1882                         skipped++;
1883                         continue;
1884                 }
1885                 blocksize = btrfs_level_size(root, level - 1);
1886                 if (i != slot) {
1887                         ret = lookup_extent_ref(NULL, root, bytenr,
1888                                                 blocksize, &refs);
1889                         BUG_ON(ret);
1890                         if (refs != 1) {
1891                                 skipped++;
1892                                 continue;
1893                         }
1894                 }
1895                 mutex_unlock(&root->fs_info->fs_mutex);
1896                 ret = readahead_tree_block(root, bytenr, blocksize,
1897                                            btrfs_node_ptr_generation(node, i));
1898                 last = bytenr + blocksize;
1899                 cond_resched();
1900                 mutex_lock(&root->fs_info->fs_mutex);
1901                 if (ret)
1902                         break;
1903         }
1904 }
1905
1906 /*
1907  * helper function for drop_snapshot, this walks down the tree dropping ref
1908  * counts as it goes.
1909  */
1910 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
1911                                    struct btrfs_root *root,
1912                                    struct btrfs_path *path, int *level)
1913 {
1914         u64 root_owner;
1915         u64 root_gen;
1916         u64 bytenr;
1917         u64 ptr_gen;
1918         struct extent_buffer *next;
1919         struct extent_buffer *cur;
1920         struct extent_buffer *parent;
1921         u32 blocksize;
1922         int ret;
1923         u32 refs;
1924
1925         WARN_ON(*level < 0);
1926         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1927         ret = lookup_extent_ref(trans, root,
1928                                 path->nodes[*level]->start,
1929                                 path->nodes[*level]->len, &refs);
1930         BUG_ON(ret);
1931         if (refs > 1)
1932                 goto out;
1933
1934         /*
1935          * walk down to the last node level and free all the leaves
1936          */
1937         while(*level >= 0) {
1938                 WARN_ON(*level < 0);
1939                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1940                 cur = path->nodes[*level];
1941
1942                 if (btrfs_header_level(cur) != *level)
1943                         WARN_ON(1);
1944
1945                 if (path->slots[*level] >=
1946                     btrfs_header_nritems(cur))
1947                         break;
1948                 if (*level == 0) {
1949                         ret = drop_leaf_ref(trans, root, cur);
1950                         BUG_ON(ret);
1951                         break;
1952                 }
1953                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1954                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1955                 blocksize = btrfs_level_size(root, *level - 1);
1956                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1957                 BUG_ON(ret);
1958                 if (refs != 1) {
1959                         parent = path->nodes[*level];
1960                         root_owner = btrfs_header_owner(parent);
1961                         root_gen = btrfs_header_generation(parent);
1962                         path->slots[*level]++;
1963                         ret = btrfs_free_extent(trans, root, bytenr,
1964                                                 blocksize, root_owner,
1965                                                 root_gen, 0, 0, 1);
1966                         BUG_ON(ret);
1967                         continue;
1968                 }
1969                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1970                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1971                         free_extent_buffer(next);
1972                         reada_walk_down(root, cur, path->slots[*level]);
1973                         mutex_unlock(&root->fs_info->fs_mutex);
1974                         next = read_tree_block(root, bytenr, blocksize,
1975                                                ptr_gen);
1976                         mutex_lock(&root->fs_info->fs_mutex);
1977
1978                         /* we dropped the lock, check one more time */
1979                         ret = lookup_extent_ref(trans, root, bytenr,
1980                                                 blocksize, &refs);
1981                         BUG_ON(ret);
1982                         if (refs != 1) {
1983                                 parent = path->nodes[*level];
1984                                 root_owner = btrfs_header_owner(parent);
1985                                 root_gen = btrfs_header_generation(parent);
1986
1987                                 path->slots[*level]++;
1988                                 free_extent_buffer(next);
1989                                 ret = btrfs_free_extent(trans, root, bytenr,
1990                                                         blocksize,
1991                                                         root_owner,
1992                                                         root_gen, 0, 0, 1);
1993                                 BUG_ON(ret);
1994                                 continue;
1995                         }
1996                 }
1997                 WARN_ON(*level <= 0);
1998                 if (path->nodes[*level-1])
1999                         free_extent_buffer(path->nodes[*level-1]);
2000                 path->nodes[*level-1] = next;
2001                 *level = btrfs_header_level(next);
2002                 path->slots[*level] = 0;
2003         }
2004 out:
2005         WARN_ON(*level < 0);
2006         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2007
2008         if (path->nodes[*level] == root->node) {
2009                 root_owner = root->root_key.objectid;
2010                 parent = path->nodes[*level];
2011         } else {
2012                 parent = path->nodes[*level + 1];
2013                 root_owner = btrfs_header_owner(parent);
2014         }
2015
2016         root_gen = btrfs_header_generation(parent);
2017         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
2018                                 path->nodes[*level]->len,
2019                                 root_owner, root_gen, 0, 0, 1);
2020         free_extent_buffer(path->nodes[*level]);
2021         path->nodes[*level] = NULL;
2022         *level += 1;
2023         BUG_ON(ret);
2024         return 0;
2025 }
2026
2027 /*
2028  * helper for dropping snapshots.  This walks back up the tree in the path
2029  * to find the first node higher up where we haven't yet gone through
2030  * all the slots
2031  */
2032 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2033                                  struct btrfs_root *root,
2034                                  struct btrfs_path *path, int *level)
2035 {
2036         u64 root_owner;
2037         u64 root_gen;
2038         struct btrfs_root_item *root_item = &root->root_item;
2039         int i;
2040         int slot;
2041         int ret;
2042
2043         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2044                 slot = path->slots[i];
2045                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2046                         struct extent_buffer *node;
2047                         struct btrfs_disk_key disk_key;
2048                         node = path->nodes[i];
2049                         path->slots[i]++;
2050                         *level = i;
2051                         WARN_ON(*level == 0);
2052                         btrfs_node_key(node, &disk_key, path->slots[i]);
2053                         memcpy(&root_item->drop_progress,
2054                                &disk_key, sizeof(disk_key));
2055                         root_item->drop_level = i;
2056                         return 0;
2057                 } else {
2058                         if (path->nodes[*level] == root->node) {
2059                                 root_owner = root->root_key.objectid;
2060                                 root_gen =
2061                                    btrfs_header_generation(path->nodes[*level]);
2062                         } else {
2063                                 struct extent_buffer *node;
2064                                 node = path->nodes[*level + 1];
2065                                 root_owner = btrfs_header_owner(node);
2066                                 root_gen = btrfs_header_generation(node);
2067                         }
2068                         ret = btrfs_free_extent(trans, root,
2069                                                 path->nodes[*level]->start,
2070                                                 path->nodes[*level]->len,
2071                                                 root_owner, root_gen, 0, 0, 1);
2072                         BUG_ON(ret);
2073                         free_extent_buffer(path->nodes[*level]);
2074                         path->nodes[*level] = NULL;
2075                         *level = i + 1;
2076                 }
2077         }
2078         return 1;
2079 }
2080
2081 /*
2082  * drop the reference count on the tree rooted at 'snap'.  This traverses
2083  * the tree freeing any blocks that have a ref count of zero after being
2084  * decremented.
2085  */
2086 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2087                         *root)
2088 {
2089         int ret = 0;
2090         int wret;
2091         int level;
2092         struct btrfs_path *path;
2093         int i;
2094         int orig_level;
2095         struct btrfs_root_item *root_item = &root->root_item;
2096
2097         path = btrfs_alloc_path();
2098         BUG_ON(!path);
2099
2100         level = btrfs_header_level(root->node);
2101         orig_level = level;
2102         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2103                 path->nodes[level] = root->node;
2104                 extent_buffer_get(root->node);
2105                 path->slots[level] = 0;
2106         } else {
2107                 struct btrfs_key key;
2108                 struct btrfs_disk_key found_key;
2109                 struct extent_buffer *node;
2110
2111                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2112                 level = root_item->drop_level;
2113                 path->lowest_level = level;
2114                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2115                 if (wret < 0) {
2116                         ret = wret;
2117                         goto out;
2118                 }
2119                 node = path->nodes[level];
2120                 btrfs_node_key(node, &found_key, path->slots[level]);
2121                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2122                                sizeof(found_key)));
2123         }
2124         while(1) {
2125                 wret = walk_down_tree(trans, root, path, &level);
2126                 if (wret < 0)
2127                         ret = wret;
2128                 if (wret != 0)
2129                         break;
2130
2131                 wret = walk_up_tree(trans, root, path, &level);
2132                 if (wret < 0)
2133                         ret = wret;
2134                 if (wret != 0)
2135                         break;
2136                 /*
2137                 ret = -EAGAIN;
2138                 break;
2139                 */
2140         }
2141         for (i = 0; i <= orig_level; i++) {
2142                 if (path->nodes[i]) {
2143                         free_extent_buffer(path->nodes[i]);
2144                         path->nodes[i] = NULL;
2145                 }
2146         }
2147 out:
2148         btrfs_free_path(path);
2149         return ret;
2150 }
2151
2152 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2153 {
2154         u64 start;
2155         u64 end;
2156         u64 ptr;
2157         int ret;
2158         while(1) {
2159                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2160                                             &start, &end, (unsigned int)-1);
2161                 if (ret)
2162                         break;
2163                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2164                 if (!ret)
2165                         kfree((void *)(unsigned long)ptr);
2166                 clear_extent_bits(&info->block_group_cache, start,
2167                                   end, (unsigned int)-1, GFP_NOFS);
2168         }
2169         while(1) {
2170                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2171                                             &start, &end, EXTENT_DIRTY);
2172                 if (ret)
2173                         break;
2174                 clear_extent_dirty(&info->free_space_cache, start,
2175                                    end, GFP_NOFS);
2176         }
2177         return 0;
2178 }
2179
2180 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2181                            struct btrfs_key *key)
2182 {
2183         int ret;
2184         struct btrfs_key found_key;
2185         struct extent_buffer *leaf;
2186         int slot;
2187
2188         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2189         if (ret < 0)
2190                 return ret;
2191         while(1) {
2192                 slot = path->slots[0];
2193                 leaf = path->nodes[0];
2194                 if (slot >= btrfs_header_nritems(leaf)) {
2195                         ret = btrfs_next_leaf(root, path);
2196                         if (ret == 0)
2197                                 continue;
2198                         if (ret < 0)
2199                                 goto error;
2200                         break;
2201                 }
2202                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2203
2204                 if (found_key.objectid >= key->objectid &&
2205                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2206                         return 0;
2207                 path->slots[0]++;
2208         }
2209         ret = -ENOENT;
2210 error:
2211         return ret;
2212 }
2213
2214 int btrfs_read_block_groups(struct btrfs_root *root)
2215 {
2216         struct btrfs_path *path;
2217         int ret;
2218         int bit;
2219         struct btrfs_block_group_cache *cache;
2220         struct btrfs_fs_info *info = root->fs_info;
2221         struct btrfs_space_info *space_info;
2222         struct extent_io_tree *block_group_cache;
2223         struct btrfs_key key;
2224         struct btrfs_key found_key;
2225         struct extent_buffer *leaf;
2226
2227         block_group_cache = &info->block_group_cache;
2228
2229         root = info->extent_root;
2230         key.objectid = 0;
2231         key.offset = 0;
2232         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2233         path = btrfs_alloc_path();
2234         if (!path)
2235                 return -ENOMEM;
2236
2237         while(1) {
2238                 ret = find_first_block_group(root, path, &key);
2239                 if (ret > 0) {
2240                         ret = 0;
2241                         goto error;
2242                 }
2243                 if (ret != 0) {
2244                         goto error;
2245                 }
2246                 leaf = path->nodes[0];
2247                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2248                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2249                 if (!cache) {
2250                         ret = -ENOMEM;
2251                         break;
2252                 }
2253
2254                 read_extent_buffer(leaf, &cache->item,
2255                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2256                                    sizeof(cache->item));
2257                 memcpy(&cache->key, &found_key, sizeof(found_key));
2258                 cache->cached = 0;
2259                 cache->pinned = 0;
2260                 key.objectid = found_key.objectid + found_key.offset;
2261                 btrfs_release_path(root, path);
2262                 cache->flags = btrfs_block_group_flags(&cache->item);
2263                 bit = 0;
2264                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2265                         bit = BLOCK_GROUP_DATA;
2266                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2267                         bit = BLOCK_GROUP_SYSTEM;
2268                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2269                         bit = BLOCK_GROUP_METADATA;
2270                 }
2271                 set_avail_alloc_bits(info, cache->flags);
2272
2273                 ret = update_space_info(info, cache->flags, found_key.offset,
2274                                         btrfs_block_group_used(&cache->item),
2275                                         &space_info);
2276                 BUG_ON(ret);
2277                 cache->space_info = space_info;
2278
2279                 /* use EXTENT_LOCKED to prevent merging */
2280                 set_extent_bits(block_group_cache, found_key.objectid,
2281                                 found_key.objectid + found_key.offset - 1,
2282                                 bit | EXTENT_LOCKED, GFP_NOFS);
2283                 set_state_private(block_group_cache, found_key.objectid,
2284                                   (unsigned long)cache);
2285
2286                 if (key.objectid >=
2287                     btrfs_super_total_bytes(&info->super_copy))
2288                         break;
2289         }
2290         ret = 0;
2291 error:
2292         btrfs_free_path(path);
2293         return ret;
2294 }
2295
2296 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2297                            struct btrfs_root *root, u64 bytes_used,
2298                            u64 type, u64 chunk_objectid, u64 chunk_offset,
2299                            u64 size)
2300 {
2301         int ret;
2302         int bit = 0;
2303         struct btrfs_root *extent_root;
2304         struct btrfs_block_group_cache *cache;
2305         struct extent_io_tree *block_group_cache;
2306
2307         extent_root = root->fs_info->extent_root;
2308         block_group_cache = &root->fs_info->block_group_cache;
2309
2310         cache = kzalloc(sizeof(*cache), GFP_NOFS);
2311         BUG_ON(!cache);
2312         cache->key.objectid = chunk_offset;
2313         cache->key.offset = size;
2314
2315         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2316         btrfs_set_block_group_used(&cache->item, bytes_used);
2317         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2318         cache->flags = type;
2319         btrfs_set_block_group_flags(&cache->item, type);
2320
2321         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2322                                 &cache->space_info);
2323         BUG_ON(ret);
2324
2325         bit = block_group_state_bits(type);
2326         set_extent_bits(block_group_cache, chunk_offset,
2327                         chunk_offset + size - 1,
2328                         bit | EXTENT_LOCKED, GFP_NOFS);
2329
2330         set_state_private(block_group_cache, chunk_offset,
2331                           (unsigned long)cache);
2332         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2333                                 sizeof(cache->item));
2334         BUG_ON(ret);
2335
2336         finish_current_insert(trans, extent_root);
2337         ret = del_pending_extents(trans, extent_root);
2338         BUG_ON(ret);
2339         set_avail_alloc_bits(extent_root->fs_info, type);
2340         return 0;
2341 }
2342
2343 /*
2344  * This is for converter use only.
2345  *
2346  * In that case, we don't know where are free blocks located.
2347  * Therefore all block group cache entries must be setup properly
2348  * before doing any block allocation.
2349  */
2350 int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
2351                             struct btrfs_root *root)
2352 {
2353         u64 total_bytes;
2354         u64 cur_start;
2355         u64 group_type;
2356         u64 group_size;
2357         u64 group_align;
2358         u64 total_data = 0;
2359         u64 total_metadata = 0;
2360         u64 chunk_objectid;
2361         int ret;
2362         int bit;
2363         struct btrfs_root *extent_root;
2364         struct btrfs_block_group_cache *cache;
2365         struct extent_io_tree *block_group_cache;
2366
2367         extent_root = root->fs_info->extent_root;
2368         block_group_cache = &root->fs_info->block_group_cache;
2369         chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2370         total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
2371         group_align = 64 * root->sectorsize;
2372
2373         cur_start = 0;
2374         while (cur_start < total_bytes) {
2375                 group_size = total_bytes / 12;
2376                 group_size = min_t(u64, group_size, total_bytes - cur_start);
2377                 if (cur_start == 0) {
2378                         bit = BLOCK_GROUP_SYSTEM;
2379                         group_type = BTRFS_BLOCK_GROUP_SYSTEM;
2380                         group_size /= 4;
2381                         group_size &= ~(group_align - 1);
2382                         group_size = max_t(u64, group_size, 32 * 1024 * 1024);
2383                         group_size = min_t(u64, group_size, 128 * 1024 * 1024);
2384                 } else {
2385                         group_size &= ~(group_align - 1);
2386                         if (total_data >= total_metadata * 2) {
2387                                 group_type = BTRFS_BLOCK_GROUP_METADATA;
2388                                 group_size = min_t(u64, group_size,
2389                                                    1ULL * 1024 * 1024 * 1024);
2390                                 total_metadata += group_size;
2391                         } else {
2392                                 group_type = BTRFS_BLOCK_GROUP_DATA;
2393                                 group_size = min_t(u64, group_size,
2394                                                    5ULL * 1024 * 1024 * 1024);
2395                                 total_data += group_size;
2396                         }
2397                         if ((total_bytes - cur_start) * 4 < group_size * 5)
2398                                 group_size = total_bytes - cur_start;
2399                 }
2400
2401                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
2402                 BUG_ON(!cache);
2403
2404                 cache->key.objectid = cur_start;
2405                 cache->key.offset = group_size;
2406                 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2407
2408                 btrfs_set_block_group_used(&cache->item, 0);
2409                 btrfs_set_block_group_chunk_objectid(&cache->item,
2410                                                      chunk_objectid);
2411                 btrfs_set_block_group_flags(&cache->item, group_type);
2412
2413                 cache->flags = group_type;
2414
2415                 ret = update_space_info(root->fs_info, group_type, group_size,
2416                                         0, &cache->space_info);
2417                 BUG_ON(ret);
2418                 set_avail_alloc_bits(extent_root->fs_info, group_type);
2419
2420                 set_extent_bits(block_group_cache, cur_start,
2421                                 cur_start + group_size - 1,
2422                                 bit | EXTENT_LOCKED, GFP_NOFS);
2423                 set_state_private(block_group_cache, cur_start,
2424                                   (unsigned long)cache);
2425                 cur_start += group_size;
2426         }
2427         /* then insert all the items */
2428         cur_start = 0;
2429         while(cur_start < total_bytes) {
2430                 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
2431                 BUG_ON(!cache);
2432
2433                 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2434                                         sizeof(cache->item));
2435                 BUG_ON(ret);
2436
2437                 finish_current_insert(trans, extent_root);
2438                 ret = del_pending_extents(trans, extent_root);
2439                 BUG_ON(ret);
2440
2441                 cur_start = cache->key.objectid + cache->key.offset;
2442         }
2443         return 0;
2444 }
2445
2446 u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation,
2447                           u64 owner, u64 owner_offset)
2448 {
2449         return hash_extent_ref(root_objectid, ref_generation,
2450                                owner, owner_offset);
2451 }
2452
2453 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
2454                              struct btrfs_root *root,
2455                              u64 bytenr, u64 num_bytes, int alloc,
2456                              int mark_free)
2457 {
2458         return update_block_group(trans, root, bytenr, num_bytes,
2459                                   alloc, mark_free);
2460 }