Btrfs-progs: add skinny metadata support to progs V3
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "radix-tree.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "crc32c.h"
28 #include "volumes.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 #define PENDING_EXTENT_INSERT 0
37 #define PENDING_EXTENT_DELETE 1
38 #define PENDING_BACKREF_UPDATE 2
39
40 struct pending_extent_op {
41         int type;
42         u64 bytenr;
43         u64 num_bytes;
44         u64 flags;
45         struct btrfs_disk_key key;
46         int level;
47 };
48
49 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
50                                      struct btrfs_root *root,
51                                      u64 root_objectid, u64 generation,
52                                      u64 flags, struct btrfs_disk_key *key,
53                                      int level, struct btrfs_key *ins);
54 static int __free_extent(struct btrfs_trans_handle *trans,
55                          struct btrfs_root *root,
56                          u64 bytenr, u64 num_bytes, u64 parent,
57                          u64 root_objectid, u64 owner_objectid,
58                          u64 owner_offset, int refs_to_drop);
59 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
60                                  btrfs_root *extent_root);
61 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
62                                btrfs_root *extent_root);
63
64 static int remove_sb_from_cache(struct btrfs_root *root,
65                                 struct btrfs_block_group_cache *cache)
66 {
67         u64 bytenr;
68         u64 *logical;
69         int stripe_len;
70         int i, nr, ret;
71         struct extent_io_tree *free_space_cache;
72
73         free_space_cache = &root->fs_info->free_space_cache;
74         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
75                 bytenr = btrfs_sb_offset(i);
76                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
77                                        cache->key.objectid, bytenr, 0,
78                                        &logical, &nr, &stripe_len);
79                 BUG_ON(ret);
80                 while (nr--) {
81                         clear_extent_dirty(free_space_cache, logical[nr],
82                                 logical[nr] + stripe_len - 1, GFP_NOFS);
83                 }
84                 kfree(logical);
85         }
86         return 0;
87 }
88
89 static int cache_block_group(struct btrfs_root *root,
90                              struct btrfs_block_group_cache *block_group)
91 {
92         struct btrfs_path *path;
93         int ret;
94         struct btrfs_key key;
95         struct extent_buffer *leaf;
96         struct extent_io_tree *free_space_cache;
97         int slot;
98         u64 last;
99         u64 hole_size;
100
101         if (!block_group)
102                 return 0;
103
104         root = root->fs_info->extent_root;
105         free_space_cache = &root->fs_info->free_space_cache;
106
107         if (block_group->cached)
108                 return 0;
109
110         path = btrfs_alloc_path();
111         if (!path)
112                 return -ENOMEM;
113
114         path->reada = 2;
115         last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
116         key.objectid = last;
117         key.offset = 0;
118         key.type = 0;
119
120         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
121         if (ret < 0)
122                 goto err;
123
124         while(1) {
125                 leaf = path->nodes[0];
126                 slot = path->slots[0];
127                 if (slot >= btrfs_header_nritems(leaf)) {
128                         ret = btrfs_next_leaf(root, path);
129                         if (ret < 0)
130                                 goto err;
131                         if (ret == 0) {
132                                 continue;
133                         } else {
134                                 break;
135                         }
136                 }
137                 btrfs_item_key_to_cpu(leaf, &key, slot);
138                 if (key.objectid < block_group->key.objectid) {
139                         goto next;
140                 }
141                 if (key.objectid >= block_group->key.objectid +
142                     block_group->key.offset) {
143                         break;
144                 }
145
146                 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
147                     key.type == BTRFS_METADATA_ITEM_KEY) {
148                         if (key.objectid > last) {
149                                 hole_size = key.objectid - last;
150                                 set_extent_dirty(free_space_cache, last,
151                                                  last + hole_size - 1,
152                                                  GFP_NOFS);
153                         }
154                         if (key.type == BTRFS_METADATA_ITEM_KEY)
155                                 last = key.objectid + root->leafsize;
156                         else
157                                 last = key.objectid + key.offset;
158                 }
159 next:
160                 path->slots[0]++;
161         }
162
163         if (block_group->key.objectid +
164             block_group->key.offset > last) {
165                 hole_size = block_group->key.objectid +
166                         block_group->key.offset - last;
167                 set_extent_dirty(free_space_cache, last,
168                                  last + hole_size - 1, GFP_NOFS);
169         }
170         remove_sb_from_cache(root, block_group);
171         block_group->cached = 1;
172 err:
173         btrfs_free_path(path);
174         return 0;
175 }
176
177 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
178                                                        btrfs_fs_info *info,
179                                                        u64 bytenr)
180 {
181         struct extent_io_tree *block_group_cache;
182         struct btrfs_block_group_cache *block_group = NULL;
183         u64 ptr;
184         u64 start;
185         u64 end;
186         int ret;
187
188         bytenr = max_t(u64, bytenr,
189                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
190         block_group_cache = &info->block_group_cache;
191         ret = find_first_extent_bit(block_group_cache,
192                                     bytenr, &start, &end,
193                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
194                                     BLOCK_GROUP_SYSTEM);
195         if (ret) {
196                 return NULL;
197         }
198         ret = get_state_private(block_group_cache, start, &ptr);
199         if (ret)
200                 return NULL;
201
202         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
203         return block_group;
204 }
205
206 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
207                                                          btrfs_fs_info *info,
208                                                          u64 bytenr)
209 {
210         struct extent_io_tree *block_group_cache;
211         struct btrfs_block_group_cache *block_group = NULL;
212         u64 ptr;
213         u64 start;
214         u64 end;
215         int ret;
216
217         block_group_cache = &info->block_group_cache;
218         ret = find_first_extent_bit(block_group_cache,
219                                     bytenr, &start, &end,
220                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
221                                     BLOCK_GROUP_SYSTEM);
222         if (ret) {
223                 return NULL;
224         }
225         ret = get_state_private(block_group_cache, start, &ptr);
226         if (ret)
227                 return NULL;
228
229         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
230         if (block_group->key.objectid <= bytenr && bytenr <
231             block_group->key.objectid + block_group->key.offset)
232                 return block_group;
233         return NULL;
234 }
235
236 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
237 {
238         return (cache->flags & bits) == bits;
239 }
240
241 static int noinline find_search_start(struct btrfs_root *root,
242                               struct btrfs_block_group_cache **cache_ret,
243                               u64 *start_ret, int num, int data)
244 {
245         int ret;
246         struct btrfs_block_group_cache *cache = *cache_ret;
247         u64 last;
248         u64 start = 0;
249         u64 end = 0;
250         u64 search_start = *start_ret;
251         int wrapped = 0;
252
253         if (!cache) {
254                 goto out;
255         }
256 again:
257         ret = cache_block_group(root, cache);
258         if (ret)
259                 goto out;
260
261         last = max(search_start, cache->key.objectid);
262         if (cache->ro || !block_group_bits(cache, data)) {
263                 goto new_group;
264         }
265
266         while(1) {
267                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
268                                             last, &start, &end, EXTENT_DIRTY);
269                 if (ret) {
270                         goto new_group;
271                 }
272
273                 start = max(last, start);
274                 last = end + 1;
275                 if (last - start < num) {
276                         continue;
277                 }
278                 if (start + num > cache->key.objectid + cache->key.offset) {
279                         goto new_group;
280                 }
281                 *start_ret = start;
282                 return 0;
283         }
284 out:
285         cache = btrfs_lookup_block_group(root->fs_info, search_start);
286         if (!cache) {
287                 printk("Unable to find block group for %llu\n",
288                         (unsigned long long)search_start);
289                 WARN_ON(1);
290         }
291         return -ENOSPC;
292
293 new_group:
294         last = cache->key.objectid + cache->key.offset;
295 wrapped:
296         cache = btrfs_lookup_first_block_group(root->fs_info, last);
297         if (!cache) {
298 no_cache:
299                 if (!wrapped) {
300                         wrapped = 1;
301                         last = search_start;
302                         goto wrapped;
303                 }
304                 goto out;
305         }
306         cache = btrfs_find_block_group(root, cache, last, data, 0);
307         cache = btrfs_find_block_group(root, cache, last, data, 0);
308         if (!cache)
309                 goto no_cache;
310
311         *cache_ret = cache;
312         goto again;
313 }
314
315 static u64 div_factor(u64 num, int factor)
316 {
317         if (factor == 10)
318                 return num;
319         num *= factor;
320         num /= 10;
321         return num;
322 }
323
324 static int block_group_state_bits(u64 flags)
325 {
326         int bits = 0;
327         if (flags & BTRFS_BLOCK_GROUP_DATA)
328                 bits |= BLOCK_GROUP_DATA;
329         if (flags & BTRFS_BLOCK_GROUP_METADATA)
330                 bits |= BLOCK_GROUP_METADATA;
331         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
332                 bits |= BLOCK_GROUP_SYSTEM;
333         return bits;
334 }
335
336 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
337                                                  struct btrfs_block_group_cache
338                                                  *hint, u64 search_start,
339                                                  int data, int owner)
340 {
341         struct btrfs_block_group_cache *cache;
342         struct extent_io_tree *block_group_cache;
343         struct btrfs_block_group_cache *found_group = NULL;
344         struct btrfs_fs_info *info = root->fs_info;
345         u64 used;
346         u64 last = 0;
347         u64 hint_last;
348         u64 start;
349         u64 end;
350         u64 free_check;
351         u64 ptr;
352         int bit;
353         int ret;
354         int full_search = 0;
355         int factor = 10;
356
357         block_group_cache = &info->block_group_cache;
358
359         if (!owner)
360                 factor = 10;
361
362         bit = block_group_state_bits(data);
363
364         if (search_start) {
365                 struct btrfs_block_group_cache *shint;
366                 shint = btrfs_lookup_block_group(info, search_start);
367                 if (shint && !shint->ro && block_group_bits(shint, data)) {
368                         used = btrfs_block_group_used(&shint->item);
369                         if (used + shint->pinned <
370                             div_factor(shint->key.offset, factor)) {
371                                 return shint;
372                         }
373                 }
374         }
375         if (hint && !hint->ro && block_group_bits(hint, data)) {
376                 used = btrfs_block_group_used(&hint->item);
377                 if (used + hint->pinned <
378                     div_factor(hint->key.offset, factor)) {
379                         return hint;
380                 }
381                 last = hint->key.objectid + hint->key.offset;
382                 hint_last = last;
383         } else {
384                 if (hint)
385                         hint_last = max(hint->key.objectid, search_start);
386                 else
387                         hint_last = search_start;
388
389                 last = hint_last;
390         }
391 again:
392         while(1) {
393                 ret = find_first_extent_bit(block_group_cache, last,
394                                             &start, &end, bit);
395                 if (ret)
396                         break;
397
398                 ret = get_state_private(block_group_cache, start, &ptr);
399                 if (ret)
400                         break;
401
402                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
403                 last = cache->key.objectid + cache->key.offset;
404                 used = btrfs_block_group_used(&cache->item);
405
406                 if (!cache->ro && block_group_bits(cache, data)) {
407                         if (full_search)
408                                 free_check = cache->key.offset;
409                         else
410                                 free_check = div_factor(cache->key.offset,
411                                                         factor);
412
413                         if (used + cache->pinned < free_check) {
414                                 found_group = cache;
415                                 goto found;
416                         }
417                 }
418                 cond_resched();
419         }
420         if (!full_search) {
421                 last = search_start;
422                 full_search = 1;
423                 goto again;
424         }
425 found:
426         return found_group;
427 }
428
429 /*
430  * Back reference rules.  Back refs have three main goals:
431  *
432  * 1) differentiate between all holders of references to an extent so that
433  *    when a reference is dropped we can make sure it was a valid reference
434  *    before freeing the extent.
435  *
436  * 2) Provide enough information to quickly find the holders of an extent
437  *    if we notice a given block is corrupted or bad.
438  *
439  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
440  *    maintenance.  This is actually the same as #2, but with a slightly
441  *    different use case.
442  *
443  * There are two kinds of back refs. The implicit back refs is optimized
444  * for pointers in non-shared tree blocks. For a given pointer in a block,
445  * back refs of this kind provide information about the block's owner tree
446  * and the pointer's key. These information allow us to find the block by
447  * b-tree searching. The full back refs is for pointers in tree blocks not
448  * referenced by their owner trees. The location of tree block is recorded
449  * in the back refs. Actually the full back refs is generic, and can be
450  * used in all cases the implicit back refs is used. The major shortcoming
451  * of the full back refs is its overhead. Every time a tree block gets
452  * COWed, we have to update back refs entry for all pointers in it.
453  *
454  * For a newly allocated tree block, we use implicit back refs for
455  * pointers in it. This means most tree related operations only involve
456  * implicit back refs. For a tree block created in old transaction, the
457  * only way to drop a reference to it is COW it. So we can detect the
458  * event that tree block loses its owner tree's reference and do the
459  * back refs conversion.
460  *
461  * When a tree block is COW'd through a tree, there are four cases:
462  *
463  * The reference count of the block is one and the tree is the block's
464  * owner tree. Nothing to do in this case.
465  *
466  * The reference count of the block is one and the tree is not the
467  * block's owner tree. In this case, full back refs is used for pointers
468  * in the block. Remove these full back refs, add implicit back refs for
469  * every pointers in the new block.
470  *
471  * The reference count of the block is greater than one and the tree is
472  * the block's owner tree. In this case, implicit back refs is used for
473  * pointers in the block. Add full back refs for every pointers in the
474  * block, increase lower level extents' reference counts. The original
475  * implicit back refs are entailed to the new block.
476  *
477  * The reference count of the block is greater than one and the tree is
478  * not the block's owner tree. Add implicit back refs for every pointer in
479  * the new block, increase lower level extents' reference count.
480  *
481  * Back Reference Key composing:
482  *
483  * The key objectid corresponds to the first byte in the extent,
484  * The key type is used to differentiate between types of back refs.
485  * There are different meanings of the key offset for different types
486  * of back refs.
487  *
488  * File extents can be referenced by:
489  *
490  * - multiple snapshots, subvolumes, or different generations in one subvol
491  * - different files inside a single subvolume
492  * - different offsets inside a file (bookend extents in file.c)
493  *
494  * The extent ref structure for the implicit back refs has fields for:
495  *
496  * - Objectid of the subvolume root
497  * - objectid of the file holding the reference
498  * - original offset in the file
499  * - how many bookend extents
500  *
501  * The key offset for the implicit back refs is hash of the first
502  * three fields.
503  *
504  * The extent ref structure for the full back refs has field for:
505  *
506  * - number of pointers in the tree leaf
507  *
508  * The key offset for the implicit back refs is the first byte of
509  * the tree leaf
510  *
511  * When a file extent is allocated, The implicit back refs is used.
512  * the fields are filled in:
513  *
514  *     (root_key.objectid, inode objectid, offset in file, 1)
515  *
516  * When a file extent is removed file truncation, we find the
517  * corresponding implicit back refs and check the following fields:
518  *
519  *     (btrfs_header_owner(leaf), inode objectid, offset in file)
520  *
521  * Btree extents can be referenced by:
522  *
523  * - Different subvolumes
524  *
525  * Both the implicit back refs and the full back refs for tree blocks
526  * only consist of key. The key offset for the implicit back refs is
527  * objectid of block's owner tree. The key offset for the full back refs
528  * is the first byte of parent block.
529  *
530  * When implicit back refs is used, information about the lowest key and
531  * level of the tree block are required. These information are stored in
532  * tree block info structure.
533  */
534
535 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
536 static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
537                                   struct btrfs_root *root,
538                                   struct btrfs_path *path,
539                                   u64 owner, u32 extra_size)
540 {
541         struct btrfs_extent_item *item;
542         struct btrfs_extent_item_v0 *ei0;
543         struct btrfs_extent_ref_v0 *ref0;
544         struct btrfs_tree_block_info *bi;
545         struct extent_buffer *leaf;
546         struct btrfs_key key;
547         struct btrfs_key found_key;
548         u32 new_size = sizeof(*item);
549         u64 refs;
550         int ret;
551
552         leaf = path->nodes[0];
553         BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
554
555         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
556         ei0 = btrfs_item_ptr(leaf, path->slots[0],
557                              struct btrfs_extent_item_v0);
558         refs = btrfs_extent_refs_v0(leaf, ei0);
559
560         if (owner == (u64)-1) {
561                 while (1) {
562                         if (path->slots[0] >= btrfs_header_nritems(leaf)) {
563                                 ret = btrfs_next_leaf(root, path);
564                                 if (ret < 0)
565                                         return ret;
566                                 BUG_ON(ret > 0);
567                                 leaf = path->nodes[0];
568                         }
569                         btrfs_item_key_to_cpu(leaf, &found_key,
570                                               path->slots[0]);
571                         BUG_ON(key.objectid != found_key.objectid);
572                         if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
573                                 path->slots[0]++;
574                                 continue;
575                         }
576                         ref0 = btrfs_item_ptr(leaf, path->slots[0],
577                                               struct btrfs_extent_ref_v0);
578                         owner = btrfs_ref_objectid_v0(leaf, ref0);
579                         break;
580                 }
581         }
582         btrfs_release_path(root, path);
583
584         if (owner < BTRFS_FIRST_FREE_OBJECTID)
585                 new_size += sizeof(*bi);
586
587         new_size -= sizeof(*ei0);
588         ret = btrfs_search_slot(trans, root, &key, path, new_size, 1);
589         if (ret < 0)
590                 return ret;
591         BUG_ON(ret);
592
593         ret = btrfs_extend_item(trans, root, path, new_size);
594         BUG_ON(ret);
595
596         leaf = path->nodes[0];
597         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
598         btrfs_set_extent_refs(leaf, item, refs);
599         /* FIXME: get real generation */
600         btrfs_set_extent_generation(leaf, item, 0);
601         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
602                 btrfs_set_extent_flags(leaf, item,
603                                        BTRFS_EXTENT_FLAG_TREE_BLOCK |
604                                        BTRFS_BLOCK_FLAG_FULL_BACKREF);
605                 bi = (struct btrfs_tree_block_info *)(item + 1);
606                 /* FIXME: get first key of the block */
607                 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
608                 btrfs_set_tree_block_level(leaf, bi, (int)owner);
609         } else {
610                 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
611         }
612         btrfs_mark_buffer_dirty(leaf);
613         return 0;
614 }
615 #endif
616
617 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
618 {
619         u32 high_crc = ~(u32)0;
620         u32 low_crc = ~(u32)0;
621         __le64 lenum;
622
623         lenum = cpu_to_le64(root_objectid);
624         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
625         lenum = cpu_to_le64(owner);
626         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
627         lenum = cpu_to_le64(offset);
628         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
629
630         return ((u64)high_crc << 31) ^ (u64)low_crc;
631 }
632
633 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
634                                      struct btrfs_extent_data_ref *ref)
635 {
636         return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
637                                     btrfs_extent_data_ref_objectid(leaf, ref),
638                                     btrfs_extent_data_ref_offset(leaf, ref));
639 }
640
641 static int match_extent_data_ref(struct extent_buffer *leaf,
642                                  struct btrfs_extent_data_ref *ref,
643                                  u64 root_objectid, u64 owner, u64 offset)
644 {
645         if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
646             btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
647             btrfs_extent_data_ref_offset(leaf, ref) != offset)
648                 return 0;
649         return 1;
650 }
651
652 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
653                                            struct btrfs_root *root,
654                                            struct btrfs_path *path,
655                                            u64 bytenr, u64 parent,
656                                            u64 root_objectid,
657                                            u64 owner, u64 offset)
658 {
659         struct btrfs_key key;
660         struct btrfs_extent_data_ref *ref;
661         struct extent_buffer *leaf;
662         u32 nritems;
663         int ret;
664         int recow;
665         int err = -ENOENT;
666
667         key.objectid = bytenr;
668         if (parent) {
669                 key.type = BTRFS_SHARED_DATA_REF_KEY;
670                 key.offset = parent;
671         } else {
672                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
673                 key.offset = hash_extent_data_ref(root_objectid,
674                                                   owner, offset);
675         }
676 again:
677         recow = 0;
678         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
679         if (ret < 0) {
680                 err = ret;
681                 goto fail;
682         }
683
684         if (parent) {
685                 if (!ret)
686                         return 0;
687 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
688                 key.type = BTRFS_EXTENT_REF_V0_KEY;
689                 btrfs_release_path(root, path);
690                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
691                 if (ret < 0) {
692                         err = ret;
693                         goto fail;
694                 }
695                 if (!ret)
696                         return 0;
697 #endif
698                 goto fail;
699         }
700
701         leaf = path->nodes[0];
702         nritems = btrfs_header_nritems(leaf);
703         while (1) {
704                 if (path->slots[0] >= nritems) {
705                         ret = btrfs_next_leaf(root, path);
706                         if (ret < 0)
707                                 err = ret;
708                         if (ret)
709                                 goto fail;
710
711                         leaf = path->nodes[0];
712                         nritems = btrfs_header_nritems(leaf);
713                         recow = 1;
714                 }
715
716                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
717                 if (key.objectid != bytenr ||
718                     key.type != BTRFS_EXTENT_DATA_REF_KEY)
719                         goto fail;
720                 
721                 ref = btrfs_item_ptr(leaf, path->slots[0],
722                                      struct btrfs_extent_data_ref);
723
724                 if (match_extent_data_ref(leaf, ref, root_objectid,
725                                           owner, offset)) {
726                         if (recow) {
727                                 btrfs_release_path(root, path);
728                                 goto again;
729                         }
730                         err = 0;
731                         break;
732                 }
733                 path->slots[0]++;
734         }
735 fail:
736         return err;
737 }
738
739 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
740                                            struct btrfs_root *root,
741                                            struct btrfs_path *path,
742                                            u64 bytenr, u64 parent,
743                                            u64 root_objectid, u64 owner,
744                                            u64 offset, int refs_to_add)
745 {
746         struct btrfs_key key;
747         struct extent_buffer *leaf;
748         u32 size;
749         u32 num_refs;
750         int ret;
751
752         key.objectid = bytenr;
753         if (parent) {
754                 key.type = BTRFS_SHARED_DATA_REF_KEY;
755                 key.offset = parent;
756                 size = sizeof(struct btrfs_shared_data_ref);
757         } else {
758                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
759                 key.offset = hash_extent_data_ref(root_objectid,
760                                                   owner, offset);
761                 size = sizeof(struct btrfs_extent_data_ref);
762         }
763
764         ret = btrfs_insert_empty_item(trans, root, path, &key, size);
765         if (ret && ret != -EEXIST)
766                 goto fail;
767
768         leaf = path->nodes[0];
769         if (parent) {
770                 struct btrfs_shared_data_ref *ref;
771                 ref = btrfs_item_ptr(leaf, path->slots[0],
772                                      struct btrfs_shared_data_ref);
773                 if (ret == 0) {
774                         btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
775                 } else {
776                         num_refs = btrfs_shared_data_ref_count(leaf, ref);
777                         num_refs += refs_to_add;
778                         btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
779                 }
780         } else {
781                 struct btrfs_extent_data_ref *ref;
782                 while (ret == -EEXIST) {
783                         ref = btrfs_item_ptr(leaf, path->slots[0],
784                                              struct btrfs_extent_data_ref);
785                         if (match_extent_data_ref(leaf, ref, root_objectid,
786                                                   owner, offset))
787                                 break;
788                         btrfs_release_path(root, path);
789
790                         key.offset++;
791                         ret = btrfs_insert_empty_item(trans, root, path, &key,
792                                                       size);
793                         if (ret && ret != -EEXIST)
794                                 goto fail;
795
796                         leaf = path->nodes[0];
797                 }
798                 ref = btrfs_item_ptr(leaf, path->slots[0],
799                                      struct btrfs_extent_data_ref);
800                 if (ret == 0) {
801                         btrfs_set_extent_data_ref_root(leaf, ref,
802                                                        root_objectid);
803                         btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
804                         btrfs_set_extent_data_ref_offset(leaf, ref, offset);
805                         btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
806                 } else {
807                         num_refs = btrfs_extent_data_ref_count(leaf, ref);
808                         num_refs += refs_to_add;
809                         btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
810                 }
811         }
812         btrfs_mark_buffer_dirty(leaf);
813         ret = 0;
814 fail:
815         btrfs_release_path(root, path);
816         return ret;
817 }
818
819 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
820                                            struct btrfs_root *root,
821                                            struct btrfs_path *path,
822                                            int refs_to_drop)
823 {
824         struct btrfs_key key;
825         struct btrfs_extent_data_ref *ref1 = NULL;
826         struct btrfs_shared_data_ref *ref2 = NULL;
827         struct extent_buffer *leaf;
828         u32 num_refs = 0;
829         int ret = 0;
830
831         leaf = path->nodes[0];
832         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
833
834         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
835                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
836                                       struct btrfs_extent_data_ref);
837                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
838         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
839                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
840                                       struct btrfs_shared_data_ref);
841                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
842 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
843         } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
844                 struct btrfs_extent_ref_v0 *ref0;
845                 ref0 = btrfs_item_ptr(leaf, path->slots[0],
846                                       struct btrfs_extent_ref_v0);
847                 num_refs = btrfs_ref_count_v0(leaf, ref0);
848 #endif
849         } else {
850                 BUG();
851         }
852
853         BUG_ON(num_refs < refs_to_drop);
854         num_refs -= refs_to_drop;
855
856         if (num_refs == 0) {
857                 ret = btrfs_del_item(trans, root, path);
858         } else {
859                 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
860                         btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
861                 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
862                         btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
863 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
864                 else {
865                         struct btrfs_extent_ref_v0 *ref0;
866                         ref0 = btrfs_item_ptr(leaf, path->slots[0],
867                                         struct btrfs_extent_ref_v0);
868                         btrfs_set_ref_count_v0(leaf, ref0, num_refs);
869                 }
870 #endif
871                 btrfs_mark_buffer_dirty(leaf);
872         }
873         return ret;
874 }
875
876 static noinline u32 extent_data_ref_count(struct btrfs_root *root,
877                                           struct btrfs_path *path,
878                                           struct btrfs_extent_inline_ref *iref)
879 {
880         struct btrfs_key key;
881         struct extent_buffer *leaf;
882         struct btrfs_extent_data_ref *ref1;
883         struct btrfs_shared_data_ref *ref2;
884         u32 num_refs = 0;
885
886         leaf = path->nodes[0];
887         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
888         if (iref) {
889                 if (btrfs_extent_inline_ref_type(leaf, iref) ==
890                     BTRFS_EXTENT_DATA_REF_KEY) {
891                         ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
892                         num_refs = btrfs_extent_data_ref_count(leaf, ref1);
893                 } else {
894                         ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
895                         num_refs = btrfs_shared_data_ref_count(leaf, ref2);
896                 }
897         } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
898                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
899                                       struct btrfs_extent_data_ref);
900                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
901         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
902                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
903                                       struct btrfs_shared_data_ref);
904                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
905 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
906         } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
907                 struct btrfs_extent_ref_v0 *ref0;
908                 ref0 = btrfs_item_ptr(leaf, path->slots[0],
909                                       struct btrfs_extent_ref_v0);
910                 num_refs = btrfs_ref_count_v0(leaf, ref0);
911 #endif
912         } else {
913                 BUG();
914         }
915         return num_refs;
916 }
917
918 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
919                                           struct btrfs_root *root,
920                                           struct btrfs_path *path,
921                                           u64 bytenr, u64 parent,
922                                           u64 root_objectid)
923 {
924         struct btrfs_key key;
925         int ret;
926
927         key.objectid = bytenr;
928         if (parent) {
929                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
930                 key.offset = parent;
931         } else {
932                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
933                 key.offset = root_objectid;
934         }
935
936         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
937         if (ret > 0)
938                 ret = -ENOENT;
939 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
940         if (ret == -ENOENT && parent) {
941                 btrfs_release_path(root, path);
942                 key.type = BTRFS_EXTENT_REF_V0_KEY;
943                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
944                 if (ret > 0)
945                         ret = -ENOENT;
946         }
947 #endif
948         return ret;
949 }
950
951 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
952                                           struct btrfs_root *root,
953                                           struct btrfs_path *path,
954                                           u64 bytenr, u64 parent,
955                                           u64 root_objectid)
956 {
957         struct btrfs_key key;
958         int ret;
959
960         key.objectid = bytenr;
961         if (parent) {
962                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
963                 key.offset = parent;
964         } else {
965                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
966                 key.offset = root_objectid;
967         }
968
969         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
970
971         btrfs_release_path(root, path);
972         return ret;
973 }
974
975 static inline int extent_ref_type(u64 parent, u64 owner)
976 {
977         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
978                 if (parent > 0)
979                         return BTRFS_SHARED_BLOCK_REF_KEY;
980                 else
981                         return BTRFS_TREE_BLOCK_REF_KEY;
982         } else {
983                 if (parent > 0)
984                         return BTRFS_SHARED_DATA_REF_KEY;
985                 else
986                         return BTRFS_EXTENT_DATA_REF_KEY;
987         }
988 }
989
990 static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
991
992 {
993         int level;
994         for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
995                 if (!path->nodes[level])
996                         break;
997                 if (path->slots[level] + 1 >=
998                     btrfs_header_nritems(path->nodes[level]))
999                         continue;
1000                 if (level == 0)
1001                         btrfs_item_key_to_cpu(path->nodes[level], key,
1002                                               path->slots[level] + 1);
1003                 else
1004                         btrfs_node_key_to_cpu(path->nodes[level], key,
1005                                               path->slots[level] + 1);
1006                 return 0;
1007         }
1008         return 1;
1009 }
1010
1011 static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1012                                  struct btrfs_root *root,
1013                                  struct btrfs_path *path,
1014                                  struct btrfs_extent_inline_ref **ref_ret,
1015                                  u64 bytenr, u64 num_bytes,
1016                                  u64 parent, u64 root_objectid,
1017                                  u64 owner, u64 offset, int insert)
1018 {
1019         struct btrfs_key key;
1020         struct extent_buffer *leaf;
1021         struct btrfs_extent_item *ei;
1022         struct btrfs_extent_inline_ref *iref;
1023         u64 flags;
1024         u32 item_size;
1025         unsigned long ptr;
1026         unsigned long end;
1027         int extra_size;
1028         int type;
1029         int want;
1030         int ret;
1031         int err = 0;
1032         int skinny_metadata =
1033                 btrfs_fs_incompat(root->fs_info,
1034                                   BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
1035
1036         key.objectid = bytenr;
1037         key.type = BTRFS_EXTENT_ITEM_KEY;
1038         key.offset = num_bytes;
1039
1040         want = extent_ref_type(parent, owner);
1041         if (insert)
1042                 extra_size = btrfs_extent_inline_ref_size(want);
1043         else
1044                 extra_size = -1;
1045
1046         if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) {
1047                 skinny_metadata = 1;
1048                 key.type = BTRFS_METADATA_ITEM_KEY;
1049                 key.offset = owner;
1050         } else if (skinny_metadata) {
1051                 skinny_metadata = 0;
1052         }
1053
1054 again:
1055         ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1056         if (ret < 0) {
1057                 err = ret;
1058                 goto out;
1059         }
1060
1061         /*
1062          * We may be a newly converted file system which still has the old fat
1063          * extent entries for metadata, so try and see if we have one of those.
1064          */
1065         if (ret > 0 && skinny_metadata) {
1066                 skinny_metadata = 0;
1067                 if (path->slots[0]) {
1068                         path->slots[0]--;
1069                         btrfs_item_key_to_cpu(path->nodes[0], &key,
1070                                               path->slots[0]);
1071                         if (key.objectid == bytenr &&
1072                             key.type == BTRFS_EXTENT_ITEM_KEY &&
1073                             key.offset == num_bytes)
1074                                 ret = 0;
1075                 }
1076                 if (ret) {
1077                         key.type = BTRFS_EXTENT_ITEM_KEY;
1078                         key.offset = num_bytes;
1079                         goto again;
1080                 }
1081         }
1082
1083         if (ret) {
1084                 printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset);
1085                 return -ENOENT;
1086         }
1087
1088         BUG_ON(ret);
1089
1090         leaf = path->nodes[0];
1091         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1092 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1093         if (item_size < sizeof(*ei)) {
1094                 if (!insert) {
1095                         err = -ENOENT;
1096                         goto out;
1097                 }
1098                 ret = convert_extent_item_v0(trans, root, path, owner,
1099                                              extra_size);
1100                 if (ret < 0) {
1101                         err = ret;
1102                         goto out;
1103                 }
1104                 leaf = path->nodes[0];
1105                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1106         }
1107 #endif
1108         if (item_size < sizeof(*ei)) {
1109                 printf("Size is %u, needs to be %u, slot %d\n",
1110                        (unsigned)item_size,
1111                        (unsigned)sizeof(*ei), path->slots[0]);
1112                 btrfs_print_leaf(root, leaf);
1113                 return -EINVAL;
1114         }
1115         BUG_ON(item_size < sizeof(*ei));
1116
1117         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1118         flags = btrfs_extent_flags(leaf, ei);
1119
1120         ptr = (unsigned long)(ei + 1);
1121         end = (unsigned long)ei + item_size;
1122
1123         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1124                 ptr += sizeof(struct btrfs_tree_block_info);
1125                 BUG_ON(ptr > end);
1126         } else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
1127                 if (!(flags & BTRFS_EXTENT_FLAG_DATA)) {
1128                         return -EIO;
1129                 }
1130         }
1131
1132         err = -ENOENT;
1133         while (1) {
1134                 if (ptr >= end) {
1135                         WARN_ON(ptr > end);
1136                         break;
1137                 }
1138                 iref = (struct btrfs_extent_inline_ref *)ptr;
1139                 type = btrfs_extent_inline_ref_type(leaf, iref);
1140                 if (want < type)
1141                         break;
1142                 if (want > type) {
1143                         ptr += btrfs_extent_inline_ref_size(type);
1144                         continue;
1145                 }
1146
1147                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1148                         struct btrfs_extent_data_ref *dref;
1149                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1150                         if (match_extent_data_ref(leaf, dref, root_objectid,
1151                                                   owner, offset)) {
1152                                 err = 0;
1153                                 break;
1154                         }
1155                         if (hash_extent_data_ref_item(leaf, dref) <
1156                             hash_extent_data_ref(root_objectid, owner, offset))
1157                                 break;
1158                 } else {
1159                         u64 ref_offset;
1160                         ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1161                         if (parent > 0) {
1162                                 if (parent == ref_offset) {
1163                                         err = 0;
1164                                         break;
1165                                 }
1166                                 if (ref_offset < parent)
1167                                         break;
1168                         } else {
1169                                 if (root_objectid == ref_offset) {
1170                                         err = 0;
1171                                         break;
1172                                 }
1173                                 if (ref_offset < root_objectid)
1174                                         break;
1175                         }
1176                 }
1177                 ptr += btrfs_extent_inline_ref_size(type);
1178         }
1179         if (err == -ENOENT && insert) {
1180                 if (item_size + extra_size >=
1181                     BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1182                         err = -EAGAIN;
1183                         goto out;
1184                 }
1185                 /*
1186                  * To add new inline back ref, we have to make sure
1187                  * there is no corresponding back ref item.
1188                  * For simplicity, we just do not add new inline back
1189                  * ref if there is any back ref item.
1190                  */
1191                 if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
1192                     key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1193                         err = -EAGAIN;
1194                         goto out;
1195                 }
1196         }
1197         *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1198 out:
1199         return err;
1200 }
1201
1202 static int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1203                                 struct btrfs_root *root,
1204                                 struct btrfs_path *path,
1205                                 struct btrfs_extent_inline_ref *iref,
1206                                 u64 parent, u64 root_objectid,
1207                                 u64 owner, u64 offset, int refs_to_add)
1208 {
1209         struct extent_buffer *leaf;
1210         struct btrfs_extent_item *ei;
1211         unsigned long ptr;
1212         unsigned long end;
1213         unsigned long item_offset;
1214         u64 refs;
1215         int size;
1216         int type;
1217         int ret;
1218
1219         leaf = path->nodes[0];
1220         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1221         item_offset = (unsigned long)iref - (unsigned long)ei;
1222
1223         type = extent_ref_type(parent, owner);
1224         size = btrfs_extent_inline_ref_size(type);
1225
1226         ret = btrfs_extend_item(trans, root, path, size);
1227         BUG_ON(ret);
1228
1229         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1230         refs = btrfs_extent_refs(leaf, ei);
1231         refs += refs_to_add;
1232         btrfs_set_extent_refs(leaf, ei, refs);
1233
1234         ptr = (unsigned long)ei + item_offset;
1235         end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1236         if (ptr < end - size)
1237                 memmove_extent_buffer(leaf, ptr + size, ptr,
1238                                       end - size - ptr);
1239
1240         iref = (struct btrfs_extent_inline_ref *)ptr;
1241         btrfs_set_extent_inline_ref_type(leaf, iref, type);
1242         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1243                 struct btrfs_extent_data_ref *dref;
1244                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1245                 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1246                 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1247                 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1248                 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1249         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1250                 struct btrfs_shared_data_ref *sref;
1251                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1252                 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1253                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1254         } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1255                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1256         } else {
1257                 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1258         }
1259         btrfs_mark_buffer_dirty(leaf);
1260         return 0;
1261 }
1262
1263 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1264                                  struct btrfs_root *root,
1265                                  struct btrfs_path *path,
1266                                  struct btrfs_extent_inline_ref **ref_ret,
1267                                  u64 bytenr, u64 num_bytes, u64 parent,
1268                                  u64 root_objectid, u64 owner, u64 offset)
1269 {
1270         int ret;
1271
1272         ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1273                                            bytenr, num_bytes, parent,
1274                                            root_objectid, owner, offset, 0);
1275         if (ret != -ENOENT)
1276                 return ret;
1277
1278         btrfs_release_path(root, path);
1279         *ref_ret = NULL;
1280
1281         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1282                 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1283                                             root_objectid);
1284         } else {
1285                 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1286                                              root_objectid, owner, offset);
1287         }
1288         return ret;
1289 }
1290
1291 static int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1292                                  struct btrfs_root *root,
1293                                  struct btrfs_path *path,
1294                                  struct btrfs_extent_inline_ref *iref,
1295                                  int refs_to_mod)
1296 {
1297         struct extent_buffer *leaf;
1298         struct btrfs_extent_item *ei;
1299         struct btrfs_extent_data_ref *dref = NULL;
1300         struct btrfs_shared_data_ref *sref = NULL;
1301         unsigned long ptr;
1302         unsigned long end;
1303         u32 item_size;
1304         int size;
1305         int type;
1306         int ret;
1307         u64 refs;
1308
1309         leaf = path->nodes[0];
1310         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1311         refs = btrfs_extent_refs(leaf, ei);
1312         WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1313         refs += refs_to_mod;
1314         btrfs_set_extent_refs(leaf, ei, refs);
1315
1316         type = btrfs_extent_inline_ref_type(leaf, iref);
1317
1318         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1319                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1320                 refs = btrfs_extent_data_ref_count(leaf, dref);
1321         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1322                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1323                 refs = btrfs_shared_data_ref_count(leaf, sref);
1324         } else {
1325                 refs = 1;
1326                 BUG_ON(refs_to_mod != -1);
1327         }
1328
1329         BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1330         refs += refs_to_mod;
1331
1332         if (refs > 0) {
1333                 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1334                         btrfs_set_extent_data_ref_count(leaf, dref, refs);
1335                 else
1336                         btrfs_set_shared_data_ref_count(leaf, sref, refs);
1337         } else {
1338                 size =  btrfs_extent_inline_ref_size(type);
1339                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1340                 ptr = (unsigned long)iref;
1341                 end = (unsigned long)ei + item_size;
1342                 if (ptr + size < end)
1343                         memmove_extent_buffer(leaf, ptr, ptr + size,
1344                                               end - ptr - size);
1345                 item_size -= size;
1346                 ret = btrfs_truncate_item(trans, root, path, item_size, 1);
1347                 BUG_ON(ret);
1348         }
1349         btrfs_mark_buffer_dirty(leaf);
1350         return 0;
1351 }
1352
1353 static int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1354                                  struct btrfs_root *root,
1355                                  struct btrfs_path *path,
1356                                  u64 bytenr, u64 num_bytes, u64 parent,
1357                                  u64 root_objectid, u64 owner,
1358                                  u64 offset, int refs_to_add)
1359 {
1360         struct btrfs_extent_inline_ref *iref;
1361         int ret;
1362
1363         ret = lookup_inline_extent_backref(trans, root, path, &iref,
1364                                            bytenr, num_bytes, parent,
1365                                            root_objectid, owner, offset, 1);
1366         if (ret == 0) {
1367                 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1368                 ret = update_inline_extent_backref(trans, root, path, iref,
1369                                                    refs_to_add);
1370         } else if (ret == -ENOENT) {
1371                 ret = setup_inline_extent_backref(trans, root, path, iref,
1372                                                   parent, root_objectid,
1373                                                   owner, offset, refs_to_add);
1374         }
1375         return ret;
1376 }
1377
1378 static int insert_extent_backref(struct btrfs_trans_handle *trans,
1379                                  struct btrfs_root *root,
1380                                  struct btrfs_path *path,
1381                                  u64 bytenr, u64 parent, u64 root_objectid,
1382                                  u64 owner, u64 offset, int refs_to_add)
1383 {
1384         int ret;
1385
1386         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
1387                 ret = insert_extent_data_ref(trans, root, path, bytenr,
1388                                              parent, root_objectid,
1389                                              owner, offset, refs_to_add);
1390         } else {
1391                 BUG_ON(refs_to_add != 1);
1392                 ret = insert_tree_block_ref(trans, root, path, bytenr,
1393                                             parent, root_objectid);
1394         }
1395         return ret;
1396 }
1397
1398 static int remove_extent_backref(struct btrfs_trans_handle *trans,
1399                                  struct btrfs_root *root,
1400                                  struct btrfs_path *path,
1401                                  struct btrfs_extent_inline_ref *iref,
1402                                  int refs_to_drop, int is_data)
1403 {
1404         int ret;
1405
1406         BUG_ON(!is_data && refs_to_drop != 1);
1407         if (iref) {
1408                 ret = update_inline_extent_backref(trans, root, path, iref,
1409                                                    -refs_to_drop);
1410         } else if (is_data) {
1411                 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1412         } else {
1413                 ret = btrfs_del_item(trans, root, path);
1414         }
1415         return ret;
1416 }
1417
1418 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1419                          struct btrfs_root *root,
1420                          u64 bytenr, u64 num_bytes, u64 parent,
1421                          u64 root_objectid, u64 owner, u64 offset)
1422 {
1423         struct btrfs_path *path;
1424         struct extent_buffer *leaf;
1425         struct btrfs_extent_item *item;
1426         u64 refs;
1427         int ret;
1428         int err = 0;
1429
1430         path = btrfs_alloc_path();
1431         if (!path)
1432                 return -ENOMEM;
1433
1434         path->reada = 1;
1435         path->leave_spinning = 1;
1436
1437         ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
1438                                            path, bytenr, num_bytes, parent,
1439                                            root_objectid, owner, offset, 1);
1440         if (ret == 0)
1441                 goto out;
1442
1443         if (ret != -EAGAIN) {
1444                 err = ret;
1445                 goto out;
1446         }
1447         
1448         leaf = path->nodes[0];
1449         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1450         refs = btrfs_extent_refs(leaf, item);
1451         btrfs_set_extent_refs(leaf, item, refs + 1);
1452
1453         btrfs_mark_buffer_dirty(leaf);
1454         btrfs_release_path(root->fs_info->extent_root, path);
1455
1456         path->reada = 1;
1457         path->leave_spinning = 1;
1458
1459         /* now insert the actual backref */
1460         ret = insert_extent_backref(trans, root->fs_info->extent_root,
1461                                     path, bytenr, parent, root_objectid,
1462                                     owner, offset, 1);
1463         if (ret)
1464                 err = ret;
1465 out:
1466         btrfs_free_path(path);
1467         finish_current_insert(trans, root->fs_info->extent_root);
1468         del_pending_extents(trans, root->fs_info->extent_root);
1469         BUG_ON(err);
1470         return err;
1471 }
1472
1473 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1474                          struct btrfs_root *root)
1475 {
1476         finish_current_insert(trans, root->fs_info->extent_root);
1477         del_pending_extents(trans, root->fs_info->extent_root);
1478         return 0;
1479 }
1480
1481 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
1482                              struct btrfs_root *root, u64 bytenr,
1483                              u64 offset, int metadata, u64 *refs, u64 *flags)
1484 {
1485         struct btrfs_path *path;
1486         int ret;
1487         struct btrfs_key key;
1488         struct extent_buffer *l;
1489         struct btrfs_extent_item *item;
1490         u32 item_size;
1491         u64 num_refs;
1492         u64 extent_flags;
1493
1494         if (metadata &&
1495             !btrfs_fs_incompat(root->fs_info,
1496                                BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) {
1497                 offset = root->leafsize;
1498                 metadata = 0;
1499         }
1500
1501         path = btrfs_alloc_path();
1502         path->reada = 1;
1503
1504         key.objectid = bytenr;
1505         key.offset = offset;
1506         if (metadata)
1507                 key.type = BTRFS_METADATA_ITEM_KEY;
1508         else
1509                 key.type = BTRFS_EXTENT_ITEM_KEY;
1510
1511 again:
1512         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1513                                 0, 0);
1514         if (ret < 0)
1515                 goto out;
1516
1517         /*
1518          * Deal with the fact that we may have mixed SKINNY and normal refs.  If
1519          * we didn't find what we wanted check and see if we have a normal ref
1520          * right next to us, or re-search if we are on the edge of the leaf just
1521          * to make sure.
1522          */
1523         if (ret > 0 && metadata) {
1524                 if (path->slots) {
1525                         path->slots[0]--;
1526                         btrfs_item_key_to_cpu(path->nodes[0], &key,
1527                                               path->slots[0]);
1528                         if (key.objectid == bytenr &&
1529                             key.type == BTRFS_METADATA_ITEM_KEY)
1530                                 ret = 0;
1531                 }
1532
1533                 if (ret) {
1534                         btrfs_release_path(root, path);
1535                         key.type = BTRFS_EXTENT_ITEM_KEY;
1536                         key.offset = root->leafsize;
1537                         metadata = 0;
1538                         goto again;
1539                 }
1540         }
1541
1542         if (ret != 0) {
1543                 ret = -EIO;
1544                 goto out;
1545         }
1546
1547         l = path->nodes[0];
1548         item_size = btrfs_item_size_nr(l, path->slots[0]);
1549         if (item_size >= sizeof(*item)) {
1550                 item = btrfs_item_ptr(l, path->slots[0],
1551                                       struct btrfs_extent_item);
1552                 num_refs = btrfs_extent_refs(l, item);
1553                 extent_flags = btrfs_extent_flags(l, item);
1554         } else {
1555 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1556                         struct btrfs_extent_item_v0 *ei0;
1557                         BUG_ON(item_size != sizeof(*ei0));
1558                         ei0 = btrfs_item_ptr(l, path->slots[0],
1559                                              struct btrfs_extent_item_v0);
1560                         num_refs = btrfs_extent_refs_v0(l, ei0);
1561                         /* FIXME: this isn't correct for data */
1562                         extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
1563 #else
1564                         BUG();
1565 #endif
1566         }
1567         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1568         if (refs)
1569                 *refs = num_refs;
1570         if (flags)
1571                 *flags = extent_flags;
1572 out:
1573         btrfs_free_path(path);
1574         return 0;
1575 }
1576
1577 int btrfs_set_block_flags(struct btrfs_trans_handle *trans,
1578                           struct btrfs_root *root,
1579                           u64 bytenr, int level, u64 flags)
1580 {
1581         struct btrfs_path *path;
1582         int ret;
1583         struct btrfs_key key;
1584         struct extent_buffer *l;
1585         struct btrfs_extent_item *item;
1586         u32 item_size;
1587         int skinny_metadata =
1588                 btrfs_fs_incompat(root->fs_info,
1589                                   BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
1590
1591         path = btrfs_alloc_path();
1592         path->reada = 1;
1593
1594         key.objectid = bytenr;
1595         if (skinny_metadata) {
1596                 key.offset = level;
1597                 key.type = BTRFS_METADATA_ITEM_KEY;
1598         } else {
1599                 key.offset = root->leafsize;
1600                 key.type = BTRFS_EXTENT_ITEM_KEY;
1601         }
1602
1603 again:
1604         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1605                                 0, 0);
1606         if (ret < 0)
1607                 goto out;
1608
1609         if (ret > 0 && skinny_metadata) {
1610                 skinny_metadata = 0;
1611                 if (path->slots[0]--) {
1612                         path->slots[0]--;
1613                         btrfs_item_key_to_cpu(path->nodes[0], &key,
1614                                               path->slots[0]);
1615                         if (key.objectid == bytenr &&
1616                             key.offset == root->leafsize &&
1617                             key.type == BTRFS_EXTENT_ITEM_KEY)
1618                                 ret = 0;
1619                 }
1620                 if (ret) {
1621                         btrfs_release_path(root, path);
1622                         key.offset = root->leafsize;
1623                         key.type = BTRFS_EXTENT_ITEM_KEY;
1624                         goto again;
1625                 }
1626         }
1627
1628         if (ret != 0) {
1629                 btrfs_print_leaf(root, path->nodes[0]);
1630                 printk("failed to find block number %Lu\n",
1631                         (unsigned long long)bytenr);
1632                 BUG();
1633         }
1634         l = path->nodes[0];
1635         item_size = btrfs_item_size_nr(l, path->slots[0]);
1636 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1637         if (item_size < sizeof(*item)) {
1638                 ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
1639                                              path, (u64)-1, 0);
1640                 if (ret < 0)
1641                         goto out;
1642
1643                 l = path->nodes[0];
1644                 item_size = btrfs_item_size_nr(l, path->slots[0]);
1645         }
1646 #endif
1647         BUG_ON(item_size < sizeof(*item));
1648         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1649         flags |= btrfs_extent_flags(l, item);
1650         btrfs_set_extent_flags(l, item, flags);
1651 out:
1652         btrfs_free_path(path);
1653         finish_current_insert(trans, root->fs_info->extent_root);
1654         del_pending_extents(trans, root->fs_info->extent_root);
1655         return ret;
1656 }
1657
1658 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
1659                            struct btrfs_root *root,
1660                            struct extent_buffer *buf,
1661                            int record_parent, int inc)
1662 {
1663         u64 bytenr;
1664         u64 num_bytes;
1665         u64 parent;
1666         u64 ref_root;
1667         u32 nritems;
1668         struct btrfs_key key;
1669         struct btrfs_file_extent_item *fi;
1670         int i;
1671         int level;
1672         int ret = 0;
1673         int (*process_func)(struct btrfs_trans_handle *trans,
1674                             struct btrfs_root *root,
1675                             u64, u64, u64, u64, u64, u64);
1676
1677         ref_root = btrfs_header_owner(buf);
1678         nritems = btrfs_header_nritems(buf);
1679         level = btrfs_header_level(buf);
1680
1681         if (!root->ref_cows && level == 0)
1682                 return 0;
1683
1684         if (inc)
1685                 process_func = btrfs_inc_extent_ref;
1686         else
1687                 process_func = btrfs_free_extent;
1688
1689         if (record_parent)
1690                 parent = buf->start;
1691         else
1692                 parent = 0;
1693
1694         for (i = 0; i < nritems; i++) {
1695                 cond_resched();
1696                 if (level == 0) {
1697                         btrfs_item_key_to_cpu(buf, &key, i);
1698                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1699                                 continue;
1700                         fi = btrfs_item_ptr(buf, i,
1701                                             struct btrfs_file_extent_item);
1702                         if (btrfs_file_extent_type(buf, fi) ==
1703                             BTRFS_FILE_EXTENT_INLINE)
1704                                 continue;
1705                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1706                         if (bytenr == 0)
1707                                 continue;
1708                         
1709                         num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
1710                         key.offset -= btrfs_file_extent_offset(buf, fi);
1711                         ret = process_func(trans, root, bytenr, num_bytes,
1712                                            parent, ref_root, key.objectid,
1713                                            key.offset);
1714                         if (ret) {
1715                                 WARN_ON(1);
1716                                 goto fail;
1717                         }
1718                 } else {
1719                         bytenr = btrfs_node_blockptr(buf, i);
1720                         num_bytes = btrfs_level_size(root, level - 1);
1721                         ret = process_func(trans, root, bytenr, num_bytes,
1722                                            parent, ref_root, level - 1, 0);
1723                         if (ret) {
1724                                 WARN_ON(1);
1725                                 goto fail;
1726                         }
1727                 }
1728         }
1729         return 0;
1730 fail:
1731         WARN_ON(1);
1732         return ret;
1733 }
1734
1735 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1736                   struct extent_buffer *buf, int record_parent)
1737 {
1738         return __btrfs_mod_ref(trans, root, buf, record_parent, 1);
1739 }
1740
1741 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1742                   struct extent_buffer *buf, int record_parent)
1743 {
1744         return __btrfs_mod_ref(trans, root, buf, record_parent, 0);
1745 }
1746
1747 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1748                                  struct btrfs_root *root,
1749                                  struct btrfs_path *path,
1750                                  struct btrfs_block_group_cache *cache)
1751 {
1752         int ret;
1753         int pending_ret;
1754         struct btrfs_root *extent_root = root->fs_info->extent_root;
1755         unsigned long bi;
1756         struct extent_buffer *leaf;
1757
1758         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1759         if (ret < 0)
1760                 goto fail;
1761         BUG_ON(ret);
1762
1763         leaf = path->nodes[0];
1764         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1765         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1766         btrfs_mark_buffer_dirty(leaf);
1767         btrfs_release_path(extent_root, path);
1768 fail:
1769         finish_current_insert(trans, extent_root);
1770         pending_ret = del_pending_extents(trans, extent_root);
1771         if (ret)
1772                 return ret;
1773         if (pending_ret)
1774                 return pending_ret;
1775         return 0;
1776
1777 }
1778
1779 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1780                                    struct btrfs_root *root)
1781 {
1782         struct extent_io_tree *block_group_cache;
1783         struct btrfs_block_group_cache *cache;
1784         int ret;
1785         struct btrfs_path *path;
1786         u64 last = 0;
1787         u64 start;
1788         u64 end;
1789         u64 ptr;
1790
1791         block_group_cache = &root->fs_info->block_group_cache;
1792         path = btrfs_alloc_path();
1793         if (!path)
1794                 return -ENOMEM;
1795
1796         while(1) {
1797                 ret = find_first_extent_bit(block_group_cache, last,
1798                                             &start, &end, BLOCK_GROUP_DIRTY);
1799                 if (ret) {
1800                         if (last == 0)
1801                                 break;
1802                         last = 0;
1803                         continue;
1804                 }
1805
1806                 last = end + 1;
1807                 ret = get_state_private(block_group_cache, start, &ptr);
1808                 BUG_ON(ret);
1809
1810                 clear_extent_bits(block_group_cache, start, end,
1811                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
1812
1813                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1814                 ret = write_one_cache_group(trans, root, path, cache);
1815         }
1816         btrfs_free_path(path);
1817         return 0;
1818 }
1819
1820 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1821                                                   u64 flags)
1822 {
1823         struct list_head *head = &info->space_info;
1824         struct list_head *cur;
1825         struct btrfs_space_info *found;
1826         list_for_each(cur, head) {
1827                 found = list_entry(cur, struct btrfs_space_info, list);
1828                 if (found->flags & flags)
1829                         return found;
1830         }
1831         return NULL;
1832
1833 }
1834
1835 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1836                              u64 total_bytes, u64 bytes_used,
1837                              struct btrfs_space_info **space_info)
1838 {
1839         struct btrfs_space_info *found;
1840
1841         found = __find_space_info(info, flags);
1842         if (found) {
1843                 found->total_bytes += total_bytes;
1844                 found->bytes_used += bytes_used;
1845                 if (found->total_bytes < found->bytes_used) {
1846                         fprintf(stderr, "warning, bad space info total_bytes "
1847                                 "%llu used %llu\n",
1848                                (unsigned long long)found->total_bytes,
1849                                (unsigned long long)found->bytes_used);
1850                 }
1851                 *space_info = found;
1852                 return 0;
1853         }
1854         found = kmalloc(sizeof(*found), GFP_NOFS);
1855         if (!found)
1856                 return -ENOMEM;
1857
1858         list_add(&found->list, &info->space_info);
1859         found->flags = flags;
1860         found->total_bytes = total_bytes;
1861         found->bytes_used = bytes_used;
1862         found->bytes_pinned = 0;
1863         found->full = 0;
1864         *space_info = found;
1865         return 0;
1866 }
1867
1868
1869 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1870 {
1871         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1872                                    BTRFS_BLOCK_GROUP_RAID1 |
1873                                    BTRFS_BLOCK_GROUP_RAID10 |
1874                                    BTRFS_BLOCK_GROUP_RAID5 |
1875                                    BTRFS_BLOCK_GROUP_RAID6 |
1876                                    BTRFS_BLOCK_GROUP_DUP);
1877         if (extra_flags) {
1878                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1879                         fs_info->avail_data_alloc_bits |= extra_flags;
1880                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1881                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1882                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1883                         fs_info->avail_system_alloc_bits |= extra_flags;
1884         }
1885 }
1886
1887 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1888                           struct btrfs_root *extent_root, u64 alloc_bytes,
1889                           u64 flags)
1890 {
1891         struct btrfs_space_info *space_info;
1892         u64 thresh;
1893         u64 start;
1894         u64 num_bytes;
1895         int ret;
1896
1897         space_info = __find_space_info(extent_root->fs_info, flags);
1898         if (!space_info) {
1899                 ret = update_space_info(extent_root->fs_info, flags,
1900                                         0, 0, &space_info);
1901                 BUG_ON(ret);
1902         }
1903         BUG_ON(!space_info);
1904
1905         if (space_info->full)
1906                 return 0;
1907
1908         thresh = div_factor(space_info->total_bytes, 7);
1909         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1910             thresh)
1911                 return 0;
1912
1913         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes,
1914                                 space_info->flags);
1915         if (ret == -ENOSPC) {
1916                 space_info->full = 1;
1917                 return 0;
1918         }
1919
1920         BUG_ON(ret);
1921
1922         ret = btrfs_make_block_group(trans, extent_root, 0, space_info->flags,
1923                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1924         BUG_ON(ret);
1925         return 0;
1926 }
1927
1928 static int update_block_group(struct btrfs_trans_handle *trans,
1929                               struct btrfs_root *root,
1930                               u64 bytenr, u64 num_bytes, int alloc,
1931                               int mark_free)
1932 {
1933         struct btrfs_block_group_cache *cache;
1934         struct btrfs_fs_info *info = root->fs_info;
1935         u64 total = num_bytes;
1936         u64 old_val;
1937         u64 byte_in_group;
1938         u64 start;
1939         u64 end;
1940
1941         /* block accounting for super block */
1942         old_val = btrfs_super_bytes_used(info->super_copy);
1943         if (alloc)
1944                 old_val += num_bytes;
1945         else
1946                 old_val -= num_bytes;
1947         btrfs_set_super_bytes_used(info->super_copy, old_val);
1948
1949         /* block accounting for root item */
1950         old_val = btrfs_root_used(&root->root_item);
1951         if (alloc)
1952                 old_val += num_bytes;
1953         else
1954                 old_val -= num_bytes;
1955         btrfs_set_root_used(&root->root_item, old_val);
1956
1957         while(total) {
1958                 cache = btrfs_lookup_block_group(info, bytenr);
1959                 if (!cache) {
1960                         return -1;
1961                 }
1962                 byte_in_group = bytenr - cache->key.objectid;
1963                 WARN_ON(byte_in_group > cache->key.offset);
1964                 start = cache->key.objectid;
1965                 end = start + cache->key.offset - 1;
1966                 set_extent_bits(&info->block_group_cache, start, end,
1967                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1968
1969                 old_val = btrfs_block_group_used(&cache->item);
1970                 num_bytes = min(total, cache->key.offset - byte_in_group);
1971
1972                 if (alloc) {
1973                         old_val += num_bytes;
1974                         cache->space_info->bytes_used += num_bytes;
1975                 } else {
1976                         old_val -= num_bytes;
1977                         cache->space_info->bytes_used -= num_bytes;
1978                         if (mark_free) {
1979                                 set_extent_dirty(&info->free_space_cache,
1980                                                  bytenr, bytenr + num_bytes - 1,
1981                                                  GFP_NOFS);
1982                         }
1983                 }
1984                 btrfs_set_block_group_used(&cache->item, old_val);
1985                 total -= num_bytes;
1986                 bytenr += num_bytes;
1987         }
1988         return 0;
1989 }
1990
1991 static int update_pinned_extents(struct btrfs_root *root,
1992                                 u64 bytenr, u64 num, int pin)
1993 {
1994         u64 len;
1995         struct btrfs_block_group_cache *cache;
1996         struct btrfs_fs_info *fs_info = root->fs_info;
1997
1998         if (pin) {
1999                 set_extent_dirty(&fs_info->pinned_extents,
2000                                 bytenr, bytenr + num - 1, GFP_NOFS);
2001         } else {
2002                 clear_extent_dirty(&fs_info->pinned_extents,
2003                                 bytenr, bytenr + num - 1, GFP_NOFS);
2004         }
2005         while (num > 0) {
2006                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2007                 if (!cache) {
2008                         len = min((u64)root->sectorsize, num);
2009                         goto next;
2010                 }
2011                 WARN_ON(!cache);
2012                 len = min(num, cache->key.offset -
2013                           (bytenr - cache->key.objectid));
2014                 if (pin) {
2015                         cache->pinned += len;
2016                         cache->space_info->bytes_pinned += len;
2017                         fs_info->total_pinned += len;
2018                 } else {
2019                         cache->pinned -= len;
2020                         cache->space_info->bytes_pinned -= len;
2021                         fs_info->total_pinned -= len;
2022                 }
2023 next:
2024                 bytenr += len;
2025                 num -= len;
2026         }
2027         return 0;
2028 }
2029
2030 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
2031 {
2032         u64 last = 0;
2033         u64 start;
2034         u64 end;
2035         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
2036         int ret;
2037
2038         while(1) {
2039                 ret = find_first_extent_bit(pinned_extents, last,
2040                                             &start, &end, EXTENT_DIRTY);
2041                 if (ret)
2042                         break;
2043                 set_extent_dirty(copy, start, end, GFP_NOFS);
2044                 last = end + 1;
2045         }
2046         return 0;
2047 }
2048
2049 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2050                                struct btrfs_root *root,
2051                                struct extent_io_tree *unpin)
2052 {
2053         u64 start;
2054         u64 end;
2055         int ret;
2056         struct extent_io_tree *free_space_cache;
2057         free_space_cache = &root->fs_info->free_space_cache;
2058
2059         while(1) {
2060                 ret = find_first_extent_bit(unpin, 0, &start, &end,
2061                                             EXTENT_DIRTY);
2062                 if (ret)
2063                         break;
2064                 update_pinned_extents(root, start, end + 1 - start, 0);
2065                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2066                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
2067         }
2068         return 0;
2069 }
2070
2071 static int extent_root_pending_ops(struct btrfs_fs_info *info)
2072 {
2073         u64 start;
2074         u64 end;
2075         int ret;
2076
2077         ret = find_first_extent_bit(&info->extent_ins, 0, &start,
2078                                     &end, EXTENT_LOCKED);
2079         if (!ret) {
2080                 ret = find_first_extent_bit(&info->pending_del, 0, &start, &end,
2081                                             EXTENT_LOCKED);
2082         }
2083         return ret == 0;
2084
2085 }
2086 static int finish_current_insert(struct btrfs_trans_handle *trans,
2087                                  struct btrfs_root *extent_root)
2088 {
2089         u64 start;
2090         u64 end;
2091         u64 priv;
2092         struct btrfs_fs_info *info = extent_root->fs_info;
2093         struct btrfs_path *path;
2094         struct pending_extent_op *extent_op;
2095         struct btrfs_key key;
2096         int ret;
2097         int skinny_metadata =
2098                 btrfs_fs_incompat(extent_root->fs_info,
2099                                   BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
2100
2101         path = btrfs_alloc_path();
2102
2103         while(1) {
2104                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
2105                                             &end, EXTENT_LOCKED);
2106                 if (ret)
2107                         break;
2108
2109                 ret = get_state_private(&info->extent_ins, start, &priv);
2110                 BUG_ON(ret);
2111                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2112
2113                 if (extent_op->type == PENDING_EXTENT_INSERT) {
2114                         key.objectid = start;
2115                         if (skinny_metadata) {
2116                                 key.offset = extent_op->level;
2117                                 key.type = BTRFS_METADATA_ITEM_KEY;
2118                         } else {
2119                                 key.offset = extent_op->num_bytes;
2120                                 key.type = BTRFS_EXTENT_ITEM_KEY;
2121                         }
2122                         ret = alloc_reserved_tree_block(trans, extent_root,
2123                                                 extent_root->root_key.objectid,
2124                                                 trans->transid,
2125                                                 extent_op->flags,
2126                                                 &extent_op->key,
2127                                                 extent_op->level, &key);
2128                 } else {
2129                         BUG_ON(1);
2130                 }
2131
2132                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
2133                                   GFP_NOFS);
2134                 kfree(extent_op);
2135         }
2136         btrfs_free_path(path);
2137         return 0;
2138 }
2139
2140 static int pin_down_bytes(struct btrfs_trans_handle *trans,
2141                           struct btrfs_root *root,
2142                           u64 bytenr, u64 num_bytes, int is_data)
2143 {
2144         int err = 0;
2145         struct extent_buffer *buf;
2146
2147         if (is_data)
2148                 goto pinit;
2149
2150         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
2151         if (!buf)
2152                 goto pinit;
2153
2154         /* we can reuse a block if it hasn't been written
2155          * and it is from this transaction.  We can't
2156          * reuse anything from the tree log root because
2157          * it has tiny sub-transactions.
2158          */
2159         if (btrfs_buffer_uptodate(buf, 0)) {
2160                 u64 header_owner = btrfs_header_owner(buf);
2161                 u64 header_transid = btrfs_header_generation(buf);
2162                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2163                     header_transid == trans->transid &&
2164                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2165                         clean_tree_block(NULL, root, buf);
2166                         free_extent_buffer(buf);
2167                         return 1;
2168                 }
2169         }
2170         free_extent_buffer(buf);
2171 pinit:
2172         update_pinned_extents(root, bytenr, num_bytes, 1);
2173
2174         BUG_ON(err < 0);
2175         return 0;
2176 }
2177
2178 void btrfs_pin_extent(struct btrfs_fs_info *fs_info,
2179                        u64 bytenr, u64 num_bytes)
2180 {
2181         update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 1);
2182 }
2183
2184 /*
2185  * remove an extent from the root, returns 0 on success
2186  */
2187 static int __free_extent(struct btrfs_trans_handle *trans,
2188                          struct btrfs_root *root,
2189                          u64 bytenr, u64 num_bytes, u64 parent,
2190                          u64 root_objectid, u64 owner_objectid,
2191                          u64 owner_offset, int refs_to_drop)
2192 {
2193
2194         struct btrfs_key key;
2195         struct btrfs_path *path;
2196         struct btrfs_extent_ops *ops = root->fs_info->extent_ops;
2197         struct btrfs_root *extent_root = root->fs_info->extent_root;
2198         struct extent_buffer *leaf;
2199         struct btrfs_extent_item *ei;
2200         struct btrfs_extent_inline_ref *iref;
2201         int ret;
2202         int is_data;
2203         int extent_slot = 0;
2204         int found_extent = 0;
2205         int num_to_del = 1;
2206         u32 item_size;
2207         u64 refs;
2208         int skinny_metadata =
2209                 btrfs_fs_incompat(extent_root->fs_info,
2210                                   BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
2211
2212         if (root->fs_info->free_extent_hook) {
2213                 root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes,
2214                                                 parent, root_objectid, owner_objectid,
2215                                                 owner_offset, refs_to_drop);
2216
2217         }
2218         path = btrfs_alloc_path();
2219         if (!path)
2220                 return -ENOMEM;
2221
2222         path->reada = 1;
2223         path->leave_spinning = 1;
2224
2225         is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2226         if (is_data)
2227                 skinny_metadata = 0;
2228         BUG_ON(!is_data && refs_to_drop != 1);
2229
2230         ret = lookup_extent_backref(trans, extent_root, path, &iref,
2231                                     bytenr, num_bytes, parent,
2232                                     root_objectid, owner_objectid,
2233                                     owner_offset);
2234         if (ret == 0) {
2235                 extent_slot = path->slots[0];
2236                 while (extent_slot >= 0) {
2237                         btrfs_item_key_to_cpu(path->nodes[0], &key,
2238                                               extent_slot);
2239                         if (key.objectid != bytenr)
2240                                 break;
2241                         if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2242                             key.offset == num_bytes) {
2243                                 found_extent = 1;
2244                                 break;
2245                         }
2246                         if (key.type == BTRFS_METADATA_ITEM_KEY &&
2247                             key.offset == owner_objectid) {
2248                                 found_extent = 1;
2249                                 break;
2250                         }
2251                         if (path->slots[0] - extent_slot > 5)
2252                                 break;
2253                         extent_slot--;
2254                 }
2255 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2256                 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
2257                 if (found_extent && item_size < sizeof(*ei))
2258                         found_extent = 0;
2259 #endif
2260                 if (!found_extent) {
2261                         BUG_ON(iref);
2262                         ret = remove_extent_backref(trans, extent_root, path,
2263                                                     NULL, refs_to_drop,
2264                                                     is_data);
2265                         BUG_ON(ret);
2266                         btrfs_release_path(extent_root, path);
2267                         path->leave_spinning = 1;
2268
2269                         key.objectid = bytenr;
2270
2271                         if (skinny_metadata) {
2272                                 key.type = BTRFS_METADATA_ITEM_KEY;
2273                                 key.offset = owner_objectid;
2274                         } else {
2275                                 key.type = BTRFS_EXTENT_ITEM_KEY;
2276                                 key.offset = num_bytes;
2277                         }
2278
2279                         ret = btrfs_search_slot(trans, extent_root,
2280                                                 &key, path, -1, 1);
2281                         if (ret > 0 && skinny_metadata && path->slots[0]) {
2282                                 path->slots[0]--;
2283                                 btrfs_item_key_to_cpu(path->nodes[0],
2284                                                       &key,
2285                                                       path->slots[0]);
2286                                 if (key.objectid == bytenr &&
2287                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
2288                                     key.offset == num_bytes)
2289                                         ret = 0;
2290                         }
2291
2292                         if (ret > 0 && skinny_metadata) {
2293                                 skinny_metadata = 0;
2294                                 btrfs_release_path(extent_root, path);
2295                                 key.type = BTRFS_EXTENT_ITEM_KEY;
2296                                 key.offset = num_bytes;
2297                                 ret = btrfs_search_slot(trans, extent_root,
2298                                                         &key, path, -1, 1);
2299                         }
2300
2301                         if (ret) {
2302                                 printk(KERN_ERR "umm, got %d back from search"
2303                                        ", was looking for %llu\n", ret,
2304                                        (unsigned long long)bytenr);
2305                                 btrfs_print_leaf(extent_root, path->nodes[0]);
2306                         }
2307                         BUG_ON(ret);
2308                         extent_slot = path->slots[0];
2309                 }
2310         } else {
2311                 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2312                        "parent %llu root %llu  owner %llu offset %llu\n",
2313                        (unsigned long long)bytenr,
2314                        (unsigned long long)parent,
2315                        (unsigned long long)root_objectid,
2316                        (unsigned long long)owner_objectid,
2317                        (unsigned long long)owner_offset);
2318                 ret = -EIO;
2319                 goto fail;
2320         }
2321
2322         leaf = path->nodes[0];
2323         item_size = btrfs_item_size_nr(leaf, extent_slot);
2324 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2325         if (item_size < sizeof(*ei)) {
2326                 BUG_ON(found_extent || extent_slot != path->slots[0]);
2327                 ret = convert_extent_item_v0(trans, extent_root, path,
2328                                              owner_objectid, 0);
2329                 BUG_ON(ret < 0);
2330
2331                 btrfs_release_path(extent_root, path);
2332                 path->leave_spinning = 1;
2333
2334                 key.objectid = bytenr;
2335                 key.type = BTRFS_EXTENT_ITEM_KEY;
2336                 key.offset = num_bytes;
2337
2338                 ret = btrfs_search_slot(trans, extent_root, &key, path,
2339                                         -1, 1);
2340                 if (ret) {
2341                         printk(KERN_ERR "umm, got %d back from search"
2342                                ", was looking for %llu\n", ret,
2343                                (unsigned long long)bytenr);
2344                         btrfs_print_leaf(extent_root, path->nodes[0]);
2345                 }
2346                 BUG_ON(ret);
2347                 extent_slot = path->slots[0];
2348                 leaf = path->nodes[0];
2349                 item_size = btrfs_item_size_nr(leaf, extent_slot);
2350         }
2351 #endif
2352         BUG_ON(item_size < sizeof(*ei));
2353         ei = btrfs_item_ptr(leaf, extent_slot,
2354                             struct btrfs_extent_item);
2355         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
2356             key.type == BTRFS_EXTENT_ITEM_KEY) {
2357                 struct btrfs_tree_block_info *bi;
2358                 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
2359                 bi = (struct btrfs_tree_block_info *)(ei + 1);
2360                 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
2361         }
2362
2363         refs = btrfs_extent_refs(leaf, ei);
2364         BUG_ON(refs < refs_to_drop);
2365         refs -= refs_to_drop;
2366
2367         if (refs > 0) {
2368                 /*
2369                  * In the case of inline back ref, reference count will
2370                  * be updated by remove_extent_backref
2371                  */
2372                 if (iref) {
2373                         BUG_ON(!found_extent);
2374                 } else {
2375                         btrfs_set_extent_refs(leaf, ei, refs);
2376                         btrfs_mark_buffer_dirty(leaf);
2377                 }
2378                 if (found_extent) {
2379                         ret = remove_extent_backref(trans, extent_root, path,
2380                                                     iref, refs_to_drop,
2381                                                     is_data);
2382                         BUG_ON(ret);
2383                 }
2384         } else {
2385                 int mark_free = 0;
2386                 int pin = 1;
2387
2388                 if (found_extent) {
2389                         BUG_ON(is_data && refs_to_drop !=
2390                                extent_data_ref_count(root, path, iref));
2391                         if (iref) {
2392                                 BUG_ON(path->slots[0] != extent_slot);
2393                         } else {
2394                                 BUG_ON(path->slots[0] != extent_slot + 1);
2395                                 path->slots[0] = extent_slot;
2396                                 num_to_del = 2;
2397                         }
2398                 }
2399
2400                 if (ops && ops->free_extent) {
2401                         ret = ops->free_extent(root, bytenr, num_bytes);
2402                         if (ret > 0) {
2403                                 pin = 0;
2404                                 mark_free = 0;
2405                         }
2406                 }
2407
2408                 if (pin) {
2409                         ret = pin_down_bytes(trans, root, bytenr, num_bytes,
2410                                              is_data);
2411                         if (ret > 0)
2412                                 mark_free = 1;
2413                         BUG_ON(ret < 0);
2414                 }
2415
2416                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
2417                                       num_to_del);
2418                 BUG_ON(ret);
2419                 btrfs_release_path(extent_root, path);
2420
2421                 if (is_data) {
2422                         ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
2423                         BUG_ON(ret);
2424                 }
2425
2426                 update_block_group(trans, root, bytenr, num_bytes, 0, mark_free);
2427         }
2428 fail:
2429         btrfs_free_path(path);
2430         finish_current_insert(trans, extent_root);
2431         return ret;
2432 }
2433
2434 /*
2435  * find all the blocks marked as pending in the radix tree and remove
2436  * them from the extent map
2437  */
2438 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
2439                                btrfs_root *extent_root)
2440 {
2441         int ret;
2442         int err = 0;
2443         u64 start;
2444         u64 end;
2445         u64 priv;
2446         struct extent_io_tree *pending_del;
2447         struct extent_io_tree *extent_ins;
2448         struct pending_extent_op *extent_op;
2449
2450         extent_ins = &extent_root->fs_info->extent_ins;
2451         pending_del = &extent_root->fs_info->pending_del;
2452
2453         while(1) {
2454                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
2455                                             EXTENT_LOCKED);
2456                 if (ret)
2457                         break;
2458
2459                 ret = get_state_private(pending_del, start, &priv);
2460                 BUG_ON(ret);
2461                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2462
2463                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
2464                                   GFP_NOFS);
2465
2466                 if (!test_range_bit(extent_ins, start, end,
2467                                     EXTENT_LOCKED, 0)) {
2468                         ret = __free_extent(trans, extent_root,
2469                                             start, end + 1 - start, 0,
2470                                             extent_root->root_key.objectid,
2471                                             extent_op->level, 0, 1);
2472                         kfree(extent_op);
2473                 } else {
2474                         kfree(extent_op);
2475                         ret = get_state_private(extent_ins, start, &priv);
2476                         BUG_ON(ret);
2477                         extent_op = (struct pending_extent_op *)
2478                                                         (unsigned long)priv;
2479
2480                         clear_extent_bits(extent_ins, start, end,
2481                                           EXTENT_LOCKED, GFP_NOFS);
2482
2483                         if (extent_op->type == PENDING_BACKREF_UPDATE)
2484                                 BUG_ON(1);
2485
2486                         kfree(extent_op);
2487                 }
2488                 if (ret)
2489                         err = ret;
2490         }
2491         return err;
2492 }
2493
2494 /*
2495  * remove an extent from the root, returns 0 on success
2496  */
2497
2498 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2499                       struct btrfs_root *root,
2500                       u64 bytenr, u64 num_bytes, u64 parent,
2501                       u64 root_objectid, u64 owner, u64 offset)
2502 {
2503         struct btrfs_root *extent_root = root->fs_info->extent_root;
2504         int pending_ret;
2505         int ret;
2506
2507         WARN_ON(num_bytes < root->sectorsize);
2508         if (root == extent_root) {
2509                 struct pending_extent_op *extent_op;
2510
2511                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2512                 BUG_ON(!extent_op);
2513
2514                 extent_op->type = PENDING_EXTENT_DELETE;
2515                 extent_op->bytenr = bytenr;
2516                 extent_op->num_bytes = num_bytes;
2517                 extent_op->level = (int)owner;
2518
2519                 set_extent_bits(&root->fs_info->pending_del,
2520                                 bytenr, bytenr + num_bytes - 1,
2521                                 EXTENT_LOCKED, GFP_NOFS);
2522                 set_state_private(&root->fs_info->pending_del,
2523                                   bytenr, (unsigned long)extent_op);
2524                 return 0;
2525         }
2526         ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2527                             root_objectid, owner, offset, 1);
2528         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
2529         return ret ? ret : pending_ret;
2530 }
2531
2532 static u64 stripe_align(struct btrfs_root *root, u64 val)
2533 {
2534         u64 mask = ((u64)root->stripesize - 1);
2535         u64 ret = (val + mask) & ~mask;
2536         return ret;
2537 }
2538
2539 /*
2540  * walks the btree of allocated extents and find a hole of a given size.
2541  * The key ins is changed to record the hole:
2542  * ins->objectid == block start
2543  * ins->flags = BTRFS_EXTENT_ITEM_KEY
2544  * ins->offset == number of blocks
2545  * Any available blocks before search_start are skipped.
2546  */
2547 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2548                                      struct btrfs_root *orig_root,
2549                                      u64 num_bytes, u64 empty_size,
2550                                      u64 search_start, u64 search_end,
2551                                      u64 hint_byte, struct btrfs_key *ins,
2552                                      u64 exclude_start, u64 exclude_nr,
2553                                      int data)
2554 {
2555         int ret;
2556         u64 orig_search_start = search_start;
2557         struct btrfs_root * root = orig_root->fs_info->extent_root;
2558         struct btrfs_fs_info *info = root->fs_info;
2559         u64 total_needed = num_bytes;
2560         struct btrfs_block_group_cache *block_group;
2561         int full_scan = 0;
2562         int wrapped = 0;
2563
2564         WARN_ON(num_bytes < root->sectorsize);
2565         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2566
2567         search_start = stripe_align(root, search_start);
2568
2569         if (hint_byte) {
2570                 block_group = btrfs_lookup_first_block_group(info, hint_byte);
2571                 if (!block_group)
2572                         hint_byte = search_start;
2573                 block_group = btrfs_find_block_group(root, block_group,
2574                                                      hint_byte, data, 1);
2575         } else {
2576                 block_group = btrfs_find_block_group(root,
2577                                                      trans->block_group,
2578                                                      search_start, data, 1);
2579         }
2580
2581         total_needed += empty_size;
2582
2583 check_failed:
2584         search_start = stripe_align(root, search_start);
2585         if (!block_group) {
2586                 block_group = btrfs_lookup_first_block_group(info,
2587                                                              search_start);
2588                 if (!block_group)
2589                         block_group = btrfs_lookup_first_block_group(info,
2590                                                        orig_search_start);
2591         }
2592         ret = find_search_start(root, &block_group, &search_start,
2593                                 total_needed, data);
2594         if (ret)
2595                 goto error;
2596
2597         ins->objectid = search_start;
2598         ins->offset = num_bytes;
2599
2600         if (ins->objectid + num_bytes >
2601             block_group->key.objectid + block_group->key.offset) {
2602                 search_start = block_group->key.objectid +
2603                         block_group->key.offset;
2604                 goto new_group;
2605         }
2606
2607         if (test_range_bit(&info->extent_ins, ins->objectid,
2608                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
2609                 search_start = ins->objectid + num_bytes;
2610                 goto new_group;
2611         }
2612
2613         if (test_range_bit(&info->pinned_extents, ins->objectid,
2614                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
2615                 search_start = ins->objectid + num_bytes;
2616                 goto new_group;
2617         }
2618
2619         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
2620             ins->objectid < exclude_start + exclude_nr)) {
2621                 search_start = exclude_start + exclude_nr;
2622                 goto new_group;
2623         }
2624
2625         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
2626                 block_group = btrfs_lookup_block_group(info, ins->objectid);
2627                 if (block_group)
2628                         trans->block_group = block_group;
2629         }
2630         ins->offset = num_bytes;
2631         return 0;
2632
2633 new_group:
2634         block_group = btrfs_lookup_first_block_group(info, search_start);
2635         if (!block_group) {
2636                 search_start = orig_search_start;
2637                 if (full_scan) {
2638                         ret = -ENOSPC;
2639                         goto error;
2640                 }
2641                 if (wrapped) {
2642                         if (!full_scan)
2643                                 total_needed -= empty_size;
2644                         full_scan = 1;
2645                 } else
2646                         wrapped = 1;
2647         }
2648         cond_resched();
2649         block_group = btrfs_find_block_group(root, block_group,
2650                                              search_start, data, 0);
2651         goto check_failed;
2652
2653 error:
2654         return ret;
2655 }
2656
2657 static int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2658                                 struct btrfs_root *root,
2659                                 u64 num_bytes, u64 empty_size,
2660                                 u64 hint_byte, u64 search_end,
2661                                 struct btrfs_key *ins, int data)
2662 {
2663         int ret;
2664         u64 search_start = 0;
2665         u64 alloc_profile;
2666         struct btrfs_fs_info *info = root->fs_info;
2667
2668         if (info->extent_ops) {
2669                 struct btrfs_extent_ops *ops = info->extent_ops;
2670                 ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
2671                 BUG_ON(ret);
2672                 goto found;
2673         }
2674
2675         if (data) {
2676                 alloc_profile = info->avail_data_alloc_bits &
2677                                 info->data_alloc_profile;
2678                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2679         } else if ((info->system_allocs > 0 || root == info->chunk_root) &&
2680                    info->system_allocs >= 0) {
2681                 alloc_profile = info->avail_system_alloc_bits &
2682                                 info->system_alloc_profile;
2683                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2684         } else {
2685                 alloc_profile = info->avail_metadata_alloc_bits &
2686                                 info->metadata_alloc_profile;
2687                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2688         }
2689
2690         if (root->ref_cows) {
2691                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
2692                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2693                                              num_bytes,
2694                                              BTRFS_BLOCK_GROUP_METADATA);
2695                         BUG_ON(ret);
2696                 }
2697                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2698                                      num_bytes + 2 * 1024 * 1024, data);
2699                 BUG_ON(ret);
2700         }
2701
2702         WARN_ON(num_bytes < root->sectorsize);
2703         ret = find_free_extent(trans, root, num_bytes, empty_size,
2704                                search_start, search_end, hint_byte, ins,
2705                                trans->alloc_exclude_start,
2706                                trans->alloc_exclude_nr, data);
2707         BUG_ON(ret);
2708 found:
2709         clear_extent_dirty(&root->fs_info->free_space_cache,
2710                            ins->objectid, ins->objectid + ins->offset - 1,
2711                            GFP_NOFS);
2712         return ret;
2713 }
2714
2715 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
2716                                      struct btrfs_root *root,
2717                                      u64 root_objectid, u64 generation,
2718                                      u64 flags, struct btrfs_disk_key *key,
2719                                      int level, struct btrfs_key *ins)
2720 {
2721         int ret;
2722         struct btrfs_fs_info *fs_info = root->fs_info;
2723         struct btrfs_extent_item *extent_item;
2724         struct btrfs_tree_block_info *block_info;
2725         struct btrfs_extent_inline_ref *iref;
2726         struct btrfs_path *path;
2727         struct extent_buffer *leaf;
2728         u32 size = sizeof(*extent_item) + sizeof(*iref);
2729         int skinny_metadata =
2730                 btrfs_fs_incompat(fs_info,
2731                                   BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
2732
2733         if (!skinny_metadata)
2734                 size += sizeof(*block_info);
2735
2736         path = btrfs_alloc_path();
2737         BUG_ON(!path);
2738
2739         path->leave_spinning = 1;
2740         ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
2741                                       ins, size);
2742         BUG_ON(ret);
2743
2744         leaf = path->nodes[0];
2745         extent_item = btrfs_item_ptr(leaf, path->slots[0],
2746                                      struct btrfs_extent_item);
2747         btrfs_set_extent_refs(leaf, extent_item, 1);
2748         btrfs_set_extent_generation(leaf, extent_item, generation);
2749         btrfs_set_extent_flags(leaf, extent_item,
2750                                flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
2751
2752         if (skinny_metadata) {
2753                 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
2754         } else {
2755                 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
2756                 btrfs_set_tree_block_key(leaf, block_info, key);
2757                 btrfs_set_tree_block_level(leaf, block_info, level);
2758                 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
2759         }
2760
2761         btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
2762         btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
2763
2764         btrfs_mark_buffer_dirty(leaf);
2765         btrfs_free_path(path);
2766
2767         ret = update_block_group(trans, root, ins->objectid, root->leafsize,
2768                                  1, 0);
2769         return 0;
2770 }
2771
2772 static int alloc_tree_block(struct btrfs_trans_handle *trans,
2773                             struct btrfs_root *root, u64 num_bytes,
2774                             u64 root_objectid, u64 generation,
2775                             u64 flags, struct btrfs_disk_key *key,
2776                             int level, u64 empty_size, u64 hint_byte,
2777                             u64 search_end, struct btrfs_key *ins)
2778 {
2779         int ret;
2780         ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size,
2781                                    hint_byte, search_end, ins, 0);
2782         BUG_ON(ret);
2783
2784         if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) {
2785                 struct pending_extent_op *extent_op;
2786
2787                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2788                 BUG_ON(!extent_op);
2789
2790                 extent_op->type = PENDING_EXTENT_INSERT;
2791                 extent_op->bytenr = ins->objectid;
2792                 extent_op->num_bytes = ins->offset;
2793                 extent_op->level = level;
2794                 extent_op->flags = flags;
2795                 memcpy(&extent_op->key, key, sizeof(*key));
2796
2797                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2798                                 ins->objectid + ins->offset - 1,
2799                                 EXTENT_LOCKED, GFP_NOFS);
2800                 set_state_private(&root->fs_info->extent_ins,
2801                                   ins->objectid, (unsigned long)extent_op);
2802         } else {
2803                 if (btrfs_fs_incompat(root->fs_info,
2804                                 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) {
2805                         ins->offset = level;
2806                         ins->type = BTRFS_METADATA_ITEM_KEY;
2807                 }
2808                 ret = alloc_reserved_tree_block(trans, root, root_objectid,
2809                                                 generation, flags,
2810                                                 key, level, ins);
2811                 finish_current_insert(trans, root->fs_info->extent_root);
2812                 del_pending_extents(trans, root->fs_info->extent_root);
2813         }
2814         return ret;
2815 }
2816
2817 /*
2818  * helper function to allocate a block for a given tree
2819  * returns the tree buffer or NULL.
2820  */
2821 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2822                                         struct btrfs_root *root,
2823                                         u32 blocksize, u64 root_objectid,
2824                                         struct btrfs_disk_key *key, int level,
2825                                         u64 hint, u64 empty_size)
2826 {
2827         struct btrfs_key ins;
2828         int ret;
2829         struct extent_buffer *buf;
2830
2831         ret = alloc_tree_block(trans, root, blocksize, root_objectid,
2832                                trans->transid, 0, key, level,
2833                                empty_size, hint, (u64)-1, &ins);
2834         if (ret) {
2835                 BUG_ON(ret > 0);
2836                 return ERR_PTR(ret);
2837         }
2838
2839         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
2840         if (!buf) {
2841                 btrfs_free_extent(trans, root, ins.objectid, ins.offset,
2842                                   0, root->root_key.objectid, level, 0);
2843                 BUG_ON(1);
2844                 return ERR_PTR(-ENOMEM);
2845         }
2846         btrfs_set_buffer_uptodate(buf);
2847         trans->blocks_used++;
2848
2849         return buf;
2850 }
2851
2852 #if 0
2853
2854 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2855                                   struct btrfs_root *root,
2856                                   struct extent_buffer *leaf)
2857 {
2858         u64 leaf_owner;
2859         u64 leaf_generation;
2860         struct btrfs_key key;
2861         struct btrfs_file_extent_item *fi;
2862         int i;
2863         int nritems;
2864         int ret;
2865
2866         BUG_ON(!btrfs_is_leaf(leaf));
2867         nritems = btrfs_header_nritems(leaf);
2868         leaf_owner = btrfs_header_owner(leaf);
2869         leaf_generation = btrfs_header_generation(leaf);
2870
2871         for (i = 0; i < nritems; i++) {
2872                 u64 disk_bytenr;
2873
2874                 btrfs_item_key_to_cpu(leaf, &key, i);
2875                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2876                         continue;
2877                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2878                 if (btrfs_file_extent_type(leaf, fi) ==
2879                     BTRFS_FILE_EXTENT_INLINE)
2880                         continue;
2881                 /*
2882                  * FIXME make sure to insert a trans record that
2883                  * repeats the snapshot del on crash
2884                  */
2885                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2886                 if (disk_bytenr == 0)
2887                         continue;
2888                 ret = btrfs_free_extent(trans, root, disk_bytenr,
2889                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2890                                 leaf->start, leaf_owner, leaf_generation,
2891                                 key.objectid, 0);
2892                 BUG_ON(ret);
2893         }
2894         return 0;
2895 }
2896
2897 static void noinline reada_walk_down(struct btrfs_root *root,
2898                                      struct extent_buffer *node,
2899                                      int slot)
2900 {
2901         u64 bytenr;
2902         u64 last = 0;
2903         u32 nritems;
2904         u32 refs;
2905         u32 blocksize;
2906         int ret;
2907         int i;
2908         int level;
2909         int skipped = 0;
2910
2911         nritems = btrfs_header_nritems(node);
2912         level = btrfs_header_level(node);
2913         if (level)
2914                 return;
2915
2916         for (i = slot; i < nritems && skipped < 32; i++) {
2917                 bytenr = btrfs_node_blockptr(node, i);
2918                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
2919                              (last > bytenr && last - bytenr > 32 * 1024))) {
2920                         skipped++;
2921                         continue;
2922                 }
2923                 blocksize = btrfs_level_size(root, level - 1);
2924                 if (i != slot) {
2925                         ret = btrfs_lookup_extent_ref(NULL, root, bytenr,
2926                                                       blocksize, &refs);
2927                         BUG_ON(ret);
2928                         if (refs != 1) {
2929                                 skipped++;
2930                                 continue;
2931                         }
2932                 }
2933                 mutex_unlock(&root->fs_info->fs_mutex);
2934                 ret = readahead_tree_block(root, bytenr, blocksize,
2935                                            btrfs_node_ptr_generation(node, i));
2936                 last = bytenr + blocksize;
2937                 cond_resched();
2938                 mutex_lock(&root->fs_info->fs_mutex);
2939                 if (ret)
2940                         break;
2941         }
2942 }
2943
2944 /*
2945  * helper function for drop_snapshot, this walks down the tree dropping ref
2946  * counts as it goes.
2947  */
2948 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2949                                    struct btrfs_root *root,
2950                                    struct btrfs_path *path, int *level)
2951 {
2952         u64 root_owner;
2953         u64 root_gen;
2954         u64 bytenr;
2955         u64 ptr_gen;
2956         struct extent_buffer *next;
2957         struct extent_buffer *cur;
2958         struct extent_buffer *parent;
2959         u32 blocksize;
2960         int ret;
2961         u32 refs;
2962
2963         WARN_ON(*level < 0);
2964         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2965         ret = btrfs_lookup_extent_ref(trans, root,
2966                                       path->nodes[*level]->start,
2967                                       path->nodes[*level]->len, &refs);
2968         BUG_ON(ret);
2969         if (refs > 1)
2970                 goto out;
2971
2972         /*
2973          * walk down to the last node level and free all the leaves
2974          */
2975         while(*level >= 0) {
2976                 WARN_ON(*level < 0);
2977                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2978                 cur = path->nodes[*level];
2979
2980                 if (btrfs_header_level(cur) != *level)
2981                         WARN_ON(1);
2982
2983                 if (path->slots[*level] >=
2984                     btrfs_header_nritems(cur))
2985                         break;
2986                 if (*level == 0) {
2987                         ret = drop_leaf_ref(trans, root, cur);
2988                         BUG_ON(ret);
2989                         break;
2990                 }
2991                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2992                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2993                 blocksize = btrfs_level_size(root, *level - 1);
2994                 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
2995                                               &refs);
2996                 BUG_ON(ret);
2997                 if (refs != 1) {
2998                         parent = path->nodes[*level];
2999                         root_owner = btrfs_header_owner(parent);
3000                         root_gen = btrfs_header_generation(parent);
3001                         path->slots[*level]++;
3002                         ret = btrfs_free_extent(trans, root, bytenr, blocksize,
3003                                                 parent->start, root_owner,
3004                                                 root_gen, *level - 1, 1);
3005                         BUG_ON(ret);
3006                         continue;
3007                 }
3008                 next = btrfs_find_tree_block(root, bytenr, blocksize);
3009                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
3010                         free_extent_buffer(next);
3011                         reada_walk_down(root, cur, path->slots[*level]);
3012                         mutex_unlock(&root->fs_info->fs_mutex);
3013                         next = read_tree_block(root, bytenr, blocksize,
3014                                                ptr_gen);
3015                         mutex_lock(&root->fs_info->fs_mutex);
3016                 }
3017                 WARN_ON(*level <= 0);
3018                 if (path->nodes[*level-1])
3019                         free_extent_buffer(path->nodes[*level-1]);
3020                 path->nodes[*level-1] = next;
3021                 *level = btrfs_header_level(next);
3022                 path->slots[*level] = 0;
3023         }
3024 out:
3025         WARN_ON(*level < 0);
3026         WARN_ON(*level >= BTRFS_MAX_LEVEL);
3027
3028         if (path->nodes[*level] == root->node) {
3029                 root_owner = root->root_key.objectid;
3030                 parent = path->nodes[*level];
3031         } else {
3032                 parent = path->nodes[*level + 1];
3033                 root_owner = btrfs_header_owner(parent);
3034         }
3035
3036         root_gen = btrfs_header_generation(parent);
3037         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
3038                                 path->nodes[*level]->len, parent->start,
3039                                 root_owner, root_gen, *level, 1);
3040         free_extent_buffer(path->nodes[*level]);
3041         path->nodes[*level] = NULL;
3042         *level += 1;
3043         BUG_ON(ret);
3044         return 0;
3045 }
3046
3047 /*
3048  * helper for dropping snapshots.  This walks back up the tree in the path
3049  * to find the first node higher up where we haven't yet gone through
3050  * all the slots
3051  */
3052 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3053                                  struct btrfs_root *root,
3054                                  struct btrfs_path *path, int *level)
3055 {
3056         u64 root_owner;
3057         u64 root_gen;
3058         struct btrfs_root_item *root_item = &root->root_item;
3059         int i;
3060         int slot;
3061         int ret;
3062
3063         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
3064                 slot = path->slots[i];
3065                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3066                         struct extent_buffer *node;
3067                         struct btrfs_disk_key disk_key;
3068                         node = path->nodes[i];
3069                         path->slots[i]++;
3070                         *level = i;
3071                         WARN_ON(*level == 0);
3072                         btrfs_node_key(node, &disk_key, path->slots[i]);
3073                         memcpy(&root_item->drop_progress,
3074                                &disk_key, sizeof(disk_key));
3075                         root_item->drop_level = i;
3076                         return 0;
3077                 } else {
3078                         struct extent_buffer *parent;
3079                         if (path->nodes[*level] == root->node)
3080                                 parent = path->nodes[*level];
3081                         else
3082                                 parent = path->nodes[*level + 1];
3083
3084                         root_owner = btrfs_header_owner(parent);
3085                         root_gen = btrfs_header_generation(parent);
3086                         ret = btrfs_free_extent(trans, root,
3087                                                 path->nodes[*level]->start,
3088                                                 path->nodes[*level]->len,
3089                                                 parent->start, root_owner,
3090                                                 root_gen, *level, 1);
3091                         BUG_ON(ret);
3092                         free_extent_buffer(path->nodes[*level]);
3093                         path->nodes[*level] = NULL;
3094                         *level = i + 1;
3095                 }
3096         }
3097         return 1;
3098 }
3099
3100 /*
3101  * drop the reference count on the tree rooted at 'snap'.  This traverses
3102  * the tree freeing any blocks that have a ref count of zero after being
3103  * decremented.
3104  */
3105 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3106                         *root)
3107 {
3108         int ret = 0;
3109         int wret;
3110         int level;
3111         struct btrfs_path *path;
3112         int i;
3113         int orig_level;
3114         struct btrfs_root_item *root_item = &root->root_item;
3115
3116         path = btrfs_alloc_path();
3117         BUG_ON(!path);
3118
3119         level = btrfs_header_level(root->node);
3120         orig_level = level;
3121         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3122                 path->nodes[level] = root->node;
3123                 extent_buffer_get(root->node);
3124                 path->slots[level] = 0;
3125         } else {
3126                 struct btrfs_key key;
3127                 struct btrfs_disk_key found_key;
3128                 struct extent_buffer *node;
3129
3130                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3131                 level = root_item->drop_level;
3132                 path->lowest_level = level;
3133                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3134                 if (wret < 0) {
3135                         ret = wret;
3136                         goto out;
3137                 }
3138                 node = path->nodes[level];
3139                 btrfs_node_key(node, &found_key, path->slots[level]);
3140                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3141                                sizeof(found_key)));
3142         }
3143         while(1) {
3144                 wret = walk_down_tree(trans, root, path, &level);
3145                 if (wret < 0)
3146                         ret = wret;
3147                 if (wret != 0)
3148                         break;
3149
3150                 wret = walk_up_tree(trans, root, path, &level);
3151                 if (wret < 0)
3152                         ret = wret;
3153                 if (wret != 0)
3154                         break;
3155                 /*
3156                 ret = -EAGAIN;
3157                 break;
3158                 */
3159         }
3160         for (i = 0; i <= orig_level; i++) {
3161                 if (path->nodes[i]) {
3162                         free_extent_buffer(path->nodes[i]);
3163                         path->nodes[i] = NULL;
3164                 }
3165         }
3166 out:
3167         btrfs_free_path(path);
3168         return ret;
3169 }
3170
3171 #endif
3172
3173 int btrfs_free_block_groups(struct btrfs_fs_info *info)
3174 {
3175         struct btrfs_space_info *sinfo;
3176         u64 start;
3177         u64 end;
3178         u64 ptr;
3179         int ret;
3180
3181         while(1) {
3182                 ret = find_first_extent_bit(&info->block_group_cache, 0,
3183                                             &start, &end, (unsigned int)-1);
3184                 if (ret)
3185                         break;
3186                 ret = get_state_private(&info->block_group_cache, start, &ptr);
3187                 if (!ret)
3188                         kfree((void *)(unsigned long)ptr);
3189                 clear_extent_bits(&info->block_group_cache, start,
3190                                   end, (unsigned int)-1, GFP_NOFS);
3191         }
3192         while(1) {
3193                 ret = find_first_extent_bit(&info->free_space_cache, 0,
3194                                             &start, &end, EXTENT_DIRTY);
3195                 if (ret)
3196                         break;
3197                 clear_extent_dirty(&info->free_space_cache, start,
3198                                    end, GFP_NOFS);
3199         }
3200
3201         while (!list_empty(&info->space_info)) {
3202                 sinfo = list_entry(info->space_info.next,
3203                                    struct btrfs_space_info, list);
3204                 list_del_init(&sinfo->list);
3205                 kfree(sinfo);
3206         }
3207         return 0;
3208 }
3209
3210 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3211                            struct btrfs_key *key)
3212 {
3213         int ret;
3214         struct btrfs_key found_key;
3215         struct extent_buffer *leaf;
3216         int slot;
3217
3218         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3219         if (ret < 0)
3220                 return ret;
3221         while(1) {
3222                 slot = path->slots[0];
3223                 leaf = path->nodes[0];
3224                 if (slot >= btrfs_header_nritems(leaf)) {
3225                         ret = btrfs_next_leaf(root, path);
3226                         if (ret == 0)
3227                                 continue;
3228                         if (ret < 0)
3229                                 goto error;
3230                         break;
3231                 }
3232                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3233
3234                 if (found_key.objectid >= key->objectid &&
3235                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
3236                         return 0;
3237                 path->slots[0]++;
3238         }
3239         ret = -ENOENT;
3240 error:
3241         return ret;
3242 }
3243
3244 int btrfs_read_block_groups(struct btrfs_root *root)
3245 {
3246         struct btrfs_path *path;
3247         int ret;
3248         int bit;
3249         struct btrfs_block_group_cache *cache;
3250         struct btrfs_fs_info *info = root->fs_info;
3251         struct btrfs_space_info *space_info;
3252         struct extent_io_tree *block_group_cache;
3253         struct btrfs_key key;
3254         struct btrfs_key found_key;
3255         struct extent_buffer *leaf;
3256
3257         block_group_cache = &info->block_group_cache;
3258
3259         root = info->extent_root;
3260         key.objectid = 0;
3261         key.offset = 0;
3262         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3263         path = btrfs_alloc_path();
3264         if (!path)
3265                 return -ENOMEM;
3266
3267         while(1) {
3268                 ret = find_first_block_group(root, path, &key);
3269                 if (ret > 0) {
3270                         ret = 0;
3271                         goto error;
3272                 }
3273                 if (ret != 0) {
3274                         goto error;
3275                 }
3276                 leaf = path->nodes[0];
3277                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3278                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3279                 if (!cache) {
3280                         ret = -ENOMEM;
3281                         break;
3282                 }
3283
3284                 read_extent_buffer(leaf, &cache->item,
3285                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3286                                    sizeof(cache->item));
3287                 memcpy(&cache->key, &found_key, sizeof(found_key));
3288                 cache->cached = 0;
3289                 cache->pinned = 0;
3290                 key.objectid = found_key.objectid + found_key.offset;
3291                 btrfs_release_path(root, path);
3292                 cache->flags = btrfs_block_group_flags(&cache->item);
3293                 bit = 0;
3294                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3295                         bit = BLOCK_GROUP_DATA;
3296                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3297                         bit = BLOCK_GROUP_SYSTEM;
3298                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3299                         bit = BLOCK_GROUP_METADATA;
3300                 }
3301                 set_avail_alloc_bits(info, cache->flags);
3302                 if (btrfs_chunk_readonly(root, cache->key.objectid))
3303                         cache->ro = 1;
3304
3305                 ret = update_space_info(info, cache->flags, found_key.offset,
3306                                         btrfs_block_group_used(&cache->item),
3307                                         &space_info);
3308                 BUG_ON(ret);
3309                 cache->space_info = space_info;
3310
3311                 /* use EXTENT_LOCKED to prevent merging */
3312                 set_extent_bits(block_group_cache, found_key.objectid,
3313                                 found_key.objectid + found_key.offset - 1,
3314                                 bit | EXTENT_LOCKED, GFP_NOFS);
3315                 set_state_private(block_group_cache, found_key.objectid,
3316                                   (unsigned long)cache);
3317         }
3318         ret = 0;
3319 error:
3320         btrfs_free_path(path);
3321         return ret;
3322 }
3323
3324 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3325                            struct btrfs_root *root, u64 bytes_used,
3326                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3327                            u64 size)
3328 {
3329         int ret;
3330         int bit = 0;
3331         struct btrfs_root *extent_root;
3332         struct btrfs_block_group_cache *cache;
3333         struct extent_io_tree *block_group_cache;
3334
3335         extent_root = root->fs_info->extent_root;
3336         block_group_cache = &root->fs_info->block_group_cache;
3337
3338         cache = kzalloc(sizeof(*cache), GFP_NOFS);
3339         BUG_ON(!cache);
3340         cache->key.objectid = chunk_offset;
3341         cache->key.offset = size;
3342
3343         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3344         btrfs_set_block_group_used(&cache->item, bytes_used);
3345         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3346         cache->flags = type;
3347         btrfs_set_block_group_flags(&cache->item, type);
3348
3349         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3350                                 &cache->space_info);
3351         BUG_ON(ret);
3352
3353         bit = block_group_state_bits(type);
3354         set_extent_bits(block_group_cache, chunk_offset,
3355                         chunk_offset + size - 1,
3356                         bit | EXTENT_LOCKED, GFP_NOFS);
3357
3358         set_state_private(block_group_cache, chunk_offset,
3359                           (unsigned long)cache);
3360         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3361                                 sizeof(cache->item));
3362         BUG_ON(ret);
3363
3364         finish_current_insert(trans, extent_root);
3365         ret = del_pending_extents(trans, extent_root);
3366         set_avail_alloc_bits(extent_root->fs_info, type);
3367         return 0;
3368 }
3369
3370 /*
3371  * This is for converter use only.
3372  *
3373  * In that case, we don't know where are free blocks located.
3374  * Therefore all block group cache entries must be setup properly
3375  * before doing any block allocation.
3376  */
3377 int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
3378                             struct btrfs_root *root)
3379 {
3380         u64 total_bytes;
3381         u64 cur_start;
3382         u64 group_type;
3383         u64 group_size;
3384         u64 group_align;
3385         u64 total_data = 0;
3386         u64 total_metadata = 0;
3387         u64 chunk_objectid;
3388         int ret;
3389         int bit;
3390         struct btrfs_root *extent_root;
3391         struct btrfs_block_group_cache *cache;
3392         struct extent_io_tree *block_group_cache;
3393
3394         extent_root = root->fs_info->extent_root;
3395         block_group_cache = &root->fs_info->block_group_cache;
3396         chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3397         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
3398         group_align = 64 * root->sectorsize;
3399
3400         cur_start = 0;
3401         while (cur_start < total_bytes) {
3402                 group_size = total_bytes / 12;
3403                 group_size = min_t(u64, group_size, total_bytes - cur_start);
3404                 if (cur_start == 0) {
3405                         bit = BLOCK_GROUP_SYSTEM;
3406                         group_type = BTRFS_BLOCK_GROUP_SYSTEM;
3407                         group_size /= 4;
3408                         group_size &= ~(group_align - 1);
3409                         group_size = max_t(u64, group_size, 8 * 1024 * 1024);
3410                         group_size = min_t(u64, group_size, 32 * 1024 * 1024);
3411                 } else {
3412                         group_size &= ~(group_align - 1);
3413                         if (total_data >= total_metadata * 2) {
3414                                 group_type = BTRFS_BLOCK_GROUP_METADATA;
3415                                 group_size = min_t(u64, group_size,
3416                                                    1ULL * 1024 * 1024 * 1024);
3417                                 total_metadata += group_size;
3418                         } else {
3419                                 group_type = BTRFS_BLOCK_GROUP_DATA;
3420                                 group_size = min_t(u64, group_size,
3421                                                    5ULL * 1024 * 1024 * 1024);
3422                                 total_data += group_size;
3423                         }
3424                         if ((total_bytes - cur_start) * 4 < group_size * 5)
3425                                 group_size = total_bytes - cur_start;
3426                 }
3427
3428                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3429                 BUG_ON(!cache);
3430
3431                 cache->key.objectid = cur_start;
3432                 cache->key.offset = group_size;
3433                 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3434
3435                 btrfs_set_block_group_used(&cache->item, 0);
3436                 btrfs_set_block_group_chunk_objectid(&cache->item,
3437                                                      chunk_objectid);
3438                 btrfs_set_block_group_flags(&cache->item, group_type);
3439
3440                 cache->flags = group_type;
3441
3442                 ret = update_space_info(root->fs_info, group_type, group_size,
3443                                         0, &cache->space_info);
3444                 BUG_ON(ret);
3445                 set_avail_alloc_bits(extent_root->fs_info, group_type);
3446
3447                 set_extent_bits(block_group_cache, cur_start,
3448                                 cur_start + group_size - 1,
3449                                 bit | EXTENT_LOCKED, GFP_NOFS);
3450                 set_state_private(block_group_cache, cur_start,
3451                                   (unsigned long)cache);
3452                 cur_start += group_size;
3453         }
3454         /* then insert all the items */
3455         cur_start = 0;
3456         while(cur_start < total_bytes) {
3457                 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
3458                 BUG_ON(!cache);
3459
3460                 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3461                                         sizeof(cache->item));
3462                 BUG_ON(ret);
3463
3464                 finish_current_insert(trans, extent_root);
3465                 ret = del_pending_extents(trans, extent_root);
3466                 BUG_ON(ret);
3467
3468                 cur_start = cache->key.objectid + cache->key.offset;
3469         }
3470         return 0;
3471 }
3472
3473 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
3474                              struct btrfs_root *root,
3475                              u64 bytenr, u64 num_bytes, int alloc,
3476                              int mark_free)
3477 {
3478         return update_block_group(trans, root, bytenr, num_bytes,
3479                                   alloc, mark_free);
3480 }
3481
3482 static int btrfs_count_extents_in_block_group(struct btrfs_root *root,
3483                                               struct btrfs_path *path, u64 start,
3484                                               u64 len,
3485                                               u64 *total)
3486 {
3487         struct btrfs_key key;
3488         struct extent_buffer *leaf;
3489         u64 bytes_used = 0;
3490         int ret;
3491         int slot;
3492
3493         key.offset = 0;
3494         key.objectid = start;
3495         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
3496         ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
3497                                 &key, path, 0, 0);
3498         if (ret < 0)
3499                 return ret;
3500         while(1) {
3501                 leaf = path->nodes[0];
3502                 slot = path->slots[0];
3503                 if (slot >= btrfs_header_nritems(leaf)) {
3504                         ret = btrfs_next_leaf(root, path);
3505                         if (ret < 0)
3506                                 return ret;
3507                         if (ret > 0)
3508                                 break;
3509                         leaf = path->nodes[0];
3510                         slot = path->slots[0];
3511                 }
3512                 btrfs_item_key_to_cpu(leaf, &key, slot);
3513                 if (key.objectid > start + len)
3514                         break;
3515                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3516                         bytes_used += key.offset;
3517                 if (key.type == BTRFS_METADATA_ITEM_KEY)
3518                         bytes_used += root->leafsize;
3519                 path->slots[0]++;
3520         }
3521         *total = bytes_used;
3522         btrfs_release_path(root, path);
3523         return 0;
3524 }
3525
3526 int btrfs_check_block_accounting(struct btrfs_root *root)
3527 {
3528         int ret;
3529         u64 start = 0;
3530         u64 bytes_used = 0;
3531         struct btrfs_path path;
3532         struct btrfs_block_group_cache *cache;
3533         struct btrfs_fs_info *fs_info = root->fs_info;
3534
3535         btrfs_init_path(&path);
3536
3537         while(1) {
3538                 cache = btrfs_lookup_block_group(fs_info, start);
3539                 if (!cache)
3540                         break;
3541
3542                 ret = btrfs_count_extents_in_block_group(root, &path,
3543                                                          cache->key.objectid,
3544                                                          cache->key.offset,
3545                                                          &bytes_used);
3546
3547                 if (ret == 0) {
3548                         u64 on_disk = btrfs_block_group_used(&cache->item);
3549                         if (on_disk != bytes_used) {
3550                                 fprintf(stderr, "bad block group accounting found %llu "
3551                                         "expected %llu block group %llu\n",
3552                                         (unsigned long long)bytes_used,
3553                                         (unsigned long long)on_disk,
3554                                         (unsigned long long)cache->key.objectid);
3555                         }
3556                 }
3557                 start = cache->key.objectid + cache->key.offset;
3558
3559                 cache->space_info->bytes_used = 0;
3560         }
3561         return 0;
3562 }
3563
3564 /*
3565  * Fixup block accounting. The initial block accounting created by
3566  * make_block_groups isn't accuracy in this case.
3567  */
3568 int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
3569                                struct btrfs_root *root)
3570 {
3571         int ret;
3572         int slot;
3573         u64 start = 0;
3574         u64 bytes_used = 0;
3575         struct btrfs_path path;
3576         struct btrfs_key key;
3577         struct extent_buffer *leaf;
3578         struct btrfs_block_group_cache *cache;
3579         struct btrfs_fs_info *fs_info = root->fs_info;
3580
3581         root = root->fs_info->extent_root;
3582
3583         while(extent_root_pending_ops(fs_info)) {
3584                 ret = finish_current_insert(trans, root);
3585                 if (ret)
3586                         return ret;
3587                 ret = del_pending_extents(trans, root);
3588                 if (ret)
3589                         return ret;
3590         }
3591
3592         while(1) {
3593                 cache = btrfs_lookup_first_block_group(fs_info, start);
3594                 if (!cache)
3595                         break;
3596                 start = cache->key.objectid + cache->key.offset;
3597                 btrfs_set_block_group_used(&cache->item, 0);
3598                 cache->space_info->bytes_used = 0;
3599                 set_extent_bits(&root->fs_info->block_group_cache,
3600                                 cache->key.objectid,
3601                                 cache->key.objectid + cache->key.offset -1,
3602                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
3603         }
3604
3605         btrfs_init_path(&path);
3606         key.offset = 0;
3607         key.objectid = 0;
3608         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
3609         ret = btrfs_search_slot(trans, root->fs_info->extent_root,
3610                                 &key, &path, 0, 0);
3611         if (ret < 0)
3612                 return ret;
3613         while(1) {
3614                 leaf = path.nodes[0];
3615                 slot = path.slots[0];
3616                 if (slot >= btrfs_header_nritems(leaf)) {
3617                         ret = btrfs_next_leaf(root, &path);
3618                         if (ret < 0)
3619                                 return ret;
3620                         if (ret > 0)
3621                                 break;
3622                         leaf = path.nodes[0];
3623                         slot = path.slots[0];
3624                 }
3625                 btrfs_item_key_to_cpu(leaf, &key, slot);
3626                 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
3627                         bytes_used += key.offset;
3628                         ret = btrfs_update_block_group(trans, root,
3629                                   key.objectid, key.offset, 1, 0);
3630                         BUG_ON(ret);
3631                 } else if (key.type == BTRFS_METADATA_ITEM_KEY) {
3632                         bytes_used += root->leafsize;
3633                         ret = btrfs_update_block_group(trans, root,
3634                                   key.objectid, root->leafsize, 1, 0);
3635                         BUG_ON(ret);
3636                 }
3637                 path.slots[0]++;
3638         }
3639         btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used);
3640         btrfs_release_path(root, &path);
3641         return 0;
3642 }