Btrfs-progs: fix the allocator
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "radix-tree.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "crc32c.h"
28 #include "volumes.h"
29 #include "free-space-cache.h"
30
31 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
32 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
33 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
34
35 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
36
37 #define PENDING_EXTENT_INSERT 0
38 #define PENDING_EXTENT_DELETE 1
39 #define PENDING_BACKREF_UPDATE 2
40
41 struct pending_extent_op {
42         int type;
43         u64 bytenr;
44         u64 num_bytes;
45         u64 flags;
46         struct btrfs_disk_key key;
47         int level;
48 };
49
50 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
51                                      struct btrfs_root *root,
52                                      u64 root_objectid, u64 generation,
53                                      u64 flags, struct btrfs_disk_key *key,
54                                      int level, struct btrfs_key *ins);
55 static int __free_extent(struct btrfs_trans_handle *trans,
56                          struct btrfs_root *root,
57                          u64 bytenr, u64 num_bytes, u64 parent,
58                          u64 root_objectid, u64 owner_objectid,
59                          u64 owner_offset, int refs_to_drop);
60 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
61                                  btrfs_root *extent_root);
62 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
63                                btrfs_root *extent_root);
64
65 static int remove_sb_from_cache(struct btrfs_root *root,
66                                 struct btrfs_block_group_cache *cache)
67 {
68         u64 bytenr;
69         u64 *logical;
70         int stripe_len;
71         int i, nr, ret;
72         struct extent_io_tree *free_space_cache;
73
74         free_space_cache = &root->fs_info->free_space_cache;
75         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
76                 bytenr = btrfs_sb_offset(i);
77                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
78                                        cache->key.objectid, bytenr, 0,
79                                        &logical, &nr, &stripe_len);
80                 BUG_ON(ret);
81                 while (nr--) {
82                         clear_extent_dirty(free_space_cache, logical[nr],
83                                 logical[nr] + stripe_len - 1, GFP_NOFS);
84                 }
85                 kfree(logical);
86         }
87         return 0;
88 }
89
90 static int cache_block_group(struct btrfs_root *root,
91                              struct btrfs_block_group_cache *block_group)
92 {
93         struct btrfs_path *path;
94         int ret;
95         struct btrfs_key key;
96         struct extent_buffer *leaf;
97         struct extent_io_tree *free_space_cache;
98         int slot;
99         u64 last;
100         u64 hole_size;
101
102         if (!block_group)
103                 return 0;
104
105         root = root->fs_info->extent_root;
106         free_space_cache = &root->fs_info->free_space_cache;
107
108         if (block_group->cached)
109                 return 0;
110
111         path = btrfs_alloc_path();
112         if (!path)
113                 return -ENOMEM;
114
115         path->reada = 2;
116         last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
117         key.objectid = last;
118         key.offset = 0;
119         key.type = 0;
120
121         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
122         if (ret < 0)
123                 goto err;
124
125         while(1) {
126                 leaf = path->nodes[0];
127                 slot = path->slots[0];
128                 if (slot >= btrfs_header_nritems(leaf)) {
129                         ret = btrfs_next_leaf(root, path);
130                         if (ret < 0)
131                                 goto err;
132                         if (ret == 0) {
133                                 continue;
134                         } else {
135                                 break;
136                         }
137                 }
138                 btrfs_item_key_to_cpu(leaf, &key, slot);
139                 if (key.objectid < block_group->key.objectid) {
140                         goto next;
141                 }
142                 if (key.objectid >= block_group->key.objectid +
143                     block_group->key.offset) {
144                         break;
145                 }
146
147                 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
148                     key.type == BTRFS_METADATA_ITEM_KEY) {
149                         if (key.objectid > last) {
150                                 hole_size = key.objectid - last;
151                                 set_extent_dirty(free_space_cache, last,
152                                                  last + hole_size - 1,
153                                                  GFP_NOFS);
154                         }
155                         if (key.type == BTRFS_METADATA_ITEM_KEY)
156                                 last = key.objectid + root->leafsize;
157                         else
158                                 last = key.objectid + key.offset;
159                 }
160 next:
161                 path->slots[0]++;
162         }
163
164         if (block_group->key.objectid +
165             block_group->key.offset > last) {
166                 hole_size = block_group->key.objectid +
167                         block_group->key.offset - last;
168                 set_extent_dirty(free_space_cache, last,
169                                  last + hole_size - 1, GFP_NOFS);
170         }
171         remove_sb_from_cache(root, block_group);
172         block_group->cached = 1;
173 err:
174         btrfs_free_path(path);
175         return 0;
176 }
177
178 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
179                                                        btrfs_fs_info *info,
180                                                        u64 bytenr)
181 {
182         struct extent_io_tree *block_group_cache;
183         struct btrfs_block_group_cache *block_group = NULL;
184         u64 ptr;
185         u64 start;
186         u64 end;
187         int ret;
188
189         bytenr = max_t(u64, bytenr,
190                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
191         block_group_cache = &info->block_group_cache;
192         ret = find_first_extent_bit(block_group_cache,
193                                     bytenr, &start, &end,
194                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
195                                     BLOCK_GROUP_SYSTEM);
196         if (ret) {
197                 return NULL;
198         }
199         ret = get_state_private(block_group_cache, start, &ptr);
200         if (ret)
201                 return NULL;
202
203         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
204         return block_group;
205 }
206
207 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
208                                                          btrfs_fs_info *info,
209                                                          u64 bytenr)
210 {
211         struct extent_io_tree *block_group_cache;
212         struct btrfs_block_group_cache *block_group = NULL;
213         u64 ptr;
214         u64 start;
215         u64 end;
216         int ret;
217
218         block_group_cache = &info->block_group_cache;
219         ret = find_first_extent_bit(block_group_cache,
220                                     bytenr, &start, &end,
221                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
222                                     BLOCK_GROUP_SYSTEM);
223         if (ret) {
224                 return NULL;
225         }
226         ret = get_state_private(block_group_cache, start, &ptr);
227         if (ret)
228                 return NULL;
229
230         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
231         if (block_group->key.objectid <= bytenr && bytenr <
232             block_group->key.objectid + block_group->key.offset)
233                 return block_group;
234         return NULL;
235 }
236
237 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
238 {
239         return (cache->flags & bits) == bits;
240 }
241
242 static int noinline find_search_start(struct btrfs_root *root,
243                               struct btrfs_block_group_cache **cache_ret,
244                               u64 *start_ret, int num, int data)
245 {
246         int ret;
247         struct btrfs_block_group_cache *cache = *cache_ret;
248         u64 last = *start_ret;
249         u64 start = 0;
250         u64 end = 0;
251         u64 search_start = *start_ret;
252         int wrapped = 0;
253
254         if (!cache)
255                 goto out;
256 again:
257         ret = cache_block_group(root, cache);
258         if (ret)
259                 goto out;
260
261         last = max(search_start, cache->key.objectid);
262         if (cache->ro || !block_group_bits(cache, data))
263                 goto new_group;
264
265         while(1) {
266                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
267                                             last, &start, &end, EXTENT_DIRTY);
268                 if (ret) {
269                         goto new_group;
270                 }
271
272                 start = max(last, start);
273                 last = end + 1;
274                 if (last - start < num) {
275                         continue;
276                 }
277                 if (start + num > cache->key.objectid + cache->key.offset) {
278                         goto new_group;
279                 }
280                 *start_ret = start;
281                 return 0;
282         }
283 out:
284         *start_ret = last;
285         cache = btrfs_lookup_block_group(root->fs_info, search_start);
286         if (!cache) {
287                 printk("Unable to find block group for %llu\n",
288                         (unsigned long long)search_start);
289                 WARN_ON(1);
290         }
291         return -ENOSPC;
292
293 new_group:
294         last = cache->key.objectid + cache->key.offset;
295 wrapped:
296         cache = btrfs_lookup_first_block_group(root->fs_info, last);
297         if (!cache) {
298                 if (!wrapped) {
299                         wrapped = 1;
300                         last = search_start;
301                         goto wrapped;
302                 }
303                 goto out;
304         }
305         *cache_ret = cache;
306         goto again;
307 }
308
309 static u64 div_factor(u64 num, int factor)
310 {
311         if (factor == 10)
312                 return num;
313         num *= factor;
314         num /= 10;
315         return num;
316 }
317
318 static int block_group_state_bits(u64 flags)
319 {
320         int bits = 0;
321         if (flags & BTRFS_BLOCK_GROUP_DATA)
322                 bits |= BLOCK_GROUP_DATA;
323         if (flags & BTRFS_BLOCK_GROUP_METADATA)
324                 bits |= BLOCK_GROUP_METADATA;
325         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
326                 bits |= BLOCK_GROUP_SYSTEM;
327         return bits;
328 }
329
330 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
331                                                  struct btrfs_block_group_cache
332                                                  *hint, u64 search_start,
333                                                  int data, int owner)
334 {
335         struct btrfs_block_group_cache *cache;
336         struct extent_io_tree *block_group_cache;
337         struct btrfs_block_group_cache *found_group = NULL;
338         struct btrfs_fs_info *info = root->fs_info;
339         u64 used;
340         u64 last = 0;
341         u64 hint_last;
342         u64 start;
343         u64 end;
344         u64 free_check;
345         u64 ptr;
346         int bit;
347         int ret;
348         int full_search = 0;
349         int factor = 10;
350
351         block_group_cache = &info->block_group_cache;
352
353         if (!owner)
354                 factor = 10;
355
356         bit = block_group_state_bits(data);
357
358         if (search_start) {
359                 struct btrfs_block_group_cache *shint;
360                 shint = btrfs_lookup_block_group(info, search_start);
361                 if (shint && !shint->ro && block_group_bits(shint, data)) {
362                         used = btrfs_block_group_used(&shint->item);
363                         if (used + shint->pinned <
364                             div_factor(shint->key.offset, factor)) {
365                                 return shint;
366                         }
367                 }
368         }
369         if (hint && !hint->ro && block_group_bits(hint, data)) {
370                 used = btrfs_block_group_used(&hint->item);
371                 if (used + hint->pinned <
372                     div_factor(hint->key.offset, factor)) {
373                         return hint;
374                 }
375                 last = hint->key.objectid + hint->key.offset;
376                 hint_last = last;
377         } else {
378                 if (hint)
379                         hint_last = max(hint->key.objectid, search_start);
380                 else
381                         hint_last = search_start;
382
383                 last = hint_last;
384         }
385 again:
386         while(1) {
387                 ret = find_first_extent_bit(block_group_cache, last,
388                                             &start, &end, bit);
389                 if (ret)
390                         break;
391
392                 ret = get_state_private(block_group_cache, start, &ptr);
393                 if (ret)
394                         break;
395
396                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
397                 last = cache->key.objectid + cache->key.offset;
398                 used = btrfs_block_group_used(&cache->item);
399
400                 if (!cache->ro && block_group_bits(cache, data)) {
401                         if (full_search)
402                                 free_check = cache->key.offset;
403                         else
404                                 free_check = div_factor(cache->key.offset,
405                                                         factor);
406
407                         if (used + cache->pinned < free_check) {
408                                 found_group = cache;
409                                 goto found;
410                         }
411                 }
412                 cond_resched();
413         }
414         if (!full_search) {
415                 last = search_start;
416                 full_search = 1;
417                 goto again;
418         }
419 found:
420         return found_group;
421 }
422
423 /*
424  * Back reference rules.  Back refs have three main goals:
425  *
426  * 1) differentiate between all holders of references to an extent so that
427  *    when a reference is dropped we can make sure it was a valid reference
428  *    before freeing the extent.
429  *
430  * 2) Provide enough information to quickly find the holders of an extent
431  *    if we notice a given block is corrupted or bad.
432  *
433  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
434  *    maintenance.  This is actually the same as #2, but with a slightly
435  *    different use case.
436  *
437  * There are two kinds of back refs. The implicit back refs is optimized
438  * for pointers in non-shared tree blocks. For a given pointer in a block,
439  * back refs of this kind provide information about the block's owner tree
440  * and the pointer's key. These information allow us to find the block by
441  * b-tree searching. The full back refs is for pointers in tree blocks not
442  * referenced by their owner trees. The location of tree block is recorded
443  * in the back refs. Actually the full back refs is generic, and can be
444  * used in all cases the implicit back refs is used. The major shortcoming
445  * of the full back refs is its overhead. Every time a tree block gets
446  * COWed, we have to update back refs entry for all pointers in it.
447  *
448  * For a newly allocated tree block, we use implicit back refs for
449  * pointers in it. This means most tree related operations only involve
450  * implicit back refs. For a tree block created in old transaction, the
451  * only way to drop a reference to it is COW it. So we can detect the
452  * event that tree block loses its owner tree's reference and do the
453  * back refs conversion.
454  *
455  * When a tree block is COW'd through a tree, there are four cases:
456  *
457  * The reference count of the block is one and the tree is the block's
458  * owner tree. Nothing to do in this case.
459  *
460  * The reference count of the block is one and the tree is not the
461  * block's owner tree. In this case, full back refs is used for pointers
462  * in the block. Remove these full back refs, add implicit back refs for
463  * every pointers in the new block.
464  *
465  * The reference count of the block is greater than one and the tree is
466  * the block's owner tree. In this case, implicit back refs is used for
467  * pointers in the block. Add full back refs for every pointers in the
468  * block, increase lower level extents' reference counts. The original
469  * implicit back refs are entailed to the new block.
470  *
471  * The reference count of the block is greater than one and the tree is
472  * not the block's owner tree. Add implicit back refs for every pointer in
473  * the new block, increase lower level extents' reference count.
474  *
475  * Back Reference Key composing:
476  *
477  * The key objectid corresponds to the first byte in the extent,
478  * The key type is used to differentiate between types of back refs.
479  * There are different meanings of the key offset for different types
480  * of back refs.
481  *
482  * File extents can be referenced by:
483  *
484  * - multiple snapshots, subvolumes, or different generations in one subvol
485  * - different files inside a single subvolume
486  * - different offsets inside a file (bookend extents in file.c)
487  *
488  * The extent ref structure for the implicit back refs has fields for:
489  *
490  * - Objectid of the subvolume root
491  * - objectid of the file holding the reference
492  * - original offset in the file
493  * - how many bookend extents
494  *
495  * The key offset for the implicit back refs is hash of the first
496  * three fields.
497  *
498  * The extent ref structure for the full back refs has field for:
499  *
500  * - number of pointers in the tree leaf
501  *
502  * The key offset for the implicit back refs is the first byte of
503  * the tree leaf
504  *
505  * When a file extent is allocated, The implicit back refs is used.
506  * the fields are filled in:
507  *
508  *     (root_key.objectid, inode objectid, offset in file, 1)
509  *
510  * When a file extent is removed file truncation, we find the
511  * corresponding implicit back refs and check the following fields:
512  *
513  *     (btrfs_header_owner(leaf), inode objectid, offset in file)
514  *
515  * Btree extents can be referenced by:
516  *
517  * - Different subvolumes
518  *
519  * Both the implicit back refs and the full back refs for tree blocks
520  * only consist of key. The key offset for the implicit back refs is
521  * objectid of block's owner tree. The key offset for the full back refs
522  * is the first byte of parent block.
523  *
524  * When implicit back refs is used, information about the lowest key and
525  * level of the tree block are required. These information are stored in
526  * tree block info structure.
527  */
528
529 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
530 static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
531                                   struct btrfs_root *root,
532                                   struct btrfs_path *path,
533                                   u64 owner, u32 extra_size)
534 {
535         struct btrfs_extent_item *item;
536         struct btrfs_extent_item_v0 *ei0;
537         struct btrfs_extent_ref_v0 *ref0;
538         struct btrfs_tree_block_info *bi;
539         struct extent_buffer *leaf;
540         struct btrfs_key key;
541         struct btrfs_key found_key;
542         u32 new_size = sizeof(*item);
543         u64 refs;
544         int ret;
545
546         leaf = path->nodes[0];
547         BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
548
549         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
550         ei0 = btrfs_item_ptr(leaf, path->slots[0],
551                              struct btrfs_extent_item_v0);
552         refs = btrfs_extent_refs_v0(leaf, ei0);
553
554         if (owner == (u64)-1) {
555                 while (1) {
556                         if (path->slots[0] >= btrfs_header_nritems(leaf)) {
557                                 ret = btrfs_next_leaf(root, path);
558                                 if (ret < 0)
559                                         return ret;
560                                 BUG_ON(ret > 0);
561                                 leaf = path->nodes[0];
562                         }
563                         btrfs_item_key_to_cpu(leaf, &found_key,
564                                               path->slots[0]);
565                         BUG_ON(key.objectid != found_key.objectid);
566                         if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
567                                 path->slots[0]++;
568                                 continue;
569                         }
570                         ref0 = btrfs_item_ptr(leaf, path->slots[0],
571                                               struct btrfs_extent_ref_v0);
572                         owner = btrfs_ref_objectid_v0(leaf, ref0);
573                         break;
574                 }
575         }
576         btrfs_release_path(root, path);
577
578         if (owner < BTRFS_FIRST_FREE_OBJECTID)
579                 new_size += sizeof(*bi);
580
581         new_size -= sizeof(*ei0);
582         ret = btrfs_search_slot(trans, root, &key, path, new_size, 1);
583         if (ret < 0)
584                 return ret;
585         BUG_ON(ret);
586
587         ret = btrfs_extend_item(trans, root, path, new_size);
588         BUG_ON(ret);
589
590         leaf = path->nodes[0];
591         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
592         btrfs_set_extent_refs(leaf, item, refs);
593         /* FIXME: get real generation */
594         btrfs_set_extent_generation(leaf, item, 0);
595         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
596                 btrfs_set_extent_flags(leaf, item,
597                                        BTRFS_EXTENT_FLAG_TREE_BLOCK |
598                                        BTRFS_BLOCK_FLAG_FULL_BACKREF);
599                 bi = (struct btrfs_tree_block_info *)(item + 1);
600                 /* FIXME: get first key of the block */
601                 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
602                 btrfs_set_tree_block_level(leaf, bi, (int)owner);
603         } else {
604                 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
605         }
606         btrfs_mark_buffer_dirty(leaf);
607         return 0;
608 }
609 #endif
610
611 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
612 {
613         u32 high_crc = ~(u32)0;
614         u32 low_crc = ~(u32)0;
615         __le64 lenum;
616
617         lenum = cpu_to_le64(root_objectid);
618         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
619         lenum = cpu_to_le64(owner);
620         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
621         lenum = cpu_to_le64(offset);
622         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
623
624         return ((u64)high_crc << 31) ^ (u64)low_crc;
625 }
626
627 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
628                                      struct btrfs_extent_data_ref *ref)
629 {
630         return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
631                                     btrfs_extent_data_ref_objectid(leaf, ref),
632                                     btrfs_extent_data_ref_offset(leaf, ref));
633 }
634
635 static int match_extent_data_ref(struct extent_buffer *leaf,
636                                  struct btrfs_extent_data_ref *ref,
637                                  u64 root_objectid, u64 owner, u64 offset)
638 {
639         if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
640             btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
641             btrfs_extent_data_ref_offset(leaf, ref) != offset)
642                 return 0;
643         return 1;
644 }
645
646 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
647                                            struct btrfs_root *root,
648                                            struct btrfs_path *path,
649                                            u64 bytenr, u64 parent,
650                                            u64 root_objectid,
651                                            u64 owner, u64 offset)
652 {
653         struct btrfs_key key;
654         struct btrfs_extent_data_ref *ref;
655         struct extent_buffer *leaf;
656         u32 nritems;
657         int ret;
658         int recow;
659         int err = -ENOENT;
660
661         key.objectid = bytenr;
662         if (parent) {
663                 key.type = BTRFS_SHARED_DATA_REF_KEY;
664                 key.offset = parent;
665         } else {
666                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
667                 key.offset = hash_extent_data_ref(root_objectid,
668                                                   owner, offset);
669         }
670 again:
671         recow = 0;
672         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
673         if (ret < 0) {
674                 err = ret;
675                 goto fail;
676         }
677
678         if (parent) {
679                 if (!ret)
680                         return 0;
681 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
682                 key.type = BTRFS_EXTENT_REF_V0_KEY;
683                 btrfs_release_path(root, path);
684                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
685                 if (ret < 0) {
686                         err = ret;
687                         goto fail;
688                 }
689                 if (!ret)
690                         return 0;
691 #endif
692                 goto fail;
693         }
694
695         leaf = path->nodes[0];
696         nritems = btrfs_header_nritems(leaf);
697         while (1) {
698                 if (path->slots[0] >= nritems) {
699                         ret = btrfs_next_leaf(root, path);
700                         if (ret < 0)
701                                 err = ret;
702                         if (ret)
703                                 goto fail;
704
705                         leaf = path->nodes[0];
706                         nritems = btrfs_header_nritems(leaf);
707                         recow = 1;
708                 }
709
710                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
711                 if (key.objectid != bytenr ||
712                     key.type != BTRFS_EXTENT_DATA_REF_KEY)
713                         goto fail;
714                 
715                 ref = btrfs_item_ptr(leaf, path->slots[0],
716                                      struct btrfs_extent_data_ref);
717
718                 if (match_extent_data_ref(leaf, ref, root_objectid,
719                                           owner, offset)) {
720                         if (recow) {
721                                 btrfs_release_path(root, path);
722                                 goto again;
723                         }
724                         err = 0;
725                         break;
726                 }
727                 path->slots[0]++;
728         }
729 fail:
730         return err;
731 }
732
733 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
734                                            struct btrfs_root *root,
735                                            struct btrfs_path *path,
736                                            u64 bytenr, u64 parent,
737                                            u64 root_objectid, u64 owner,
738                                            u64 offset, int refs_to_add)
739 {
740         struct btrfs_key key;
741         struct extent_buffer *leaf;
742         u32 size;
743         u32 num_refs;
744         int ret;
745
746         key.objectid = bytenr;
747         if (parent) {
748                 key.type = BTRFS_SHARED_DATA_REF_KEY;
749                 key.offset = parent;
750                 size = sizeof(struct btrfs_shared_data_ref);
751         } else {
752                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
753                 key.offset = hash_extent_data_ref(root_objectid,
754                                                   owner, offset);
755                 size = sizeof(struct btrfs_extent_data_ref);
756         }
757
758         ret = btrfs_insert_empty_item(trans, root, path, &key, size);
759         if (ret && ret != -EEXIST)
760                 goto fail;
761
762         leaf = path->nodes[0];
763         if (parent) {
764                 struct btrfs_shared_data_ref *ref;
765                 ref = btrfs_item_ptr(leaf, path->slots[0],
766                                      struct btrfs_shared_data_ref);
767                 if (ret == 0) {
768                         btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
769                 } else {
770                         num_refs = btrfs_shared_data_ref_count(leaf, ref);
771                         num_refs += refs_to_add;
772                         btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
773                 }
774         } else {
775                 struct btrfs_extent_data_ref *ref;
776                 while (ret == -EEXIST) {
777                         ref = btrfs_item_ptr(leaf, path->slots[0],
778                                              struct btrfs_extent_data_ref);
779                         if (match_extent_data_ref(leaf, ref, root_objectid,
780                                                   owner, offset))
781                                 break;
782                         btrfs_release_path(root, path);
783
784                         key.offset++;
785                         ret = btrfs_insert_empty_item(trans, root, path, &key,
786                                                       size);
787                         if (ret && ret != -EEXIST)
788                                 goto fail;
789
790                         leaf = path->nodes[0];
791                 }
792                 ref = btrfs_item_ptr(leaf, path->slots[0],
793                                      struct btrfs_extent_data_ref);
794                 if (ret == 0) {
795                         btrfs_set_extent_data_ref_root(leaf, ref,
796                                                        root_objectid);
797                         btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
798                         btrfs_set_extent_data_ref_offset(leaf, ref, offset);
799                         btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
800                 } else {
801                         num_refs = btrfs_extent_data_ref_count(leaf, ref);
802                         num_refs += refs_to_add;
803                         btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
804                 }
805         }
806         btrfs_mark_buffer_dirty(leaf);
807         ret = 0;
808 fail:
809         btrfs_release_path(root, path);
810         return ret;
811 }
812
813 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
814                                            struct btrfs_root *root,
815                                            struct btrfs_path *path,
816                                            int refs_to_drop)
817 {
818         struct btrfs_key key;
819         struct btrfs_extent_data_ref *ref1 = NULL;
820         struct btrfs_shared_data_ref *ref2 = NULL;
821         struct extent_buffer *leaf;
822         u32 num_refs = 0;
823         int ret = 0;
824
825         leaf = path->nodes[0];
826         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
827
828         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
829                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
830                                       struct btrfs_extent_data_ref);
831                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
832         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
833                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
834                                       struct btrfs_shared_data_ref);
835                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
836 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
837         } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
838                 struct btrfs_extent_ref_v0 *ref0;
839                 ref0 = btrfs_item_ptr(leaf, path->slots[0],
840                                       struct btrfs_extent_ref_v0);
841                 num_refs = btrfs_ref_count_v0(leaf, ref0);
842 #endif
843         } else {
844                 BUG();
845         }
846
847         BUG_ON(num_refs < refs_to_drop);
848         num_refs -= refs_to_drop;
849
850         if (num_refs == 0) {
851                 ret = btrfs_del_item(trans, root, path);
852         } else {
853                 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
854                         btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
855                 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
856                         btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
857 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
858                 else {
859                         struct btrfs_extent_ref_v0 *ref0;
860                         ref0 = btrfs_item_ptr(leaf, path->slots[0],
861                                         struct btrfs_extent_ref_v0);
862                         btrfs_set_ref_count_v0(leaf, ref0, num_refs);
863                 }
864 #endif
865                 btrfs_mark_buffer_dirty(leaf);
866         }
867         return ret;
868 }
869
870 static noinline u32 extent_data_ref_count(struct btrfs_root *root,
871                                           struct btrfs_path *path,
872                                           struct btrfs_extent_inline_ref *iref)
873 {
874         struct btrfs_key key;
875         struct extent_buffer *leaf;
876         struct btrfs_extent_data_ref *ref1;
877         struct btrfs_shared_data_ref *ref2;
878         u32 num_refs = 0;
879
880         leaf = path->nodes[0];
881         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
882         if (iref) {
883                 if (btrfs_extent_inline_ref_type(leaf, iref) ==
884                     BTRFS_EXTENT_DATA_REF_KEY) {
885                         ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
886                         num_refs = btrfs_extent_data_ref_count(leaf, ref1);
887                 } else {
888                         ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
889                         num_refs = btrfs_shared_data_ref_count(leaf, ref2);
890                 }
891         } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
892                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
893                                       struct btrfs_extent_data_ref);
894                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
895         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
896                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
897                                       struct btrfs_shared_data_ref);
898                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
899 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
900         } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
901                 struct btrfs_extent_ref_v0 *ref0;
902                 ref0 = btrfs_item_ptr(leaf, path->slots[0],
903                                       struct btrfs_extent_ref_v0);
904                 num_refs = btrfs_ref_count_v0(leaf, ref0);
905 #endif
906         } else {
907                 BUG();
908         }
909         return num_refs;
910 }
911
912 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
913                                           struct btrfs_root *root,
914                                           struct btrfs_path *path,
915                                           u64 bytenr, u64 parent,
916                                           u64 root_objectid)
917 {
918         struct btrfs_key key;
919         int ret;
920
921         key.objectid = bytenr;
922         if (parent) {
923                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
924                 key.offset = parent;
925         } else {
926                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
927                 key.offset = root_objectid;
928         }
929
930         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
931         if (ret > 0)
932                 ret = -ENOENT;
933 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
934         if (ret == -ENOENT && parent) {
935                 btrfs_release_path(root, path);
936                 key.type = BTRFS_EXTENT_REF_V0_KEY;
937                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
938                 if (ret > 0)
939                         ret = -ENOENT;
940         }
941 #endif
942         return ret;
943 }
944
945 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
946                                           struct btrfs_root *root,
947                                           struct btrfs_path *path,
948                                           u64 bytenr, u64 parent,
949                                           u64 root_objectid)
950 {
951         struct btrfs_key key;
952         int ret;
953
954         key.objectid = bytenr;
955         if (parent) {
956                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
957                 key.offset = parent;
958         } else {
959                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
960                 key.offset = root_objectid;
961         }
962
963         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
964
965         btrfs_release_path(root, path);
966         return ret;
967 }
968
969 static inline int extent_ref_type(u64 parent, u64 owner)
970 {
971         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
972                 if (parent > 0)
973                         return BTRFS_SHARED_BLOCK_REF_KEY;
974                 else
975                         return BTRFS_TREE_BLOCK_REF_KEY;
976         } else {
977                 if (parent > 0)
978                         return BTRFS_SHARED_DATA_REF_KEY;
979                 else
980                         return BTRFS_EXTENT_DATA_REF_KEY;
981         }
982 }
983
984 static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
985
986 {
987         int level;
988         for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
989                 if (!path->nodes[level])
990                         break;
991                 if (path->slots[level] + 1 >=
992                     btrfs_header_nritems(path->nodes[level]))
993                         continue;
994                 if (level == 0)
995                         btrfs_item_key_to_cpu(path->nodes[level], key,
996                                               path->slots[level] + 1);
997                 else
998                         btrfs_node_key_to_cpu(path->nodes[level], key,
999                                               path->slots[level] + 1);
1000                 return 0;
1001         }
1002         return 1;
1003 }
1004
1005 static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1006                                  struct btrfs_root *root,
1007                                  struct btrfs_path *path,
1008                                  struct btrfs_extent_inline_ref **ref_ret,
1009                                  u64 bytenr, u64 num_bytes,
1010                                  u64 parent, u64 root_objectid,
1011                                  u64 owner, u64 offset, int insert)
1012 {
1013         struct btrfs_key key;
1014         struct extent_buffer *leaf;
1015         struct btrfs_extent_item *ei;
1016         struct btrfs_extent_inline_ref *iref;
1017         u64 flags;
1018         u32 item_size;
1019         unsigned long ptr;
1020         unsigned long end;
1021         int extra_size;
1022         int type;
1023         int want;
1024         int ret;
1025         int err = 0;
1026         int skinny_metadata =
1027                 btrfs_fs_incompat(root->fs_info,
1028                                   BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
1029
1030         key.objectid = bytenr;
1031         key.type = BTRFS_EXTENT_ITEM_KEY;
1032         key.offset = num_bytes;
1033
1034         want = extent_ref_type(parent, owner);
1035         if (insert)
1036                 extra_size = btrfs_extent_inline_ref_size(want);
1037         else
1038                 extra_size = -1;
1039
1040         if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) {
1041                 skinny_metadata = 1;
1042                 key.type = BTRFS_METADATA_ITEM_KEY;
1043                 key.offset = owner;
1044         } else if (skinny_metadata) {
1045                 skinny_metadata = 0;
1046         }
1047
1048 again:
1049         ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1050         if (ret < 0) {
1051                 err = ret;
1052                 goto out;
1053         }
1054
1055         /*
1056          * We may be a newly converted file system which still has the old fat
1057          * extent entries for metadata, so try and see if we have one of those.
1058          */
1059         if (ret > 0 && skinny_metadata) {
1060                 skinny_metadata = 0;
1061                 if (path->slots[0]) {
1062                         path->slots[0]--;
1063                         btrfs_item_key_to_cpu(path->nodes[0], &key,
1064                                               path->slots[0]);
1065                         if (key.objectid == bytenr &&
1066                             key.type == BTRFS_EXTENT_ITEM_KEY &&
1067                             key.offset == num_bytes)
1068                                 ret = 0;
1069                 }
1070                 if (ret) {
1071                         key.type = BTRFS_EXTENT_ITEM_KEY;
1072                         key.offset = num_bytes;
1073                         goto again;
1074                 }
1075         }
1076
1077         if (ret) {
1078                 printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset);
1079                 return -ENOENT;
1080         }
1081
1082         BUG_ON(ret);
1083
1084         leaf = path->nodes[0];
1085         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1086 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1087         if (item_size < sizeof(*ei)) {
1088                 if (!insert) {
1089                         err = -ENOENT;
1090                         goto out;
1091                 }
1092                 ret = convert_extent_item_v0(trans, root, path, owner,
1093                                              extra_size);
1094                 if (ret < 0) {
1095                         err = ret;
1096                         goto out;
1097                 }
1098                 leaf = path->nodes[0];
1099                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1100         }
1101 #endif
1102         if (item_size < sizeof(*ei)) {
1103                 printf("Size is %u, needs to be %u, slot %d\n",
1104                        (unsigned)item_size,
1105                        (unsigned)sizeof(*ei), path->slots[0]);
1106                 btrfs_print_leaf(root, leaf);
1107                 return -EINVAL;
1108         }
1109         BUG_ON(item_size < sizeof(*ei));
1110
1111         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1112         flags = btrfs_extent_flags(leaf, ei);
1113
1114         ptr = (unsigned long)(ei + 1);
1115         end = (unsigned long)ei + item_size;
1116
1117         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1118                 ptr += sizeof(struct btrfs_tree_block_info);
1119                 BUG_ON(ptr > end);
1120         } else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
1121                 if (!(flags & BTRFS_EXTENT_FLAG_DATA)) {
1122                         return -EIO;
1123                 }
1124         }
1125
1126         err = -ENOENT;
1127         while (1) {
1128                 if (ptr >= end) {
1129                         WARN_ON(ptr > end);
1130                         break;
1131                 }
1132                 iref = (struct btrfs_extent_inline_ref *)ptr;
1133                 type = btrfs_extent_inline_ref_type(leaf, iref);
1134                 if (want < type)
1135                         break;
1136                 if (want > type) {
1137                         ptr += btrfs_extent_inline_ref_size(type);
1138                         continue;
1139                 }
1140
1141                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1142                         struct btrfs_extent_data_ref *dref;
1143                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1144                         if (match_extent_data_ref(leaf, dref, root_objectid,
1145                                                   owner, offset)) {
1146                                 err = 0;
1147                                 break;
1148                         }
1149                         if (hash_extent_data_ref_item(leaf, dref) <
1150                             hash_extent_data_ref(root_objectid, owner, offset))
1151                                 break;
1152                 } else {
1153                         u64 ref_offset;
1154                         ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1155                         if (parent > 0) {
1156                                 if (parent == ref_offset) {
1157                                         err = 0;
1158                                         break;
1159                                 }
1160                                 if (ref_offset < parent)
1161                                         break;
1162                         } else {
1163                                 if (root_objectid == ref_offset) {
1164                                         err = 0;
1165                                         break;
1166                                 }
1167                                 if (ref_offset < root_objectid)
1168                                         break;
1169                         }
1170                 }
1171                 ptr += btrfs_extent_inline_ref_size(type);
1172         }
1173         if (err == -ENOENT && insert) {
1174                 if (item_size + extra_size >=
1175                     BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1176                         err = -EAGAIN;
1177                         goto out;
1178                 }
1179                 /*
1180                  * To add new inline back ref, we have to make sure
1181                  * there is no corresponding back ref item.
1182                  * For simplicity, we just do not add new inline back
1183                  * ref if there is any back ref item.
1184                  */
1185                 if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
1186                     key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1187                         err = -EAGAIN;
1188                         goto out;
1189                 }
1190         }
1191         *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1192 out:
1193         return err;
1194 }
1195
1196 static int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1197                                 struct btrfs_root *root,
1198                                 struct btrfs_path *path,
1199                                 struct btrfs_extent_inline_ref *iref,
1200                                 u64 parent, u64 root_objectid,
1201                                 u64 owner, u64 offset, int refs_to_add)
1202 {
1203         struct extent_buffer *leaf;
1204         struct btrfs_extent_item *ei;
1205         unsigned long ptr;
1206         unsigned long end;
1207         unsigned long item_offset;
1208         u64 refs;
1209         int size;
1210         int type;
1211         int ret;
1212
1213         leaf = path->nodes[0];
1214         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1215         item_offset = (unsigned long)iref - (unsigned long)ei;
1216
1217         type = extent_ref_type(parent, owner);
1218         size = btrfs_extent_inline_ref_size(type);
1219
1220         ret = btrfs_extend_item(trans, root, path, size);
1221         BUG_ON(ret);
1222
1223         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1224         refs = btrfs_extent_refs(leaf, ei);
1225         refs += refs_to_add;
1226         btrfs_set_extent_refs(leaf, ei, refs);
1227
1228         ptr = (unsigned long)ei + item_offset;
1229         end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1230         if (ptr < end - size)
1231                 memmove_extent_buffer(leaf, ptr + size, ptr,
1232                                       end - size - ptr);
1233
1234         iref = (struct btrfs_extent_inline_ref *)ptr;
1235         btrfs_set_extent_inline_ref_type(leaf, iref, type);
1236         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1237                 struct btrfs_extent_data_ref *dref;
1238                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1239                 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1240                 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1241                 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1242                 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1243         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1244                 struct btrfs_shared_data_ref *sref;
1245                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1246                 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1247                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1248         } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1249                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1250         } else {
1251                 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1252         }
1253         btrfs_mark_buffer_dirty(leaf);
1254         return 0;
1255 }
1256
1257 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1258                                  struct btrfs_root *root,
1259                                  struct btrfs_path *path,
1260                                  struct btrfs_extent_inline_ref **ref_ret,
1261                                  u64 bytenr, u64 num_bytes, u64 parent,
1262                                  u64 root_objectid, u64 owner, u64 offset)
1263 {
1264         int ret;
1265
1266         ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1267                                            bytenr, num_bytes, parent,
1268                                            root_objectid, owner, offset, 0);
1269         if (ret != -ENOENT)
1270                 return ret;
1271
1272         btrfs_release_path(root, path);
1273         *ref_ret = NULL;
1274
1275         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1276                 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1277                                             root_objectid);
1278         } else {
1279                 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1280                                              root_objectid, owner, offset);
1281         }
1282         return ret;
1283 }
1284
1285 static int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1286                                  struct btrfs_root *root,
1287                                  struct btrfs_path *path,
1288                                  struct btrfs_extent_inline_ref *iref,
1289                                  int refs_to_mod)
1290 {
1291         struct extent_buffer *leaf;
1292         struct btrfs_extent_item *ei;
1293         struct btrfs_extent_data_ref *dref = NULL;
1294         struct btrfs_shared_data_ref *sref = NULL;
1295         unsigned long ptr;
1296         unsigned long end;
1297         u32 item_size;
1298         int size;
1299         int type;
1300         int ret;
1301         u64 refs;
1302
1303         leaf = path->nodes[0];
1304         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1305         refs = btrfs_extent_refs(leaf, ei);
1306         WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1307         refs += refs_to_mod;
1308         btrfs_set_extent_refs(leaf, ei, refs);
1309
1310         type = btrfs_extent_inline_ref_type(leaf, iref);
1311
1312         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1313                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1314                 refs = btrfs_extent_data_ref_count(leaf, dref);
1315         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1316                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1317                 refs = btrfs_shared_data_ref_count(leaf, sref);
1318         } else {
1319                 refs = 1;
1320                 BUG_ON(refs_to_mod != -1);
1321         }
1322
1323         BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1324         refs += refs_to_mod;
1325
1326         if (refs > 0) {
1327                 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1328                         btrfs_set_extent_data_ref_count(leaf, dref, refs);
1329                 else
1330                         btrfs_set_shared_data_ref_count(leaf, sref, refs);
1331         } else {
1332                 size =  btrfs_extent_inline_ref_size(type);
1333                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1334                 ptr = (unsigned long)iref;
1335                 end = (unsigned long)ei + item_size;
1336                 if (ptr + size < end)
1337                         memmove_extent_buffer(leaf, ptr, ptr + size,
1338                                               end - ptr - size);
1339                 item_size -= size;
1340                 ret = btrfs_truncate_item(trans, root, path, item_size, 1);
1341                 BUG_ON(ret);
1342         }
1343         btrfs_mark_buffer_dirty(leaf);
1344         return 0;
1345 }
1346
1347 static int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1348                                  struct btrfs_root *root,
1349                                  struct btrfs_path *path,
1350                                  u64 bytenr, u64 num_bytes, u64 parent,
1351                                  u64 root_objectid, u64 owner,
1352                                  u64 offset, int refs_to_add)
1353 {
1354         struct btrfs_extent_inline_ref *iref;
1355         int ret;
1356
1357         ret = lookup_inline_extent_backref(trans, root, path, &iref,
1358                                            bytenr, num_bytes, parent,
1359                                            root_objectid, owner, offset, 1);
1360         if (ret == 0) {
1361                 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1362                 ret = update_inline_extent_backref(trans, root, path, iref,
1363                                                    refs_to_add);
1364         } else if (ret == -ENOENT) {
1365                 ret = setup_inline_extent_backref(trans, root, path, iref,
1366                                                   parent, root_objectid,
1367                                                   owner, offset, refs_to_add);
1368         }
1369         return ret;
1370 }
1371
1372 static int insert_extent_backref(struct btrfs_trans_handle *trans,
1373                                  struct btrfs_root *root,
1374                                  struct btrfs_path *path,
1375                                  u64 bytenr, u64 parent, u64 root_objectid,
1376                                  u64 owner, u64 offset, int refs_to_add)
1377 {
1378         int ret;
1379
1380         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
1381                 ret = insert_extent_data_ref(trans, root, path, bytenr,
1382                                              parent, root_objectid,
1383                                              owner, offset, refs_to_add);
1384         } else {
1385                 BUG_ON(refs_to_add != 1);
1386                 ret = insert_tree_block_ref(trans, root, path, bytenr,
1387                                             parent, root_objectid);
1388         }
1389         return ret;
1390 }
1391
1392 static int remove_extent_backref(struct btrfs_trans_handle *trans,
1393                                  struct btrfs_root *root,
1394                                  struct btrfs_path *path,
1395                                  struct btrfs_extent_inline_ref *iref,
1396                                  int refs_to_drop, int is_data)
1397 {
1398         int ret;
1399
1400         BUG_ON(!is_data && refs_to_drop != 1);
1401         if (iref) {
1402                 ret = update_inline_extent_backref(trans, root, path, iref,
1403                                                    -refs_to_drop);
1404         } else if (is_data) {
1405                 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1406         } else {
1407                 ret = btrfs_del_item(trans, root, path);
1408         }
1409         return ret;
1410 }
1411
1412 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1413                          struct btrfs_root *root,
1414                          u64 bytenr, u64 num_bytes, u64 parent,
1415                          u64 root_objectid, u64 owner, u64 offset)
1416 {
1417         struct btrfs_path *path;
1418         struct extent_buffer *leaf;
1419         struct btrfs_extent_item *item;
1420         u64 refs;
1421         int ret;
1422         int err = 0;
1423
1424         path = btrfs_alloc_path();
1425         if (!path)
1426                 return -ENOMEM;
1427
1428         path->reada = 1;
1429         path->leave_spinning = 1;
1430
1431         ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
1432                                            path, bytenr, num_bytes, parent,
1433                                            root_objectid, owner, offset, 1);
1434         if (ret == 0)
1435                 goto out;
1436
1437         if (ret != -EAGAIN) {
1438                 err = ret;
1439                 goto out;
1440         }
1441         
1442         leaf = path->nodes[0];
1443         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1444         refs = btrfs_extent_refs(leaf, item);
1445         btrfs_set_extent_refs(leaf, item, refs + 1);
1446
1447         btrfs_mark_buffer_dirty(leaf);
1448         btrfs_release_path(root->fs_info->extent_root, path);
1449
1450         path->reada = 1;
1451         path->leave_spinning = 1;
1452
1453         /* now insert the actual backref */
1454         ret = insert_extent_backref(trans, root->fs_info->extent_root,
1455                                     path, bytenr, parent, root_objectid,
1456                                     owner, offset, 1);
1457         if (ret)
1458                 err = ret;
1459 out:
1460         btrfs_free_path(path);
1461         finish_current_insert(trans, root->fs_info->extent_root);
1462         del_pending_extents(trans, root->fs_info->extent_root);
1463         BUG_ON(err);
1464         return err;
1465 }
1466
1467 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1468                          struct btrfs_root *root)
1469 {
1470         finish_current_insert(trans, root->fs_info->extent_root);
1471         del_pending_extents(trans, root->fs_info->extent_root);
1472         return 0;
1473 }
1474
1475 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
1476                              struct btrfs_root *root, u64 bytenr,
1477                              u64 offset, int metadata, u64 *refs, u64 *flags)
1478 {
1479         struct btrfs_path *path;
1480         int ret;
1481         struct btrfs_key key;
1482         struct extent_buffer *l;
1483         struct btrfs_extent_item *item;
1484         u32 item_size;
1485         u64 num_refs;
1486         u64 extent_flags;
1487
1488         if (metadata &&
1489             !btrfs_fs_incompat(root->fs_info,
1490                                BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) {
1491                 offset = root->leafsize;
1492                 metadata = 0;
1493         }
1494
1495         path = btrfs_alloc_path();
1496         path->reada = 1;
1497
1498         key.objectid = bytenr;
1499         key.offset = offset;
1500         if (metadata)
1501                 key.type = BTRFS_METADATA_ITEM_KEY;
1502         else
1503                 key.type = BTRFS_EXTENT_ITEM_KEY;
1504
1505 again:
1506         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1507                                 0, 0);
1508         if (ret < 0)
1509                 goto out;
1510
1511         /*
1512          * Deal with the fact that we may have mixed SKINNY and normal refs.  If
1513          * we didn't find what we wanted check and see if we have a normal ref
1514          * right next to us, or re-search if we are on the edge of the leaf just
1515          * to make sure.
1516          */
1517         if (ret > 0 && metadata) {
1518                 if (path->slots) {
1519                         path->slots[0]--;
1520                         btrfs_item_key_to_cpu(path->nodes[0], &key,
1521                                               path->slots[0]);
1522                         if (key.objectid == bytenr &&
1523                             key.type == BTRFS_METADATA_ITEM_KEY)
1524                                 ret = 0;
1525                 }
1526
1527                 if (ret) {
1528                         btrfs_release_path(root, path);
1529                         key.type = BTRFS_EXTENT_ITEM_KEY;
1530                         key.offset = root->leafsize;
1531                         metadata = 0;
1532                         goto again;
1533                 }
1534         }
1535
1536         if (ret != 0) {
1537                 ret = -EIO;
1538                 goto out;
1539         }
1540
1541         l = path->nodes[0];
1542         item_size = btrfs_item_size_nr(l, path->slots[0]);
1543         if (item_size >= sizeof(*item)) {
1544                 item = btrfs_item_ptr(l, path->slots[0],
1545                                       struct btrfs_extent_item);
1546                 num_refs = btrfs_extent_refs(l, item);
1547                 extent_flags = btrfs_extent_flags(l, item);
1548         } else {
1549 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1550                         struct btrfs_extent_item_v0 *ei0;
1551                         BUG_ON(item_size != sizeof(*ei0));
1552                         ei0 = btrfs_item_ptr(l, path->slots[0],
1553                                              struct btrfs_extent_item_v0);
1554                         num_refs = btrfs_extent_refs_v0(l, ei0);
1555                         /* FIXME: this isn't correct for data */
1556                         extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
1557 #else
1558                         BUG();
1559 #endif
1560         }
1561         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1562         if (refs)
1563                 *refs = num_refs;
1564         if (flags)
1565                 *flags = extent_flags;
1566 out:
1567         btrfs_free_path(path);
1568         return 0;
1569 }
1570
1571 int btrfs_set_block_flags(struct btrfs_trans_handle *trans,
1572                           struct btrfs_root *root,
1573                           u64 bytenr, int level, u64 flags)
1574 {
1575         struct btrfs_path *path;
1576         int ret;
1577         struct btrfs_key key;
1578         struct extent_buffer *l;
1579         struct btrfs_extent_item *item;
1580         u32 item_size;
1581         int skinny_metadata =
1582                 btrfs_fs_incompat(root->fs_info,
1583                                   BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
1584
1585         path = btrfs_alloc_path();
1586         path->reada = 1;
1587
1588         key.objectid = bytenr;
1589         if (skinny_metadata) {
1590                 key.offset = level;
1591                 key.type = BTRFS_METADATA_ITEM_KEY;
1592         } else {
1593                 key.offset = root->leafsize;
1594                 key.type = BTRFS_EXTENT_ITEM_KEY;
1595         }
1596
1597 again:
1598         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1599                                 0, 0);
1600         if (ret < 0)
1601                 goto out;
1602
1603         if (ret > 0 && skinny_metadata) {
1604                 skinny_metadata = 0;
1605                 if (path->slots[0]--) {
1606                         path->slots[0]--;
1607                         btrfs_item_key_to_cpu(path->nodes[0], &key,
1608                                               path->slots[0]);
1609                         if (key.objectid == bytenr &&
1610                             key.offset == root->leafsize &&
1611                             key.type == BTRFS_EXTENT_ITEM_KEY)
1612                                 ret = 0;
1613                 }
1614                 if (ret) {
1615                         btrfs_release_path(root, path);
1616                         key.offset = root->leafsize;
1617                         key.type = BTRFS_EXTENT_ITEM_KEY;
1618                         goto again;
1619                 }
1620         }
1621
1622         if (ret != 0) {
1623                 btrfs_print_leaf(root, path->nodes[0]);
1624                 printk("failed to find block number %Lu\n",
1625                         (unsigned long long)bytenr);
1626                 BUG();
1627         }
1628         l = path->nodes[0];
1629         item_size = btrfs_item_size_nr(l, path->slots[0]);
1630 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1631         if (item_size < sizeof(*item)) {
1632                 ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
1633                                              path, (u64)-1, 0);
1634                 if (ret < 0)
1635                         goto out;
1636
1637                 l = path->nodes[0];
1638                 item_size = btrfs_item_size_nr(l, path->slots[0]);
1639         }
1640 #endif
1641         BUG_ON(item_size < sizeof(*item));
1642         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1643         flags |= btrfs_extent_flags(l, item);
1644         btrfs_set_extent_flags(l, item, flags);
1645 out:
1646         btrfs_free_path(path);
1647         finish_current_insert(trans, root->fs_info->extent_root);
1648         del_pending_extents(trans, root->fs_info->extent_root);
1649         return ret;
1650 }
1651
1652 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
1653                            struct btrfs_root *root,
1654                            struct extent_buffer *buf,
1655                            int record_parent, int inc)
1656 {
1657         u64 bytenr;
1658         u64 num_bytes;
1659         u64 parent;
1660         u64 ref_root;
1661         u32 nritems;
1662         struct btrfs_key key;
1663         struct btrfs_file_extent_item *fi;
1664         int i;
1665         int level;
1666         int ret = 0;
1667         int (*process_func)(struct btrfs_trans_handle *trans,
1668                             struct btrfs_root *root,
1669                             u64, u64, u64, u64, u64, u64);
1670
1671         ref_root = btrfs_header_owner(buf);
1672         nritems = btrfs_header_nritems(buf);
1673         level = btrfs_header_level(buf);
1674
1675         if (!root->ref_cows && level == 0)
1676                 return 0;
1677
1678         if (inc)
1679                 process_func = btrfs_inc_extent_ref;
1680         else
1681                 process_func = btrfs_free_extent;
1682
1683         if (record_parent)
1684                 parent = buf->start;
1685         else
1686                 parent = 0;
1687
1688         for (i = 0; i < nritems; i++) {
1689                 cond_resched();
1690                 if (level == 0) {
1691                         btrfs_item_key_to_cpu(buf, &key, i);
1692                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1693                                 continue;
1694                         fi = btrfs_item_ptr(buf, i,
1695                                             struct btrfs_file_extent_item);
1696                         if (btrfs_file_extent_type(buf, fi) ==
1697                             BTRFS_FILE_EXTENT_INLINE)
1698                                 continue;
1699                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1700                         if (bytenr == 0)
1701                                 continue;
1702                         
1703                         num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
1704                         key.offset -= btrfs_file_extent_offset(buf, fi);
1705                         ret = process_func(trans, root, bytenr, num_bytes,
1706                                            parent, ref_root, key.objectid,
1707                                            key.offset);
1708                         if (ret) {
1709                                 WARN_ON(1);
1710                                 goto fail;
1711                         }
1712                 } else {
1713                         bytenr = btrfs_node_blockptr(buf, i);
1714                         num_bytes = btrfs_level_size(root, level - 1);
1715                         ret = process_func(trans, root, bytenr, num_bytes,
1716                                            parent, ref_root, level - 1, 0);
1717                         if (ret) {
1718                                 WARN_ON(1);
1719                                 goto fail;
1720                         }
1721                 }
1722         }
1723         return 0;
1724 fail:
1725         WARN_ON(1);
1726         return ret;
1727 }
1728
1729 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1730                   struct extent_buffer *buf, int record_parent)
1731 {
1732         return __btrfs_mod_ref(trans, root, buf, record_parent, 1);
1733 }
1734
1735 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1736                   struct extent_buffer *buf, int record_parent)
1737 {
1738         return __btrfs_mod_ref(trans, root, buf, record_parent, 0);
1739 }
1740
1741 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1742                                  struct btrfs_root *root,
1743                                  struct btrfs_path *path,
1744                                  struct btrfs_block_group_cache *cache)
1745 {
1746         int ret;
1747         int pending_ret;
1748         struct btrfs_root *extent_root = root->fs_info->extent_root;
1749         unsigned long bi;
1750         struct extent_buffer *leaf;
1751
1752         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1753         if (ret < 0)
1754                 goto fail;
1755         BUG_ON(ret);
1756
1757         leaf = path->nodes[0];
1758         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1759         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1760         btrfs_mark_buffer_dirty(leaf);
1761         btrfs_release_path(extent_root, path);
1762 fail:
1763         finish_current_insert(trans, extent_root);
1764         pending_ret = del_pending_extents(trans, extent_root);
1765         if (ret)
1766                 return ret;
1767         if (pending_ret)
1768                 return pending_ret;
1769         return 0;
1770
1771 }
1772
1773 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1774                                    struct btrfs_root *root)
1775 {
1776         struct extent_io_tree *block_group_cache;
1777         struct btrfs_block_group_cache *cache;
1778         int ret;
1779         struct btrfs_path *path;
1780         u64 last = 0;
1781         u64 start;
1782         u64 end;
1783         u64 ptr;
1784
1785         block_group_cache = &root->fs_info->block_group_cache;
1786         path = btrfs_alloc_path();
1787         if (!path)
1788                 return -ENOMEM;
1789
1790         while(1) {
1791                 ret = find_first_extent_bit(block_group_cache, last,
1792                                             &start, &end, BLOCK_GROUP_DIRTY);
1793                 if (ret) {
1794                         if (last == 0)
1795                                 break;
1796                         last = 0;
1797                         continue;
1798                 }
1799
1800                 last = end + 1;
1801                 ret = get_state_private(block_group_cache, start, &ptr);
1802                 BUG_ON(ret);
1803
1804                 clear_extent_bits(block_group_cache, start, end,
1805                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
1806
1807                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1808                 ret = write_one_cache_group(trans, root, path, cache);
1809         }
1810         btrfs_free_path(path);
1811         return 0;
1812 }
1813
1814 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1815                                                   u64 flags)
1816 {
1817         struct list_head *head = &info->space_info;
1818         struct list_head *cur;
1819         struct btrfs_space_info *found;
1820         list_for_each(cur, head) {
1821                 found = list_entry(cur, struct btrfs_space_info, list);
1822                 if (found->flags & flags)
1823                         return found;
1824         }
1825         return NULL;
1826
1827 }
1828
1829 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1830                              u64 total_bytes, u64 bytes_used,
1831                              struct btrfs_space_info **space_info)
1832 {
1833         struct btrfs_space_info *found;
1834
1835         found = __find_space_info(info, flags);
1836         if (found) {
1837                 found->total_bytes += total_bytes;
1838                 found->bytes_used += bytes_used;
1839                 if (found->total_bytes < found->bytes_used) {
1840                         fprintf(stderr, "warning, bad space info total_bytes "
1841                                 "%llu used %llu\n",
1842                                (unsigned long long)found->total_bytes,
1843                                (unsigned long long)found->bytes_used);
1844                 }
1845                 *space_info = found;
1846                 return 0;
1847         }
1848         found = kmalloc(sizeof(*found), GFP_NOFS);
1849         if (!found)
1850                 return -ENOMEM;
1851
1852         list_add(&found->list, &info->space_info);
1853         found->flags = flags;
1854         found->total_bytes = total_bytes;
1855         found->bytes_used = bytes_used;
1856         found->bytes_pinned = 0;
1857         found->full = 0;
1858         *space_info = found;
1859         return 0;
1860 }
1861
1862
1863 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1864 {
1865         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1866                                    BTRFS_BLOCK_GROUP_RAID1 |
1867                                    BTRFS_BLOCK_GROUP_RAID10 |
1868                                    BTRFS_BLOCK_GROUP_RAID5 |
1869                                    BTRFS_BLOCK_GROUP_RAID6 |
1870                                    BTRFS_BLOCK_GROUP_DUP);
1871         if (extra_flags) {
1872                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1873                         fs_info->avail_data_alloc_bits |= extra_flags;
1874                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1875                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1876                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1877                         fs_info->avail_system_alloc_bits |= extra_flags;
1878         }
1879 }
1880
1881 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1882                           struct btrfs_root *extent_root, u64 alloc_bytes,
1883                           u64 flags)
1884 {
1885         struct btrfs_space_info *space_info;
1886         u64 thresh;
1887         u64 start;
1888         u64 num_bytes;
1889         int ret;
1890
1891         space_info = __find_space_info(extent_root->fs_info, flags);
1892         if (!space_info) {
1893                 ret = update_space_info(extent_root->fs_info, flags,
1894                                         0, 0, &space_info);
1895                 BUG_ON(ret);
1896         }
1897         BUG_ON(!space_info);
1898
1899         if (space_info->full)
1900                 return 0;
1901
1902         thresh = div_factor(space_info->total_bytes, 7);
1903         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1904             thresh)
1905                 return 0;
1906
1907         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes,
1908                                 space_info->flags);
1909         if (ret == -ENOSPC) {
1910                 space_info->full = 1;
1911                 return 0;
1912         }
1913
1914         BUG_ON(ret);
1915
1916         ret = btrfs_make_block_group(trans, extent_root, 0, space_info->flags,
1917                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1918         BUG_ON(ret);
1919         return 0;
1920 }
1921
1922 static int update_block_group(struct btrfs_trans_handle *trans,
1923                               struct btrfs_root *root,
1924                               u64 bytenr, u64 num_bytes, int alloc,
1925                               int mark_free)
1926 {
1927         struct btrfs_block_group_cache *cache;
1928         struct btrfs_fs_info *info = root->fs_info;
1929         u64 total = num_bytes;
1930         u64 old_val;
1931         u64 byte_in_group;
1932         u64 start;
1933         u64 end;
1934
1935         /* block accounting for super block */
1936         old_val = btrfs_super_bytes_used(info->super_copy);
1937         if (alloc)
1938                 old_val += num_bytes;
1939         else
1940                 old_val -= num_bytes;
1941         btrfs_set_super_bytes_used(info->super_copy, old_val);
1942
1943         /* block accounting for root item */
1944         old_val = btrfs_root_used(&root->root_item);
1945         if (alloc)
1946                 old_val += num_bytes;
1947         else
1948                 old_val -= num_bytes;
1949         btrfs_set_root_used(&root->root_item, old_val);
1950
1951         while(total) {
1952                 cache = btrfs_lookup_block_group(info, bytenr);
1953                 if (!cache) {
1954                         return -1;
1955                 }
1956                 byte_in_group = bytenr - cache->key.objectid;
1957                 WARN_ON(byte_in_group > cache->key.offset);
1958                 start = cache->key.objectid;
1959                 end = start + cache->key.offset - 1;
1960                 set_extent_bits(&info->block_group_cache, start, end,
1961                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1962
1963                 old_val = btrfs_block_group_used(&cache->item);
1964                 num_bytes = min(total, cache->key.offset - byte_in_group);
1965
1966                 if (alloc) {
1967                         old_val += num_bytes;
1968                         cache->space_info->bytes_used += num_bytes;
1969                 } else {
1970                         old_val -= num_bytes;
1971                         cache->space_info->bytes_used -= num_bytes;
1972                         if (mark_free) {
1973                                 set_extent_dirty(&info->free_space_cache,
1974                                                  bytenr, bytenr + num_bytes - 1,
1975                                                  GFP_NOFS);
1976                         }
1977                 }
1978                 btrfs_set_block_group_used(&cache->item, old_val);
1979                 total -= num_bytes;
1980                 bytenr += num_bytes;
1981         }
1982         return 0;
1983 }
1984
1985 static int update_pinned_extents(struct btrfs_root *root,
1986                                 u64 bytenr, u64 num, int pin)
1987 {
1988         u64 len;
1989         struct btrfs_block_group_cache *cache;
1990         struct btrfs_fs_info *fs_info = root->fs_info;
1991
1992         if (pin) {
1993                 set_extent_dirty(&fs_info->pinned_extents,
1994                                 bytenr, bytenr + num - 1, GFP_NOFS);
1995         } else {
1996                 clear_extent_dirty(&fs_info->pinned_extents,
1997                                 bytenr, bytenr + num - 1, GFP_NOFS);
1998         }
1999         while (num > 0) {
2000                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2001                 if (!cache) {
2002                         len = min((u64)root->sectorsize, num);
2003                         goto next;
2004                 }
2005                 WARN_ON(!cache);
2006                 len = min(num, cache->key.offset -
2007                           (bytenr - cache->key.objectid));
2008                 if (pin) {
2009                         cache->pinned += len;
2010                         cache->space_info->bytes_pinned += len;
2011                         fs_info->total_pinned += len;
2012                 } else {
2013                         cache->pinned -= len;
2014                         cache->space_info->bytes_pinned -= len;
2015                         fs_info->total_pinned -= len;
2016                 }
2017 next:
2018                 bytenr += len;
2019                 num -= len;
2020         }
2021         return 0;
2022 }
2023
2024 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
2025 {
2026         u64 last = 0;
2027         u64 start;
2028         u64 end;
2029         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
2030         int ret;
2031
2032         while(1) {
2033                 ret = find_first_extent_bit(pinned_extents, last,
2034                                             &start, &end, EXTENT_DIRTY);
2035                 if (ret)
2036                         break;
2037                 set_extent_dirty(copy, start, end, GFP_NOFS);
2038                 last = end + 1;
2039         }
2040         return 0;
2041 }
2042
2043 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2044                                struct btrfs_root *root,
2045                                struct extent_io_tree *unpin)
2046 {
2047         u64 start;
2048         u64 end;
2049         int ret;
2050         struct extent_io_tree *free_space_cache;
2051         free_space_cache = &root->fs_info->free_space_cache;
2052
2053         while(1) {
2054                 ret = find_first_extent_bit(unpin, 0, &start, &end,
2055                                             EXTENT_DIRTY);
2056                 if (ret)
2057                         break;
2058                 update_pinned_extents(root, start, end + 1 - start, 0);
2059                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2060                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
2061         }
2062         return 0;
2063 }
2064
2065 static int extent_root_pending_ops(struct btrfs_fs_info *info)
2066 {
2067         u64 start;
2068         u64 end;
2069         int ret;
2070
2071         ret = find_first_extent_bit(&info->extent_ins, 0, &start,
2072                                     &end, EXTENT_LOCKED);
2073         if (!ret) {
2074                 ret = find_first_extent_bit(&info->pending_del, 0, &start, &end,
2075                                             EXTENT_LOCKED);
2076         }
2077         return ret == 0;
2078
2079 }
2080 static int finish_current_insert(struct btrfs_trans_handle *trans,
2081                                  struct btrfs_root *extent_root)
2082 {
2083         u64 start;
2084         u64 end;
2085         u64 priv;
2086         struct btrfs_fs_info *info = extent_root->fs_info;
2087         struct btrfs_path *path;
2088         struct pending_extent_op *extent_op;
2089         struct btrfs_key key;
2090         int ret;
2091         int skinny_metadata =
2092                 btrfs_fs_incompat(extent_root->fs_info,
2093                                   BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
2094
2095         path = btrfs_alloc_path();
2096
2097         while(1) {
2098                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
2099                                             &end, EXTENT_LOCKED);
2100                 if (ret)
2101                         break;
2102
2103                 ret = get_state_private(&info->extent_ins, start, &priv);
2104                 BUG_ON(ret);
2105                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2106
2107                 if (extent_op->type == PENDING_EXTENT_INSERT) {
2108                         key.objectid = start;
2109                         if (skinny_metadata) {
2110                                 key.offset = extent_op->level;
2111                                 key.type = BTRFS_METADATA_ITEM_KEY;
2112                         } else {
2113                                 key.offset = extent_op->num_bytes;
2114                                 key.type = BTRFS_EXTENT_ITEM_KEY;
2115                         }
2116                         ret = alloc_reserved_tree_block(trans, extent_root,
2117                                                 extent_root->root_key.objectid,
2118                                                 trans->transid,
2119                                                 extent_op->flags,
2120                                                 &extent_op->key,
2121                                                 extent_op->level, &key);
2122                 } else {
2123                         BUG_ON(1);
2124                 }
2125
2126                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
2127                                   GFP_NOFS);
2128                 kfree(extent_op);
2129         }
2130         btrfs_free_path(path);
2131         return 0;
2132 }
2133
2134 static int pin_down_bytes(struct btrfs_trans_handle *trans,
2135                           struct btrfs_root *root,
2136                           u64 bytenr, u64 num_bytes, int is_data)
2137 {
2138         int err = 0;
2139         struct extent_buffer *buf;
2140
2141         if (is_data)
2142                 goto pinit;
2143
2144         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
2145         if (!buf)
2146                 goto pinit;
2147
2148         /* we can reuse a block if it hasn't been written
2149          * and it is from this transaction.  We can't
2150          * reuse anything from the tree log root because
2151          * it has tiny sub-transactions.
2152          */
2153         if (btrfs_buffer_uptodate(buf, 0)) {
2154                 u64 header_owner = btrfs_header_owner(buf);
2155                 u64 header_transid = btrfs_header_generation(buf);
2156                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2157                     header_transid == trans->transid &&
2158                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2159                         clean_tree_block(NULL, root, buf);
2160                         free_extent_buffer(buf);
2161                         return 1;
2162                 }
2163         }
2164         free_extent_buffer(buf);
2165 pinit:
2166         update_pinned_extents(root, bytenr, num_bytes, 1);
2167
2168         BUG_ON(err < 0);
2169         return 0;
2170 }
2171
2172 void btrfs_pin_extent(struct btrfs_fs_info *fs_info,
2173                        u64 bytenr, u64 num_bytes)
2174 {
2175         update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 1);
2176 }
2177
2178 void btrfs_unpin_extent(struct btrfs_fs_info *fs_info,
2179                         u64 bytenr, u64 num_bytes)
2180 {
2181         update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 0);
2182 }
2183
2184 /*
2185  * remove an extent from the root, returns 0 on success
2186  */
2187 static int __free_extent(struct btrfs_trans_handle *trans,
2188                          struct btrfs_root *root,
2189                          u64 bytenr, u64 num_bytes, u64 parent,
2190                          u64 root_objectid, u64 owner_objectid,
2191                          u64 owner_offset, int refs_to_drop)
2192 {
2193
2194         struct btrfs_key key;
2195         struct btrfs_path *path;
2196         struct btrfs_extent_ops *ops = root->fs_info->extent_ops;
2197         struct btrfs_root *extent_root = root->fs_info->extent_root;
2198         struct extent_buffer *leaf;
2199         struct btrfs_extent_item *ei;
2200         struct btrfs_extent_inline_ref *iref;
2201         int ret;
2202         int is_data;
2203         int extent_slot = 0;
2204         int found_extent = 0;
2205         int num_to_del = 1;
2206         u32 item_size;
2207         u64 refs;
2208         int skinny_metadata =
2209                 btrfs_fs_incompat(extent_root->fs_info,
2210                                   BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
2211
2212         if (root->fs_info->free_extent_hook) {
2213                 root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes,
2214                                                 parent, root_objectid, owner_objectid,
2215                                                 owner_offset, refs_to_drop);
2216
2217         }
2218         path = btrfs_alloc_path();
2219         if (!path)
2220                 return -ENOMEM;
2221
2222         path->reada = 1;
2223         path->leave_spinning = 1;
2224
2225         is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2226         if (is_data)
2227                 skinny_metadata = 0;
2228         BUG_ON(!is_data && refs_to_drop != 1);
2229
2230         ret = lookup_extent_backref(trans, extent_root, path, &iref,
2231                                     bytenr, num_bytes, parent,
2232                                     root_objectid, owner_objectid,
2233                                     owner_offset);
2234         if (ret == 0) {
2235                 extent_slot = path->slots[0];
2236                 while (extent_slot >= 0) {
2237                         btrfs_item_key_to_cpu(path->nodes[0], &key,
2238                                               extent_slot);
2239                         if (key.objectid != bytenr)
2240                                 break;
2241                         if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2242                             key.offset == num_bytes) {
2243                                 found_extent = 1;
2244                                 break;
2245                         }
2246                         if (key.type == BTRFS_METADATA_ITEM_KEY &&
2247                             key.offset == owner_objectid) {
2248                                 found_extent = 1;
2249                                 break;
2250                         }
2251                         if (path->slots[0] - extent_slot > 5)
2252                                 break;
2253                         extent_slot--;
2254                 }
2255 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2256                 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
2257                 if (found_extent && item_size < sizeof(*ei))
2258                         found_extent = 0;
2259 #endif
2260                 if (!found_extent) {
2261                         BUG_ON(iref);
2262                         ret = remove_extent_backref(trans, extent_root, path,
2263                                                     NULL, refs_to_drop,
2264                                                     is_data);
2265                         BUG_ON(ret);
2266                         btrfs_release_path(extent_root, path);
2267                         path->leave_spinning = 1;
2268
2269                         key.objectid = bytenr;
2270
2271                         if (skinny_metadata) {
2272                                 key.type = BTRFS_METADATA_ITEM_KEY;
2273                                 key.offset = owner_objectid;
2274                         } else {
2275                                 key.type = BTRFS_EXTENT_ITEM_KEY;
2276                                 key.offset = num_bytes;
2277                         }
2278
2279                         ret = btrfs_search_slot(trans, extent_root,
2280                                                 &key, path, -1, 1);
2281                         if (ret > 0 && skinny_metadata && path->slots[0]) {
2282                                 path->slots[0]--;
2283                                 btrfs_item_key_to_cpu(path->nodes[0],
2284                                                       &key,
2285                                                       path->slots[0]);
2286                                 if (key.objectid == bytenr &&
2287                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
2288                                     key.offset == num_bytes)
2289                                         ret = 0;
2290                         }
2291
2292                         if (ret > 0 && skinny_metadata) {
2293                                 skinny_metadata = 0;
2294                                 btrfs_release_path(extent_root, path);
2295                                 key.type = BTRFS_EXTENT_ITEM_KEY;
2296                                 key.offset = num_bytes;
2297                                 ret = btrfs_search_slot(trans, extent_root,
2298                                                         &key, path, -1, 1);
2299                         }
2300
2301                         if (ret) {
2302                                 printk(KERN_ERR "umm, got %d back from search"
2303                                        ", was looking for %llu\n", ret,
2304                                        (unsigned long long)bytenr);
2305                                 btrfs_print_leaf(extent_root, path->nodes[0]);
2306                         }
2307                         BUG_ON(ret);
2308                         extent_slot = path->slots[0];
2309                 }
2310         } else {
2311                 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2312                        "parent %llu root %llu  owner %llu offset %llu\n",
2313                        (unsigned long long)bytenr,
2314                        (unsigned long long)parent,
2315                        (unsigned long long)root_objectid,
2316                        (unsigned long long)owner_objectid,
2317                        (unsigned long long)owner_offset);
2318                 ret = -EIO;
2319                 goto fail;
2320         }
2321
2322         leaf = path->nodes[0];
2323         item_size = btrfs_item_size_nr(leaf, extent_slot);
2324 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2325         if (item_size < sizeof(*ei)) {
2326                 BUG_ON(found_extent || extent_slot != path->slots[0]);
2327                 ret = convert_extent_item_v0(trans, extent_root, path,
2328                                              owner_objectid, 0);
2329                 BUG_ON(ret < 0);
2330
2331                 btrfs_release_path(extent_root, path);
2332                 path->leave_spinning = 1;
2333
2334                 key.objectid = bytenr;
2335                 key.type = BTRFS_EXTENT_ITEM_KEY;
2336                 key.offset = num_bytes;
2337
2338                 ret = btrfs_search_slot(trans, extent_root, &key, path,
2339                                         -1, 1);
2340                 if (ret) {
2341                         printk(KERN_ERR "umm, got %d back from search"
2342                                ", was looking for %llu\n", ret,
2343                                (unsigned long long)bytenr);
2344                         btrfs_print_leaf(extent_root, path->nodes[0]);
2345                 }
2346                 BUG_ON(ret);
2347                 extent_slot = path->slots[0];
2348                 leaf = path->nodes[0];
2349                 item_size = btrfs_item_size_nr(leaf, extent_slot);
2350         }
2351 #endif
2352         BUG_ON(item_size < sizeof(*ei));
2353         ei = btrfs_item_ptr(leaf, extent_slot,
2354                             struct btrfs_extent_item);
2355         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
2356             key.type == BTRFS_EXTENT_ITEM_KEY) {
2357                 struct btrfs_tree_block_info *bi;
2358                 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
2359                 bi = (struct btrfs_tree_block_info *)(ei + 1);
2360                 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
2361         }
2362
2363         refs = btrfs_extent_refs(leaf, ei);
2364         BUG_ON(refs < refs_to_drop);
2365         refs -= refs_to_drop;
2366
2367         if (refs > 0) {
2368                 /*
2369                  * In the case of inline back ref, reference count will
2370                  * be updated by remove_extent_backref
2371                  */
2372                 if (iref) {
2373                         BUG_ON(!found_extent);
2374                 } else {
2375                         btrfs_set_extent_refs(leaf, ei, refs);
2376                         btrfs_mark_buffer_dirty(leaf);
2377                 }
2378                 if (found_extent) {
2379                         ret = remove_extent_backref(trans, extent_root, path,
2380                                                     iref, refs_to_drop,
2381                                                     is_data);
2382                         BUG_ON(ret);
2383                 }
2384         } else {
2385                 int mark_free = 0;
2386                 int pin = 1;
2387
2388                 if (found_extent) {
2389                         BUG_ON(is_data && refs_to_drop !=
2390                                extent_data_ref_count(root, path, iref));
2391                         if (iref) {
2392                                 BUG_ON(path->slots[0] != extent_slot);
2393                         } else {
2394                                 BUG_ON(path->slots[0] != extent_slot + 1);
2395                                 path->slots[0] = extent_slot;
2396                                 num_to_del = 2;
2397                         }
2398                 }
2399
2400                 if (ops && ops->free_extent) {
2401                         ret = ops->free_extent(root, bytenr, num_bytes);
2402                         if (ret > 0) {
2403                                 pin = 0;
2404                                 mark_free = 0;
2405                         }
2406                 }
2407
2408                 if (pin) {
2409                         ret = pin_down_bytes(trans, root, bytenr, num_bytes,
2410                                              is_data);
2411                         if (ret > 0)
2412                                 mark_free = 1;
2413                         BUG_ON(ret < 0);
2414                 }
2415
2416                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
2417                                       num_to_del);
2418                 BUG_ON(ret);
2419                 btrfs_release_path(extent_root, path);
2420
2421                 if (is_data) {
2422                         ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
2423                         BUG_ON(ret);
2424                 }
2425
2426                 update_block_group(trans, root, bytenr, num_bytes, 0, mark_free);
2427         }
2428 fail:
2429         btrfs_free_path(path);
2430         finish_current_insert(trans, extent_root);
2431         return ret;
2432 }
2433
2434 /*
2435  * find all the blocks marked as pending in the radix tree and remove
2436  * them from the extent map
2437  */
2438 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
2439                                btrfs_root *extent_root)
2440 {
2441         int ret;
2442         int err = 0;
2443         u64 start;
2444         u64 end;
2445         u64 priv;
2446         struct extent_io_tree *pending_del;
2447         struct extent_io_tree *extent_ins;
2448         struct pending_extent_op *extent_op;
2449
2450         extent_ins = &extent_root->fs_info->extent_ins;
2451         pending_del = &extent_root->fs_info->pending_del;
2452
2453         while(1) {
2454                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
2455                                             EXTENT_LOCKED);
2456                 if (ret)
2457                         break;
2458
2459                 ret = get_state_private(pending_del, start, &priv);
2460                 BUG_ON(ret);
2461                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2462
2463                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
2464                                   GFP_NOFS);
2465
2466                 if (!test_range_bit(extent_ins, start, end,
2467                                     EXTENT_LOCKED, 0)) {
2468                         ret = __free_extent(trans, extent_root,
2469                                             start, end + 1 - start, 0,
2470                                             extent_root->root_key.objectid,
2471                                             extent_op->level, 0, 1);
2472                         kfree(extent_op);
2473                 } else {
2474                         kfree(extent_op);
2475                         ret = get_state_private(extent_ins, start, &priv);
2476                         BUG_ON(ret);
2477                         extent_op = (struct pending_extent_op *)
2478                                                         (unsigned long)priv;
2479
2480                         clear_extent_bits(extent_ins, start, end,
2481                                           EXTENT_LOCKED, GFP_NOFS);
2482
2483                         if (extent_op->type == PENDING_BACKREF_UPDATE)
2484                                 BUG_ON(1);
2485
2486                         kfree(extent_op);
2487                 }
2488                 if (ret)
2489                         err = ret;
2490         }
2491         return err;
2492 }
2493
2494 /*
2495  * remove an extent from the root, returns 0 on success
2496  */
2497
2498 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2499                       struct btrfs_root *root,
2500                       u64 bytenr, u64 num_bytes, u64 parent,
2501                       u64 root_objectid, u64 owner, u64 offset)
2502 {
2503         struct btrfs_root *extent_root = root->fs_info->extent_root;
2504         int pending_ret;
2505         int ret;
2506
2507         WARN_ON(num_bytes < root->sectorsize);
2508         if (root == extent_root) {
2509                 struct pending_extent_op *extent_op;
2510
2511                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2512                 BUG_ON(!extent_op);
2513
2514                 extent_op->type = PENDING_EXTENT_DELETE;
2515                 extent_op->bytenr = bytenr;
2516                 extent_op->num_bytes = num_bytes;
2517                 extent_op->level = (int)owner;
2518
2519                 set_extent_bits(&root->fs_info->pending_del,
2520                                 bytenr, bytenr + num_bytes - 1,
2521                                 EXTENT_LOCKED, GFP_NOFS);
2522                 set_state_private(&root->fs_info->pending_del,
2523                                   bytenr, (unsigned long)extent_op);
2524                 return 0;
2525         }
2526         ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2527                             root_objectid, owner, offset, 1);
2528         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
2529         return ret ? ret : pending_ret;
2530 }
2531
2532 static u64 stripe_align(struct btrfs_root *root, u64 val)
2533 {
2534         u64 mask = ((u64)root->stripesize - 1);
2535         u64 ret = (val + mask) & ~mask;
2536         return ret;
2537 }
2538
2539 /*
2540  * walks the btree of allocated extents and find a hole of a given size.
2541  * The key ins is changed to record the hole:
2542  * ins->objectid == block start
2543  * ins->flags = BTRFS_EXTENT_ITEM_KEY
2544  * ins->offset == number of blocks
2545  * Any available blocks before search_start are skipped.
2546  */
2547 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2548                                      struct btrfs_root *orig_root,
2549                                      u64 num_bytes, u64 empty_size,
2550                                      u64 search_start, u64 search_end,
2551                                      u64 hint_byte, struct btrfs_key *ins,
2552                                      u64 exclude_start, u64 exclude_nr,
2553                                      int data)
2554 {
2555         int ret;
2556         u64 orig_search_start = search_start;
2557         struct btrfs_root * root = orig_root->fs_info->extent_root;
2558         struct btrfs_fs_info *info = root->fs_info;
2559         u64 total_needed = num_bytes;
2560         struct btrfs_block_group_cache *block_group;
2561         int full_scan = 0;
2562         int wrapped = 0;
2563
2564         WARN_ON(num_bytes < root->sectorsize);
2565         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2566
2567         search_start = stripe_align(root, search_start);
2568
2569         if (hint_byte) {
2570                 block_group = btrfs_lookup_first_block_group(info, hint_byte);
2571                 if (!block_group)
2572                         hint_byte = search_start;
2573                 block_group = btrfs_find_block_group(root, block_group,
2574                                                      hint_byte, data, 1);
2575         } else {
2576                 block_group = btrfs_find_block_group(root,
2577                                                      trans->block_group,
2578                                                      search_start, data, 1);
2579         }
2580
2581         total_needed += empty_size;
2582
2583 check_failed:
2584         search_start = stripe_align(root, search_start);
2585         if (!block_group) {
2586                 block_group = btrfs_lookup_first_block_group(info,
2587                                                              search_start);
2588                 if (!block_group)
2589                         block_group = btrfs_lookup_first_block_group(info,
2590                                                        orig_search_start);
2591         }
2592         ret = find_search_start(root, &block_group, &search_start,
2593                                 total_needed, data);
2594         if (ret)
2595                 goto new_group;
2596
2597         ins->objectid = search_start;
2598         ins->offset = num_bytes;
2599
2600         if (ins->objectid + num_bytes >
2601             block_group->key.objectid + block_group->key.offset) {
2602                 search_start = block_group->key.objectid +
2603                         block_group->key.offset;
2604                 goto new_group;
2605         }
2606
2607         if (test_range_bit(&info->extent_ins, ins->objectid,
2608                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
2609                 search_start = ins->objectid + num_bytes;
2610                 goto new_group;
2611         }
2612
2613         if (test_range_bit(&info->pinned_extents, ins->objectid,
2614                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
2615                 search_start = ins->objectid + num_bytes;
2616                 goto new_group;
2617         }
2618
2619         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
2620             ins->objectid < exclude_start + exclude_nr)) {
2621                 search_start = exclude_start + exclude_nr;
2622                 goto new_group;
2623         }
2624
2625         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
2626                 block_group = btrfs_lookup_block_group(info, ins->objectid);
2627                 if (block_group)
2628                         trans->block_group = block_group;
2629         }
2630         ins->offset = num_bytes;
2631         return 0;
2632
2633 new_group:
2634         block_group = btrfs_lookup_first_block_group(info, search_start);
2635         if (!block_group) {
2636                 search_start = orig_search_start;
2637                 if (full_scan) {
2638                         ret = -ENOSPC;
2639                         goto error;
2640                 }
2641                 if (wrapped) {
2642                         if (!full_scan)
2643                                 total_needed -= empty_size;
2644                         full_scan = 1;
2645                 } else
2646                         wrapped = 1;
2647         }
2648         cond_resched();
2649         block_group = btrfs_find_block_group(root, block_group,
2650                                              search_start, data, 0);
2651         goto check_failed;
2652
2653 error:
2654         return ret;
2655 }
2656
2657 static int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2658                                 struct btrfs_root *root,
2659                                 u64 num_bytes, u64 empty_size,
2660                                 u64 hint_byte, u64 search_end,
2661                                 struct btrfs_key *ins, int data)
2662 {
2663         int ret;
2664         u64 search_start = 0;
2665         u64 alloc_profile;
2666         struct btrfs_fs_info *info = root->fs_info;
2667
2668         if (info->extent_ops) {
2669                 struct btrfs_extent_ops *ops = info->extent_ops;
2670                 ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
2671                 BUG_ON(ret);
2672                 goto found;
2673         }
2674
2675         if (data) {
2676                 alloc_profile = info->avail_data_alloc_bits &
2677                                 info->data_alloc_profile;
2678                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2679         } else if ((info->system_allocs > 0 || root == info->chunk_root) &&
2680                    info->system_allocs >= 0) {
2681                 alloc_profile = info->avail_system_alloc_bits &
2682                                 info->system_alloc_profile;
2683                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2684         } else {
2685                 alloc_profile = info->avail_metadata_alloc_bits &
2686                                 info->metadata_alloc_profile;
2687                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2688         }
2689
2690         if (root->ref_cows) {
2691                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
2692                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2693                                              num_bytes,
2694                                              BTRFS_BLOCK_GROUP_METADATA);
2695                         BUG_ON(ret);
2696                 }
2697                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2698                                      num_bytes + 2 * 1024 * 1024, data);
2699                 BUG_ON(ret);
2700         }
2701
2702         WARN_ON(num_bytes < root->sectorsize);
2703         ret = find_free_extent(trans, root, num_bytes, empty_size,
2704                                search_start, search_end, hint_byte, ins,
2705                                trans->alloc_exclude_start,
2706                                trans->alloc_exclude_nr, data);
2707         BUG_ON(ret);
2708 found:
2709         clear_extent_dirty(&root->fs_info->free_space_cache,
2710                            ins->objectid, ins->objectid + ins->offset - 1,
2711                            GFP_NOFS);
2712         return ret;
2713 }
2714
2715 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
2716                                      struct btrfs_root *root,
2717                                      u64 root_objectid, u64 generation,
2718                                      u64 flags, struct btrfs_disk_key *key,
2719                                      int level, struct btrfs_key *ins)
2720 {
2721         int ret;
2722         struct btrfs_fs_info *fs_info = root->fs_info;
2723         struct btrfs_extent_item *extent_item;
2724         struct btrfs_tree_block_info *block_info;
2725         struct btrfs_extent_inline_ref *iref;
2726         struct btrfs_path *path;
2727         struct extent_buffer *leaf;
2728         u32 size = sizeof(*extent_item) + sizeof(*iref);
2729         int skinny_metadata =
2730                 btrfs_fs_incompat(fs_info,
2731                                   BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
2732
2733         if (!skinny_metadata)
2734                 size += sizeof(*block_info);
2735
2736         path = btrfs_alloc_path();
2737         BUG_ON(!path);
2738
2739         path->leave_spinning = 1;
2740         ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
2741                                       ins, size);
2742         BUG_ON(ret);
2743
2744         leaf = path->nodes[0];
2745         extent_item = btrfs_item_ptr(leaf, path->slots[0],
2746                                      struct btrfs_extent_item);
2747         btrfs_set_extent_refs(leaf, extent_item, 1);
2748         btrfs_set_extent_generation(leaf, extent_item, generation);
2749         btrfs_set_extent_flags(leaf, extent_item,
2750                                flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
2751
2752         if (skinny_metadata) {
2753                 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
2754         } else {
2755                 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
2756                 btrfs_set_tree_block_key(leaf, block_info, key);
2757                 btrfs_set_tree_block_level(leaf, block_info, level);
2758                 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
2759         }
2760
2761         btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
2762         btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
2763
2764         btrfs_mark_buffer_dirty(leaf);
2765         btrfs_free_path(path);
2766
2767         ret = update_block_group(trans, root, ins->objectid, root->leafsize,
2768                                  1, 0);
2769         return 0;
2770 }
2771
2772 static int alloc_tree_block(struct btrfs_trans_handle *trans,
2773                             struct btrfs_root *root, u64 num_bytes,
2774                             u64 root_objectid, u64 generation,
2775                             u64 flags, struct btrfs_disk_key *key,
2776                             int level, u64 empty_size, u64 hint_byte,
2777                             u64 search_end, struct btrfs_key *ins)
2778 {
2779         int ret;
2780         ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size,
2781                                    hint_byte, search_end, ins, 0);
2782         BUG_ON(ret);
2783
2784         if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) {
2785                 struct pending_extent_op *extent_op;
2786
2787                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2788                 BUG_ON(!extent_op);
2789
2790                 extent_op->type = PENDING_EXTENT_INSERT;
2791                 extent_op->bytenr = ins->objectid;
2792                 extent_op->num_bytes = ins->offset;
2793                 extent_op->level = level;
2794                 extent_op->flags = flags;
2795                 memcpy(&extent_op->key, key, sizeof(*key));
2796
2797                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2798                                 ins->objectid + ins->offset - 1,
2799                                 EXTENT_LOCKED, GFP_NOFS);
2800                 set_state_private(&root->fs_info->extent_ins,
2801                                   ins->objectid, (unsigned long)extent_op);
2802         } else {
2803                 if (btrfs_fs_incompat(root->fs_info,
2804                                 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) {
2805                         ins->offset = level;
2806                         ins->type = BTRFS_METADATA_ITEM_KEY;
2807                 }
2808                 ret = alloc_reserved_tree_block(trans, root, root_objectid,
2809                                                 generation, flags,
2810                                                 key, level, ins);
2811                 finish_current_insert(trans, root->fs_info->extent_root);
2812                 del_pending_extents(trans, root->fs_info->extent_root);
2813         }
2814         return ret;
2815 }
2816
2817 /*
2818  * helper function to allocate a block for a given tree
2819  * returns the tree buffer or NULL.
2820  */
2821 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2822                                         struct btrfs_root *root,
2823                                         u32 blocksize, u64 root_objectid,
2824                                         struct btrfs_disk_key *key, int level,
2825                                         u64 hint, u64 empty_size)
2826 {
2827         struct btrfs_key ins;
2828         int ret;
2829         struct extent_buffer *buf;
2830
2831         ret = alloc_tree_block(trans, root, blocksize, root_objectid,
2832                                trans->transid, 0, key, level,
2833                                empty_size, hint, (u64)-1, &ins);
2834         if (ret) {
2835                 BUG_ON(ret > 0);
2836                 return ERR_PTR(ret);
2837         }
2838
2839         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
2840         if (!buf) {
2841                 btrfs_free_extent(trans, root, ins.objectid, ins.offset,
2842                                   0, root->root_key.objectid, level, 0);
2843                 BUG_ON(1);
2844                 return ERR_PTR(-ENOMEM);
2845         }
2846         btrfs_set_buffer_uptodate(buf);
2847         trans->blocks_used++;
2848
2849         return buf;
2850 }
2851
2852 #if 0
2853
2854 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2855                                   struct btrfs_root *root,
2856                                   struct extent_buffer *leaf)
2857 {
2858         u64 leaf_owner;
2859         u64 leaf_generation;
2860         struct btrfs_key key;
2861         struct btrfs_file_extent_item *fi;
2862         int i;
2863         int nritems;
2864         int ret;
2865
2866         BUG_ON(!btrfs_is_leaf(leaf));
2867         nritems = btrfs_header_nritems(leaf);
2868         leaf_owner = btrfs_header_owner(leaf);
2869         leaf_generation = btrfs_header_generation(leaf);
2870
2871         for (i = 0; i < nritems; i++) {
2872                 u64 disk_bytenr;
2873
2874                 btrfs_item_key_to_cpu(leaf, &key, i);
2875                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2876                         continue;
2877                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2878                 if (btrfs_file_extent_type(leaf, fi) ==
2879                     BTRFS_FILE_EXTENT_INLINE)
2880                         continue;
2881                 /*
2882                  * FIXME make sure to insert a trans record that
2883                  * repeats the snapshot del on crash
2884                  */
2885                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2886                 if (disk_bytenr == 0)
2887                         continue;
2888                 ret = btrfs_free_extent(trans, root, disk_bytenr,
2889                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2890                                 leaf->start, leaf_owner, leaf_generation,
2891                                 key.objectid, 0);
2892                 BUG_ON(ret);
2893         }
2894         return 0;
2895 }
2896
2897 static void noinline reada_walk_down(struct btrfs_root *root,
2898                                      struct extent_buffer *node,
2899                                      int slot)
2900 {
2901         u64 bytenr;
2902         u64 last = 0;
2903         u32 nritems;
2904         u32 refs;
2905         u32 blocksize;
2906         int ret;
2907         int i;
2908         int level;
2909         int skipped = 0;
2910
2911         nritems = btrfs_header_nritems(node);
2912         level = btrfs_header_level(node);
2913         if (level)
2914                 return;
2915
2916         for (i = slot; i < nritems && skipped < 32; i++) {
2917                 bytenr = btrfs_node_blockptr(node, i);
2918                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
2919                              (last > bytenr && last - bytenr > 32 * 1024))) {
2920                         skipped++;
2921                         continue;
2922                 }
2923                 blocksize = btrfs_level_size(root, level - 1);
2924                 if (i != slot) {
2925                         ret = btrfs_lookup_extent_ref(NULL, root, bytenr,
2926                                                       blocksize, &refs);
2927                         BUG_ON(ret);
2928                         if (refs != 1) {
2929                                 skipped++;
2930                                 continue;
2931                         }
2932                 }
2933                 mutex_unlock(&root->fs_info->fs_mutex);
2934                 ret = readahead_tree_block(root, bytenr, blocksize,
2935                                            btrfs_node_ptr_generation(node, i));
2936                 last = bytenr + blocksize;
2937                 cond_resched();
2938                 mutex_lock(&root->fs_info->fs_mutex);
2939                 if (ret)
2940                         break;
2941         }
2942 }
2943
2944 /*
2945  * helper function for drop_snapshot, this walks down the tree dropping ref
2946  * counts as it goes.
2947  */
2948 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2949                                    struct btrfs_root *root,
2950                                    struct btrfs_path *path, int *level)
2951 {
2952         u64 root_owner;
2953         u64 root_gen;
2954         u64 bytenr;
2955         u64 ptr_gen;
2956         struct extent_buffer *next;
2957         struct extent_buffer *cur;
2958         struct extent_buffer *parent;
2959         u32 blocksize;
2960         int ret;
2961         u32 refs;
2962
2963         WARN_ON(*level < 0);
2964         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2965         ret = btrfs_lookup_extent_ref(trans, root,
2966                                       path->nodes[*level]->start,
2967                                       path->nodes[*level]->len, &refs);
2968         BUG_ON(ret);
2969         if (refs > 1)
2970                 goto out;
2971
2972         /*
2973          * walk down to the last node level and free all the leaves
2974          */
2975         while(*level >= 0) {
2976                 WARN_ON(*level < 0);
2977                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2978                 cur = path->nodes[*level];
2979
2980                 if (btrfs_header_level(cur) != *level)
2981                         WARN_ON(1);
2982
2983                 if (path->slots[*level] >=
2984                     btrfs_header_nritems(cur))
2985                         break;
2986                 if (*level == 0) {
2987                         ret = drop_leaf_ref(trans, root, cur);
2988                         BUG_ON(ret);
2989                         break;
2990                 }
2991                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2992                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2993                 blocksize = btrfs_level_size(root, *level - 1);
2994                 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
2995                                               &refs);
2996                 BUG_ON(ret);
2997                 if (refs != 1) {
2998                         parent = path->nodes[*level];
2999                         root_owner = btrfs_header_owner(parent);
3000                         root_gen = btrfs_header_generation(parent);
3001                         path->slots[*level]++;
3002                         ret = btrfs_free_extent(trans, root, bytenr, blocksize,
3003                                                 parent->start, root_owner,
3004                                                 root_gen, *level - 1, 1);
3005                         BUG_ON(ret);
3006                         continue;
3007                 }
3008                 next = btrfs_find_tree_block(root, bytenr, blocksize);
3009                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
3010                         free_extent_buffer(next);
3011                         reada_walk_down(root, cur, path->slots[*level]);
3012                         mutex_unlock(&root->fs_info->fs_mutex);
3013                         next = read_tree_block(root, bytenr, blocksize,
3014                                                ptr_gen);
3015                         mutex_lock(&root->fs_info->fs_mutex);
3016                 }
3017                 WARN_ON(*level <= 0);
3018                 if (path->nodes[*level-1])
3019                         free_extent_buffer(path->nodes[*level-1]);
3020                 path->nodes[*level-1] = next;
3021                 *level = btrfs_header_level(next);
3022                 path->slots[*level] = 0;
3023         }
3024 out:
3025         WARN_ON(*level < 0);
3026         WARN_ON(*level >= BTRFS_MAX_LEVEL);
3027
3028         if (path->nodes[*level] == root->node) {
3029                 root_owner = root->root_key.objectid;
3030                 parent = path->nodes[*level];
3031         } else {
3032                 parent = path->nodes[*level + 1];
3033                 root_owner = btrfs_header_owner(parent);
3034         }
3035
3036         root_gen = btrfs_header_generation(parent);
3037         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
3038                                 path->nodes[*level]->len, parent->start,
3039                                 root_owner, root_gen, *level, 1);
3040         free_extent_buffer(path->nodes[*level]);
3041         path->nodes[*level] = NULL;
3042         *level += 1;
3043         BUG_ON(ret);
3044         return 0;
3045 }
3046
3047 /*
3048  * helper for dropping snapshots.  This walks back up the tree in the path
3049  * to find the first node higher up where we haven't yet gone through
3050  * all the slots
3051  */
3052 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3053                                  struct btrfs_root *root,
3054                                  struct btrfs_path *path, int *level)
3055 {
3056         u64 root_owner;
3057         u64 root_gen;
3058         struct btrfs_root_item *root_item = &root->root_item;
3059         int i;
3060         int slot;
3061         int ret;
3062
3063         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
3064                 slot = path->slots[i];
3065                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3066                         struct extent_buffer *node;
3067                         struct btrfs_disk_key disk_key;
3068                         node = path->nodes[i];
3069                         path->slots[i]++;
3070                         *level = i;
3071                         WARN_ON(*level == 0);
3072                         btrfs_node_key(node, &disk_key, path->slots[i]);
3073                         memcpy(&root_item->drop_progress,
3074                                &disk_key, sizeof(disk_key));
3075                         root_item->drop_level = i;
3076                         return 0;
3077                 } else {
3078                         struct extent_buffer *parent;
3079                         if (path->nodes[*level] == root->node)
3080                                 parent = path->nodes[*level];
3081                         else
3082                                 parent = path->nodes[*level + 1];
3083
3084                         root_owner = btrfs_header_owner(parent);
3085                         root_gen = btrfs_header_generation(parent);
3086                         ret = btrfs_free_extent(trans, root,
3087                                                 path->nodes[*level]->start,
3088                                                 path->nodes[*level]->len,
3089                                                 parent->start, root_owner,
3090                                                 root_gen, *level, 1);
3091                         BUG_ON(ret);
3092                         free_extent_buffer(path->nodes[*level]);
3093                         path->nodes[*level] = NULL;
3094                         *level = i + 1;
3095                 }
3096         }
3097         return 1;
3098 }
3099
3100 /*
3101  * drop the reference count on the tree rooted at 'snap'.  This traverses
3102  * the tree freeing any blocks that have a ref count of zero after being
3103  * decremented.
3104  */
3105 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3106                         *root)
3107 {
3108         int ret = 0;
3109         int wret;
3110         int level;
3111         struct btrfs_path *path;
3112         int i;
3113         int orig_level;
3114         struct btrfs_root_item *root_item = &root->root_item;
3115
3116         path = btrfs_alloc_path();
3117         BUG_ON(!path);
3118
3119         level = btrfs_header_level(root->node);
3120         orig_level = level;
3121         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3122                 path->nodes[level] = root->node;
3123                 extent_buffer_get(root->node);
3124                 path->slots[level] = 0;
3125         } else {
3126                 struct btrfs_key key;
3127                 struct btrfs_disk_key found_key;
3128                 struct extent_buffer *node;
3129
3130                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3131                 level = root_item->drop_level;
3132                 path->lowest_level = level;
3133                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3134                 if (wret < 0) {
3135                         ret = wret;
3136                         goto out;
3137                 }
3138                 node = path->nodes[level];
3139                 btrfs_node_key(node, &found_key, path->slots[level]);
3140                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3141                                sizeof(found_key)));
3142         }
3143         while(1) {
3144                 wret = walk_down_tree(trans, root, path, &level);
3145                 if (wret < 0)
3146                         ret = wret;
3147                 if (wret != 0)
3148                         break;
3149
3150                 wret = walk_up_tree(trans, root, path, &level);
3151                 if (wret < 0)
3152                         ret = wret;
3153                 if (wret != 0)
3154                         break;
3155                 /*
3156                 ret = -EAGAIN;
3157                 break;
3158                 */
3159         }
3160         for (i = 0; i <= orig_level; i++) {
3161                 if (path->nodes[i]) {
3162                         free_extent_buffer(path->nodes[i]);
3163                         path->nodes[i] = NULL;
3164                 }
3165         }
3166 out:
3167         btrfs_free_path(path);
3168         return ret;
3169 }
3170
3171 #endif
3172
3173 int btrfs_free_block_groups(struct btrfs_fs_info *info)
3174 {
3175         struct btrfs_space_info *sinfo;
3176         struct btrfs_block_group_cache *cache;
3177         u64 start;
3178         u64 end;
3179         u64 ptr;
3180         int ret;
3181
3182         while(1) {
3183                 ret = find_first_extent_bit(&info->block_group_cache, 0,
3184                                             &start, &end, (unsigned int)-1);
3185                 if (ret)
3186                         break;
3187                 ret = get_state_private(&info->block_group_cache, start, &ptr);
3188                 if (!ret) {
3189                         cache = (struct btrfs_block_group_cache *)ptr;
3190                         if (cache->free_space_ctl) {
3191                                 btrfs_remove_free_space_cache(cache);
3192                                 kfree(cache->free_space_ctl);
3193                         }
3194                         kfree(cache);
3195                 }
3196                 clear_extent_bits(&info->block_group_cache, start,
3197                                   end, (unsigned int)-1, GFP_NOFS);
3198         }
3199         while(1) {
3200                 ret = find_first_extent_bit(&info->free_space_cache, 0,
3201                                             &start, &end, EXTENT_DIRTY);
3202                 if (ret)
3203                         break;
3204                 clear_extent_dirty(&info->free_space_cache, start,
3205                                    end, GFP_NOFS);
3206         }
3207
3208         while (!list_empty(&info->space_info)) {
3209                 sinfo = list_entry(info->space_info.next,
3210                                    struct btrfs_space_info, list);
3211                 list_del_init(&sinfo->list);
3212                 kfree(sinfo);
3213         }
3214         return 0;
3215 }
3216
3217 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3218                            struct btrfs_key *key)
3219 {
3220         int ret;
3221         struct btrfs_key found_key;
3222         struct extent_buffer *leaf;
3223         int slot;
3224
3225         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3226         if (ret < 0)
3227                 return ret;
3228         while(1) {
3229                 slot = path->slots[0];
3230                 leaf = path->nodes[0];
3231                 if (slot >= btrfs_header_nritems(leaf)) {
3232                         ret = btrfs_next_leaf(root, path);
3233                         if (ret == 0)
3234                                 continue;
3235                         if (ret < 0)
3236                                 goto error;
3237                         break;
3238                 }
3239                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3240
3241                 if (found_key.objectid >= key->objectid &&
3242                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
3243                         return 0;
3244                 path->slots[0]++;
3245         }
3246         ret = -ENOENT;
3247 error:
3248         return ret;
3249 }
3250
3251 int btrfs_read_block_groups(struct btrfs_root *root)
3252 {
3253         struct btrfs_path *path;
3254         int ret;
3255         int bit;
3256         struct btrfs_block_group_cache *cache;
3257         struct btrfs_fs_info *info = root->fs_info;
3258         struct btrfs_space_info *space_info;
3259         struct extent_io_tree *block_group_cache;
3260         struct btrfs_key key;
3261         struct btrfs_key found_key;
3262         struct extent_buffer *leaf;
3263
3264         block_group_cache = &info->block_group_cache;
3265
3266         root = info->extent_root;
3267         key.objectid = 0;
3268         key.offset = 0;
3269         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3270         path = btrfs_alloc_path();
3271         if (!path)
3272                 return -ENOMEM;
3273
3274         while(1) {
3275                 ret = find_first_block_group(root, path, &key);
3276                 if (ret > 0) {
3277                         ret = 0;
3278                         goto error;
3279                 }
3280                 if (ret != 0) {
3281                         goto error;
3282                 }
3283                 leaf = path->nodes[0];
3284                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3285                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3286                 if (!cache) {
3287                         ret = -ENOMEM;
3288                         break;
3289                 }
3290
3291                 read_extent_buffer(leaf, &cache->item,
3292                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3293                                    sizeof(cache->item));
3294                 memcpy(&cache->key, &found_key, sizeof(found_key));
3295                 cache->cached = 0;
3296                 cache->pinned = 0;
3297                 key.objectid = found_key.objectid + found_key.offset;
3298                 btrfs_release_path(root, path);
3299                 cache->flags = btrfs_block_group_flags(&cache->item);
3300                 bit = 0;
3301                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3302                         bit = BLOCK_GROUP_DATA;
3303                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3304                         bit = BLOCK_GROUP_SYSTEM;
3305                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3306                         bit = BLOCK_GROUP_METADATA;
3307                 }
3308                 set_avail_alloc_bits(info, cache->flags);
3309                 if (btrfs_chunk_readonly(root, cache->key.objectid))
3310                         cache->ro = 1;
3311
3312                 ret = update_space_info(info, cache->flags, found_key.offset,
3313                                         btrfs_block_group_used(&cache->item),
3314                                         &space_info);
3315                 BUG_ON(ret);
3316                 cache->space_info = space_info;
3317
3318                 /* use EXTENT_LOCKED to prevent merging */
3319                 set_extent_bits(block_group_cache, found_key.objectid,
3320                                 found_key.objectid + found_key.offset - 1,
3321                                 bit | EXTENT_LOCKED, GFP_NOFS);
3322                 set_state_private(block_group_cache, found_key.objectid,
3323                                   (unsigned long)cache);
3324         }
3325         ret = 0;
3326 error:
3327         btrfs_free_path(path);
3328         return ret;
3329 }
3330
3331 struct btrfs_block_group_cache *
3332 btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
3333                       u64 chunk_objectid, u64 chunk_offset, u64 size)
3334 {
3335         int ret;
3336         int bit = 0;
3337         struct btrfs_block_group_cache *cache;
3338         struct extent_io_tree *block_group_cache;
3339
3340         block_group_cache = &fs_info->block_group_cache;
3341
3342         cache = kzalloc(sizeof(*cache), GFP_NOFS);
3343         BUG_ON(!cache);
3344         cache->key.objectid = chunk_offset;
3345         cache->key.offset = size;
3346
3347         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3348         btrfs_set_block_group_used(&cache->item, bytes_used);
3349         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3350         cache->flags = type;
3351         btrfs_set_block_group_flags(&cache->item, type);
3352
3353         ret = update_space_info(fs_info, cache->flags, size, bytes_used,
3354                                 &cache->space_info);
3355         BUG_ON(ret);
3356
3357         bit = block_group_state_bits(type);
3358         set_extent_bits(block_group_cache, chunk_offset,
3359                         chunk_offset + size - 1,
3360                         bit | EXTENT_LOCKED, GFP_NOFS);
3361
3362         set_state_private(block_group_cache, chunk_offset,
3363                           (unsigned long)cache);
3364         set_avail_alloc_bits(fs_info, type);
3365
3366         return cache;
3367 }
3368
3369 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3370                            struct btrfs_root *root, u64 bytes_used,
3371                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3372                            u64 size)
3373 {
3374         int ret;
3375         struct btrfs_root *extent_root;
3376         struct btrfs_block_group_cache *cache;
3377
3378         cache = btrfs_add_block_group(root->fs_info, bytes_used, type,
3379                                       chunk_objectid, chunk_offset, size);
3380         extent_root = root->fs_info->extent_root;
3381         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3382                                 sizeof(cache->item));
3383         BUG_ON(ret);
3384
3385         finish_current_insert(trans, extent_root);
3386         ret = del_pending_extents(trans, extent_root);
3387         return 0;
3388 }
3389
3390 /*
3391  * This is for converter use only.
3392  *
3393  * In that case, we don't know where are free blocks located.
3394  * Therefore all block group cache entries must be setup properly
3395  * before doing any block allocation.
3396  */
3397 int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
3398                             struct btrfs_root *root)
3399 {
3400         u64 total_bytes;
3401         u64 cur_start;
3402         u64 group_type;
3403         u64 group_size;
3404         u64 group_align;
3405         u64 total_data = 0;
3406         u64 total_metadata = 0;
3407         u64 chunk_objectid;
3408         int ret;
3409         int bit;
3410         struct btrfs_root *extent_root;
3411         struct btrfs_block_group_cache *cache;
3412         struct extent_io_tree *block_group_cache;
3413
3414         extent_root = root->fs_info->extent_root;
3415         block_group_cache = &root->fs_info->block_group_cache;
3416         chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3417         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
3418         group_align = 64 * root->sectorsize;
3419
3420         cur_start = 0;
3421         while (cur_start < total_bytes) {
3422                 group_size = total_bytes / 12;
3423                 group_size = min_t(u64, group_size, total_bytes - cur_start);
3424                 if (cur_start == 0) {
3425                         bit = BLOCK_GROUP_SYSTEM;
3426                         group_type = BTRFS_BLOCK_GROUP_SYSTEM;
3427                         group_size /= 4;
3428                         group_size &= ~(group_align - 1);
3429                         group_size = max_t(u64, group_size, 8 * 1024 * 1024);
3430                         group_size = min_t(u64, group_size, 32 * 1024 * 1024);
3431                 } else {
3432                         group_size &= ~(group_align - 1);
3433                         if (total_data >= total_metadata * 2) {
3434                                 group_type = BTRFS_BLOCK_GROUP_METADATA;
3435                                 group_size = min_t(u64, group_size,
3436                                                    1ULL * 1024 * 1024 * 1024);
3437                                 total_metadata += group_size;
3438                         } else {
3439                                 group_type = BTRFS_BLOCK_GROUP_DATA;
3440                                 group_size = min_t(u64, group_size,
3441                                                    5ULL * 1024 * 1024 * 1024);
3442                                 total_data += group_size;
3443                         }
3444                         if ((total_bytes - cur_start) * 4 < group_size * 5)
3445                                 group_size = total_bytes - cur_start;
3446                 }
3447
3448                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3449                 BUG_ON(!cache);
3450
3451                 cache->key.objectid = cur_start;
3452                 cache->key.offset = group_size;
3453                 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3454
3455                 btrfs_set_block_group_used(&cache->item, 0);
3456                 btrfs_set_block_group_chunk_objectid(&cache->item,
3457                                                      chunk_objectid);
3458                 btrfs_set_block_group_flags(&cache->item, group_type);
3459
3460                 cache->flags = group_type;
3461
3462                 ret = update_space_info(root->fs_info, group_type, group_size,
3463                                         0, &cache->space_info);
3464                 BUG_ON(ret);
3465                 set_avail_alloc_bits(extent_root->fs_info, group_type);
3466
3467                 set_extent_bits(block_group_cache, cur_start,
3468                                 cur_start + group_size - 1,
3469                                 bit | EXTENT_LOCKED, GFP_NOFS);
3470                 set_state_private(block_group_cache, cur_start,
3471                                   (unsigned long)cache);
3472                 cur_start += group_size;
3473         }
3474         /* then insert all the items */
3475         cur_start = 0;
3476         while(cur_start < total_bytes) {
3477                 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
3478                 BUG_ON(!cache);
3479
3480                 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3481                                         sizeof(cache->item));
3482                 BUG_ON(ret);
3483
3484                 finish_current_insert(trans, extent_root);
3485                 ret = del_pending_extents(trans, extent_root);
3486                 BUG_ON(ret);
3487
3488                 cur_start = cache->key.objectid + cache->key.offset;
3489         }
3490         return 0;
3491 }
3492
3493 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
3494                              struct btrfs_root *root,
3495                              u64 bytenr, u64 num_bytes, int alloc,
3496                              int mark_free)
3497 {
3498         return update_block_group(trans, root, bytenr, num_bytes,
3499                                   alloc, mark_free);
3500 }
3501
3502 static int btrfs_count_extents_in_block_group(struct btrfs_root *root,
3503                                               struct btrfs_path *path, u64 start,
3504                                               u64 len,
3505                                               u64 *total)
3506 {
3507         struct btrfs_key key;
3508         struct extent_buffer *leaf;
3509         u64 bytes_used = 0;
3510         int ret;
3511         int slot;
3512
3513         key.offset = 0;
3514         key.objectid = start;
3515         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
3516         ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
3517                                 &key, path, 0, 0);
3518         if (ret < 0)
3519                 return ret;
3520         while(1) {
3521                 leaf = path->nodes[0];
3522                 slot = path->slots[0];
3523                 if (slot >= btrfs_header_nritems(leaf)) {
3524                         ret = btrfs_next_leaf(root, path);
3525                         if (ret < 0)
3526                                 return ret;
3527                         if (ret > 0)
3528                                 break;
3529                         leaf = path->nodes[0];
3530                         slot = path->slots[0];
3531                 }
3532                 btrfs_item_key_to_cpu(leaf, &key, slot);
3533                 if (key.objectid > start + len)
3534                         break;
3535                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3536                         bytes_used += key.offset;
3537                 if (key.type == BTRFS_METADATA_ITEM_KEY)
3538                         bytes_used += root->leafsize;
3539                 path->slots[0]++;
3540         }
3541         *total = bytes_used;
3542         btrfs_release_path(root, path);
3543         return 0;
3544 }
3545
3546 int btrfs_check_block_accounting(struct btrfs_root *root)
3547 {
3548         int ret;
3549         u64 start = 0;
3550         u64 bytes_used = 0;
3551         struct btrfs_path path;
3552         struct btrfs_block_group_cache *cache;
3553         struct btrfs_fs_info *fs_info = root->fs_info;
3554
3555         btrfs_init_path(&path);
3556
3557         while(1) {
3558                 cache = btrfs_lookup_block_group(fs_info, start);
3559                 if (!cache)
3560                         break;
3561
3562                 ret = btrfs_count_extents_in_block_group(root, &path,
3563                                                          cache->key.objectid,
3564                                                          cache->key.offset,
3565                                                          &bytes_used);
3566
3567                 if (ret == 0) {
3568                         u64 on_disk = btrfs_block_group_used(&cache->item);
3569                         if (on_disk != bytes_used) {
3570                                 fprintf(stderr, "bad block group accounting found %llu "
3571                                         "expected %llu block group %llu\n",
3572                                         (unsigned long long)bytes_used,
3573                                         (unsigned long long)on_disk,
3574                                         (unsigned long long)cache->key.objectid);
3575                         }
3576                 }
3577                 start = cache->key.objectid + cache->key.offset;
3578
3579                 cache->space_info->bytes_used = 0;
3580         }
3581         return 0;
3582 }
3583
3584 /*
3585  * Fixup block accounting. The initial block accounting created by
3586  * make_block_groups isn't accuracy in this case.
3587  */
3588 int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
3589                                struct btrfs_root *root)
3590 {
3591         int ret;
3592         int slot;
3593         u64 start = 0;
3594         u64 bytes_used = 0;
3595         struct btrfs_path path;
3596         struct btrfs_key key;
3597         struct extent_buffer *leaf;
3598         struct btrfs_block_group_cache *cache;
3599         struct btrfs_fs_info *fs_info = root->fs_info;
3600
3601         root = root->fs_info->extent_root;
3602
3603         while(extent_root_pending_ops(fs_info)) {
3604                 ret = finish_current_insert(trans, root);
3605                 if (ret)
3606                         return ret;
3607                 ret = del_pending_extents(trans, root);
3608                 if (ret)
3609                         return ret;
3610         }
3611
3612         while(1) {
3613                 cache = btrfs_lookup_first_block_group(fs_info, start);
3614                 if (!cache)
3615                         break;
3616                 start = cache->key.objectid + cache->key.offset;
3617                 btrfs_set_block_group_used(&cache->item, 0);
3618                 cache->space_info->bytes_used = 0;
3619                 set_extent_bits(&root->fs_info->block_group_cache,
3620                                 cache->key.objectid,
3621                                 cache->key.objectid + cache->key.offset -1,
3622                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
3623         }
3624
3625         btrfs_init_path(&path);
3626         key.offset = 0;
3627         key.objectid = 0;
3628         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
3629         ret = btrfs_search_slot(trans, root->fs_info->extent_root,
3630                                 &key, &path, 0, 0);
3631         if (ret < 0)
3632                 return ret;
3633         while(1) {
3634                 leaf = path.nodes[0];
3635                 slot = path.slots[0];
3636                 if (slot >= btrfs_header_nritems(leaf)) {
3637                         ret = btrfs_next_leaf(root, &path);
3638                         if (ret < 0)
3639                                 return ret;
3640                         if (ret > 0)
3641                                 break;
3642                         leaf = path.nodes[0];
3643                         slot = path.slots[0];
3644                 }
3645                 btrfs_item_key_to_cpu(leaf, &key, slot);
3646                 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
3647                         bytes_used += key.offset;
3648                         ret = btrfs_update_block_group(trans, root,
3649                                   key.objectid, key.offset, 1, 0);
3650                         BUG_ON(ret);
3651                 } else if (key.type == BTRFS_METADATA_ITEM_KEY) {
3652                         bytes_used += root->leafsize;
3653                         ret = btrfs_update_block_group(trans, root,
3654                                   key.objectid, root->leafsize, 1, 0);
3655                         BUG_ON(ret);
3656                 }
3657                 path.slots[0]++;
3658         }
3659         btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used);
3660         btrfs_release_path(root, &path);
3661         return 0;
3662 }