Implement raid0 when multiple devices are present
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "radix-tree.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "crc32c.h"
28 #include "volumes.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
41                            struct btrfs_root *root, u64 bytes_used,
42                            u64 type, u64 chunk_tree, u64 chunk_objectid,
43                            u64 size);
44
45 static int cache_block_group(struct btrfs_root *root,
46                              struct btrfs_block_group_cache *block_group)
47 {
48         struct btrfs_path *path;
49         int ret;
50         struct btrfs_key key;
51         struct extent_buffer *leaf;
52         struct extent_io_tree *free_space_cache;
53         int slot;
54         u64 last = 0;
55         u64 hole_size;
56         u64 first_free;
57         int found = 0;
58
59         if (!block_group)
60                 return 0;
61
62         root = root->fs_info->extent_root;
63         free_space_cache = &root->fs_info->free_space_cache;
64
65         if (block_group->cached)
66                 return 0;
67
68         path = btrfs_alloc_path();
69         if (!path)
70                 return -ENOMEM;
71
72         path->reada = 2;
73         first_free = block_group->key.objectid;
74         key.objectid = block_group->key.objectid;
75         key.offset = 0;
76         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
77         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
78         if (ret < 0)
79                 return ret;
80         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
81         if (ret < 0)
82                 return ret;
83         if (ret == 0) {
84                 leaf = path->nodes[0];
85                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
86                 if (key.objectid + key.offset > first_free)
87                         first_free = key.objectid + key.offset;
88         }
89         while(1) {
90                 leaf = path->nodes[0];
91                 slot = path->slots[0];
92                 if (slot >= btrfs_header_nritems(leaf)) {
93                         ret = btrfs_next_leaf(root, path);
94                         if (ret < 0)
95                                 goto err;
96                         if (ret == 0) {
97                                 continue;
98                         } else {
99                                 break;
100                         }
101                 }
102                 btrfs_item_key_to_cpu(leaf, &key, slot);
103                 if (key.objectid < block_group->key.objectid) {
104                         goto next;
105                 }
106                 if (key.objectid >= block_group->key.objectid +
107                     block_group->key.offset) {
108                         break;
109                 }
110
111                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
112                         if (!found) {
113                                 last = first_free;
114                                 found = 1;
115                         }
116                         if (key.objectid > last) {
117                                 hole_size = key.objectid - last;
118                                 set_extent_dirty(free_space_cache, last,
119                                                  last + hole_size - 1,
120                                                  GFP_NOFS);
121                         }
122                         last = key.objectid + key.offset;
123                 }
124 next:
125                 path->slots[0]++;
126         }
127
128         if (!found)
129                 last = first_free;
130         if (block_group->key.objectid +
131             block_group->key.offset > last) {
132                 hole_size = block_group->key.objectid +
133                         block_group->key.offset - last;
134                 set_extent_dirty(free_space_cache, last,
135                                  last + hole_size - 1, GFP_NOFS);
136         }
137         block_group->cached = 1;
138 err:
139         btrfs_free_path(path);
140         return 0;
141 }
142
143 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
144                                                          btrfs_fs_info *info,
145                                                          u64 bytenr)
146 {
147         struct extent_io_tree *block_group_cache;
148         struct btrfs_block_group_cache *block_group = NULL;
149         u64 ptr;
150         u64 start;
151         u64 end;
152         int ret;
153
154         block_group_cache = &info->block_group_cache;
155         ret = find_first_extent_bit(block_group_cache,
156                                     bytenr, &start, &end,
157                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
158                                     BLOCK_GROUP_SYSTEM);
159         if (ret) {
160                 return NULL;
161         }
162         ret = get_state_private(block_group_cache, start, &ptr);
163         if (ret)
164                 return NULL;
165
166         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
167         if (block_group->key.objectid <= bytenr && bytenr <
168             block_group->key.objectid + block_group->key.offset)
169                 return block_group;
170         return NULL;
171 }
172
173 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
174 {
175         return (cache->flags & bits);
176 }
177
178 static int noinline find_search_start(struct btrfs_root *root,
179                               struct btrfs_block_group_cache **cache_ret,
180                               u64 *start_ret, int num, int data)
181 {
182         int ret;
183         struct btrfs_block_group_cache *cache = *cache_ret;
184         u64 last;
185         u64 start = 0;
186         u64 end = 0;
187         u64 cache_miss = 0;
188         u64 search_start = *start_ret;
189         int wrapped = 0;
190
191         if (!cache) {
192                 goto out;
193         }
194 again:
195         ret = cache_block_group(root, cache);
196         if (ret)
197                 goto out;
198
199         last = max(search_start, cache->key.objectid);
200         if (!block_group_bits(cache, data)) {
201                 goto new_group;
202         }
203
204         while(1) {
205                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
206                                             last, &start, &end, EXTENT_DIRTY);
207                 if (ret) {
208                         if (!cache_miss)
209                                 cache_miss = last;
210                         goto new_group;
211                 }
212
213                 start = max(last, start);
214                 last = end + 1;
215                 if (last - start < num) {
216                         if (last == cache->key.objectid + cache->key.offset)
217                                 cache_miss = start;
218                         continue;
219                 }
220                 if (start + num > cache->key.objectid + cache->key.offset)
221                         goto new_group;
222                 *start_ret = start;
223                 return 0;
224         }
225 out:
226         cache = btrfs_lookup_block_group(root->fs_info, search_start);
227         if (!cache) {
228                 printk("Unable to find block group for %Lu\n", search_start);
229                 WARN_ON(1);
230         }
231         return -ENOSPC;
232
233 new_group:
234         last = cache->key.objectid + cache->key.offset;
235 wrapped:
236         cache = btrfs_lookup_block_group(root->fs_info, last);
237         if (!cache) {
238 no_cache:
239                 if (!wrapped) {
240                         wrapped = 1;
241                         last = search_start;
242                         goto wrapped;
243                 }
244                 goto out;
245         }
246         if (cache_miss && !cache->cached) {
247                 cache_block_group(root, cache);
248                 last = cache_miss;
249                 cache = btrfs_lookup_block_group(root->fs_info, last);
250         }
251         cache = btrfs_find_block_group(root, cache, last, data, 0);
252         if (!cache)
253                 goto no_cache;
254         *cache_ret = cache;
255         cache_miss = 0;
256         goto again;
257 }
258
259 static u64 div_factor(u64 num, int factor)
260 {
261         if (factor == 10)
262                 return num;
263         num *= factor;
264         num /= 10;
265         return num;
266 }
267
268 static int block_group_state_bits(u64 flags)
269 {
270         int bits = 0;
271         if (flags & BTRFS_BLOCK_GROUP_DATA)
272                 bits |= BLOCK_GROUP_DATA;
273         if (flags & BTRFS_BLOCK_GROUP_METADATA)
274                 bits |= BLOCK_GROUP_METADATA;
275         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
276                 bits |= BLOCK_GROUP_SYSTEM;
277         return bits;
278 }
279
280 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
281                                                  struct btrfs_block_group_cache
282                                                  *hint, u64 search_start,
283                                                  int data, int owner)
284 {
285         struct btrfs_block_group_cache *cache;
286         struct extent_io_tree *block_group_cache;
287         struct btrfs_block_group_cache *found_group = NULL;
288         struct btrfs_fs_info *info = root->fs_info;
289         u64 used;
290         u64 last = 0;
291         u64 hint_last;
292         u64 start;
293         u64 end;
294         u64 free_check;
295         u64 ptr;
296         int bit;
297         int ret;
298         int full_search = 0;
299         int factor = 8;
300
301         block_group_cache = &info->block_group_cache;
302
303         if (!owner)
304                 factor = 8;
305
306         bit = block_group_state_bits(data);
307
308         if (search_start) {
309                 struct btrfs_block_group_cache *shint;
310                 shint = btrfs_lookup_block_group(info, search_start);
311                 if (shint && block_group_bits(shint, data)) {
312                         used = btrfs_block_group_used(&shint->item);
313                         if (used + shint->pinned <
314                             div_factor(shint->key.offset, factor)) {
315                                 return shint;
316                         }
317                 }
318         }
319         if (hint && block_group_bits(hint, data)) {
320                 used = btrfs_block_group_used(&hint->item);
321                 if (used + hint->pinned <
322                     div_factor(hint->key.offset, factor)) {
323                         return hint;
324                 }
325                 last = hint->key.objectid + hint->key.offset;
326                 hint_last = last;
327         } else {
328                 if (hint)
329                         hint_last = max(hint->key.objectid, search_start);
330                 else
331                         hint_last = search_start;
332
333                 last = hint_last;
334         }
335 again:
336         while(1) {
337                 ret = find_first_extent_bit(block_group_cache, last,
338                                             &start, &end, bit);
339                 if (ret)
340                         break;
341
342                 ret = get_state_private(block_group_cache, start, &ptr);
343                 if (ret)
344                         break;
345
346                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
347                 last = cache->key.objectid + cache->key.offset;
348                 used = btrfs_block_group_used(&cache->item);
349
350                 if (full_search)
351                         free_check = cache->key.offset;
352                 else
353                         free_check = div_factor(cache->key.offset, factor);
354
355                 if (used + cache->pinned < free_check) {
356                         found_group = cache;
357                         goto found;
358                 }
359                 cond_resched();
360         }
361         if (!full_search) {
362                 last = search_start;
363                 full_search = 1;
364                 goto again;
365         }
366 found:
367         return found_group;
368 }
369
370 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
371                            u64 owner, u64 owner_offset)
372 {
373         u32 high_crc = ~(u32)0;
374         u32 low_crc = ~(u32)0;
375         __le64 lenum;
376
377         lenum = cpu_to_le64(root_objectid);
378         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
379         lenum = cpu_to_le64(ref_generation);
380         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
381         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
382                 lenum = cpu_to_le64(owner);
383                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
384                 lenum = cpu_to_le64(owner_offset);
385                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
386         }
387         return ((u64)high_crc << 32) | (u64)low_crc;
388 }
389
390 static int match_extent_ref(struct extent_buffer *leaf,
391                             struct btrfs_extent_ref *disk_ref,
392                             struct btrfs_extent_ref *cpu_ref)
393 {
394         int ret;
395         int len;
396
397         if (cpu_ref->objectid)
398                 len = sizeof(*cpu_ref);
399         else
400                 len = 2 * sizeof(u64);
401         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
402                                    len);
403         return ret == 0;
404 }
405
406 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
407                                           struct btrfs_root *root,
408                                           struct btrfs_path *path, u64 bytenr,
409                                           u64 root_objectid,
410                                           u64 ref_generation, u64 owner,
411                                           u64 owner_offset, int del)
412 {
413         u64 hash;
414         struct btrfs_key key;
415         struct btrfs_key found_key;
416         struct btrfs_extent_ref ref;
417         struct extent_buffer *leaf;
418         struct btrfs_extent_ref *disk_ref;
419         int ret;
420         int ret2;
421
422         btrfs_set_stack_ref_root(&ref, root_objectid);
423         btrfs_set_stack_ref_generation(&ref, ref_generation);
424         btrfs_set_stack_ref_objectid(&ref, owner);
425         btrfs_set_stack_ref_offset(&ref, owner_offset);
426
427         hash = hash_extent_ref(root_objectid, ref_generation, owner,
428                                owner_offset);
429         key.offset = hash;
430         key.objectid = bytenr;
431         key.type = BTRFS_EXTENT_REF_KEY;
432
433         while (1) {
434                 ret = btrfs_search_slot(trans, root, &key, path,
435                                         del ? -1 : 0, del);
436                 if (ret < 0)
437                         goto out;
438                 leaf = path->nodes[0];
439                 if (ret != 0) {
440                         u32 nritems = btrfs_header_nritems(leaf);
441                         if (path->slots[0] >= nritems) {
442                                 ret2 = btrfs_next_leaf(root, path);
443                                 if (ret2)
444                                         goto out;
445                                 leaf = path->nodes[0];
446                         }
447                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
448                         if (found_key.objectid != bytenr ||
449                             found_key.type != BTRFS_EXTENT_REF_KEY)
450                                 goto out;
451                         key.offset = found_key.offset;
452                         if (del) {
453                                 btrfs_release_path(root, path);
454                                 continue;
455                         }
456                 }
457                 disk_ref = btrfs_item_ptr(path->nodes[0],
458                                           path->slots[0],
459                                           struct btrfs_extent_ref);
460                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
461                         ret = 0;
462                         goto out;
463                 }
464                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
465                 key.offset = found_key.offset + 1;
466                 btrfs_release_path(root, path);
467         }
468 out:
469         return ret;
470 }
471
472 /*
473  * Back reference rules.  Back refs have three main goals:
474  *
475  * 1) differentiate between all holders of references to an extent so that
476  *    when a reference is dropped we can make sure it was a valid reference
477  *    before freeing the extent.
478  *
479  * 2) Provide enough information to quickly find the holders of an extent
480  *    if we notice a given block is corrupted or bad.
481  *
482  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
483  *    maintenance.  This is actually the same as #2, but with a slightly
484  *    different use case.
485  *
486  * File extents can be referenced by:
487  *
488  * - multiple snapshots, subvolumes, or different generations in one subvol
489  * - different files inside a single subvolume (in theory, not implemented yet)
490  * - different offsets inside a file (bookend extents in file.c)
491  *
492  * The extent ref structure has fields for:
493  *
494  * - Objectid of the subvolume root
495  * - Generation number of the tree holding the reference
496  * - objectid of the file holding the reference
497  * - offset in the file corresponding to the key holding the reference
498  *
499  * When a file extent is allocated the fields are filled in:
500  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
501  *
502  * When a leaf is cow'd new references are added for every file extent found
503  * in the leaf.  It looks the same as the create case, but trans->transid
504  * will be different when the block is cow'd.
505  *
506  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
507  *
508  * When a file extent is removed either during snapshot deletion or file
509  * truncation, the corresponding back reference is found
510  * by searching for:
511  *
512  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
513  *      inode objectid, offset in file)
514  *
515  * Btree extents can be referenced by:
516  *
517  * - Different subvolumes
518  * - Different generations of the same subvolume
519  *
520  * Storing sufficient information for a full reverse mapping of a btree
521  * block would require storing the lowest key of the block in the backref,
522  * and it would require updating that lowest key either before write out or
523  * every time it changed.  Instead, the objectid of the lowest key is stored
524  * along with the level of the tree block.  This provides a hint
525  * about where in the btree the block can be found.  Searches through the
526  * btree only need to look for a pointer to that block, so they stop one
527  * level higher than the level recorded in the backref.
528  *
529  * Some btrees do not do reference counting on their extents.  These
530  * include the extent tree and the tree of tree roots.  Backrefs for these
531  * trees always have a generation of zero.
532  *
533  * When a tree block is created, back references are inserted:
534  *
535  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
536  *
537  * When a tree block is cow'd in a reference counted root,
538  * new back references are added for all the blocks it points to.
539  * These are of the form (trans->transid will have increased since creation):
540  *
541  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
542  *
543  * Because the lowest_key_objectid and the level are just hints
544  * they are not used when backrefs are deleted.  When a backref is deleted:
545  *
546  * if backref was for a tree root:
547  *     root_objectid = root->root_key.objectid
548  * else
549  *     root_objectid = btrfs_header_owner(parent)
550  *
551  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
552  *
553  * Back Reference Key hashing:
554  *
555  * Back references have four fields, each 64 bits long.  Unfortunately,
556  * This is hashed into a single 64 bit number and placed into the key offset.
557  * The key objectid corresponds to the first byte in the extent, and the
558  * key type is set to BTRFS_EXTENT_REF_KEY
559  */
560 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
561                                  struct btrfs_root *root,
562                                  struct btrfs_path *path, u64 bytenr,
563                                  u64 root_objectid, u64 ref_generation,
564                                  u64 owner, u64 owner_offset)
565 {
566         u64 hash;
567         struct btrfs_key key;
568         struct btrfs_extent_ref ref;
569         struct btrfs_extent_ref *disk_ref;
570         int ret;
571
572         btrfs_set_stack_ref_root(&ref, root_objectid);
573         btrfs_set_stack_ref_generation(&ref, ref_generation);
574         btrfs_set_stack_ref_objectid(&ref, owner);
575         btrfs_set_stack_ref_offset(&ref, owner_offset);
576
577         hash = hash_extent_ref(root_objectid, ref_generation, owner,
578                                owner_offset);
579         key.offset = hash;
580         key.objectid = bytenr;
581         key.type = BTRFS_EXTENT_REF_KEY;
582
583         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
584         while (ret == -EEXIST) {
585                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
586                                           struct btrfs_extent_ref);
587                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
588                         goto out;
589                 key.offset++;
590                 btrfs_release_path(root, path);
591                 ret = btrfs_insert_empty_item(trans, root, path, &key,
592                                               sizeof(ref));
593         }
594         if (ret)
595                 goto out;
596         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
597                                   struct btrfs_extent_ref);
598         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
599                             sizeof(ref));
600         btrfs_mark_buffer_dirty(path->nodes[0]);
601 out:
602         btrfs_release_path(root, path);
603         return ret;
604 }
605
606 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
607                                 struct btrfs_root *root,
608                                 u64 bytenr, u64 num_bytes,
609                                 u64 root_objectid, u64 ref_generation,
610                                 u64 owner, u64 owner_offset)
611 {
612         struct btrfs_path *path;
613         int ret;
614         struct btrfs_key key;
615         struct extent_buffer *l;
616         struct btrfs_extent_item *item;
617         u32 refs;
618
619         WARN_ON(num_bytes < root->sectorsize);
620         path = btrfs_alloc_path();
621         if (!path)
622                 return -ENOMEM;
623
624         key.objectid = bytenr;
625         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
626         key.offset = num_bytes;
627         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
628                                 0, 1);
629         if (ret < 0)
630                 return ret;
631         if (ret != 0) {
632                 BUG();
633         }
634         BUG_ON(ret != 0);
635         l = path->nodes[0];
636         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
637         refs = btrfs_extent_refs(l, item);
638         btrfs_set_extent_refs(l, item, refs + 1);
639         btrfs_mark_buffer_dirty(path->nodes[0]);
640
641         btrfs_release_path(root->fs_info->extent_root, path);
642
643         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
644                                           path, bytenr, root_objectid,
645                                           ref_generation, owner, owner_offset);
646         BUG_ON(ret);
647         finish_current_insert(trans, root->fs_info->extent_root);
648         del_pending_extents(trans, root->fs_info->extent_root);
649
650         btrfs_free_path(path);
651         return 0;
652 }
653
654 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
655                          struct btrfs_root *root)
656 {
657         finish_current_insert(trans, root->fs_info->extent_root);
658         del_pending_extents(trans, root->fs_info->extent_root);
659         return 0;
660 }
661
662 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
663                              struct btrfs_root *root, u64 bytenr,
664                              u64 num_bytes, u32 *refs)
665 {
666         struct btrfs_path *path;
667         int ret;
668         struct btrfs_key key;
669         struct extent_buffer *l;
670         struct btrfs_extent_item *item;
671
672         WARN_ON(num_bytes < root->sectorsize);
673         path = btrfs_alloc_path();
674         key.objectid = bytenr;
675         key.offset = num_bytes;
676         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
677         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
678                                 0, 0);
679         if (ret < 0)
680                 goto out;
681         if (ret != 0) {
682                 btrfs_print_leaf(root, path->nodes[0]);
683                 printk("failed to find block number %Lu\n", bytenr);
684                 BUG();
685         }
686         l = path->nodes[0];
687         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
688         *refs = btrfs_extent_refs(l, item);
689 out:
690         btrfs_free_path(path);
691         return 0;
692 }
693
694 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
695                                   struct btrfs_path *count_path,
696                                   u64 first_extent)
697 {
698         struct btrfs_root *extent_root = root->fs_info->extent_root;
699         struct btrfs_path *path;
700         u64 bytenr;
701         u64 found_objectid;
702         u64 root_objectid = root->root_key.objectid;
703         u32 total_count = 0;
704         u32 cur_count;
705         u32 refs;
706         u32 nritems;
707         int ret;
708         struct btrfs_key key;
709         struct btrfs_key found_key;
710         struct extent_buffer *l;
711         struct btrfs_extent_item *item;
712         struct btrfs_extent_ref *ref_item;
713         int level = -1;
714
715         path = btrfs_alloc_path();
716 again:
717         if (level == -1)
718                 bytenr = first_extent;
719         else
720                 bytenr = count_path->nodes[level]->start;
721
722         cur_count = 0;
723         key.objectid = bytenr;
724         key.offset = 0;
725
726         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
727         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
728         if (ret < 0)
729                 goto out;
730         BUG_ON(ret == 0);
731
732         l = path->nodes[0];
733         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
734
735         if (found_key.objectid != bytenr ||
736             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
737                 goto out;
738         }
739
740         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
741         refs = btrfs_extent_refs(l, item);
742         while (1) {
743                 nritems = btrfs_header_nritems(l);
744                 if (path->slots[0] >= nritems) {
745                         ret = btrfs_next_leaf(extent_root, path);
746                         if (ret == 0)
747                                 continue;
748                         break;
749                 }
750                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
751                 if (found_key.objectid != bytenr)
752                         break;
753                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
754                         path->slots[0]++;
755                         continue;
756                 }
757
758                 cur_count++;
759                 ref_item = btrfs_item_ptr(l, path->slots[0],
760                                           struct btrfs_extent_ref);
761                 found_objectid = btrfs_ref_root(l, ref_item);
762
763                 if (found_objectid != root_objectid) {
764                         total_count = 2;
765                         goto out;
766                 }
767                 total_count = 1;
768                 path->slots[0]++;
769         }
770         if (cur_count == 0) {
771                 total_count = 0;
772                 goto out;
773         }
774         if (level >= 0 && root->node == count_path->nodes[level])
775                 goto out;
776         level++;
777         btrfs_release_path(root, path);
778         goto again;
779
780 out:
781         btrfs_free_path(path);
782         return total_count;
783 }
784 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
785                        struct btrfs_root *root, u64 owner_objectid)
786 {
787         u64 generation;
788         u64 key_objectid;
789         u64 level;
790         u32 nritems;
791         struct btrfs_disk_key disk_key;
792
793         level = btrfs_header_level(root->node);
794         generation = trans->transid;
795         nritems = btrfs_header_nritems(root->node);
796         if (nritems > 0) {
797                 if (level == 0)
798                         btrfs_item_key(root->node, &disk_key, 0);
799                 else
800                         btrfs_node_key(root->node, &disk_key, 0);
801                 key_objectid = btrfs_disk_key_objectid(&disk_key);
802         } else {
803                 key_objectid = 0;
804         }
805         return btrfs_inc_extent_ref(trans, root, root->node->start,
806                                     root->node->len, owner_objectid,
807                                     generation, level, key_objectid);
808 }
809
810 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
811                   struct extent_buffer *buf)
812 {
813         u64 bytenr;
814         u32 nritems;
815         struct btrfs_key key;
816         struct btrfs_file_extent_item *fi;
817         int i;
818         int level;
819         int ret;
820         int faili;
821
822         if (!root->ref_cows)
823                 return 0;
824
825         level = btrfs_header_level(buf);
826         nritems = btrfs_header_nritems(buf);
827         for (i = 0; i < nritems; i++) {
828                 if (level == 0) {
829                         u64 disk_bytenr;
830                         btrfs_item_key_to_cpu(buf, &key, i);
831                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
832                                 continue;
833                         fi = btrfs_item_ptr(buf, i,
834                                             struct btrfs_file_extent_item);
835                         if (btrfs_file_extent_type(buf, fi) ==
836                             BTRFS_FILE_EXTENT_INLINE)
837                                 continue;
838                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
839                         if (disk_bytenr == 0)
840                                 continue;
841                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
842                                     btrfs_file_extent_disk_num_bytes(buf, fi),
843                                     root->root_key.objectid, trans->transid,
844                                     key.objectid, key.offset);
845                         if (ret) {
846                                 faili = i;
847                                 goto fail;
848                         }
849                 } else {
850                         bytenr = btrfs_node_blockptr(buf, i);
851                         btrfs_node_key_to_cpu(buf, &key, i);
852                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
853                                            btrfs_level_size(root, level - 1),
854                                            root->root_key.objectid,
855                                            trans->transid,
856                                            level - 1, key.objectid);
857                         if (ret) {
858                                 faili = i;
859                                 goto fail;
860                         }
861                 }
862         }
863         return 0;
864 fail:
865         WARN_ON(1);
866 #if 0
867         for (i =0; i < faili; i++) {
868                 if (level == 0) {
869                         u64 disk_bytenr;
870                         btrfs_item_key_to_cpu(buf, &key, i);
871                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
872                                 continue;
873                         fi = btrfs_item_ptr(buf, i,
874                                             struct btrfs_file_extent_item);
875                         if (btrfs_file_extent_type(buf, fi) ==
876                             BTRFS_FILE_EXTENT_INLINE)
877                                 continue;
878                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
879                         if (disk_bytenr == 0)
880                                 continue;
881                         err = btrfs_free_extent(trans, root, disk_bytenr,
882                                     btrfs_file_extent_disk_num_bytes(buf,
883                                                                       fi), 0);
884                         BUG_ON(err);
885                 } else {
886                         bytenr = btrfs_node_blockptr(buf, i);
887                         err = btrfs_free_extent(trans, root, bytenr,
888                                         btrfs_level_size(root, level - 1), 0);
889                         BUG_ON(err);
890                 }
891         }
892 #endif
893         return ret;
894 }
895
896 static int write_one_cache_group(struct btrfs_trans_handle *trans,
897                                  struct btrfs_root *root,
898                                  struct btrfs_path *path,
899                                  struct btrfs_block_group_cache *cache)
900 {
901         int ret;
902         int pending_ret;
903         struct btrfs_root *extent_root = root->fs_info->extent_root;
904         unsigned long bi;
905         struct extent_buffer *leaf;
906
907         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
908         if (ret < 0)
909                 goto fail;
910         BUG_ON(ret);
911
912         leaf = path->nodes[0];
913         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
914         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
915         btrfs_mark_buffer_dirty(leaf);
916         btrfs_release_path(extent_root, path);
917 fail:
918         finish_current_insert(trans, extent_root);
919         pending_ret = del_pending_extents(trans, extent_root);
920         if (ret)
921                 return ret;
922         if (pending_ret)
923                 return pending_ret;
924         return 0;
925
926 }
927
928 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
929                                    struct btrfs_root *root)
930 {
931         struct extent_io_tree *block_group_cache;
932         struct btrfs_block_group_cache *cache;
933         int ret;
934         int err = 0;
935         int werr = 0;
936         struct btrfs_path *path;
937         u64 last = 0;
938         u64 start;
939         u64 end;
940         u64 ptr;
941
942         block_group_cache = &root->fs_info->block_group_cache;
943         path = btrfs_alloc_path();
944         if (!path)
945                 return -ENOMEM;
946
947         while(1) {
948                 ret = find_first_extent_bit(block_group_cache, last,
949                                             &start, &end, BLOCK_GROUP_DIRTY);
950                 if (ret)
951                         break;
952
953                 last = end + 1;
954                 ret = get_state_private(block_group_cache, start, &ptr);
955                 if (ret)
956                         break;
957
958                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
959                 err = write_one_cache_group(trans, root,
960                                             path, cache);
961                 /*
962                  * if we fail to write the cache group, we want
963                  * to keep it marked dirty in hopes that a later
964                  * write will work
965                  */
966                 if (err) {
967                         werr = err;
968                         continue;
969                 }
970                 clear_extent_bits(block_group_cache, start, end,
971                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
972         }
973         btrfs_free_path(path);
974         return werr;
975 }
976
977 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
978                                                   u64 flags)
979 {
980         struct list_head *head = &info->space_info;
981         struct list_head *cur;
982         struct btrfs_space_info *found;
983         list_for_each(cur, head) {
984                 found = list_entry(cur, struct btrfs_space_info, list);
985                 if (found->flags == flags)
986                         return found;
987         }
988         return NULL;
989
990 }
991
992 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
993                              u64 total_bytes, u64 bytes_used,
994                              struct btrfs_space_info **space_info)
995 {
996         struct btrfs_space_info *found;
997
998         found = __find_space_info(info, flags);
999         if (found) {
1000                 found->total_bytes += total_bytes;
1001                 found->bytes_used += bytes_used;
1002                 WARN_ON(found->total_bytes < found->bytes_used);
1003                 *space_info = found;
1004                 return 0;
1005         }
1006         found = kmalloc(sizeof(*found), GFP_NOFS);
1007         if (!found)
1008                 return -ENOMEM;
1009
1010         list_add(&found->list, &info->space_info);
1011         found->flags = flags;
1012         found->total_bytes = total_bytes;
1013         found->bytes_used = bytes_used;
1014         found->bytes_pinned = 0;
1015         found->full = 0;
1016         *space_info = found;
1017         return 0;
1018 }
1019
1020
1021 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1022                           struct btrfs_root *extent_root, u64 alloc_bytes,
1023                           u64 flags)
1024 {
1025         struct btrfs_space_info *space_info;
1026         u64 thresh;
1027         u64 start;
1028         u64 num_bytes;
1029         int ret;
1030
1031         space_info = __find_space_info(extent_root->fs_info, flags);
1032         if (!space_info) {
1033                 ret = update_space_info(extent_root->fs_info, flags,
1034                                         0, 0, &space_info);
1035                 BUG_ON(ret);
1036         }
1037         BUG_ON(!space_info);
1038
1039         if (space_info->full)
1040                 return 0;
1041
1042         thresh = div_factor(space_info->total_bytes, 7);
1043         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1044             thresh)
1045                 return 0;
1046
1047         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1048         if (ret == -ENOSPC) {
1049 printk("space info full %Lu\n", flags);
1050                 space_info->full = 1;
1051                 return 0;
1052         }
1053
1054         BUG_ON(ret);
1055
1056         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1057                      extent_root->fs_info->chunk_root->root_key.objectid,
1058                      start, num_bytes);
1059         BUG_ON(ret);
1060         return 0;
1061 }
1062
1063 static int update_block_group(struct btrfs_trans_handle *trans,
1064                               struct btrfs_root *root,
1065                               u64 bytenr, u64 num_bytes, int alloc,
1066                               int mark_free)
1067 {
1068         struct btrfs_block_group_cache *cache;
1069         struct btrfs_fs_info *info = root->fs_info;
1070         u64 total = num_bytes;
1071         u64 old_val;
1072         u64 byte_in_group;
1073         u64 start;
1074         u64 end;
1075
1076         while(total) {
1077                 cache = btrfs_lookup_block_group(info, bytenr);
1078                 if (!cache) {
1079                         return -1;
1080                 }
1081                 byte_in_group = bytenr - cache->key.objectid;
1082                 WARN_ON(byte_in_group > cache->key.offset);
1083                 start = cache->key.objectid;
1084                 end = start + cache->key.offset - 1;
1085                 set_extent_bits(&info->block_group_cache, start, end,
1086                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1087
1088                 old_val = btrfs_block_group_used(&cache->item);
1089                 num_bytes = min(total, cache->key.offset - byte_in_group);
1090                 if (alloc) {
1091                         old_val += num_bytes;
1092                         cache->space_info->bytes_used += num_bytes;
1093                 } else {
1094                         old_val -= num_bytes;
1095                         cache->space_info->bytes_used -= num_bytes;
1096                         if (mark_free) {
1097                                 set_extent_dirty(&info->free_space_cache,
1098                                                  bytenr, bytenr + num_bytes - 1,
1099                                                  GFP_NOFS);
1100                         }
1101                 }
1102                 btrfs_set_block_group_used(&cache->item, old_val);
1103                 total -= num_bytes;
1104                 bytenr += num_bytes;
1105         }
1106         return 0;
1107 }
1108
1109 static int update_pinned_extents(struct btrfs_root *root,
1110                                 u64 bytenr, u64 num, int pin)
1111 {
1112         u64 len;
1113         struct btrfs_block_group_cache *cache;
1114         struct btrfs_fs_info *fs_info = root->fs_info;
1115
1116         if (pin) {
1117                 set_extent_dirty(&fs_info->pinned_extents,
1118                                 bytenr, bytenr + num - 1, GFP_NOFS);
1119         } else {
1120                 clear_extent_dirty(&fs_info->pinned_extents,
1121                                 bytenr, bytenr + num - 1, GFP_NOFS);
1122         }
1123         while (num > 0) {
1124                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1125                 WARN_ON(!cache);
1126                 len = min(num, cache->key.offset -
1127                           (bytenr - cache->key.objectid));
1128                 if (pin) {
1129                         cache->pinned += len;
1130                         cache->space_info->bytes_pinned += len;
1131                         fs_info->total_pinned += len;
1132                 } else {
1133                         cache->pinned -= len;
1134                         cache->space_info->bytes_pinned -= len;
1135                         fs_info->total_pinned -= len;
1136                 }
1137                 bytenr += len;
1138                 num -= len;
1139         }
1140         return 0;
1141 }
1142
1143 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1144 {
1145         u64 last = 0;
1146         u64 start;
1147         u64 end;
1148         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1149         int ret;
1150
1151         while(1) {
1152                 ret = find_first_extent_bit(pinned_extents, last,
1153                                             &start, &end, EXTENT_DIRTY);
1154                 if (ret)
1155                         break;
1156                 set_extent_dirty(copy, start, end, GFP_NOFS);
1157                 last = end + 1;
1158         }
1159         return 0;
1160 }
1161
1162 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1163                                struct btrfs_root *root,
1164                                struct extent_io_tree *unpin)
1165 {
1166         u64 start;
1167         u64 end;
1168         int ret;
1169         struct extent_io_tree *free_space_cache;
1170         free_space_cache = &root->fs_info->free_space_cache;
1171
1172         while(1) {
1173                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1174                                             EXTENT_DIRTY);
1175                 if (ret)
1176                         break;
1177                 update_pinned_extents(root, start, end + 1 - start, 0);
1178                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1179                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1180         }
1181         return 0;
1182 }
1183
1184 static int finish_current_insert(struct btrfs_trans_handle *trans,
1185                                  struct btrfs_root *extent_root)
1186 {
1187         u64 start;
1188         u64 end;
1189         struct btrfs_fs_info *info = extent_root->fs_info;
1190         struct extent_buffer *eb;
1191         struct btrfs_path *path;
1192         struct btrfs_key ins;
1193         struct btrfs_disk_key first;
1194         struct btrfs_extent_item extent_item;
1195         int ret;
1196         int level;
1197         int err = 0;
1198
1199         btrfs_set_stack_extent_refs(&extent_item, 1);
1200         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1201         path = btrfs_alloc_path();
1202
1203         while(1) {
1204                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1205                                             &end, EXTENT_LOCKED);
1206                 if (ret)
1207                         break;
1208
1209                 ins.objectid = start;
1210                 ins.offset = end + 1 - start;
1211                 err = btrfs_insert_item(trans, extent_root, &ins,
1212                                         &extent_item, sizeof(extent_item));
1213                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1214                                   GFP_NOFS);
1215                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1216                 level = btrfs_header_level(eb);
1217                 if (level == 0) {
1218                         btrfs_item_key(eb, &first, 0);
1219                 } else {
1220                         btrfs_node_key(eb, &first, 0);
1221                 }
1222                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1223                                           start, extent_root->root_key.objectid,
1224                                           0, level,
1225                                           btrfs_disk_key_objectid(&first));
1226                 BUG_ON(err);
1227                 free_extent_buffer(eb);
1228         }
1229         btrfs_free_path(path);
1230         return 0;
1231 }
1232
1233 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1234                           int pending)
1235 {
1236         int err = 0;
1237         struct extent_buffer *buf;
1238
1239         if (!pending) {
1240                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1241                 if (buf) {
1242                         if (btrfs_buffer_uptodate(buf)) {
1243                                 u64 transid =
1244                                     root->fs_info->running_transaction->transid;
1245                                 if (btrfs_header_generation(buf) == transid) {
1246                                         free_extent_buffer(buf);
1247                                         return 1;
1248                                 }
1249                         }
1250                         free_extent_buffer(buf);
1251                 }
1252                 update_pinned_extents(root, bytenr, num_bytes, 1);
1253         } else {
1254                 set_extent_bits(&root->fs_info->pending_del,
1255                                 bytenr, bytenr + num_bytes - 1,
1256                                 EXTENT_LOCKED, GFP_NOFS);
1257         }
1258         BUG_ON(err < 0);
1259         return 0;
1260 }
1261
1262 /*
1263  * remove an extent from the root, returns 0 on success
1264  */
1265 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1266                          *root, u64 bytenr, u64 num_bytes,
1267                          u64 root_objectid, u64 ref_generation,
1268                          u64 owner_objectid, u64 owner_offset, int pin,
1269                          int mark_free)
1270 {
1271         struct btrfs_path *path;
1272         struct btrfs_key key;
1273         struct btrfs_fs_info *info = root->fs_info;
1274         struct btrfs_extent_ops *ops = info->extent_ops;
1275         struct btrfs_root *extent_root = info->extent_root;
1276         struct extent_buffer *leaf;
1277         int ret;
1278         int extent_slot = 0;
1279         int found_extent = 0;
1280         int num_to_del = 1;
1281         struct btrfs_extent_item *ei;
1282         u32 refs;
1283
1284         key.objectid = bytenr;
1285         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1286         key.offset = num_bytes;
1287
1288         path = btrfs_alloc_path();
1289         if (!path)
1290                 return -ENOMEM;
1291
1292         ret = lookup_extent_backref(trans, extent_root, path,
1293                                     bytenr, root_objectid,
1294                                     ref_generation,
1295                                     owner_objectid, owner_offset, 1);
1296         if (ret == 0) {
1297                 struct btrfs_key found_key;
1298                 extent_slot = path->slots[0];
1299                 while(extent_slot > 0) {
1300                         extent_slot--;
1301                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1302                                               extent_slot);
1303                         if (found_key.objectid != bytenr)
1304                                 break;
1305                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1306                             found_key.offset == num_bytes) {
1307                                 found_extent = 1;
1308                                 break;
1309                         }
1310                         if (path->slots[0] - extent_slot > 5)
1311                                 break;
1312                 }
1313                 if (!found_extent)
1314                         ret = btrfs_del_item(trans, extent_root, path);
1315         } else {
1316                 btrfs_print_leaf(extent_root, path->nodes[0]);
1317                 WARN_ON(1);
1318                 printk("Unable to find ref byte nr %Lu root %Lu "
1319                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1320                        root_objectid, ref_generation, owner_objectid,
1321                        owner_offset);
1322         }
1323         if (!found_extent) {
1324                 btrfs_release_path(extent_root, path);
1325                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1326                 if (ret < 0)
1327                         return ret;
1328                 BUG_ON(ret);
1329                 extent_slot = path->slots[0];
1330         }
1331
1332         leaf = path->nodes[0];
1333         ei = btrfs_item_ptr(leaf, extent_slot,
1334                             struct btrfs_extent_item);
1335         refs = btrfs_extent_refs(leaf, ei);
1336         BUG_ON(refs == 0);
1337         refs -= 1;
1338         btrfs_set_extent_refs(leaf, ei, refs);
1339
1340         btrfs_mark_buffer_dirty(leaf);
1341
1342         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1343                 /* if the back ref and the extent are next to each other
1344                  * they get deleted below in one shot
1345                  */
1346                 path->slots[0] = extent_slot;
1347                 num_to_del = 2;
1348         } else if (found_extent) {
1349                 /* otherwise delete the extent back ref */
1350                 ret = btrfs_del_item(trans, extent_root, path);
1351                 BUG_ON(ret);
1352                 /* if refs are 0, we need to setup the path for deletion */
1353                 if (refs == 0) {
1354                         btrfs_release_path(extent_root, path);
1355                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1356                                                 -1, 1);
1357                         if (ret < 0)
1358                                 return ret;
1359                         BUG_ON(ret);
1360                 }
1361         }
1362
1363         if (refs == 0) {
1364                 u64 super_used;
1365                 u64 root_used;
1366
1367                 if (pin) {
1368                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1369                         if (ret > 0)
1370                                 mark_free = 1;
1371                         BUG_ON(ret < 0);
1372                 }
1373
1374                 /* block accounting for super block */
1375                 super_used = btrfs_super_bytes_used(&info->super_copy);
1376                 btrfs_set_super_bytes_used(&info->super_copy,
1377                                            super_used - num_bytes);
1378
1379                 /* block accounting for root item */
1380                 root_used = btrfs_root_used(&root->root_item);
1381                 btrfs_set_root_used(&root->root_item,
1382                                            root_used - num_bytes);
1383                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1384                                       num_to_del);
1385                 if (ret)
1386                         return ret;
1387
1388                 if (ops && ops->free_extent)
1389                         ops->free_extent(root, bytenr, num_bytes);
1390
1391                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1392                                          mark_free);
1393                 BUG_ON(ret);
1394         }
1395         btrfs_free_path(path);
1396         finish_current_insert(trans, extent_root);
1397         return ret;
1398 }
1399
1400 /*
1401  * find all the blocks marked as pending in the radix tree and remove
1402  * them from the extent map
1403  */
1404 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1405                                btrfs_root *extent_root)
1406 {
1407         int ret;
1408         int err = 0;
1409         u64 start;
1410         u64 end;
1411         struct extent_io_tree *pending_del;
1412         struct extent_io_tree *pinned_extents;
1413
1414         pending_del = &extent_root->fs_info->pending_del;
1415         pinned_extents = &extent_root->fs_info->pinned_extents;
1416
1417         while(1) {
1418                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1419                                             EXTENT_LOCKED);
1420                 if (ret)
1421                         break;
1422                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1423                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1424                                   GFP_NOFS);
1425                 ret = __free_extent(trans, extent_root,
1426                                      start, end + 1 - start,
1427                                      extent_root->root_key.objectid,
1428                                      0, 0, 0, 0, 0);
1429                 if (ret)
1430                         err = ret;
1431         }
1432         return err;
1433 }
1434
1435 /*
1436  * remove an extent from the root, returns 0 on success
1437  */
1438 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1439                       *root, u64 bytenr, u64 num_bytes,
1440                       u64 root_objectid, u64 ref_generation,
1441                       u64 owner_objectid, u64 owner_offset, int pin)
1442 {
1443         struct btrfs_root *extent_root = root->fs_info->extent_root;
1444         int pending_ret;
1445         int ret;
1446
1447         WARN_ON(num_bytes < root->sectorsize);
1448         if (!root->ref_cows)
1449                 ref_generation = 0;
1450
1451         if (root == extent_root) {
1452                 pin_down_bytes(root, bytenr, num_bytes, 1);
1453                 return 0;
1454         }
1455         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1456                             ref_generation, owner_objectid, owner_offset,
1457                             pin, pin == 0);
1458         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1459         return ret ? ret : pending_ret;
1460 }
1461
1462 static u64 stripe_align(struct btrfs_root *root, u64 val)
1463 {
1464         u64 mask = ((u64)root->stripesize - 1);
1465         u64 ret = (val + mask) & ~mask;
1466         return ret;
1467 }
1468
1469 /*
1470  * walks the btree of allocated extents and find a hole of a given size.
1471  * The key ins is changed to record the hole:
1472  * ins->objectid == block start
1473  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1474  * ins->offset == number of blocks
1475  * Any available blocks before search_start are skipped.
1476  */
1477 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1478                                      struct btrfs_root *orig_root,
1479                                      u64 num_bytes, u64 empty_size,
1480                                      u64 search_start, u64 search_end,
1481                                      u64 hint_byte, struct btrfs_key *ins,
1482                                      u64 exclude_start, u64 exclude_nr,
1483                                      int data)
1484 {
1485         int ret;
1486         u64 orig_search_start = search_start;
1487         struct btrfs_root * root = orig_root->fs_info->extent_root;
1488         struct btrfs_fs_info *info = root->fs_info;
1489         u64 total_needed = num_bytes;
1490         struct btrfs_block_group_cache *block_group;
1491         int full_scan = 0;
1492         int wrapped = 0;
1493
1494         WARN_ON(num_bytes < root->sectorsize);
1495         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1496
1497         if (search_end == (u64)-1)
1498                 search_end = btrfs_super_total_bytes(&info->super_copy);
1499
1500         if (hint_byte) {
1501                 block_group = btrfs_lookup_block_group(info, hint_byte);
1502                 if (!block_group)
1503                         hint_byte = search_start;
1504                 block_group = btrfs_find_block_group(root, block_group,
1505                                                      hint_byte, data, 1);
1506         } else {
1507                 block_group = btrfs_find_block_group(root,
1508                                                      trans->block_group,
1509                                                      search_start, data, 1);
1510         }
1511
1512         total_needed += empty_size;
1513
1514 check_failed:
1515         if (!block_group) {
1516                 block_group = btrfs_lookup_block_group(info, search_start);
1517                 if (!block_group)
1518                         block_group = btrfs_lookup_block_group(info,
1519                                                        orig_search_start);
1520         }
1521         ret = find_search_start(root, &block_group, &search_start,
1522                                 total_needed, data);
1523         if (ret)
1524                 goto error;
1525
1526         search_start = stripe_align(root, search_start);
1527         ins->objectid = search_start;
1528         ins->offset = num_bytes;
1529
1530         if (ins->objectid + num_bytes >= search_end)
1531                 goto enospc;
1532
1533         if (ins->objectid + num_bytes >
1534             block_group->key.objectid + block_group->key.offset) {
1535                 search_start = block_group->key.objectid +
1536                         block_group->key.offset;
1537                 goto new_group;
1538         }
1539
1540         if (test_range_bit(&info->extent_ins, ins->objectid,
1541                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1542                 search_start = ins->objectid + num_bytes;
1543                 goto new_group;
1544         }
1545
1546         if (test_range_bit(&info->pinned_extents, ins->objectid,
1547                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1548                 search_start = ins->objectid + num_bytes;
1549                 goto new_group;
1550         }
1551
1552         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1553             ins->objectid < exclude_start + exclude_nr)) {
1554                 search_start = exclude_start + exclude_nr;
1555                 goto new_group;
1556         }
1557
1558         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1559                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1560                 if (block_group)
1561                         trans->block_group = block_group;
1562         }
1563         ins->offset = num_bytes;
1564         return 0;
1565
1566 new_group:
1567         if (search_start + num_bytes >= search_end) {
1568 enospc:
1569                 search_start = orig_search_start;
1570                 if (full_scan) {
1571                         ret = -ENOSPC;
1572                         goto error;
1573                 }
1574                 if (wrapped) {
1575                         if (!full_scan)
1576                                 total_needed -= empty_size;
1577                         full_scan = 1;
1578                 } else
1579                         wrapped = 1;
1580         }
1581         block_group = btrfs_lookup_block_group(info, search_start);
1582         cond_resched();
1583         block_group = btrfs_find_block_group(root, block_group,
1584                                              search_start, data, 0);
1585         goto check_failed;
1586
1587 error:
1588         return ret;
1589 }
1590 /*
1591  * finds a free extent and does all the dirty work required for allocation
1592  * returns the key for the extent through ins, and a tree buffer for
1593  * the first block of the extent through buf.
1594  *
1595  * returns 0 if everything worked, non-zero otherwise.
1596  */
1597 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1598                        struct btrfs_root *root,
1599                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1600                        u64 owner, u64 owner_offset,
1601                        u64 empty_size, u64 hint_byte,
1602                        u64 search_end, struct btrfs_key *ins, int data)
1603 {
1604         int ret;
1605         int pending_ret;
1606         u64 super_used, root_used;
1607         u64 search_start = 0;
1608         struct btrfs_fs_info *info = root->fs_info;
1609         struct btrfs_extent_ops *ops = info->extent_ops;
1610         u32 sizes[2];
1611         struct btrfs_root *extent_root = info->extent_root;
1612         struct btrfs_path *path;
1613         struct btrfs_extent_item *extent_item;
1614         struct btrfs_extent_ref *ref;
1615         struct btrfs_key keys[2];
1616         int extra_alloc_flags = 0;
1617
1618         if (btrfs_super_num_devices(&info->super_copy) > 1)
1619                 extra_alloc_flags = BTRFS_BLOCK_GROUP_RAID0;
1620
1621         if (data) {
1622                 data = BTRFS_BLOCK_GROUP_DATA | extra_alloc_flags;
1623         } else if (root == root->fs_info->chunk_root ||
1624                    info->force_system_allocs) {
1625                 data = BTRFS_BLOCK_GROUP_SYSTEM;
1626         } else {
1627                 data = BTRFS_BLOCK_GROUP_METADATA | extra_alloc_flags;
1628         }
1629
1630         if (root->ref_cows) {
1631                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1632                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1633                                              num_bytes,
1634                                              BTRFS_BLOCK_GROUP_METADATA |
1635                                              extra_alloc_flags);
1636                         BUG_ON(ret);
1637                 }
1638                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1639                                      num_bytes + 2 * 1024 * 1024, data);
1640                 BUG_ON(ret);
1641         }
1642
1643         WARN_ON(num_bytes < root->sectorsize);
1644         if (ops && ops->alloc_extent) {
1645                 ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
1646         } else {
1647                 ret = find_free_extent(trans, root, num_bytes, empty_size,
1648                                         search_start, search_end, hint_byte,
1649                                         ins, trans->alloc_exclude_start,
1650                                         trans->alloc_exclude_nr, data);
1651         }
1652         BUG_ON(ret);
1653         if (ret)
1654                 return ret;
1655
1656         /* block accounting for super block */
1657         super_used = btrfs_super_bytes_used(&info->super_copy);
1658         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1659
1660         /* block accounting for root item */
1661         root_used = btrfs_root_used(&root->root_item);
1662         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1663
1664         clear_extent_dirty(&root->fs_info->free_space_cache,
1665                            ins->objectid, ins->objectid + ins->offset - 1,
1666                            GFP_NOFS);
1667
1668         if (root == extent_root) {
1669                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1670                                 ins->objectid + ins->offset - 1,
1671                                 EXTENT_LOCKED, GFP_NOFS);
1672                 goto update_block;
1673         }
1674
1675         WARN_ON(trans->alloc_exclude_nr);
1676         trans->alloc_exclude_start = ins->objectid;
1677         trans->alloc_exclude_nr = ins->offset;
1678
1679         memcpy(&keys[0], ins, sizeof(*ins));
1680         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1681                                          owner, owner_offset);
1682         keys[1].objectid = ins->objectid;
1683         keys[1].type = BTRFS_EXTENT_REF_KEY;
1684         sizes[0] = sizeof(*extent_item);
1685         sizes[1] = sizeof(*ref);
1686
1687         path = btrfs_alloc_path();
1688         BUG_ON(!path);
1689
1690         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1691                                        sizes, 2);
1692
1693         BUG_ON(ret);
1694         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1695                                      struct btrfs_extent_item);
1696         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1697         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1698                              struct btrfs_extent_ref);
1699
1700         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1701         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1702         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1703         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1704
1705         btrfs_mark_buffer_dirty(path->nodes[0]);
1706
1707         trans->alloc_exclude_start = 0;
1708         trans->alloc_exclude_nr = 0;
1709         btrfs_free_path(path);
1710         finish_current_insert(trans, extent_root);
1711         pending_ret = del_pending_extents(trans, extent_root);
1712
1713         if (ret) {
1714                 return ret;
1715         }
1716         if (pending_ret) {
1717                 return pending_ret;
1718         }
1719
1720 update_block:
1721         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1722         if (ret) {
1723                 printk("update block group failed for %Lu %Lu\n",
1724                        ins->objectid, ins->offset);
1725                 BUG();
1726         }
1727         return 0;
1728 }
1729
1730 /*
1731  * helper function to allocate a block for a given tree
1732  * returns the tree buffer or NULL.
1733  */
1734 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1735                                              struct btrfs_root *root,
1736                                              u32 blocksize,
1737                                              u64 root_objectid, u64 hint,
1738                                              u64 empty_size)
1739 {
1740         u64 ref_generation;
1741
1742         if (root->ref_cows)
1743                 ref_generation = trans->transid;
1744         else
1745                 ref_generation = 0;
1746
1747
1748         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1749                                         ref_generation, 0, 0, hint, empty_size);
1750 }
1751
1752 /*
1753  * helper function to allocate a block for a given tree
1754  * returns the tree buffer or NULL.
1755  */
1756 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1757                                              struct btrfs_root *root,
1758                                              u32 blocksize,
1759                                              u64 root_objectid,
1760                                              u64 ref_generation,
1761                                              u64 first_objectid,
1762                                              int level,
1763                                              u64 hint,
1764                                              u64 empty_size)
1765 {
1766         struct btrfs_key ins;
1767         int ret;
1768         struct extent_buffer *buf;
1769
1770         ret = btrfs_alloc_extent(trans, root, blocksize,
1771                                  root_objectid, ref_generation,
1772                                  level, first_objectid, empty_size, hint,
1773                                  (u64)-1, &ins, 0);
1774         if (ret) {
1775                 BUG_ON(ret > 0);
1776                 return ERR_PTR(ret);
1777         }
1778         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1779         if (!buf) {
1780                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1781                                   root->root_key.objectid, ref_generation,
1782                                   0, 0, 0);
1783                 BUG_ON(1);
1784                 return ERR_PTR(-ENOMEM);
1785         }
1786         btrfs_set_buffer_uptodate(buf);
1787         trans->blocks_used++;
1788         return buf;
1789 }
1790
1791 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1792                                   struct btrfs_root *root,
1793                                   struct extent_buffer *leaf)
1794 {
1795         u64 leaf_owner;
1796         u64 leaf_generation;
1797         struct btrfs_key key;
1798         struct btrfs_file_extent_item *fi;
1799         int i;
1800         int nritems;
1801         int ret;
1802
1803         BUG_ON(!btrfs_is_leaf(leaf));
1804         nritems = btrfs_header_nritems(leaf);
1805         leaf_owner = btrfs_header_owner(leaf);
1806         leaf_generation = btrfs_header_generation(leaf);
1807
1808         for (i = 0; i < nritems; i++) {
1809                 u64 disk_bytenr;
1810
1811                 btrfs_item_key_to_cpu(leaf, &key, i);
1812                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1813                         continue;
1814                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1815                 if (btrfs_file_extent_type(leaf, fi) ==
1816                     BTRFS_FILE_EXTENT_INLINE)
1817                         continue;
1818                 /*
1819                  * FIXME make sure to insert a trans record that
1820                  * repeats the snapshot del on crash
1821                  */
1822                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1823                 if (disk_bytenr == 0)
1824                         continue;
1825                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1826                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1827                                 leaf_owner, leaf_generation,
1828                                 key.objectid, key.offset, 0);
1829                 BUG_ON(ret);
1830         }
1831         return 0;
1832 }
1833
1834 static void noinline reada_walk_down(struct btrfs_root *root,
1835                                      struct extent_buffer *node,
1836                                      int slot)
1837 {
1838         u64 bytenr;
1839         u64 last = 0;
1840         u32 nritems;
1841         u32 refs;
1842         u32 blocksize;
1843         int ret;
1844         int i;
1845         int level;
1846         int skipped = 0;
1847
1848         nritems = btrfs_header_nritems(node);
1849         level = btrfs_header_level(node);
1850         if (level)
1851                 return;
1852
1853         for (i = slot; i < nritems && skipped < 32; i++) {
1854                 bytenr = btrfs_node_blockptr(node, i);
1855                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1856                              (last > bytenr && last - bytenr > 32 * 1024))) {
1857                         skipped++;
1858                         continue;
1859                 }
1860                 blocksize = btrfs_level_size(root, level - 1);
1861                 if (i != slot) {
1862                         ret = lookup_extent_ref(NULL, root, bytenr,
1863                                                 blocksize, &refs);
1864                         BUG_ON(ret);
1865                         if (refs != 1) {
1866                                 skipped++;
1867                                 continue;
1868                         }
1869                 }
1870                 mutex_unlock(&root->fs_info->fs_mutex);
1871                 ret = readahead_tree_block(root, bytenr, blocksize);
1872                 last = bytenr + blocksize;
1873                 cond_resched();
1874                 mutex_lock(&root->fs_info->fs_mutex);
1875                 if (ret)
1876                         break;
1877         }
1878 }
1879
1880 /*
1881  * helper function for drop_snapshot, this walks down the tree dropping ref
1882  * counts as it goes.
1883  */
1884 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
1885                                    struct btrfs_root *root,
1886                                    struct btrfs_path *path, int *level)
1887 {
1888         u64 root_owner;
1889         u64 root_gen;
1890         u64 bytenr;
1891         struct extent_buffer *next;
1892         struct extent_buffer *cur;
1893         struct extent_buffer *parent;
1894         u32 blocksize;
1895         int ret;
1896         u32 refs;
1897
1898         WARN_ON(*level < 0);
1899         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1900         ret = lookup_extent_ref(trans, root,
1901                                 path->nodes[*level]->start,
1902                                 path->nodes[*level]->len, &refs);
1903         BUG_ON(ret);
1904         if (refs > 1)
1905                 goto out;
1906
1907         /*
1908          * walk down to the last node level and free all the leaves
1909          */
1910         while(*level >= 0) {
1911                 WARN_ON(*level < 0);
1912                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1913                 cur = path->nodes[*level];
1914
1915                 if (btrfs_header_level(cur) != *level)
1916                         WARN_ON(1);
1917
1918                 if (path->slots[*level] >=
1919                     btrfs_header_nritems(cur))
1920                         break;
1921                 if (*level == 0) {
1922                         ret = drop_leaf_ref(trans, root, cur);
1923                         BUG_ON(ret);
1924                         break;
1925                 }
1926                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1927                 blocksize = btrfs_level_size(root, *level - 1);
1928                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1929                 BUG_ON(ret);
1930                 if (refs != 1) {
1931                         parent = path->nodes[*level];
1932                         root_owner = btrfs_header_owner(parent);
1933                         root_gen = btrfs_header_generation(parent);
1934                         path->slots[*level]++;
1935                         ret = btrfs_free_extent(trans, root, bytenr,
1936                                                 blocksize, root_owner,
1937                                                 root_gen, 0, 0, 1);
1938                         BUG_ON(ret);
1939                         continue;
1940                 }
1941                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1942                 if (!next || !btrfs_buffer_uptodate(next)) {
1943                         free_extent_buffer(next);
1944                         reada_walk_down(root, cur, path->slots[*level]);
1945                         mutex_unlock(&root->fs_info->fs_mutex);
1946                         next = read_tree_block(root, bytenr, blocksize);
1947                         mutex_lock(&root->fs_info->fs_mutex);
1948
1949                         /* we dropped the lock, check one more time */
1950                         ret = lookup_extent_ref(trans, root, bytenr,
1951                                                 blocksize, &refs);
1952                         BUG_ON(ret);
1953                         if (refs != 1) {
1954                                 parent = path->nodes[*level];
1955                                 root_owner = btrfs_header_owner(parent);
1956                                 root_gen = btrfs_header_generation(parent);
1957
1958                                 path->slots[*level]++;
1959                                 free_extent_buffer(next);
1960                                 ret = btrfs_free_extent(trans, root, bytenr,
1961                                                         blocksize,
1962                                                         root_owner,
1963                                                         root_gen, 0, 0, 1);
1964                                 BUG_ON(ret);
1965                                 continue;
1966                         }
1967                 }
1968                 WARN_ON(*level <= 0);
1969                 if (path->nodes[*level-1])
1970                         free_extent_buffer(path->nodes[*level-1]);
1971                 path->nodes[*level-1] = next;
1972                 *level = btrfs_header_level(next);
1973                 path->slots[*level] = 0;
1974         }
1975 out:
1976         WARN_ON(*level < 0);
1977         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1978
1979         if (path->nodes[*level] == root->node) {
1980                 root_owner = root->root_key.objectid;
1981                 parent = path->nodes[*level];
1982         } else {
1983                 parent = path->nodes[*level + 1];
1984                 root_owner = btrfs_header_owner(parent);
1985         }
1986
1987         root_gen = btrfs_header_generation(parent);
1988         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1989                                 path->nodes[*level]->len,
1990                                 root_owner, root_gen, 0, 0, 1);
1991         free_extent_buffer(path->nodes[*level]);
1992         path->nodes[*level] = NULL;
1993         *level += 1;
1994         BUG_ON(ret);
1995         return 0;
1996 }
1997
1998 /*
1999  * helper for dropping snapshots.  This walks back up the tree in the path
2000  * to find the first node higher up where we haven't yet gone through
2001  * all the slots
2002  */
2003 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2004                                  struct btrfs_root *root,
2005                                  struct btrfs_path *path, int *level)
2006 {
2007         u64 root_owner;
2008         u64 root_gen;
2009         struct btrfs_root_item *root_item = &root->root_item;
2010         int i;
2011         int slot;
2012         int ret;
2013
2014         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2015                 slot = path->slots[i];
2016                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2017                         struct extent_buffer *node;
2018                         struct btrfs_disk_key disk_key;
2019                         node = path->nodes[i];
2020                         path->slots[i]++;
2021                         *level = i;
2022                         WARN_ON(*level == 0);
2023                         btrfs_node_key(node, &disk_key, path->slots[i]);
2024                         memcpy(&root_item->drop_progress,
2025                                &disk_key, sizeof(disk_key));
2026                         root_item->drop_level = i;
2027                         return 0;
2028                 } else {
2029                         if (path->nodes[*level] == root->node) {
2030                                 root_owner = root->root_key.objectid;
2031                                 root_gen =
2032                                    btrfs_header_generation(path->nodes[*level]);
2033                         } else {
2034                                 struct extent_buffer *node;
2035                                 node = path->nodes[*level + 1];
2036                                 root_owner = btrfs_header_owner(node);
2037                                 root_gen = btrfs_header_generation(node);
2038                         }
2039                         ret = btrfs_free_extent(trans, root,
2040                                                 path->nodes[*level]->start,
2041                                                 path->nodes[*level]->len,
2042                                                 root_owner, root_gen, 0, 0, 1);
2043                         BUG_ON(ret);
2044                         free_extent_buffer(path->nodes[*level]);
2045                         path->nodes[*level] = NULL;
2046                         *level = i + 1;
2047                 }
2048         }
2049         return 1;
2050 }
2051
2052 /*
2053  * drop the reference count on the tree rooted at 'snap'.  This traverses
2054  * the tree freeing any blocks that have a ref count of zero after being
2055  * decremented.
2056  */
2057 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2058                         *root)
2059 {
2060         int ret = 0;
2061         int wret;
2062         int level;
2063         struct btrfs_path *path;
2064         int i;
2065         int orig_level;
2066         struct btrfs_root_item *root_item = &root->root_item;
2067
2068         path = btrfs_alloc_path();
2069         BUG_ON(!path);
2070
2071         level = btrfs_header_level(root->node);
2072         orig_level = level;
2073         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2074                 path->nodes[level] = root->node;
2075                 extent_buffer_get(root->node);
2076                 path->slots[level] = 0;
2077         } else {
2078                 struct btrfs_key key;
2079                 struct btrfs_disk_key found_key;
2080                 struct extent_buffer *node;
2081
2082                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2083                 level = root_item->drop_level;
2084                 path->lowest_level = level;
2085                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2086                 if (wret < 0) {
2087                         ret = wret;
2088                         goto out;
2089                 }
2090                 node = path->nodes[level];
2091                 btrfs_node_key(node, &found_key, path->slots[level]);
2092                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2093                                sizeof(found_key)));
2094         }
2095         while(1) {
2096                 wret = walk_down_tree(trans, root, path, &level);
2097                 if (wret < 0)
2098                         ret = wret;
2099                 if (wret != 0)
2100                         break;
2101
2102                 wret = walk_up_tree(trans, root, path, &level);
2103                 if (wret < 0)
2104                         ret = wret;
2105                 if (wret != 0)
2106                         break;
2107                 /*
2108                 ret = -EAGAIN;
2109                 break;
2110                 */
2111         }
2112         for (i = 0; i <= orig_level; i++) {
2113                 if (path->nodes[i]) {
2114                         free_extent_buffer(path->nodes[i]);
2115                         path->nodes[i] = NULL;
2116                 }
2117         }
2118 out:
2119         btrfs_free_path(path);
2120         return ret;
2121 }
2122
2123 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2124 {
2125         u64 start;
2126         u64 end;
2127         u64 ptr;
2128         int ret;
2129         while(1) {
2130                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2131                                             &start, &end, (unsigned int)-1);
2132                 if (ret)
2133                         break;
2134                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2135                 if (!ret)
2136                         kfree((void *)(unsigned long)ptr);
2137                 clear_extent_bits(&info->block_group_cache, start,
2138                                   end, (unsigned int)-1, GFP_NOFS);
2139         }
2140         while(1) {
2141                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2142                                             &start, &end, EXTENT_DIRTY);
2143                 if (ret)
2144                         break;
2145                 clear_extent_dirty(&info->free_space_cache, start,
2146                                    end, GFP_NOFS);
2147         }
2148         return 0;
2149 }
2150
2151 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2152                            struct btrfs_key *key)
2153 {
2154         int ret;
2155         struct btrfs_key found_key;
2156         struct extent_buffer *leaf;
2157         int slot;
2158
2159         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2160         if (ret < 0)
2161                 return ret;
2162         while(1) {
2163                 slot = path->slots[0];
2164                 leaf = path->nodes[0];
2165                 if (slot >= btrfs_header_nritems(leaf)) {
2166                         ret = btrfs_next_leaf(root, path);
2167                         if (ret == 0)
2168                                 continue;
2169                         if (ret < 0)
2170                                 goto error;
2171                         break;
2172                 }
2173                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2174
2175                 if (found_key.objectid >= key->objectid &&
2176                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2177                         return 0;
2178                 path->slots[0]++;
2179         }
2180         ret = -ENOENT;
2181 error:
2182         return ret;
2183 }
2184
2185 int btrfs_read_block_groups(struct btrfs_root *root)
2186 {
2187         struct btrfs_path *path;
2188         int ret;
2189         int bit;
2190         struct btrfs_block_group_cache *cache;
2191         struct btrfs_fs_info *info = root->fs_info;
2192         struct btrfs_space_info *space_info;
2193         struct extent_io_tree *block_group_cache;
2194         struct btrfs_key key;
2195         struct btrfs_key found_key;
2196         struct extent_buffer *leaf;
2197
2198         block_group_cache = &info->block_group_cache;
2199
2200         root = info->extent_root;
2201         key.objectid = 0;
2202         key.offset = 0;
2203         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2204         path = btrfs_alloc_path();
2205         if (!path)
2206                 return -ENOMEM;
2207
2208         while(1) {
2209                 ret = find_first_block_group(root, path, &key);
2210                 if (ret > 0) {
2211                         ret = 0;
2212                         goto error;
2213                 }
2214                 if (ret != 0) {
2215                         goto error;
2216                 }
2217                 leaf = path->nodes[0];
2218                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2219                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2220                 if (!cache) {
2221                         ret = -ENOMEM;
2222                         break;
2223                 }
2224
2225                 read_extent_buffer(leaf, &cache->item,
2226                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2227                                    sizeof(cache->item));
2228                 memcpy(&cache->key, &found_key, sizeof(found_key));
2229                 cache->cached = 0;
2230                 cache->pinned = 0;
2231                 key.objectid = found_key.objectid + found_key.offset;
2232                 btrfs_release_path(root, path);
2233                 cache->flags = btrfs_block_group_flags(&cache->item);
2234                 bit = 0;
2235                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2236                         bit = BLOCK_GROUP_DATA;
2237                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2238                         bit = BLOCK_GROUP_SYSTEM;
2239                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2240                         bit = BLOCK_GROUP_METADATA;
2241                 }
2242
2243                 ret = update_space_info(info, cache->flags, found_key.offset,
2244                                         btrfs_block_group_used(&cache->item),
2245                                         &space_info);
2246                 BUG_ON(ret);
2247                 cache->space_info = space_info;
2248
2249                 /* use EXTENT_LOCKED to prevent merging */
2250                 set_extent_bits(block_group_cache, found_key.objectid,
2251                                 found_key.objectid + found_key.offset - 1,
2252                                 bit | EXTENT_LOCKED, GFP_NOFS);
2253                 set_state_private(block_group_cache, found_key.objectid,
2254                                   (unsigned long)cache);
2255
2256                 if (key.objectid >=
2257                     btrfs_super_total_bytes(&info->super_copy))
2258                         break;
2259         }
2260         ret = 0;
2261 error:
2262         btrfs_free_path(path);
2263         return ret;
2264 }
2265
2266 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2267                            struct btrfs_root *root, u64 bytes_used,
2268                            u64 type, u64 chunk_tree, u64 chunk_objectid,
2269                            u64 size)
2270 {
2271         int ret;
2272         int bit = 0;
2273         struct btrfs_root *extent_root;
2274         struct btrfs_block_group_cache *cache;
2275         struct extent_io_tree *block_group_cache;
2276
2277         extent_root = root->fs_info->extent_root;
2278         block_group_cache = &root->fs_info->block_group_cache;
2279
2280         cache = kmalloc(sizeof(*cache), GFP_NOFS);
2281         BUG_ON(!cache);
2282         cache->key.objectid = chunk_objectid;
2283         cache->key.offset = size;
2284         cache->cached = 0;
2285         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2286         memset(&cache->item, 0, sizeof(cache->item));
2287         btrfs_set_block_group_used(&cache->item, bytes_used);
2288         btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
2289         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2290         cache->flags = type;
2291         btrfs_set_block_group_flags(&cache->item, type);
2292
2293         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2294                                 &cache->space_info);
2295         BUG_ON(ret);
2296
2297         if (type & BTRFS_BLOCK_GROUP_DATA) {
2298                 bit = BLOCK_GROUP_DATA;
2299         } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2300                 bit = BLOCK_GROUP_SYSTEM;
2301         } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2302                 bit = BLOCK_GROUP_METADATA;
2303         }
2304         set_extent_bits(block_group_cache, chunk_objectid,
2305                         chunk_objectid + size - 1,
2306                         bit | EXTENT_LOCKED, GFP_NOFS);
2307         set_state_private(block_group_cache, chunk_objectid,
2308                           (unsigned long)cache);
2309
2310         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2311                                 sizeof(cache->item));
2312         BUG_ON(ret);
2313
2314         finish_current_insert(trans, extent_root);
2315         ret = del_pending_extents(trans, extent_root);
2316         BUG_ON(ret);
2317         return 0;
2318 }
2319
2320 u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation,
2321                           u64 owner, u64 owner_offset)
2322 {
2323         return hash_extent_ref(root_objectid, ref_generation,
2324                                owner, owner_offset);
2325 }
2326
2327 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
2328                              struct btrfs_root *root,
2329                              u64 bytenr, u64 num_bytes, int alloc,
2330                              int mark_free)
2331 {
2332         return update_block_group(trans, root, bytenr, num_bytes,
2333                                   alloc, mark_free);
2334 }