Add chunk uuids and update multi-device back references
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "radix-tree.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "crc32c.h"
28 #include "volumes.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40
41 static int cache_block_group(struct btrfs_root *root,
42                              struct btrfs_block_group_cache *block_group)
43 {
44         struct btrfs_path *path;
45         int ret;
46         struct btrfs_key key;
47         struct extent_buffer *leaf;
48         struct extent_io_tree *free_space_cache;
49         int slot;
50         u64 last = 0;
51         u64 hole_size;
52         u64 first_free;
53         int found = 0;
54
55         if (!block_group)
56                 return 0;
57
58         root = root->fs_info->extent_root;
59         free_space_cache = &root->fs_info->free_space_cache;
60
61         if (block_group->cached)
62                 return 0;
63
64         path = btrfs_alloc_path();
65         if (!path)
66                 return -ENOMEM;
67
68         path->reada = 2;
69         first_free = block_group->key.objectid;
70         key.objectid = block_group->key.objectid;
71         key.offset = 0;
72         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
73         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
74         if (ret < 0)
75                 return ret;
76         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
77         if (ret < 0)
78                 return ret;
79         if (ret == 0) {
80                 leaf = path->nodes[0];
81                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
82                 if (key.objectid + key.offset > first_free)
83                         first_free = key.objectid + key.offset;
84         }
85         while(1) {
86                 leaf = path->nodes[0];
87                 slot = path->slots[0];
88                 if (slot >= btrfs_header_nritems(leaf)) {
89                         ret = btrfs_next_leaf(root, path);
90                         if (ret < 0)
91                                 goto err;
92                         if (ret == 0) {
93                                 continue;
94                         } else {
95                                 break;
96                         }
97                 }
98                 btrfs_item_key_to_cpu(leaf, &key, slot);
99                 if (key.objectid < block_group->key.objectid) {
100                         goto next;
101                 }
102                 if (key.objectid >= block_group->key.objectid +
103                     block_group->key.offset) {
104                         break;
105                 }
106
107                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
108                         if (!found) {
109                                 last = first_free;
110                                 found = 1;
111                         }
112                         if (key.objectid > last) {
113                                 hole_size = key.objectid - last;
114                                 set_extent_dirty(free_space_cache, last,
115                                                  last + hole_size - 1,
116                                                  GFP_NOFS);
117                         }
118                         last = key.objectid + key.offset;
119                 }
120 next:
121                 path->slots[0]++;
122         }
123
124         if (!found)
125                 last = first_free;
126         if (block_group->key.objectid +
127             block_group->key.offset > last) {
128                 hole_size = block_group->key.objectid +
129                         block_group->key.offset - last;
130                 set_extent_dirty(free_space_cache, last,
131                                  last + hole_size - 1, GFP_NOFS);
132         }
133         block_group->cached = 1;
134 err:
135         btrfs_free_path(path);
136         return 0;
137 }
138
139 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
140                                                          btrfs_fs_info *info,
141                                                          u64 bytenr)
142 {
143         struct extent_io_tree *block_group_cache;
144         struct btrfs_block_group_cache *block_group = NULL;
145         u64 ptr;
146         u64 start;
147         u64 end;
148         int ret;
149
150         block_group_cache = &info->block_group_cache;
151         ret = find_first_extent_bit(block_group_cache,
152                                     bytenr, &start, &end,
153                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
154                                     BLOCK_GROUP_SYSTEM);
155         if (ret) {
156                 return NULL;
157         }
158         ret = get_state_private(block_group_cache, start, &ptr);
159         if (ret)
160                 return NULL;
161
162         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
163         if (block_group->key.objectid <= bytenr && bytenr <
164             block_group->key.objectid + block_group->key.offset)
165                 return block_group;
166         return NULL;
167 }
168
169 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
170 {
171         return (cache->flags & bits) == bits;
172 }
173
174 static int noinline find_search_start(struct btrfs_root *root,
175                               struct btrfs_block_group_cache **cache_ret,
176                               u64 *start_ret, int num, int data)
177 {
178         int ret;
179         struct btrfs_block_group_cache *cache = *cache_ret;
180         u64 last;
181         u64 start = 0;
182         u64 end = 0;
183         u64 cache_miss = 0;
184         u64 search_start = *start_ret;
185         int wrapped = 0;
186
187         if (!cache) {
188                 goto out;
189         }
190 again:
191         ret = cache_block_group(root, cache);
192         if (ret)
193                 goto out;
194
195         last = max(search_start, cache->key.objectid);
196         if (!block_group_bits(cache, data)) {
197                 goto new_group;
198         }
199
200         while(1) {
201                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
202                                             last, &start, &end, EXTENT_DIRTY);
203                 if (ret) {
204                         if (!cache_miss)
205                                 cache_miss = last;
206                         goto new_group;
207                 }
208
209                 start = max(last, start);
210                 last = end + 1;
211                 if (last - start < num) {
212                         if (last == cache->key.objectid + cache->key.offset)
213                                 cache_miss = start;
214                         continue;
215                 }
216                 if (start + num > cache->key.objectid + cache->key.offset)
217                         goto new_group;
218                 *start_ret = start;
219                 return 0;
220         }
221 out:
222         cache = btrfs_lookup_block_group(root->fs_info, search_start);
223         if (!cache) {
224                 printk("Unable to find block group for %llu\n",
225                         (unsigned long long)search_start);
226                 WARN_ON(1);
227         }
228         return -ENOSPC;
229
230 new_group:
231         last = cache->key.objectid + cache->key.offset;
232 wrapped:
233         cache = btrfs_lookup_block_group(root->fs_info, last);
234         if (!cache) {
235 no_cache:
236                 if (!wrapped) {
237                         wrapped = 1;
238                         last = search_start;
239                         goto wrapped;
240                 }
241                 goto out;
242         }
243         if (cache_miss && !cache->cached) {
244                 cache_block_group(root, cache);
245                 last = cache_miss;
246                 cache = btrfs_lookup_block_group(root->fs_info, last);
247         }
248         cache = btrfs_find_block_group(root, cache, last, data, 0);
249         if (!cache)
250                 goto no_cache;
251         *cache_ret = cache;
252         cache_miss = 0;
253         goto again;
254 }
255
256 static u64 div_factor(u64 num, int factor)
257 {
258         if (factor == 10)
259                 return num;
260         num *= factor;
261         num /= 10;
262         return num;
263 }
264
265 static int block_group_state_bits(u64 flags)
266 {
267         int bits = 0;
268         if (flags & BTRFS_BLOCK_GROUP_DATA)
269                 bits |= BLOCK_GROUP_DATA;
270         if (flags & BTRFS_BLOCK_GROUP_METADATA)
271                 bits |= BLOCK_GROUP_METADATA;
272         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
273                 bits |= BLOCK_GROUP_SYSTEM;
274         return bits;
275 }
276
277 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
278                                                  struct btrfs_block_group_cache
279                                                  *hint, u64 search_start,
280                                                  int data, int owner)
281 {
282         struct btrfs_block_group_cache *cache;
283         struct extent_io_tree *block_group_cache;
284         struct btrfs_block_group_cache *found_group = NULL;
285         struct btrfs_fs_info *info = root->fs_info;
286         u64 used;
287         u64 last = 0;
288         u64 hint_last;
289         u64 start;
290         u64 end;
291         u64 free_check;
292         u64 ptr;
293         int bit;
294         int ret;
295         int full_search = 0;
296         int factor = 8;
297
298         block_group_cache = &info->block_group_cache;
299
300         if (!owner)
301                 factor = 8;
302
303         bit = block_group_state_bits(data);
304
305         if (search_start) {
306                 struct btrfs_block_group_cache *shint;
307                 shint = btrfs_lookup_block_group(info, search_start);
308                 if (shint && block_group_bits(shint, data)) {
309                         used = btrfs_block_group_used(&shint->item);
310                         if (used + shint->pinned <
311                             div_factor(shint->key.offset, factor)) {
312                                 return shint;
313                         }
314                 }
315         }
316         if (hint && block_group_bits(hint, data)) {
317                 used = btrfs_block_group_used(&hint->item);
318                 if (used + hint->pinned <
319                     div_factor(hint->key.offset, factor)) {
320                         return hint;
321                 }
322                 last = hint->key.objectid + hint->key.offset;
323                 hint_last = last;
324         } else {
325                 if (hint)
326                         hint_last = max(hint->key.objectid, search_start);
327                 else
328                         hint_last = search_start;
329
330                 last = hint_last;
331         }
332 again:
333         while(1) {
334                 ret = find_first_extent_bit(block_group_cache, last,
335                                             &start, &end, bit);
336                 if (ret)
337                         break;
338
339                 ret = get_state_private(block_group_cache, start, &ptr);
340                 if (ret)
341                         break;
342
343                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
344                 last = cache->key.objectid + cache->key.offset;
345                 used = btrfs_block_group_used(&cache->item);
346
347                 if (block_group_bits(cache, data)) {
348                         if (full_search)
349                                 free_check = cache->key.offset;
350                         else
351                                 free_check = div_factor(cache->key.offset,
352                                                         factor);
353
354                         if (used + cache->pinned < free_check) {
355                                 found_group = cache;
356                                 goto found;
357                         }
358                 }
359                 cond_resched();
360         }
361         if (!full_search) {
362                 last = search_start;
363                 full_search = 1;
364                 goto again;
365         }
366 found:
367         return found_group;
368 }
369
370 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
371                            u64 owner, u64 owner_offset)
372 {
373         u32 high_crc = ~(u32)0;
374         u32 low_crc = ~(u32)0;
375         __le64 lenum;
376
377         lenum = cpu_to_le64(root_objectid);
378         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
379         lenum = cpu_to_le64(ref_generation);
380         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
381         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
382                 lenum = cpu_to_le64(owner);
383                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
384                 lenum = cpu_to_le64(owner_offset);
385                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
386         }
387         return ((u64)high_crc << 32) | (u64)low_crc;
388 }
389
390 static int match_extent_ref(struct extent_buffer *leaf,
391                             struct btrfs_extent_ref *disk_ref,
392                             struct btrfs_extent_ref *cpu_ref)
393 {
394         int ret;
395         int len;
396
397         if (cpu_ref->objectid)
398                 len = sizeof(*cpu_ref);
399         else
400                 len = 2 * sizeof(u64);
401         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
402                                    len);
403         return ret == 0;
404 }
405
406 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
407                                           struct btrfs_root *root,
408                                           struct btrfs_path *path, u64 bytenr,
409                                           u64 root_objectid,
410                                           u64 ref_generation, u64 owner,
411                                           u64 owner_offset, int del)
412 {
413         u64 hash;
414         struct btrfs_key key;
415         struct btrfs_key found_key;
416         struct btrfs_extent_ref ref;
417         struct extent_buffer *leaf;
418         struct btrfs_extent_ref *disk_ref;
419         int ret;
420         int ret2;
421
422         btrfs_set_stack_ref_root(&ref, root_objectid);
423         btrfs_set_stack_ref_generation(&ref, ref_generation);
424         btrfs_set_stack_ref_objectid(&ref, owner);
425         btrfs_set_stack_ref_offset(&ref, owner_offset);
426
427         hash = hash_extent_ref(root_objectid, ref_generation, owner,
428                                owner_offset);
429         key.offset = hash;
430         key.objectid = bytenr;
431         key.type = BTRFS_EXTENT_REF_KEY;
432
433         while (1) {
434                 ret = btrfs_search_slot(trans, root, &key, path,
435                                         del ? -1 : 0, del);
436                 if (ret < 0)
437                         goto out;
438                 leaf = path->nodes[0];
439                 if (ret != 0) {
440                         u32 nritems = btrfs_header_nritems(leaf);
441                         if (path->slots[0] >= nritems) {
442                                 ret2 = btrfs_next_leaf(root, path);
443                                 if (ret2)
444                                         goto out;
445                                 leaf = path->nodes[0];
446                         }
447                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
448                         if (found_key.objectid != bytenr ||
449                             found_key.type != BTRFS_EXTENT_REF_KEY)
450                                 goto out;
451                         key.offset = found_key.offset;
452                         if (del) {
453                                 btrfs_release_path(root, path);
454                                 continue;
455                         }
456                 }
457                 disk_ref = btrfs_item_ptr(path->nodes[0],
458                                           path->slots[0],
459                                           struct btrfs_extent_ref);
460                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
461                         ret = 0;
462                         goto out;
463                 }
464                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
465                 key.offset = found_key.offset + 1;
466                 btrfs_release_path(root, path);
467         }
468 out:
469         return ret;
470 }
471
472 /*
473  * Back reference rules.  Back refs have three main goals:
474  *
475  * 1) differentiate between all holders of references to an extent so that
476  *    when a reference is dropped we can make sure it was a valid reference
477  *    before freeing the extent.
478  *
479  * 2) Provide enough information to quickly find the holders of an extent
480  *    if we notice a given block is corrupted or bad.
481  *
482  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
483  *    maintenance.  This is actually the same as #2, but with a slightly
484  *    different use case.
485  *
486  * File extents can be referenced by:
487  *
488  * - multiple snapshots, subvolumes, or different generations in one subvol
489  * - different files inside a single subvolume (in theory, not implemented yet)
490  * - different offsets inside a file (bookend extents in file.c)
491  *
492  * The extent ref structure has fields for:
493  *
494  * - Objectid of the subvolume root
495  * - Generation number of the tree holding the reference
496  * - objectid of the file holding the reference
497  * - offset in the file corresponding to the key holding the reference
498  *
499  * When a file extent is allocated the fields are filled in:
500  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
501  *
502  * When a leaf is cow'd new references are added for every file extent found
503  * in the leaf.  It looks the same as the create case, but trans->transid
504  * will be different when the block is cow'd.
505  *
506  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
507  *
508  * When a file extent is removed either during snapshot deletion or file
509  * truncation, the corresponding back reference is found
510  * by searching for:
511  *
512  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
513  *      inode objectid, offset in file)
514  *
515  * Btree extents can be referenced by:
516  *
517  * - Different subvolumes
518  * - Different generations of the same subvolume
519  *
520  * Storing sufficient information for a full reverse mapping of a btree
521  * block would require storing the lowest key of the block in the backref,
522  * and it would require updating that lowest key either before write out or
523  * every time it changed.  Instead, the objectid of the lowest key is stored
524  * along with the level of the tree block.  This provides a hint
525  * about where in the btree the block can be found.  Searches through the
526  * btree only need to look for a pointer to that block, so they stop one
527  * level higher than the level recorded in the backref.
528  *
529  * Some btrees do not do reference counting on their extents.  These
530  * include the extent tree and the tree of tree roots.  Backrefs for these
531  * trees always have a generation of zero.
532  *
533  * When a tree block is created, back references are inserted:
534  *
535  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
536  *
537  * When a tree block is cow'd in a reference counted root,
538  * new back references are added for all the blocks it points to.
539  * These are of the form (trans->transid will have increased since creation):
540  *
541  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
542  *
543  * Because the lowest_key_objectid and the level are just hints
544  * they are not used when backrefs are deleted.  When a backref is deleted:
545  *
546  * if backref was for a tree root:
547  *     root_objectid = root->root_key.objectid
548  * else
549  *     root_objectid = btrfs_header_owner(parent)
550  *
551  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
552  *
553  * Back Reference Key hashing:
554  *
555  * Back references have four fields, each 64 bits long.  Unfortunately,
556  * This is hashed into a single 64 bit number and placed into the key offset.
557  * The key objectid corresponds to the first byte in the extent, and the
558  * key type is set to BTRFS_EXTENT_REF_KEY
559  */
560 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
561                                  struct btrfs_root *root,
562                                  struct btrfs_path *path, u64 bytenr,
563                                  u64 root_objectid, u64 ref_generation,
564                                  u64 owner, u64 owner_offset)
565 {
566         u64 hash;
567         struct btrfs_key key;
568         struct btrfs_extent_ref ref;
569         struct btrfs_extent_ref *disk_ref;
570         int ret;
571
572         btrfs_set_stack_ref_root(&ref, root_objectid);
573         btrfs_set_stack_ref_generation(&ref, ref_generation);
574         btrfs_set_stack_ref_objectid(&ref, owner);
575         btrfs_set_stack_ref_offset(&ref, owner_offset);
576
577         hash = hash_extent_ref(root_objectid, ref_generation, owner,
578                                owner_offset);
579         key.offset = hash;
580         key.objectid = bytenr;
581         key.type = BTRFS_EXTENT_REF_KEY;
582
583         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
584         while (ret == -EEXIST) {
585                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
586                                           struct btrfs_extent_ref);
587                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
588                         goto out;
589                 key.offset++;
590                 btrfs_release_path(root, path);
591                 ret = btrfs_insert_empty_item(trans, root, path, &key,
592                                               sizeof(ref));
593         }
594         if (ret)
595                 goto out;
596         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
597                                   struct btrfs_extent_ref);
598         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
599                             sizeof(ref));
600         btrfs_mark_buffer_dirty(path->nodes[0]);
601 out:
602         btrfs_release_path(root, path);
603         return ret;
604 }
605
606 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
607                                 struct btrfs_root *root,
608                                 u64 bytenr, u64 num_bytes,
609                                 u64 root_objectid, u64 ref_generation,
610                                 u64 owner, u64 owner_offset)
611 {
612         struct btrfs_path *path;
613         int ret;
614         struct btrfs_key key;
615         struct extent_buffer *l;
616         struct btrfs_extent_item *item;
617         u32 refs;
618
619         WARN_ON(num_bytes < root->sectorsize);
620         path = btrfs_alloc_path();
621         if (!path)
622                 return -ENOMEM;
623
624         key.objectid = bytenr;
625         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
626         key.offset = num_bytes;
627         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
628                                 0, 1);
629         if (ret < 0)
630                 return ret;
631         if (ret != 0) {
632                 BUG();
633         }
634         BUG_ON(ret != 0);
635         l = path->nodes[0];
636         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
637         refs = btrfs_extent_refs(l, item);
638         btrfs_set_extent_refs(l, item, refs + 1);
639         btrfs_mark_buffer_dirty(path->nodes[0]);
640
641         btrfs_release_path(root->fs_info->extent_root, path);
642
643         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
644                                           path, bytenr, root_objectid,
645                                           ref_generation, owner, owner_offset);
646         BUG_ON(ret);
647         finish_current_insert(trans, root->fs_info->extent_root);
648         del_pending_extents(trans, root->fs_info->extent_root);
649
650         btrfs_free_path(path);
651         return 0;
652 }
653
654 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
655                          struct btrfs_root *root)
656 {
657         finish_current_insert(trans, root->fs_info->extent_root);
658         del_pending_extents(trans, root->fs_info->extent_root);
659         return 0;
660 }
661
662 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
663                              struct btrfs_root *root, u64 bytenr,
664                              u64 num_bytes, u32 *refs)
665 {
666         struct btrfs_path *path;
667         int ret;
668         struct btrfs_key key;
669         struct extent_buffer *l;
670         struct btrfs_extent_item *item;
671
672         WARN_ON(num_bytes < root->sectorsize);
673         path = btrfs_alloc_path();
674         key.objectid = bytenr;
675         key.offset = num_bytes;
676         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
677         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
678                                 0, 0);
679         if (ret < 0)
680                 goto out;
681         if (ret != 0) {
682                 btrfs_print_leaf(root, path->nodes[0]);
683                 printk("failed to find block number %llu\n",
684                         (unsigned long long)bytenr);
685                 BUG();
686         }
687         l = path->nodes[0];
688         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
689         *refs = btrfs_extent_refs(l, item);
690 out:
691         btrfs_free_path(path);
692         return 0;
693 }
694
695 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
696                                   struct btrfs_path *count_path,
697                                   u64 first_extent)
698 {
699         struct btrfs_root *extent_root = root->fs_info->extent_root;
700         struct btrfs_path *path;
701         u64 bytenr;
702         u64 found_objectid;
703         u64 root_objectid = root->root_key.objectid;
704         u32 total_count = 0;
705         u32 cur_count;
706         u32 refs;
707         u32 nritems;
708         int ret;
709         struct btrfs_key key;
710         struct btrfs_key found_key;
711         struct extent_buffer *l;
712         struct btrfs_extent_item *item;
713         struct btrfs_extent_ref *ref_item;
714         int level = -1;
715
716         path = btrfs_alloc_path();
717 again:
718         if (level == -1)
719                 bytenr = first_extent;
720         else
721                 bytenr = count_path->nodes[level]->start;
722
723         cur_count = 0;
724         key.objectid = bytenr;
725         key.offset = 0;
726
727         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
728         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
729         if (ret < 0)
730                 goto out;
731         BUG_ON(ret == 0);
732
733         l = path->nodes[0];
734         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
735
736         if (found_key.objectid != bytenr ||
737             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
738                 goto out;
739         }
740
741         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
742         refs = btrfs_extent_refs(l, item);
743         while (1) {
744                 nritems = btrfs_header_nritems(l);
745                 if (path->slots[0] >= nritems) {
746                         ret = btrfs_next_leaf(extent_root, path);
747                         if (ret == 0)
748                                 continue;
749                         break;
750                 }
751                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
752                 if (found_key.objectid != bytenr)
753                         break;
754                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
755                         path->slots[0]++;
756                         continue;
757                 }
758
759                 cur_count++;
760                 ref_item = btrfs_item_ptr(l, path->slots[0],
761                                           struct btrfs_extent_ref);
762                 found_objectid = btrfs_ref_root(l, ref_item);
763
764                 if (found_objectid != root_objectid) {
765                         total_count = 2;
766                         goto out;
767                 }
768                 total_count = 1;
769                 path->slots[0]++;
770         }
771         if (cur_count == 0) {
772                 total_count = 0;
773                 goto out;
774         }
775         if (level >= 0 && root->node == count_path->nodes[level])
776                 goto out;
777         level++;
778         btrfs_release_path(root, path);
779         goto again;
780
781 out:
782         btrfs_free_path(path);
783         return total_count;
784 }
785 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
786                        struct btrfs_root *root, u64 owner_objectid)
787 {
788         u64 generation;
789         u64 key_objectid;
790         u64 level;
791         u32 nritems;
792         struct btrfs_disk_key disk_key;
793
794         level = btrfs_header_level(root->node);
795         generation = trans->transid;
796         nritems = btrfs_header_nritems(root->node);
797         if (nritems > 0) {
798                 if (level == 0)
799                         btrfs_item_key(root->node, &disk_key, 0);
800                 else
801                         btrfs_node_key(root->node, &disk_key, 0);
802                 key_objectid = btrfs_disk_key_objectid(&disk_key);
803         } else {
804                 key_objectid = 0;
805         }
806         return btrfs_inc_extent_ref(trans, root, root->node->start,
807                                     root->node->len, owner_objectid,
808                                     generation, level, key_objectid);
809 }
810
811 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
812                   struct extent_buffer *buf)
813 {
814         u64 bytenr;
815         u32 nritems;
816         struct btrfs_key key;
817         struct btrfs_file_extent_item *fi;
818         int i;
819         int level;
820         int ret;
821         int faili;
822
823         if (!root->ref_cows)
824                 return 0;
825
826         level = btrfs_header_level(buf);
827         nritems = btrfs_header_nritems(buf);
828         for (i = 0; i < nritems; i++) {
829                 if (level == 0) {
830                         u64 disk_bytenr;
831                         btrfs_item_key_to_cpu(buf, &key, i);
832                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
833                                 continue;
834                         fi = btrfs_item_ptr(buf, i,
835                                             struct btrfs_file_extent_item);
836                         if (btrfs_file_extent_type(buf, fi) ==
837                             BTRFS_FILE_EXTENT_INLINE)
838                                 continue;
839                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
840                         if (disk_bytenr == 0)
841                                 continue;
842                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
843                                     btrfs_file_extent_disk_num_bytes(buf, fi),
844                                     root->root_key.objectid, trans->transid,
845                                     key.objectid, key.offset);
846                         if (ret) {
847                                 faili = i;
848                                 goto fail;
849                         }
850                 } else {
851                         bytenr = btrfs_node_blockptr(buf, i);
852                         btrfs_node_key_to_cpu(buf, &key, i);
853                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
854                                            btrfs_level_size(root, level - 1),
855                                            root->root_key.objectid,
856                                            trans->transid,
857                                            level - 1, key.objectid);
858                         if (ret) {
859                                 faili = i;
860                                 goto fail;
861                         }
862                 }
863         }
864         return 0;
865 fail:
866         WARN_ON(1);
867 #if 0
868         for (i =0; i < faili; i++) {
869                 if (level == 0) {
870                         u64 disk_bytenr;
871                         btrfs_item_key_to_cpu(buf, &key, i);
872                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
873                                 continue;
874                         fi = btrfs_item_ptr(buf, i,
875                                             struct btrfs_file_extent_item);
876                         if (btrfs_file_extent_type(buf, fi) ==
877                             BTRFS_FILE_EXTENT_INLINE)
878                                 continue;
879                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
880                         if (disk_bytenr == 0)
881                                 continue;
882                         err = btrfs_free_extent(trans, root, disk_bytenr,
883                                     btrfs_file_extent_disk_num_bytes(buf,
884                                                                       fi), 0);
885                         BUG_ON(err);
886                 } else {
887                         bytenr = btrfs_node_blockptr(buf, i);
888                         err = btrfs_free_extent(trans, root, bytenr,
889                                         btrfs_level_size(root, level - 1), 0);
890                         BUG_ON(err);
891                 }
892         }
893 #endif
894         return ret;
895 }
896
897 static int write_one_cache_group(struct btrfs_trans_handle *trans,
898                                  struct btrfs_root *root,
899                                  struct btrfs_path *path,
900                                  struct btrfs_block_group_cache *cache)
901 {
902         int ret;
903         int pending_ret;
904         struct btrfs_root *extent_root = root->fs_info->extent_root;
905         unsigned long bi;
906         struct extent_buffer *leaf;
907
908         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
909         if (ret < 0)
910                 goto fail;
911         BUG_ON(ret);
912
913         leaf = path->nodes[0];
914         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
915         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
916         btrfs_mark_buffer_dirty(leaf);
917         btrfs_release_path(extent_root, path);
918 fail:
919         finish_current_insert(trans, extent_root);
920         pending_ret = del_pending_extents(trans, extent_root);
921         if (ret)
922                 return ret;
923         if (pending_ret)
924                 return pending_ret;
925         return 0;
926
927 }
928
929 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
930                                    struct btrfs_root *root)
931 {
932         struct extent_io_tree *block_group_cache;
933         struct btrfs_block_group_cache *cache;
934         int ret;
935         int err = 0;
936         int werr = 0;
937         struct btrfs_path *path;
938         u64 last = 0;
939         u64 start;
940         u64 end;
941         u64 ptr;
942
943         block_group_cache = &root->fs_info->block_group_cache;
944         path = btrfs_alloc_path();
945         if (!path)
946                 return -ENOMEM;
947
948         while(1) {
949                 ret = find_first_extent_bit(block_group_cache, last,
950                                             &start, &end, BLOCK_GROUP_DIRTY);
951                 if (ret)
952                         break;
953
954                 last = end + 1;
955                 ret = get_state_private(block_group_cache, start, &ptr);
956                 if (ret)
957                         break;
958                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
959                 err = write_one_cache_group(trans, root,
960                                             path, cache);
961                 /*
962                  * if we fail to write the cache group, we want
963                  * to keep it marked dirty in hopes that a later
964                  * write will work
965                  */
966                 if (err) {
967                         werr = err;
968                         continue;
969                 }
970                 clear_extent_bits(block_group_cache, start, end,
971                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
972         }
973         btrfs_free_path(path);
974         return werr;
975 }
976
977 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
978                                                   u64 flags)
979 {
980         struct list_head *head = &info->space_info;
981         struct list_head *cur;
982         struct btrfs_space_info *found;
983         list_for_each(cur, head) {
984                 found = list_entry(cur, struct btrfs_space_info, list);
985                 if (found->flags == flags)
986                         return found;
987         }
988         return NULL;
989
990 }
991
992 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
993                              u64 total_bytes, u64 bytes_used,
994                              struct btrfs_space_info **space_info)
995 {
996         struct btrfs_space_info *found;
997
998         found = __find_space_info(info, flags);
999         if (found) {
1000                 found->total_bytes += total_bytes;
1001                 found->bytes_used += bytes_used;
1002                 WARN_ON(found->total_bytes < found->bytes_used);
1003                 *space_info = found;
1004                 return 0;
1005         }
1006         found = kmalloc(sizeof(*found), GFP_NOFS);
1007         if (!found)
1008                 return -ENOMEM;
1009
1010         list_add(&found->list, &info->space_info);
1011         found->flags = flags;
1012         found->total_bytes = total_bytes;
1013         found->bytes_used = bytes_used;
1014         found->bytes_pinned = 0;
1015         found->full = 0;
1016         *space_info = found;
1017         return 0;
1018 }
1019
1020
1021 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1022 {
1023         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1024                                    BTRFS_BLOCK_GROUP_RAID1 |
1025                                    BTRFS_BLOCK_GROUP_DUP);
1026         if (extra_flags) {
1027                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1028                         fs_info->avail_data_alloc_bits |= extra_flags;
1029                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1030                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1031                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1032                         fs_info->avail_system_alloc_bits |= extra_flags;
1033         }
1034 }
1035
1036 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1037                           struct btrfs_root *extent_root, u64 alloc_bytes,
1038                           u64 flags)
1039 {
1040         struct btrfs_space_info *space_info;
1041         u64 thresh;
1042         u64 start;
1043         u64 num_bytes;
1044         int ret;
1045
1046         space_info = __find_space_info(extent_root->fs_info, flags);
1047         if (!space_info) {
1048                 ret = update_space_info(extent_root->fs_info, flags,
1049                                         0, 0, &space_info);
1050                 BUG_ON(ret);
1051         }
1052         BUG_ON(!space_info);
1053
1054         if (space_info->full)
1055                 return 0;
1056
1057         thresh = div_factor(space_info->total_bytes, 7);
1058         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1059             thresh)
1060                 return 0;
1061
1062         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1063         if (ret == -ENOSPC) {
1064                 space_info->full = 1;
1065                 return 0;
1066         }
1067
1068         BUG_ON(ret);
1069
1070         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1071                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1072         BUG_ON(ret);
1073         return 0;
1074 }
1075
1076 static int update_block_group(struct btrfs_trans_handle *trans,
1077                               struct btrfs_root *root,
1078                               u64 bytenr, u64 num_bytes, int alloc,
1079                               int mark_free)
1080 {
1081         struct btrfs_block_group_cache *cache;
1082         struct btrfs_fs_info *info = root->fs_info;
1083         u64 total = num_bytes;
1084         u64 old_val;
1085         u64 byte_in_group;
1086         u64 start;
1087         u64 end;
1088
1089         while(total) {
1090                 cache = btrfs_lookup_block_group(info, bytenr);
1091                 if (!cache) {
1092                         return -1;
1093                 }
1094                 byte_in_group = bytenr - cache->key.objectid;
1095                 WARN_ON(byte_in_group > cache->key.offset);
1096                 start = cache->key.objectid;
1097                 end = start + cache->key.offset - 1;
1098                 set_extent_bits(&info->block_group_cache, start, end,
1099                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1100
1101                 old_val = btrfs_block_group_used(&cache->item);
1102                 num_bytes = min(total, cache->key.offset - byte_in_group);
1103                 if (alloc) {
1104                         old_val += num_bytes;
1105                         cache->space_info->bytes_used += num_bytes;
1106                 } else {
1107                         old_val -= num_bytes;
1108                         cache->space_info->bytes_used -= num_bytes;
1109                         if (mark_free) {
1110                                 set_extent_dirty(&info->free_space_cache,
1111                                                  bytenr, bytenr + num_bytes - 1,
1112                                                  GFP_NOFS);
1113                         }
1114                 }
1115                 btrfs_set_block_group_used(&cache->item, old_val);
1116                 total -= num_bytes;
1117                 bytenr += num_bytes;
1118         }
1119         return 0;
1120 }
1121
1122 static int update_pinned_extents(struct btrfs_root *root,
1123                                 u64 bytenr, u64 num, int pin)
1124 {
1125         u64 len;
1126         struct btrfs_block_group_cache *cache;
1127         struct btrfs_fs_info *fs_info = root->fs_info;
1128
1129         if (pin) {
1130                 set_extent_dirty(&fs_info->pinned_extents,
1131                                 bytenr, bytenr + num - 1, GFP_NOFS);
1132         } else {
1133                 clear_extent_dirty(&fs_info->pinned_extents,
1134                                 bytenr, bytenr + num - 1, GFP_NOFS);
1135         }
1136         while (num > 0) {
1137                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1138                 WARN_ON(!cache);
1139                 len = min(num, cache->key.offset -
1140                           (bytenr - cache->key.objectid));
1141                 if (pin) {
1142                         cache->pinned += len;
1143                         cache->space_info->bytes_pinned += len;
1144                         fs_info->total_pinned += len;
1145                 } else {
1146                         cache->pinned -= len;
1147                         cache->space_info->bytes_pinned -= len;
1148                         fs_info->total_pinned -= len;
1149                 }
1150                 bytenr += len;
1151                 num -= len;
1152         }
1153         return 0;
1154 }
1155
1156 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1157 {
1158         u64 last = 0;
1159         u64 start;
1160         u64 end;
1161         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1162         int ret;
1163
1164         while(1) {
1165                 ret = find_first_extent_bit(pinned_extents, last,
1166                                             &start, &end, EXTENT_DIRTY);
1167                 if (ret)
1168                         break;
1169                 set_extent_dirty(copy, start, end, GFP_NOFS);
1170                 last = end + 1;
1171         }
1172         return 0;
1173 }
1174
1175 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1176                                struct btrfs_root *root,
1177                                struct extent_io_tree *unpin)
1178 {
1179         u64 start;
1180         u64 end;
1181         int ret;
1182         struct extent_io_tree *free_space_cache;
1183         free_space_cache = &root->fs_info->free_space_cache;
1184
1185         while(1) {
1186                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1187                                             EXTENT_DIRTY);
1188                 if (ret)
1189                         break;
1190                 update_pinned_extents(root, start, end + 1 - start, 0);
1191                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1192                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1193         }
1194         return 0;
1195 }
1196
1197 static int finish_current_insert(struct btrfs_trans_handle *trans,
1198                                  struct btrfs_root *extent_root)
1199 {
1200         u64 start;
1201         u64 end;
1202         struct btrfs_fs_info *info = extent_root->fs_info;
1203         struct extent_buffer *eb;
1204         struct btrfs_path *path;
1205         struct btrfs_key ins;
1206         struct btrfs_disk_key first;
1207         struct btrfs_extent_item extent_item;
1208         int ret;
1209         int level;
1210         int err = 0;
1211
1212         btrfs_set_stack_extent_refs(&extent_item, 1);
1213         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1214         path = btrfs_alloc_path();
1215
1216         while(1) {
1217                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1218                                             &end, EXTENT_LOCKED);
1219                 if (ret)
1220                         break;
1221
1222                 ins.objectid = start;
1223                 ins.offset = end + 1 - start;
1224                 err = btrfs_insert_item(trans, extent_root, &ins,
1225                                         &extent_item, sizeof(extent_item));
1226                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1227                                   GFP_NOFS);
1228                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1229                 level = btrfs_header_level(eb);
1230                 if (level == 0) {
1231                         btrfs_item_key(eb, &first, 0);
1232                 } else {
1233                         btrfs_node_key(eb, &first, 0);
1234                 }
1235                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1236                                           start, extent_root->root_key.objectid,
1237                                           0, level,
1238                                           btrfs_disk_key_objectid(&first));
1239                 BUG_ON(err);
1240                 free_extent_buffer(eb);
1241         }
1242         btrfs_free_path(path);
1243         return 0;
1244 }
1245
1246 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1247                           int pending)
1248 {
1249         int err = 0;
1250         struct extent_buffer *buf;
1251
1252         if (!pending) {
1253                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1254                 if (buf) {
1255                         if (btrfs_buffer_uptodate(buf)) {
1256                                 u64 transid =
1257                                     root->fs_info->running_transaction->transid;
1258                                 if (btrfs_header_generation(buf) ==
1259                                     transid && !btrfs_header_flag(buf,
1260                                                BTRFS_HEADER_FLAG_WRITTEN)) {
1261                                         free_extent_buffer(buf);
1262                                         return 1;
1263                                 }
1264                         }
1265                         free_extent_buffer(buf);
1266                 }
1267                 update_pinned_extents(root, bytenr, num_bytes, 1);
1268         } else {
1269                 set_extent_bits(&root->fs_info->pending_del,
1270                                 bytenr, bytenr + num_bytes - 1,
1271                                 EXTENT_LOCKED, GFP_NOFS);
1272         }
1273         BUG_ON(err < 0);
1274         return 0;
1275 }
1276
1277 /*
1278  * remove an extent from the root, returns 0 on success
1279  */
1280 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1281                          *root, u64 bytenr, u64 num_bytes,
1282                          u64 root_objectid, u64 ref_generation,
1283                          u64 owner_objectid, u64 owner_offset, int pin,
1284                          int mark_free)
1285 {
1286         struct btrfs_path *path;
1287         struct btrfs_key key;
1288         struct btrfs_fs_info *info = root->fs_info;
1289         struct btrfs_extent_ops *ops = info->extent_ops;
1290         struct btrfs_root *extent_root = info->extent_root;
1291         struct extent_buffer *leaf;
1292         int ret;
1293         int extent_slot = 0;
1294         int found_extent = 0;
1295         int num_to_del = 1;
1296         struct btrfs_extent_item *ei;
1297         u32 refs;
1298
1299         key.objectid = bytenr;
1300         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1301         key.offset = num_bytes;
1302
1303         path = btrfs_alloc_path();
1304         if (!path)
1305                 return -ENOMEM;
1306
1307         ret = lookup_extent_backref(trans, extent_root, path,
1308                                     bytenr, root_objectid,
1309                                     ref_generation,
1310                                     owner_objectid, owner_offset, 1);
1311         if (ret == 0) {
1312                 struct btrfs_key found_key;
1313                 extent_slot = path->slots[0];
1314                 while(extent_slot > 0) {
1315                         extent_slot--;
1316                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1317                                               extent_slot);
1318                         if (found_key.objectid != bytenr)
1319                                 break;
1320                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1321                             found_key.offset == num_bytes) {
1322                                 found_extent = 1;
1323                                 break;
1324                         }
1325                         if (path->slots[0] - extent_slot > 5)
1326                                 break;
1327                 }
1328                 if (!found_extent)
1329                         ret = btrfs_del_item(trans, extent_root, path);
1330         } else {
1331                 btrfs_print_leaf(extent_root, path->nodes[0]);
1332                 WARN_ON(1);
1333                 printk("Unable to find ref byte nr %llu root %llu "
1334                        " gen %llu owner %llu offset %llu\n",
1335                        (unsigned long long)bytenr,
1336                        (unsigned long long)root_objectid,
1337                        (unsigned long long)ref_generation,
1338                        (unsigned long long)owner_objectid,
1339                        (unsigned long long)owner_offset);
1340         }
1341         if (!found_extent) {
1342                 btrfs_release_path(extent_root, path);
1343                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1344                 if (ret < 0)
1345                         return ret;
1346                 BUG_ON(ret);
1347                 extent_slot = path->slots[0];
1348         }
1349
1350         leaf = path->nodes[0];
1351         ei = btrfs_item_ptr(leaf, extent_slot,
1352                             struct btrfs_extent_item);
1353         refs = btrfs_extent_refs(leaf, ei);
1354         BUG_ON(refs == 0);
1355         refs -= 1;
1356         btrfs_set_extent_refs(leaf, ei, refs);
1357
1358         btrfs_mark_buffer_dirty(leaf);
1359
1360         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1361                 /* if the back ref and the extent are next to each other
1362                  * they get deleted below in one shot
1363                  */
1364                 path->slots[0] = extent_slot;
1365                 num_to_del = 2;
1366         } else if (found_extent) {
1367                 /* otherwise delete the extent back ref */
1368                 ret = btrfs_del_item(trans, extent_root, path);
1369                 BUG_ON(ret);
1370                 /* if refs are 0, we need to setup the path for deletion */
1371                 if (refs == 0) {
1372                         btrfs_release_path(extent_root, path);
1373                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1374                                                 -1, 1);
1375                         if (ret < 0)
1376                                 return ret;
1377                         BUG_ON(ret);
1378                 }
1379         }
1380
1381         if (refs == 0) {
1382                 u64 super_used;
1383                 u64 root_used;
1384
1385                 if (pin) {
1386                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1387                         if (ret > 0)
1388                                 mark_free = 1;
1389                         BUG_ON(ret < 0);
1390                 }
1391
1392                 /* block accounting for super block */
1393                 super_used = btrfs_super_bytes_used(&info->super_copy);
1394                 btrfs_set_super_bytes_used(&info->super_copy,
1395                                            super_used - num_bytes);
1396
1397                 /* block accounting for root item */
1398                 root_used = btrfs_root_used(&root->root_item);
1399                 btrfs_set_root_used(&root->root_item,
1400                                            root_used - num_bytes);
1401                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1402                                       num_to_del);
1403                 if (ret)
1404                         return ret;
1405
1406                 if (ops && ops->free_extent)
1407                         ops->free_extent(root, bytenr, num_bytes);
1408
1409                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1410                                          mark_free);
1411                 BUG_ON(ret);
1412         }
1413         btrfs_free_path(path);
1414         finish_current_insert(trans, extent_root);
1415         return ret;
1416 }
1417
1418 /*
1419  * find all the blocks marked as pending in the radix tree and remove
1420  * them from the extent map
1421  */
1422 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1423                                btrfs_root *extent_root)
1424 {
1425         int ret;
1426         int err = 0;
1427         u64 start;
1428         u64 end;
1429         struct extent_io_tree *pending_del;
1430         struct extent_io_tree *pinned_extents;
1431
1432         pending_del = &extent_root->fs_info->pending_del;
1433         pinned_extents = &extent_root->fs_info->pinned_extents;
1434
1435         while(1) {
1436                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1437                                             EXTENT_LOCKED);
1438                 if (ret)
1439                         break;
1440                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1441                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1442                                   GFP_NOFS);
1443                 ret = __free_extent(trans, extent_root,
1444                                      start, end + 1 - start,
1445                                      extent_root->root_key.objectid,
1446                                      0, 0, 0, 0, 0);
1447                 if (ret)
1448                         err = ret;
1449         }
1450         return err;
1451 }
1452
1453 /*
1454  * remove an extent from the root, returns 0 on success
1455  */
1456 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1457                       *root, u64 bytenr, u64 num_bytes,
1458                       u64 root_objectid, u64 ref_generation,
1459                       u64 owner_objectid, u64 owner_offset, int pin)
1460 {
1461         struct btrfs_root *extent_root = root->fs_info->extent_root;
1462         int pending_ret;
1463         int ret;
1464
1465         WARN_ON(num_bytes < root->sectorsize);
1466         if (!root->ref_cows)
1467                 ref_generation = 0;
1468
1469         if (root == extent_root) {
1470                 pin_down_bytes(root, bytenr, num_bytes, 1);
1471                 return 0;
1472         }
1473         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1474                             ref_generation, owner_objectid, owner_offset,
1475                             pin, pin == 0);
1476         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1477         return ret ? ret : pending_ret;
1478 }
1479
1480 static u64 stripe_align(struct btrfs_root *root, u64 val)
1481 {
1482         u64 mask = ((u64)root->stripesize - 1);
1483         u64 ret = (val + mask) & ~mask;
1484         return ret;
1485 }
1486
1487 /*
1488  * walks the btree of allocated extents and find a hole of a given size.
1489  * The key ins is changed to record the hole:
1490  * ins->objectid == block start
1491  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1492  * ins->offset == number of blocks
1493  * Any available blocks before search_start are skipped.
1494  */
1495 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1496                                      struct btrfs_root *orig_root,
1497                                      u64 num_bytes, u64 empty_size,
1498                                      u64 search_start, u64 search_end,
1499                                      u64 hint_byte, struct btrfs_key *ins,
1500                                      u64 exclude_start, u64 exclude_nr,
1501                                      int data)
1502 {
1503         int ret;
1504         u64 orig_search_start = search_start;
1505         struct btrfs_root * root = orig_root->fs_info->extent_root;
1506         struct btrfs_fs_info *info = root->fs_info;
1507         u64 total_needed = num_bytes;
1508         struct btrfs_block_group_cache *block_group;
1509         int full_scan = 0;
1510         int wrapped = 0;
1511
1512         WARN_ON(num_bytes < root->sectorsize);
1513         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1514
1515         if (search_end == (u64)-1)
1516                 search_end = btrfs_super_total_bytes(&info->super_copy);
1517
1518         if (hint_byte) {
1519                 block_group = btrfs_lookup_block_group(info, hint_byte);
1520                 if (!block_group)
1521                         hint_byte = search_start;
1522                 block_group = btrfs_find_block_group(root, block_group,
1523                                                      hint_byte, data, 1);
1524         } else {
1525                 block_group = btrfs_find_block_group(root,
1526                                                      trans->block_group,
1527                                                      search_start, data, 1);
1528         }
1529
1530         total_needed += empty_size;
1531
1532 check_failed:
1533         if (!block_group) {
1534                 block_group = btrfs_lookup_block_group(info, search_start);
1535                 if (!block_group)
1536                         block_group = btrfs_lookup_block_group(info,
1537                                                        orig_search_start);
1538         }
1539         ret = find_search_start(root, &block_group, &search_start,
1540                                 total_needed, data);
1541         if (ret)
1542                 goto error;
1543
1544         search_start = stripe_align(root, search_start);
1545         ins->objectid = search_start;
1546         ins->offset = num_bytes;
1547
1548         if (ins->objectid + num_bytes >= search_end)
1549                 goto enospc;
1550
1551         if (ins->objectid + num_bytes >
1552             block_group->key.objectid + block_group->key.offset) {
1553                 search_start = block_group->key.objectid +
1554                         block_group->key.offset;
1555                 goto new_group;
1556         }
1557
1558         if (test_range_bit(&info->extent_ins, ins->objectid,
1559                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1560                 search_start = ins->objectid + num_bytes;
1561                 goto new_group;
1562         }
1563
1564         if (test_range_bit(&info->pinned_extents, ins->objectid,
1565                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1566                 search_start = ins->objectid + num_bytes;
1567                 goto new_group;
1568         }
1569
1570         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1571             ins->objectid < exclude_start + exclude_nr)) {
1572                 search_start = exclude_start + exclude_nr;
1573                 goto new_group;
1574         }
1575
1576         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1577                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1578                 if (block_group)
1579                         trans->block_group = block_group;
1580         }
1581         ins->offset = num_bytes;
1582         return 0;
1583
1584 new_group:
1585         if (search_start + num_bytes >= search_end) {
1586 enospc:
1587                 search_start = orig_search_start;
1588                 if (full_scan) {
1589                         ret = -ENOSPC;
1590                         goto error;
1591                 }
1592                 if (wrapped) {
1593                         if (!full_scan)
1594                                 total_needed -= empty_size;
1595                         full_scan = 1;
1596                 } else
1597                         wrapped = 1;
1598         }
1599         block_group = btrfs_lookup_block_group(info, search_start);
1600         cond_resched();
1601         block_group = btrfs_find_block_group(root, block_group,
1602                                              search_start, data, 0);
1603         goto check_failed;
1604
1605 error:
1606         return ret;
1607 }
1608 /*
1609  * finds a free extent and does all the dirty work required for allocation
1610  * returns the key for the extent through ins, and a tree buffer for
1611  * the first block of the extent through buf.
1612  *
1613  * returns 0 if everything worked, non-zero otherwise.
1614  */
1615 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1616                        struct btrfs_root *root,
1617                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1618                        u64 owner, u64 owner_offset,
1619                        u64 empty_size, u64 hint_byte,
1620                        u64 search_end, struct btrfs_key *ins, int data)
1621 {
1622         int ret;
1623         int pending_ret;
1624         u64 super_used, root_used;
1625         u64 search_start = 0;
1626         u64 alloc_profile;
1627         struct btrfs_fs_info *info = root->fs_info;
1628         struct btrfs_extent_ops *ops = info->extent_ops;
1629         u32 sizes[2];
1630         struct btrfs_root *extent_root = info->extent_root;
1631         struct btrfs_path *path;
1632         struct btrfs_extent_item *extent_item;
1633         struct btrfs_extent_ref *ref;
1634         struct btrfs_key keys[2];
1635
1636         if (data) {
1637                 alloc_profile = info->avail_data_alloc_bits &
1638                                 info->data_alloc_profile;
1639                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1640         } else if (root == info->chunk_root || info->force_system_allocs) {
1641                 alloc_profile = info->avail_system_alloc_bits &
1642                                 info->system_alloc_profile;
1643                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1644         } else {
1645                 alloc_profile = info->avail_metadata_alloc_bits &
1646                                 info->metadata_alloc_profile;
1647                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1648         }
1649
1650         if (root->ref_cows) {
1651                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1652                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1653                                              num_bytes,
1654                                              BTRFS_BLOCK_GROUP_METADATA);
1655                         BUG_ON(ret);
1656                 }
1657                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1658                                      num_bytes + 2 * 1024 * 1024, data);
1659                 BUG_ON(ret);
1660         }
1661
1662         WARN_ON(num_bytes < root->sectorsize);
1663         if (ops && ops->alloc_extent) {
1664                 ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
1665         } else {
1666                 ret = find_free_extent(trans, root, num_bytes, empty_size,
1667                                         search_start, search_end, hint_byte,
1668                                         ins, trans->alloc_exclude_start,
1669                                         trans->alloc_exclude_nr, data);
1670         }
1671         BUG_ON(ret);
1672         if (ret)
1673                 return ret;
1674
1675         /* block accounting for super block */
1676         super_used = btrfs_super_bytes_used(&info->super_copy);
1677         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1678
1679         /* block accounting for root item */
1680         root_used = btrfs_root_used(&root->root_item);
1681         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1682
1683         clear_extent_dirty(&root->fs_info->free_space_cache,
1684                            ins->objectid, ins->objectid + ins->offset - 1,
1685                            GFP_NOFS);
1686
1687         if (root == extent_root) {
1688                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1689                                 ins->objectid + ins->offset - 1,
1690                                 EXTENT_LOCKED, GFP_NOFS);
1691                 goto update_block;
1692         }
1693
1694         WARN_ON(trans->alloc_exclude_nr);
1695         trans->alloc_exclude_start = ins->objectid;
1696         trans->alloc_exclude_nr = ins->offset;
1697
1698         memcpy(&keys[0], ins, sizeof(*ins));
1699         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1700                                          owner, owner_offset);
1701         keys[1].objectid = ins->objectid;
1702         keys[1].type = BTRFS_EXTENT_REF_KEY;
1703         sizes[0] = sizeof(*extent_item);
1704         sizes[1] = sizeof(*ref);
1705
1706         path = btrfs_alloc_path();
1707         BUG_ON(!path);
1708
1709         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1710                                        sizes, 2);
1711
1712         BUG_ON(ret);
1713         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1714                                      struct btrfs_extent_item);
1715         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1716         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1717                              struct btrfs_extent_ref);
1718
1719         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1720         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1721         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1722         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1723
1724         btrfs_mark_buffer_dirty(path->nodes[0]);
1725
1726         trans->alloc_exclude_start = 0;
1727         trans->alloc_exclude_nr = 0;
1728         btrfs_free_path(path);
1729         finish_current_insert(trans, extent_root);
1730         pending_ret = del_pending_extents(trans, extent_root);
1731
1732         if (ret) {
1733                 return ret;
1734         }
1735         if (pending_ret) {
1736                 return pending_ret;
1737         }
1738
1739 update_block:
1740         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1741         if (ret) {
1742                 printk("update block group failed for %llu %llu\n",
1743                        (unsigned long long)ins->objectid,
1744                        (unsigned long long)ins->offset);
1745                 BUG();
1746         }
1747         return 0;
1748 }
1749
1750 /*
1751  * helper function to allocate a block for a given tree
1752  * returns the tree buffer or NULL.
1753  */
1754 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1755                                              struct btrfs_root *root,
1756                                              u32 blocksize,
1757                                              u64 root_objectid, u64 hint,
1758                                              u64 empty_size)
1759 {
1760         u64 ref_generation;
1761
1762         if (root->ref_cows)
1763                 ref_generation = trans->transid;
1764         else
1765                 ref_generation = 0;
1766
1767
1768         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1769                                         ref_generation, 0, 0, hint, empty_size);
1770 }
1771
1772 /*
1773  * helper function to allocate a block for a given tree
1774  * returns the tree buffer or NULL.
1775  */
1776 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1777                                              struct btrfs_root *root,
1778                                              u32 blocksize,
1779                                              u64 root_objectid,
1780                                              u64 ref_generation,
1781                                              u64 first_objectid,
1782                                              int level,
1783                                              u64 hint,
1784                                              u64 empty_size)
1785 {
1786         struct btrfs_key ins;
1787         int ret;
1788         struct extent_buffer *buf;
1789
1790         ret = btrfs_alloc_extent(trans, root, blocksize,
1791                                  root_objectid, ref_generation,
1792                                  level, first_objectid, empty_size, hint,
1793                                  (u64)-1, &ins, 0);
1794         if (ret) {
1795                 BUG_ON(ret > 0);
1796                 return ERR_PTR(ret);
1797         }
1798         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1799         if (!buf) {
1800                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1801                                   root->root_key.objectid, ref_generation,
1802                                   0, 0, 0);
1803                 BUG_ON(1);
1804                 return ERR_PTR(-ENOMEM);
1805         }
1806         btrfs_set_buffer_uptodate(buf);
1807         trans->blocks_used++;
1808         return buf;
1809 }
1810
1811 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1812                                   struct btrfs_root *root,
1813                                   struct extent_buffer *leaf)
1814 {
1815         u64 leaf_owner;
1816         u64 leaf_generation;
1817         struct btrfs_key key;
1818         struct btrfs_file_extent_item *fi;
1819         int i;
1820         int nritems;
1821         int ret;
1822
1823         BUG_ON(!btrfs_is_leaf(leaf));
1824         nritems = btrfs_header_nritems(leaf);
1825         leaf_owner = btrfs_header_owner(leaf);
1826         leaf_generation = btrfs_header_generation(leaf);
1827
1828         for (i = 0; i < nritems; i++) {
1829                 u64 disk_bytenr;
1830
1831                 btrfs_item_key_to_cpu(leaf, &key, i);
1832                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1833                         continue;
1834                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1835                 if (btrfs_file_extent_type(leaf, fi) ==
1836                     BTRFS_FILE_EXTENT_INLINE)
1837                         continue;
1838                 /*
1839                  * FIXME make sure to insert a trans record that
1840                  * repeats the snapshot del on crash
1841                  */
1842                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1843                 if (disk_bytenr == 0)
1844                         continue;
1845                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1846                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1847                                 leaf_owner, leaf_generation,
1848                                 key.objectid, key.offset, 0);
1849                 BUG_ON(ret);
1850         }
1851         return 0;
1852 }
1853
1854 static void noinline reada_walk_down(struct btrfs_root *root,
1855                                      struct extent_buffer *node,
1856                                      int slot)
1857 {
1858         u64 bytenr;
1859         u64 last = 0;
1860         u32 nritems;
1861         u32 refs;
1862         u32 blocksize;
1863         int ret;
1864         int i;
1865         int level;
1866         int skipped = 0;
1867
1868         nritems = btrfs_header_nritems(node);
1869         level = btrfs_header_level(node);
1870         if (level)
1871                 return;
1872
1873         for (i = slot; i < nritems && skipped < 32; i++) {
1874                 bytenr = btrfs_node_blockptr(node, i);
1875                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1876                              (last > bytenr && last - bytenr > 32 * 1024))) {
1877                         skipped++;
1878                         continue;
1879                 }
1880                 blocksize = btrfs_level_size(root, level - 1);
1881                 if (i != slot) {
1882                         ret = lookup_extent_ref(NULL, root, bytenr,
1883                                                 blocksize, &refs);
1884                         BUG_ON(ret);
1885                         if (refs != 1) {
1886                                 skipped++;
1887                                 continue;
1888                         }
1889                 }
1890                 mutex_unlock(&root->fs_info->fs_mutex);
1891                 ret = readahead_tree_block(root, bytenr, blocksize);
1892                 last = bytenr + blocksize;
1893                 cond_resched();
1894                 mutex_lock(&root->fs_info->fs_mutex);
1895                 if (ret)
1896                         break;
1897         }
1898 }
1899
1900 /*
1901  * helper function for drop_snapshot, this walks down the tree dropping ref
1902  * counts as it goes.
1903  */
1904 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
1905                                    struct btrfs_root *root,
1906                                    struct btrfs_path *path, int *level)
1907 {
1908         u64 root_owner;
1909         u64 root_gen;
1910         u64 bytenr;
1911         struct extent_buffer *next;
1912         struct extent_buffer *cur;
1913         struct extent_buffer *parent;
1914         u32 blocksize;
1915         int ret;
1916         u32 refs;
1917
1918         WARN_ON(*level < 0);
1919         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1920         ret = lookup_extent_ref(trans, root,
1921                                 path->nodes[*level]->start,
1922                                 path->nodes[*level]->len, &refs);
1923         BUG_ON(ret);
1924         if (refs > 1)
1925                 goto out;
1926
1927         /*
1928          * walk down to the last node level and free all the leaves
1929          */
1930         while(*level >= 0) {
1931                 WARN_ON(*level < 0);
1932                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1933                 cur = path->nodes[*level];
1934
1935                 if (btrfs_header_level(cur) != *level)
1936                         WARN_ON(1);
1937
1938                 if (path->slots[*level] >=
1939                     btrfs_header_nritems(cur))
1940                         break;
1941                 if (*level == 0) {
1942                         ret = drop_leaf_ref(trans, root, cur);
1943                         BUG_ON(ret);
1944                         break;
1945                 }
1946                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1947                 blocksize = btrfs_level_size(root, *level - 1);
1948                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1949                 BUG_ON(ret);
1950                 if (refs != 1) {
1951                         parent = path->nodes[*level];
1952                         root_owner = btrfs_header_owner(parent);
1953                         root_gen = btrfs_header_generation(parent);
1954                         path->slots[*level]++;
1955                         ret = btrfs_free_extent(trans, root, bytenr,
1956                                                 blocksize, root_owner,
1957                                                 root_gen, 0, 0, 1);
1958                         BUG_ON(ret);
1959                         continue;
1960                 }
1961                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1962                 if (!next || !btrfs_buffer_uptodate(next)) {
1963                         free_extent_buffer(next);
1964                         reada_walk_down(root, cur, path->slots[*level]);
1965                         mutex_unlock(&root->fs_info->fs_mutex);
1966                         next = read_tree_block(root, bytenr, blocksize);
1967                         mutex_lock(&root->fs_info->fs_mutex);
1968
1969                         /* we dropped the lock, check one more time */
1970                         ret = lookup_extent_ref(trans, root, bytenr,
1971                                                 blocksize, &refs);
1972                         BUG_ON(ret);
1973                         if (refs != 1) {
1974                                 parent = path->nodes[*level];
1975                                 root_owner = btrfs_header_owner(parent);
1976                                 root_gen = btrfs_header_generation(parent);
1977
1978                                 path->slots[*level]++;
1979                                 free_extent_buffer(next);
1980                                 ret = btrfs_free_extent(trans, root, bytenr,
1981                                                         blocksize,
1982                                                         root_owner,
1983                                                         root_gen, 0, 0, 1);
1984                                 BUG_ON(ret);
1985                                 continue;
1986                         }
1987                 }
1988                 WARN_ON(*level <= 0);
1989                 if (path->nodes[*level-1])
1990                         free_extent_buffer(path->nodes[*level-1]);
1991                 path->nodes[*level-1] = next;
1992                 *level = btrfs_header_level(next);
1993                 path->slots[*level] = 0;
1994         }
1995 out:
1996         WARN_ON(*level < 0);
1997         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1998
1999         if (path->nodes[*level] == root->node) {
2000                 root_owner = root->root_key.objectid;
2001                 parent = path->nodes[*level];
2002         } else {
2003                 parent = path->nodes[*level + 1];
2004                 root_owner = btrfs_header_owner(parent);
2005         }
2006
2007         root_gen = btrfs_header_generation(parent);
2008         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
2009                                 path->nodes[*level]->len,
2010                                 root_owner, root_gen, 0, 0, 1);
2011         free_extent_buffer(path->nodes[*level]);
2012         path->nodes[*level] = NULL;
2013         *level += 1;
2014         BUG_ON(ret);
2015         return 0;
2016 }
2017
2018 /*
2019  * helper for dropping snapshots.  This walks back up the tree in the path
2020  * to find the first node higher up where we haven't yet gone through
2021  * all the slots
2022  */
2023 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2024                                  struct btrfs_root *root,
2025                                  struct btrfs_path *path, int *level)
2026 {
2027         u64 root_owner;
2028         u64 root_gen;
2029         struct btrfs_root_item *root_item = &root->root_item;
2030         int i;
2031         int slot;
2032         int ret;
2033
2034         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2035                 slot = path->slots[i];
2036                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2037                         struct extent_buffer *node;
2038                         struct btrfs_disk_key disk_key;
2039                         node = path->nodes[i];
2040                         path->slots[i]++;
2041                         *level = i;
2042                         WARN_ON(*level == 0);
2043                         btrfs_node_key(node, &disk_key, path->slots[i]);
2044                         memcpy(&root_item->drop_progress,
2045                                &disk_key, sizeof(disk_key));
2046                         root_item->drop_level = i;
2047                         return 0;
2048                 } else {
2049                         if (path->nodes[*level] == root->node) {
2050                                 root_owner = root->root_key.objectid;
2051                                 root_gen =
2052                                    btrfs_header_generation(path->nodes[*level]);
2053                         } else {
2054                                 struct extent_buffer *node;
2055                                 node = path->nodes[*level + 1];
2056                                 root_owner = btrfs_header_owner(node);
2057                                 root_gen = btrfs_header_generation(node);
2058                         }
2059                         ret = btrfs_free_extent(trans, root,
2060                                                 path->nodes[*level]->start,
2061                                                 path->nodes[*level]->len,
2062                                                 root_owner, root_gen, 0, 0, 1);
2063                         BUG_ON(ret);
2064                         free_extent_buffer(path->nodes[*level]);
2065                         path->nodes[*level] = NULL;
2066                         *level = i + 1;
2067                 }
2068         }
2069         return 1;
2070 }
2071
2072 /*
2073  * drop the reference count on the tree rooted at 'snap'.  This traverses
2074  * the tree freeing any blocks that have a ref count of zero after being
2075  * decremented.
2076  */
2077 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2078                         *root)
2079 {
2080         int ret = 0;
2081         int wret;
2082         int level;
2083         struct btrfs_path *path;
2084         int i;
2085         int orig_level;
2086         struct btrfs_root_item *root_item = &root->root_item;
2087
2088         path = btrfs_alloc_path();
2089         BUG_ON(!path);
2090
2091         level = btrfs_header_level(root->node);
2092         orig_level = level;
2093         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2094                 path->nodes[level] = root->node;
2095                 extent_buffer_get(root->node);
2096                 path->slots[level] = 0;
2097         } else {
2098                 struct btrfs_key key;
2099                 struct btrfs_disk_key found_key;
2100                 struct extent_buffer *node;
2101
2102                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2103                 level = root_item->drop_level;
2104                 path->lowest_level = level;
2105                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2106                 if (wret < 0) {
2107                         ret = wret;
2108                         goto out;
2109                 }
2110                 node = path->nodes[level];
2111                 btrfs_node_key(node, &found_key, path->slots[level]);
2112                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2113                                sizeof(found_key)));
2114         }
2115         while(1) {
2116                 wret = walk_down_tree(trans, root, path, &level);
2117                 if (wret < 0)
2118                         ret = wret;
2119                 if (wret != 0)
2120                         break;
2121
2122                 wret = walk_up_tree(trans, root, path, &level);
2123                 if (wret < 0)
2124                         ret = wret;
2125                 if (wret != 0)
2126                         break;
2127                 /*
2128                 ret = -EAGAIN;
2129                 break;
2130                 */
2131         }
2132         for (i = 0; i <= orig_level; i++) {
2133                 if (path->nodes[i]) {
2134                         free_extent_buffer(path->nodes[i]);
2135                         path->nodes[i] = NULL;
2136                 }
2137         }
2138 out:
2139         btrfs_free_path(path);
2140         return ret;
2141 }
2142
2143 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2144 {
2145         u64 start;
2146         u64 end;
2147         u64 ptr;
2148         int ret;
2149         while(1) {
2150                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2151                                             &start, &end, (unsigned int)-1);
2152                 if (ret)
2153                         break;
2154                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2155                 if (!ret)
2156                         kfree((void *)(unsigned long)ptr);
2157                 clear_extent_bits(&info->block_group_cache, start,
2158                                   end, (unsigned int)-1, GFP_NOFS);
2159         }
2160         while(1) {
2161                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2162                                             &start, &end, EXTENT_DIRTY);
2163                 if (ret)
2164                         break;
2165                 clear_extent_dirty(&info->free_space_cache, start,
2166                                    end, GFP_NOFS);
2167         }
2168         return 0;
2169 }
2170
2171 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2172                            struct btrfs_key *key)
2173 {
2174         int ret;
2175         struct btrfs_key found_key;
2176         struct extent_buffer *leaf;
2177         int slot;
2178
2179         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2180         if (ret < 0)
2181                 return ret;
2182         while(1) {
2183                 slot = path->slots[0];
2184                 leaf = path->nodes[0];
2185                 if (slot >= btrfs_header_nritems(leaf)) {
2186                         ret = btrfs_next_leaf(root, path);
2187                         if (ret == 0)
2188                                 continue;
2189                         if (ret < 0)
2190                                 goto error;
2191                         break;
2192                 }
2193                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2194
2195                 if (found_key.objectid >= key->objectid &&
2196                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2197                         return 0;
2198                 path->slots[0]++;
2199         }
2200         ret = -ENOENT;
2201 error:
2202         return ret;
2203 }
2204
2205 int btrfs_read_block_groups(struct btrfs_root *root)
2206 {
2207         struct btrfs_path *path;
2208         int ret;
2209         int bit;
2210         struct btrfs_block_group_cache *cache;
2211         struct btrfs_fs_info *info = root->fs_info;
2212         struct btrfs_space_info *space_info;
2213         struct extent_io_tree *block_group_cache;
2214         struct btrfs_key key;
2215         struct btrfs_key found_key;
2216         struct extent_buffer *leaf;
2217
2218         block_group_cache = &info->block_group_cache;
2219
2220         root = info->extent_root;
2221         key.objectid = 0;
2222         key.offset = 0;
2223         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2224         path = btrfs_alloc_path();
2225         if (!path)
2226                 return -ENOMEM;
2227
2228         while(1) {
2229                 ret = find_first_block_group(root, path, &key);
2230                 if (ret > 0) {
2231                         ret = 0;
2232                         goto error;
2233                 }
2234                 if (ret != 0) {
2235                         goto error;
2236                 }
2237                 leaf = path->nodes[0];
2238                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2239                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2240                 if (!cache) {
2241                         ret = -ENOMEM;
2242                         break;
2243                 }
2244
2245                 read_extent_buffer(leaf, &cache->item,
2246                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2247                                    sizeof(cache->item));
2248                 memcpy(&cache->key, &found_key, sizeof(found_key));
2249                 cache->cached = 0;
2250                 cache->pinned = 0;
2251                 key.objectid = found_key.objectid + found_key.offset;
2252                 btrfs_release_path(root, path);
2253                 cache->flags = btrfs_block_group_flags(&cache->item);
2254                 bit = 0;
2255                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2256                         bit = BLOCK_GROUP_DATA;
2257                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2258                         bit = BLOCK_GROUP_SYSTEM;
2259                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2260                         bit = BLOCK_GROUP_METADATA;
2261                 }
2262                 set_avail_alloc_bits(info, cache->flags);
2263
2264                 ret = update_space_info(info, cache->flags, found_key.offset,
2265                                         btrfs_block_group_used(&cache->item),
2266                                         &space_info);
2267                 BUG_ON(ret);
2268                 cache->space_info = space_info;
2269
2270                 /* use EXTENT_LOCKED to prevent merging */
2271                 set_extent_bits(block_group_cache, found_key.objectid,
2272                                 found_key.objectid + found_key.offset - 1,
2273                                 bit | EXTENT_LOCKED, GFP_NOFS);
2274                 set_state_private(block_group_cache, found_key.objectid,
2275                                   (unsigned long)cache);
2276
2277                 if (key.objectid >=
2278                     btrfs_super_total_bytes(&info->super_copy))
2279                         break;
2280         }
2281         ret = 0;
2282 error:
2283         btrfs_free_path(path);
2284         return ret;
2285 }
2286
2287 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2288                            struct btrfs_root *root, u64 bytes_used,
2289                            u64 type, u64 chunk_objectid, u64 chunk_offset,
2290                            u64 size)
2291 {
2292         int ret;
2293         int bit = 0;
2294         struct btrfs_root *extent_root;
2295         struct btrfs_block_group_cache *cache;
2296         struct extent_io_tree *block_group_cache;
2297
2298         extent_root = root->fs_info->extent_root;
2299         block_group_cache = &root->fs_info->block_group_cache;
2300
2301         cache = kzalloc(sizeof(*cache), GFP_NOFS);
2302         BUG_ON(!cache);
2303         cache->key.objectid = chunk_offset;
2304         cache->key.offset = size;
2305
2306         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2307         btrfs_set_block_group_used(&cache->item, bytes_used);
2308         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2309         cache->flags = type;
2310         btrfs_set_block_group_flags(&cache->item, type);
2311
2312         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2313                                 &cache->space_info);
2314         BUG_ON(ret);
2315
2316         bit = block_group_state_bits(type);
2317         set_extent_bits(block_group_cache, chunk_offset,
2318                         chunk_offset + size - 1,
2319                         bit | EXTENT_LOCKED, GFP_NOFS);
2320
2321         set_state_private(block_group_cache, chunk_offset,
2322                           (unsigned long)cache);
2323         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2324                                 sizeof(cache->item));
2325         BUG_ON(ret);
2326
2327         finish_current_insert(trans, extent_root);
2328         ret = del_pending_extents(trans, extent_root);
2329         BUG_ON(ret);
2330         set_avail_alloc_bits(extent_root->fs_info, type);
2331         return 0;
2332 }
2333
2334 u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation,
2335                           u64 owner, u64 owner_offset)
2336 {
2337         return hash_extent_ref(root_objectid, ref_generation,
2338                                owner, owner_offset);
2339 }
2340
2341 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
2342                              struct btrfs_root *root,
2343                              u64 bytenr, u64 num_bytes, int alloc,
2344                              int mark_free)
2345 {
2346         return update_block_group(trans, root, bytenr, num_bytes,
2347                                   alloc, mark_free);
2348 }