Update the Ext3 converter
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "radix-tree.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "crc32c.h"
28 #include "volumes.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40
41 static int cache_block_group(struct btrfs_root *root,
42                              struct btrfs_block_group_cache *block_group)
43 {
44         struct btrfs_path *path;
45         int ret;
46         struct btrfs_key key;
47         struct extent_buffer *leaf;
48         struct extent_io_tree *free_space_cache;
49         int slot;
50         u64 last = 0;
51         u64 hole_size;
52         u64 first_free;
53         int found = 0;
54
55         if (!block_group)
56                 return 0;
57
58         root = root->fs_info->extent_root;
59         free_space_cache = &root->fs_info->free_space_cache;
60
61         if (block_group->cached)
62                 return 0;
63
64         path = btrfs_alloc_path();
65         if (!path)
66                 return -ENOMEM;
67
68         path->reada = 2;
69         first_free = block_group->key.objectid;
70         key.objectid = block_group->key.objectid;
71         key.offset = 0;
72         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
73         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
74         if (ret < 0)
75                 return ret;
76         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
77         if (ret < 0)
78                 return ret;
79         if (ret == 0) {
80                 leaf = path->nodes[0];
81                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
82                 if (key.objectid + key.offset > first_free)
83                         first_free = key.objectid + key.offset;
84         }
85         while(1) {
86                 leaf = path->nodes[0];
87                 slot = path->slots[0];
88                 if (slot >= btrfs_header_nritems(leaf)) {
89                         ret = btrfs_next_leaf(root, path);
90                         if (ret < 0)
91                                 goto err;
92                         if (ret == 0) {
93                                 continue;
94                         } else {
95                                 break;
96                         }
97                 }
98                 btrfs_item_key_to_cpu(leaf, &key, slot);
99                 if (key.objectid < block_group->key.objectid) {
100                         goto next;
101                 }
102                 if (key.objectid >= block_group->key.objectid +
103                     block_group->key.offset) {
104                         break;
105                 }
106
107                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
108                         if (!found) {
109                                 last = first_free;
110                                 found = 1;
111                         }
112                         if (key.objectid > last) {
113                                 hole_size = key.objectid - last;
114                                 set_extent_dirty(free_space_cache, last,
115                                                  last + hole_size - 1,
116                                                  GFP_NOFS);
117                         }
118                         last = key.objectid + key.offset;
119                 }
120 next:
121                 path->slots[0]++;
122         }
123
124         if (!found)
125                 last = first_free;
126         if (block_group->key.objectid +
127             block_group->key.offset > last) {
128                 hole_size = block_group->key.objectid +
129                         block_group->key.offset - last;
130                 set_extent_dirty(free_space_cache, last,
131                                  last + hole_size - 1, GFP_NOFS);
132         }
133         block_group->cached = 1;
134 err:
135         btrfs_free_path(path);
136         return 0;
137 }
138
139 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
140                                                          btrfs_fs_info *info,
141                                                          u64 bytenr)
142 {
143         struct extent_io_tree *block_group_cache;
144         struct btrfs_block_group_cache *block_group = NULL;
145         u64 ptr;
146         u64 start;
147         u64 end;
148         int ret;
149
150         block_group_cache = &info->block_group_cache;
151         ret = find_first_extent_bit(block_group_cache,
152                                     bytenr, &start, &end,
153                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
154                                     BLOCK_GROUP_SYSTEM);
155         if (ret) {
156                 return NULL;
157         }
158         ret = get_state_private(block_group_cache, start, &ptr);
159         if (ret)
160                 return NULL;
161
162         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
163         if (block_group->key.objectid <= bytenr && bytenr <
164             block_group->key.objectid + block_group->key.offset)
165                 return block_group;
166         return NULL;
167 }
168
169 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
170 {
171         return (cache->flags & bits) == bits;
172 }
173
174 static int noinline find_search_start(struct btrfs_root *root,
175                               struct btrfs_block_group_cache **cache_ret,
176                               u64 *start_ret, int num, int data)
177 {
178         int ret;
179         struct btrfs_block_group_cache *cache = *cache_ret;
180         u64 last;
181         u64 start = 0;
182         u64 end = 0;
183         u64 cache_miss = 0;
184         u64 search_start = *start_ret;
185         int wrapped = 0;
186
187         if (!cache) {
188                 goto out;
189         }
190 again:
191         ret = cache_block_group(root, cache);
192         if (ret)
193                 goto out;
194
195         last = max(search_start, cache->key.objectid);
196         if (!block_group_bits(cache, data)) {
197                 goto new_group;
198         }
199
200         while(1) {
201                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
202                                             last, &start, &end, EXTENT_DIRTY);
203                 if (ret) {
204                         if (!cache_miss)
205                                 cache_miss = last;
206                         goto new_group;
207                 }
208
209                 start = max(last, start);
210                 last = end + 1;
211                 if (last - start < num) {
212                         if (last == cache->key.objectid + cache->key.offset)
213                                 cache_miss = start;
214                         continue;
215                 }
216                 if (start + num > cache->key.objectid + cache->key.offset)
217                         goto new_group;
218                 *start_ret = start;
219                 return 0;
220         }
221 out:
222         cache = btrfs_lookup_block_group(root->fs_info, search_start);
223         if (!cache) {
224                 printk("Unable to find block group for %llu\n",
225                         (unsigned long long)search_start);
226                 WARN_ON(1);
227         }
228         return -ENOSPC;
229
230 new_group:
231         last = cache->key.objectid + cache->key.offset;
232 wrapped:
233         cache = btrfs_lookup_block_group(root->fs_info, last);
234         if (!cache) {
235 no_cache:
236                 if (!wrapped) {
237                         wrapped = 1;
238                         last = search_start;
239                         goto wrapped;
240                 }
241                 goto out;
242         }
243         if (cache_miss && !cache->cached) {
244                 cache_block_group(root, cache);
245                 last = cache_miss;
246                 cache = btrfs_lookup_block_group(root->fs_info, last);
247         }
248         cache = btrfs_find_block_group(root, cache, last, data, 0);
249         if (!cache)
250                 goto no_cache;
251         *cache_ret = cache;
252         cache_miss = 0;
253         goto again;
254 }
255
256 static u64 div_factor(u64 num, int factor)
257 {
258         if (factor == 10)
259                 return num;
260         num *= factor;
261         num /= 10;
262         return num;
263 }
264
265 static int block_group_state_bits(u64 flags)
266 {
267         int bits = 0;
268         if (flags & BTRFS_BLOCK_GROUP_DATA)
269                 bits |= BLOCK_GROUP_DATA;
270         if (flags & BTRFS_BLOCK_GROUP_METADATA)
271                 bits |= BLOCK_GROUP_METADATA;
272         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
273                 bits |= BLOCK_GROUP_SYSTEM;
274         return bits;
275 }
276
277 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
278                                                  struct btrfs_block_group_cache
279                                                  *hint, u64 search_start,
280                                                  int data, int owner)
281 {
282         struct btrfs_block_group_cache *cache;
283         struct extent_io_tree *block_group_cache;
284         struct btrfs_block_group_cache *found_group = NULL;
285         struct btrfs_fs_info *info = root->fs_info;
286         u64 used;
287         u64 last = 0;
288         u64 hint_last;
289         u64 start;
290         u64 end;
291         u64 free_check;
292         u64 ptr;
293         int bit;
294         int ret;
295         int full_search = 0;
296         int factor = 8;
297
298         block_group_cache = &info->block_group_cache;
299
300         if (!owner)
301                 factor = 8;
302
303         bit = block_group_state_bits(data);
304
305         if (search_start) {
306                 struct btrfs_block_group_cache *shint;
307                 shint = btrfs_lookup_block_group(info, search_start);
308                 if (shint && block_group_bits(shint, data)) {
309                         used = btrfs_block_group_used(&shint->item);
310                         if (used + shint->pinned <
311                             div_factor(shint->key.offset, factor)) {
312                                 return shint;
313                         }
314                 }
315         }
316         if (hint && block_group_bits(hint, data)) {
317                 used = btrfs_block_group_used(&hint->item);
318                 if (used + hint->pinned <
319                     div_factor(hint->key.offset, factor)) {
320                         return hint;
321                 }
322                 last = hint->key.objectid + hint->key.offset;
323                 hint_last = last;
324         } else {
325                 if (hint)
326                         hint_last = max(hint->key.objectid, search_start);
327                 else
328                         hint_last = search_start;
329
330                 last = hint_last;
331         }
332 again:
333         while(1) {
334                 ret = find_first_extent_bit(block_group_cache, last,
335                                             &start, &end, bit);
336                 if (ret)
337                         break;
338
339                 ret = get_state_private(block_group_cache, start, &ptr);
340                 if (ret)
341                         break;
342
343                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
344                 last = cache->key.objectid + cache->key.offset;
345                 used = btrfs_block_group_used(&cache->item);
346
347                 if (block_group_bits(cache, data)) {
348                         if (full_search)
349                                 free_check = cache->key.offset;
350                         else
351                                 free_check = div_factor(cache->key.offset,
352                                                         factor);
353
354                         if (used + cache->pinned < free_check) {
355                                 found_group = cache;
356                                 goto found;
357                         }
358                 }
359                 cond_resched();
360         }
361         if (!full_search) {
362                 last = search_start;
363                 full_search = 1;
364                 goto again;
365         }
366 found:
367         return found_group;
368 }
369
370 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
371                            u64 owner, u64 owner_offset)
372 {
373         u32 high_crc = ~(u32)0;
374         u32 low_crc = ~(u32)0;
375         __le64 lenum;
376
377         lenum = cpu_to_le64(root_objectid);
378         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
379         lenum = cpu_to_le64(ref_generation);
380         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
381         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
382                 lenum = cpu_to_le64(owner);
383                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
384                 lenum = cpu_to_le64(owner_offset);
385                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
386         }
387         return ((u64)high_crc << 32) | (u64)low_crc;
388 }
389
390 static int match_extent_ref(struct extent_buffer *leaf,
391                             struct btrfs_extent_ref *disk_ref,
392                             struct btrfs_extent_ref *cpu_ref)
393 {
394         int ret;
395         int len;
396
397         if (cpu_ref->objectid)
398                 len = sizeof(*cpu_ref);
399         else
400                 len = 2 * sizeof(u64);
401         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
402                                    len);
403         return ret == 0;
404 }
405
406 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
407                                           struct btrfs_root *root,
408                                           struct btrfs_path *path, u64 bytenr,
409                                           u64 root_objectid,
410                                           u64 ref_generation, u64 owner,
411                                           u64 owner_offset, int del)
412 {
413         u64 hash;
414         struct btrfs_key key;
415         struct btrfs_key found_key;
416         struct btrfs_extent_ref ref;
417         struct extent_buffer *leaf;
418         struct btrfs_extent_ref *disk_ref;
419         int ret;
420         int ret2;
421
422         btrfs_set_stack_ref_root(&ref, root_objectid);
423         btrfs_set_stack_ref_generation(&ref, ref_generation);
424         btrfs_set_stack_ref_objectid(&ref, owner);
425         btrfs_set_stack_ref_offset(&ref, owner_offset);
426
427         hash = hash_extent_ref(root_objectid, ref_generation, owner,
428                                owner_offset);
429         key.offset = hash;
430         key.objectid = bytenr;
431         key.type = BTRFS_EXTENT_REF_KEY;
432
433         while (1) {
434                 ret = btrfs_search_slot(trans, root, &key, path,
435                                         del ? -1 : 0, del);
436                 if (ret < 0)
437                         goto out;
438                 leaf = path->nodes[0];
439                 if (ret != 0) {
440                         u32 nritems = btrfs_header_nritems(leaf);
441                         if (path->slots[0] >= nritems) {
442                                 ret2 = btrfs_next_leaf(root, path);
443                                 if (ret2)
444                                         goto out;
445                                 leaf = path->nodes[0];
446                         }
447                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
448                         if (found_key.objectid != bytenr ||
449                             found_key.type != BTRFS_EXTENT_REF_KEY)
450                                 goto out;
451                         key.offset = found_key.offset;
452                         if (del) {
453                                 btrfs_release_path(root, path);
454                                 continue;
455                         }
456                 }
457                 disk_ref = btrfs_item_ptr(path->nodes[0],
458                                           path->slots[0],
459                                           struct btrfs_extent_ref);
460                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
461                         ret = 0;
462                         goto out;
463                 }
464                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
465                 key.offset = found_key.offset + 1;
466                 btrfs_release_path(root, path);
467         }
468 out:
469         return ret;
470 }
471
472 /*
473  * Back reference rules.  Back refs have three main goals:
474  *
475  * 1) differentiate between all holders of references to an extent so that
476  *    when a reference is dropped we can make sure it was a valid reference
477  *    before freeing the extent.
478  *
479  * 2) Provide enough information to quickly find the holders of an extent
480  *    if we notice a given block is corrupted or bad.
481  *
482  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
483  *    maintenance.  This is actually the same as #2, but with a slightly
484  *    different use case.
485  *
486  * File extents can be referenced by:
487  *
488  * - multiple snapshots, subvolumes, or different generations in one subvol
489  * - different files inside a single subvolume (in theory, not implemented yet)
490  * - different offsets inside a file (bookend extents in file.c)
491  *
492  * The extent ref structure has fields for:
493  *
494  * - Objectid of the subvolume root
495  * - Generation number of the tree holding the reference
496  * - objectid of the file holding the reference
497  * - offset in the file corresponding to the key holding the reference
498  *
499  * When a file extent is allocated the fields are filled in:
500  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
501  *
502  * When a leaf is cow'd new references are added for every file extent found
503  * in the leaf.  It looks the same as the create case, but trans->transid
504  * will be different when the block is cow'd.
505  *
506  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
507  *
508  * When a file extent is removed either during snapshot deletion or file
509  * truncation, the corresponding back reference is found
510  * by searching for:
511  *
512  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
513  *      inode objectid, offset in file)
514  *
515  * Btree extents can be referenced by:
516  *
517  * - Different subvolumes
518  * - Different generations of the same subvolume
519  *
520  * Storing sufficient information for a full reverse mapping of a btree
521  * block would require storing the lowest key of the block in the backref,
522  * and it would require updating that lowest key either before write out or
523  * every time it changed.  Instead, the objectid of the lowest key is stored
524  * along with the level of the tree block.  This provides a hint
525  * about where in the btree the block can be found.  Searches through the
526  * btree only need to look for a pointer to that block, so they stop one
527  * level higher than the level recorded in the backref.
528  *
529  * Some btrees do not do reference counting on their extents.  These
530  * include the extent tree and the tree of tree roots.  Backrefs for these
531  * trees always have a generation of zero.
532  *
533  * When a tree block is created, back references are inserted:
534  *
535  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
536  *
537  * When a tree block is cow'd in a reference counted root,
538  * new back references are added for all the blocks it points to.
539  * These are of the form (trans->transid will have increased since creation):
540  *
541  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
542  *
543  * Because the lowest_key_objectid and the level are just hints
544  * they are not used when backrefs are deleted.  When a backref is deleted:
545  *
546  * if backref was for a tree root:
547  *     root_objectid = root->root_key.objectid
548  * else
549  *     root_objectid = btrfs_header_owner(parent)
550  *
551  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
552  *
553  * Back Reference Key hashing:
554  *
555  * Back references have four fields, each 64 bits long.  Unfortunately,
556  * This is hashed into a single 64 bit number and placed into the key offset.
557  * The key objectid corresponds to the first byte in the extent, and the
558  * key type is set to BTRFS_EXTENT_REF_KEY
559  */
560 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
561                                  struct btrfs_root *root,
562                                  struct btrfs_path *path, u64 bytenr,
563                                  u64 root_objectid, u64 ref_generation,
564                                  u64 owner, u64 owner_offset)
565 {
566         u64 hash;
567         struct btrfs_key key;
568         struct btrfs_extent_ref ref;
569         struct btrfs_extent_ref *disk_ref;
570         int ret;
571
572         btrfs_set_stack_ref_root(&ref, root_objectid);
573         btrfs_set_stack_ref_generation(&ref, ref_generation);
574         btrfs_set_stack_ref_objectid(&ref, owner);
575         btrfs_set_stack_ref_offset(&ref, owner_offset);
576
577         hash = hash_extent_ref(root_objectid, ref_generation, owner,
578                                owner_offset);
579         key.offset = hash;
580         key.objectid = bytenr;
581         key.type = BTRFS_EXTENT_REF_KEY;
582
583         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
584         while (ret == -EEXIST) {
585                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
586                                           struct btrfs_extent_ref);
587                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
588                         goto out;
589                 key.offset++;
590                 btrfs_release_path(root, path);
591                 ret = btrfs_insert_empty_item(trans, root, path, &key,
592                                               sizeof(ref));
593         }
594         if (ret)
595                 goto out;
596         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
597                                   struct btrfs_extent_ref);
598         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
599                             sizeof(ref));
600         btrfs_mark_buffer_dirty(path->nodes[0]);
601 out:
602         btrfs_release_path(root, path);
603         return ret;
604 }
605
606 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
607                                 struct btrfs_root *root,
608                                 u64 bytenr, u64 num_bytes,
609                                 u64 root_objectid, u64 ref_generation,
610                                 u64 owner, u64 owner_offset)
611 {
612         struct btrfs_path *path;
613         int ret;
614         struct btrfs_key key;
615         struct extent_buffer *l;
616         struct btrfs_extent_item *item;
617         u32 refs;
618
619         WARN_ON(num_bytes < root->sectorsize);
620         path = btrfs_alloc_path();
621         if (!path)
622                 return -ENOMEM;
623
624         key.objectid = bytenr;
625         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
626         key.offset = num_bytes;
627         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
628                                 0, 1);
629         if (ret < 0)
630                 return ret;
631         if (ret != 0) {
632                 BUG();
633         }
634         BUG_ON(ret != 0);
635         l = path->nodes[0];
636         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
637         refs = btrfs_extent_refs(l, item);
638         btrfs_set_extent_refs(l, item, refs + 1);
639         btrfs_mark_buffer_dirty(path->nodes[0]);
640
641         btrfs_release_path(root->fs_info->extent_root, path);
642
643         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
644                                           path, bytenr, root_objectid,
645                                           ref_generation, owner, owner_offset);
646         BUG_ON(ret);
647         finish_current_insert(trans, root->fs_info->extent_root);
648         del_pending_extents(trans, root->fs_info->extent_root);
649
650         btrfs_free_path(path);
651         return 0;
652 }
653
654 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
655                          struct btrfs_root *root)
656 {
657         finish_current_insert(trans, root->fs_info->extent_root);
658         del_pending_extents(trans, root->fs_info->extent_root);
659         return 0;
660 }
661
662 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
663                              struct btrfs_root *root, u64 bytenr,
664                              u64 num_bytes, u32 *refs)
665 {
666         struct btrfs_path *path;
667         int ret;
668         struct btrfs_key key;
669         struct extent_buffer *l;
670         struct btrfs_extent_item *item;
671
672         WARN_ON(num_bytes < root->sectorsize);
673         path = btrfs_alloc_path();
674         key.objectid = bytenr;
675         key.offset = num_bytes;
676         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
677         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
678                                 0, 0);
679         if (ret < 0)
680                 goto out;
681         if (ret != 0) {
682                 btrfs_print_leaf(root, path->nodes[0]);
683                 printk("failed to find block number %llu\n",
684                         (unsigned long long)bytenr);
685                 BUG();
686         }
687         l = path->nodes[0];
688         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
689         *refs = btrfs_extent_refs(l, item);
690 out:
691         btrfs_free_path(path);
692         return 0;
693 }
694
695 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
696                                   struct btrfs_path *count_path,
697                                   u64 first_extent)
698 {
699         struct btrfs_root *extent_root = root->fs_info->extent_root;
700         struct btrfs_path *path;
701         u64 bytenr;
702         u64 found_objectid;
703         u64 root_objectid = root->root_key.objectid;
704         u32 total_count = 0;
705         u32 cur_count;
706         u32 refs;
707         u32 nritems;
708         int ret;
709         struct btrfs_key key;
710         struct btrfs_key found_key;
711         struct extent_buffer *l;
712         struct btrfs_extent_item *item;
713         struct btrfs_extent_ref *ref_item;
714         int level = -1;
715
716         path = btrfs_alloc_path();
717 again:
718         if (level == -1)
719                 bytenr = first_extent;
720         else
721                 bytenr = count_path->nodes[level]->start;
722
723         cur_count = 0;
724         key.objectid = bytenr;
725         key.offset = 0;
726
727         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
728         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
729         if (ret < 0)
730                 goto out;
731         BUG_ON(ret == 0);
732
733         l = path->nodes[0];
734         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
735
736         if (found_key.objectid != bytenr ||
737             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
738                 goto out;
739         }
740
741         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
742         refs = btrfs_extent_refs(l, item);
743         while (1) {
744                 nritems = btrfs_header_nritems(l);
745                 if (path->slots[0] >= nritems) {
746                         ret = btrfs_next_leaf(extent_root, path);
747                         if (ret == 0)
748                                 continue;
749                         break;
750                 }
751                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
752                 if (found_key.objectid != bytenr)
753                         break;
754                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
755                         path->slots[0]++;
756                         continue;
757                 }
758
759                 cur_count++;
760                 ref_item = btrfs_item_ptr(l, path->slots[0],
761                                           struct btrfs_extent_ref);
762                 found_objectid = btrfs_ref_root(l, ref_item);
763
764                 if (found_objectid != root_objectid) {
765                         total_count = 2;
766                         goto out;
767                 }
768                 total_count = 1;
769                 path->slots[0]++;
770         }
771         if (cur_count == 0) {
772                 total_count = 0;
773                 goto out;
774         }
775         if (level >= 0 && root->node == count_path->nodes[level])
776                 goto out;
777         level++;
778         btrfs_release_path(root, path);
779         goto again;
780
781 out:
782         btrfs_free_path(path);
783         return total_count;
784 }
785 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
786                        struct btrfs_root *root, u64 owner_objectid)
787 {
788         u64 generation;
789         u64 key_objectid;
790         u64 level;
791         u32 nritems;
792         struct btrfs_disk_key disk_key;
793
794         level = btrfs_header_level(root->node);
795         generation = trans->transid;
796         nritems = btrfs_header_nritems(root->node);
797         if (nritems > 0) {
798                 if (level == 0)
799                         btrfs_item_key(root->node, &disk_key, 0);
800                 else
801                         btrfs_node_key(root->node, &disk_key, 0);
802                 key_objectid = btrfs_disk_key_objectid(&disk_key);
803         } else {
804                 key_objectid = 0;
805         }
806         return btrfs_inc_extent_ref(trans, root, root->node->start,
807                                     root->node->len, owner_objectid,
808                                     generation, level, key_objectid);
809 }
810
811 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
812                   struct extent_buffer *buf)
813 {
814         u64 bytenr;
815         u32 nritems;
816         struct btrfs_key key;
817         struct btrfs_file_extent_item *fi;
818         int i;
819         int level;
820         int ret;
821         int faili;
822
823         if (!root->ref_cows)
824                 return 0;
825
826         level = btrfs_header_level(buf);
827         nritems = btrfs_header_nritems(buf);
828         for (i = 0; i < nritems; i++) {
829                 if (level == 0) {
830                         u64 disk_bytenr;
831                         btrfs_item_key_to_cpu(buf, &key, i);
832                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
833                                 continue;
834                         fi = btrfs_item_ptr(buf, i,
835                                             struct btrfs_file_extent_item);
836                         if (btrfs_file_extent_type(buf, fi) ==
837                             BTRFS_FILE_EXTENT_INLINE)
838                                 continue;
839                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
840                         if (disk_bytenr == 0)
841                                 continue;
842                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
843                                     btrfs_file_extent_disk_num_bytes(buf, fi),
844                                     root->root_key.objectid, trans->transid,
845                                     key.objectid, key.offset);
846                         if (ret) {
847                                 faili = i;
848                                 goto fail;
849                         }
850                 } else {
851                         bytenr = btrfs_node_blockptr(buf, i);
852                         btrfs_node_key_to_cpu(buf, &key, i);
853                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
854                                            btrfs_level_size(root, level - 1),
855                                            root->root_key.objectid,
856                                            trans->transid,
857                                            level - 1, key.objectid);
858                         if (ret) {
859                                 faili = i;
860                                 goto fail;
861                         }
862                 }
863         }
864         return 0;
865 fail:
866         WARN_ON(1);
867 #if 0
868         for (i =0; i < faili; i++) {
869                 if (level == 0) {
870                         u64 disk_bytenr;
871                         btrfs_item_key_to_cpu(buf, &key, i);
872                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
873                                 continue;
874                         fi = btrfs_item_ptr(buf, i,
875                                             struct btrfs_file_extent_item);
876                         if (btrfs_file_extent_type(buf, fi) ==
877                             BTRFS_FILE_EXTENT_INLINE)
878                                 continue;
879                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
880                         if (disk_bytenr == 0)
881                                 continue;
882                         err = btrfs_free_extent(trans, root, disk_bytenr,
883                                     btrfs_file_extent_disk_num_bytes(buf,
884                                                                       fi), 0);
885                         BUG_ON(err);
886                 } else {
887                         bytenr = btrfs_node_blockptr(buf, i);
888                         err = btrfs_free_extent(trans, root, bytenr,
889                                         btrfs_level_size(root, level - 1), 0);
890                         BUG_ON(err);
891                 }
892         }
893 #endif
894         return ret;
895 }
896
897 static int write_one_cache_group(struct btrfs_trans_handle *trans,
898                                  struct btrfs_root *root,
899                                  struct btrfs_path *path,
900                                  struct btrfs_block_group_cache *cache)
901 {
902         int ret;
903         int pending_ret;
904         struct btrfs_root *extent_root = root->fs_info->extent_root;
905         unsigned long bi;
906         struct extent_buffer *leaf;
907
908         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
909         if (ret < 0)
910                 goto fail;
911         BUG_ON(ret);
912
913         leaf = path->nodes[0];
914         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
915         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
916         btrfs_mark_buffer_dirty(leaf);
917         btrfs_release_path(extent_root, path);
918 fail:
919         finish_current_insert(trans, extent_root);
920         pending_ret = del_pending_extents(trans, extent_root);
921         if (ret)
922                 return ret;
923         if (pending_ret)
924                 return pending_ret;
925         return 0;
926
927 }
928
929 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
930                                    struct btrfs_root *root)
931 {
932         struct extent_io_tree *block_group_cache;
933         struct btrfs_block_group_cache *cache;
934         int ret;
935         int err = 0;
936         int werr = 0;
937         struct btrfs_path *path;
938         u64 last = 0;
939         u64 start;
940         u64 end;
941         u64 ptr;
942
943         block_group_cache = &root->fs_info->block_group_cache;
944         path = btrfs_alloc_path();
945         if (!path)
946                 return -ENOMEM;
947
948         while(1) {
949                 ret = find_first_extent_bit(block_group_cache, last,
950                                             &start, &end, BLOCK_GROUP_DIRTY);
951                 if (ret)
952                         break;
953
954                 last = end + 1;
955                 ret = get_state_private(block_group_cache, start, &ptr);
956                 if (ret)
957                         break;
958                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
959                 err = write_one_cache_group(trans, root,
960                                             path, cache);
961                 /*
962                  * if we fail to write the cache group, we want
963                  * to keep it marked dirty in hopes that a later
964                  * write will work
965                  */
966                 if (err) {
967                         werr = err;
968                         continue;
969                 }
970                 clear_extent_bits(block_group_cache, start, end,
971                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
972         }
973         btrfs_free_path(path);
974         return werr;
975 }
976
977 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
978                                                   u64 flags)
979 {
980         struct list_head *head = &info->space_info;
981         struct list_head *cur;
982         struct btrfs_space_info *found;
983         list_for_each(cur, head) {
984                 found = list_entry(cur, struct btrfs_space_info, list);
985                 if (found->flags == flags)
986                         return found;
987         }
988         return NULL;
989
990 }
991
992 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
993                              u64 total_bytes, u64 bytes_used,
994                              struct btrfs_space_info **space_info)
995 {
996         struct btrfs_space_info *found;
997
998         found = __find_space_info(info, flags);
999         if (found) {
1000                 found->total_bytes += total_bytes;
1001                 found->bytes_used += bytes_used;
1002                 WARN_ON(found->total_bytes < found->bytes_used);
1003                 *space_info = found;
1004                 return 0;
1005         }
1006         found = kmalloc(sizeof(*found), GFP_NOFS);
1007         if (!found)
1008                 return -ENOMEM;
1009
1010         list_add(&found->list, &info->space_info);
1011         found->flags = flags;
1012         found->total_bytes = total_bytes;
1013         found->bytes_used = bytes_used;
1014         found->bytes_pinned = 0;
1015         found->full = 0;
1016         *space_info = found;
1017         return 0;
1018 }
1019
1020
1021 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1022 {
1023         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1024                                    BTRFS_BLOCK_GROUP_RAID1 |
1025                                    BTRFS_BLOCK_GROUP_DUP);
1026         if (extra_flags) {
1027                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1028                         fs_info->avail_data_alloc_bits |= extra_flags;
1029                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1030                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1031                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1032                         fs_info->avail_system_alloc_bits |= extra_flags;
1033         }
1034 }
1035
1036 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1037                           struct btrfs_root *extent_root, u64 alloc_bytes,
1038                           u64 flags)
1039 {
1040         struct btrfs_space_info *space_info;
1041         u64 thresh;
1042         u64 start;
1043         u64 num_bytes;
1044         int ret;
1045
1046         space_info = __find_space_info(extent_root->fs_info, flags);
1047         if (!space_info) {
1048                 ret = update_space_info(extent_root->fs_info, flags,
1049                                         0, 0, &space_info);
1050                 BUG_ON(ret);
1051         }
1052         BUG_ON(!space_info);
1053
1054         if (space_info->full)
1055                 return 0;
1056
1057         thresh = div_factor(space_info->total_bytes, 7);
1058         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1059             thresh)
1060                 return 0;
1061
1062         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1063         if (ret == -ENOSPC) {
1064                 space_info->full = 1;
1065                 return 0;
1066         }
1067
1068         BUG_ON(ret);
1069
1070         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1071                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1072         BUG_ON(ret);
1073         return 0;
1074 }
1075
1076 static int update_block_group(struct btrfs_trans_handle *trans,
1077                               struct btrfs_root *root,
1078                               u64 bytenr, u64 num_bytes, int alloc,
1079                               int mark_free)
1080 {
1081         struct btrfs_block_group_cache *cache;
1082         struct btrfs_fs_info *info = root->fs_info;
1083         u64 total = num_bytes;
1084         u64 old_val;
1085         u64 byte_in_group;
1086         u64 start;
1087         u64 end;
1088
1089         while(total) {
1090                 cache = btrfs_lookup_block_group(info, bytenr);
1091                 if (!cache) {
1092                         return -1;
1093                 }
1094                 byte_in_group = bytenr - cache->key.objectid;
1095                 WARN_ON(byte_in_group > cache->key.offset);
1096                 start = cache->key.objectid;
1097                 end = start + cache->key.offset - 1;
1098                 set_extent_bits(&info->block_group_cache, start, end,
1099                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1100
1101                 old_val = btrfs_block_group_used(&cache->item);
1102                 num_bytes = min(total, cache->key.offset - byte_in_group);
1103                 if (alloc) {
1104                         old_val += num_bytes;
1105                         cache->space_info->bytes_used += num_bytes;
1106                 } else {
1107                         old_val -= num_bytes;
1108                         cache->space_info->bytes_used -= num_bytes;
1109                         if (mark_free) {
1110                                 set_extent_dirty(&info->free_space_cache,
1111                                                  bytenr, bytenr + num_bytes - 1,
1112                                                  GFP_NOFS);
1113                         }
1114                 }
1115                 btrfs_set_block_group_used(&cache->item, old_val);
1116                 total -= num_bytes;
1117                 bytenr += num_bytes;
1118         }
1119         return 0;
1120 }
1121
1122 static int update_pinned_extents(struct btrfs_root *root,
1123                                 u64 bytenr, u64 num, int pin)
1124 {
1125         u64 len;
1126         struct btrfs_block_group_cache *cache;
1127         struct btrfs_fs_info *fs_info = root->fs_info;
1128
1129         if (pin) {
1130                 set_extent_dirty(&fs_info->pinned_extents,
1131                                 bytenr, bytenr + num - 1, GFP_NOFS);
1132         } else {
1133                 clear_extent_dirty(&fs_info->pinned_extents,
1134                                 bytenr, bytenr + num - 1, GFP_NOFS);
1135         }
1136         while (num > 0) {
1137                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1138                 WARN_ON(!cache);
1139                 len = min(num, cache->key.offset -
1140                           (bytenr - cache->key.objectid));
1141                 if (pin) {
1142                         cache->pinned += len;
1143                         cache->space_info->bytes_pinned += len;
1144                         fs_info->total_pinned += len;
1145                 } else {
1146                         cache->pinned -= len;
1147                         cache->space_info->bytes_pinned -= len;
1148                         fs_info->total_pinned -= len;
1149                 }
1150                 bytenr += len;
1151                 num -= len;
1152         }
1153         return 0;
1154 }
1155
1156 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1157 {
1158         u64 last = 0;
1159         u64 start;
1160         u64 end;
1161         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1162         int ret;
1163
1164         while(1) {
1165                 ret = find_first_extent_bit(pinned_extents, last,
1166                                             &start, &end, EXTENT_DIRTY);
1167                 if (ret)
1168                         break;
1169                 set_extent_dirty(copy, start, end, GFP_NOFS);
1170                 last = end + 1;
1171         }
1172         return 0;
1173 }
1174
1175 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1176                                struct btrfs_root *root,
1177                                struct extent_io_tree *unpin)
1178 {
1179         u64 start;
1180         u64 end;
1181         int ret;
1182         struct extent_io_tree *free_space_cache;
1183         free_space_cache = &root->fs_info->free_space_cache;
1184
1185         while(1) {
1186                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1187                                             EXTENT_DIRTY);
1188                 if (ret)
1189                         break;
1190                 update_pinned_extents(root, start, end + 1 - start, 0);
1191                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1192                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1193         }
1194         return 0;
1195 }
1196
1197 static int finish_current_insert(struct btrfs_trans_handle *trans,
1198                                  struct btrfs_root *extent_root)
1199 {
1200         u64 start;
1201         u64 end;
1202         struct btrfs_fs_info *info = extent_root->fs_info;
1203         struct extent_buffer *eb;
1204         struct btrfs_path *path;
1205         struct btrfs_key ins;
1206         struct btrfs_disk_key first;
1207         struct btrfs_extent_item extent_item;
1208         int ret;
1209         int level;
1210         int err = 0;
1211
1212         btrfs_set_stack_extent_refs(&extent_item, 1);
1213         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1214         path = btrfs_alloc_path();
1215
1216         while(1) {
1217                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1218                                             &end, EXTENT_LOCKED);
1219                 if (ret)
1220                         break;
1221
1222                 ins.objectid = start;
1223                 ins.offset = end + 1 - start;
1224                 err = btrfs_insert_item(trans, extent_root, &ins,
1225                                         &extent_item, sizeof(extent_item));
1226                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1227                                   GFP_NOFS);
1228                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1229                 level = btrfs_header_level(eb);
1230                 if (level == 0) {
1231                         btrfs_item_key(eb, &first, 0);
1232                 } else {
1233                         btrfs_node_key(eb, &first, 0);
1234                 }
1235                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1236                                           start, extent_root->root_key.objectid,
1237                                           0, level,
1238                                           btrfs_disk_key_objectid(&first));
1239                 BUG_ON(err);
1240                 free_extent_buffer(eb);
1241         }
1242         btrfs_free_path(path);
1243         return 0;
1244 }
1245
1246 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1247                           int pending)
1248 {
1249         int err = 0;
1250         struct extent_buffer *buf;
1251
1252         if (!pending) {
1253                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1254                 if (buf) {
1255                         if (btrfs_buffer_uptodate(buf)) {
1256                                 u64 transid =
1257                                     root->fs_info->running_transaction->transid;
1258                                 if (btrfs_header_generation(buf) ==
1259                                     transid && !btrfs_header_flag(buf,
1260                                                BTRFS_HEADER_FLAG_WRITTEN)) {
1261                                         free_extent_buffer(buf);
1262                                         return 1;
1263                                 }
1264                         }
1265                         free_extent_buffer(buf);
1266                 }
1267                 update_pinned_extents(root, bytenr, num_bytes, 1);
1268         } else {
1269                 set_extent_bits(&root->fs_info->pending_del,
1270                                 bytenr, bytenr + num_bytes - 1,
1271                                 EXTENT_LOCKED, GFP_NOFS);
1272         }
1273         BUG_ON(err < 0);
1274         return 0;
1275 }
1276
1277 /*
1278  * remove an extent from the root, returns 0 on success
1279  */
1280 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1281                          *root, u64 bytenr, u64 num_bytes,
1282                          u64 root_objectid, u64 ref_generation,
1283                          u64 owner_objectid, u64 owner_offset, int pin,
1284                          int mark_free)
1285 {
1286         struct btrfs_path *path;
1287         struct btrfs_key key;
1288         struct btrfs_fs_info *info = root->fs_info;
1289         struct btrfs_extent_ops *ops = info->extent_ops;
1290         struct btrfs_root *extent_root = info->extent_root;
1291         struct extent_buffer *leaf;
1292         int ret;
1293         int extent_slot = 0;
1294         int found_extent = 0;
1295         int num_to_del = 1;
1296         struct btrfs_extent_item *ei;
1297         u32 refs;
1298
1299         key.objectid = bytenr;
1300         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1301         key.offset = num_bytes;
1302
1303         path = btrfs_alloc_path();
1304         if (!path)
1305                 return -ENOMEM;
1306
1307         ret = lookup_extent_backref(trans, extent_root, path,
1308                                     bytenr, root_objectid,
1309                                     ref_generation,
1310                                     owner_objectid, owner_offset, 1);
1311         if (ret == 0) {
1312                 struct btrfs_key found_key;
1313                 extent_slot = path->slots[0];
1314                 while(extent_slot > 0) {
1315                         extent_slot--;
1316                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1317                                               extent_slot);
1318                         if (found_key.objectid != bytenr)
1319                                 break;
1320                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1321                             found_key.offset == num_bytes) {
1322                                 found_extent = 1;
1323                                 break;
1324                         }
1325                         if (path->slots[0] - extent_slot > 5)
1326                                 break;
1327                 }
1328                 if (!found_extent)
1329                         ret = btrfs_del_item(trans, extent_root, path);
1330         } else {
1331                 btrfs_print_leaf(extent_root, path->nodes[0]);
1332                 WARN_ON(1);
1333                 printk("Unable to find ref byte nr %llu root %llu "
1334                        " gen %llu owner %llu offset %llu\n",
1335                        (unsigned long long)bytenr,
1336                        (unsigned long long)root_objectid,
1337                        (unsigned long long)ref_generation,
1338                        (unsigned long long)owner_objectid,
1339                        (unsigned long long)owner_offset);
1340         }
1341         if (!found_extent) {
1342                 btrfs_release_path(extent_root, path);
1343                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1344                 if (ret < 0)
1345                         return ret;
1346                 BUG_ON(ret);
1347                 extent_slot = path->slots[0];
1348         }
1349
1350         leaf = path->nodes[0];
1351         ei = btrfs_item_ptr(leaf, extent_slot,
1352                             struct btrfs_extent_item);
1353         refs = btrfs_extent_refs(leaf, ei);
1354         BUG_ON(refs == 0);
1355         refs -= 1;
1356         btrfs_set_extent_refs(leaf, ei, refs);
1357
1358         btrfs_mark_buffer_dirty(leaf);
1359
1360         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1361                 /* if the back ref and the extent are next to each other
1362                  * they get deleted below in one shot
1363                  */
1364                 path->slots[0] = extent_slot;
1365                 num_to_del = 2;
1366         } else if (found_extent) {
1367                 /* otherwise delete the extent back ref */
1368                 ret = btrfs_del_item(trans, extent_root, path);
1369                 BUG_ON(ret);
1370                 /* if refs are 0, we need to setup the path for deletion */
1371                 if (refs == 0) {
1372                         btrfs_release_path(extent_root, path);
1373                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1374                                                 -1, 1);
1375                         if (ret < 0)
1376                                 return ret;
1377                         BUG_ON(ret);
1378                 }
1379         }
1380
1381         if (refs == 0) {
1382                 u64 super_used;
1383                 u64 root_used;
1384
1385                 if (pin) {
1386                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1387                         if (ret > 0)
1388                                 mark_free = 1;
1389                         BUG_ON(ret < 0);
1390                 }
1391
1392                 /* block accounting for super block */
1393                 super_used = btrfs_super_bytes_used(&info->super_copy);
1394                 btrfs_set_super_bytes_used(&info->super_copy,
1395                                            super_used - num_bytes);
1396
1397                 /* block accounting for root item */
1398                 root_used = btrfs_root_used(&root->root_item);
1399                 btrfs_set_root_used(&root->root_item,
1400                                            root_used - num_bytes);
1401                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1402                                       num_to_del);
1403                 if (ret)
1404                         return ret;
1405
1406                 if (ops && ops->free_extent)
1407                         ops->free_extent(root, bytenr, num_bytes);
1408
1409                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1410                                          mark_free);
1411                 BUG_ON(ret);
1412         }
1413         btrfs_free_path(path);
1414         finish_current_insert(trans, extent_root);
1415         return ret;
1416 }
1417
1418 /*
1419  * find all the blocks marked as pending in the radix tree and remove
1420  * them from the extent map
1421  */
1422 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1423                                btrfs_root *extent_root)
1424 {
1425         int ret;
1426         int err = 0;
1427         u64 start;
1428         u64 end;
1429         struct extent_io_tree *pending_del;
1430         struct extent_io_tree *pinned_extents;
1431
1432         pending_del = &extent_root->fs_info->pending_del;
1433         pinned_extents = &extent_root->fs_info->pinned_extents;
1434
1435         while(1) {
1436                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1437                                             EXTENT_LOCKED);
1438                 if (ret)
1439                         break;
1440                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1441                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1442                                   GFP_NOFS);
1443                 ret = __free_extent(trans, extent_root,
1444                                      start, end + 1 - start,
1445                                      extent_root->root_key.objectid,
1446                                      0, 0, 0, 0, 0);
1447                 if (ret)
1448                         err = ret;
1449         }
1450         return err;
1451 }
1452
1453 /*
1454  * remove an extent from the root, returns 0 on success
1455  */
1456 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1457                       *root, u64 bytenr, u64 num_bytes,
1458                       u64 root_objectid, u64 ref_generation,
1459                       u64 owner_objectid, u64 owner_offset, int pin)
1460 {
1461         struct btrfs_root *extent_root = root->fs_info->extent_root;
1462         int pending_ret;
1463         int ret;
1464
1465         WARN_ON(num_bytes < root->sectorsize);
1466         if (!root->ref_cows)
1467                 ref_generation = 0;
1468
1469         if (root == extent_root) {
1470                 pin_down_bytes(root, bytenr, num_bytes, 1);
1471                 return 0;
1472         }
1473         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1474                             ref_generation, owner_objectid, owner_offset,
1475                             pin, pin == 0);
1476         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1477         return ret ? ret : pending_ret;
1478 }
1479
1480 static u64 stripe_align(struct btrfs_root *root, u64 val)
1481 {
1482         u64 mask = ((u64)root->stripesize - 1);
1483         u64 ret = (val + mask) & ~mask;
1484         return ret;
1485 }
1486
1487 /*
1488  * walks the btree of allocated extents and find a hole of a given size.
1489  * The key ins is changed to record the hole:
1490  * ins->objectid == block start
1491  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1492  * ins->offset == number of blocks
1493  * Any available blocks before search_start are skipped.
1494  */
1495 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1496                                      struct btrfs_root *orig_root,
1497                                      u64 num_bytes, u64 empty_size,
1498                                      u64 search_start, u64 search_end,
1499                                      u64 hint_byte, struct btrfs_key *ins,
1500                                      u64 exclude_start, u64 exclude_nr,
1501                                      int data)
1502 {
1503         int ret;
1504         u64 orig_search_start = search_start;
1505         struct btrfs_root * root = orig_root->fs_info->extent_root;
1506         struct btrfs_fs_info *info = root->fs_info;
1507         u64 total_needed = num_bytes;
1508         struct btrfs_block_group_cache *block_group;
1509         int full_scan = 0;
1510         int wrapped = 0;
1511
1512         WARN_ON(num_bytes < root->sectorsize);
1513         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1514
1515         if (search_end == (u64)-1)
1516                 search_end = btrfs_super_total_bytes(&info->super_copy);
1517
1518         if (hint_byte) {
1519                 block_group = btrfs_lookup_block_group(info, hint_byte);
1520                 if (!block_group)
1521                         hint_byte = search_start;
1522                 block_group = btrfs_find_block_group(root, block_group,
1523                                                      hint_byte, data, 1);
1524         } else {
1525                 block_group = btrfs_find_block_group(root,
1526                                                      trans->block_group,
1527                                                      search_start, data, 1);
1528         }
1529
1530         total_needed += empty_size;
1531
1532 check_failed:
1533         if (!block_group) {
1534                 block_group = btrfs_lookup_block_group(info, search_start);
1535                 if (!block_group)
1536                         block_group = btrfs_lookup_block_group(info,
1537                                                        orig_search_start);
1538         }
1539         ret = find_search_start(root, &block_group, &search_start,
1540                                 total_needed, data);
1541         if (ret)
1542                 goto error;
1543
1544         search_start = stripe_align(root, search_start);
1545         ins->objectid = search_start;
1546         ins->offset = num_bytes;
1547
1548         if (ins->objectid + num_bytes >= search_end)
1549                 goto enospc;
1550
1551         if (ins->objectid + num_bytes >
1552             block_group->key.objectid + block_group->key.offset) {
1553                 search_start = block_group->key.objectid +
1554                         block_group->key.offset;
1555                 goto new_group;
1556         }
1557
1558         if (test_range_bit(&info->extent_ins, ins->objectid,
1559                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1560                 search_start = ins->objectid + num_bytes;
1561                 goto new_group;
1562         }
1563
1564         if (test_range_bit(&info->pinned_extents, ins->objectid,
1565                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1566                 search_start = ins->objectid + num_bytes;
1567                 goto new_group;
1568         }
1569
1570         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1571             ins->objectid < exclude_start + exclude_nr)) {
1572                 search_start = exclude_start + exclude_nr;
1573                 goto new_group;
1574         }
1575
1576         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1577                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1578                 if (block_group)
1579                         trans->block_group = block_group;
1580         }
1581         ins->offset = num_bytes;
1582         return 0;
1583
1584 new_group:
1585         if (search_start + num_bytes >= search_end) {
1586 enospc:
1587                 search_start = orig_search_start;
1588                 if (full_scan) {
1589                         ret = -ENOSPC;
1590                         goto error;
1591                 }
1592                 if (wrapped) {
1593                         if (!full_scan)
1594                                 total_needed -= empty_size;
1595                         full_scan = 1;
1596                 } else
1597                         wrapped = 1;
1598         }
1599         block_group = btrfs_lookup_block_group(info, search_start);
1600         cond_resched();
1601         block_group = btrfs_find_block_group(root, block_group,
1602                                              search_start, data, 0);
1603         goto check_failed;
1604
1605 error:
1606         return ret;
1607 }
1608 /*
1609  * finds a free extent and does all the dirty work required for allocation
1610  * returns the key for the extent through ins, and a tree buffer for
1611  * the first block of the extent through buf.
1612  *
1613  * returns 0 if everything worked, non-zero otherwise.
1614  */
1615 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1616                        struct btrfs_root *root,
1617                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1618                        u64 owner, u64 owner_offset,
1619                        u64 empty_size, u64 hint_byte,
1620                        u64 search_end, struct btrfs_key *ins, int data)
1621 {
1622         int ret;
1623         int pending_ret;
1624         u64 super_used, root_used;
1625         u64 search_start = 0;
1626         u64 alloc_profile;
1627         u32 sizes[2];
1628         struct btrfs_fs_info *info = root->fs_info;
1629         struct btrfs_root *extent_root = info->extent_root;
1630         struct btrfs_path *path;
1631         struct btrfs_extent_item *extent_item;
1632         struct btrfs_extent_ref *ref;
1633         struct btrfs_key keys[2];
1634
1635         if (info->extent_ops) {
1636                 struct btrfs_extent_ops *ops = info->extent_ops;
1637                 ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
1638                 BUG_ON(ret);
1639                 goto found;
1640         }
1641
1642         if (data) {
1643                 alloc_profile = info->avail_data_alloc_bits &
1644                                 info->data_alloc_profile;
1645                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1646         } else if ((info->system_allocs > 0 || root == info->chunk_root) &&
1647                    info->system_allocs >= 0) {
1648                 alloc_profile = info->avail_system_alloc_bits &
1649                                 info->system_alloc_profile;
1650                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1651         } else {
1652                 alloc_profile = info->avail_metadata_alloc_bits &
1653                                 info->metadata_alloc_profile;
1654                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1655         }
1656
1657         if (root->ref_cows) {
1658                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1659                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1660                                              num_bytes,
1661                                              BTRFS_BLOCK_GROUP_METADATA);
1662                         BUG_ON(ret);
1663                 }
1664                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1665                                      num_bytes + 2 * 1024 * 1024, data);
1666                 BUG_ON(ret);
1667         }
1668
1669         WARN_ON(num_bytes < root->sectorsize);
1670         ret = find_free_extent(trans, root, num_bytes, empty_size,
1671                                search_start, search_end, hint_byte, ins,
1672                                trans->alloc_exclude_start,
1673                                trans->alloc_exclude_nr, data);
1674         BUG_ON(ret);
1675 found:
1676         if (ret)
1677                 return ret;
1678
1679         /* block accounting for super block */
1680         super_used = btrfs_super_bytes_used(&info->super_copy);
1681         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1682
1683         /* block accounting for root item */
1684         root_used = btrfs_root_used(&root->root_item);
1685         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1686
1687         clear_extent_dirty(&root->fs_info->free_space_cache,
1688                            ins->objectid, ins->objectid + ins->offset - 1,
1689                            GFP_NOFS);
1690
1691         if (root == extent_root) {
1692                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1693                                 ins->objectid + ins->offset - 1,
1694                                 EXTENT_LOCKED, GFP_NOFS);
1695                 goto update_block;
1696         }
1697
1698         WARN_ON(trans->alloc_exclude_nr);
1699         trans->alloc_exclude_start = ins->objectid;
1700         trans->alloc_exclude_nr = ins->offset;
1701
1702         memcpy(&keys[0], ins, sizeof(*ins));
1703         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1704                                          owner, owner_offset);
1705         keys[1].objectid = ins->objectid;
1706         keys[1].type = BTRFS_EXTENT_REF_KEY;
1707         sizes[0] = sizeof(*extent_item);
1708         sizes[1] = sizeof(*ref);
1709
1710         path = btrfs_alloc_path();
1711         BUG_ON(!path);
1712
1713         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1714                                        sizes, 2);
1715
1716         BUG_ON(ret);
1717         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1718                                      struct btrfs_extent_item);
1719         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1720         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1721                              struct btrfs_extent_ref);
1722
1723         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1724         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1725         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1726         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1727
1728         btrfs_mark_buffer_dirty(path->nodes[0]);
1729
1730         trans->alloc_exclude_start = 0;
1731         trans->alloc_exclude_nr = 0;
1732         btrfs_free_path(path);
1733         finish_current_insert(trans, extent_root);
1734         pending_ret = del_pending_extents(trans, extent_root);
1735
1736         if (ret) {
1737                 return ret;
1738         }
1739         if (pending_ret) {
1740                 return pending_ret;
1741         }
1742
1743 update_block:
1744         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1745         if (ret) {
1746                 printk("update block group failed for %llu %llu\n",
1747                        (unsigned long long)ins->objectid,
1748                        (unsigned long long)ins->offset);
1749                 BUG();
1750         }
1751         return 0;
1752 }
1753
1754 /*
1755  * helper function to allocate a block for a given tree
1756  * returns the tree buffer or NULL.
1757  */
1758 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1759                                              struct btrfs_root *root,
1760                                              u32 blocksize,
1761                                              u64 root_objectid, u64 hint,
1762                                              u64 empty_size)
1763 {
1764         u64 ref_generation;
1765
1766         if (root->ref_cows)
1767                 ref_generation = trans->transid;
1768         else
1769                 ref_generation = 0;
1770
1771
1772         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1773                                         ref_generation, 0, 0, hint, empty_size);
1774 }
1775
1776 /*
1777  * helper function to allocate a block for a given tree
1778  * returns the tree buffer or NULL.
1779  */
1780 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1781                                              struct btrfs_root *root,
1782                                              u32 blocksize,
1783                                              u64 root_objectid,
1784                                              u64 ref_generation,
1785                                              u64 first_objectid,
1786                                              int level,
1787                                              u64 hint,
1788                                              u64 empty_size)
1789 {
1790         struct btrfs_key ins;
1791         int ret;
1792         struct extent_buffer *buf;
1793
1794         ret = btrfs_alloc_extent(trans, root, blocksize,
1795                                  root_objectid, ref_generation,
1796                                  level, first_objectid, empty_size, hint,
1797                                  (u64)-1, &ins, 0);
1798         if (ret) {
1799                 BUG_ON(ret > 0);
1800                 return ERR_PTR(ret);
1801         }
1802         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1803         if (!buf) {
1804                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1805                                   root->root_key.objectid, ref_generation,
1806                                   0, 0, 0);
1807                 BUG_ON(1);
1808                 return ERR_PTR(-ENOMEM);
1809         }
1810         btrfs_set_buffer_uptodate(buf);
1811         trans->blocks_used++;
1812         return buf;
1813 }
1814
1815 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1816                                   struct btrfs_root *root,
1817                                   struct extent_buffer *leaf)
1818 {
1819         u64 leaf_owner;
1820         u64 leaf_generation;
1821         struct btrfs_key key;
1822         struct btrfs_file_extent_item *fi;
1823         int i;
1824         int nritems;
1825         int ret;
1826
1827         BUG_ON(!btrfs_is_leaf(leaf));
1828         nritems = btrfs_header_nritems(leaf);
1829         leaf_owner = btrfs_header_owner(leaf);
1830         leaf_generation = btrfs_header_generation(leaf);
1831
1832         for (i = 0; i < nritems; i++) {
1833                 u64 disk_bytenr;
1834
1835                 btrfs_item_key_to_cpu(leaf, &key, i);
1836                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1837                         continue;
1838                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1839                 if (btrfs_file_extent_type(leaf, fi) ==
1840                     BTRFS_FILE_EXTENT_INLINE)
1841                         continue;
1842                 /*
1843                  * FIXME make sure to insert a trans record that
1844                  * repeats the snapshot del on crash
1845                  */
1846                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1847                 if (disk_bytenr == 0)
1848                         continue;
1849                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1850                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1851                                 leaf_owner, leaf_generation,
1852                                 key.objectid, key.offset, 0);
1853                 BUG_ON(ret);
1854         }
1855         return 0;
1856 }
1857
1858 static void noinline reada_walk_down(struct btrfs_root *root,
1859                                      struct extent_buffer *node,
1860                                      int slot)
1861 {
1862         u64 bytenr;
1863         u64 last = 0;
1864         u32 nritems;
1865         u32 refs;
1866         u32 blocksize;
1867         int ret;
1868         int i;
1869         int level;
1870         int skipped = 0;
1871
1872         nritems = btrfs_header_nritems(node);
1873         level = btrfs_header_level(node);
1874         if (level)
1875                 return;
1876
1877         for (i = slot; i < nritems && skipped < 32; i++) {
1878                 bytenr = btrfs_node_blockptr(node, i);
1879                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1880                              (last > bytenr && last - bytenr > 32 * 1024))) {
1881                         skipped++;
1882                         continue;
1883                 }
1884                 blocksize = btrfs_level_size(root, level - 1);
1885                 if (i != slot) {
1886                         ret = lookup_extent_ref(NULL, root, bytenr,
1887                                                 blocksize, &refs);
1888                         BUG_ON(ret);
1889                         if (refs != 1) {
1890                                 skipped++;
1891                                 continue;
1892                         }
1893                 }
1894                 mutex_unlock(&root->fs_info->fs_mutex);
1895                 ret = readahead_tree_block(root, bytenr, blocksize);
1896                 last = bytenr + blocksize;
1897                 cond_resched();
1898                 mutex_lock(&root->fs_info->fs_mutex);
1899                 if (ret)
1900                         break;
1901         }
1902 }
1903
1904 /*
1905  * helper function for drop_snapshot, this walks down the tree dropping ref
1906  * counts as it goes.
1907  */
1908 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
1909                                    struct btrfs_root *root,
1910                                    struct btrfs_path *path, int *level)
1911 {
1912         u64 root_owner;
1913         u64 root_gen;
1914         u64 bytenr;
1915         struct extent_buffer *next;
1916         struct extent_buffer *cur;
1917         struct extent_buffer *parent;
1918         u32 blocksize;
1919         int ret;
1920         u32 refs;
1921
1922         WARN_ON(*level < 0);
1923         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1924         ret = lookup_extent_ref(trans, root,
1925                                 path->nodes[*level]->start,
1926                                 path->nodes[*level]->len, &refs);
1927         BUG_ON(ret);
1928         if (refs > 1)
1929                 goto out;
1930
1931         /*
1932          * walk down to the last node level and free all the leaves
1933          */
1934         while(*level >= 0) {
1935                 WARN_ON(*level < 0);
1936                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1937                 cur = path->nodes[*level];
1938
1939                 if (btrfs_header_level(cur) != *level)
1940                         WARN_ON(1);
1941
1942                 if (path->slots[*level] >=
1943                     btrfs_header_nritems(cur))
1944                         break;
1945                 if (*level == 0) {
1946                         ret = drop_leaf_ref(trans, root, cur);
1947                         BUG_ON(ret);
1948                         break;
1949                 }
1950                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1951                 blocksize = btrfs_level_size(root, *level - 1);
1952                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1953                 BUG_ON(ret);
1954                 if (refs != 1) {
1955                         parent = path->nodes[*level];
1956                         root_owner = btrfs_header_owner(parent);
1957                         root_gen = btrfs_header_generation(parent);
1958                         path->slots[*level]++;
1959                         ret = btrfs_free_extent(trans, root, bytenr,
1960                                                 blocksize, root_owner,
1961                                                 root_gen, 0, 0, 1);
1962                         BUG_ON(ret);
1963                         continue;
1964                 }
1965                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1966                 if (!next || !btrfs_buffer_uptodate(next)) {
1967                         free_extent_buffer(next);
1968                         reada_walk_down(root, cur, path->slots[*level]);
1969                         mutex_unlock(&root->fs_info->fs_mutex);
1970                         next = read_tree_block(root, bytenr, blocksize);
1971                         mutex_lock(&root->fs_info->fs_mutex);
1972
1973                         /* we dropped the lock, check one more time */
1974                         ret = lookup_extent_ref(trans, root, bytenr,
1975                                                 blocksize, &refs);
1976                         BUG_ON(ret);
1977                         if (refs != 1) {
1978                                 parent = path->nodes[*level];
1979                                 root_owner = btrfs_header_owner(parent);
1980                                 root_gen = btrfs_header_generation(parent);
1981
1982                                 path->slots[*level]++;
1983                                 free_extent_buffer(next);
1984                                 ret = btrfs_free_extent(trans, root, bytenr,
1985                                                         blocksize,
1986                                                         root_owner,
1987                                                         root_gen, 0, 0, 1);
1988                                 BUG_ON(ret);
1989                                 continue;
1990                         }
1991                 }
1992                 WARN_ON(*level <= 0);
1993                 if (path->nodes[*level-1])
1994                         free_extent_buffer(path->nodes[*level-1]);
1995                 path->nodes[*level-1] = next;
1996                 *level = btrfs_header_level(next);
1997                 path->slots[*level] = 0;
1998         }
1999 out:
2000         WARN_ON(*level < 0);
2001         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2002
2003         if (path->nodes[*level] == root->node) {
2004                 root_owner = root->root_key.objectid;
2005                 parent = path->nodes[*level];
2006         } else {
2007                 parent = path->nodes[*level + 1];
2008                 root_owner = btrfs_header_owner(parent);
2009         }
2010
2011         root_gen = btrfs_header_generation(parent);
2012         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
2013                                 path->nodes[*level]->len,
2014                                 root_owner, root_gen, 0, 0, 1);
2015         free_extent_buffer(path->nodes[*level]);
2016         path->nodes[*level] = NULL;
2017         *level += 1;
2018         BUG_ON(ret);
2019         return 0;
2020 }
2021
2022 /*
2023  * helper for dropping snapshots.  This walks back up the tree in the path
2024  * to find the first node higher up where we haven't yet gone through
2025  * all the slots
2026  */
2027 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2028                                  struct btrfs_root *root,
2029                                  struct btrfs_path *path, int *level)
2030 {
2031         u64 root_owner;
2032         u64 root_gen;
2033         struct btrfs_root_item *root_item = &root->root_item;
2034         int i;
2035         int slot;
2036         int ret;
2037
2038         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2039                 slot = path->slots[i];
2040                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2041                         struct extent_buffer *node;
2042                         struct btrfs_disk_key disk_key;
2043                         node = path->nodes[i];
2044                         path->slots[i]++;
2045                         *level = i;
2046                         WARN_ON(*level == 0);
2047                         btrfs_node_key(node, &disk_key, path->slots[i]);
2048                         memcpy(&root_item->drop_progress,
2049                                &disk_key, sizeof(disk_key));
2050                         root_item->drop_level = i;
2051                         return 0;
2052                 } else {
2053                         if (path->nodes[*level] == root->node) {
2054                                 root_owner = root->root_key.objectid;
2055                                 root_gen =
2056                                    btrfs_header_generation(path->nodes[*level]);
2057                         } else {
2058                                 struct extent_buffer *node;
2059                                 node = path->nodes[*level + 1];
2060                                 root_owner = btrfs_header_owner(node);
2061                                 root_gen = btrfs_header_generation(node);
2062                         }
2063                         ret = btrfs_free_extent(trans, root,
2064                                                 path->nodes[*level]->start,
2065                                                 path->nodes[*level]->len,
2066                                                 root_owner, root_gen, 0, 0, 1);
2067                         BUG_ON(ret);
2068                         free_extent_buffer(path->nodes[*level]);
2069                         path->nodes[*level] = NULL;
2070                         *level = i + 1;
2071                 }
2072         }
2073         return 1;
2074 }
2075
2076 /*
2077  * drop the reference count on the tree rooted at 'snap'.  This traverses
2078  * the tree freeing any blocks that have a ref count of zero after being
2079  * decremented.
2080  */
2081 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2082                         *root)
2083 {
2084         int ret = 0;
2085         int wret;
2086         int level;
2087         struct btrfs_path *path;
2088         int i;
2089         int orig_level;
2090         struct btrfs_root_item *root_item = &root->root_item;
2091
2092         path = btrfs_alloc_path();
2093         BUG_ON(!path);
2094
2095         level = btrfs_header_level(root->node);
2096         orig_level = level;
2097         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2098                 path->nodes[level] = root->node;
2099                 extent_buffer_get(root->node);
2100                 path->slots[level] = 0;
2101         } else {
2102                 struct btrfs_key key;
2103                 struct btrfs_disk_key found_key;
2104                 struct extent_buffer *node;
2105
2106                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2107                 level = root_item->drop_level;
2108                 path->lowest_level = level;
2109                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2110                 if (wret < 0) {
2111                         ret = wret;
2112                         goto out;
2113                 }
2114                 node = path->nodes[level];
2115                 btrfs_node_key(node, &found_key, path->slots[level]);
2116                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2117                                sizeof(found_key)));
2118         }
2119         while(1) {
2120                 wret = walk_down_tree(trans, root, path, &level);
2121                 if (wret < 0)
2122                         ret = wret;
2123                 if (wret != 0)
2124                         break;
2125
2126                 wret = walk_up_tree(trans, root, path, &level);
2127                 if (wret < 0)
2128                         ret = wret;
2129                 if (wret != 0)
2130                         break;
2131                 /*
2132                 ret = -EAGAIN;
2133                 break;
2134                 */
2135         }
2136         for (i = 0; i <= orig_level; i++) {
2137                 if (path->nodes[i]) {
2138                         free_extent_buffer(path->nodes[i]);
2139                         path->nodes[i] = NULL;
2140                 }
2141         }
2142 out:
2143         btrfs_free_path(path);
2144         return ret;
2145 }
2146
2147 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2148 {
2149         u64 start;
2150         u64 end;
2151         u64 ptr;
2152         int ret;
2153         while(1) {
2154                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2155                                             &start, &end, (unsigned int)-1);
2156                 if (ret)
2157                         break;
2158                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2159                 if (!ret)
2160                         kfree((void *)(unsigned long)ptr);
2161                 clear_extent_bits(&info->block_group_cache, start,
2162                                   end, (unsigned int)-1, GFP_NOFS);
2163         }
2164         while(1) {
2165                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2166                                             &start, &end, EXTENT_DIRTY);
2167                 if (ret)
2168                         break;
2169                 clear_extent_dirty(&info->free_space_cache, start,
2170                                    end, GFP_NOFS);
2171         }
2172         return 0;
2173 }
2174
2175 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2176                            struct btrfs_key *key)
2177 {
2178         int ret;
2179         struct btrfs_key found_key;
2180         struct extent_buffer *leaf;
2181         int slot;
2182
2183         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2184         if (ret < 0)
2185                 return ret;
2186         while(1) {
2187                 slot = path->slots[0];
2188                 leaf = path->nodes[0];
2189                 if (slot >= btrfs_header_nritems(leaf)) {
2190                         ret = btrfs_next_leaf(root, path);
2191                         if (ret == 0)
2192                                 continue;
2193                         if (ret < 0)
2194                                 goto error;
2195                         break;
2196                 }
2197                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2198
2199                 if (found_key.objectid >= key->objectid &&
2200                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2201                         return 0;
2202                 path->slots[0]++;
2203         }
2204         ret = -ENOENT;
2205 error:
2206         return ret;
2207 }
2208
2209 int btrfs_read_block_groups(struct btrfs_root *root)
2210 {
2211         struct btrfs_path *path;
2212         int ret;
2213         int bit;
2214         struct btrfs_block_group_cache *cache;
2215         struct btrfs_fs_info *info = root->fs_info;
2216         struct btrfs_space_info *space_info;
2217         struct extent_io_tree *block_group_cache;
2218         struct btrfs_key key;
2219         struct btrfs_key found_key;
2220         struct extent_buffer *leaf;
2221
2222         block_group_cache = &info->block_group_cache;
2223
2224         root = info->extent_root;
2225         key.objectid = 0;
2226         key.offset = 0;
2227         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2228         path = btrfs_alloc_path();
2229         if (!path)
2230                 return -ENOMEM;
2231
2232         while(1) {
2233                 ret = find_first_block_group(root, path, &key);
2234                 if (ret > 0) {
2235                         ret = 0;
2236                         goto error;
2237                 }
2238                 if (ret != 0) {
2239                         goto error;
2240                 }
2241                 leaf = path->nodes[0];
2242                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2243                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2244                 if (!cache) {
2245                         ret = -ENOMEM;
2246                         break;
2247                 }
2248
2249                 read_extent_buffer(leaf, &cache->item,
2250                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2251                                    sizeof(cache->item));
2252                 memcpy(&cache->key, &found_key, sizeof(found_key));
2253                 cache->cached = 0;
2254                 cache->pinned = 0;
2255                 key.objectid = found_key.objectid + found_key.offset;
2256                 btrfs_release_path(root, path);
2257                 cache->flags = btrfs_block_group_flags(&cache->item);
2258                 bit = 0;
2259                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2260                         bit = BLOCK_GROUP_DATA;
2261                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2262                         bit = BLOCK_GROUP_SYSTEM;
2263                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2264                         bit = BLOCK_GROUP_METADATA;
2265                 }
2266                 set_avail_alloc_bits(info, cache->flags);
2267
2268                 ret = update_space_info(info, cache->flags, found_key.offset,
2269                                         btrfs_block_group_used(&cache->item),
2270                                         &space_info);
2271                 BUG_ON(ret);
2272                 cache->space_info = space_info;
2273
2274                 /* use EXTENT_LOCKED to prevent merging */
2275                 set_extent_bits(block_group_cache, found_key.objectid,
2276                                 found_key.objectid + found_key.offset - 1,
2277                                 bit | EXTENT_LOCKED, GFP_NOFS);
2278                 set_state_private(block_group_cache, found_key.objectid,
2279                                   (unsigned long)cache);
2280
2281                 if (key.objectid >=
2282                     btrfs_super_total_bytes(&info->super_copy))
2283                         break;
2284         }
2285         ret = 0;
2286 error:
2287         btrfs_free_path(path);
2288         return ret;
2289 }
2290
2291 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2292                            struct btrfs_root *root, u64 bytes_used,
2293                            u64 type, u64 chunk_objectid, u64 chunk_offset,
2294                            u64 size)
2295 {
2296         int ret;
2297         int bit = 0;
2298         struct btrfs_root *extent_root;
2299         struct btrfs_block_group_cache *cache;
2300         struct extent_io_tree *block_group_cache;
2301
2302         extent_root = root->fs_info->extent_root;
2303         block_group_cache = &root->fs_info->block_group_cache;
2304
2305         cache = kzalloc(sizeof(*cache), GFP_NOFS);
2306         BUG_ON(!cache);
2307         cache->key.objectid = chunk_offset;
2308         cache->key.offset = size;
2309
2310         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2311         btrfs_set_block_group_used(&cache->item, bytes_used);
2312         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2313         cache->flags = type;
2314         btrfs_set_block_group_flags(&cache->item, type);
2315
2316         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2317                                 &cache->space_info);
2318         BUG_ON(ret);
2319
2320         bit = block_group_state_bits(type);
2321         set_extent_bits(block_group_cache, chunk_offset,
2322                         chunk_offset + size - 1,
2323                         bit | EXTENT_LOCKED, GFP_NOFS);
2324
2325         set_state_private(block_group_cache, chunk_offset,
2326                           (unsigned long)cache);
2327         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2328                                 sizeof(cache->item));
2329         BUG_ON(ret);
2330
2331         finish_current_insert(trans, extent_root);
2332         ret = del_pending_extents(trans, extent_root);
2333         BUG_ON(ret);
2334         set_avail_alloc_bits(extent_root->fs_info, type);
2335         return 0;
2336 }
2337
2338 /*
2339  * This is for converter use only.
2340  *
2341  * In that case, we don't know where are free blocks located.
2342  * Therefore all block group cache entries must be setup properly
2343  * before doing any block allocation.
2344  */
2345 int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
2346                             struct btrfs_root *root)
2347 {
2348         u64 total_bytes;
2349         u64 cur_start;
2350         u64 group_type;
2351         u64 group_size;
2352         u64 group_nr = 0;
2353         u64 chunk_objectid;
2354         int ret;
2355         int bit;
2356         struct btrfs_root *extent_root;
2357         struct btrfs_block_group_cache *cache;
2358         struct extent_io_tree *block_group_cache;
2359
2360         extent_root = root->fs_info->extent_root;
2361         block_group_cache = &root->fs_info->block_group_cache;
2362         chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2363         total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
2364
2365         cur_start = 0;
2366         while (cur_start < total_bytes) {
2367                 if (group_nr == 0) {
2368                         bit = BLOCK_GROUP_SYSTEM;
2369                         group_type = BTRFS_BLOCK_GROUP_SYSTEM;
2370                 } else if (group_nr % 3 == 1) {
2371                         bit = BLOCK_GROUP_DATA;
2372                         group_type = BTRFS_BLOCK_GROUP_METADATA;
2373                 } else {
2374                         bit = BLOCK_GROUP_METADATA;
2375                         group_type = BTRFS_BLOCK_GROUP_DATA;
2376                 }
2377                 group_nr++;
2378
2379                 if (group_type == BTRFS_BLOCK_GROUP_SYSTEM) {
2380                         group_size = 32 * 1024 * 1024;
2381                 } else {
2382                         group_size = 256 * 1024 * 1024;
2383                         if (total_bytes - cur_start < group_size * 5 / 4)
2384                                 group_size = total_bytes - cur_start;
2385                 }
2386
2387                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
2388                 BUG_ON(!cache);
2389
2390                 cache->key.objectid = cur_start;
2391                 cache->key.offset = group_size;
2392                 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2393
2394                 btrfs_set_block_group_used(&cache->item, 0);
2395                 btrfs_set_block_group_chunk_objectid(&cache->item,
2396                                                      chunk_objectid);
2397                 btrfs_set_block_group_flags(&cache->item, group_type);
2398
2399                 cache->flags = group_type;
2400
2401                 ret = update_space_info(root->fs_info, group_type, group_size,
2402                                         0, &cache->space_info);
2403                 BUG_ON(ret);
2404                 set_avail_alloc_bits(extent_root->fs_info, group_type);
2405
2406                 set_extent_bits(block_group_cache, cur_start,
2407                                 cur_start + group_size - 1,
2408                                 bit | EXTENT_LOCKED, GFP_NOFS);
2409                 set_state_private(block_group_cache, cur_start,
2410                                   (unsigned long)cache);
2411                 cur_start += group_size;
2412         }
2413         /* then insert all the items */
2414         cur_start = 0;
2415         while(cur_start < total_bytes) {
2416                 cache = btrfs_lookup_block_group(root->fs_info, cur_start);
2417                 BUG_ON(!cache);
2418
2419                 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2420                                         sizeof(cache->item));
2421                 BUG_ON(ret);
2422
2423                 finish_current_insert(trans, extent_root);
2424                 ret = del_pending_extents(trans, extent_root);
2425                 BUG_ON(ret);
2426
2427                 cur_start = cache->key.objectid + cache->key.offset;
2428         }
2429         return 0;
2430 }
2431
2432 u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation,
2433                           u64 owner, u64 owner_offset)
2434 {
2435         return hash_extent_ref(root_objectid, ref_generation,
2436                                owner, owner_offset);
2437 }
2438
2439 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
2440                              struct btrfs_root *root,
2441                              u64 bytenr, u64 num_bytes, int alloc,
2442                              int mark_free)
2443 {
2444         return update_block_group(trans, root, bytenr, num_bytes,
2445                                   alloc, mark_free);
2446 }