Dynamic chunk allocation
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "radix-tree.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "crc32c.h"
28 #include "volumes.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
41                            struct btrfs_root *root, u64 bytes_used,
42                            u64 type, u64 chunk_tree, u64 chunk_objectid,
43                            u64 size);
44
45 static int cache_block_group(struct btrfs_root *root,
46                              struct btrfs_block_group_cache *block_group)
47 {
48         struct btrfs_path *path;
49         int ret;
50         struct btrfs_key key;
51         struct extent_buffer *leaf;
52         struct extent_io_tree *free_space_cache;
53         int slot;
54         u64 last = 0;
55         u64 hole_size;
56         u64 first_free;
57         int found = 0;
58
59         if (!block_group)
60                 return 0;
61
62         root = root->fs_info->extent_root;
63         free_space_cache = &root->fs_info->free_space_cache;
64
65         if (block_group->cached)
66                 return 0;
67
68         path = btrfs_alloc_path();
69         if (!path)
70                 return -ENOMEM;
71
72         path->reada = 2;
73         first_free = block_group->key.objectid;
74         key.objectid = block_group->key.objectid;
75         key.offset = 0;
76         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
77         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
78         if (ret < 0)
79                 return ret;
80         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
81         if (ret < 0)
82                 return ret;
83         if (ret == 0) {
84                 leaf = path->nodes[0];
85                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
86                 if (key.objectid + key.offset > first_free)
87                         first_free = key.objectid + key.offset;
88         }
89         while(1) {
90                 leaf = path->nodes[0];
91                 slot = path->slots[0];
92                 if (slot >= btrfs_header_nritems(leaf)) {
93                         ret = btrfs_next_leaf(root, path);
94                         if (ret < 0)
95                                 goto err;
96                         if (ret == 0) {
97                                 continue;
98                         } else {
99                                 break;
100                         }
101                 }
102                 btrfs_item_key_to_cpu(leaf, &key, slot);
103                 if (key.objectid < block_group->key.objectid) {
104                         goto next;
105                 }
106                 if (key.objectid >= block_group->key.objectid +
107                     block_group->key.offset) {
108                         break;
109                 }
110
111                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
112                         if (!found) {
113                                 last = first_free;
114                                 found = 1;
115                         }
116                         if (key.objectid > last) {
117                                 hole_size = key.objectid - last;
118                                 set_extent_dirty(free_space_cache, last,
119                                                  last + hole_size - 1,
120                                                  GFP_NOFS);
121                         }
122                         last = key.objectid + key.offset;
123                 }
124 next:
125                 path->slots[0]++;
126         }
127
128         if (!found)
129                 last = first_free;
130         if (block_group->key.objectid +
131             block_group->key.offset > last) {
132                 hole_size = block_group->key.objectid +
133                         block_group->key.offset - last;
134                 set_extent_dirty(free_space_cache, last,
135                                  last + hole_size - 1, GFP_NOFS);
136         }
137         block_group->cached = 1;
138 err:
139         btrfs_free_path(path);
140         return 0;
141 }
142
143 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
144                                                          btrfs_fs_info *info,
145                                                          u64 bytenr)
146 {
147         struct extent_io_tree *block_group_cache;
148         struct btrfs_block_group_cache *block_group = NULL;
149         u64 ptr;
150         u64 start;
151         u64 end;
152         int ret;
153
154         block_group_cache = &info->block_group_cache;
155         ret = find_first_extent_bit(block_group_cache,
156                                     bytenr, &start, &end,
157                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
158                                     BLOCK_GROUP_SYSTEM);
159         if (ret) {
160                 return NULL;
161         }
162         ret = get_state_private(block_group_cache, start, &ptr);
163         if (ret)
164                 return NULL;
165
166         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
167         if (block_group->key.objectid <= bytenr && bytenr <
168             block_group->key.objectid + block_group->key.offset)
169                 return block_group;
170         return NULL;
171 }
172
173 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
174 {
175         return (cache->flags & bits);
176 }
177
178 static int noinline find_search_start(struct btrfs_root *root,
179                               struct btrfs_block_group_cache **cache_ret,
180                               u64 *start_ret, int num, int data)
181 {
182         int ret;
183         struct btrfs_block_group_cache *cache = *cache_ret;
184         u64 last;
185         u64 start = 0;
186         u64 end = 0;
187         u64 cache_miss = 0;
188         u64 search_start = *start_ret;
189         int wrapped = 0;
190
191         if (!cache) {
192                 goto out;
193         }
194 again:
195         ret = cache_block_group(root, cache);
196         if (ret)
197                 goto out;
198
199         last = max(search_start, cache->key.objectid);
200         if (!block_group_bits(cache, data)) {
201                 goto new_group;
202         }
203
204         while(1) {
205                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
206                                             last, &start, &end, EXTENT_DIRTY);
207                 if (ret) {
208                         if (!cache_miss)
209                                 cache_miss = last;
210                         goto new_group;
211                 }
212
213                 start = max(last, start);
214                 last = end + 1;
215                 if (last - start < num) {
216                         if (last == cache->key.objectid + cache->key.offset)
217                                 cache_miss = start;
218                         continue;
219                 }
220                 if (start + num > cache->key.objectid + cache->key.offset)
221                         goto new_group;
222                 *start_ret = start;
223                 return 0;
224         }
225 out:
226         cache = btrfs_lookup_block_group(root->fs_info, search_start);
227         if (!cache) {
228                 printk("Unable to find block group for %Lu\n", search_start);
229                 WARN_ON(1);
230         }
231         return -ENOSPC;
232
233 new_group:
234         last = cache->key.objectid + cache->key.offset;
235 wrapped:
236         cache = btrfs_lookup_block_group(root->fs_info, last);
237         if (!cache) {
238 no_cache:
239                 if (!wrapped) {
240                         wrapped = 1;
241                         last = search_start;
242                         goto wrapped;
243                 }
244                 goto out;
245         }
246         if (cache_miss && !cache->cached) {
247                 cache_block_group(root, cache);
248                 last = cache_miss;
249                 cache = btrfs_lookup_block_group(root->fs_info, last);
250         }
251         cache = btrfs_find_block_group(root, cache, last, data, 0);
252         if (!cache)
253                 goto no_cache;
254         *cache_ret = cache;
255         cache_miss = 0;
256         goto again;
257 }
258
259 static u64 div_factor(u64 num, int factor)
260 {
261         if (factor == 10)
262                 return num;
263         num *= factor;
264         num /= 10;
265         return num;
266 }
267
268 static int block_group_state_bits(u64 flags)
269 {
270         int bits = 0;
271         if (flags & BTRFS_BLOCK_GROUP_DATA)
272                 bits |= BLOCK_GROUP_DATA;
273         if (flags & BTRFS_BLOCK_GROUP_METADATA)
274                 bits |= BLOCK_GROUP_METADATA;
275         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
276                 bits |= BLOCK_GROUP_SYSTEM;
277         return bits;
278 }
279
280 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
281                                                  struct btrfs_block_group_cache
282                                                  *hint, u64 search_start,
283                                                  int data, int owner)
284 {
285         struct btrfs_block_group_cache *cache;
286         struct extent_io_tree *block_group_cache;
287         struct btrfs_block_group_cache *found_group = NULL;
288         struct btrfs_fs_info *info = root->fs_info;
289         u64 used;
290         u64 last = 0;
291         u64 hint_last;
292         u64 start;
293         u64 end;
294         u64 free_check;
295         u64 ptr;
296         int bit;
297         int ret;
298         int full_search = 0;
299         int factor = 8;
300
301         block_group_cache = &info->block_group_cache;
302
303         if (!owner)
304                 factor = 8;
305
306         bit = block_group_state_bits(data);
307
308         if (search_start) {
309                 struct btrfs_block_group_cache *shint;
310                 shint = btrfs_lookup_block_group(info, search_start);
311                 if (shint && block_group_bits(shint, data)) {
312                         used = btrfs_block_group_used(&shint->item);
313                         if (used + shint->pinned <
314                             div_factor(shint->key.offset, factor)) {
315                                 return shint;
316                         }
317                 }
318         }
319         if (hint && block_group_bits(hint, data)) {
320                 used = btrfs_block_group_used(&hint->item);
321                 if (used + hint->pinned <
322                     div_factor(hint->key.offset, factor)) {
323                         return hint;
324                 }
325                 last = hint->key.objectid + hint->key.offset;
326                 hint_last = last;
327         } else {
328                 if (hint)
329                         hint_last = max(hint->key.objectid, search_start);
330                 else
331                         hint_last = search_start;
332
333                 last = hint_last;
334         }
335 again:
336         while(1) {
337                 ret = find_first_extent_bit(block_group_cache, last,
338                                             &start, &end, bit);
339                 if (ret)
340                         break;
341
342                 ret = get_state_private(block_group_cache, start, &ptr);
343                 if (ret)
344                         break;
345
346                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
347                 last = cache->key.objectid + cache->key.offset;
348                 used = btrfs_block_group_used(&cache->item);
349
350                 if (full_search)
351                         free_check = cache->key.offset;
352                 else
353                         free_check = div_factor(cache->key.offset, factor);
354
355                 if (used + cache->pinned < free_check) {
356                         found_group = cache;
357                         goto found;
358                 }
359                 if (full_search) {
360                         printk("failed on cache %Lu used %Lu total %Lu\n",
361                                cache->key.objectid, used, cache->key.offset);
362                 }
363                 cond_resched();
364         }
365         if (!full_search) {
366                 last = search_start;
367                 full_search = 1;
368                 goto again;
369         }
370 found:
371         return found_group;
372 }
373
374 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
375                            u64 owner, u64 owner_offset)
376 {
377         u32 high_crc = ~(u32)0;
378         u32 low_crc = ~(u32)0;
379         __le64 lenum;
380
381         lenum = cpu_to_le64(root_objectid);
382         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
383         lenum = cpu_to_le64(ref_generation);
384         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
385         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
386                 lenum = cpu_to_le64(owner);
387                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
388                 lenum = cpu_to_le64(owner_offset);
389                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
390         }
391         return ((u64)high_crc << 32) | (u64)low_crc;
392 }
393
394 static int match_extent_ref(struct extent_buffer *leaf,
395                             struct btrfs_extent_ref *disk_ref,
396                             struct btrfs_extent_ref *cpu_ref)
397 {
398         int ret;
399         int len;
400
401         if (cpu_ref->objectid)
402                 len = sizeof(*cpu_ref);
403         else
404                 len = 2 * sizeof(u64);
405         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
406                                    len);
407         return ret == 0;
408 }
409
410 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
411                                           struct btrfs_root *root,
412                                           struct btrfs_path *path, u64 bytenr,
413                                           u64 root_objectid,
414                                           u64 ref_generation, u64 owner,
415                                           u64 owner_offset, int del)
416 {
417         u64 hash;
418         struct btrfs_key key;
419         struct btrfs_key found_key;
420         struct btrfs_extent_ref ref;
421         struct extent_buffer *leaf;
422         struct btrfs_extent_ref *disk_ref;
423         int ret;
424         int ret2;
425
426         btrfs_set_stack_ref_root(&ref, root_objectid);
427         btrfs_set_stack_ref_generation(&ref, ref_generation);
428         btrfs_set_stack_ref_objectid(&ref, owner);
429         btrfs_set_stack_ref_offset(&ref, owner_offset);
430
431         hash = hash_extent_ref(root_objectid, ref_generation, owner,
432                                owner_offset);
433         key.offset = hash;
434         key.objectid = bytenr;
435         key.type = BTRFS_EXTENT_REF_KEY;
436
437         while (1) {
438                 ret = btrfs_search_slot(trans, root, &key, path,
439                                         del ? -1 : 0, del);
440                 if (ret < 0)
441                         goto out;
442                 leaf = path->nodes[0];
443                 if (ret != 0) {
444                         u32 nritems = btrfs_header_nritems(leaf);
445                         if (path->slots[0] >= nritems) {
446                                 ret2 = btrfs_next_leaf(root, path);
447                                 if (ret2)
448                                         goto out;
449                                 leaf = path->nodes[0];
450                         }
451                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
452                         if (found_key.objectid != bytenr ||
453                             found_key.type != BTRFS_EXTENT_REF_KEY)
454                                 goto out;
455                         key.offset = found_key.offset;
456                         if (del) {
457                                 btrfs_release_path(root, path);
458                                 continue;
459                         }
460                 }
461                 disk_ref = btrfs_item_ptr(path->nodes[0],
462                                           path->slots[0],
463                                           struct btrfs_extent_ref);
464                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
465                         ret = 0;
466                         goto out;
467                 }
468                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
469                 key.offset = found_key.offset + 1;
470                 btrfs_release_path(root, path);
471         }
472 out:
473         return ret;
474 }
475
476 /*
477  * Back reference rules.  Back refs have three main goals:
478  *
479  * 1) differentiate between all holders of references to an extent so that
480  *    when a reference is dropped we can make sure it was a valid reference
481  *    before freeing the extent.
482  *
483  * 2) Provide enough information to quickly find the holders of an extent
484  *    if we notice a given block is corrupted or bad.
485  *
486  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
487  *    maintenance.  This is actually the same as #2, but with a slightly
488  *    different use case.
489  *
490  * File extents can be referenced by:
491  *
492  * - multiple snapshots, subvolumes, or different generations in one subvol
493  * - different files inside a single subvolume (in theory, not implemented yet)
494  * - different offsets inside a file (bookend extents in file.c)
495  *
496  * The extent ref structure has fields for:
497  *
498  * - Objectid of the subvolume root
499  * - Generation number of the tree holding the reference
500  * - objectid of the file holding the reference
501  * - offset in the file corresponding to the key holding the reference
502  *
503  * When a file extent is allocated the fields are filled in:
504  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
505  *
506  * When a leaf is cow'd new references are added for every file extent found
507  * in the leaf.  It looks the same as the create case, but trans->transid
508  * will be different when the block is cow'd.
509  *
510  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
511  *
512  * When a file extent is removed either during snapshot deletion or file
513  * truncation, the corresponding back reference is found
514  * by searching for:
515  *
516  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
517  *      inode objectid, offset in file)
518  *
519  * Btree extents can be referenced by:
520  *
521  * - Different subvolumes
522  * - Different generations of the same subvolume
523  *
524  * Storing sufficient information for a full reverse mapping of a btree
525  * block would require storing the lowest key of the block in the backref,
526  * and it would require updating that lowest key either before write out or
527  * every time it changed.  Instead, the objectid of the lowest key is stored
528  * along with the level of the tree block.  This provides a hint
529  * about where in the btree the block can be found.  Searches through the
530  * btree only need to look for a pointer to that block, so they stop one
531  * level higher than the level recorded in the backref.
532  *
533  * Some btrees do not do reference counting on their extents.  These
534  * include the extent tree and the tree of tree roots.  Backrefs for these
535  * trees always have a generation of zero.
536  *
537  * When a tree block is created, back references are inserted:
538  *
539  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
540  *
541  * When a tree block is cow'd in a reference counted root,
542  * new back references are added for all the blocks it points to.
543  * These are of the form (trans->transid will have increased since creation):
544  *
545  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
546  *
547  * Because the lowest_key_objectid and the level are just hints
548  * they are not used when backrefs are deleted.  When a backref is deleted:
549  *
550  * if backref was for a tree root:
551  *     root_objectid = root->root_key.objectid
552  * else
553  *     root_objectid = btrfs_header_owner(parent)
554  *
555  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
556  *
557  * Back Reference Key hashing:
558  *
559  * Back references have four fields, each 64 bits long.  Unfortunately,
560  * This is hashed into a single 64 bit number and placed into the key offset.
561  * The key objectid corresponds to the first byte in the extent, and the
562  * key type is set to BTRFS_EXTENT_REF_KEY
563  */
564 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
565                                  struct btrfs_root *root,
566                                  struct btrfs_path *path, u64 bytenr,
567                                  u64 root_objectid, u64 ref_generation,
568                                  u64 owner, u64 owner_offset)
569 {
570         u64 hash;
571         struct btrfs_key key;
572         struct btrfs_extent_ref ref;
573         struct btrfs_extent_ref *disk_ref;
574         int ret;
575
576         btrfs_set_stack_ref_root(&ref, root_objectid);
577         btrfs_set_stack_ref_generation(&ref, ref_generation);
578         btrfs_set_stack_ref_objectid(&ref, owner);
579         btrfs_set_stack_ref_offset(&ref, owner_offset);
580
581         hash = hash_extent_ref(root_objectid, ref_generation, owner,
582                                owner_offset);
583         key.offset = hash;
584         key.objectid = bytenr;
585         key.type = BTRFS_EXTENT_REF_KEY;
586
587         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
588         while (ret == -EEXIST) {
589                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
590                                           struct btrfs_extent_ref);
591                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
592                         goto out;
593                 key.offset++;
594                 btrfs_release_path(root, path);
595                 ret = btrfs_insert_empty_item(trans, root, path, &key,
596                                               sizeof(ref));
597         }
598         if (ret)
599                 goto out;
600         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
601                                   struct btrfs_extent_ref);
602         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
603                             sizeof(ref));
604         btrfs_mark_buffer_dirty(path->nodes[0]);
605 out:
606         btrfs_release_path(root, path);
607         return ret;
608 }
609
610 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
611                                 struct btrfs_root *root,
612                                 u64 bytenr, u64 num_bytes,
613                                 u64 root_objectid, u64 ref_generation,
614                                 u64 owner, u64 owner_offset)
615 {
616         struct btrfs_path *path;
617         int ret;
618         struct btrfs_key key;
619         struct extent_buffer *l;
620         struct btrfs_extent_item *item;
621         u32 refs;
622
623         WARN_ON(num_bytes < root->sectorsize);
624         path = btrfs_alloc_path();
625         if (!path)
626                 return -ENOMEM;
627
628         key.objectid = bytenr;
629         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
630         key.offset = num_bytes;
631         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
632                                 0, 1);
633         if (ret < 0)
634                 return ret;
635         if (ret != 0) {
636                 BUG();
637         }
638         BUG_ON(ret != 0);
639         l = path->nodes[0];
640         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
641         refs = btrfs_extent_refs(l, item);
642         btrfs_set_extent_refs(l, item, refs + 1);
643         btrfs_mark_buffer_dirty(path->nodes[0]);
644
645         btrfs_release_path(root->fs_info->extent_root, path);
646
647         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
648                                           path, bytenr, root_objectid,
649                                           ref_generation, owner, owner_offset);
650         BUG_ON(ret);
651         finish_current_insert(trans, root->fs_info->extent_root);
652         del_pending_extents(trans, root->fs_info->extent_root);
653
654         btrfs_free_path(path);
655         return 0;
656 }
657
658 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
659                          struct btrfs_root *root)
660 {
661         finish_current_insert(trans, root->fs_info->extent_root);
662         del_pending_extents(trans, root->fs_info->extent_root);
663         return 0;
664 }
665
666 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
667                              struct btrfs_root *root, u64 bytenr,
668                              u64 num_bytes, u32 *refs)
669 {
670         struct btrfs_path *path;
671         int ret;
672         struct btrfs_key key;
673         struct extent_buffer *l;
674         struct btrfs_extent_item *item;
675
676         WARN_ON(num_bytes < root->sectorsize);
677         path = btrfs_alloc_path();
678         key.objectid = bytenr;
679         key.offset = num_bytes;
680         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
681         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
682                                 0, 0);
683         if (ret < 0)
684                 goto out;
685         if (ret != 0) {
686                 btrfs_print_leaf(root, path->nodes[0]);
687                 printk("failed to find block number %Lu\n", bytenr);
688                 BUG();
689         }
690         l = path->nodes[0];
691         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
692         *refs = btrfs_extent_refs(l, item);
693 out:
694         btrfs_free_path(path);
695         return 0;
696 }
697
698 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
699                                   struct btrfs_path *count_path,
700                                   u64 first_extent)
701 {
702         struct btrfs_root *extent_root = root->fs_info->extent_root;
703         struct btrfs_path *path;
704         u64 bytenr;
705         u64 found_objectid;
706         u64 root_objectid = root->root_key.objectid;
707         u32 total_count = 0;
708         u32 cur_count;
709         u32 refs;
710         u32 nritems;
711         int ret;
712         struct btrfs_key key;
713         struct btrfs_key found_key;
714         struct extent_buffer *l;
715         struct btrfs_extent_item *item;
716         struct btrfs_extent_ref *ref_item;
717         int level = -1;
718
719         path = btrfs_alloc_path();
720 again:
721         if (level == -1)
722                 bytenr = first_extent;
723         else
724                 bytenr = count_path->nodes[level]->start;
725
726         cur_count = 0;
727         key.objectid = bytenr;
728         key.offset = 0;
729
730         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
731         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
732         if (ret < 0)
733                 goto out;
734         BUG_ON(ret == 0);
735
736         l = path->nodes[0];
737         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
738
739         if (found_key.objectid != bytenr ||
740             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
741                 goto out;
742         }
743
744         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
745         refs = btrfs_extent_refs(l, item);
746         while (1) {
747                 nritems = btrfs_header_nritems(l);
748                 if (path->slots[0] >= nritems) {
749                         ret = btrfs_next_leaf(extent_root, path);
750                         if (ret == 0)
751                                 continue;
752                         break;
753                 }
754                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
755                 if (found_key.objectid != bytenr)
756                         break;
757                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
758                         path->slots[0]++;
759                         continue;
760                 }
761
762                 cur_count++;
763                 ref_item = btrfs_item_ptr(l, path->slots[0],
764                                           struct btrfs_extent_ref);
765                 found_objectid = btrfs_ref_root(l, ref_item);
766
767                 if (found_objectid != root_objectid) {
768                         total_count = 2;
769                         goto out;
770                 }
771                 total_count = 1;
772                 path->slots[0]++;
773         }
774         if (cur_count == 0) {
775                 total_count = 0;
776                 goto out;
777         }
778         if (level >= 0 && root->node == count_path->nodes[level])
779                 goto out;
780         level++;
781         btrfs_release_path(root, path);
782         goto again;
783
784 out:
785         btrfs_free_path(path);
786         return total_count;
787 }
788 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
789                        struct btrfs_root *root, u64 owner_objectid)
790 {
791         u64 generation;
792         u64 key_objectid;
793         u64 level;
794         u32 nritems;
795         struct btrfs_disk_key disk_key;
796
797         level = btrfs_header_level(root->node);
798         generation = trans->transid;
799         nritems = btrfs_header_nritems(root->node);
800         if (nritems > 0) {
801                 if (level == 0)
802                         btrfs_item_key(root->node, &disk_key, 0);
803                 else
804                         btrfs_node_key(root->node, &disk_key, 0);
805                 key_objectid = btrfs_disk_key_objectid(&disk_key);
806         } else {
807                 key_objectid = 0;
808         }
809         return btrfs_inc_extent_ref(trans, root, root->node->start,
810                                     root->node->len, owner_objectid,
811                                     generation, level, key_objectid);
812 }
813
814 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
815                   struct extent_buffer *buf)
816 {
817         u64 bytenr;
818         u32 nritems;
819         struct btrfs_key key;
820         struct btrfs_file_extent_item *fi;
821         int i;
822         int level;
823         int ret;
824         int faili;
825
826         if (!root->ref_cows)
827                 return 0;
828
829         level = btrfs_header_level(buf);
830         nritems = btrfs_header_nritems(buf);
831         for (i = 0; i < nritems; i++) {
832                 if (level == 0) {
833                         u64 disk_bytenr;
834                         btrfs_item_key_to_cpu(buf, &key, i);
835                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
836                                 continue;
837                         fi = btrfs_item_ptr(buf, i,
838                                             struct btrfs_file_extent_item);
839                         if (btrfs_file_extent_type(buf, fi) ==
840                             BTRFS_FILE_EXTENT_INLINE)
841                                 continue;
842                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
843                         if (disk_bytenr == 0)
844                                 continue;
845                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
846                                     btrfs_file_extent_disk_num_bytes(buf, fi),
847                                     root->root_key.objectid, trans->transid,
848                                     key.objectid, key.offset);
849                         if (ret) {
850                                 faili = i;
851                                 goto fail;
852                         }
853                 } else {
854                         bytenr = btrfs_node_blockptr(buf, i);
855                         btrfs_node_key_to_cpu(buf, &key, i);
856                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
857                                            btrfs_level_size(root, level - 1),
858                                            root->root_key.objectid,
859                                            trans->transid,
860                                            level - 1, key.objectid);
861                         if (ret) {
862                                 faili = i;
863                                 goto fail;
864                         }
865                 }
866         }
867         return 0;
868 fail:
869         WARN_ON(1);
870 #if 0
871         for (i =0; i < faili; i++) {
872                 if (level == 0) {
873                         u64 disk_bytenr;
874                         btrfs_item_key_to_cpu(buf, &key, i);
875                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
876                                 continue;
877                         fi = btrfs_item_ptr(buf, i,
878                                             struct btrfs_file_extent_item);
879                         if (btrfs_file_extent_type(buf, fi) ==
880                             BTRFS_FILE_EXTENT_INLINE)
881                                 continue;
882                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
883                         if (disk_bytenr == 0)
884                                 continue;
885                         err = btrfs_free_extent(trans, root, disk_bytenr,
886                                     btrfs_file_extent_disk_num_bytes(buf,
887                                                                       fi), 0);
888                         BUG_ON(err);
889                 } else {
890                         bytenr = btrfs_node_blockptr(buf, i);
891                         err = btrfs_free_extent(trans, root, bytenr,
892                                         btrfs_level_size(root, level - 1), 0);
893                         BUG_ON(err);
894                 }
895         }
896 #endif
897         return ret;
898 }
899
900 static int write_one_cache_group(struct btrfs_trans_handle *trans,
901                                  struct btrfs_root *root,
902                                  struct btrfs_path *path,
903                                  struct btrfs_block_group_cache *cache)
904 {
905         int ret;
906         int pending_ret;
907         struct btrfs_root *extent_root = root->fs_info->extent_root;
908         unsigned long bi;
909         struct extent_buffer *leaf;
910
911         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
912         if (ret < 0)
913                 goto fail;
914         BUG_ON(ret);
915
916         leaf = path->nodes[0];
917         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
918         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
919         btrfs_mark_buffer_dirty(leaf);
920         btrfs_release_path(extent_root, path);
921 fail:
922         finish_current_insert(trans, extent_root);
923         pending_ret = del_pending_extents(trans, extent_root);
924         if (ret)
925                 return ret;
926         if (pending_ret)
927                 return pending_ret;
928         return 0;
929
930 }
931
932 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
933                                    struct btrfs_root *root)
934 {
935         struct extent_io_tree *block_group_cache;
936         struct btrfs_block_group_cache *cache;
937         int ret;
938         int err = 0;
939         int werr = 0;
940         struct btrfs_path *path;
941         u64 last = 0;
942         u64 start;
943         u64 end;
944         u64 ptr;
945
946         block_group_cache = &root->fs_info->block_group_cache;
947         path = btrfs_alloc_path();
948         if (!path)
949                 return -ENOMEM;
950
951         while(1) {
952                 ret = find_first_extent_bit(block_group_cache, last,
953                                             &start, &end, BLOCK_GROUP_DIRTY);
954                 if (ret)
955                         break;
956
957                 last = end + 1;
958                 ret = get_state_private(block_group_cache, start, &ptr);
959                 if (ret)
960                         break;
961
962                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
963                 err = write_one_cache_group(trans, root,
964                                             path, cache);
965                 /*
966                  * if we fail to write the cache group, we want
967                  * to keep it marked dirty in hopes that a later
968                  * write will work
969                  */
970                 if (err) {
971                         werr = err;
972                         continue;
973                 }
974                 clear_extent_bits(block_group_cache, start, end,
975                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
976         }
977         btrfs_free_path(path);
978         return werr;
979 }
980
981 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
982                                                   u64 flags)
983 {
984         struct list_head *head = &info->space_info;
985         struct list_head *cur;
986         struct btrfs_space_info *found;
987         list_for_each(cur, head) {
988                 found = list_entry(cur, struct btrfs_space_info, list);
989                 if (found->flags == flags)
990                         return found;
991         }
992         return NULL;
993
994 }
995
996 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
997                           struct btrfs_root *extent_root, u64 alloc_bytes,
998                           u64 flags)
999 {
1000         struct btrfs_space_info *space_info;
1001         u64 thresh;
1002         u64 start;
1003         u64 num_bytes;
1004         int ret;
1005
1006         space_info = __find_space_info(extent_root->fs_info, flags);
1007         BUG_ON(!space_info);
1008
1009         if (space_info->full)
1010                 return 0;
1011
1012         thresh = div_factor(space_info->total_bytes, 7);
1013         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1014             thresh)
1015                 return 0;
1016
1017         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1018         if (ret == -ENOSPC) {
1019 printk("space info full %Lu\n", flags);
1020                 space_info->full = 1;
1021                 return 0;
1022         }
1023
1024         BUG_ON(ret);
1025
1026         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1027                      extent_root->fs_info->chunk_root->root_key.objectid,
1028                      start, num_bytes);
1029         BUG_ON(ret);
1030         return 0;
1031 }
1032
1033 static int update_block_group(struct btrfs_trans_handle *trans,
1034                               struct btrfs_root *root,
1035                               u64 bytenr, u64 num_bytes, int alloc,
1036                               int mark_free)
1037 {
1038         struct btrfs_block_group_cache *cache;
1039         struct btrfs_fs_info *info = root->fs_info;
1040         u64 total = num_bytes;
1041         u64 old_val;
1042         u64 byte_in_group;
1043         u64 start;
1044         u64 end;
1045
1046         while(total) {
1047                 cache = btrfs_lookup_block_group(info, bytenr);
1048                 if (!cache) {
1049                         return -1;
1050                 }
1051                 byte_in_group = bytenr - cache->key.objectid;
1052                 WARN_ON(byte_in_group > cache->key.offset);
1053                 start = cache->key.objectid;
1054                 end = start + cache->key.offset - 1;
1055                 set_extent_bits(&info->block_group_cache, start, end,
1056                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1057
1058                 old_val = btrfs_block_group_used(&cache->item);
1059                 num_bytes = min(total, cache->key.offset - byte_in_group);
1060                 if (alloc) {
1061                         old_val += num_bytes;
1062                         cache->space_info->bytes_used += num_bytes;
1063                 } else {
1064                         old_val -= num_bytes;
1065                         cache->space_info->bytes_used -= num_bytes;
1066                         if (mark_free) {
1067                                 set_extent_dirty(&info->free_space_cache,
1068                                                  bytenr, bytenr + num_bytes - 1,
1069                                                  GFP_NOFS);
1070                         }
1071                 }
1072                 btrfs_set_block_group_used(&cache->item, old_val);
1073                 total -= num_bytes;
1074                 bytenr += num_bytes;
1075         }
1076         return 0;
1077 }
1078
1079 static int update_pinned_extents(struct btrfs_root *root,
1080                                 u64 bytenr, u64 num, int pin)
1081 {
1082         u64 len;
1083         struct btrfs_block_group_cache *cache;
1084         struct btrfs_fs_info *fs_info = root->fs_info;
1085
1086         if (pin) {
1087                 set_extent_dirty(&fs_info->pinned_extents,
1088                                 bytenr, bytenr + num - 1, GFP_NOFS);
1089         } else {
1090                 clear_extent_dirty(&fs_info->pinned_extents,
1091                                 bytenr, bytenr + num - 1, GFP_NOFS);
1092         }
1093         while (num > 0) {
1094                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1095                 WARN_ON(!cache);
1096                 len = min(num, cache->key.offset -
1097                           (bytenr - cache->key.objectid));
1098                 if (pin) {
1099                         cache->pinned += len;
1100                         cache->space_info->bytes_pinned += len;
1101                         fs_info->total_pinned += len;
1102                 } else {
1103                         cache->pinned -= len;
1104                         cache->space_info->bytes_pinned -= len;
1105                         fs_info->total_pinned -= len;
1106                 }
1107                 bytenr += len;
1108                 num -= len;
1109         }
1110         return 0;
1111 }
1112
1113 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1114 {
1115         u64 last = 0;
1116         u64 start;
1117         u64 end;
1118         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1119         int ret;
1120
1121         while(1) {
1122                 ret = find_first_extent_bit(pinned_extents, last,
1123                                             &start, &end, EXTENT_DIRTY);
1124                 if (ret)
1125                         break;
1126                 set_extent_dirty(copy, start, end, GFP_NOFS);
1127                 last = end + 1;
1128         }
1129         return 0;
1130 }
1131
1132 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1133                                struct btrfs_root *root,
1134                                struct extent_io_tree *unpin)
1135 {
1136         u64 start;
1137         u64 end;
1138         int ret;
1139         struct extent_io_tree *free_space_cache;
1140         free_space_cache = &root->fs_info->free_space_cache;
1141
1142         while(1) {
1143                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1144                                             EXTENT_DIRTY);
1145                 if (ret)
1146                         break;
1147                 update_pinned_extents(root, start, end + 1 - start, 0);
1148                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1149                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1150         }
1151         return 0;
1152 }
1153
1154 static int finish_current_insert(struct btrfs_trans_handle *trans,
1155                                  struct btrfs_root *extent_root)
1156 {
1157         u64 start;
1158         u64 end;
1159         struct btrfs_fs_info *info = extent_root->fs_info;
1160         struct extent_buffer *eb;
1161         struct btrfs_path *path;
1162         struct btrfs_key ins;
1163         struct btrfs_disk_key first;
1164         struct btrfs_extent_item extent_item;
1165         int ret;
1166         int level;
1167         int err = 0;
1168
1169         btrfs_set_stack_extent_refs(&extent_item, 1);
1170         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1171         path = btrfs_alloc_path();
1172
1173         while(1) {
1174                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1175                                             &end, EXTENT_LOCKED);
1176                 if (ret)
1177                         break;
1178
1179                 ins.objectid = start;
1180                 ins.offset = end + 1 - start;
1181                 err = btrfs_insert_item(trans, extent_root, &ins,
1182                                         &extent_item, sizeof(extent_item));
1183                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1184                                   GFP_NOFS);
1185                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1186                 level = btrfs_header_level(eb);
1187                 if (level == 0) {
1188                         btrfs_item_key(eb, &first, 0);
1189                 } else {
1190                         btrfs_node_key(eb, &first, 0);
1191                 }
1192                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1193                                           start, extent_root->root_key.objectid,
1194                                           0, level,
1195                                           btrfs_disk_key_objectid(&first));
1196                 BUG_ON(err);
1197                 free_extent_buffer(eb);
1198         }
1199         btrfs_free_path(path);
1200         return 0;
1201 }
1202
1203 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1204                           int pending)
1205 {
1206         int err = 0;
1207         struct extent_buffer *buf;
1208
1209         if (!pending) {
1210                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1211                 if (buf) {
1212                         if (btrfs_buffer_uptodate(buf)) {
1213                                 u64 transid =
1214                                     root->fs_info->running_transaction->transid;
1215                                 if (btrfs_header_generation(buf) == transid) {
1216                                         free_extent_buffer(buf);
1217                                         return 1;
1218                                 }
1219                         }
1220                         free_extent_buffer(buf);
1221                 }
1222                 update_pinned_extents(root, bytenr, num_bytes, 1);
1223         } else {
1224                 set_extent_bits(&root->fs_info->pending_del,
1225                                 bytenr, bytenr + num_bytes - 1,
1226                                 EXTENT_LOCKED, GFP_NOFS);
1227         }
1228         BUG_ON(err < 0);
1229         return 0;
1230 }
1231
1232 /*
1233  * remove an extent from the root, returns 0 on success
1234  */
1235 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1236                          *root, u64 bytenr, u64 num_bytes,
1237                          u64 root_objectid, u64 ref_generation,
1238                          u64 owner_objectid, u64 owner_offset, int pin,
1239                          int mark_free)
1240 {
1241         struct btrfs_path *path;
1242         struct btrfs_key key;
1243         struct btrfs_fs_info *info = root->fs_info;
1244         struct btrfs_extent_ops *ops = info->extent_ops;
1245         struct btrfs_root *extent_root = info->extent_root;
1246         struct extent_buffer *leaf;
1247         int ret;
1248         int extent_slot = 0;
1249         int found_extent = 0;
1250         int num_to_del = 1;
1251         struct btrfs_extent_item *ei;
1252         u32 refs;
1253
1254         key.objectid = bytenr;
1255         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1256         key.offset = num_bytes;
1257
1258         path = btrfs_alloc_path();
1259         if (!path)
1260                 return -ENOMEM;
1261
1262         ret = lookup_extent_backref(trans, extent_root, path,
1263                                     bytenr, root_objectid,
1264                                     ref_generation,
1265                                     owner_objectid, owner_offset, 1);
1266         if (ret == 0) {
1267                 struct btrfs_key found_key;
1268                 extent_slot = path->slots[0];
1269                 while(extent_slot > 0) {
1270                         extent_slot--;
1271                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1272                                               extent_slot);
1273                         if (found_key.objectid != bytenr)
1274                                 break;
1275                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1276                             found_key.offset == num_bytes) {
1277                                 found_extent = 1;
1278                                 break;
1279                         }
1280                         if (path->slots[0] - extent_slot > 5)
1281                                 break;
1282                 }
1283                 if (!found_extent)
1284                         ret = btrfs_del_item(trans, extent_root, path);
1285         } else {
1286                 btrfs_print_leaf(extent_root, path->nodes[0]);
1287                 WARN_ON(1);
1288                 printk("Unable to find ref byte nr %Lu root %Lu "
1289                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1290                        root_objectid, ref_generation, owner_objectid,
1291                        owner_offset);
1292         }
1293         if (!found_extent) {
1294                 btrfs_release_path(extent_root, path);
1295                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1296                 if (ret < 0)
1297                         return ret;
1298                 BUG_ON(ret);
1299                 extent_slot = path->slots[0];
1300         }
1301
1302         leaf = path->nodes[0];
1303         ei = btrfs_item_ptr(leaf, extent_slot,
1304                             struct btrfs_extent_item);
1305         refs = btrfs_extent_refs(leaf, ei);
1306         BUG_ON(refs == 0);
1307         refs -= 1;
1308         btrfs_set_extent_refs(leaf, ei, refs);
1309
1310         btrfs_mark_buffer_dirty(leaf);
1311
1312         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1313                 /* if the back ref and the extent are next to each other
1314                  * they get deleted below in one shot
1315                  */
1316                 path->slots[0] = extent_slot;
1317                 num_to_del = 2;
1318         } else if (found_extent) {
1319                 /* otherwise delete the extent back ref */
1320                 ret = btrfs_del_item(trans, extent_root, path);
1321                 BUG_ON(ret);
1322                 /* if refs are 0, we need to setup the path for deletion */
1323                 if (refs == 0) {
1324                         btrfs_release_path(extent_root, path);
1325                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1326                                                 -1, 1);
1327                         if (ret < 0)
1328                                 return ret;
1329                         BUG_ON(ret);
1330                 }
1331         }
1332
1333         if (refs == 0) {
1334                 u64 super_used;
1335                 u64 root_used;
1336
1337                 if (pin) {
1338                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1339                         if (ret > 0)
1340                                 mark_free = 1;
1341                         BUG_ON(ret < 0);
1342                 }
1343
1344                 /* block accounting for super block */
1345                 super_used = btrfs_super_bytes_used(&info->super_copy);
1346                 btrfs_set_super_bytes_used(&info->super_copy,
1347                                            super_used - num_bytes);
1348
1349                 /* block accounting for root item */
1350                 root_used = btrfs_root_used(&root->root_item);
1351                 btrfs_set_root_used(&root->root_item,
1352                                            root_used - num_bytes);
1353                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1354                                       num_to_del);
1355                 if (ret)
1356                         return ret;
1357
1358                 if (ops && ops->free_extent)
1359                         ops->free_extent(root, bytenr, num_bytes);
1360
1361                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1362                                          mark_free);
1363                 BUG_ON(ret);
1364         }
1365         btrfs_free_path(path);
1366         finish_current_insert(trans, extent_root);
1367         return ret;
1368 }
1369
1370 /*
1371  * find all the blocks marked as pending in the radix tree and remove
1372  * them from the extent map
1373  */
1374 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1375                                btrfs_root *extent_root)
1376 {
1377         int ret;
1378         int err = 0;
1379         u64 start;
1380         u64 end;
1381         struct extent_io_tree *pending_del;
1382         struct extent_io_tree *pinned_extents;
1383
1384         pending_del = &extent_root->fs_info->pending_del;
1385         pinned_extents = &extent_root->fs_info->pinned_extents;
1386
1387         while(1) {
1388                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1389                                             EXTENT_LOCKED);
1390                 if (ret)
1391                         break;
1392                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1393                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1394                                   GFP_NOFS);
1395                 ret = __free_extent(trans, extent_root,
1396                                      start, end + 1 - start,
1397                                      extent_root->root_key.objectid,
1398                                      0, 0, 0, 0, 0);
1399                 if (ret)
1400                         err = ret;
1401         }
1402         return err;
1403 }
1404
1405 /*
1406  * remove an extent from the root, returns 0 on success
1407  */
1408 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1409                       *root, u64 bytenr, u64 num_bytes,
1410                       u64 root_objectid, u64 ref_generation,
1411                       u64 owner_objectid, u64 owner_offset, int pin)
1412 {
1413         struct btrfs_root *extent_root = root->fs_info->extent_root;
1414         int pending_ret;
1415         int ret;
1416
1417         WARN_ON(num_bytes < root->sectorsize);
1418         if (!root->ref_cows)
1419                 ref_generation = 0;
1420
1421         if (root == extent_root) {
1422                 pin_down_bytes(root, bytenr, num_bytes, 1);
1423                 return 0;
1424         }
1425         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1426                             ref_generation, owner_objectid, owner_offset,
1427                             pin, pin == 0);
1428         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1429         return ret ? ret : pending_ret;
1430 }
1431
1432 static u64 stripe_align(struct btrfs_root *root, u64 val)
1433 {
1434         u64 mask = ((u64)root->stripesize - 1);
1435         u64 ret = (val + mask) & ~mask;
1436         return ret;
1437 }
1438
1439 /*
1440  * walks the btree of allocated extents and find a hole of a given size.
1441  * The key ins is changed to record the hole:
1442  * ins->objectid == block start
1443  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1444  * ins->offset == number of blocks
1445  * Any available blocks before search_start are skipped.
1446  */
1447 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1448                                      struct btrfs_root *orig_root,
1449                                      u64 num_bytes, u64 empty_size,
1450                                      u64 search_start, u64 search_end,
1451                                      u64 hint_byte, struct btrfs_key *ins,
1452                                      u64 exclude_start, u64 exclude_nr,
1453                                      int data)
1454 {
1455         int ret;
1456         u64 orig_search_start = search_start;
1457         struct btrfs_root * root = orig_root->fs_info->extent_root;
1458         struct btrfs_fs_info *info = root->fs_info;
1459         u64 total_needed = num_bytes;
1460         struct btrfs_block_group_cache *block_group;
1461         int full_scan = 0;
1462         int wrapped = 0;
1463
1464         WARN_ON(num_bytes < root->sectorsize);
1465         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1466
1467         if (search_end == (u64)-1)
1468                 search_end = btrfs_super_total_bytes(&info->super_copy);
1469
1470         if (hint_byte) {
1471                 block_group = btrfs_lookup_block_group(info, hint_byte);
1472                 if (!block_group)
1473                         hint_byte = search_start;
1474                 block_group = btrfs_find_block_group(root, block_group,
1475                                                      hint_byte, data, 1);
1476         } else {
1477                 block_group = btrfs_find_block_group(root,
1478                                                      trans->block_group,
1479                                                      search_start, data, 1);
1480         }
1481
1482         total_needed += empty_size;
1483
1484 check_failed:
1485         if (!block_group) {
1486                 block_group = btrfs_lookup_block_group(info, search_start);
1487                 if (!block_group)
1488                         block_group = btrfs_lookup_block_group(info,
1489                                                        orig_search_start);
1490         }
1491         ret = find_search_start(root, &block_group, &search_start,
1492                                 total_needed, data);
1493         if (ret)
1494                 goto error;
1495
1496         search_start = stripe_align(root, search_start);
1497         ins->objectid = search_start;
1498         ins->offset = num_bytes;
1499
1500         if (ins->objectid + num_bytes >= search_end)
1501                 goto enospc;
1502
1503         if (ins->objectid + num_bytes >
1504             block_group->key.objectid + block_group->key.offset) {
1505                 search_start = block_group->key.objectid +
1506                         block_group->key.offset;
1507                 goto new_group;
1508         }
1509
1510         if (test_range_bit(&info->extent_ins, ins->objectid,
1511                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1512                 search_start = ins->objectid + num_bytes;
1513                 goto new_group;
1514         }
1515
1516         if (test_range_bit(&info->pinned_extents, ins->objectid,
1517                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1518                 search_start = ins->objectid + num_bytes;
1519                 goto new_group;
1520         }
1521
1522         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1523             ins->objectid < exclude_start + exclude_nr)) {
1524                 search_start = exclude_start + exclude_nr;
1525                 goto new_group;
1526         }
1527
1528         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1529                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1530                 if (block_group)
1531                         trans->block_group = block_group;
1532         }
1533         ins->offset = num_bytes;
1534         return 0;
1535
1536 new_group:
1537         if (search_start + num_bytes >= search_end) {
1538 enospc:
1539                 search_start = orig_search_start;
1540                 if (full_scan) {
1541                         ret = -ENOSPC;
1542                         goto error;
1543                 }
1544                 if (wrapped) {
1545                         if (!full_scan)
1546                                 total_needed -= empty_size;
1547                         full_scan = 1;
1548                 } else
1549                         wrapped = 1;
1550         }
1551         block_group = btrfs_lookup_block_group(info, search_start);
1552         cond_resched();
1553         block_group = btrfs_find_block_group(root, block_group,
1554                                              search_start, data, 0);
1555         goto check_failed;
1556
1557 error:
1558         return ret;
1559 }
1560 /*
1561  * finds a free extent and does all the dirty work required for allocation
1562  * returns the key for the extent through ins, and a tree buffer for
1563  * the first block of the extent through buf.
1564  *
1565  * returns 0 if everything worked, non-zero otherwise.
1566  */
1567 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1568                        struct btrfs_root *root,
1569                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1570                        u64 owner, u64 owner_offset,
1571                        u64 empty_size, u64 hint_byte,
1572                        u64 search_end, struct btrfs_key *ins, int data)
1573 {
1574         int ret;
1575         int pending_ret;
1576         u64 super_used, root_used;
1577         u64 search_start = 0;
1578         struct btrfs_fs_info *info = root->fs_info;
1579         struct btrfs_extent_ops *ops = info->extent_ops;
1580         u32 sizes[2];
1581         struct btrfs_root *extent_root = info->extent_root;
1582         struct btrfs_path *path;
1583         struct btrfs_extent_item *extent_item;
1584         struct btrfs_extent_ref *ref;
1585         struct btrfs_key keys[2];
1586
1587         if (data) {
1588                 data = BTRFS_BLOCK_GROUP_DATA;
1589         } else if (root == root->fs_info->chunk_root ||
1590                    info->force_system_allocs) {
1591                 data = BTRFS_BLOCK_GROUP_SYSTEM;
1592         } else {
1593                 data = BTRFS_BLOCK_GROUP_METADATA;
1594         }
1595
1596         if (root->ref_cows) {
1597                 if (data != BTRFS_BLOCK_GROUP_METADATA) {
1598                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1599                                              num_bytes,
1600                                              BTRFS_BLOCK_GROUP_METADATA);
1601                         BUG_ON(ret);
1602                 }
1603                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1604                                      num_bytes, data);
1605                 BUG_ON(ret);
1606         }
1607
1608         WARN_ON(num_bytes < root->sectorsize);
1609         if (ops && ops->alloc_extent) {
1610                 ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
1611         } else {
1612                 ret = find_free_extent(trans, root, num_bytes, empty_size,
1613                                         search_start, search_end, hint_byte,
1614                                         ins, trans->alloc_exclude_start,
1615                                         trans->alloc_exclude_nr, data);
1616         }
1617         BUG_ON(ret);
1618         if (ret)
1619                 return ret;
1620
1621         /* block accounting for super block */
1622         super_used = btrfs_super_bytes_used(&info->super_copy);
1623         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1624
1625         /* block accounting for root item */
1626         root_used = btrfs_root_used(&root->root_item);
1627         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1628
1629         clear_extent_dirty(&root->fs_info->free_space_cache,
1630                            ins->objectid, ins->objectid + ins->offset - 1,
1631                            GFP_NOFS);
1632
1633         if (root == extent_root) {
1634                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1635                                 ins->objectid + ins->offset - 1,
1636                                 EXTENT_LOCKED, GFP_NOFS);
1637                 goto update_block;
1638         }
1639
1640         WARN_ON(trans->alloc_exclude_nr);
1641         trans->alloc_exclude_start = ins->objectid;
1642         trans->alloc_exclude_nr = ins->offset;
1643
1644         memcpy(&keys[0], ins, sizeof(*ins));
1645         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1646                                          owner, owner_offset);
1647         keys[1].objectid = ins->objectid;
1648         keys[1].type = BTRFS_EXTENT_REF_KEY;
1649         sizes[0] = sizeof(*extent_item);
1650         sizes[1] = sizeof(*ref);
1651
1652         path = btrfs_alloc_path();
1653         BUG_ON(!path);
1654
1655         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1656                                        sizes, 2);
1657
1658         BUG_ON(ret);
1659         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1660                                      struct btrfs_extent_item);
1661         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1662         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1663                              struct btrfs_extent_ref);
1664
1665         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1666         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1667         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1668         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1669
1670         btrfs_mark_buffer_dirty(path->nodes[0]);
1671
1672         trans->alloc_exclude_start = 0;
1673         trans->alloc_exclude_nr = 0;
1674         btrfs_free_path(path);
1675         finish_current_insert(trans, extent_root);
1676         pending_ret = del_pending_extents(trans, extent_root);
1677
1678         if (ret) {
1679                 return ret;
1680         }
1681         if (pending_ret) {
1682                 return pending_ret;
1683         }
1684
1685 update_block:
1686         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1687         if (ret) {
1688                 printk("update block group failed for %Lu %Lu\n",
1689                        ins->objectid, ins->offset);
1690                 BUG();
1691         }
1692         return 0;
1693 }
1694
1695 /*
1696  * helper function to allocate a block for a given tree
1697  * returns the tree buffer or NULL.
1698  */
1699 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1700                                              struct btrfs_root *root,
1701                                              u32 blocksize,
1702                                              u64 root_objectid, u64 hint,
1703                                              u64 empty_size)
1704 {
1705         u64 ref_generation;
1706
1707         if (root->ref_cows)
1708                 ref_generation = trans->transid;
1709         else
1710                 ref_generation = 0;
1711
1712
1713         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1714                                         ref_generation, 0, 0, hint, empty_size);
1715 }
1716
1717 /*
1718  * helper function to allocate a block for a given tree
1719  * returns the tree buffer or NULL.
1720  */
1721 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1722                                              struct btrfs_root *root,
1723                                              u32 blocksize,
1724                                              u64 root_objectid,
1725                                              u64 ref_generation,
1726                                              u64 first_objectid,
1727                                              int level,
1728                                              u64 hint,
1729                                              u64 empty_size)
1730 {
1731         struct btrfs_key ins;
1732         int ret;
1733         struct extent_buffer *buf;
1734
1735         ret = btrfs_alloc_extent(trans, root, blocksize,
1736                                  root_objectid, ref_generation,
1737                                  level, first_objectid, empty_size, hint,
1738                                  (u64)-1, &ins, 0);
1739         if (ret) {
1740                 BUG_ON(ret > 0);
1741                 return ERR_PTR(ret);
1742         }
1743         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1744         if (!buf) {
1745                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1746                                   root->root_key.objectid, ref_generation,
1747                                   0, 0, 0);
1748                 BUG_ON(1);
1749                 return ERR_PTR(-ENOMEM);
1750         }
1751         btrfs_set_buffer_uptodate(buf);
1752         trans->blocks_used++;
1753         return buf;
1754 }
1755
1756 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1757                                   struct btrfs_root *root,
1758                                   struct extent_buffer *leaf)
1759 {
1760         u64 leaf_owner;
1761         u64 leaf_generation;
1762         struct btrfs_key key;
1763         struct btrfs_file_extent_item *fi;
1764         int i;
1765         int nritems;
1766         int ret;
1767
1768         BUG_ON(!btrfs_is_leaf(leaf));
1769         nritems = btrfs_header_nritems(leaf);
1770         leaf_owner = btrfs_header_owner(leaf);
1771         leaf_generation = btrfs_header_generation(leaf);
1772
1773         for (i = 0; i < nritems; i++) {
1774                 u64 disk_bytenr;
1775
1776                 btrfs_item_key_to_cpu(leaf, &key, i);
1777                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1778                         continue;
1779                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1780                 if (btrfs_file_extent_type(leaf, fi) ==
1781                     BTRFS_FILE_EXTENT_INLINE)
1782                         continue;
1783                 /*
1784                  * FIXME make sure to insert a trans record that
1785                  * repeats the snapshot del on crash
1786                  */
1787                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1788                 if (disk_bytenr == 0)
1789                         continue;
1790                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1791                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1792                                 leaf_owner, leaf_generation,
1793                                 key.objectid, key.offset, 0);
1794                 BUG_ON(ret);
1795         }
1796         return 0;
1797 }
1798
1799 static void noinline reada_walk_down(struct btrfs_root *root,
1800                                      struct extent_buffer *node,
1801                                      int slot)
1802 {
1803         u64 bytenr;
1804         u64 last = 0;
1805         u32 nritems;
1806         u32 refs;
1807         u32 blocksize;
1808         int ret;
1809         int i;
1810         int level;
1811         int skipped = 0;
1812
1813         nritems = btrfs_header_nritems(node);
1814         level = btrfs_header_level(node);
1815         if (level)
1816                 return;
1817
1818         for (i = slot; i < nritems && skipped < 32; i++) {
1819                 bytenr = btrfs_node_blockptr(node, i);
1820                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1821                              (last > bytenr && last - bytenr > 32 * 1024))) {
1822                         skipped++;
1823                         continue;
1824                 }
1825                 blocksize = btrfs_level_size(root, level - 1);
1826                 if (i != slot) {
1827                         ret = lookup_extent_ref(NULL, root, bytenr,
1828                                                 blocksize, &refs);
1829                         BUG_ON(ret);
1830                         if (refs != 1) {
1831                                 skipped++;
1832                                 continue;
1833                         }
1834                 }
1835                 mutex_unlock(&root->fs_info->fs_mutex);
1836                 ret = readahead_tree_block(root, bytenr, blocksize);
1837                 last = bytenr + blocksize;
1838                 cond_resched();
1839                 mutex_lock(&root->fs_info->fs_mutex);
1840                 if (ret)
1841                         break;
1842         }
1843 }
1844
1845 /*
1846  * helper function for drop_snapshot, this walks down the tree dropping ref
1847  * counts as it goes.
1848  */
1849 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
1850                                    struct btrfs_root *root,
1851                                    struct btrfs_path *path, int *level)
1852 {
1853         u64 root_owner;
1854         u64 root_gen;
1855         u64 bytenr;
1856         struct extent_buffer *next;
1857         struct extent_buffer *cur;
1858         struct extent_buffer *parent;
1859         u32 blocksize;
1860         int ret;
1861         u32 refs;
1862
1863         WARN_ON(*level < 0);
1864         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1865         ret = lookup_extent_ref(trans, root,
1866                                 path->nodes[*level]->start,
1867                                 path->nodes[*level]->len, &refs);
1868         BUG_ON(ret);
1869         if (refs > 1)
1870                 goto out;
1871
1872         /*
1873          * walk down to the last node level and free all the leaves
1874          */
1875         while(*level >= 0) {
1876                 WARN_ON(*level < 0);
1877                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1878                 cur = path->nodes[*level];
1879
1880                 if (btrfs_header_level(cur) != *level)
1881                         WARN_ON(1);
1882
1883                 if (path->slots[*level] >=
1884                     btrfs_header_nritems(cur))
1885                         break;
1886                 if (*level == 0) {
1887                         ret = drop_leaf_ref(trans, root, cur);
1888                         BUG_ON(ret);
1889                         break;
1890                 }
1891                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1892                 blocksize = btrfs_level_size(root, *level - 1);
1893                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1894                 BUG_ON(ret);
1895                 if (refs != 1) {
1896                         parent = path->nodes[*level];
1897                         root_owner = btrfs_header_owner(parent);
1898                         root_gen = btrfs_header_generation(parent);
1899                         path->slots[*level]++;
1900                         ret = btrfs_free_extent(trans, root, bytenr,
1901                                                 blocksize, root_owner,
1902                                                 root_gen, 0, 0, 1);
1903                         BUG_ON(ret);
1904                         continue;
1905                 }
1906                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1907                 if (!next || !btrfs_buffer_uptodate(next)) {
1908                         free_extent_buffer(next);
1909                         reada_walk_down(root, cur, path->slots[*level]);
1910                         mutex_unlock(&root->fs_info->fs_mutex);
1911                         next = read_tree_block(root, bytenr, blocksize);
1912                         mutex_lock(&root->fs_info->fs_mutex);
1913
1914                         /* we dropped the lock, check one more time */
1915                         ret = lookup_extent_ref(trans, root, bytenr,
1916                                                 blocksize, &refs);
1917                         BUG_ON(ret);
1918                         if (refs != 1) {
1919                                 parent = path->nodes[*level];
1920                                 root_owner = btrfs_header_owner(parent);
1921                                 root_gen = btrfs_header_generation(parent);
1922
1923                                 path->slots[*level]++;
1924                                 free_extent_buffer(next);
1925                                 ret = btrfs_free_extent(trans, root, bytenr,
1926                                                         blocksize,
1927                                                         root_owner,
1928                                                         root_gen, 0, 0, 1);
1929                                 BUG_ON(ret);
1930                                 continue;
1931                         }
1932                 }
1933                 WARN_ON(*level <= 0);
1934                 if (path->nodes[*level-1])
1935                         free_extent_buffer(path->nodes[*level-1]);
1936                 path->nodes[*level-1] = next;
1937                 *level = btrfs_header_level(next);
1938                 path->slots[*level] = 0;
1939         }
1940 out:
1941         WARN_ON(*level < 0);
1942         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1943
1944         if (path->nodes[*level] == root->node) {
1945                 root_owner = root->root_key.objectid;
1946                 parent = path->nodes[*level];
1947         } else {
1948                 parent = path->nodes[*level + 1];
1949                 root_owner = btrfs_header_owner(parent);
1950         }
1951
1952         root_gen = btrfs_header_generation(parent);
1953         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1954                                 path->nodes[*level]->len,
1955                                 root_owner, root_gen, 0, 0, 1);
1956         free_extent_buffer(path->nodes[*level]);
1957         path->nodes[*level] = NULL;
1958         *level += 1;
1959         BUG_ON(ret);
1960         return 0;
1961 }
1962
1963 /*
1964  * helper for dropping snapshots.  This walks back up the tree in the path
1965  * to find the first node higher up where we haven't yet gone through
1966  * all the slots
1967  */
1968 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
1969                                  struct btrfs_root *root,
1970                                  struct btrfs_path *path, int *level)
1971 {
1972         u64 root_owner;
1973         u64 root_gen;
1974         struct btrfs_root_item *root_item = &root->root_item;
1975         int i;
1976         int slot;
1977         int ret;
1978
1979         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1980                 slot = path->slots[i];
1981                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1982                         struct extent_buffer *node;
1983                         struct btrfs_disk_key disk_key;
1984                         node = path->nodes[i];
1985                         path->slots[i]++;
1986                         *level = i;
1987                         WARN_ON(*level == 0);
1988                         btrfs_node_key(node, &disk_key, path->slots[i]);
1989                         memcpy(&root_item->drop_progress,
1990                                &disk_key, sizeof(disk_key));
1991                         root_item->drop_level = i;
1992                         return 0;
1993                 } else {
1994                         if (path->nodes[*level] == root->node) {
1995                                 root_owner = root->root_key.objectid;
1996                                 root_gen =
1997                                    btrfs_header_generation(path->nodes[*level]);
1998                         } else {
1999                                 struct extent_buffer *node;
2000                                 node = path->nodes[*level + 1];
2001                                 root_owner = btrfs_header_owner(node);
2002                                 root_gen = btrfs_header_generation(node);
2003                         }
2004                         ret = btrfs_free_extent(trans, root,
2005                                                 path->nodes[*level]->start,
2006                                                 path->nodes[*level]->len,
2007                                                 root_owner, root_gen, 0, 0, 1);
2008                         BUG_ON(ret);
2009                         free_extent_buffer(path->nodes[*level]);
2010                         path->nodes[*level] = NULL;
2011                         *level = i + 1;
2012                 }
2013         }
2014         return 1;
2015 }
2016
2017 /*
2018  * drop the reference count on the tree rooted at 'snap'.  This traverses
2019  * the tree freeing any blocks that have a ref count of zero after being
2020  * decremented.
2021  */
2022 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2023                         *root)
2024 {
2025         int ret = 0;
2026         int wret;
2027         int level;
2028         struct btrfs_path *path;
2029         int i;
2030         int orig_level;
2031         struct btrfs_root_item *root_item = &root->root_item;
2032
2033         path = btrfs_alloc_path();
2034         BUG_ON(!path);
2035
2036         level = btrfs_header_level(root->node);
2037         orig_level = level;
2038         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2039                 path->nodes[level] = root->node;
2040                 extent_buffer_get(root->node);
2041                 path->slots[level] = 0;
2042         } else {
2043                 struct btrfs_key key;
2044                 struct btrfs_disk_key found_key;
2045                 struct extent_buffer *node;
2046
2047                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2048                 level = root_item->drop_level;
2049                 path->lowest_level = level;
2050                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2051                 if (wret < 0) {
2052                         ret = wret;
2053                         goto out;
2054                 }
2055                 node = path->nodes[level];
2056                 btrfs_node_key(node, &found_key, path->slots[level]);
2057                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2058                                sizeof(found_key)));
2059         }
2060         while(1) {
2061                 wret = walk_down_tree(trans, root, path, &level);
2062                 if (wret < 0)
2063                         ret = wret;
2064                 if (wret != 0)
2065                         break;
2066
2067                 wret = walk_up_tree(trans, root, path, &level);
2068                 if (wret < 0)
2069                         ret = wret;
2070                 if (wret != 0)
2071                         break;
2072                 /*
2073                 ret = -EAGAIN;
2074                 break;
2075                 */
2076         }
2077         for (i = 0; i <= orig_level; i++) {
2078                 if (path->nodes[i]) {
2079                         free_extent_buffer(path->nodes[i]);
2080                         path->nodes[i] = NULL;
2081                 }
2082         }
2083 out:
2084         btrfs_free_path(path);
2085         return ret;
2086 }
2087
2088 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2089 {
2090         u64 start;
2091         u64 end;
2092         u64 ptr;
2093         int ret;
2094         while(1) {
2095                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2096                                             &start, &end, (unsigned int)-1);
2097                 if (ret)
2098                         break;
2099                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2100                 if (!ret)
2101                         kfree((void *)(unsigned long)ptr);
2102                 clear_extent_bits(&info->block_group_cache, start,
2103                                   end, (unsigned int)-1, GFP_NOFS);
2104         }
2105         while(1) {
2106                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2107                                             &start, &end, EXTENT_DIRTY);
2108                 if (ret)
2109                         break;
2110                 clear_extent_dirty(&info->free_space_cache, start,
2111                                    end, GFP_NOFS);
2112         }
2113         return 0;
2114 }
2115
2116 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2117                            struct btrfs_key *key)
2118 {
2119         int ret;
2120         struct btrfs_key found_key;
2121         struct extent_buffer *leaf;
2122         int slot;
2123
2124         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2125         if (ret < 0)
2126                 return ret;
2127         while(1) {
2128                 slot = path->slots[0];
2129                 leaf = path->nodes[0];
2130                 if (slot >= btrfs_header_nritems(leaf)) {
2131                         ret = btrfs_next_leaf(root, path);
2132                         if (ret == 0)
2133                                 continue;
2134                         if (ret < 0)
2135                                 goto error;
2136                         break;
2137                 }
2138                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2139
2140                 if (found_key.objectid >= key->objectid &&
2141                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2142                         return 0;
2143                 path->slots[0]++;
2144         }
2145         ret = -ENOENT;
2146 error:
2147         return ret;
2148 }
2149
2150 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2151                              u64 total_bytes, u64 bytes_used,
2152                              struct btrfs_space_info **space_info)
2153 {
2154         struct btrfs_space_info *found;
2155
2156         found = __find_space_info(info, flags);
2157         if (found) {
2158                 found->total_bytes += total_bytes;
2159                 found->bytes_used += bytes_used;
2160                 WARN_ON(found->total_bytes < found->bytes_used);
2161                 *space_info = found;
2162                 return 0;
2163         }
2164         found = kmalloc(sizeof(*found), GFP_NOFS);
2165         if (!found)
2166                 return -ENOMEM;
2167
2168         list_add(&found->list, &info->space_info);
2169         found->flags = flags;
2170         found->total_bytes = total_bytes;
2171         found->bytes_used = bytes_used;
2172         found->bytes_pinned = 0;
2173         found->full = 0;
2174         *space_info = found;
2175         return 0;
2176 }
2177
2178 int btrfs_read_block_groups(struct btrfs_root *root)
2179 {
2180         struct btrfs_path *path;
2181         int ret;
2182         int bit;
2183         struct btrfs_block_group_cache *cache;
2184         struct btrfs_fs_info *info = root->fs_info;
2185         struct btrfs_space_info *space_info;
2186         struct extent_io_tree *block_group_cache;
2187         struct btrfs_key key;
2188         struct btrfs_key found_key;
2189         struct extent_buffer *leaf;
2190
2191         block_group_cache = &info->block_group_cache;
2192
2193         root = info->extent_root;
2194         key.objectid = 0;
2195         key.offset = 0;
2196         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2197         path = btrfs_alloc_path();
2198         if (!path)
2199                 return -ENOMEM;
2200
2201         while(1) {
2202                 ret = find_first_block_group(root, path, &key);
2203                 if (ret > 0) {
2204                         ret = 0;
2205                         goto error;
2206                 }
2207                 if (ret != 0) {
2208                         goto error;
2209                 }
2210                 leaf = path->nodes[0];
2211                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2212                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2213                 if (!cache) {
2214                         ret = -ENOMEM;
2215                         break;
2216                 }
2217
2218                 read_extent_buffer(leaf, &cache->item,
2219                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2220                                    sizeof(cache->item));
2221                 memcpy(&cache->key, &found_key, sizeof(found_key));
2222                 cache->cached = 0;
2223                 cache->pinned = 0;
2224                 key.objectid = found_key.objectid + found_key.offset;
2225                 btrfs_release_path(root, path);
2226                 cache->flags = btrfs_block_group_flags(&cache->item);
2227                 bit = 0;
2228                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2229                         bit = BLOCK_GROUP_DATA;
2230                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2231                         bit = BLOCK_GROUP_SYSTEM;
2232                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2233                         bit = BLOCK_GROUP_METADATA;
2234                 }
2235
2236                 ret = update_space_info(info, cache->flags, found_key.offset,
2237                                         btrfs_block_group_used(&cache->item),
2238                                         &space_info);
2239                 BUG_ON(ret);
2240                 cache->space_info = space_info;
2241
2242                 /* use EXTENT_LOCKED to prevent merging */
2243                 set_extent_bits(block_group_cache, found_key.objectid,
2244                                 found_key.objectid + found_key.offset - 1,
2245                                 bit | EXTENT_LOCKED, GFP_NOFS);
2246                 set_state_private(block_group_cache, found_key.objectid,
2247                                   (unsigned long)cache);
2248
2249                 if (key.objectid >=
2250                     btrfs_super_total_bytes(&info->super_copy))
2251                         break;
2252         }
2253         ret = 0;
2254 error:
2255         btrfs_free_path(path);
2256         return ret;
2257 }
2258
2259 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2260                            struct btrfs_root *root, u64 bytes_used,
2261                            u64 type, u64 chunk_tree, u64 chunk_objectid,
2262                            u64 size)
2263 {
2264         int ret;
2265         int bit = 0;
2266         struct btrfs_root *extent_root;
2267         struct btrfs_block_group_cache *cache;
2268         struct extent_io_tree *block_group_cache;
2269
2270         extent_root = root->fs_info->extent_root;
2271         block_group_cache = &root->fs_info->block_group_cache;
2272
2273         cache = kmalloc(sizeof(*cache), GFP_NOFS);
2274         BUG_ON(!cache);
2275         cache->key.objectid = chunk_objectid;
2276         cache->key.offset = size;
2277         cache->cached = 0;
2278         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2279         memset(&cache->item, 0, sizeof(cache->item));
2280         btrfs_set_block_group_used(&cache->item, bytes_used);
2281         btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
2282         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2283         cache->flags = type;
2284         btrfs_set_block_group_flags(&cache->item, type);
2285
2286         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2287                                 &cache->space_info);
2288         BUG_ON(ret);
2289
2290         if (type & BTRFS_BLOCK_GROUP_DATA) {
2291                 bit = BLOCK_GROUP_DATA;
2292         } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2293                 bit = BLOCK_GROUP_SYSTEM;
2294         } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2295                 bit = BLOCK_GROUP_METADATA;
2296         }
2297         set_extent_bits(block_group_cache, chunk_objectid,
2298                         chunk_objectid + size - 1,
2299                         bit | EXTENT_LOCKED, GFP_NOFS);
2300         set_state_private(block_group_cache, chunk_objectid,
2301                           (unsigned long)cache);
2302
2303         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2304                                 sizeof(cache->item));
2305         BUG_ON(ret);
2306
2307         finish_current_insert(trans, extent_root);
2308         ret = del_pending_extents(trans, extent_root);
2309         BUG_ON(ret);
2310         return 0;
2311 }
2312
2313 u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation,
2314                           u64 owner, u64 owner_offset)
2315 {
2316         return hash_extent_ref(root_objectid, ref_generation,
2317                                owner, owner_offset);
2318 }
2319
2320 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
2321                              struct btrfs_root *root,
2322                              u64 bytenr, u64 num_bytes, int alloc,
2323                              int mark_free)
2324 {
2325         return update_block_group(trans, root, bytenr, num_bytes,
2326                                   alloc, mark_free);
2327 }