3b5d2b6ed3f72be660f023b44940ac3e1c53cfc6
[platform/upstream/btrfs-progs.git] / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include "kerncompat.h"
22 #include "radix-tree.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "crc32c.h"
28 #include "volumes.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
41                            struct btrfs_root *root, u64 bytes_used,
42                            u64 type, u64 chunk_tree, u64 chunk_objectid,
43                            u64 size);
44
45 static int cache_block_group(struct btrfs_root *root,
46                              struct btrfs_block_group_cache *block_group)
47 {
48         struct btrfs_path *path;
49         int ret;
50         struct btrfs_key key;
51         struct extent_buffer *leaf;
52         struct extent_io_tree *free_space_cache;
53         int slot;
54         u64 last = 0;
55         u64 hole_size;
56         u64 first_free;
57         int found = 0;
58
59         if (!block_group)
60                 return 0;
61
62         root = root->fs_info->extent_root;
63         free_space_cache = &root->fs_info->free_space_cache;
64
65         if (block_group->cached)
66                 return 0;
67
68         path = btrfs_alloc_path();
69         if (!path)
70                 return -ENOMEM;
71
72         path->reada = 2;
73         first_free = block_group->key.objectid;
74         key.objectid = block_group->key.objectid;
75         key.offset = 0;
76         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
77         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
78         if (ret < 0)
79                 return ret;
80         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
81         if (ret < 0)
82                 return ret;
83         if (ret == 0) {
84                 leaf = path->nodes[0];
85                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
86                 if (key.objectid + key.offset > first_free)
87                         first_free = key.objectid + key.offset;
88         }
89         while(1) {
90                 leaf = path->nodes[0];
91                 slot = path->slots[0];
92                 if (slot >= btrfs_header_nritems(leaf)) {
93                         ret = btrfs_next_leaf(root, path);
94                         if (ret < 0)
95                                 goto err;
96                         if (ret == 0) {
97                                 continue;
98                         } else {
99                                 break;
100                         }
101                 }
102                 btrfs_item_key_to_cpu(leaf, &key, slot);
103                 if (key.objectid < block_group->key.objectid) {
104                         goto next;
105                 }
106                 if (key.objectid >= block_group->key.objectid +
107                     block_group->key.offset) {
108                         break;
109                 }
110
111                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
112                         if (!found) {
113                                 last = first_free;
114                                 found = 1;
115                         }
116                         if (key.objectid > last) {
117                                 hole_size = key.objectid - last;
118                                 set_extent_dirty(free_space_cache, last,
119                                                  last + hole_size - 1,
120                                                  GFP_NOFS);
121                         }
122                         last = key.objectid + key.offset;
123                 }
124 next:
125                 path->slots[0]++;
126         }
127
128         if (!found)
129                 last = first_free;
130         if (block_group->key.objectid +
131             block_group->key.offset > last) {
132                 hole_size = block_group->key.objectid +
133                         block_group->key.offset - last;
134                 set_extent_dirty(free_space_cache, last,
135                                  last + hole_size - 1, GFP_NOFS);
136         }
137         block_group->cached = 1;
138 err:
139         btrfs_free_path(path);
140         return 0;
141 }
142
143 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
144                                                          btrfs_fs_info *info,
145                                                          u64 bytenr)
146 {
147         struct extent_io_tree *block_group_cache;
148         struct btrfs_block_group_cache *block_group = NULL;
149         u64 ptr;
150         u64 start;
151         u64 end;
152         int ret;
153
154         block_group_cache = &info->block_group_cache;
155         ret = find_first_extent_bit(block_group_cache,
156                                     bytenr, &start, &end,
157                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
158                                     BLOCK_GROUP_SYSTEM);
159         if (ret) {
160                 return NULL;
161         }
162         ret = get_state_private(block_group_cache, start, &ptr);
163         if (ret)
164                 return NULL;
165
166         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
167         if (block_group->key.objectid <= bytenr && bytenr <
168             block_group->key.objectid + block_group->key.offset)
169                 return block_group;
170         return NULL;
171 }
172
173 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
174 {
175         return (cache->flags & bits);
176 }
177
178 static int noinline find_search_start(struct btrfs_root *root,
179                               struct btrfs_block_group_cache **cache_ret,
180                               u64 *start_ret, int num, int data)
181 {
182         int ret;
183         struct btrfs_block_group_cache *cache = *cache_ret;
184         u64 last;
185         u64 start = 0;
186         u64 end = 0;
187         u64 cache_miss = 0;
188         u64 search_start = *start_ret;
189         int wrapped = 0;
190
191         if (!cache) {
192                 goto out;
193         }
194 again:
195         ret = cache_block_group(root, cache);
196         if (ret)
197                 goto out;
198
199         last = max(search_start, cache->key.objectid);
200         if (!block_group_bits(cache, data)) {
201                 goto new_group;
202         }
203
204         while(1) {
205                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
206                                             last, &start, &end, EXTENT_DIRTY);
207                 if (ret) {
208                         if (!cache_miss)
209                                 cache_miss = last;
210                         goto new_group;
211                 }
212
213                 start = max(last, start);
214                 last = end + 1;
215                 if (last - start < num) {
216                         if (last == cache->key.objectid + cache->key.offset)
217                                 cache_miss = start;
218                         continue;
219                 }
220                 if (start + num > cache->key.objectid + cache->key.offset)
221                         goto new_group;
222                 *start_ret = start;
223                 return 0;
224         }
225 out:
226         cache = btrfs_lookup_block_group(root->fs_info, search_start);
227         if (!cache) {
228                 printk("Unable to find block group for %Lu\n", search_start);
229                 WARN_ON(1);
230         }
231         return -ENOSPC;
232
233 new_group:
234         last = cache->key.objectid + cache->key.offset;
235 wrapped:
236         cache = btrfs_lookup_block_group(root->fs_info, last);
237         if (!cache) {
238 no_cache:
239                 if (!wrapped) {
240                         wrapped = 1;
241                         last = search_start;
242                         goto wrapped;
243                 }
244                 goto out;
245         }
246         if (cache_miss && !cache->cached) {
247                 cache_block_group(root, cache);
248                 last = cache_miss;
249                 cache = btrfs_lookup_block_group(root->fs_info, last);
250         }
251         cache = btrfs_find_block_group(root, cache, last, data, 0);
252         if (!cache)
253                 goto no_cache;
254         *cache_ret = cache;
255         cache_miss = 0;
256         goto again;
257 }
258
259 static u64 div_factor(u64 num, int factor)
260 {
261         if (factor == 10)
262                 return num;
263         num *= factor;
264         num /= 10;
265         return num;
266 }
267
268 static int block_group_state_bits(u64 flags)
269 {
270         int bits = 0;
271         if (flags & BTRFS_BLOCK_GROUP_DATA)
272                 bits |= BLOCK_GROUP_DATA;
273         if (flags & BTRFS_BLOCK_GROUP_METADATA)
274                 bits |= BLOCK_GROUP_METADATA;
275         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
276                 bits |= BLOCK_GROUP_SYSTEM;
277         return bits;
278 }
279
280 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
281                                                  struct btrfs_block_group_cache
282                                                  *hint, u64 search_start,
283                                                  int data, int owner)
284 {
285         struct btrfs_block_group_cache *cache;
286         struct extent_io_tree *block_group_cache;
287         struct btrfs_block_group_cache *found_group = NULL;
288         struct btrfs_fs_info *info = root->fs_info;
289         u64 used;
290         u64 last = 0;
291         u64 hint_last;
292         u64 start;
293         u64 end;
294         u64 free_check;
295         u64 ptr;
296         int bit;
297         int ret;
298         int full_search = 0;
299         int factor = 8;
300
301         block_group_cache = &info->block_group_cache;
302
303         if (!owner)
304                 factor = 8;
305
306         bit = block_group_state_bits(data);
307
308         if (search_start) {
309                 struct btrfs_block_group_cache *shint;
310                 shint = btrfs_lookup_block_group(info, search_start);
311                 if (shint && block_group_bits(shint, data)) {
312                         used = btrfs_block_group_used(&shint->item);
313                         if (used + shint->pinned <
314                             div_factor(shint->key.offset, factor)) {
315                                 return shint;
316                         }
317                 }
318         }
319         if (hint && block_group_bits(hint, data)) {
320                 used = btrfs_block_group_used(&hint->item);
321                 if (used + hint->pinned <
322                     div_factor(hint->key.offset, factor)) {
323                         return hint;
324                 }
325                 last = hint->key.objectid + hint->key.offset;
326                 hint_last = last;
327         } else {
328                 if (hint)
329                         hint_last = max(hint->key.objectid, search_start);
330                 else
331                         hint_last = search_start;
332
333                 last = hint_last;
334         }
335 again:
336         while(1) {
337                 ret = find_first_extent_bit(block_group_cache, last,
338                                             &start, &end, bit);
339                 if (ret)
340                         break;
341
342                 ret = get_state_private(block_group_cache, start, &ptr);
343                 if (ret)
344                         break;
345
346                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
347                 last = cache->key.objectid + cache->key.offset;
348                 used = btrfs_block_group_used(&cache->item);
349
350                 if (full_search)
351                         free_check = cache->key.offset;
352                 else
353                         free_check = div_factor(cache->key.offset, factor);
354
355                 if (used + cache->pinned < free_check) {
356                         found_group = cache;
357                         goto found;
358                 }
359                 cond_resched();
360         }
361         if (!full_search) {
362                 last = search_start;
363                 full_search = 1;
364                 goto again;
365         }
366 found:
367         return found_group;
368 }
369
370 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
371                            u64 owner, u64 owner_offset)
372 {
373         u32 high_crc = ~(u32)0;
374         u32 low_crc = ~(u32)0;
375         __le64 lenum;
376
377         lenum = cpu_to_le64(root_objectid);
378         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
379         lenum = cpu_to_le64(ref_generation);
380         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
381         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
382                 lenum = cpu_to_le64(owner);
383                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
384                 lenum = cpu_to_le64(owner_offset);
385                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
386         }
387         return ((u64)high_crc << 32) | (u64)low_crc;
388 }
389
390 static int match_extent_ref(struct extent_buffer *leaf,
391                             struct btrfs_extent_ref *disk_ref,
392                             struct btrfs_extent_ref *cpu_ref)
393 {
394         int ret;
395         int len;
396
397         if (cpu_ref->objectid)
398                 len = sizeof(*cpu_ref);
399         else
400                 len = 2 * sizeof(u64);
401         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
402                                    len);
403         return ret == 0;
404 }
405
406 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
407                                           struct btrfs_root *root,
408                                           struct btrfs_path *path, u64 bytenr,
409                                           u64 root_objectid,
410                                           u64 ref_generation, u64 owner,
411                                           u64 owner_offset, int del)
412 {
413         u64 hash;
414         struct btrfs_key key;
415         struct btrfs_key found_key;
416         struct btrfs_extent_ref ref;
417         struct extent_buffer *leaf;
418         struct btrfs_extent_ref *disk_ref;
419         int ret;
420         int ret2;
421
422         btrfs_set_stack_ref_root(&ref, root_objectid);
423         btrfs_set_stack_ref_generation(&ref, ref_generation);
424         btrfs_set_stack_ref_objectid(&ref, owner);
425         btrfs_set_stack_ref_offset(&ref, owner_offset);
426
427         hash = hash_extent_ref(root_objectid, ref_generation, owner,
428                                owner_offset);
429         key.offset = hash;
430         key.objectid = bytenr;
431         key.type = BTRFS_EXTENT_REF_KEY;
432
433         while (1) {
434                 ret = btrfs_search_slot(trans, root, &key, path,
435                                         del ? -1 : 0, del);
436                 if (ret < 0)
437                         goto out;
438                 leaf = path->nodes[0];
439                 if (ret != 0) {
440                         u32 nritems = btrfs_header_nritems(leaf);
441                         if (path->slots[0] >= nritems) {
442                                 ret2 = btrfs_next_leaf(root, path);
443                                 if (ret2)
444                                         goto out;
445                                 leaf = path->nodes[0];
446                         }
447                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
448                         if (found_key.objectid != bytenr ||
449                             found_key.type != BTRFS_EXTENT_REF_KEY)
450                                 goto out;
451                         key.offset = found_key.offset;
452                         if (del) {
453                                 btrfs_release_path(root, path);
454                                 continue;
455                         }
456                 }
457                 disk_ref = btrfs_item_ptr(path->nodes[0],
458                                           path->slots[0],
459                                           struct btrfs_extent_ref);
460                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
461                         ret = 0;
462                         goto out;
463                 }
464                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
465                 key.offset = found_key.offset + 1;
466                 btrfs_release_path(root, path);
467         }
468 out:
469         return ret;
470 }
471
472 /*
473  * Back reference rules.  Back refs have three main goals:
474  *
475  * 1) differentiate between all holders of references to an extent so that
476  *    when a reference is dropped we can make sure it was a valid reference
477  *    before freeing the extent.
478  *
479  * 2) Provide enough information to quickly find the holders of an extent
480  *    if we notice a given block is corrupted or bad.
481  *
482  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
483  *    maintenance.  This is actually the same as #2, but with a slightly
484  *    different use case.
485  *
486  * File extents can be referenced by:
487  *
488  * - multiple snapshots, subvolumes, or different generations in one subvol
489  * - different files inside a single subvolume (in theory, not implemented yet)
490  * - different offsets inside a file (bookend extents in file.c)
491  *
492  * The extent ref structure has fields for:
493  *
494  * - Objectid of the subvolume root
495  * - Generation number of the tree holding the reference
496  * - objectid of the file holding the reference
497  * - offset in the file corresponding to the key holding the reference
498  *
499  * When a file extent is allocated the fields are filled in:
500  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
501  *
502  * When a leaf is cow'd new references are added for every file extent found
503  * in the leaf.  It looks the same as the create case, but trans->transid
504  * will be different when the block is cow'd.
505  *
506  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
507  *
508  * When a file extent is removed either during snapshot deletion or file
509  * truncation, the corresponding back reference is found
510  * by searching for:
511  *
512  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
513  *      inode objectid, offset in file)
514  *
515  * Btree extents can be referenced by:
516  *
517  * - Different subvolumes
518  * - Different generations of the same subvolume
519  *
520  * Storing sufficient information for a full reverse mapping of a btree
521  * block would require storing the lowest key of the block in the backref,
522  * and it would require updating that lowest key either before write out or
523  * every time it changed.  Instead, the objectid of the lowest key is stored
524  * along with the level of the tree block.  This provides a hint
525  * about where in the btree the block can be found.  Searches through the
526  * btree only need to look for a pointer to that block, so they stop one
527  * level higher than the level recorded in the backref.
528  *
529  * Some btrees do not do reference counting on their extents.  These
530  * include the extent tree and the tree of tree roots.  Backrefs for these
531  * trees always have a generation of zero.
532  *
533  * When a tree block is created, back references are inserted:
534  *
535  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
536  *
537  * When a tree block is cow'd in a reference counted root,
538  * new back references are added for all the blocks it points to.
539  * These are of the form (trans->transid will have increased since creation):
540  *
541  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
542  *
543  * Because the lowest_key_objectid and the level are just hints
544  * they are not used when backrefs are deleted.  When a backref is deleted:
545  *
546  * if backref was for a tree root:
547  *     root_objectid = root->root_key.objectid
548  * else
549  *     root_objectid = btrfs_header_owner(parent)
550  *
551  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
552  *
553  * Back Reference Key hashing:
554  *
555  * Back references have four fields, each 64 bits long.  Unfortunately,
556  * This is hashed into a single 64 bit number and placed into the key offset.
557  * The key objectid corresponds to the first byte in the extent, and the
558  * key type is set to BTRFS_EXTENT_REF_KEY
559  */
560 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
561                                  struct btrfs_root *root,
562                                  struct btrfs_path *path, u64 bytenr,
563                                  u64 root_objectid, u64 ref_generation,
564                                  u64 owner, u64 owner_offset)
565 {
566         u64 hash;
567         struct btrfs_key key;
568         struct btrfs_extent_ref ref;
569         struct btrfs_extent_ref *disk_ref;
570         int ret;
571
572         btrfs_set_stack_ref_root(&ref, root_objectid);
573         btrfs_set_stack_ref_generation(&ref, ref_generation);
574         btrfs_set_stack_ref_objectid(&ref, owner);
575         btrfs_set_stack_ref_offset(&ref, owner_offset);
576
577         hash = hash_extent_ref(root_objectid, ref_generation, owner,
578                                owner_offset);
579         key.offset = hash;
580         key.objectid = bytenr;
581         key.type = BTRFS_EXTENT_REF_KEY;
582
583         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
584         while (ret == -EEXIST) {
585                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
586                                           struct btrfs_extent_ref);
587                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
588                         goto out;
589                 key.offset++;
590                 btrfs_release_path(root, path);
591                 ret = btrfs_insert_empty_item(trans, root, path, &key,
592                                               sizeof(ref));
593         }
594         if (ret)
595                 goto out;
596         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
597                                   struct btrfs_extent_ref);
598         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
599                             sizeof(ref));
600         btrfs_mark_buffer_dirty(path->nodes[0]);
601 out:
602         btrfs_release_path(root, path);
603         return ret;
604 }
605
606 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
607                                 struct btrfs_root *root,
608                                 u64 bytenr, u64 num_bytes,
609                                 u64 root_objectid, u64 ref_generation,
610                                 u64 owner, u64 owner_offset)
611 {
612         struct btrfs_path *path;
613         int ret;
614         struct btrfs_key key;
615         struct extent_buffer *l;
616         struct btrfs_extent_item *item;
617         u32 refs;
618
619         WARN_ON(num_bytes < root->sectorsize);
620         path = btrfs_alloc_path();
621         if (!path)
622                 return -ENOMEM;
623
624         key.objectid = bytenr;
625         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
626         key.offset = num_bytes;
627         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
628                                 0, 1);
629         if (ret < 0)
630                 return ret;
631         if (ret != 0) {
632                 BUG();
633         }
634         BUG_ON(ret != 0);
635         l = path->nodes[0];
636         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
637         refs = btrfs_extent_refs(l, item);
638         btrfs_set_extent_refs(l, item, refs + 1);
639         btrfs_mark_buffer_dirty(path->nodes[0]);
640
641         btrfs_release_path(root->fs_info->extent_root, path);
642
643         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
644                                           path, bytenr, root_objectid,
645                                           ref_generation, owner, owner_offset);
646         BUG_ON(ret);
647         finish_current_insert(trans, root->fs_info->extent_root);
648         del_pending_extents(trans, root->fs_info->extent_root);
649
650         btrfs_free_path(path);
651         return 0;
652 }
653
654 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
655                          struct btrfs_root *root)
656 {
657         finish_current_insert(trans, root->fs_info->extent_root);
658         del_pending_extents(trans, root->fs_info->extent_root);
659         return 0;
660 }
661
662 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
663                              struct btrfs_root *root, u64 bytenr,
664                              u64 num_bytes, u32 *refs)
665 {
666         struct btrfs_path *path;
667         int ret;
668         struct btrfs_key key;
669         struct extent_buffer *l;
670         struct btrfs_extent_item *item;
671
672         WARN_ON(num_bytes < root->sectorsize);
673         path = btrfs_alloc_path();
674         key.objectid = bytenr;
675         key.offset = num_bytes;
676         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
677         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
678                                 0, 0);
679         if (ret < 0)
680                 goto out;
681         if (ret != 0) {
682                 btrfs_print_leaf(root, path->nodes[0]);
683                 printk("failed to find block number %Lu\n", bytenr);
684                 BUG();
685         }
686         l = path->nodes[0];
687         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
688         *refs = btrfs_extent_refs(l, item);
689 out:
690         btrfs_free_path(path);
691         return 0;
692 }
693
694 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
695                                   struct btrfs_path *count_path,
696                                   u64 first_extent)
697 {
698         struct btrfs_root *extent_root = root->fs_info->extent_root;
699         struct btrfs_path *path;
700         u64 bytenr;
701         u64 found_objectid;
702         u64 root_objectid = root->root_key.objectid;
703         u32 total_count = 0;
704         u32 cur_count;
705         u32 refs;
706         u32 nritems;
707         int ret;
708         struct btrfs_key key;
709         struct btrfs_key found_key;
710         struct extent_buffer *l;
711         struct btrfs_extent_item *item;
712         struct btrfs_extent_ref *ref_item;
713         int level = -1;
714
715         path = btrfs_alloc_path();
716 again:
717         if (level == -1)
718                 bytenr = first_extent;
719         else
720                 bytenr = count_path->nodes[level]->start;
721
722         cur_count = 0;
723         key.objectid = bytenr;
724         key.offset = 0;
725
726         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
727         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
728         if (ret < 0)
729                 goto out;
730         BUG_ON(ret == 0);
731
732         l = path->nodes[0];
733         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
734
735         if (found_key.objectid != bytenr ||
736             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
737                 goto out;
738         }
739
740         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
741         refs = btrfs_extent_refs(l, item);
742         while (1) {
743                 nritems = btrfs_header_nritems(l);
744                 if (path->slots[0] >= nritems) {
745                         ret = btrfs_next_leaf(extent_root, path);
746                         if (ret == 0)
747                                 continue;
748                         break;
749                 }
750                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
751                 if (found_key.objectid != bytenr)
752                         break;
753                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
754                         path->slots[0]++;
755                         continue;
756                 }
757
758                 cur_count++;
759                 ref_item = btrfs_item_ptr(l, path->slots[0],
760                                           struct btrfs_extent_ref);
761                 found_objectid = btrfs_ref_root(l, ref_item);
762
763                 if (found_objectid != root_objectid) {
764                         total_count = 2;
765                         goto out;
766                 }
767                 total_count = 1;
768                 path->slots[0]++;
769         }
770         if (cur_count == 0) {
771                 total_count = 0;
772                 goto out;
773         }
774         if (level >= 0 && root->node == count_path->nodes[level])
775                 goto out;
776         level++;
777         btrfs_release_path(root, path);
778         goto again;
779
780 out:
781         btrfs_free_path(path);
782         return total_count;
783 }
784 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
785                        struct btrfs_root *root, u64 owner_objectid)
786 {
787         u64 generation;
788         u64 key_objectid;
789         u64 level;
790         u32 nritems;
791         struct btrfs_disk_key disk_key;
792
793         level = btrfs_header_level(root->node);
794         generation = trans->transid;
795         nritems = btrfs_header_nritems(root->node);
796         if (nritems > 0) {
797                 if (level == 0)
798                         btrfs_item_key(root->node, &disk_key, 0);
799                 else
800                         btrfs_node_key(root->node, &disk_key, 0);
801                 key_objectid = btrfs_disk_key_objectid(&disk_key);
802         } else {
803                 key_objectid = 0;
804         }
805         return btrfs_inc_extent_ref(trans, root, root->node->start,
806                                     root->node->len, owner_objectid,
807                                     generation, level, key_objectid);
808 }
809
810 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
811                   struct extent_buffer *buf)
812 {
813         u64 bytenr;
814         u32 nritems;
815         struct btrfs_key key;
816         struct btrfs_file_extent_item *fi;
817         int i;
818         int level;
819         int ret;
820         int faili;
821
822         if (!root->ref_cows)
823                 return 0;
824
825         level = btrfs_header_level(buf);
826         nritems = btrfs_header_nritems(buf);
827         for (i = 0; i < nritems; i++) {
828                 if (level == 0) {
829                         u64 disk_bytenr;
830                         btrfs_item_key_to_cpu(buf, &key, i);
831                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
832                                 continue;
833                         fi = btrfs_item_ptr(buf, i,
834                                             struct btrfs_file_extent_item);
835                         if (btrfs_file_extent_type(buf, fi) ==
836                             BTRFS_FILE_EXTENT_INLINE)
837                                 continue;
838                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
839                         if (disk_bytenr == 0)
840                                 continue;
841                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
842                                     btrfs_file_extent_disk_num_bytes(buf, fi),
843                                     root->root_key.objectid, trans->transid,
844                                     key.objectid, key.offset);
845                         if (ret) {
846                                 faili = i;
847                                 goto fail;
848                         }
849                 } else {
850                         bytenr = btrfs_node_blockptr(buf, i);
851                         btrfs_node_key_to_cpu(buf, &key, i);
852                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
853                                            btrfs_level_size(root, level - 1),
854                                            root->root_key.objectid,
855                                            trans->transid,
856                                            level - 1, key.objectid);
857                         if (ret) {
858                                 faili = i;
859                                 goto fail;
860                         }
861                 }
862         }
863         return 0;
864 fail:
865         WARN_ON(1);
866 #if 0
867         for (i =0; i < faili; i++) {
868                 if (level == 0) {
869                         u64 disk_bytenr;
870                         btrfs_item_key_to_cpu(buf, &key, i);
871                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
872                                 continue;
873                         fi = btrfs_item_ptr(buf, i,
874                                             struct btrfs_file_extent_item);
875                         if (btrfs_file_extent_type(buf, fi) ==
876                             BTRFS_FILE_EXTENT_INLINE)
877                                 continue;
878                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
879                         if (disk_bytenr == 0)
880                                 continue;
881                         err = btrfs_free_extent(trans, root, disk_bytenr,
882                                     btrfs_file_extent_disk_num_bytes(buf,
883                                                                       fi), 0);
884                         BUG_ON(err);
885                 } else {
886                         bytenr = btrfs_node_blockptr(buf, i);
887                         err = btrfs_free_extent(trans, root, bytenr,
888                                         btrfs_level_size(root, level - 1), 0);
889                         BUG_ON(err);
890                 }
891         }
892 #endif
893         return ret;
894 }
895
896 static int write_one_cache_group(struct btrfs_trans_handle *trans,
897                                  struct btrfs_root *root,
898                                  struct btrfs_path *path,
899                                  struct btrfs_block_group_cache *cache)
900 {
901         int ret;
902         int pending_ret;
903         struct btrfs_root *extent_root = root->fs_info->extent_root;
904         unsigned long bi;
905         struct extent_buffer *leaf;
906
907         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
908         if (ret < 0)
909                 goto fail;
910         BUG_ON(ret);
911
912         leaf = path->nodes[0];
913         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
914         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
915         btrfs_mark_buffer_dirty(leaf);
916         btrfs_release_path(extent_root, path);
917 fail:
918         finish_current_insert(trans, extent_root);
919         pending_ret = del_pending_extents(trans, extent_root);
920         if (ret)
921                 return ret;
922         if (pending_ret)
923                 return pending_ret;
924         return 0;
925
926 }
927
928 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
929                                    struct btrfs_root *root)
930 {
931         struct extent_io_tree *block_group_cache;
932         struct btrfs_block_group_cache *cache;
933         int ret;
934         int err = 0;
935         int werr = 0;
936         struct btrfs_path *path;
937         u64 last = 0;
938         u64 start;
939         u64 end;
940         u64 ptr;
941
942         block_group_cache = &root->fs_info->block_group_cache;
943         path = btrfs_alloc_path();
944         if (!path)
945                 return -ENOMEM;
946
947         while(1) {
948                 ret = find_first_extent_bit(block_group_cache, last,
949                                             &start, &end, BLOCK_GROUP_DIRTY);
950                 if (ret)
951                         break;
952
953                 last = end + 1;
954                 ret = get_state_private(block_group_cache, start, &ptr);
955                 if (ret)
956                         break;
957
958                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
959                 err = write_one_cache_group(trans, root,
960                                             path, cache);
961                 /*
962                  * if we fail to write the cache group, we want
963                  * to keep it marked dirty in hopes that a later
964                  * write will work
965                  */
966                 if (err) {
967                         werr = err;
968                         continue;
969                 }
970                 clear_extent_bits(block_group_cache, start, end,
971                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
972         }
973         btrfs_free_path(path);
974         return werr;
975 }
976
977 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
978                                                   u64 flags)
979 {
980         struct list_head *head = &info->space_info;
981         struct list_head *cur;
982         struct btrfs_space_info *found;
983         list_for_each(cur, head) {
984                 found = list_entry(cur, struct btrfs_space_info, list);
985                 if (found->flags == flags)
986                         return found;
987         }
988         return NULL;
989
990 }
991
992 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
993                           struct btrfs_root *extent_root, u64 alloc_bytes,
994                           u64 flags)
995 {
996         struct btrfs_space_info *space_info;
997         u64 thresh;
998         u64 start;
999         u64 num_bytes;
1000         int ret;
1001
1002         space_info = __find_space_info(extent_root->fs_info, flags);
1003         BUG_ON(!space_info);
1004
1005         if (space_info->full)
1006                 return 0;
1007
1008         thresh = div_factor(space_info->total_bytes, 7);
1009         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1010             thresh)
1011                 return 0;
1012
1013         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1014         if (ret == -ENOSPC) {
1015 printk("space info full %Lu\n", flags);
1016                 space_info->full = 1;
1017                 return 0;
1018         }
1019
1020         BUG_ON(ret);
1021
1022         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1023                      extent_root->fs_info->chunk_root->root_key.objectid,
1024                      start, num_bytes);
1025         BUG_ON(ret);
1026         return 0;
1027 }
1028
1029 static int update_block_group(struct btrfs_trans_handle *trans,
1030                               struct btrfs_root *root,
1031                               u64 bytenr, u64 num_bytes, int alloc,
1032                               int mark_free)
1033 {
1034         struct btrfs_block_group_cache *cache;
1035         struct btrfs_fs_info *info = root->fs_info;
1036         u64 total = num_bytes;
1037         u64 old_val;
1038         u64 byte_in_group;
1039         u64 start;
1040         u64 end;
1041
1042         while(total) {
1043                 cache = btrfs_lookup_block_group(info, bytenr);
1044                 if (!cache) {
1045                         return -1;
1046                 }
1047                 byte_in_group = bytenr - cache->key.objectid;
1048                 WARN_ON(byte_in_group > cache->key.offset);
1049                 start = cache->key.objectid;
1050                 end = start + cache->key.offset - 1;
1051                 set_extent_bits(&info->block_group_cache, start, end,
1052                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1053
1054                 old_val = btrfs_block_group_used(&cache->item);
1055                 num_bytes = min(total, cache->key.offset - byte_in_group);
1056                 if (alloc) {
1057                         old_val += num_bytes;
1058                         cache->space_info->bytes_used += num_bytes;
1059                 } else {
1060                         old_val -= num_bytes;
1061                         cache->space_info->bytes_used -= num_bytes;
1062                         if (mark_free) {
1063                                 set_extent_dirty(&info->free_space_cache,
1064                                                  bytenr, bytenr + num_bytes - 1,
1065                                                  GFP_NOFS);
1066                         }
1067                 }
1068                 btrfs_set_block_group_used(&cache->item, old_val);
1069                 total -= num_bytes;
1070                 bytenr += num_bytes;
1071         }
1072         return 0;
1073 }
1074
1075 static int update_pinned_extents(struct btrfs_root *root,
1076                                 u64 bytenr, u64 num, int pin)
1077 {
1078         u64 len;
1079         struct btrfs_block_group_cache *cache;
1080         struct btrfs_fs_info *fs_info = root->fs_info;
1081
1082         if (pin) {
1083                 set_extent_dirty(&fs_info->pinned_extents,
1084                                 bytenr, bytenr + num - 1, GFP_NOFS);
1085         } else {
1086                 clear_extent_dirty(&fs_info->pinned_extents,
1087                                 bytenr, bytenr + num - 1, GFP_NOFS);
1088         }
1089         while (num > 0) {
1090                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1091                 WARN_ON(!cache);
1092                 len = min(num, cache->key.offset -
1093                           (bytenr - cache->key.objectid));
1094                 if (pin) {
1095                         cache->pinned += len;
1096                         cache->space_info->bytes_pinned += len;
1097                         fs_info->total_pinned += len;
1098                 } else {
1099                         cache->pinned -= len;
1100                         cache->space_info->bytes_pinned -= len;
1101                         fs_info->total_pinned -= len;
1102                 }
1103                 bytenr += len;
1104                 num -= len;
1105         }
1106         return 0;
1107 }
1108
1109 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1110 {
1111         u64 last = 0;
1112         u64 start;
1113         u64 end;
1114         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1115         int ret;
1116
1117         while(1) {
1118                 ret = find_first_extent_bit(pinned_extents, last,
1119                                             &start, &end, EXTENT_DIRTY);
1120                 if (ret)
1121                         break;
1122                 set_extent_dirty(copy, start, end, GFP_NOFS);
1123                 last = end + 1;
1124         }
1125         return 0;
1126 }
1127
1128 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1129                                struct btrfs_root *root,
1130                                struct extent_io_tree *unpin)
1131 {
1132         u64 start;
1133         u64 end;
1134         int ret;
1135         struct extent_io_tree *free_space_cache;
1136         free_space_cache = &root->fs_info->free_space_cache;
1137
1138         while(1) {
1139                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1140                                             EXTENT_DIRTY);
1141                 if (ret)
1142                         break;
1143                 update_pinned_extents(root, start, end + 1 - start, 0);
1144                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1145                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1146         }
1147         return 0;
1148 }
1149
1150 static int finish_current_insert(struct btrfs_trans_handle *trans,
1151                                  struct btrfs_root *extent_root)
1152 {
1153         u64 start;
1154         u64 end;
1155         struct btrfs_fs_info *info = extent_root->fs_info;
1156         struct extent_buffer *eb;
1157         struct btrfs_path *path;
1158         struct btrfs_key ins;
1159         struct btrfs_disk_key first;
1160         struct btrfs_extent_item extent_item;
1161         int ret;
1162         int level;
1163         int err = 0;
1164
1165         btrfs_set_stack_extent_refs(&extent_item, 1);
1166         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1167         path = btrfs_alloc_path();
1168
1169         while(1) {
1170                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1171                                             &end, EXTENT_LOCKED);
1172                 if (ret)
1173                         break;
1174
1175                 ins.objectid = start;
1176                 ins.offset = end + 1 - start;
1177                 err = btrfs_insert_item(trans, extent_root, &ins,
1178                                         &extent_item, sizeof(extent_item));
1179                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1180                                   GFP_NOFS);
1181                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1182                 level = btrfs_header_level(eb);
1183                 if (level == 0) {
1184                         btrfs_item_key(eb, &first, 0);
1185                 } else {
1186                         btrfs_node_key(eb, &first, 0);
1187                 }
1188                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1189                                           start, extent_root->root_key.objectid,
1190                                           0, level,
1191                                           btrfs_disk_key_objectid(&first));
1192                 BUG_ON(err);
1193                 free_extent_buffer(eb);
1194         }
1195         btrfs_free_path(path);
1196         return 0;
1197 }
1198
1199 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1200                           int pending)
1201 {
1202         int err = 0;
1203         struct extent_buffer *buf;
1204
1205         if (!pending) {
1206                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1207                 if (buf) {
1208                         if (btrfs_buffer_uptodate(buf)) {
1209                                 u64 transid =
1210                                     root->fs_info->running_transaction->transid;
1211                                 if (btrfs_header_generation(buf) == transid) {
1212                                         free_extent_buffer(buf);
1213                                         return 1;
1214                                 }
1215                         }
1216                         free_extent_buffer(buf);
1217                 }
1218                 update_pinned_extents(root, bytenr, num_bytes, 1);
1219         } else {
1220                 set_extent_bits(&root->fs_info->pending_del,
1221                                 bytenr, bytenr + num_bytes - 1,
1222                                 EXTENT_LOCKED, GFP_NOFS);
1223         }
1224         BUG_ON(err < 0);
1225         return 0;
1226 }
1227
1228 /*
1229  * remove an extent from the root, returns 0 on success
1230  */
1231 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1232                          *root, u64 bytenr, u64 num_bytes,
1233                          u64 root_objectid, u64 ref_generation,
1234                          u64 owner_objectid, u64 owner_offset, int pin,
1235                          int mark_free)
1236 {
1237         struct btrfs_path *path;
1238         struct btrfs_key key;
1239         struct btrfs_fs_info *info = root->fs_info;
1240         struct btrfs_extent_ops *ops = info->extent_ops;
1241         struct btrfs_root *extent_root = info->extent_root;
1242         struct extent_buffer *leaf;
1243         int ret;
1244         int extent_slot = 0;
1245         int found_extent = 0;
1246         int num_to_del = 1;
1247         struct btrfs_extent_item *ei;
1248         u32 refs;
1249
1250         key.objectid = bytenr;
1251         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1252         key.offset = num_bytes;
1253
1254         path = btrfs_alloc_path();
1255         if (!path)
1256                 return -ENOMEM;
1257
1258         ret = lookup_extent_backref(trans, extent_root, path,
1259                                     bytenr, root_objectid,
1260                                     ref_generation,
1261                                     owner_objectid, owner_offset, 1);
1262         if (ret == 0) {
1263                 struct btrfs_key found_key;
1264                 extent_slot = path->slots[0];
1265                 while(extent_slot > 0) {
1266                         extent_slot--;
1267                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1268                                               extent_slot);
1269                         if (found_key.objectid != bytenr)
1270                                 break;
1271                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1272                             found_key.offset == num_bytes) {
1273                                 found_extent = 1;
1274                                 break;
1275                         }
1276                         if (path->slots[0] - extent_slot > 5)
1277                                 break;
1278                 }
1279                 if (!found_extent)
1280                         ret = btrfs_del_item(trans, extent_root, path);
1281         } else {
1282                 btrfs_print_leaf(extent_root, path->nodes[0]);
1283                 WARN_ON(1);
1284                 printk("Unable to find ref byte nr %Lu root %Lu "
1285                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1286                        root_objectid, ref_generation, owner_objectid,
1287                        owner_offset);
1288         }
1289         if (!found_extent) {
1290                 btrfs_release_path(extent_root, path);
1291                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1292                 if (ret < 0)
1293                         return ret;
1294                 BUG_ON(ret);
1295                 extent_slot = path->slots[0];
1296         }
1297
1298         leaf = path->nodes[0];
1299         ei = btrfs_item_ptr(leaf, extent_slot,
1300                             struct btrfs_extent_item);
1301         refs = btrfs_extent_refs(leaf, ei);
1302         BUG_ON(refs == 0);
1303         refs -= 1;
1304         btrfs_set_extent_refs(leaf, ei, refs);
1305
1306         btrfs_mark_buffer_dirty(leaf);
1307
1308         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1309                 /* if the back ref and the extent are next to each other
1310                  * they get deleted below in one shot
1311                  */
1312                 path->slots[0] = extent_slot;
1313                 num_to_del = 2;
1314         } else if (found_extent) {
1315                 /* otherwise delete the extent back ref */
1316                 ret = btrfs_del_item(trans, extent_root, path);
1317                 BUG_ON(ret);
1318                 /* if refs are 0, we need to setup the path for deletion */
1319                 if (refs == 0) {
1320                         btrfs_release_path(extent_root, path);
1321                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1322                                                 -1, 1);
1323                         if (ret < 0)
1324                                 return ret;
1325                         BUG_ON(ret);
1326                 }
1327         }
1328
1329         if (refs == 0) {
1330                 u64 super_used;
1331                 u64 root_used;
1332
1333                 if (pin) {
1334                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1335                         if (ret > 0)
1336                                 mark_free = 1;
1337                         BUG_ON(ret < 0);
1338                 }
1339
1340                 /* block accounting for super block */
1341                 super_used = btrfs_super_bytes_used(&info->super_copy);
1342                 btrfs_set_super_bytes_used(&info->super_copy,
1343                                            super_used - num_bytes);
1344
1345                 /* block accounting for root item */
1346                 root_used = btrfs_root_used(&root->root_item);
1347                 btrfs_set_root_used(&root->root_item,
1348                                            root_used - num_bytes);
1349                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1350                                       num_to_del);
1351                 if (ret)
1352                         return ret;
1353
1354                 if (ops && ops->free_extent)
1355                         ops->free_extent(root, bytenr, num_bytes);
1356
1357                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1358                                          mark_free);
1359                 BUG_ON(ret);
1360         }
1361         btrfs_free_path(path);
1362         finish_current_insert(trans, extent_root);
1363         return ret;
1364 }
1365
1366 /*
1367  * find all the blocks marked as pending in the radix tree and remove
1368  * them from the extent map
1369  */
1370 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1371                                btrfs_root *extent_root)
1372 {
1373         int ret;
1374         int err = 0;
1375         u64 start;
1376         u64 end;
1377         struct extent_io_tree *pending_del;
1378         struct extent_io_tree *pinned_extents;
1379
1380         pending_del = &extent_root->fs_info->pending_del;
1381         pinned_extents = &extent_root->fs_info->pinned_extents;
1382
1383         while(1) {
1384                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1385                                             EXTENT_LOCKED);
1386                 if (ret)
1387                         break;
1388                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1389                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1390                                   GFP_NOFS);
1391                 ret = __free_extent(trans, extent_root,
1392                                      start, end + 1 - start,
1393                                      extent_root->root_key.objectid,
1394                                      0, 0, 0, 0, 0);
1395                 if (ret)
1396                         err = ret;
1397         }
1398         return err;
1399 }
1400
1401 /*
1402  * remove an extent from the root, returns 0 on success
1403  */
1404 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1405                       *root, u64 bytenr, u64 num_bytes,
1406                       u64 root_objectid, u64 ref_generation,
1407                       u64 owner_objectid, u64 owner_offset, int pin)
1408 {
1409         struct btrfs_root *extent_root = root->fs_info->extent_root;
1410         int pending_ret;
1411         int ret;
1412
1413         WARN_ON(num_bytes < root->sectorsize);
1414         if (!root->ref_cows)
1415                 ref_generation = 0;
1416
1417         if (root == extent_root) {
1418                 pin_down_bytes(root, bytenr, num_bytes, 1);
1419                 return 0;
1420         }
1421         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1422                             ref_generation, owner_objectid, owner_offset,
1423                             pin, pin == 0);
1424         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1425         return ret ? ret : pending_ret;
1426 }
1427
1428 static u64 stripe_align(struct btrfs_root *root, u64 val)
1429 {
1430         u64 mask = ((u64)root->stripesize - 1);
1431         u64 ret = (val + mask) & ~mask;
1432         return ret;
1433 }
1434
1435 /*
1436  * walks the btree of allocated extents and find a hole of a given size.
1437  * The key ins is changed to record the hole:
1438  * ins->objectid == block start
1439  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1440  * ins->offset == number of blocks
1441  * Any available blocks before search_start are skipped.
1442  */
1443 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1444                                      struct btrfs_root *orig_root,
1445                                      u64 num_bytes, u64 empty_size,
1446                                      u64 search_start, u64 search_end,
1447                                      u64 hint_byte, struct btrfs_key *ins,
1448                                      u64 exclude_start, u64 exclude_nr,
1449                                      int data)
1450 {
1451         int ret;
1452         u64 orig_search_start = search_start;
1453         struct btrfs_root * root = orig_root->fs_info->extent_root;
1454         struct btrfs_fs_info *info = root->fs_info;
1455         u64 total_needed = num_bytes;
1456         struct btrfs_block_group_cache *block_group;
1457         int full_scan = 0;
1458         int wrapped = 0;
1459
1460         WARN_ON(num_bytes < root->sectorsize);
1461         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1462
1463         if (search_end == (u64)-1)
1464                 search_end = btrfs_super_total_bytes(&info->super_copy);
1465
1466         if (hint_byte) {
1467                 block_group = btrfs_lookup_block_group(info, hint_byte);
1468                 if (!block_group)
1469                         hint_byte = search_start;
1470                 block_group = btrfs_find_block_group(root, block_group,
1471                                                      hint_byte, data, 1);
1472         } else {
1473                 block_group = btrfs_find_block_group(root,
1474                                                      trans->block_group,
1475                                                      search_start, data, 1);
1476         }
1477
1478         total_needed += empty_size;
1479
1480 check_failed:
1481         if (!block_group) {
1482                 block_group = btrfs_lookup_block_group(info, search_start);
1483                 if (!block_group)
1484                         block_group = btrfs_lookup_block_group(info,
1485                                                        orig_search_start);
1486         }
1487         ret = find_search_start(root, &block_group, &search_start,
1488                                 total_needed, data);
1489         if (ret)
1490                 goto error;
1491
1492         search_start = stripe_align(root, search_start);
1493         ins->objectid = search_start;
1494         ins->offset = num_bytes;
1495
1496         if (ins->objectid + num_bytes >= search_end)
1497                 goto enospc;
1498
1499         if (ins->objectid + num_bytes >
1500             block_group->key.objectid + block_group->key.offset) {
1501                 search_start = block_group->key.objectid +
1502                         block_group->key.offset;
1503                 goto new_group;
1504         }
1505
1506         if (test_range_bit(&info->extent_ins, ins->objectid,
1507                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1508                 search_start = ins->objectid + num_bytes;
1509                 goto new_group;
1510         }
1511
1512         if (test_range_bit(&info->pinned_extents, ins->objectid,
1513                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1514                 search_start = ins->objectid + num_bytes;
1515                 goto new_group;
1516         }
1517
1518         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1519             ins->objectid < exclude_start + exclude_nr)) {
1520                 search_start = exclude_start + exclude_nr;
1521                 goto new_group;
1522         }
1523
1524         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1525                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1526                 if (block_group)
1527                         trans->block_group = block_group;
1528         }
1529         ins->offset = num_bytes;
1530         return 0;
1531
1532 new_group:
1533         if (search_start + num_bytes >= search_end) {
1534 enospc:
1535                 search_start = orig_search_start;
1536                 if (full_scan) {
1537                         ret = -ENOSPC;
1538                         goto error;
1539                 }
1540                 if (wrapped) {
1541                         if (!full_scan)
1542                                 total_needed -= empty_size;
1543                         full_scan = 1;
1544                 } else
1545                         wrapped = 1;
1546         }
1547         block_group = btrfs_lookup_block_group(info, search_start);
1548         cond_resched();
1549         block_group = btrfs_find_block_group(root, block_group,
1550                                              search_start, data, 0);
1551         goto check_failed;
1552
1553 error:
1554         return ret;
1555 }
1556 /*
1557  * finds a free extent and does all the dirty work required for allocation
1558  * returns the key for the extent through ins, and a tree buffer for
1559  * the first block of the extent through buf.
1560  *
1561  * returns 0 if everything worked, non-zero otherwise.
1562  */
1563 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1564                        struct btrfs_root *root,
1565                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1566                        u64 owner, u64 owner_offset,
1567                        u64 empty_size, u64 hint_byte,
1568                        u64 search_end, struct btrfs_key *ins, int data)
1569 {
1570         int ret;
1571         int pending_ret;
1572         u64 super_used, root_used;
1573         u64 search_start = 0;
1574         struct btrfs_fs_info *info = root->fs_info;
1575         struct btrfs_extent_ops *ops = info->extent_ops;
1576         u32 sizes[2];
1577         struct btrfs_root *extent_root = info->extent_root;
1578         struct btrfs_path *path;
1579         struct btrfs_extent_item *extent_item;
1580         struct btrfs_extent_ref *ref;
1581         struct btrfs_key keys[2];
1582
1583         if (data) {
1584                 data = BTRFS_BLOCK_GROUP_DATA;
1585         } else if (root == root->fs_info->chunk_root ||
1586                    info->force_system_allocs) {
1587                 data = BTRFS_BLOCK_GROUP_SYSTEM;
1588         } else {
1589                 data = BTRFS_BLOCK_GROUP_METADATA;
1590         }
1591
1592         if (root->ref_cows) {
1593                 if (data != BTRFS_BLOCK_GROUP_METADATA) {
1594                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1595                                              num_bytes,
1596                                              BTRFS_BLOCK_GROUP_METADATA);
1597                         BUG_ON(ret);
1598                 }
1599                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1600                                      num_bytes, data);
1601                 BUG_ON(ret);
1602         }
1603
1604         WARN_ON(num_bytes < root->sectorsize);
1605         if (ops && ops->alloc_extent) {
1606                 ret = ops->alloc_extent(root, num_bytes, hint_byte, ins);
1607         } else {
1608                 ret = find_free_extent(trans, root, num_bytes, empty_size,
1609                                         search_start, search_end, hint_byte,
1610                                         ins, trans->alloc_exclude_start,
1611                                         trans->alloc_exclude_nr, data);
1612         }
1613         BUG_ON(ret);
1614         if (ret)
1615                 return ret;
1616
1617         /* block accounting for super block */
1618         super_used = btrfs_super_bytes_used(&info->super_copy);
1619         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1620
1621         /* block accounting for root item */
1622         root_used = btrfs_root_used(&root->root_item);
1623         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1624
1625         clear_extent_dirty(&root->fs_info->free_space_cache,
1626                            ins->objectid, ins->objectid + ins->offset - 1,
1627                            GFP_NOFS);
1628
1629         if (root == extent_root) {
1630                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1631                                 ins->objectid + ins->offset - 1,
1632                                 EXTENT_LOCKED, GFP_NOFS);
1633                 goto update_block;
1634         }
1635
1636         WARN_ON(trans->alloc_exclude_nr);
1637         trans->alloc_exclude_start = ins->objectid;
1638         trans->alloc_exclude_nr = ins->offset;
1639
1640         memcpy(&keys[0], ins, sizeof(*ins));
1641         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1642                                          owner, owner_offset);
1643         keys[1].objectid = ins->objectid;
1644         keys[1].type = BTRFS_EXTENT_REF_KEY;
1645         sizes[0] = sizeof(*extent_item);
1646         sizes[1] = sizeof(*ref);
1647
1648         path = btrfs_alloc_path();
1649         BUG_ON(!path);
1650
1651         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1652                                        sizes, 2);
1653
1654         BUG_ON(ret);
1655         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1656                                      struct btrfs_extent_item);
1657         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1658         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1659                              struct btrfs_extent_ref);
1660
1661         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1662         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1663         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1664         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1665
1666         btrfs_mark_buffer_dirty(path->nodes[0]);
1667
1668         trans->alloc_exclude_start = 0;
1669         trans->alloc_exclude_nr = 0;
1670         btrfs_free_path(path);
1671         finish_current_insert(trans, extent_root);
1672         pending_ret = del_pending_extents(trans, extent_root);
1673
1674         if (ret) {
1675                 return ret;
1676         }
1677         if (pending_ret) {
1678                 return pending_ret;
1679         }
1680
1681 update_block:
1682         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1683         if (ret) {
1684                 printk("update block group failed for %Lu %Lu\n",
1685                        ins->objectid, ins->offset);
1686                 BUG();
1687         }
1688         return 0;
1689 }
1690
1691 /*
1692  * helper function to allocate a block for a given tree
1693  * returns the tree buffer or NULL.
1694  */
1695 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1696                                              struct btrfs_root *root,
1697                                              u32 blocksize,
1698                                              u64 root_objectid, u64 hint,
1699                                              u64 empty_size)
1700 {
1701         u64 ref_generation;
1702
1703         if (root->ref_cows)
1704                 ref_generation = trans->transid;
1705         else
1706                 ref_generation = 0;
1707
1708
1709         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1710                                         ref_generation, 0, 0, hint, empty_size);
1711 }
1712
1713 /*
1714  * helper function to allocate a block for a given tree
1715  * returns the tree buffer or NULL.
1716  */
1717 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1718                                              struct btrfs_root *root,
1719                                              u32 blocksize,
1720                                              u64 root_objectid,
1721                                              u64 ref_generation,
1722                                              u64 first_objectid,
1723                                              int level,
1724                                              u64 hint,
1725                                              u64 empty_size)
1726 {
1727         struct btrfs_key ins;
1728         int ret;
1729         struct extent_buffer *buf;
1730
1731         ret = btrfs_alloc_extent(trans, root, blocksize,
1732                                  root_objectid, ref_generation,
1733                                  level, first_objectid, empty_size, hint,
1734                                  (u64)-1, &ins, 0);
1735         if (ret) {
1736                 BUG_ON(ret > 0);
1737                 return ERR_PTR(ret);
1738         }
1739         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1740         if (!buf) {
1741                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1742                                   root->root_key.objectid, ref_generation,
1743                                   0, 0, 0);
1744                 BUG_ON(1);
1745                 return ERR_PTR(-ENOMEM);
1746         }
1747         btrfs_set_buffer_uptodate(buf);
1748         trans->blocks_used++;
1749         return buf;
1750 }
1751
1752 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1753                                   struct btrfs_root *root,
1754                                   struct extent_buffer *leaf)
1755 {
1756         u64 leaf_owner;
1757         u64 leaf_generation;
1758         struct btrfs_key key;
1759         struct btrfs_file_extent_item *fi;
1760         int i;
1761         int nritems;
1762         int ret;
1763
1764         BUG_ON(!btrfs_is_leaf(leaf));
1765         nritems = btrfs_header_nritems(leaf);
1766         leaf_owner = btrfs_header_owner(leaf);
1767         leaf_generation = btrfs_header_generation(leaf);
1768
1769         for (i = 0; i < nritems; i++) {
1770                 u64 disk_bytenr;
1771
1772                 btrfs_item_key_to_cpu(leaf, &key, i);
1773                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1774                         continue;
1775                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1776                 if (btrfs_file_extent_type(leaf, fi) ==
1777                     BTRFS_FILE_EXTENT_INLINE)
1778                         continue;
1779                 /*
1780                  * FIXME make sure to insert a trans record that
1781                  * repeats the snapshot del on crash
1782                  */
1783                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1784                 if (disk_bytenr == 0)
1785                         continue;
1786                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1787                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1788                                 leaf_owner, leaf_generation,
1789                                 key.objectid, key.offset, 0);
1790                 BUG_ON(ret);
1791         }
1792         return 0;
1793 }
1794
1795 static void noinline reada_walk_down(struct btrfs_root *root,
1796                                      struct extent_buffer *node,
1797                                      int slot)
1798 {
1799         u64 bytenr;
1800         u64 last = 0;
1801         u32 nritems;
1802         u32 refs;
1803         u32 blocksize;
1804         int ret;
1805         int i;
1806         int level;
1807         int skipped = 0;
1808
1809         nritems = btrfs_header_nritems(node);
1810         level = btrfs_header_level(node);
1811         if (level)
1812                 return;
1813
1814         for (i = slot; i < nritems && skipped < 32; i++) {
1815                 bytenr = btrfs_node_blockptr(node, i);
1816                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1817                              (last > bytenr && last - bytenr > 32 * 1024))) {
1818                         skipped++;
1819                         continue;
1820                 }
1821                 blocksize = btrfs_level_size(root, level - 1);
1822                 if (i != slot) {
1823                         ret = lookup_extent_ref(NULL, root, bytenr,
1824                                                 blocksize, &refs);
1825                         BUG_ON(ret);
1826                         if (refs != 1) {
1827                                 skipped++;
1828                                 continue;
1829                         }
1830                 }
1831                 mutex_unlock(&root->fs_info->fs_mutex);
1832                 ret = readahead_tree_block(root, bytenr, blocksize);
1833                 last = bytenr + blocksize;
1834                 cond_resched();
1835                 mutex_lock(&root->fs_info->fs_mutex);
1836                 if (ret)
1837                         break;
1838         }
1839 }
1840
1841 /*
1842  * helper function for drop_snapshot, this walks down the tree dropping ref
1843  * counts as it goes.
1844  */
1845 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
1846                                    struct btrfs_root *root,
1847                                    struct btrfs_path *path, int *level)
1848 {
1849         u64 root_owner;
1850         u64 root_gen;
1851         u64 bytenr;
1852         struct extent_buffer *next;
1853         struct extent_buffer *cur;
1854         struct extent_buffer *parent;
1855         u32 blocksize;
1856         int ret;
1857         u32 refs;
1858
1859         WARN_ON(*level < 0);
1860         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1861         ret = lookup_extent_ref(trans, root,
1862                                 path->nodes[*level]->start,
1863                                 path->nodes[*level]->len, &refs);
1864         BUG_ON(ret);
1865         if (refs > 1)
1866                 goto out;
1867
1868         /*
1869          * walk down to the last node level and free all the leaves
1870          */
1871         while(*level >= 0) {
1872                 WARN_ON(*level < 0);
1873                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1874                 cur = path->nodes[*level];
1875
1876                 if (btrfs_header_level(cur) != *level)
1877                         WARN_ON(1);
1878
1879                 if (path->slots[*level] >=
1880                     btrfs_header_nritems(cur))
1881                         break;
1882                 if (*level == 0) {
1883                         ret = drop_leaf_ref(trans, root, cur);
1884                         BUG_ON(ret);
1885                         break;
1886                 }
1887                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1888                 blocksize = btrfs_level_size(root, *level - 1);
1889                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1890                 BUG_ON(ret);
1891                 if (refs != 1) {
1892                         parent = path->nodes[*level];
1893                         root_owner = btrfs_header_owner(parent);
1894                         root_gen = btrfs_header_generation(parent);
1895                         path->slots[*level]++;
1896                         ret = btrfs_free_extent(trans, root, bytenr,
1897                                                 blocksize, root_owner,
1898                                                 root_gen, 0, 0, 1);
1899                         BUG_ON(ret);
1900                         continue;
1901                 }
1902                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1903                 if (!next || !btrfs_buffer_uptodate(next)) {
1904                         free_extent_buffer(next);
1905                         reada_walk_down(root, cur, path->slots[*level]);
1906                         mutex_unlock(&root->fs_info->fs_mutex);
1907                         next = read_tree_block(root, bytenr, blocksize);
1908                         mutex_lock(&root->fs_info->fs_mutex);
1909
1910                         /* we dropped the lock, check one more time */
1911                         ret = lookup_extent_ref(trans, root, bytenr,
1912                                                 blocksize, &refs);
1913                         BUG_ON(ret);
1914                         if (refs != 1) {
1915                                 parent = path->nodes[*level];
1916                                 root_owner = btrfs_header_owner(parent);
1917                                 root_gen = btrfs_header_generation(parent);
1918
1919                                 path->slots[*level]++;
1920                                 free_extent_buffer(next);
1921                                 ret = btrfs_free_extent(trans, root, bytenr,
1922                                                         blocksize,
1923                                                         root_owner,
1924                                                         root_gen, 0, 0, 1);
1925                                 BUG_ON(ret);
1926                                 continue;
1927                         }
1928                 }
1929                 WARN_ON(*level <= 0);
1930                 if (path->nodes[*level-1])
1931                         free_extent_buffer(path->nodes[*level-1]);
1932                 path->nodes[*level-1] = next;
1933                 *level = btrfs_header_level(next);
1934                 path->slots[*level] = 0;
1935         }
1936 out:
1937         WARN_ON(*level < 0);
1938         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1939
1940         if (path->nodes[*level] == root->node) {
1941                 root_owner = root->root_key.objectid;
1942                 parent = path->nodes[*level];
1943         } else {
1944                 parent = path->nodes[*level + 1];
1945                 root_owner = btrfs_header_owner(parent);
1946         }
1947
1948         root_gen = btrfs_header_generation(parent);
1949         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1950                                 path->nodes[*level]->len,
1951                                 root_owner, root_gen, 0, 0, 1);
1952         free_extent_buffer(path->nodes[*level]);
1953         path->nodes[*level] = NULL;
1954         *level += 1;
1955         BUG_ON(ret);
1956         return 0;
1957 }
1958
1959 /*
1960  * helper for dropping snapshots.  This walks back up the tree in the path
1961  * to find the first node higher up where we haven't yet gone through
1962  * all the slots
1963  */
1964 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
1965                                  struct btrfs_root *root,
1966                                  struct btrfs_path *path, int *level)
1967 {
1968         u64 root_owner;
1969         u64 root_gen;
1970         struct btrfs_root_item *root_item = &root->root_item;
1971         int i;
1972         int slot;
1973         int ret;
1974
1975         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1976                 slot = path->slots[i];
1977                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1978                         struct extent_buffer *node;
1979                         struct btrfs_disk_key disk_key;
1980                         node = path->nodes[i];
1981                         path->slots[i]++;
1982                         *level = i;
1983                         WARN_ON(*level == 0);
1984                         btrfs_node_key(node, &disk_key, path->slots[i]);
1985                         memcpy(&root_item->drop_progress,
1986                                &disk_key, sizeof(disk_key));
1987                         root_item->drop_level = i;
1988                         return 0;
1989                 } else {
1990                         if (path->nodes[*level] == root->node) {
1991                                 root_owner = root->root_key.objectid;
1992                                 root_gen =
1993                                    btrfs_header_generation(path->nodes[*level]);
1994                         } else {
1995                                 struct extent_buffer *node;
1996                                 node = path->nodes[*level + 1];
1997                                 root_owner = btrfs_header_owner(node);
1998                                 root_gen = btrfs_header_generation(node);
1999                         }
2000                         ret = btrfs_free_extent(trans, root,
2001                                                 path->nodes[*level]->start,
2002                                                 path->nodes[*level]->len,
2003                                                 root_owner, root_gen, 0, 0, 1);
2004                         BUG_ON(ret);
2005                         free_extent_buffer(path->nodes[*level]);
2006                         path->nodes[*level] = NULL;
2007                         *level = i + 1;
2008                 }
2009         }
2010         return 1;
2011 }
2012
2013 /*
2014  * drop the reference count on the tree rooted at 'snap'.  This traverses
2015  * the tree freeing any blocks that have a ref count of zero after being
2016  * decremented.
2017  */
2018 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2019                         *root)
2020 {
2021         int ret = 0;
2022         int wret;
2023         int level;
2024         struct btrfs_path *path;
2025         int i;
2026         int orig_level;
2027         struct btrfs_root_item *root_item = &root->root_item;
2028
2029         path = btrfs_alloc_path();
2030         BUG_ON(!path);
2031
2032         level = btrfs_header_level(root->node);
2033         orig_level = level;
2034         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2035                 path->nodes[level] = root->node;
2036                 extent_buffer_get(root->node);
2037                 path->slots[level] = 0;
2038         } else {
2039                 struct btrfs_key key;
2040                 struct btrfs_disk_key found_key;
2041                 struct extent_buffer *node;
2042
2043                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2044                 level = root_item->drop_level;
2045                 path->lowest_level = level;
2046                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2047                 if (wret < 0) {
2048                         ret = wret;
2049                         goto out;
2050                 }
2051                 node = path->nodes[level];
2052                 btrfs_node_key(node, &found_key, path->slots[level]);
2053                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2054                                sizeof(found_key)));
2055         }
2056         while(1) {
2057                 wret = walk_down_tree(trans, root, path, &level);
2058                 if (wret < 0)
2059                         ret = wret;
2060                 if (wret != 0)
2061                         break;
2062
2063                 wret = walk_up_tree(trans, root, path, &level);
2064                 if (wret < 0)
2065                         ret = wret;
2066                 if (wret != 0)
2067                         break;
2068                 /*
2069                 ret = -EAGAIN;
2070                 break;
2071                 */
2072         }
2073         for (i = 0; i <= orig_level; i++) {
2074                 if (path->nodes[i]) {
2075                         free_extent_buffer(path->nodes[i]);
2076                         path->nodes[i] = NULL;
2077                 }
2078         }
2079 out:
2080         btrfs_free_path(path);
2081         return ret;
2082 }
2083
2084 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2085 {
2086         u64 start;
2087         u64 end;
2088         u64 ptr;
2089         int ret;
2090         while(1) {
2091                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2092                                             &start, &end, (unsigned int)-1);
2093                 if (ret)
2094                         break;
2095                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2096                 if (!ret)
2097                         kfree((void *)(unsigned long)ptr);
2098                 clear_extent_bits(&info->block_group_cache, start,
2099                                   end, (unsigned int)-1, GFP_NOFS);
2100         }
2101         while(1) {
2102                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2103                                             &start, &end, EXTENT_DIRTY);
2104                 if (ret)
2105                         break;
2106                 clear_extent_dirty(&info->free_space_cache, start,
2107                                    end, GFP_NOFS);
2108         }
2109         return 0;
2110 }
2111
2112 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2113                            struct btrfs_key *key)
2114 {
2115         int ret;
2116         struct btrfs_key found_key;
2117         struct extent_buffer *leaf;
2118         int slot;
2119
2120         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2121         if (ret < 0)
2122                 return ret;
2123         while(1) {
2124                 slot = path->slots[0];
2125                 leaf = path->nodes[0];
2126                 if (slot >= btrfs_header_nritems(leaf)) {
2127                         ret = btrfs_next_leaf(root, path);
2128                         if (ret == 0)
2129                                 continue;
2130                         if (ret < 0)
2131                                 goto error;
2132                         break;
2133                 }
2134                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2135
2136                 if (found_key.objectid >= key->objectid &&
2137                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2138                         return 0;
2139                 path->slots[0]++;
2140         }
2141         ret = -ENOENT;
2142 error:
2143         return ret;
2144 }
2145
2146 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2147                              u64 total_bytes, u64 bytes_used,
2148                              struct btrfs_space_info **space_info)
2149 {
2150         struct btrfs_space_info *found;
2151
2152         found = __find_space_info(info, flags);
2153         if (found) {
2154                 found->total_bytes += total_bytes;
2155                 found->bytes_used += bytes_used;
2156                 WARN_ON(found->total_bytes < found->bytes_used);
2157                 *space_info = found;
2158                 return 0;
2159         }
2160         found = kmalloc(sizeof(*found), GFP_NOFS);
2161         if (!found)
2162                 return -ENOMEM;
2163
2164         list_add(&found->list, &info->space_info);
2165         found->flags = flags;
2166         found->total_bytes = total_bytes;
2167         found->bytes_used = bytes_used;
2168         found->bytes_pinned = 0;
2169         found->full = 0;
2170         *space_info = found;
2171         return 0;
2172 }
2173
2174 int btrfs_read_block_groups(struct btrfs_root *root)
2175 {
2176         struct btrfs_path *path;
2177         int ret;
2178         int bit;
2179         struct btrfs_block_group_cache *cache;
2180         struct btrfs_fs_info *info = root->fs_info;
2181         struct btrfs_space_info *space_info;
2182         struct extent_io_tree *block_group_cache;
2183         struct btrfs_key key;
2184         struct btrfs_key found_key;
2185         struct extent_buffer *leaf;
2186
2187         block_group_cache = &info->block_group_cache;
2188
2189         root = info->extent_root;
2190         key.objectid = 0;
2191         key.offset = 0;
2192         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2193         path = btrfs_alloc_path();
2194         if (!path)
2195                 return -ENOMEM;
2196
2197         while(1) {
2198                 ret = find_first_block_group(root, path, &key);
2199                 if (ret > 0) {
2200                         ret = 0;
2201                         goto error;
2202                 }
2203                 if (ret != 0) {
2204                         goto error;
2205                 }
2206                 leaf = path->nodes[0];
2207                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2208                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2209                 if (!cache) {
2210                         ret = -ENOMEM;
2211                         break;
2212                 }
2213
2214                 read_extent_buffer(leaf, &cache->item,
2215                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2216                                    sizeof(cache->item));
2217                 memcpy(&cache->key, &found_key, sizeof(found_key));
2218                 cache->cached = 0;
2219                 cache->pinned = 0;
2220                 key.objectid = found_key.objectid + found_key.offset;
2221                 btrfs_release_path(root, path);
2222                 cache->flags = btrfs_block_group_flags(&cache->item);
2223                 bit = 0;
2224                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2225                         bit = BLOCK_GROUP_DATA;
2226                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2227                         bit = BLOCK_GROUP_SYSTEM;
2228                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2229                         bit = BLOCK_GROUP_METADATA;
2230                 }
2231
2232                 ret = update_space_info(info, cache->flags, found_key.offset,
2233                                         btrfs_block_group_used(&cache->item),
2234                                         &space_info);
2235                 BUG_ON(ret);
2236                 cache->space_info = space_info;
2237
2238                 /* use EXTENT_LOCKED to prevent merging */
2239                 set_extent_bits(block_group_cache, found_key.objectid,
2240                                 found_key.objectid + found_key.offset - 1,
2241                                 bit | EXTENT_LOCKED, GFP_NOFS);
2242                 set_state_private(block_group_cache, found_key.objectid,
2243                                   (unsigned long)cache);
2244
2245                 if (key.objectid >=
2246                     btrfs_super_total_bytes(&info->super_copy))
2247                         break;
2248         }
2249         ret = 0;
2250 error:
2251         btrfs_free_path(path);
2252         return ret;
2253 }
2254
2255 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2256                            struct btrfs_root *root, u64 bytes_used,
2257                            u64 type, u64 chunk_tree, u64 chunk_objectid,
2258                            u64 size)
2259 {
2260         int ret;
2261         int bit = 0;
2262         struct btrfs_root *extent_root;
2263         struct btrfs_block_group_cache *cache;
2264         struct extent_io_tree *block_group_cache;
2265
2266         extent_root = root->fs_info->extent_root;
2267         block_group_cache = &root->fs_info->block_group_cache;
2268
2269         cache = kmalloc(sizeof(*cache), GFP_NOFS);
2270         BUG_ON(!cache);
2271         cache->key.objectid = chunk_objectid;
2272         cache->key.offset = size;
2273         cache->cached = 0;
2274         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2275         memset(&cache->item, 0, sizeof(cache->item));
2276         btrfs_set_block_group_used(&cache->item, bytes_used);
2277         btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
2278         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2279         cache->flags = type;
2280         btrfs_set_block_group_flags(&cache->item, type);
2281
2282         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2283                                 &cache->space_info);
2284         BUG_ON(ret);
2285
2286         if (type & BTRFS_BLOCK_GROUP_DATA) {
2287                 bit = BLOCK_GROUP_DATA;
2288         } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2289                 bit = BLOCK_GROUP_SYSTEM;
2290         } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2291                 bit = BLOCK_GROUP_METADATA;
2292         }
2293         set_extent_bits(block_group_cache, chunk_objectid,
2294                         chunk_objectid + size - 1,
2295                         bit | EXTENT_LOCKED, GFP_NOFS);
2296         set_state_private(block_group_cache, chunk_objectid,
2297                           (unsigned long)cache);
2298
2299         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2300                                 sizeof(cache->item));
2301         BUG_ON(ret);
2302
2303         finish_current_insert(trans, extent_root);
2304         ret = del_pending_extents(trans, extent_root);
2305         BUG_ON(ret);
2306         return 0;
2307 }
2308
2309 u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation,
2310                           u64 owner, u64 owner_offset)
2311 {
2312         return hash_extent_ref(root_objectid, ref_generation,
2313                                owner, owner_offset);
2314 }
2315
2316 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
2317                              struct btrfs_root *root,
2318                              u64 bytenr, u64 num_bytes, int alloc,
2319                              int mark_free)
2320 {
2321         return update_block_group(trans, root, bytenr, num_bytes,
2322                                   alloc, mark_free);
2323 }