2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #define _XOPEN_SOURCE 600
24 #include <sys/types.h>
28 #include "kerncompat.h"
29 #include "radix-tree.h"
33 #include "transaction.h"
36 #include "print-tree.h"
/*
 * Sanity-check the header of a tree block against the filesystem it is
 * supposed to belong to: the bytenr recorded in the header must equal
 * buf->start, and the fsid in the header is compared with the fsid of
 * root->fs_info->fs_devices, moving on to fs_devices->seed afterwards.
 */
38 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
41 struct btrfs_fs_devices *fs_devices;
/* The block must live at the logical address its own header claims. */
44 if (buf->start != btrfs_header_bytenr(buf)) {
45 printk("Check tree block failed, want=%Lu, have=%Lu\n",
46 buf->start, btrfs_header_bytenr(buf));
50 fs_devices = root->fs_info->fs_devices;
/* presumably memcmp semantics: a zero result means the fsid matches */
52 if (!memcmp_extent_buffer(buf, fs_devices->fsid,
53 (unsigned long)btrfs_header_fsid(buf),
/* Step to the seed device set of the current fs_devices. */
58 fs_devices = fs_devices->seed;
63 u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
65 return crc32c(seed, data, len);
68 void btrfs_csum_final(u32 crc, char *result)
70 *(__le32 *)result = ~cpu_to_le32(crc);
/*
 * Compute the crc32c of a tree block over everything that follows the
 * first BTRFS_CSUM_SIZE bytes.  The visible code then both compares the
 * first csum_size bytes of the block against the result (printing a
 * diagnostic on mismatch) and stores the result at offset 0 of the block.
 * NOTE(review): which of the two runs is presumably selected by the verify
 * argument that csum_tree_block() passes in -- confirm against the full
 * function.
 */
73 int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
80 result = malloc(csum_size * sizeof(char));
/* The checksum area at the front of the block is not part of the sum. */
84 len = buf->len - BTRFS_CSUM_SIZE;
85 crc = crc32c(crc, buf->data + BTRFS_CSUM_SIZE, len);
86 btrfs_csum_final(crc, result);
/* A non-zero compare result is reported; only the first 32 bits are shown. */
89 if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
90 printk("checksum verify failed on %llu found %08X "
91 "wanted %08X\n", (unsigned long long)buf->start,
92 *((u32 *)result), *((u32*)(char *)buf->data));
/* Place the computed checksum at the very start of the block. */
97 write_extent_buffer(buf, result, 0, csum_size);
/*
 * Wrapper around csum_tree_block_size(): the checksum size is taken from
 * the super block copy held in root->fs_info, and verify is passed through
 * unchanged.
 */
103 int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
107 btrfs_super_csum_size(root->fs_info->super_copy);
108 return csum_tree_block_size(buf, csum_size, verify);
/*
 * Look a tree block up in the filesystem-wide extent cache
 * (root->fs_info->extent_cache) and return whatever find_extent_buffer()
 * yields.
 */
111 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
112 u64 bytenr, u32 blocksize)
114 return find_extent_buffer(&root->fs_info->extent_cache,
/*
 * Obtain an extent buffer for the tree block at bytenr from the
 * filesystem-wide extent cache by delegating to alloc_extent_buffer().
 */
118 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
119 u64 bytenr, u32 blocksize)
121 return alloc_extent_buffer(&root->fs_info->extent_cache, bytenr,
/*
 * Issue a readahead hint for the tree block at bytenr.  A cached copy that
 * is up to date for parent_transid only has its reference dropped again;
 * the logical address is mapped with btrfs_map_block() and readahead() is
 * called on the fd of the first stripe's device.
 */
125 int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
129 struct extent_buffer *eb;
131 struct btrfs_multi_bio *multi = NULL;
132 struct btrfs_device *device;
134 eb = btrfs_find_tree_block(root, bytenr, blocksize);
/* Release the reference taken by the lookup above. */
135 if (eb && btrfs_buffer_uptodate(eb, parent_transid)) {
136 free_extent_buffer(eb);
141 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
142 bytenr, &length, &multi, 0, NULL);
/* Only the first stripe of the mapping is used for the hint. */
144 device = multi->stripes[0].dev;
/* The readahead request is capped at 64KiB. */
146 blocksize = min(blocksize, (u32)(64 * 1024));
147 readahead(device->fd, multi->stripes[0].physical, blocksize);
/*
 * Compare the generation stored in eb's header with the transid the
 * parent expects.  A parent_transid of 0, or a matching generation,
 * satisfies the first test.  On a mismatch a diagnostic with the wanted
 * and found values is printed and the buffer's uptodate state is cleared.
 * NOTE(review): the "Ignoring transid failure" message is presumably tied
 * to the fourth argument (read_tree_block() passes "ignore") -- confirm.
 */
152 static int verify_parent_transid(struct extent_io_tree *io_tree,
153 struct extent_buffer *eb, u64 parent_transid,
158 if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
/* Re-check the generation, this time also requiring an uptodate buffer. */
161 if (extent_buffer_uptodate(eb) &&
162 btrfs_header_generation(eb) == parent_transid) {
166 printk("parent transid verify failed on %llu wanted %llu found %llu\n",
167 (unsigned long long)eb->start,
168 (unsigned long long)parent_transid,
169 (unsigned long long)btrfs_header_generation(eb));
171 printk("Ignoring transid failure\n");
/* Drop the uptodate state so the buffer is not trusted as-is. */
177 clear_extent_buffer_uptodate(io_tree, eb);
/*
 * Fill an extent buffer from disk.  The range still to be read is mapped
 * with btrfs_map_block(); the first stripe of the mapping supplies the
 * device and the physical offset, read_extent_from_disk() reads read_len
 * bytes at the current offset, and bytes_left shrinks by that amount.
 */
183 static int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
185 unsigned long offset = 0;
186 struct btrfs_multi_bio *multi = NULL;
187 struct btrfs_device *device;
190 unsigned long bytes_left = eb->len;
/* Ask the mapper for everything that remains; it may adjust read_len. */
193 read_len = bytes_left;
194 ret = btrfs_map_block(&info->mapping_tree, READ,
195 eb->start + offset, &read_len, &multi,
198 printk("Couldn't map the block %Lu\n", eb->start + offset);
202 device = multi->stripes[0].dev;
/*
 * NOTE(review): presumably fd == 0 marks a device that was never opened;
 * 0 is also a valid descriptor number -- confirm.
 */
204 if (device->fd == 0) {
211 eb->dev_bytenr = multi->stripes[0].physical;
/* Never read past the end of the buffer. */
215 if (read_len > bytes_left)
216 read_len = bytes_left;
218 ret = read_extent_from_disk(eb, offset, read_len);
222 bytes_left -= read_len;
/*
 * Read and validate the tree block at bytenr.
 *
 * An extent buffer is obtained from the cache and tested for being up to
 * date.  The block is read from mirror mirror_num and has to pass
 * check_tree_block(), the checksum verification and the parent transid
 * check before it is marked up to date.  When that fails, diagnostics are
 * printed, the number of copies is looked up, and the mirror whose header
 * generation is the highest seen so far is remembered as good_mirror;
 * mirror_num is reset to good_mirror once it exceeds num_copies.
 */
227 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
228 u32 blocksize, u64 parent_transid)
231 struct extent_buffer *eb;
232 u64 best_transid = 0;
238 eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
242 if (btrfs_buffer_uptodate(eb, parent_transid))
246 ret = read_whole_eb(root->fs_info, eb, mirror_num);
/* All checks must pass before the buffer is marked up to date. */
247 if (ret == 0 && check_tree_block(root, eb) == 0 &&
248 csum_tree_block(root, eb, 1) == 0 &&
249 verify_parent_transid(eb->tree, eb, parent_transid, ignore)
251 btrfs_set_buffer_uptodate(eb);
/* Report which validation step rejected the block. */
255 if (check_tree_block(root, eb))
256 printk("read block failed check_tree_block\n");
258 printk("Csum didn't match\n");
261 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
263 if (num_copies == 1) {
/* Track the mirror carrying the newest generation seen so far. */
267 if (btrfs_header_generation(eb) > best_transid) {
268 best_transid = btrfs_header_generation(eb);
269 good_mirror = mirror_num;
/* Past the last copy: go back to the best mirror that was found. */
272 if (mirror_num > num_copies) {
273 mirror_num = good_mirror;
278 free_extent_buffer(eb);
/*
 * Read-modify-write helper: read the current on-disk contents into eb,
 * then overlay the part of orig_eb that overlaps eb's byte range.
 * orig_off and dest_off are the offsets of the overlap inside orig_eb and
 * eb respectively; copy_len is clamped so the copy stays inside both.
 */
282 static int rmw_eb(struct btrfs_fs_info *info,
283 struct extent_buffer *eb, struct extent_buffer *orig_eb)
286 unsigned long orig_off = 0;
287 unsigned long dest_off = 0;
288 unsigned long copy_len = eb->len;
/* Start from what is on disk right now. */
290 ret = read_whole_eb(info, eb, 0);
/* True when the two byte ranges do not overlap at all. */
294 if (eb->start + eb->len <= orig_eb->start ||
295 eb->start >= orig_eb->start + orig_eb->len)
298 * | ----- orig_eb ------- |
299 * | ----- stripe ------- |
300 * | ----- orig_eb ------- |
301 * | ----- orig_eb ------- |
/* Whichever buffer starts later begins at offset 0; the other is offset. */
303 if (eb->start > orig_eb->start)
304 orig_off = eb->start - orig_eb->start;
305 if (orig_eb->start > eb->start)
306 dest_off = orig_eb->start - eb->start;
/* Clamp the copy to what is left in the source and in the destination. */
308 if (copy_len > orig_eb->len - orig_off)
309 copy_len = orig_eb->len - orig_off;
310 if (copy_len > eb->len - dest_off)
311 copy_len = eb->len - dest_off;
313 memcpy(eb->data + dest_off, orig_eb->data + orig_off, copy_len);
/*
 * Build one stripe-sized extent buffer per stripe of a RAID5/6 mapping
 * from orig_eb.  Each buffer is allocated with stripe_len bytes of data
 * behind the struct, zeroed, and given start = raid_map[i] and
 * len = stripe_len.  If orig_eb does not cover the whole stripe the buffer
 * goes through rmw_eb(); the full-coverage case is a plain memcpy() out of
 * orig_eb.
 */
317 static void split_eb_for_raid56(struct btrfs_fs_info *info,
318 struct extent_buffer *orig_eb,
319 struct extent_buffer **ebs,
320 u64 stripe_len, u64 *raid_map,
323 struct extent_buffer *eb;
324 u64 start = orig_eb->start;
329 for (i = 0; i < num_stripes; i++) {
/* raid_map values at or above BTRFS_RAID5_P_STRIPE are tested for here. */
330 if (raid_map[i] >= BTRFS_RAID5_P_STRIPE)
333 eb = malloc(sizeof(struct extent_buffer) + stripe_len);
336 memset(eb, 0, sizeof(struct extent_buffer) + stripe_len);
338 eb->start = raid_map[i];
339 eb->len = stripe_len;
343 eb->dev_bytenr = (u64)-1;
345 this_eb_start = raid_map[i];
/* orig_eb starts after, or ends before, the edges of this stripe. */
347 if (start > this_eb_start ||
348 start + orig_eb->len < this_eb_start + stripe_len) {
349 ret = rmw_eb(info, eb, orig_eb);
352 memcpy(eb->data, orig_eb->data + eb->start - start, stripe_len);
/*
 * Write an extent buffer to a RAID5/6 mapping together with its parity.
 *
 * The buffer is split into per-stripe buffers by split_eb_for_raid56().
 * Stripes whose raid_map entry is below BTRFS_RAID5_P_STRIPE get their
 * device fd and physical offset filled in; for the remaining entries a new
 * buffer is allocated and matched against the P and Q markers.  Parity is
 * produced either by raid6_gen_syndrome() or by XOR-ing the other stripe
 * buffers into p_eb, and finally every buffer in ebs[] is written out.
 */
358 static int write_raid56_with_parity(struct btrfs_fs_info *info,
359 struct extent_buffer *eb,
360 struct btrfs_multi_bio *multi,
361 u64 stripe_len, u64 *raid_map)
363 struct extent_buffer *ebs[multi->num_stripes], *p_eb = NULL, *q_eb = NULL;
367 int alloc_size = eb->len;
/* Parity buffers need room for the larger of eb->len and stripe_len. */
369 if (stripe_len > alloc_size)
370 alloc_size = stripe_len;
372 split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map,
375 for (i = 0; i < multi->num_stripes; i++) {
376 struct extent_buffer *new_eb;
/* Entry below the P marker: point the split buffer at its device. */
377 if (raid_map[i] < BTRFS_RAID5_P_STRIPE) {
378 ebs[i]->dev_bytenr = multi->stripes[i].physical;
379 ebs[i]->fd = multi->stripes[i].dev->fd;
380 multi->stripes[i].dev->total_ios++;
381 BUG_ON(ebs[i]->start != raid_map[i]);
/* Otherwise allocate a fresh buffer for this stripe. */
384 new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS);
386 new_eb->dev_bytenr = multi->stripes[i].physical;
387 new_eb->fd = multi->stripes[i].dev->fd;
388 multi->stripes[i].dev->total_ios++;
389 new_eb->len = stripe_len;
391 if (raid_map[i] == BTRFS_RAID5_P_STRIPE)
393 else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE)
/* P and Q occupy the last two slots handed to raid6_gen_syndrome(). */
397 void *pointers[multi->num_stripes];
398 ebs[multi->num_stripes - 2] = p_eb;
399 ebs[multi->num_stripes - 1] = q_eb;
401 for (i = 0; i < multi->num_stripes; i++)
402 pointers[i] = ebs[i]->data;
404 raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers);
/* XOR parity: seed p_eb with stripe 0, then fold in stripes 1..n-2. */
406 ebs[multi->num_stripes - 1] = p_eb;
407 memcpy(p_eb->data, ebs[0]->data, stripe_len);
408 for (j = 1; j < multi->num_stripes - 1; j++) {
409 for (i = 0; i < stripe_len; i += sizeof(unsigned long)) {
410 *(unsigned long *)(p_eb->data + i) ^=
411 *(unsigned long *)(ebs[j]->data + i);
/* Write out every buffer in ebs[]. */
416 for (i = 0; i < multi->num_stripes; i++) {
417 ret = write_extent_to_disk(ebs[i]);
/*
 * Write one tree block to disk.  The block is first validated with
 * check_tree_block() and btrfs_buffer_uptodate(), gets the
 * BTRFS_HEADER_FLAG_WRITTEN header flag and a fresh checksum, and is then
 * mapped for WRITE.  One path hands it to write_raid56_with_parity(); the
 * other writes the same buffer once per stripe of the mapping.
 */
425 int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
426 struct extent_buffer *eb)
431 u64 *raid_map = NULL;
432 struct btrfs_multi_bio *multi = NULL;
434 if (check_tree_block(root, eb))
436 if (!btrfs_buffer_uptodate(eb, trans->transid))
/* Flag the header as written, then checksum (verify argument is 0). */
439 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
440 csum_tree_block(root, eb, 0);
444 ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE,
445 eb->start, &length, &multi, 0, &raid_map);
448 ret = write_raid56_with_parity(root->fs_info, eb, multi,
/* Non-parity path: retarget eb at each stripe's device and write it. */
451 } else while (dev_nr < multi->num_stripes) {
453 eb->fd = multi->stripes[dev_nr].dev->fd;
454 eb->dev_bytenr = multi->stripes[dev_nr].physical;
455 multi->stripes[dev_nr].dev->total_ios++;
457 ret = write_extent_to_disk(eb);
/*
 * Initialise the in-memory fields of a btrfs_root: record the block
 * geometry (node, leaf, sector and stripe sizes), the owning fs_info and
 * the objectid, reset the bookkeeping counters, initialise the dirty list
 * and zero root_key and root_item before storing objectid in root_key.
 */
464 int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
465 u32 stripesize, struct btrfs_root *root,
466 struct btrfs_fs_info *fs_info, u64 objectid)
469 root->commit_root = NULL;
470 root->sectorsize = sectorsize;
471 root->nodesize = nodesize;
472 root->leafsize = leafsize;
473 root->stripesize = stripesize;
475 root->track_dirty = 0;
477 root->fs_info = fs_info;
478 root->objectid = objectid;
479 root->last_trans = 0;
480 root->highest_inode = 0;
481 root->last_inode_alloc = 0;
483 INIT_LIST_HEAD(&root->dirty_list);
/* Key and item start out zeroed; only the objectid is filled in here. */
484 memset(&root->root_key, 0, sizeof(root->root_key));
485 memset(&root->root_item, 0, sizeof(root->root_item));
486 root->root_key.objectid = objectid;
/*
 * Bring the root item of a COW-only root in line with its current node:
 * dirty block groups are written, the bytenr stored in root_item is
 * compared with root->node->start, the item's bytenr, generation and level
 * are refreshed, and the item is updated in the tree root, followed by
 * another write of the dirty block groups.
 */
490 static int update_cowonly_root(struct btrfs_trans_handle *trans,
491 struct btrfs_root *root)
495 struct btrfs_root *tree_root = root->fs_info->tree_root;
497 btrfs_write_dirty_block_groups(trans, root);
/* Compare the recorded root location with where the node is now. */
499 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
500 if (old_root_bytenr == root->node->start)
502 btrfs_set_root_bytenr(&root->root_item,
504 btrfs_set_root_generation(&root->root_item,
506 root->root_item.level = btrfs_header_level(root->node);
507 ret = btrfs_update_root(trans, tree_root,
511 btrfs_write_dirty_block_groups(trans, root);
/*
 * Commit the roots that live in the tree root.  The tree root's own node
 * is COWed first (holding an extra reference across the call), then each
 * root on fs_info->dirty_cowonly_roots is passed to update_cowonly_root()
 * and has its commit_root released.  fs_info->readonly is tested up front.
 */
516 static int commit_tree_roots(struct btrfs_trans_handle *trans,
517 struct btrfs_fs_info *fs_info)
519 struct btrfs_root *root;
520 struct list_head *next;
521 struct extent_buffer *eb;
524 if (fs_info->readonly)
/* Take a reference, COW the tree root node, then drop the reference. */
527 eb = fs_info->tree_root->node;
528 extent_buffer_get(eb);
529 ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
530 free_extent_buffer(eb);
/* Work through the dirty list from its head until it is empty. */
534 while(!list_empty(&fs_info->dirty_cowonly_roots)) {
535 next = fs_info->dirty_cowonly_roots.next;
537 root = list_entry(next, struct btrfs_root, dirty_list);
538 update_cowonly_root(trans, root);
539 free_extent_buffer(root->commit_root);
540 root->commit_root = NULL;
/*
 * Write out extent buffers tracked in the filesystem's extent cache.  A
 * range [start, end] is obtained with find_first_extent_bit(); for each
 * position in it the buffer starting exactly there is looked up, written
 * with write_tree_block(), has its dirty state cleared and is released.
 */
546 static int __commit_transaction(struct btrfs_trans_handle *trans,
547 struct btrfs_root *root)
551 struct extent_buffer *eb;
552 struct extent_io_tree *tree = &root->fs_info->extent_cache;
556 ret = find_first_extent_bit(tree, 0, &start, &end,
560 while(start <= end) {
561 eb = find_first_extent_buffer(tree, start);
/* A buffer must exist and begin exactly at start. */
562 BUG_ON(!eb || eb->start != start);
563 ret = write_tree_block(trans, root, eb);
566 clear_extent_buffer_dirty(eb);
567 free_extent_buffer(eb);
/*
 * Commit the running transaction.  The root's commit_root is released and
 * its root item (bytenr, generation, level) is refreshed and updated in
 * the tree root; then the tree roots are committed, dirty tree blocks are
 * written, the super block is written, pinned extents are finished and the
 * transaction handle is freed.  fs_info->last_trans_committed ends up as
 * the transid of this transaction.
 */
573 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
574 struct btrfs_root *root)
/* Remember the id now: trans is freed before it is needed again. */
576 u64 transid = trans->transid;
578 struct btrfs_fs_info *fs_info = root->fs_info;
580 if (root->commit_root == root->node)
583 free_extent_buffer(root->commit_root);
584 root->commit_root = NULL;
586 btrfs_set_root_bytenr(&root->root_item, root->node->start);
587 btrfs_set_root_generation(&root->root_item, trans->transid);
588 root->root_item.level = btrfs_header_level(root->node);
589 ret = btrfs_update_root(trans, root->fs_info->tree_root,
590 &root->root_key, &root->root_item);
593 ret = commit_tree_roots(trans, fs_info);
595 ret = __commit_transaction(trans, root);
597 write_ctree_super(trans, root);
598 btrfs_finish_extent_commit(trans, fs_info->extent_root,
599 &fs_info->pinned_extents);
600 btrfs_free_transaction(root, trans);
601 free_extent_buffer(root->commit_root);
602 root->commit_root = NULL;
603 fs_info->running_transaction = NULL;
604 fs_info->last_trans_committed = transid;
/*
 * Initialise root with the geometry of tree_root, load the latest root
 * item for objectid via btrfs_find_last_root(), and read the root node
 * described by that item (its bytenr, level-derived block size and
 * generation).  The node's uptodate state is checked afterwards.
 */
608 static int find_and_setup_root(struct btrfs_root *tree_root,
609 struct btrfs_fs_info *fs_info,
610 u64 objectid, struct btrfs_root *root)
616 __setup_root(tree_root->nodesize, tree_root->leafsize,
617 tree_root->sectorsize, tree_root->stripesize,
618 root, fs_info, objectid);
619 ret = btrfs_find_last_root(tree_root, objectid,
620 &root->root_item, &root->root_key);
/* Block size follows from the level recorded in the root item. */
624 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
625 generation = btrfs_root_generation(&root->root_item);
626 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
627 blocksize, generation);
628 if (!extent_buffer_uptodate(root->node))
/*
 * Set up the log root tree named by the super block.  A btrfs_root is
 * allocated and initialised with BTRFS_TREE_LOG_OBJECTID, and its node is
 * read from the log root address using the super block's log root level
 * and generation + 1.  If the node is not up to date it is released and
 * fs_info->log_root_tree is reset to NULL.
 */
634 static int find_and_setup_log_root(struct btrfs_root *tree_root,
635 struct btrfs_fs_info *fs_info,
636 struct btrfs_super_block *disk_super)
639 u64 blocknr = btrfs_super_log_root(disk_super);
640 struct btrfs_root *log_root = malloc(sizeof(struct btrfs_root));
650 blocksize = btrfs_level_size(tree_root,
651 btrfs_super_log_root_level(disk_super));
653 __setup_root(tree_root->nodesize, tree_root->leafsize,
654 tree_root->sectorsize, tree_root->stripesize,
655 log_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
/* The expected generation is one past the super block's generation. */
657 log_root->node = read_tree_block(tree_root, blocknr,
659 btrfs_super_generation(disk_super) + 1);
661 fs_info->log_root_tree = log_root;
/* Undo the assignment above when the node could not be read. */
663 if (!extent_buffer_uptodate(log_root->node)) {
664 free_extent_buffer(log_root->node);
666 fs_info->log_root_tree = NULL;
/*
 * Release the extent buffers held by a root: its node and, when set, its
 * commit_root.
 */
674 int btrfs_free_fs_root(struct btrfs_root *root)
677 free_extent_buffer(root->node);
678 if (root->commit_root)
679 free_extent_buffer(root->commit_root);
684 static void __free_fs_root(struct rb_node *node)
686 struct btrfs_root *root;
688 root = container_of(node, struct btrfs_root, rb_node);
689 btrfs_free_fs_root(root);
/*
 * Macro instantiation for the "fs_roots" tree with __free_fs_root as the
 * per-node callback; presumably this is what defines the
 * free_fs_roots_tree() helper that close_ctree() calls -- confirm against
 * the macro definition.
 */
692 FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
/*
 * Read the root described by location without consulting the fs root
 * cache.  A zeroed btrfs_root is allocated (ERR_PTR(-ENOMEM) on failure).
 * When location->offset is (u64)-1 the root is set up through
 * find_and_setup_root(); the other path initialises the root, searches the
 * tree root for the exact key, copies the root item out of the leaf and
 * reads the root node it describes.
 */
694 struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
695 struct btrfs_key *location)
697 struct btrfs_root *root;
698 struct btrfs_root *tree_root = fs_info->tree_root;
699 struct btrfs_path *path;
700 struct extent_buffer *l;
705 root = malloc(sizeof(*root));
707 return ERR_PTR(-ENOMEM);
708 memset(root, 0, sizeof(*root));
/* offset == (u64)-1 selects the find_and_setup_root() path. */
709 if (location->offset == (u64)-1) {
710 ret = find_and_setup_root(tree_root, fs_info,
711 location->objectid, root);
719 __setup_root(tree_root->nodesize, tree_root->leafsize,
720 tree_root->sectorsize, tree_root->stripesize,
721 root, fs_info, location->objectid);
723 path = btrfs_alloc_path();
725 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
/* Copy the on-disk root item and remember the key it was found under. */
732 read_extent_buffer(l, &root->root_item,
733 btrfs_item_ptr_offset(l, path->slots[0]),
734 sizeof(root->root_item));
735 memcpy(&root->root_key, location, sizeof(*location));
738 btrfs_release_path(root, path);
739 btrfs_free_path(path);
/* Read the root node the item points at. */
744 generation = btrfs_root_generation(&root->root_item);
745 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
746 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
747 blocksize, generation);
/*
 * rb-tree comparison helper: data points at a u64 objectid, which is
 * compared with the objectid of the btrfs_root embedding node.  The
 * greater-than and less-than cases are distinguished.
 */
754 static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
757 u64 objectid = *((u64 *)data);
758 struct btrfs_root *root;
760 root = rb_entry(node, struct btrfs_root, rb_node);
761 if (objectid > root->objectid)
763 else if (objectid < root->objectid)
769 static int btrfs_fs_roots_compare_roots(struct rb_node *node1,
770 struct rb_node *node2)
772 struct btrfs_root *root;
774 root = rb_entry(node2, struct btrfs_root, rb_node);
775 return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid);
/*
 * Return the root identified by location.  The tree, extent, chunk, dev
 * and csum roots are served straight from fs_info.  Any other objectid is
 * searched for in fs_info->fs_root_tree; the root is also read with
 * btrfs_read_fs_root_no_cache() and inserted into that tree.
 */
778 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
779 struct btrfs_key *location)
781 struct btrfs_root *root;
782 struct rb_node *node;
/* Well-known trees are kept directly in fs_info. */
785 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
786 return fs_info->tree_root;
787 if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
788 return fs_info->extent_root;
789 if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
790 return fs_info->chunk_root;
791 if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
792 return fs_info->dev_root;
793 if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
794 return fs_info->csum_root;
/* Reloc tree lookups and offsets other than (u64)-1 are not supported. */
796 BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID ||
797 location->offset != (u64)-1);
799 node = rb_search(&fs_info->fs_root_tree, (void *)&location->objectid,
800 btrfs_fs_roots_compare_objectids, NULL);
802 return container_of(node, struct btrfs_root, rb_node);
/* Read the root from disk and add it to the cache tree. */
804 root = btrfs_read_fs_root_no_cache(fs_info, location);
808 ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
809 btrfs_fs_roots_compare_roots);
/*
 * Free the btrfs_root structures and the super block copy that hang off
 * fs_info.  Only the structures themselves are freed here; the node
 * buffers are released separately by btrfs_release_all_roots().
 */
814 void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
816 free(fs_info->tree_root);
817 free(fs_info->extent_root);
818 free(fs_info->chunk_root);
819 free(fs_info->dev_root);
820 free(fs_info->csum_root);
821 free(fs_info->super_copy);
822 free(fs_info->log_root_tree);
/*
 * Allocate and initialise a btrfs_fs_info.  The structure, its five
 * permanent roots and the super block copy are allocated and zeroed, the
 * extent io trees, fs root tree, mapping cache, mutex and lists are
 * initialised, and the super block address and default allocation
 * profiles are recorded.  btrfs_free_fs_info() is used for cleanup.
 */
826 struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr)
828 struct btrfs_fs_info *fs_info;
830 fs_info = malloc(sizeof(struct btrfs_fs_info));
834 memset(fs_info, 0, sizeof(struct btrfs_fs_info));
836 fs_info->tree_root = malloc(sizeof(struct btrfs_root));
837 fs_info->extent_root = malloc(sizeof(struct btrfs_root));
838 fs_info->chunk_root = malloc(sizeof(struct btrfs_root));
839 fs_info->dev_root = malloc(sizeof(struct btrfs_root));
840 fs_info->csum_root = malloc(sizeof(struct btrfs_root));
841 fs_info->super_copy = malloc(BTRFS_SUPER_INFO_SIZE);
/* All six allocations are checked together. */
843 if (!fs_info->tree_root || !fs_info->extent_root ||
844 !fs_info->chunk_root || !fs_info->dev_root ||
845 !fs_info->csum_root || !fs_info->super_copy)
848 memset(fs_info->super_copy, 0, BTRFS_SUPER_INFO_SIZE);
849 memset(fs_info->tree_root, 0, sizeof(struct btrfs_root));
850 memset(fs_info->extent_root, 0, sizeof(struct btrfs_root));
851 memset(fs_info->chunk_root, 0, sizeof(struct btrfs_root));
852 memset(fs_info->dev_root, 0, sizeof(struct btrfs_root));
853 memset(fs_info->csum_root, 0, sizeof(struct btrfs_root));
855 extent_io_tree_init(&fs_info->extent_cache);
856 extent_io_tree_init(&fs_info->free_space_cache);
857 extent_io_tree_init(&fs_info->block_group_cache);
858 extent_io_tree_init(&fs_info->pinned_extents);
859 extent_io_tree_init(&fs_info->pending_del);
860 extent_io_tree_init(&fs_info->extent_ins);
861 fs_info->fs_root_tree = RB_ROOT;
862 cache_tree_init(&fs_info->mapping_tree.cache_tree);
864 mutex_init(&fs_info->fs_mutex);
865 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
866 INIT_LIST_HEAD(&fs_info->space_info);
/* NOTE(review): presumably only set when writable is 0 -- confirm. */
869 fs_info->readonly = 1;
871 fs_info->super_bytenr = sb_bytenr;
/* (u64)-1 is stored as the initial value of every allocation profile. */
872 fs_info->data_alloc_profile = (u64)-1;
873 fs_info->metadata_alloc_profile = (u64)-1;
874 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
877 btrfs_free_fs_info(fs_info);
/*
 * Check the feature flags in a super block against what this code
 * supports.  Incompat flags outside BTRFS_FEATURE_INCOMPAT_SUPP are
 * computed and reported; the MIXED_BACKREF incompat flag is switched on in
 * the super block when it is missing; compat_ro flags outside
 * BTRFS_FEATURE_COMPAT_RO_SUPP are rejected only for writable opens.
 */
881 int btrfs_check_fs_compatibility(struct btrfs_super_block *sb, int writable)
/* Incompat bits this implementation does not know about. */
885 features = btrfs_super_incompat_flags(sb) &
886 ~BTRFS_FEATURE_INCOMPAT_SUPP;
888 printk("couldn't open because of unsupported "
889 "option features (%Lx).\n",
890 (unsigned long long)features);
/* Make sure the mixed backref flag is recorded in the super block. */
894 features = btrfs_super_incompat_flags(sb);
895 if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
896 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
897 btrfs_set_super_incompat_flags(sb, features);
/* Unknown compat_ro bits only matter when opening for writing. */
900 features = btrfs_super_compat_ro_flags(sb) &
901 ~BTRFS_FEATURE_COMPAT_RO_SUPP;
902 if (writable && features) {
903 printk("couldn't open RDWR because of unsupported "
904 "option features (%Lx).\n",
905 (unsigned long long)features);
/*
 * Read the tree root and the roots stored in it.  Block geometry comes
 * from the super block copy.  The tree root node is read from
 * root_tree_bytenr, falling back to the address in the super block when
 * that argument is 0.  The extent, device, csum and log roots are then set
 * up, the generation counters in fs_info are initialised, block groups are
 * read and the default fs tree is loaded into fs_info->fs_root.
 */
911 int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info,
912 u64 root_tree_bytenr, int partial)
914 struct btrfs_super_block *sb = fs_info->super_copy;
915 struct btrfs_root *root;
916 struct btrfs_key key;
925 nodesize = btrfs_super_nodesize(sb);
926 leafsize = btrfs_super_leafsize(sb);
927 sectorsize = btrfs_super_sectorsize(sb);
928 stripesize = btrfs_super_stripesize(sb);
930 root = fs_info->tree_root;
931 __setup_root(nodesize, leafsize, sectorsize, stripesize,
932 root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
933 blocksize = btrfs_level_size(root, btrfs_super_root_level(sb));
934 generation = btrfs_super_generation(sb);
/* A zero root_tree_bytenr means "use the super block's tree root". */
936 if (!root_tree_bytenr)
937 root_tree_bytenr = btrfs_super_root(sb);
938 root->node = read_tree_block(root, root_tree_bytenr, blocksize,
940 if (!extent_buffer_uptodate(root->node)) {
941 fprintf(stderr, "Couldn't read tree root\n");
945 ret = find_and_setup_root(root, fs_info, BTRFS_EXTENT_TREE_OBJECTID,
946 fs_info->extent_root);
948 printk("Couldn't setup extent tree\n");
951 fs_info->extent_root->track_dirty = 1;
953 ret = find_and_setup_root(root, fs_info, BTRFS_DEV_TREE_OBJECTID,
956 printk("Couldn't setup device tree\n");
959 fs_info->dev_root->track_dirty = 1;
961 ret = find_and_setup_root(root, fs_info, BTRFS_CSUM_TREE_OBJECTID,
964 printk("Couldn't setup csum tree\n");
968 fs_info->csum_root->track_dirty = 1;
970 ret = find_and_setup_log_root(root, fs_info, sb);
972 printk("Couldn't setup log root tree\n");
/* Both counters start at the generation found in the super block. */
976 fs_info->generation = generation;
977 fs_info->last_trans_committed = generation;
978 btrfs_read_block_groups(fs_info->tree_root);
/* Load the default fs tree; offset (u64)-1 is what btrfs_read_fs_root() requires. */
980 key.objectid = BTRFS_FS_TREE_OBJECTID;
981 key.type = BTRFS_ROOT_ITEM_KEY;
982 key.offset = (u64)-1;
983 fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
985 if (!fs_info->fs_root)
990 void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
992 if (fs_info->csum_root)
993 free_extent_buffer(fs_info->csum_root->node);
994 if (fs_info->dev_root)
995 free_extent_buffer(fs_info->dev_root->node);
996 if (fs_info->extent_root)
997 free_extent_buffer(fs_info->extent_root->node);
998 if (fs_info->tree_root)
999 free_extent_buffer(fs_info->tree_root->node);
1000 if (fs_info->log_root_tree)
1001 free_extent_buffer(fs_info->log_root_tree->node);
1002 if (fs_info->chunk_root)
1003 free_extent_buffer(fs_info->chunk_root->node);
/*
 * Per-entry callback for the mapping cache: recover the map_lookup that
 * embeds the cache extent ce.
 */
1006 static void free_map_lookup(struct cache_extent *ce)
1008 struct map_lookup *map;
1010 map = container_of(ce, struct map_lookup, ce);
/*
 * Macro instantiation for the "mapping_cache" tree with free_map_lookup as
 * the per-entry callback; presumably this is what defines the
 * free_mapping_cache_tree() helper used by btrfs_cleanup_all_caches() --
 * confirm against the macro definition.
 */
1014 FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
1016 void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
1018 free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
1019 extent_io_tree_cleanup(&fs_info->extent_cache);
1020 extent_io_tree_cleanup(&fs_info->free_space_cache);
1021 extent_io_tree_cleanup(&fs_info->block_group_cache);
1022 extent_io_tree_cleanup(&fs_info->pinned_extents);
1023 extent_io_tree_cleanup(&fs_info->pending_del);
1024 extent_io_tree_cleanup(&fs_info->extent_ins);
/*
 * Scan the device behind fd/path for a btrfs super block at
 * BTRFS_SUPER_INFO_OFFSET and, when the filesystem reports a device count
 * other than one, scan for the remaining devices of the same fsid.
 */
1027 int btrfs_scan_fs_devices(int fd, const char *path,
1028 struct btrfs_fs_devices **fs_devices)
1033 ret = btrfs_scan_one_device(fd, path, fs_devices,
1034 &total_devs, BTRFS_SUPER_INFO_OFFSET);
1036 fprintf(stderr, "No valid Btrfs found on %s\n", path);
/* Multi-device filesystem: look for the other members. */
1040 if (total_devs != 1) {
1041 ret = btrfs_scan_for_fsid(*fs_devices, total_devs, 1);
/*
 * Bring up the chunk root.  The root is initialised from the super block
 * geometry, the system chunk array is read, and the chunk root node is
 * read from the address, level and generation stored in the super block.
 * Unless the super block carries BTRFS_SUPER_FLAG_METADUMP, the chunk tree
 * itself is read as well.
 */
1048 int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
1050 struct btrfs_super_block *sb = fs_info->super_copy;
1059 nodesize = btrfs_super_nodesize(sb);
1060 leafsize = btrfs_super_leafsize(sb);
1061 sectorsize = btrfs_super_sectorsize(sb);
1062 stripesize = btrfs_super_stripesize(sb);
1064 __setup_root(nodesize, leafsize, sectorsize, stripesize,
1065 fs_info->chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
1067 ret = btrfs_read_sys_array(fs_info->chunk_root);
1071 blocksize = btrfs_level_size(fs_info->chunk_root,
1072 btrfs_super_chunk_root_level(sb));
1073 generation = btrfs_super_chunk_root_generation(sb);
1075 fs_info->chunk_root->node = read_tree_block(fs_info->chunk_root,
1076 btrfs_super_chunk_root(sb),
1077 blocksize, generation);
/* Both a missing node and a node that is not up to date count as failure. */
1078 if (!fs_info->chunk_root->node ||
1079 !extent_buffer_uptodate(fs_info->chunk_root->node)) {
1080 fprintf(stderr, "Couldn't read chunk root\n");
/* The chunk tree is not read for images flagged as metadata dumps. */
1084 if (!(btrfs_super_flags(sb) & BTRFS_SUPER_FLAG_METADUMP)) {
1085 ret = btrfs_read_chunk_tree(fs_info->chunk_root);
1087 fprintf(stderr, "Couldn't read chunk tree\n");
/*
 * Open a filesystem through an already opened descriptor.  The steps
 * visible here are: advise the kernel to drop cached pages for fp,
 * allocate fs_info, scan and open the member devices, read and check the
 * super block, set up the chunk tree and device map, copy the chunk tree
 * uuid out of the chunk root header, and set up all remaining roots.  The
 * trailing calls release roots, caches, devices and fs_info again.
 */
1094 static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path,
1096 u64 root_tree_bytenr, int writes,
1099 struct btrfs_fs_info *fs_info;
1100 struct btrfs_super_block *disk_super;
1101 struct btrfs_fs_devices *fs_devices = NULL;
1102 struct extent_buffer *eb;
1106 sb_bytenr = BTRFS_SUPER_INFO_OFFSET;
1108 /* try to drop all the caches */
1109 if (posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED))
1110 fprintf(stderr, "Warning, could not drop caches\n");
1112 fs_info = btrfs_new_fs_info(writes, sb_bytenr);
1114 fprintf(stderr, "Failed to allocate memory for fs_info\n");
1118 ret = btrfs_scan_fs_devices(fp, path, &fs_devices);
1122 fs_info->fs_devices = fs_devices;
/* Devices are opened read-write or read-only. */
1124 ret = btrfs_open_devices(fs_devices, O_RDWR);
1126 ret = btrfs_open_devices(fs_devices, O_RDONLY);
/* The super block is read straight into fs_info->super_copy. */
1131 disk_super = fs_info->super_copy;
1132 ret = btrfs_read_dev_super(fs_devices->latest_bdev,
1133 disk_super, sb_bytenr);
1135 printk("No valid btrfs found\n");
1139 memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE);
1141 ret = btrfs_check_fs_compatibility(fs_info->super_copy, writes);
1145 ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
1149 eb = fs_info->chunk_root->node;
1150 read_extent_buffer(eb, fs_info->chunk_tree_uuid,
1151 (unsigned long)btrfs_header_chunk_tree_uuid(eb),
1154 ret = btrfs_setup_all_roots(fs_info, root_tree_bytenr, partial);
/* Teardown calls, in reverse order of setup. */
1164 btrfs_release_all_roots(fs_info);
1165 btrfs_cleanup_all_caches(fs_info);
1167 btrfs_close_devices(fs_devices);
1169 btrfs_free_fs_info(fs_info);
/*
 * Open the filesystem stored in filename: the file is opened with mode
 * 0600 and the descriptor is handed to __open_ctree_fd().
 * NOTE(review): flags starts as O_CREAT | O_RDWR; presumably it is
 * adjusted for read-only opens before open() is called -- confirm.
 */
1173 struct btrfs_fs_info *open_ctree_fs_info(const char *filename,
1174 u64 sb_bytenr, u64 root_tree_bytenr,
1175 int writes, int partial)
1178 struct btrfs_fs_info *info;
1179 int flags = O_CREAT | O_RDWR;
1184 fp = open(filename, flags, 0600);
1186 fprintf (stderr, "Could not open %s\n", filename);
1189 info = __open_ctree_fd(fp, filename, sb_bytenr, root_tree_bytenr,
/*
 * Convenience wrapper: open the filesystem in filename through
 * open_ctree_fs_info() (root_tree_bytenr 0, partial 0) and return its
 * default fs root.
 */
1195 struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes)
1197 struct btrfs_fs_info *info;
1199 info = open_ctree_fs_info(filename, sb_bytenr, 0, writes, 0);
1202 return info->fs_root;
/*
 * Convenience wrapper: open the filesystem behind an existing descriptor
 * through __open_ctree_fd() (root_tree_bytenr 0, partial 0) and return its
 * default fs root.
 */
1205 struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
1208 struct btrfs_fs_info *info;
1209 info = __open_ctree_fd(fp, path, sb_bytenr, 0, writes, 0);
1212 return info->fs_root;
/*
 * Read a super block from fd into sb.
 *
 * When sb_bytenr is not the primary super block offset, exactly that copy
 * is read and accepted if its recorded bytenr and magic match.  Otherwise
 * every mirror slot up to BTRFS_SUPER_MIRROR_MAX is read; copies with a
 * wrong bytenr, a wrong magic, or an fsid that differs from the first
 * valid copy are not used, and the copy with the highest generation is
 * the one left in sb.  Returns 0 if a copy with a non-zero generation was
 * found, -1 otherwise.
 *
 * NOTE(review): "ret < sizeof(buf)" compares against a size_t.  If ret is
 * a signed int, a -1 from pread64() is converted to a huge unsigned value
 * and the read error goes unnoticed -- confirm ret's type.
 */
1215 int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr)
1217 u8 fsid[BTRFS_FSID_SIZE];
1218 int fsid_is_initialized = 0;
1219 struct btrfs_super_block buf;
/* Explicit location requested: read only that copy. */
1225 if (sb_bytenr != BTRFS_SUPER_INFO_OFFSET) {
1226 ret = pread64(fd, &buf, sizeof(buf), sb_bytenr);
1227 if (ret < sizeof(buf))
1230 if (btrfs_super_bytenr(&buf) != sb_bytenr ||
1231 buf.magic != cpu_to_le64(BTRFS_MAGIC))
1234 memcpy(sb, &buf, sizeof(*sb));
/* Default location: walk all mirror slots. */
1238 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1239 bytenr = btrfs_sb_offset(i);
1240 ret = pread64(fd, &buf, sizeof(buf), bytenr);
1241 if (ret < sizeof(buf))
1244 if (btrfs_super_bytenr(&buf) != bytenr )
1246 /* if magic is NULL, the device was removed */
1247 if (buf.magic == 0 && i == 0)
1249 if (buf.magic != cpu_to_le64(BTRFS_MAGIC))
/* The first valid copy fixes the fsid every later copy must match. */
1252 if (!fsid_is_initialized) {
1253 memcpy(fsid, buf.fsid, sizeof(fsid));
1254 fsid_is_initialized = 1;
1255 } else if (memcmp(fsid, buf.fsid, sizeof(fsid))) {
1257 * the superblocks (the original one and
1258 * its backups) contain data of different
1259 * filesystems -> the super cannot be trusted
/* Keep whichever copy carries the newest generation. */
1264 if (btrfs_super_generation(&buf) > transid) {
1265 memcpy(sb, &buf, sizeof(*sb));
1266 transid = btrfs_super_generation(&buf);
1270 return transid > 0 ? 0 : -1;
/*
 * Write the super block to one device.  When fs_info->super_bytenr is not
 * the primary offset, a single copy is written there.  Otherwise a copy is
 * written to each mirror slot, with a test against device->total_bytes for
 * slots that would not fit.  Before each write the super block's bytenr
 * field is set and its checksum recomputed over everything after the
 * checksum area.
 *
 * NOTE(review): the checksum is computed on sb while the bytes written
 * come from root->fs_info->super_copy; write_all_supers() passes
 * super_copy as sb, so the two are the same object on that path.
 */
1273 int write_dev_supers(struct btrfs_root *root, struct btrfs_super_block *sb,
1274 struct btrfs_device *device)
/* Non-default super block location: write just that one copy. */
1280 if (root->fs_info->super_bytenr != BTRFS_SUPER_INFO_OFFSET) {
1281 btrfs_set_super_bytenr(sb, root->fs_info->super_bytenr);
1283 crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc,
1284 BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
1285 btrfs_csum_final(crc, (char *)&sb->csum[0]);
1288 * super_copy is BTRFS_SUPER_INFO_SIZE bytes and is
1289 * zero filled, we can use it directly
1291 ret = pwrite64(device->fd, root->fs_info->super_copy,
1292 BTRFS_SUPER_INFO_SIZE,
1293 root->fs_info->super_bytenr);
1294 BUG_ON(ret != BTRFS_SUPER_INFO_SIZE);
/* Default location: go through every mirror slot. */
1298 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1299 bytenr = btrfs_sb_offset(i);
1300 if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
1303 btrfs_set_super_bytenr(sb, bytenr);
1306 crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc,
1307 BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
1308 btrfs_csum_final(crc, (char *)&sb->csum[0]);
1311 * super_copy is BTRFS_SUPER_INFO_SIZE bytes and is
1312 * zero filled, we can use it directly
1314 ret = pwrite64(device->fd, root->fs_info->super_copy,
1315 BTRFS_SUPER_INFO_SIZE, bytenr);
1316 BUG_ON(ret != BTRFS_SUPER_INFO_SIZE);
/*
 * Write the super block to the devices of the filesystem.  For each device
 * on the fs_devices list (with a test of dev->writeable), the dev_item
 * embedded in the super block copy is filled in from that device, the
 * super flags are OR-ed with BTRFS_HEADER_FLAG_WRITTEN, and
 * write_dev_supers() is called.
 */
1322 int write_all_supers(struct btrfs_root *root)
1324 struct list_head *cur;
1325 struct list_head *head = &root->fs_info->fs_devices->devices;
1326 struct btrfs_device *dev;
1327 struct btrfs_super_block *sb;
1328 struct btrfs_dev_item *dev_item;
1332 sb = root->fs_info->super_copy;
1333 dev_item = &sb->dev_item;
1334 list_for_each(cur, head) {
1335 dev = list_entry(cur, struct btrfs_device, dev_list);
1336 if (!dev->writeable)
/* The embedded dev_item is rewritten to describe the current device. */
1339 btrfs_set_stack_device_generation(dev_item, 0);
1340 btrfs_set_stack_device_type(dev_item, dev->type);
1341 btrfs_set_stack_device_id(dev_item, dev->devid);
1342 btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
1343 btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
1344 btrfs_set_stack_device_io_align(dev_item, dev->io_align);
1345 btrfs_set_stack_device_io_width(dev_item, dev->io_width);
1346 btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
1347 memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
1348 memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
1350 flags = btrfs_super_flags(sb);
1351 btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
1353 ret = write_dev_supers(root, sb, dev);
/*
 * Refresh the super block copy from the current tree root and chunk root
 * (node address and level for both, plus the chunk root's header
 * generation) and write it out with write_all_supers().
 * fs_info->readonly is tested first; a write failure is reported on
 * stderr.
 */
1359 int write_ctree_super(struct btrfs_trans_handle *trans,
1360 struct btrfs_root *root)
1363 struct btrfs_root *tree_root = root->fs_info->tree_root;
1364 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
1366 if (root->fs_info->readonly)
1369 btrfs_set_super_generation(root->fs_info->super_copy,
1371 btrfs_set_super_root(root->fs_info->super_copy,
1372 tree_root->node->start);
1373 btrfs_set_super_root_level(root->fs_info->super_copy,
1374 btrfs_header_level(tree_root->node));
1375 btrfs_set_super_chunk_root(root->fs_info->super_copy,
1376 chunk_root->node->start);
1377 btrfs_set_super_chunk_root_level(root->fs_info->super_copy,
1378 btrfs_header_level(chunk_root->node));
1379 btrfs_set_super_chunk_root_generation(root->fs_info->super_copy,
1380 btrfs_header_generation(chunk_root->node));
1382 ret = write_all_supers(root);
1384 fprintf(stderr, "failed to write new super block err %d\n", ret);
/*
 * Close a filesystem opened with one of the open_ctree variants.  If the
 * last committed transaction is not the current generation, the pending
 * changes are committed and a second transaction is started to commit the
 * tree roots, write dirty blocks and write the super block once more.
 * Afterwards block groups, cached fs roots, root nodes, devices, caches
 * and fs_info itself are released, in that order.
 */
1388 int close_ctree(struct btrfs_root *root)
1391 struct btrfs_trans_handle *trans;
1392 struct btrfs_fs_info *fs_info = root->fs_info;
/* Anything uncommitted? */
1394 if (fs_info->last_trans_committed !=
1395 fs_info->generation) {
1396 trans = btrfs_start_transaction(root, 1);
1397 btrfs_commit_transaction(trans, root);
1398 trans = btrfs_start_transaction(root, 1);
1399 ret = commit_tree_roots(trans, fs_info);
1401 ret = __commit_transaction(trans, root);
1403 write_ctree_super(trans, root);
1404 btrfs_free_transaction(root, trans);
1406 btrfs_free_block_groups(fs_info);
1408 free_fs_roots_tree(&fs_info->fs_root_tree);
1410 btrfs_release_all_roots(fs_info);
1411 btrfs_close_devices(fs_info->fs_devices);
1412 btrfs_cleanup_all_caches(fs_info);
1413 btrfs_free_fs_info(fs_info);
/*
 * Clear the dirty state of @eb and return the result of
 * clear_extent_buffer_dirty().  @trans and @root are part of the interface
 * but are not used here.
 */
int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		     struct extent_buffer *eb)
{
	int ret = clear_extent_buffer_dirty(eb);

	return ret;
}
/*
 * Wait-for-writeback hook for a tree block.
 * NOTE(review): no statements of the body are visible in this excerpt, so
 * what the function does with root and eb cannot be stated here.
 */
1423 int wait_on_tree_block_writeback(struct btrfs_root *root,
1424 struct extent_buffer *eb)
/*
 * Mark @eb dirty by delegating to set_extent_buffer_dirty(); any value
 * that helper returns is deliberately discarded.
 */
void btrfs_mark_buffer_dirty(struct extent_buffer *eb)
{
	(void)set_extent_buffer_dirty(eb);
}
/*
 * Report whether buf can be used as-is: its uptodate state is queried and
 * its header generation is checked against parent_transid via
 * verify_parent_transid(), with 1 passed as that function's last argument.
 */
1434 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
1438 ret = extent_buffer_uptodate(buf);
1442 ret = verify_parent_transid(buf->tree, buf, parent_transid, 1);
/*
 * Mark @eb as up to date and return the result of
 * set_extent_buffer_uptodate().
 */
int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
{
	int ret = set_extent_buffer_uptodate(eb);

	return ret;
}