2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #define _XOPEN_SOURCE 600
24 #include <sys/types.h>
28 #include "kerncompat.h"
29 #include "radix-tree.h"
33 #include "transaction.h"
36 #include "print-tree.h"
38 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
40 if (buf->start != btrfs_header_bytenr(buf))
43 if (memcmp_extent_buffer(buf, root->fs_info->fsid,
44 (unsigned long)btrfs_header_fsid(buf),
/*
 * Feed len bytes starting at data into crc32c(), continuing from seed, and
 * return the updated CRC. The root argument is not used by the visible code.
 */
50 u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
52 return crc32c(seed, data, len);
/*
 * Finish a checksum: convert crc with cpu_to_le32(), invert every bit of the
 * converted value, and store it through result, which is accessed as a __le32.
 */
55 void btrfs_csum_final(u32 crc, char *result)
/* NOTE(review): result is a char * dereferenced as __le32 -- presumably callers pass a suitably aligned buffer; confirm. */
57 *(__le32 *)result = ~cpu_to_le32(crc);
/*
 * Checksum a tree block.
 *
 * The CRC is computed with crc32c() over everything in buf->data after the
 * first BTRFS_CSUM_SIZE bytes and finalized with btrfs_csum_final(). The
 * visible code then either compares the result with the BTRFS_CRC32_SIZE
 * bytes at offset 0 of the buffer, reporting a mismatch through printk(), or
 * writes the result to offset 0. Which of the two happens is selected by the
 * third argument (its declaration is not visible in this excerpt; the read
 * path in this file passes 1 and the write paths pass 0).
 */
60 int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
63 char result[BTRFS_CRC32_SIZE];
67 len = buf->len - BTRFS_CSUM_SIZE;
68 crc = crc32c(crc, buf->data + BTRFS_CSUM_SIZE, len);
69 btrfs_csum_final(crc, result);
72 if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
/*
 * Report the computed checksum next to the one stored at the start of the
 * block data. The second value must be read from buf->data; dereferencing
 * buf itself would print the first word of the extent_buffer struct.
 */
73 printk("checksum verify failed on %llu wanted %X "
74 "found %X\n", (unsigned long long)buf->start,
75 *((int *)result), *((int *)buf->data));
79 write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
84 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
85 u64 bytenr, u32 blocksize)
87 return find_extent_buffer(&root->fs_info->extent_cache,
91 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
92 u64 bytenr, u32 blocksize)
94 return alloc_extent_buffer(&root->fs_info->extent_cache, bytenr,
98 int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
103 struct extent_buffer *eb;
105 struct btrfs_multi_bio *multi = NULL;
106 struct btrfs_device *device;
108 eb = btrfs_find_tree_block(root, bytenr, blocksize);
109 if (eb && btrfs_buffer_uptodate(eb, parent_transid)) {
110 free_extent_buffer(eb);
116 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
117 bytenr, &length, &multi, 0);
119 device = multi->stripes[0].dev;
121 blocksize = min(blocksize, (u32)(64 * 1024));
122 readahead(device->fd, multi->stripes[0].physical, blocksize);
127 static int verify_parent_transid(struct extent_io_tree *io_tree,
128 struct extent_buffer *eb, u64 parent_transid)
132 if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
135 if (extent_buffer_uptodate(eb) &&
136 btrfs_header_generation(eb) == parent_transid) {
140 printk("parent transid verify failed on %llu wanted %llu found %llu\n",
141 (unsigned long long)eb->start,
142 (unsigned long long)parent_transid,
143 (unsigned long long)btrfs_header_generation(eb));
146 clear_extent_buffer_uptodate(io_tree, eb);
152 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
153 u32 blocksize, u64 parent_transid)
157 struct extent_buffer *eb;
159 struct btrfs_multi_bio *multi = NULL;
160 struct btrfs_device *device;
164 eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
168 if (btrfs_buffer_uptodate(eb, parent_transid))
174 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
175 eb->start, &length, &multi, mirror_num);
177 device = multi->stripes[0].dev;
180 eb->dev_bytenr = multi->stripes[0].physical;
182 ret = read_extent_from_disk(eb);
183 if (ret == 0 && check_tree_block(root, eb) == 0 &&
184 csum_tree_block(root, eb, 1) == 0 &&
185 verify_parent_transid(eb->tree, eb, parent_transid) == 0) {
186 btrfs_set_buffer_uptodate(eb);
189 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
191 if (num_copies == 1) {
195 if (mirror_num > num_copies) {
199 free_extent_buffer(eb);
203 int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
204 struct extent_buffer *eb)
209 struct btrfs_multi_bio *multi = NULL;
211 if (check_tree_block(root, eb))
213 if (!btrfs_buffer_uptodate(eb, trans->transid))
216 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
217 csum_tree_block(root, eb, 0);
221 ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE,
222 eb->start, &length, &multi, 0);
224 while(dev_nr < multi->num_stripes) {
226 eb->fd = multi->stripes[dev_nr].dev->fd;
227 eb->dev_bytenr = multi->stripes[dev_nr].physical;
228 multi->stripes[dev_nr].dev->total_ios++;
230 ret = write_extent_to_disk(eb);
237 static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
238 u32 stripesize, struct btrfs_root *root,
239 struct btrfs_fs_info *fs_info, u64 objectid)
242 root->commit_root = NULL;
243 root->sectorsize = sectorsize;
244 root->nodesize = nodesize;
245 root->leafsize = leafsize;
246 root->stripesize = stripesize;
248 root->track_dirty = 0;
250 root->fs_info = fs_info;
251 root->objectid = objectid;
252 root->last_trans = 0;
253 root->highest_inode = 0;
254 root->last_inode_alloc = 0;
256 INIT_LIST_HEAD(&root->dirty_list);
257 memset(&root->root_key, 0, sizeof(root->root_key));
258 memset(&root->root_item, 0, sizeof(root->root_item));
259 root->root_key.objectid = objectid;
263 static int update_cowonly_root(struct btrfs_trans_handle *trans,
264 struct btrfs_root *root)
268 struct btrfs_root *tree_root = root->fs_info->tree_root;
270 btrfs_write_dirty_block_groups(trans, root);
272 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
273 if (old_root_bytenr == root->node->start)
275 btrfs_set_root_bytenr(&root->root_item,
277 btrfs_set_root_generation(&root->root_item,
279 root->root_item.level = btrfs_header_level(root->node);
280 ret = btrfs_update_root(trans, tree_root,
284 btrfs_write_dirty_block_groups(trans, root);
289 static int commit_tree_roots(struct btrfs_trans_handle *trans,
290 struct btrfs_fs_info *fs_info)
292 struct btrfs_root *root;
293 struct list_head *next;
294 struct extent_buffer *eb;
296 eb = fs_info->tree_root->node;
297 extent_buffer_get(eb);
298 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
299 free_extent_buffer(eb);
301 while(!list_empty(&fs_info->dirty_cowonly_roots)) {
302 next = fs_info->dirty_cowonly_roots.next;
304 root = list_entry(next, struct btrfs_root, dirty_list);
305 update_cowonly_root(trans, root);
310 static int __commit_transaction(struct btrfs_trans_handle *trans,
311 struct btrfs_root *root)
315 struct extent_buffer *eb;
316 struct extent_io_tree *tree = &root->fs_info->extent_cache;
320 ret = find_first_extent_bit(tree, 0, &start, &end,
324 while(start <= end) {
325 eb = find_first_extent_buffer(tree, start);
326 BUG_ON(!eb || eb->start != start);
327 ret = write_tree_block(trans, root, eb);
330 clear_extent_buffer_dirty(eb);
331 free_extent_buffer(eb);
337 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
338 struct btrfs_root *root)
341 struct btrfs_root *new_root = NULL;
342 struct btrfs_fs_info *fs_info = root->fs_info;
344 if (root->commit_root == root->node)
347 new_root = malloc(sizeof(*new_root));
350 memcpy(new_root, root, sizeof(*new_root));
351 new_root->node = root->commit_root;
352 root->commit_root = NULL;
354 root->root_key.offset = trans->transid;
355 btrfs_set_root_bytenr(&root->root_item, root->node->start);
356 btrfs_set_root_generation(&root->root_item, root->root_key.offset);
357 root->root_item.level = btrfs_header_level(root->node);
358 ret = btrfs_insert_root(trans, fs_info->tree_root,
359 &root->root_key, &root->root_item);
362 btrfs_set_root_refs(&new_root->root_item, 0);
363 ret = btrfs_update_root(trans, root->fs_info->tree_root,
364 &new_root->root_key, &new_root->root_item);
367 ret = commit_tree_roots(trans, fs_info);
369 ret = __commit_transaction(trans, root);
371 write_ctree_super(trans, root);
372 btrfs_finish_extent_commit(trans, fs_info->extent_root,
373 &fs_info->pinned_extents);
374 btrfs_free_transaction(root, trans);
375 fs_info->running_transaction = NULL;
377 trans = btrfs_start_transaction(root, 1);
378 ret = btrfs_drop_snapshot(trans, new_root);
380 ret = btrfs_del_root(trans, fs_info->tree_root, &new_root->root_key);
383 ret = commit_tree_roots(trans, fs_info);
385 ret = __commit_transaction(trans, root);
387 write_ctree_super(trans, root);
388 btrfs_finish_extent_commit(trans, fs_info->extent_root,
389 &fs_info->pinned_extents);
390 btrfs_free_transaction(root, trans);
391 free_extent_buffer(root->commit_root);
392 root->commit_root = NULL;
393 fs_info->running_transaction = NULL;
395 free_extent_buffer(new_root->node);
401 static int find_and_setup_root(struct btrfs_root *tree_root,
402 struct btrfs_fs_info *fs_info,
403 u64 objectid, struct btrfs_root *root)
409 __setup_root(tree_root->nodesize, tree_root->leafsize,
410 tree_root->sectorsize, tree_root->stripesize,
411 root, fs_info, objectid);
412 ret = btrfs_find_last_root(tree_root, objectid,
413 &root->root_item, &root->root_key);
416 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
417 generation = btrfs_root_generation(&root->root_item);
418 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
419 blocksize, generation);
424 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
427 free_extent_buffer(root->node);
428 if (root->commit_root)
429 free_extent_buffer(root->commit_root);
435 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
436 struct btrfs_key *location)
438 struct btrfs_root *root;
439 struct btrfs_root *tree_root = fs_info->tree_root;
440 struct btrfs_path *path;
441 struct extent_buffer *l;
446 root = malloc(sizeof(*root));
448 return ERR_PTR(-ENOMEM);
449 memset(root, 0, sizeof(*root));
450 if (location->offset == (u64)-1) {
451 ret = find_and_setup_root(tree_root, fs_info,
452 location->objectid, root);
460 __setup_root(tree_root->nodesize, tree_root->leafsize,
461 tree_root->sectorsize, tree_root->stripesize,
462 root, fs_info, location->objectid);
464 path = btrfs_alloc_path();
466 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
473 read_extent_buffer(l, &root->root_item,
474 btrfs_item_ptr_offset(l, path->slots[0]),
475 sizeof(root->root_item));
476 memcpy(&root->root_key, location, sizeof(*location));
479 btrfs_release_path(root, path);
480 btrfs_free_path(path);
485 generation = btrfs_root_generation(&root->root_item);
486 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
487 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
488 blocksize, generation);
495 struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes)
498 struct btrfs_root *root;
499 int flags = O_CREAT | O_RDWR;
504 fp = open(filename, flags, 0600);
508 root = open_ctree_fd(fp, filename, sb_bytenr, writes);
514 struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
523 struct btrfs_root *root = malloc(sizeof(struct btrfs_root));
524 struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root));
525 struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root));
526 struct btrfs_root *chunk_root = malloc(sizeof(struct btrfs_root));
527 struct btrfs_root *dev_root = malloc(sizeof(struct btrfs_root));
528 struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info));
530 struct btrfs_super_block *disk_super;
531 struct btrfs_fs_devices *fs_devices = NULL;
535 sb_bytenr = BTRFS_SUPER_INFO_OFFSET;
537 ret = btrfs_scan_one_device(fp, path, &fs_devices,
538 &total_devs, sb_bytenr);
541 fprintf(stderr, "No valid Btrfs found on %s\n", path);
545 if (total_devs != 1) {
546 ret = btrfs_scan_for_fsid(fs_devices, total_devs, 1);
550 memset(fs_info, 0, sizeof(*fs_info));
551 fs_info->fs_root = root;
552 fs_info->tree_root = tree_root;
553 fs_info->extent_root = extent_root;
554 fs_info->chunk_root = chunk_root;
555 fs_info->dev_root = dev_root;
558 fs_info->readonly = 1;
560 extent_io_tree_init(&fs_info->extent_cache);
561 extent_io_tree_init(&fs_info->free_space_cache);
562 extent_io_tree_init(&fs_info->block_group_cache);
563 extent_io_tree_init(&fs_info->pinned_extents);
564 extent_io_tree_init(&fs_info->pending_del);
565 extent_io_tree_init(&fs_info->extent_ins);
567 cache_tree_init(&fs_info->mapping_tree.cache_tree);
569 mutex_init(&fs_info->fs_mutex);
570 fs_info->fs_devices = fs_devices;
571 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
572 INIT_LIST_HEAD(&fs_info->space_info);
574 __setup_root(4096, 4096, 4096, 4096, tree_root,
575 fs_info, BTRFS_ROOT_TREE_OBJECTID);
578 ret = btrfs_open_devices(fs_devices, O_RDWR);
580 ret = btrfs_open_devices(fs_devices, O_RDONLY);
583 ret = btrfs_bootstrap_super_map(&fs_info->mapping_tree, fs_devices);
585 fs_info->sb_buffer = btrfs_find_create_tree_block(tree_root, sb_bytenr,
587 BUG_ON(!fs_info->sb_buffer);
588 fs_info->sb_buffer->fd = fs_devices->latest_bdev;
589 fs_info->sb_buffer->dev_bytenr = sb_bytenr;
590 ret = read_extent_from_disk(fs_info->sb_buffer);
592 btrfs_set_buffer_uptodate(fs_info->sb_buffer);
594 read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0,
595 sizeof(fs_info->super_copy));
596 read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
597 (unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
600 disk_super = &fs_info->super_copy;
601 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
602 sizeof(disk_super->magic))) {
603 printk("No valid btrfs found\n");
606 nodesize = btrfs_super_nodesize(disk_super);
607 leafsize = btrfs_super_leafsize(disk_super);
608 sectorsize = btrfs_super_sectorsize(disk_super);
609 stripesize = btrfs_super_stripesize(disk_super);
610 tree_root->nodesize = nodesize;
611 tree_root->leafsize = leafsize;
612 tree_root->sectorsize = sectorsize;
613 tree_root->stripesize = stripesize;
615 ret = btrfs_read_super_device(tree_root, fs_info->sb_buffer);
617 ret = btrfs_read_sys_array(tree_root);
619 blocksize = btrfs_level_size(tree_root,
620 btrfs_super_chunk_root_level(disk_super));
621 generation = btrfs_super_chunk_root_generation(disk_super);
623 __setup_root(nodesize, leafsize, sectorsize, stripesize,
624 chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
626 chunk_root->node = read_tree_block(chunk_root,
627 btrfs_super_chunk_root(disk_super),
628 blocksize, generation);
630 BUG_ON(!chunk_root->node);
632 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
633 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
636 ret = btrfs_read_chunk_tree(chunk_root);
639 blocksize = btrfs_level_size(tree_root,
640 btrfs_super_root_level(disk_super));
641 generation = btrfs_super_generation(disk_super);
643 tree_root->node = read_tree_block(tree_root,
644 btrfs_super_root(disk_super),
645 blocksize, generation);
646 BUG_ON(!tree_root->node);
647 ret = find_and_setup_root(tree_root, fs_info,
648 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
650 extent_root->track_dirty = 1;
652 ret = find_and_setup_root(tree_root, fs_info,
653 BTRFS_DEV_TREE_OBJECTID, dev_root);
655 dev_root->track_dirty = 1;
657 ret = find_and_setup_root(tree_root, fs_info,
658 BTRFS_FS_TREE_OBJECTID, root);
661 fs_info->generation = btrfs_super_generation(disk_super) + 1;
662 btrfs_read_block_groups(root);
664 fs_info->data_alloc_profile = (u64)-1;
665 fs_info->metadata_alloc_profile = (u64)-1;
666 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
671 int write_all_supers(struct btrfs_root *root)
673 struct list_head *cur;
674 struct list_head *head = &root->fs_info->fs_devices->devices;
675 struct btrfs_device *dev;
676 struct extent_buffer *sb;
677 struct btrfs_dev_item *dev_item;
680 sb = root->fs_info->sb_buffer;
681 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
683 list_for_each(cur, head) {
684 dev = list_entry(cur, struct btrfs_device, dev_list);
685 btrfs_set_device_type(sb, dev_item, dev->type);
686 btrfs_set_device_id(sb, dev_item, dev->devid);
687 btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes);
688 btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used);
689 btrfs_set_device_io_align(sb, dev_item, dev->io_align);
690 btrfs_set_device_io_width(sb, dev_item, dev->io_width);
691 btrfs_set_device_sector_size(sb, dev_item, dev->sector_size);
692 write_extent_buffer(sb, dev->uuid,
693 (unsigned long)btrfs_device_uuid(dev_item),
696 sb->dev_bytenr = sb->start;
697 btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN);
698 csum_tree_block(root, sb, 0);
699 ret = write_extent_to_disk(sb);
705 int write_ctree_super(struct btrfs_trans_handle *trans,
706 struct btrfs_root *root)
709 struct btrfs_root *tree_root = root->fs_info->tree_root;
710 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
712 if (root->fs_info->readonly)
715 btrfs_set_super_generation(&root->fs_info->super_copy,
717 btrfs_set_super_root(&root->fs_info->super_copy,
718 tree_root->node->start);
719 btrfs_set_super_root_level(&root->fs_info->super_copy,
720 btrfs_header_level(tree_root->node));
721 btrfs_set_super_chunk_root(&root->fs_info->super_copy,
722 chunk_root->node->start);
723 btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
724 btrfs_header_level(chunk_root->node));
725 btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy,
726 btrfs_header_generation(chunk_root->node));
727 write_extent_buffer(root->fs_info->sb_buffer,
728 &root->fs_info->super_copy, 0,
729 sizeof(root->fs_info->super_copy));
730 ret = write_all_supers(root);
732 fprintf(stderr, "failed to write new super block err %d\n", ret);
736 static int close_all_devices(struct btrfs_fs_info *fs_info)
738 struct list_head *list;
739 struct list_head *next;
740 struct btrfs_device *device;
744 list = &fs_info->fs_devices->devices;
745 list_for_each(next, list) {
746 device = list_entry(next, struct btrfs_device, dev_list);
/*
 * Shut down a filesystem handle.
 *
 * Visible steps, in order: commit one transaction through
 * btrfs_commit_transaction(), start another and push it out with
 * commit_tree_roots(), __commit_transaction() and write_ctree_super(), free
 * the transaction and the block groups, release the extent buffers held by
 * the fs, extent, tree, chunk and dev roots plus the super block buffer,
 * close all devices, clean up every extent io tree in fs_info, and finally
 * free() the five root structures referenced from fs_info.
 */
752 int close_ctree(struct btrfs_root *root)
755 struct btrfs_trans_handle *trans;
756 struct btrfs_fs_info *fs_info = root->fs_info;
758 trans = btrfs_start_transaction(root, 1);
759 btrfs_commit_transaction(trans, root);
760 trans = btrfs_start_transaction(root, 1);
761 ret = commit_tree_roots(trans, root->fs_info);
763 ret = __commit_transaction(trans, root);
765 write_ctree_super(trans, root);
766 btrfs_free_transaction(root, trans);
767 btrfs_free_block_groups(root->fs_info);
769 free_extent_buffer(root->node);
770 if (root->fs_info->extent_root->node)
771 free_extent_buffer(root->fs_info->extent_root->node);
772 if (root->fs_info->tree_root->node)
773 free_extent_buffer(root->fs_info->tree_root->node);
774 free_extent_buffer(root->commit_root);
775 free_extent_buffer(root->fs_info->sb_buffer);
/*
 * The two checks below must not end in ';': an empty statement after the
 * if would make the free_extent_buffer() call unconditional and turn the
 * NULL guard into dead code.
 */
777 if (root->fs_info->chunk_root->node)
778 free_extent_buffer(root->fs_info->chunk_root->node);
780 if (root->fs_info->dev_root->node)
781 free_extent_buffer(root->fs_info->dev_root->node);
783 close_all_devices(root->fs_info);
784 extent_io_tree_cleanup(&fs_info->extent_cache);
785 extent_io_tree_cleanup(&fs_info->free_space_cache);
786 extent_io_tree_cleanup(&fs_info->block_group_cache);
787 extent_io_tree_cleanup(&fs_info->pinned_extents);
788 extent_io_tree_cleanup(&fs_info->pending_del);
789 extent_io_tree_cleanup(&fs_info->extent_ins);
791 free(fs_info->tree_root);
792 free(fs_info->extent_root);
793 free(fs_info->fs_root);
794 free(fs_info->chunk_root);
795 free(fs_info->dev_root);
/*
 * Clear the dirty state of eb through clear_extent_buffer_dirty() and return
 * that call's result. trans and root are not used by the visible code.
 */
801 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
802 struct extent_buffer *eb)
804 return clear_extent_buffer_dirty(eb);
807 int wait_on_tree_block_writeback(struct btrfs_root *root,
808 struct extent_buffer *eb)
/*
 * Mark eb dirty by calling set_extent_buffer_dirty(). Nothing is returned to
 * the caller.
 */
813 void btrfs_mark_buffer_dirty(struct extent_buffer *eb)
815 set_extent_buffer_dirty(eb);
818 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
822 ret = extent_buffer_uptodate(buf);
826 ret = verify_parent_transid(buf->tree, buf, parent_transid);
/*
 * Flag eb as up to date through set_extent_buffer_uptodate() and pass that
 * call's return value back to the caller.
 */
830 int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
832 return set_extent_buffer_uptodate(eb);