2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #define _XOPEN_SOURCE 600
24 #include <sys/types.h>
28 #include "kerncompat.h"
29 #include "radix-tree.h"
33 #include "transaction.h"
36 #include "print-tree.h"
/*
 * Sanity-check a tree block against the filesystem it is being used with.
 *
 * Two comparisons are made: the buffer's start offset against the bytenr
 * recorded in the block header, and the fsid recorded in the header against
 * root->fs_info->fsid.
 *
 * NOTE(review): what happens on a mismatch and what is returned are not
 * visible in this listing; callers compare the result against 0.
 */
38 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
40 if (buf->start != btrfs_header_bytenr(buf))
43 if (memcmp_extent_buffer(buf, root->fs_info->fsid,
44 (unsigned long)btrfs_header_fsid(buf),
/*
 * Fold len bytes starting at data into a running crc32c value.
 *
 * seed is the crc accumulated so far; the updated crc is returned.
 * root is not used by the visible body.
 */
50 u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
52 return crc32c(seed, data, len);
/*
 * Finish a checksum: convert crc with cpu_to_le32(), take the bitwise
 * complement and store it at result as a __le32.
 *
 * result must have room for (and be suitably aligned for) a __le32, since
 * it is written through a cast pointer.
 */
55 void btrfs_csum_final(u32 crc, char *result)
57 *(__le32 *)result = ~cpu_to_le32(crc);
/*
 * Checksum a tree block.
 *
 * The crc32c is computed over the block contents that follow the first
 * BTRFS_CSUM_SIZE bytes (buf->data + BTRFS_CSUM_SIZE, buf->len -
 * BTRFS_CSUM_SIZE bytes) and finalized with btrfs_csum_final(). The first
 * BTRFS_CRC32_SIZE bytes of the block (offset 0) hold the stored checksum:
 * they are either compared against the computed value or overwritten with it.
 *
 * NOTE(review): the remaining parameter and the condition selecting between
 * comparing and writing are not visible in this listing. read_tree_block()
 * passes 1 and write_tree_block() passes 0, so it presumably is a "verify"
 * flag - confirm.
 */
60 int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
63 char result[BTRFS_CRC32_SIZE];
67 len = buf->len - BTRFS_CSUM_SIZE;
68 crc = crc32c(crc, buf->data + BTRFS_CSUM_SIZE, len);
69 btrfs_csum_final(crc, result);
72 if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
/*
 * The second %X reports the checksum stored at the start of the block
 * data (the bytes memcmp_extent_buffer() just compared), not the first
 * bytes of the extent_buffer structure itself.
 */
73 printk("checksum verify failed on %llu wanted %X "
74 "found %X\n", (unsigned long long)buf->start,
75 *((int *)result), *((int *)buf->data));
79 write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
/*
 * Look up an extent buffer in root->fs_info->extent_cache via
 * find_extent_buffer() and return whatever that lookup yields.
 *
 * NOTE(review): the remaining arguments of the call are not visible here;
 * presumably bytenr and blocksize are forwarded - confirm.
 */
84 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
85 u64 bytenr, u32 blocksize)
87 return find_extent_buffer(&root->fs_info->extent_cache,
/*
 * Obtain the extent buffer for bytenr from root->fs_info->extent_cache via
 * alloc_extent_buffer() and return its result.
 *
 * NOTE(review): the trailing argument(s) of the call are not visible here;
 * presumably blocksize - confirm.
 */
91 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
92 u64 bytenr, u32 blocksize)
94 return alloc_extent_buffer(&root->fs_info->extent_cache, bytenr,
/*
 * Ask the OS to read ahead the on-disk location of a tree block.
 *
 * If the block is already cached and up to date for parent_transid, the
 * lookup reference is dropped. Otherwise the logical address is mapped to a
 * device stripe and readahead() is issued on the first stripe's device.
 *
 * NOTE(review): the parameter after blocksize, the early-return path and the
 * cleanup of 'multi' are not visible in this listing.
 */
98 int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
103 struct extent_buffer *eb;
105 struct btrfs_multi_bio *multi = NULL;
106 struct btrfs_device *device;
/* already cached and valid: only the reference taken by the lookup is dropped */
108 eb = btrfs_find_tree_block(root, bytenr, blocksize);
109 if (eb && btrfs_buffer_uptodate(eb, parent_transid)) {
110 free_extent_buffer(eb);
/* translate the logical bytenr into a physical location for reading */
116 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
117 bytenr, &length, &multi, 0);
119 device = multi->stripes[0].dev;
/* never request more than 64KiB of readahead */
121 blocksize = min(blocksize, (u32)(64 * 1024));
122 readahead(device->fd, multi->stripes[0].physical, blocksize);
/*
 * Check that the generation recorded in eb's header matches the transid the
 * parent expects.
 *
 * A parent_transid of 0 means "no expectation"; that case and an exact
 * generation match are handled by the first test. On a mismatch a diagnostic
 * is printed and the buffer's uptodate state is cleared in io_tree.
 *
 * NOTE(review): return values are not visible in this listing; callers treat
 * 0 as success.
 */
127 static int verify_parent_transid(struct extent_io_tree *io_tree,
128 struct extent_buffer *eb, u64 parent_transid)
132 if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
135 if (extent_buffer_uptodate(eb) &&
136 btrfs_header_generation(eb) == parent_transid) {
140 printk("parent transid verify failed on %llu wanted %llu found %llu\n",
141 (unsigned long long)eb->start,
142 (unsigned long long)parent_transid,
143 (unsigned long long)btrfs_header_generation(eb));
/* stale block: make sure it is not treated as valid cached content */
146 clear_extent_buffer_uptodate(io_tree, eb);
/*
 * Read a tree block from disk and validate it.
 *
 * The buffer for bytenr is looked up or created. If it is not already up to
 * date, the logical address is mapped to a stripe for the mirror being tried
 * and the block is read. A read counts as good when check_tree_block(),
 * csum_tree_block() in verify mode and verify_parent_transid() all return 0;
 * the buffer is then marked up to date.
 *
 * NOTE(review): the loop structure, the handling of mirror_num and the
 * return statements are not visible in this listing. The num_copies checks
 * suggest other mirrors are tried after a failure - confirm.
 */
152 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
153 u32 blocksize, u64 parent_transid)
157 struct extent_buffer *eb;
159 struct btrfs_multi_bio *multi = NULL;
160 struct btrfs_device *device;
164 eb = btrfs_find_create_tree_block(root, bytenr, blocksize);
168 if (btrfs_buffer_uptodate(eb, parent_transid))
/* map the logical start of the block to a physical stripe of the chosen mirror */
174 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
175 eb->start, &length, &multi, mirror_num);
177 device = multi->stripes[0].dev;
180 eb->dev_bytenr = multi->stripes[0].physical;
/* accept the data only if header, checksum and generation all check out */
182 ret = read_extent_from_disk(eb);
183 if (ret == 0 && check_tree_block(root, eb) == 0 &&
184 csum_tree_block(root, eb, 1) == 0 &&
185 verify_parent_transid(eb->tree, eb, parent_transid) == 0) {
186 btrfs_set_buffer_uptodate(eb);
189 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
191 if (num_copies == 1) {
195 if (mirror_num > num_copies) {
199 free_extent_buffer(eb);
/*
 * Write a tree block to every stripe its logical address maps to.
 *
 * The block is first checked with check_tree_block() and must be up to date
 * for the current transaction id. It is then flagged as WRITTEN, its checksum
 * is stored (csum_tree_block() called with 0), and the buffer is written once
 * per stripe returned by btrfs_map_block() for WRITE.
 *
 * NOTE(review): the reaction to failed checks or failed writes and the
 * return value are not visible in this listing.
 */
203 int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
204 struct extent_buffer *eb)
209 struct btrfs_multi_bio *multi = NULL;
211 if (check_tree_block(root, eb))
213 if (!btrfs_buffer_uptodate(eb, trans->transid))
216 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
217 csum_tree_block(root, eb, 0);
221 ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE,
222 eb->start, &length, &multi, 0);
/* one write per stripe: point the buffer at that device and physical offset */
224 while(dev_nr < multi->num_stripes) {
226 eb->fd = multi->stripes[dev_nr].dev->fd;
227 eb->dev_bytenr = multi->stripes[dev_nr].physical;
228 multi->stripes[dev_nr].dev->total_ios++;
230 ret = write_extent_to_disk(eb);
/*
 * Initialize the in-memory fields of a btrfs_root.
 *
 * Stores the block geometry (nodesize, leafsize, sectorsize, stripesize),
 * the owning fs_info and the objectid. Counters and flags are reset, the
 * dirty_list head is initialized, and root_key / root_item are zeroed before
 * root_key.objectid is set.
 *
 * The visible lines do not assign root->node; callers set it themselves.
 */
237 static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
238 u32 stripesize, struct btrfs_root *root,
239 struct btrfs_fs_info *fs_info, u64 objectid)
242 root->commit_root = NULL;
243 root->sectorsize = sectorsize;
244 root->nodesize = nodesize;
245 root->leafsize = leafsize;
246 root->stripesize = stripesize;
248 root->track_dirty = 0;
250 root->fs_info = fs_info;
251 root->objectid = objectid;
252 root->last_trans = 0;
253 root->highest_inode = 0;
254 root->last_inode_alloc = 0;
256 INIT_LIST_HEAD(&root->dirty_list);
257 memset(&root->root_key, 0, sizeof(root->root_key));
258 memset(&root->root_item, 0, sizeof(root->root_item));
259 root->root_key.objectid = objectid;
/*
 * Bring the root item of 'root' in line with its current root node and
 * store it in the tree of tree roots.
 *
 * The bytenr recorded in root_item is compared with root->node->start. The
 * item's bytenr and level are refreshed and written with btrfs_update_root().
 * Dirty block groups are written before and after.
 *
 * NOTE(review): the surrounding loop/exit structure is not visible here. The
 * repeated btrfs_write_dirty_block_groups() call suggests the update is
 * retried until the root node stops moving - confirm.
 */
263 static int update_cowonly_root(struct btrfs_trans_handle *trans,
264 struct btrfs_root *root)
268 struct btrfs_root *tree_root = root->fs_info->tree_root;
270 btrfs_write_dirty_block_groups(trans, root);
272 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
273 if (old_root_bytenr == root->node->start)
275 btrfs_set_root_bytenr(&root->root_item,
277 root->root_item.level = btrfs_header_level(root->node);
278 ret = btrfs_update_root(trans, tree_root,
282 btrfs_write_dirty_block_groups(trans, root);
/*
 * Process every root queued on fs_info->dirty_cowonly_roots.
 *
 * While the list is non-empty, its first entry is converted back to the
 * owning btrfs_root (via the dirty_list member) and handed to
 * update_cowonly_root().
 *
 * NOTE(review): the removal of the entry from the list and the return value
 * are not visible in this listing.
 */
287 static int commit_tree_roots(struct btrfs_trans_handle *trans,
288 struct btrfs_fs_info *fs_info)
290 struct btrfs_root *root;
291 struct list_head *next;
293 while(!list_empty(&fs_info->dirty_cowonly_roots)) {
294 next = fs_info->dirty_cowonly_roots.next;
296 root = list_entry(next, struct btrfs_root, dirty_list);
297 update_cowonly_root(trans, root);
/*
 * Write out extent buffers tracked in root->fs_info->extent_cache.
 *
 * A range [start, end] is obtained with find_first_extent_bit(). For each
 * position in it, the buffer starting exactly at 'start' is looked up (its
 * absence is treated as a bug), written with write_tree_block(), marked
 * clean and released.
 *
 * NOTE(review): the bit being searched for (presumably the dirty bit), the
 * advance of 'start' and the outer loop are not visible in this listing.
 */
302 static int __commit_transaction(struct btrfs_trans_handle *trans,
303 struct btrfs_root *root)
307 struct extent_buffer *eb;
308 struct extent_io_tree *tree = &root->fs_info->extent_cache;
312 ret = find_first_extent_bit(tree, 0, &start, &end,
316 while(start <= end) {
317 eb = find_first_extent_buffer(tree, start);
318 BUG_ON(!eb || eb->start != start);
319 ret = write_tree_block(trans, root, eb);
322 clear_extent_buffer_dirty(eb);
323 free_extent_buffer(eb);
/*
 * Commit the running transaction for 'root'.
 *
 * Visible steps, in order:
 *  - if the root node differs from commit_root, a heap copy of the root
 *    ("new_root") is made that refers to the old commit_root; a root item for
 *    the current root node is inserted under a key whose offset is the
 *    transaction id, and the old root's item gets a ref count of 0;
 *  - cow-only roots and dirty tree blocks are written, followed by the super
 *    block, and pinned extents are released;
 *  - a second transaction drops the old snapshot (new_root), deletes its
 *    root item and is committed the same way.
 *
 * NOTE(review): error checks, the jump taken when commit_root == node, the
 * check of the malloc() result and the release of new_root itself are not
 * visible in this listing - confirm against the full source.
 */
329 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
330 struct btrfs_root *root)
333 struct btrfs_root *new_root = NULL;
334 struct btrfs_fs_info *fs_info = root->fs_info;
336 if (root->commit_root == root->node)
/* snapshot the root structure so the previous tree can be dropped later */
339 new_root = malloc(sizeof(*new_root));
342 memcpy(new_root, root, sizeof(*new_root));
343 new_root->node = root->commit_root;
344 root->commit_root = NULL;
/* record the new root node under a key tagged with this transaction id */
346 root->root_key.offset = trans->transid;
347 btrfs_set_root_bytenr(&root->root_item, root->node->start);
348 root->root_item.level = btrfs_header_level(root->node);
349 ret = btrfs_insert_root(trans, fs_info->tree_root,
350 &root->root_key, &root->root_item);
/* the old root item is no longer referenced */
353 btrfs_set_root_refs(&new_root->root_item, 0);
354 ret = btrfs_update_root(trans, root->fs_info->tree_root,
355 &new_root->root_key, &new_root->root_item);
358 ret = commit_tree_roots(trans, fs_info);
360 ret = __commit_transaction(trans, root);
362 write_ctree_super(trans, root);
363 btrfs_finish_extent_commit(trans, fs_info->extent_root,
364 &fs_info->pinned_extents);
365 btrfs_free_transaction(root, trans);
366 fs_info->running_transaction = NULL;
/* second transaction: tear down the old tree and remove its root item */
368 trans = btrfs_start_transaction(root, 1);
369 ret = btrfs_drop_snapshot(trans, new_root);
371 ret = btrfs_del_root(trans, fs_info->tree_root, &new_root->root_key);
374 ret = commit_tree_roots(trans, fs_info);
376 ret = __commit_transaction(trans, root);
378 write_ctree_super(trans, root);
379 btrfs_finish_extent_commit(trans, fs_info->extent_root,
380 &fs_info->pinned_extents);
381 btrfs_free_transaction(root, trans);
382 free_extent_buffer(root->commit_root);
383 root->commit_root = NULL;
384 fs_info->running_transaction = NULL;
386 free_extent_buffer(new_root->node);
/*
 * Initialize 'root' for objectid and load its root node.
 *
 * The root inherits the block geometry of tree_root. Its root item and key
 * are fetched with btrfs_find_last_root(), and the root node is read from
 * the bytenr stored in that item, with a block size derived from the item's
 * level.
 *
 * NOTE(review): error handling and the return value are not visible in this
 * listing.
 */
392 static int find_and_setup_root(struct btrfs_root *tree_root,
393 struct btrfs_fs_info *fs_info,
394 u64 objectid, struct btrfs_root *root)
399 __setup_root(tree_root->nodesize, tree_root->leafsize,
400 tree_root->sectorsize, tree_root->stripesize,
401 root, fs_info, objectid);
402 ret = btrfs_find_last_root(tree_root, objectid,
403 &root->root_item, &root->root_key);
406 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
407 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
/*
 * Release the extent buffers held by a root: its node and, when set, its
 * commit_root.
 *
 * NOTE(review): whether the root structure itself is freed and what is
 * returned are not visible in this listing. fs_info is not used by the
 * visible lines.
 */
413 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
416 free_extent_buffer(root->node);
417 if (root->commit_root)
418 free_extent_buffer(root->commit_root);
/*
 * Allocate a btrfs_root and load it from the tree of tree roots.
 *
 * When location->offset is (u64)-1 the root is loaded with
 * find_and_setup_root(), i.e. through btrfs_find_last_root(). Otherwise the
 * exact key in 'location' is searched in tree_root, the root item is copied
 * out of the leaf and 'location' becomes the root key. The root node is then
 * read from the bytenr stored in the root item.
 *
 * Returns ERR_PTR(-ENOMEM) when the allocation fails.
 *
 * NOTE(review): the remaining error paths and the final return are not
 * visible in this listing.
 */
424 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
425 struct btrfs_key *location)
427 struct btrfs_root *root;
428 struct btrfs_root *tree_root = fs_info->tree_root;
429 struct btrfs_path *path;
430 struct extent_buffer *l;
434 root = malloc(sizeof(*root));
436 return ERR_PTR(-ENOMEM);
437 memset(root, 0, sizeof(*root));
/* an offset of (u64)-1 selects the "find last root" lookup */
438 if (location->offset == (u64)-1) {
439 ret = find_and_setup_root(tree_root, fs_info,
440 location->objectid, root);
448 __setup_root(tree_root->nodesize, tree_root->leafsize,
449 tree_root->sectorsize, tree_root->stripesize,
450 root, fs_info, location->objectid);
/* read-only search (no transaction) for the exact root item key */
452 path = btrfs_alloc_path();
454 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
461 read_extent_buffer(l, &root->root_item,
462 btrfs_item_ptr_offset(l, path->slots[0]),
463 sizeof(root->root_item));
464 memcpy(&root->root_key, location, sizeof(*location));
467 btrfs_release_path(root, path);
468 btrfs_free_path(path);
473 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
474 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
/*
 * Open a filesystem by path: open() the file with mode 0600 and hand the
 * descriptor to open_ctree_fd().
 *
 * flags starts as O_CREAT | O_RDWR.
 *
 * NOTE(review): any adjustment of flags based on 'writes', the check of the
 * open() result, the closing of the descriptor and the return are not
 * visible in this listing.
 */
482 struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes)
485 struct btrfs_root *root;
486 int flags = O_CREAT | O_RDWR;
491 fp = open(filename, flags, 0600);
495 root = open_ctree_fd(fp, filename, sb_bytenr, writes);
/*
 * Open a filesystem from an already opened descriptor.
 *
 * Visible steps: allocate the five core roots and the fs_info, scan the
 * device for a super block, initialize the fs_info caches and lists, open
 * the devices, read and validate the super block, then load the chunk tree,
 * the tree of tree roots, and the extent, device and fs trees.
 *
 * NOTE(review): none of the six malloc() results is checked in the visible
 * lines. The conditions guarding several statements (default sb_bytenr,
 * readonly, O_RDWR vs O_RDONLY), the error paths and the return value are
 * not visible in this listing.
 */
501 struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
509 struct btrfs_root *root = malloc(sizeof(struct btrfs_root));
510 struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root));
511 struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root));
512 struct btrfs_root *chunk_root = malloc(sizeof(struct btrfs_root));
513 struct btrfs_root *dev_root = malloc(sizeof(struct btrfs_root));
514 struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info));
516 struct btrfs_super_block *disk_super;
517 struct btrfs_fs_devices *fs_devices = NULL;
521 sb_bytenr = BTRFS_SUPER_INFO_OFFSET;
523 ret = btrfs_scan_one_device(fp, path, &fs_devices,
524 &total_devs, sb_bytenr);
527 fprintf(stderr, "No valid Btrfs found on %s\n", path);
/* when the device count is not 1, scan for the other devices with this fsid */
531 if (total_devs != 1) {
532 ret = btrfs_scan_for_fsid(fs_devices, total_devs, 1);
/* wire the freshly allocated roots into a zeroed fs_info */
536 memset(fs_info, 0, sizeof(*fs_info));
537 fs_info->fs_root = root;
538 fs_info->tree_root = tree_root;
539 fs_info->extent_root = extent_root;
540 fs_info->chunk_root = chunk_root;
541 fs_info->dev_root = dev_root;
544 fs_info->readonly = 1;
546 extent_io_tree_init(&fs_info->extent_cache);
547 extent_io_tree_init(&fs_info->free_space_cache);
548 extent_io_tree_init(&fs_info->block_group_cache);
549 extent_io_tree_init(&fs_info->pinned_extents);
550 extent_io_tree_init(&fs_info->pending_del);
551 extent_io_tree_init(&fs_info->extent_ins);
553 cache_tree_init(&fs_info->mapping_tree.cache_tree);
555 mutex_init(&fs_info->fs_mutex);
556 fs_info->fs_devices = fs_devices;
557 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
558 INIT_LIST_HEAD(&fs_info->space_info);
/* provisional 4096-byte geometry; replaced below once the super block is read */
560 __setup_root(4096, 4096, 4096, 4096, tree_root,
561 fs_info, BTRFS_ROOT_TREE_OBJECTID);
564 ret = btrfs_open_devices(fs_devices, O_RDWR);
566 ret = btrfs_open_devices(fs_devices, O_RDONLY);
569 ret = btrfs_bootstrap_super_map(&fs_info->mapping_tree, fs_devices);
/* read the super block directly from fs_devices->latest_bdev at sb_bytenr */
571 fs_info->sb_buffer = btrfs_find_create_tree_block(tree_root, sb_bytenr,
573 BUG_ON(!fs_info->sb_buffer);
574 fs_info->sb_buffer->fd = fs_devices->latest_bdev;
575 fs_info->sb_buffer->dev_bytenr = sb_bytenr;
576 ret = read_extent_from_disk(fs_info->sb_buffer);
578 btrfs_set_buffer_uptodate(fs_info->sb_buffer);
/* keep an in-memory copy of the super block and of the filesystem id */
580 read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0,
581 sizeof(fs_info->super_copy));
582 read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
583 (unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
586 disk_super = &fs_info->super_copy;
587 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
588 sizeof(disk_super->magic))) {
589 printk("No valid btrfs found\n");
/* switch tree_root to the geometry recorded in the super block */
592 nodesize = btrfs_super_nodesize(disk_super);
593 leafsize = btrfs_super_leafsize(disk_super);
594 sectorsize = btrfs_super_sectorsize(disk_super);
595 stripesize = btrfs_super_stripesize(disk_super);
596 tree_root->nodesize = nodesize;
597 tree_root->leafsize = leafsize;
598 tree_root->sectorsize = sectorsize;
599 tree_root->stripesize = stripesize;
601 ret = btrfs_read_super_device(tree_root, fs_info->sb_buffer);
603 ret = btrfs_read_sys_array(tree_root);
/* chunk tree: its root block location and level come from the super block */
605 blocksize = btrfs_level_size(tree_root,
606 btrfs_super_chunk_root_level(disk_super));
608 __setup_root(nodesize, leafsize, sectorsize, stripesize,
609 chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
611 chunk_root->node = read_tree_block(chunk_root,
612 btrfs_super_chunk_root(disk_super),
615 BUG_ON(!chunk_root->node);
617 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
618 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
621 ret = btrfs_read_chunk_tree(chunk_root);
/* tree of tree roots, then the extent, device and fs trees found through it */
624 blocksize = btrfs_level_size(tree_root,
625 btrfs_super_root_level(disk_super));
627 tree_root->node = read_tree_block(tree_root,
628 btrfs_super_root(disk_super),
630 BUG_ON(!tree_root->node);
631 ret = find_and_setup_root(tree_root, fs_info,
632 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
634 extent_root->track_dirty = 1;
636 ret = find_and_setup_root(tree_root, fs_info,
637 BTRFS_DEV_TREE_OBJECTID, dev_root);
639 dev_root->track_dirty = 1;
641 ret = find_and_setup_root(tree_root, fs_info,
642 BTRFS_FS_TREE_OBJECTID, root);
/* the next transaction runs one generation past the one on disk */
645 fs_info->generation = btrfs_super_generation(disk_super) + 1;
646 btrfs_read_block_groups(root);
648 fs_info->data_alloc_profile = (u64)-1;
649 fs_info->metadata_alloc_profile = (u64)-1;
650 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
/*
 * Write the super block buffer once for every device of the filesystem.
 *
 * For each device on fs_devices->devices, the device item embedded in the
 * super block is filled with that device's type, id, sizes, I/O geometry
 * and uuid. The buffer is then flagged WRITTEN, checksummed and written at
 * sb->start.
 *
 * dev_item is not a real pointer: it is an offset within struct
 * btrfs_super_block (built from offsetof) that the btrfs_set_device_*
 * accessors interpret relative to sb.
 *
 * NOTE(review): the selection of the target descriptor for each write, the
 * handling of write errors and the return value are not visible in this
 * listing.
 */
655 int write_all_supers(struct btrfs_root *root)
657 struct list_head *cur;
658 struct list_head *head = &root->fs_info->fs_devices->devices;
659 struct btrfs_device *dev;
660 struct extent_buffer *sb;
661 struct btrfs_dev_item *dev_item;
664 sb = root->fs_info->sb_buffer;
665 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
667 list_for_each(cur, head) {
668 dev = list_entry(cur, struct btrfs_device, dev_list);
669 btrfs_set_device_type(sb, dev_item, dev->type);
670 btrfs_set_device_id(sb, dev_item, dev->devid);
671 btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes);
672 btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used);
673 btrfs_set_device_io_align(sb, dev_item, dev->io_align);
674 btrfs_set_device_io_width(sb, dev_item, dev->io_width);
675 btrfs_set_device_sector_size(sb, dev_item, dev->sector_size);
676 write_extent_buffer(sb, dev->uuid,
677 (unsigned long)btrfs_device_uuid(dev_item),
/* the super block lives at the same offset on every device */
680 sb->dev_bytenr = sb->start;
681 btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN);
682 csum_tree_block(root, sb, 0);
683 ret = write_extent_to_disk(sb);
/*
 * Refresh the in-memory super block copy and write it to all devices.
 *
 * The generation and the location and level of both the tree root and the
 * chunk root are updated in fs_info->super_copy. The copy is then
 * transferred into sb_buffer and written with write_all_supers(); a failure
 * is reported on stderr.
 *
 * The readonly flag of fs_info is tested first.
 *
 * NOTE(review): what the readonly test leads to (presumably an early return
 * without writing), the generation value being stored and the return value
 * are not visible in this listing.
 */
689 int write_ctree_super(struct btrfs_trans_handle *trans,
690 struct btrfs_root *root)
693 struct btrfs_root *tree_root = root->fs_info->tree_root;
694 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
696 if (root->fs_info->readonly)
699 btrfs_set_super_generation(&root->fs_info->super_copy,
701 btrfs_set_super_root(&root->fs_info->super_copy,
702 tree_root->node->start);
703 btrfs_set_super_root_level(&root->fs_info->super_copy,
704 btrfs_header_level(tree_root->node));
705 btrfs_set_super_chunk_root(&root->fs_info->super_copy,
706 chunk_root->node->start);
707 btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
708 btrfs_header_level(chunk_root->node));
/* copy the updated super block into the buffer that is actually written */
709 write_extent_buffer(root->fs_info->sb_buffer,
710 &root->fs_info->super_copy, 0,
711 sizeof(root->fs_info->super_copy));
712 ret = write_all_supers(root);
714 fprintf(stderr, "failed to write new super block err %d\n", ret);
/*
 * Walk every device on fs_info->fs_devices->devices.
 *
 * NOTE(review): the per-device action (presumably closing the device's
 * descriptor, going by the function name) and the return value are not
 * visible in this listing.
 */
718 static int close_all_devices(struct btrfs_fs_info *fs_info)
720 struct list_head *list;
721 struct list_head *next;
722 struct btrfs_device *device;
726 list = &fs_info->fs_devices->devices;
727 list_for_each(next, list) {
728 device = list_entry(next, struct btrfs_device, dev_list);
/*
 * Shut down an open filesystem and release everything attached to it.
 *
 * Visible steps, in order:
 *  - commit the current transaction, then run one more round of
 *    commit_tree_roots() / __commit_transaction() / write_ctree_super()
 *    in a fresh transaction;
 *  - free the block groups and the root node buffers of the fs, extent,
 *    tree, chunk and device roots, plus commit_root and the super buffer;
 *  - close the devices, clean up the extent io trees and free the root
 *    structures.
 *
 * NOTE(review): error checks on the commit steps, the release of fs_info
 * itself and the return value are not visible in this listing.
 */
734 int close_ctree(struct btrfs_root *root)
737 struct btrfs_trans_handle *trans;
738 struct btrfs_fs_info *fs_info = root->fs_info;
740 trans = btrfs_start_transaction(root, 1);
741 btrfs_commit_transaction(trans, root);
742 trans = btrfs_start_transaction(root, 1);
743 ret = commit_tree_roots(trans, root->fs_info);
745 ret = __commit_transaction(trans, root);
747 write_ctree_super(trans, root);
748 btrfs_free_transaction(root, trans);
749 btrfs_free_block_groups(root->fs_info);
751 free_extent_buffer(root->node);
752 if (root->fs_info->extent_root->node)
753 free_extent_buffer(root->fs_info->extent_root->node);
754 if (root->fs_info->tree_root->node)
755 free_extent_buffer(root->fs_info->tree_root->node);
756 free_extent_buffer(root->commit_root);
757 free_extent_buffer(root->fs_info->sb_buffer);
/*
 * No ';' after the conditions below: a trailing semicolon would make the
 * 'if' an empty statement and the release would run unconditionally.
 */
759 if (root->fs_info->chunk_root->node)
760 free_extent_buffer(root->fs_info->chunk_root->node);
762 if (root->fs_info->dev_root->node)
763 free_extent_buffer(root->fs_info->dev_root->node);
765 close_all_devices(root->fs_info);
766 extent_io_tree_cleanup(&fs_info->extent_cache);
767 extent_io_tree_cleanup(&fs_info->free_space_cache);
768 extent_io_tree_cleanup(&fs_info->block_group_cache);
769 extent_io_tree_cleanup(&fs_info->pinned_extents);
770 extent_io_tree_cleanup(&fs_info->pending_del);
771 extent_io_tree_cleanup(&fs_info->extent_ins);
773 free(fs_info->tree_root);
774 free(fs_info->extent_root);
775 free(fs_info->fs_root);
776 free(fs_info->chunk_root);
777 free(fs_info->dev_root);
/*
 * Mark a tree block clean by clearing the dirty state of its extent buffer;
 * the result of clear_extent_buffer_dirty() is returned unchanged.
 * trans and root are not used by the visible body.
 */
783 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
784 struct extent_buffer *eb)
786 return clear_extent_buffer_dirty(eb);
/*
 * NOTE(review): only the signature is visible in this listing. Going by the
 * name, this is the hook for waiting until a tree block has been written
 * back; the actual behavior and return value are unconfirmed.
 */
789 int wait_on_tree_block_writeback(struct btrfs_root *root,
790 struct extent_buffer *eb)
/*
 * Mark an extent buffer dirty by delegating to set_extent_buffer_dirty().
 */
795 void btrfs_mark_buffer_dirty(struct extent_buffer *eb)
797 set_extent_buffer_dirty(eb);
/*
 * Tell whether a cached buffer can be used as-is.
 *
 * The buffer's uptodate state is queried first, then its header generation
 * is checked against parent_transid with verify_parent_transid().
 *
 * NOTE(review): how the two results are combined into the return value is
 * not visible in this listing; callers treat non-zero as "usable".
 */
800 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
804 ret = extent_buffer_uptodate(buf);
808 ret = verify_parent_transid(buf->tree, buf, parent_transid);
/*
 * Mark an extent buffer as up to date; the result of
 * set_extent_buffer_uptodate() is returned unchanged.
 */
812 int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
814 return set_extent_buffer_uptodate(eb);