3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
19 #define _XOPEN_SOURCE 600
23 #include <sys/types.h>
27 #include "kerncompat.h"
28 #include "extent_io.h"
/*
 * Prepare an extent_io_tree for use: initialise its two cache trees
 * ('state' and 'cache') and the 'lru' list head.
 * NOTE(review): any further field initialisation is on lines not shown in
 * this listing.
 */
33 void extent_io_tree_init(struct extent_io_tree *tree)
35 cache_tree_init(&tree->state);
36 cache_tree_init(&tree->cache);
37 INIT_LIST_HEAD(&tree->lru);
/*
 * Allocate a fresh extent_state with malloc() and reset its
 * cache_node.objectid to 0.
 * NOTE(review): the allocation-failure check, the initialisation of the
 * remaining fields and the return statement are on lines not shown in this
 * listing -- confirm before relying on any default value.
 */
41 static struct extent_state *alloc_extent_state(void)
43 struct extent_state *state;
45 state = malloc(sizeof(*state));
48 state->cache_node.objectid = 0;
/*
 * Release an extent_state.  The only statement visible here asserts that the
 * reference count has not gone negative.
 * NOTE(review): the decrement of 'refs' and the actual free() are presumably
 * on lines not shown in this listing -- confirm.
 */
55 static void btrfs_free_extent_state(struct extent_state *state)
58 BUG_ON(state->refs < 0);
/*
 * Per-entry callback given to cache_tree_free_extents(): recover the
 * extent_state that embeds 'cache' as its cache_node member and release it
 * through btrfs_free_extent_state().
 */
63 static void free_extent_state_func(struct cache_extent *cache)
65 struct extent_state *es;
67 es = container_of(cache, struct extent_state, cache_node);
68 btrfs_free_extent_state(es);
/*
 * Tear down an extent_io_tree.
 *
 * Every extent_buffer still linked on tree->lru is reported on stderr as a
 * leak (start and length) and handed to free_extent_buffer().  Afterwards
 * all entries of tree->state are released via free_extent_state_func().
 */
71 void extent_io_tree_cleanup(struct extent_io_tree *tree)
73 struct extent_buffer *eb;
/*
 * Always look at the head of the list.  NOTE(review): the loop only makes
 * progress if free_extent_buffer() unlinks 'eb' from the lru -- confirm that
 * this holds for buffers with extra references.
 */
75 while(!list_empty(&tree->lru)) {
76 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
77 fprintf(stderr, "extent buffer leak: "
78 "start %llu len %u\n",
79 (unsigned long long)eb->start, eb->len);
80 free_extent_buffer(eb);
83 cache_tree_free_extents(&tree->state, free_extent_state_func);
/*
 * Re-derive the embedded cache_node key from the state's own range:
 * cache_node.start mirrors state->start and cache_node.size is the length of
 * the inclusive range [start, end], i.e. end + 1 - start.
 */
86 static inline void update_extent_state(struct extent_state *state)
88 state->cache_node.start = state->start;
89 state->cache_node.size = state->end + 1 - state->start;
93 * Utility function to look for merge candidates inside a given range.
94 * Any extents with matching state are merged together into a single
95 * extent in the tree. Extents with EXTENT_IO in their state field are
/*
 * Try to coalesce 'state' with its direct neighbours in tree->state.
 *
 * A neighbour qualifies when it touches 'state' with no gap and has exactly
 * the same state bits.  A qualifying predecessor is absorbed into 'state'
 * (state->start moves back) and the predecessor is removed and freed.  A
 * qualifying successor absorbs 'state' instead: the successor's start moves
 * back and 'state' itself is removed from the tree and freed, so callers
 * must not touch 'state' after this call.
 *
 * NOTE(review): the statement executed when any EXTENT_IOBITS bit is set
 * (presumably an early return), the NULL checks on the neighbour nodes and
 * the return value are on lines not shown in this listing.
 */
98 static int merge_state(struct extent_io_tree *tree,
99 struct extent_state *state)
101 struct extent_state *other;
102 struct cache_extent *other_node;
104 if (state->state & EXTENT_IOBITS)
/* candidate 1: the entry just before 'state' in the tree */
107 other_node = prev_cache_extent(&state->cache_node);
109 other = container_of(other_node, struct extent_state,
111 if (other->end == state->start - 1 &&
112 other->state == state->state) {
/* extend 'state' backwards over 'other', refresh its key, drop 'other' */
113 state->start = other->start;
114 update_extent_state(state);
115 remove_cache_extent(&tree->state, &other->cache_node);
116 btrfs_free_extent_state(other);
/* candidate 2: the entry just after 'state' in the tree */
119 other_node = next_cache_extent(&state->cache_node);
121 other = container_of(other_node, struct extent_state,
123 if (other->start == state->end + 1 &&
124 other->state == state->state) {
/* here the successor survives and 'state' is the one that goes away */
125 other->start = state->start;
126 update_extent_state(other);
127 remove_cache_extent(&tree->state, &state->cache_node);
128 btrfs_free_extent_state(state);
135 * insert an extent_state struct into the tree. 'bits' are set on the
136 * struct before it is inserted.
/*
 * Fill in 'state' and add it to tree->state: OR 'bits' into its state field,
 * set its start, refresh the cache_node key, insert the node and finally try
 * to merge it with its neighbours.
 *
 * NOTE(review): the rest of the parameter list (where 'bits' is declared),
 * the assignment of state->end and the handling of 'ret' after the insert
 * are on lines not shown in this listing.  Callers in this file BUG on a
 * result of -EEXIST.
 */
138 static int insert_state(struct extent_io_tree *tree,
139 struct extent_state *state, u64 start, u64 end,
145 state->state |= bits;
146 state->start = start;
148 update_extent_state(state);
149 ret = insert_cache_extent(&tree->state, &state->cache_node);
/* merge_state() may free 'state'; it must not be used after this call */
151 merge_state(tree, state);
156 * split a given extent state struct in two, inserting the preallocated
157 * struct 'prealloc' as the newly created first half, [orig->start, split - 1].
158 * 'split' indicates an offset inside 'orig' where it should be split.
/*
 * Cut 'orig' in two at offset 'split'.
 *
 * 'prealloc' is set up as the piece [orig->start, split - 1] with a copy of
 * orig's state bits and is inserted into tree->state as a new node.
 * NOTE(review): the statement that moves orig->start up to 'split' is on a
 * line not shown in this listing; the update_extent_state(orig) call implies
 * orig's range is changed before the insert -- confirm.  The return value is
 * likewise not visible; callers BUG on -EEXIST.
 */
160 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
161 struct extent_state *prealloc, u64 split)
164 prealloc->start = orig->start;
165 prealloc->end = split - 1;
166 prealloc->state = orig->state;
167 update_extent_state(prealloc);
169 update_extent_state(orig);
170 ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
176 * clear some bits on a single extent state, dropping it once no bits remain.
/*
 * Clear 'bits' on one extent_state.
 *
 * 'ret' is initialised to the subset of 'bits' that was set before the
 * clear.  When no bits are left the state is removed from tree->state and
 * freed, so the caller must not use 'state' afterwards.
 * NOTE(review): the merge_state() call is presumably the else branch for a
 * state that still has bits set; the branch structure and the return
 * statement are on lines not shown in this listing.
 */
178 static int clear_state_bit(struct extent_io_tree *tree,
179 struct extent_state *state, int bits)
181 int ret = state->state & bits;
183 state->state &= ~bits;
184 if (state->state == 0) {
185 remove_cache_extent(&tree->state, &state->cache_node);
186 btrfs_free_extent_state(state);
188 merge_state(tree, state);
194 * clear some bits on a range in the tree.
/*
 * Clear 'bits' on the extent states overlapping the inclusive range
 * [start, end].
 *
 * A state that begins before 'start' or ends after 'end' is first split with
 * the spare 'prealloc' state so that only the part inside the range is
 * modified.  'set' accumulates what clear_state_bit() reports, i.e. which of
 * the requested bits were actually set somewhere in the range.
 *
 * NOTE(review): the declarations of 'set', 'err' and 'last_end', the
 * loop/goto framing, the re-allocation of 'prealloc' after it is consumed and
 * the return statement are on lines not shown in this listing.  'mask' is not
 * used on any visible line.
 */
196 int clear_extent_bits(struct extent_io_tree *tree, u64 start,
197 u64 end, int bits, gfp_t mask)
199 struct extent_state *state;
200 struct extent_state *prealloc = NULL;
201 struct cache_extent *node;
/* spare state consumed by split_state() when an existing state must be cut */
208 prealloc = alloc_extent_state();
214 * this search will find the extents that end after
217 node = search_cache_extent(&tree->state, start);
220 state = container_of(node, struct extent_state, cache_node);
221 if (state->start > end)
/* saved up front: clear_state_bit() below may free 'state' */
223 last_end = state->end;
226 * | ---- desired range ---- |
228 * | ------------- state -------------- |
230 * We need to split the extent we found, and may flip
231 * bits on second half.
233 * If the extent we found extends past our range, we
234 * just split and search again. It'll get split again
235 * the next time though.
237 * If the extent we found is inside our range, we clear
238 * the desired bit on it.
240 if (state->start < start) {
241 err = split_state(tree, state, prealloc, start);
242 BUG_ON(err == -EEXIST);
246 if (state->end <= end) {
247 set |= clear_state_bit(tree, state, bits);
/* last_end + 1 would wrap around when last_end is the largest u64 */
248 if (last_end == (u64)-1)
250 start = last_end + 1;
252 start = state->start;
257 * | ---- desired range ---- |
259 * We need to split the extent, and clear the bit
262 if (state->start <= end && state->end > end) {
263 err = split_state(tree, state, prealloc, end + 1);
264 BUG_ON(err == -EEXIST);
/* after the split 'prealloc' is the piece that lies inside the range */
266 set |= clear_state_bit(tree, prealloc, bits);
/* state lies completely inside the range: advance past it, then clear */
271 start = state->end + 1;
272 set |= clear_state_bit(tree, state, bits);
273 if (last_end == (u64)-1)
275 start = last_end + 1;
279 btrfs_free_extent_state(prealloc);
289 * set some bits on a range in the tree.
/*
 * Set 'bits' on the inclusive range [start, end].
 *
 * Existing states that lie inside the range get the bits OR-ed in, states
 * that straddle an edge of the range are split first, and holes between
 * states are filled by inserting the spare 'prealloc' state.
 *
 * NOTE(review): the declarations of 'err', 'last_start', 'last_end' and
 * 'this_end', the loop/goto framing, the re-allocation of 'prealloc' after
 * it is consumed and the return statement are on lines not shown in this
 * listing.  'mask' is not used on any visible line.
 */
291 int set_extent_bits(struct extent_io_tree *tree, u64 start,
292 u64 end, int bits, gfp_t mask)
294 struct extent_state *state;
295 struct extent_state *prealloc = NULL;
296 struct cache_extent *node;
/* spare state used for inserts into holes and for splits */
302 prealloc = alloc_extent_state();
308 * this search will find the extents that end after
311 node = search_cache_extent(&tree->state, start);
/*
 * NOTE(review): presumably taken when the search found nothing (the
 * condition is on a line not shown): the whole range becomes one new state.
 */
313 err = insert_state(tree, prealloc, start, end, bits);
314 BUG_ON(err == -EEXIST);
319 state = container_of(node, struct extent_state, cache_node);
/* cached now because merge_state() below may free 'state' */
320 last_start = state->start;
321 last_end = state->end;
324 * | ---- desired range ---- |
327 * Just lock what we found and keep going
329 if (state->start == start && state->end <= end) {
330 state->state |= bits;
331 merge_state(tree, state);
/* last_end + 1 would wrap around when last_end is the largest u64 */
332 if (last_end == (u64)-1)
334 start = last_end + 1;
338 * | ---- desired range ---- |
341 * | ------------- state -------------- |
343 * We need to split the extent we found, and may flip bits on
346 * If the extent we found extends past our
347 * range, we just split and search again. It'll get split
348 * again the next time though.
350 * If the extent we found is inside our range, we set the
353 if (state->start < start) {
354 err = split_state(tree, state, prealloc, start);
355 BUG_ON(err == -EEXIST);
359 if (state->end <= end) {
360 state->state |= bits;
361 start = state->end + 1;
362 merge_state(tree, state);
363 if (last_end == (u64)-1)
365 start = last_end + 1;
367 start = state->start;
372 * | ---- desired range ---- |
373 * | state | or | state |
375 * There's a hole, we need to insert something in it and
376 * ignore the extent we found.
378 if (state->start > start) {
/* the hole ends at 'end' or just before the state found, whichever is first */
380 if (end < last_start)
383 this_end = last_start -1;
384 err = insert_state(tree, prealloc, start, this_end,
386 BUG_ON(err == -EEXIST);
390 start = this_end + 1;
394 * | ---- desired range ---- |
395 * | ---------- state ---------- |
396 * We need to split the extent, and set the bit
399 err = split_state(tree, state, prealloc, end + 1);
400 BUG_ON(err == -EEXIST);
/*
 * NOTE(review): split_state() gives 'prealloc' the piece
 * [state->start, end], i.e. the part inside the desired range, yet the bits
 * are OR-ed into 'state' here while 'prealloc' is the one merged.  Verify
 * that the bits end up on the intended half.
 */
402 state->state |= bits;
403 merge_state(tree, prealloc);
407 btrfs_free_extent_state(prealloc);
/*
 * Convenience wrapper: set EXTENT_DIRTY on [start, end] through
 * set_extent_bits(), forwarding 'mask' and returning its result.
 * (The tail of the parameter list, where 'mask' is declared, is on a line
 * not shown in this listing.)
 */
415 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
418 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
/*
 * Convenience wrapper: clear EXTENT_DIRTY on [start, end] through
 * clear_extent_bits(), forwarding 'mask' and returning its result.
 * (The tail of the parameter list, where 'mask' is declared, is on a line
 * not shown in this listing.)
 */
421 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
424 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
/*
 * Find the first extent state at or after 'start' that has any of 'bits'
 * set.
 *
 * The walk begins at the node search_cache_extent() returns for 'start' and
 * advances with next_cache_extent().  For the first state that ends at or
 * after 'start' and matches 'bits', its range is written to *start_ret and
 * *end_ret.
 * NOTE(review): the loop framing, the handling of a NULL node and the return
 * values are on lines not shown in this listing.
 */
427 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
428 u64 *start_ret, u64 *end_ret, int bits)
430 struct cache_extent *node;
431 struct extent_state *state;
435 * this search will find all the extents that end after
438 node = search_cache_extent(&tree->state, start);
443 state = container_of(node, struct extent_state, cache_node);
444 if (state->end >= start && (state->state & bits)) {
445 *start_ret = state->start;
446 *end_ret = state->end;
450 node = next_cache_extent(node);
/*
 * Test 'bits' over the inclusive range [start, end].
 *
 * States are visited in tree order while 'start' has not moved past 'end';
 * after a state has been examined 'start' advances to just beyond it.  With
 * 'filled' non-zero, a state that begins after the current 'start' (a gap in
 * coverage) is singled out; a state that begins after 'end' is outside the
 * range.
 * NOTE(review): the result variable, what each branch does with it and the
 * return statement are on lines not shown in this listing, so the exact
 * "any" vs. "all" semantics of 'filled' should be confirmed there.
 */
458 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
459 int bits, int filled)
461 struct extent_state *state = NULL;
462 struct cache_extent *node;
465 node = search_cache_extent(&tree->state, start);
466 while (node && start <= end) {
467 state = container_of(node, struct extent_state, cache_node);
/* 'filled' mode: a state starting after 'start' means part of the range is uncovered */
469 if (filled && state->start > start) {
473 if (state->start > end)
475 if (state->state & bits) {
483 start = state->end + 1;
486 node = next_cache_extent(node);
/*
 * Store 'private' in the xprivate field of the extent state found for
 * 'start'.
 *
 * The state is looked up with search_cache_extent() and its start is
 * compared against 'start'.
 * NOTE(review): what happens when no node is found or the starts differ
 * (presumably an error return) and the return value are on lines not shown
 * in this listing.
 */
496 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
498 struct cache_extent *node;
499 struct extent_state *state;
502 node = search_cache_extent(&tree->state, start);
507 state = container_of(node, struct extent_state, cache_node);
508 if (state->start != start) {
512 state->xprivate = private;
/*
 * Read back the xprivate field of the extent state found for 'start' into
 * *private.
 *
 * The state is looked up with search_cache_extent() and its start is
 * compared against 'start'.
 * NOTE(review): what happens when no node is found or the starts differ
 * (presumably an error return) and the return value are on lines not shown
 * in this listing.
 */
517 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
519 struct cache_extent *node;
520 struct extent_state *state;
523 node = search_cache_extent(&tree->state, start);
528 state = container_of(node, struct extent_state, cache_node);
529 if (state->start != start) {
533 *private = state->xprivate;
/*
 * Allocate an extent_buffer header together with 'blocksize' bytes of
 * payload in a single malloc() and zero the whole allocation.
 *
 * Visible initialisation: dev_bytenr is set to (u64)-1, the cache_node key
 * becomes [bytenr, bytenr + blocksize) and the 'recow' list head is
 * initialised.
 * NOTE(review): the allocation-failure check, further field assignments
 * (e.g. start, len, refs, tree) and the return statement are on lines not
 * shown in this listing; 'tree' is not used on any visible line.
 */
538 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
539 u64 bytenr, u32 blocksize)
541 struct extent_buffer *eb;
543 eb = malloc(sizeof(struct extent_buffer) + blocksize);
548 memset(eb, 0, sizeof(struct extent_buffer) + blocksize);
556 eb->dev_bytenr = (u64)-1;
557 eb->cache_node.start = bytenr;
558 eb->cache_node.size = blocksize;
559 INIT_LIST_HEAD(&eb->recow);
/*
 * Make a private copy of 'src'.
 *
 * The copy is allocated with a NULL tree and the same start/len, receives
 * all src->len bytes of src's data and is flagged EXTENT_BUFFER_DUMMY.
 * free_extent_buffer() skips the cache-tree bookkeeping for buffers carrying
 * that flag.
 * NOTE(review): the allocation-failure check and the return statement are on
 * lines not shown in this listing.
 */
564 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
566 struct extent_buffer *new;
568 new = __alloc_extent_buffer(NULL, src->start, src->len);
572 copy_extent_buffer(new, src, 0, 0, src->len);
573 new->flags |= EXTENT_BUFFER_DUMMY;
/*
 * Drop an extent_buffer.
 *
 * Visible teardown steps: assert that the refcount is not negative and that
 * the buffer is not dirty, unlink it from the 'lru' and 'recow' lists and,
 * unless it is an EXTENT_BUFFER_DUMMY buffer, remove it from tree->cache and
 * subtract its length from tree->cache_size.
 * NOTE(review): the refcount decrement, the condition that guards the
 * teardown (presumably refs reaching zero), any NULL check on 'eb' and the
 * final free() are on lines not shown in this listing.
 */
578 void free_extent_buffer(struct extent_buffer *eb)
584 BUG_ON(eb->refs < 0);
586 struct extent_io_tree *tree = eb->tree;
587 BUG_ON(eb->flags & EXTENT_DIRTY);
588 list_del_init(&eb->lru);
589 list_del_init(&eb->recow);
/* dummy buffers are skipped here: no cache-tree removal or size accounting */
590 if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
591 BUG_ON(tree->cache_size < eb->len);
592 remove_cache_extent(&tree->cache, &eb->cache_node);
593 tree->cache_size -= eb->len;
/*
 * Look up a cached extent_buffer that covers exactly
 * [bytenr, bytenr + blocksize).
 *
 * An entry that merely overlaps the range is ignored; only an exact match on
 * start and size counts.  A hit is moved to the tail of tree->lru.
 * 'eb' starts out NULL, so a miss leaves it NULL.
 * NOTE(review): any reference-count change on a hit and the return statement
 * are on lines not shown in this listing.
 */
599 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
600 u64 bytenr, u32 blocksize)
602 struct extent_buffer *eb = NULL;
603 struct cache_extent *cache;
605 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
606 if (cache && cache->start == bytenr &&
607 cache->size == blocksize) {
608 eb = container_of(cache, struct extent_buffer, cache_node);
609 list_move_tail(&eb->lru, &tree->lru);
/*
 * Return the cached extent_buffer that search_cache_extent() finds for
 * 'start' in tree->cache, moving it to the tail of tree->lru.
 * NOTE(review): the tail of the parameter list (where 'start' is declared),
 * the check for a failed search, any reference-count change and the return
 * statement are on lines not shown in this listing.
 */
615 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
618 struct extent_buffer *eb = NULL;
619 struct cache_extent *cache;
621 cache = search_cache_extent(&tree->cache, start);
623 eb = container_of(cache, struct extent_buffer, cache_node);
624 list_move_tail(&eb->lru, &tree->lru);
/*
 * Get the extent_buffer for [bytenr, bytenr + blocksize), creating it if it
 * is not cached yet.
 *
 * An exact cache hit is moved to the tail of tree->lru and reused.
 * Otherwise an entry found by the lookup that is not an exact match is
 * passed to free_extent_buffer(), a new buffer is allocated, inserted into
 * tree->cache, appended to tree->lru and accounted in tree->cache_size.
 * NOTE(review): the else/if framing, the reference-count change on a hit,
 * the failure handling for the allocation and the insert, and the return
 * statement are on lines not shown in this listing.
 */
630 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
631 u64 bytenr, u32 blocksize)
633 struct extent_buffer *eb;
634 struct cache_extent *cache;
636 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
637 if (cache && cache->start == bytenr &&
638 cache->size == blocksize) {
639 eb = container_of(cache, struct extent_buffer, cache_node);
640 list_move_tail(&eb->lru, &tree->lru);
/* an overlapping entry with a different start/size is released first */
646 eb = container_of(cache, struct extent_buffer,
648 free_extent_buffer(eb);
650 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
653 ret = insert_cache_extent(&tree->cache, &eb->cache_node);
658 list_add_tail(&eb->lru, &tree->lru);
659 tree->cache_size += blocksize;
/*
 * Read 'len' bytes from eb->fd at file position eb->dev_bytenr into the
 * buffer's data area, starting 'offset' bytes into eb->data.
 * NOTE(review): 'offset' only shifts the destination inside eb->data; the
 * file position passed to pread() is always eb->dev_bytenr -- confirm this
 * is intended.  The declaration of 'ret', the error / short-read handling
 * and the return value are on lines not shown in this listing.
 */
664 int read_extent_from_disk(struct extent_buffer *eb,
665 unsigned long offset, unsigned long len)
668 ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
/*
 * Write the whole buffer (eb->len bytes of eb->data) to eb->fd at file
 * position eb->dev_bytenr with a single pwrite().  A result different from
 * eb->len is treated as a special case.
 * NOTE(review): the declaration of 'ret', what is done for a failed or short
 * write and the return value are on lines not shown in this listing.
 */
682 int write_extent_to_disk(struct extent_buffer *eb)
685 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
688 if (ret != eb->len) {
/*
 * Read 'bytes' bytes starting at 'offset' into 'buf'.
 *
 * Each pass asks btrfs_map_block() (READ, with the caller's 'mirror') to map
 * 'offset' through info->mapping_tree; the call may shrink read_len.  The
 * data is then read with pread() from the device of the first stripe at that
 * stripe's physical position.  Map failures, read errors and short reads are
 * reported on stderr.
 * NOTE(review): the declarations of 'read_len', 'total_read' and 'ret', the
 * loop framing, the advance of 'offset', the release of 'multi' and all
 * return statements are on lines not shown in this listing.
 */
697 int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
698 u64 bytes, int mirror)
700 struct btrfs_multi_bio *multi = NULL;
701 struct btrfs_device *device;
702 u64 bytes_left = bytes;
708 read_len = bytes_left;
709 ret = btrfs_map_block(&info->mapping_tree, READ, offset,
710 &read_len, &multi, mirror, NULL);
712 fprintf(stderr, "Couldn't map the block %Lu\n",
716 device = multi->stripes[0].dev;
/* never read more than the caller still wants */
718 read_len = min(bytes_left, read_len);
/* NOTE(review): fd 0 appears to be treated as "device not open" -- confirm */
719 if (device->fd == 0) {
/* arithmetic on the void pointer 'buf' relies on the GNU C extension */
724 ret = pread(device->fd, buf + total_read, read_len,
725 multi->stripes[0].physical);
728 fprintf(stderr, "Error reading %Lu, %d\n", offset,
732 if (ret != read_len) {
733 fprintf(stderr, "Short read for %Lu, read %d, "
734 "read_len %Lu\n", offset, ret, read_len);
738 bytes_left -= read_len;
740 total_read += read_len;
/*
 * Write 'bytes' bytes from 'buf' starting at 'offset'.
 *
 * Each pass maps the current 'offset' with btrfs_map_block() (WRITE), which
 * also hands back an optional raid_map.  Two write paths are visible:
 *  - a parity path that copies the chunk into a temporary extent_buffer and
 *    calls write_raid56_with_parity();
 *  - a plain path that pwrite()s the same chunk to every stripe of the
 *    mapping in turn.
 * NOTE(review): the condition choosing between the two paths (presumably
 * whether raid_map is set), the declarations of 'this_len', 'dev_bytenr',
 * 'dev_nr', 'total_write' and 'ret', the advance of 'offset' and 'dev_nr',
 * the release of the temporary buffer, 'multi' and 'raid_map', and all
 * return statements are on lines not shown in this listing.
 */
746 int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
747 u64 bytes, int mirror)
749 struct btrfs_multi_bio *multi = NULL;
750 struct btrfs_device *device;
751 u64 bytes_left = bytes;
754 u64 *raid_map = NULL;
759 while (bytes_left > 0) {
760 this_len = bytes_left;
763 ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
764 &this_len, &multi, mirror, &raid_map);
766 fprintf(stderr, "Couldn't map the block %Lu\n",
772 struct extent_buffer *eb;
/* the mapped length is kept as stripe_len before this_len is clamped below */
773 u64 stripe_len = this_len;
775 this_len = min(this_len, bytes_left);
776 this_len = min(this_len, (u64)info->tree_root->leafsize);
/* temporary buffer: header plus this_len payload bytes, zeroed */
778 eb = malloc(sizeof(struct extent_buffer) + this_len);
781 memset(eb, 0, sizeof(struct extent_buffer) + this_len);
785 memcpy(eb->data, buf + total_write, this_len);
786 ret = write_raid56_with_parity(info, eb, multi,
787 stripe_len, raid_map);
/* plain path: the same chunk goes to each stripe's device */
793 } else while (dev_nr < multi->num_stripes) {
794 device = multi->stripes[dev_nr].dev;
/* NOTE(review): fd 0 appears to be treated as "device not open" -- confirm */
795 if (device->fd == 0) {
800 dev_bytenr = multi->stripes[dev_nr].physical;
801 this_len = min(this_len, bytes_left);
804 ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
805 if (ret != this_len) {
807 fprintf(stderr, "Error writing to "
808 "device %d\n", errno);
813 fprintf(stderr, "Short write\n");
/* guards the unsigned subtraction below against underflow */
820 BUG_ON(bytes_left < this_len);
822 bytes_left -= this_len;
824 total_write += this_len;
/*
 * Mark 'eb' as up to date by setting EXTENT_UPTODATE in its flags.
 * (The return statement is on a line not shown in this listing.)
 */
833 int set_extent_buffer_uptodate(struct extent_buffer *eb)
835 eb->flags |= EXTENT_UPTODATE;
/*
 * Clear EXTENT_UPTODATE in the flags of 'eb'.  'tree' is not used on any
 * visible line.
 * (The return statement is on a line not shown in this listing.)
 */
839 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
840 struct extent_buffer *eb)
842 eb->flags &= ~EXTENT_UPTODATE;
/*
 * Test whether EXTENT_UPTODATE is set in the flags of 'eb'.
 * NOTE(review): any check done before the flag test and the values returned
 * for each outcome are on lines not shown in this listing.
 */
846 int extent_buffer_uptodate(struct extent_buffer *eb)
851 if (eb->flags & EXTENT_UPTODATE)
/*
 * Mark 'eb' dirty.  Nothing happens if EXTENT_DIRTY is already set.
 * Otherwise the flag is set, the buffer's inclusive byte range
 * [start, start + len - 1] is marked dirty in its tree and
 * extent_buffer_get() is called on the buffer (presumably taking a
 * reference that keeps a dirty buffer alive -- confirm).
 * (The return statement is on a line not shown in this listing.)
 */
856 int set_extent_buffer_dirty(struct extent_buffer *eb)
858 struct extent_io_tree *tree = eb->tree;
859 if (!(eb->flags & EXTENT_DIRTY)) {
860 eb->flags |= EXTENT_DIRTY;
861 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
862 extent_buffer_get(eb);
/*
 * Undo set_extent_buffer_dirty().  Nothing happens if EXTENT_DIRTY is not
 * set.  Otherwise the flag is cleared, the buffer's inclusive byte range
 * [start, start + len - 1] is marked clean in its tree and
 * free_extent_buffer() is called on the buffer, mirroring the
 * extent_buffer_get() done when it was dirtied.  The flag is cleared first;
 * free_extent_buffer() contains a BUG_ON for a buffer that is still dirty.
 * (The return statement is on a line not shown in this listing.)
 */
867 int clear_extent_buffer_dirty(struct extent_buffer *eb)
869 struct extent_io_tree *tree = eb->tree;
870 if (eb->flags & EXTENT_DIRTY) {
871 eb->flags &= ~EXTENT_DIRTY;
872 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
873 free_extent_buffer(eb);
/*
 * Compare 'len' bytes of the buffer's data, beginning 'start' bytes in,
 * with the memory at 'ptrv'.  Returns the memcmp() result with the buffer as
 * the first operand.  No bounds check against the buffer length is done
 * here.
 */
878 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
879 unsigned long start, unsigned long len)
881 return memcmp(eb->data + start, ptrv, len);
/*
 * Copy 'len' bytes out of the buffer's data, beginning 'start' bytes in,
 * into 'dst'.  No bounds check against the buffer length is done here.
 */
884 void read_extent_buffer(struct extent_buffer *eb, void *dst,
885 unsigned long start, unsigned long len)
887 memcpy(dst, eb->data + start, len);
/*
 * Copy 'len' bytes from 'src' into the buffer's data, beginning 'start'
 * bytes in.  No bounds check against the buffer length is done here.
 */
890 void write_extent_buffer(struct extent_buffer *eb, const void *src,
891 unsigned long start, unsigned long len)
893 memcpy(eb->data + start, src, len);
/*
 * Copy 'len' bytes from src's data at 'src_offset' to dst's data at
 * 'dst_offset'.  memcpy() is used, so the two regions must not overlap.
 * (The tail of the parameter list, where 'len' is declared, is on a line not
 * shown in this listing.)
 */
896 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
897 unsigned long dst_offset, unsigned long src_offset,
900 memcpy(dst->data + dst_offset, src->data + src_offset, len);
/*
 * Move 'len' bytes within a single buffer from 'src_offset' to
 * 'dst_offset'.  memmove() is used, so overlapping regions are handled.
 */
903 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
904 unsigned long src_offset, unsigned long len)
906 memmove(dst->data + dst_offset, dst->data + src_offset, len);
/*
 * Fill 'len' bytes of the buffer's data, beginning 'start' bytes in, with
 * the byte value 'c'.  No bounds check against the buffer length is done
 * here.
 */
909 void memset_extent_buffer(struct extent_buffer *eb, char c,
910 unsigned long start, unsigned long len)
912 memset(eb->data + start, c, len);