3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
21 #include <sys/types.h>
25 #include "kerncompat.h"
26 #include "extent_io.h"
32 void extent_io_tree_init(struct extent_io_tree *tree)
34 cache_tree_init(&tree->state);
35 cache_tree_init(&tree->cache);
36 INIT_LIST_HEAD(&tree->lru);
40 static struct extent_state *alloc_extent_state(void)
42 struct extent_state *state;
44 state = malloc(sizeof(*state));
47 state->cache_node.objectid = 0;
54 static void btrfs_free_extent_state(struct extent_state *state)
57 BUG_ON(state->refs < 0);
62 static void free_extent_state_func(struct cache_extent *cache)
64 struct extent_state *es;
66 es = container_of(cache, struct extent_state, cache_node);
67 btrfs_free_extent_state(es);
70 void extent_io_tree_cleanup(struct extent_io_tree *tree)
72 struct extent_buffer *eb;
74 while(!list_empty(&tree->lru)) {
75 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
76 fprintf(stderr, "extent buffer leak: "
77 "start %llu len %u\n",
78 (unsigned long long)eb->start, eb->len);
79 free_extent_buffer(eb);
82 cache_tree_free_extents(&tree->state, free_extent_state_func);
85 static inline void update_extent_state(struct extent_state *state)
87 state->cache_node.start = state->start;
88 state->cache_node.size = state->end + 1 - state->start;
92 * Utility function to look for merge candidates inside a given range.
93 * Any extents with matching state are merged together into a single
94 * extent in the tree. Extents with EXTENT_IO in their state field are
97 static int merge_state(struct extent_io_tree *tree,
98 struct extent_state *state)
100 struct extent_state *other;
101 struct cache_extent *other_node;
103 if (state->state & EXTENT_IOBITS)
106 other_node = prev_cache_extent(&state->cache_node);
108 other = container_of(other_node, struct extent_state,
110 if (other->end == state->start - 1 &&
111 other->state == state->state) {
112 state->start = other->start;
113 update_extent_state(state);
114 remove_cache_extent(&tree->state, &other->cache_node);
115 btrfs_free_extent_state(other);
118 other_node = next_cache_extent(&state->cache_node);
120 other = container_of(other_node, struct extent_state,
122 if (other->start == state->end + 1 &&
123 other->state == state->state) {
124 other->start = state->start;
125 update_extent_state(other);
126 remove_cache_extent(&tree->state, &state->cache_node);
127 btrfs_free_extent_state(state);
134 * insert an extent_state struct into the tree. 'bits' are set on the
135 * struct before it is inserted.
137 static int insert_state(struct extent_io_tree *tree,
138 struct extent_state *state, u64 start, u64 end,
144 state->state |= bits;
145 state->start = start;
147 update_extent_state(state);
148 ret = insert_cache_extent(&tree->state, &state->cache_node);
150 merge_state(tree, state);
155 * split a given extent state struct in two, inserting the preallocated
156 * struct 'prealloc' as the newly created second half. 'split' indicates an
157 * offset inside 'orig' where it should be split.
159 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
160 struct extent_state *prealloc, u64 split)
163 prealloc->start = orig->start;
164 prealloc->end = split - 1;
165 prealloc->state = orig->state;
166 update_extent_state(prealloc);
168 update_extent_state(orig);
169 ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
175 * clear some bits on a range in the tree.
177 static int clear_state_bit(struct extent_io_tree *tree,
178 struct extent_state *state, int bits)
180 int ret = state->state & bits;
182 state->state &= ~bits;
183 if (state->state == 0) {
184 remove_cache_extent(&tree->state, &state->cache_node);
185 btrfs_free_extent_state(state);
187 merge_state(tree, state);
193 * clear some bits on a range in the tree.
195 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
197 struct extent_state *state;
198 struct extent_state *prealloc = NULL;
199 struct cache_extent *node;
206 prealloc = alloc_extent_state();
212 * this search will find the extents that end after
215 node = search_cache_extent(&tree->state, start);
218 state = container_of(node, struct extent_state, cache_node);
219 if (state->start > end)
221 last_end = state->end;
224 * | ---- desired range ---- |
226 * | ------------- state -------------- |
228 * We need to split the extent we found, and may flip
229 * bits on second half.
231 * If the extent we found extends past our range, we
232 * just split and search again. It'll get split again
233 * the next time though.
235 * If the extent we found is inside our range, we clear
236 * the desired bit on it.
238 if (state->start < start) {
239 err = split_state(tree, state, prealloc, start);
240 BUG_ON(err == -EEXIST);
244 if (state->end <= end) {
245 set |= clear_state_bit(tree, state, bits);
246 if (last_end == (u64)-1)
248 start = last_end + 1;
250 start = state->start;
255 * | ---- desired range ---- |
257 * We need to split the extent, and clear the bit
260 if (state->start <= end && state->end > end) {
261 err = split_state(tree, state, prealloc, end + 1);
262 BUG_ON(err == -EEXIST);
264 set |= clear_state_bit(tree, prealloc, bits);
269 start = state->end + 1;
270 set |= clear_state_bit(tree, state, bits);
271 if (last_end == (u64)-1)
273 start = last_end + 1;
277 btrfs_free_extent_state(prealloc);
287 * set some bits on a range in the tree.
289 int set_extent_bits(struct extent_io_tree *tree, u64 start,
290 u64 end, int bits, gfp_t mask)
292 struct extent_state *state;
293 struct extent_state *prealloc = NULL;
294 struct cache_extent *node;
300 prealloc = alloc_extent_state();
306 * this search will find the extents that end after
309 node = search_cache_extent(&tree->state, start);
311 err = insert_state(tree, prealloc, start, end, bits);
312 BUG_ON(err == -EEXIST);
317 state = container_of(node, struct extent_state, cache_node);
318 last_start = state->start;
319 last_end = state->end;
322 * | ---- desired range ---- |
325 * Just lock what we found and keep going
327 if (state->start == start && state->end <= end) {
328 state->state |= bits;
329 merge_state(tree, state);
330 if (last_end == (u64)-1)
332 start = last_end + 1;
336 * | ---- desired range ---- |
339 * | ------------- state -------------- |
341 * We need to split the extent we found, and may flip bits on
344 * If the extent we found extends past our
345 * range, we just split and search again. It'll get split
346 * again the next time though.
348 * If the extent we found is inside our range, we set the
351 if (state->start < start) {
352 err = split_state(tree, state, prealloc, start);
353 BUG_ON(err == -EEXIST);
357 if (state->end <= end) {
358 state->state |= bits;
359 start = state->end + 1;
360 merge_state(tree, state);
361 if (last_end == (u64)-1)
363 start = last_end + 1;
365 start = state->start;
370 * | ---- desired range ---- |
371 * | state | or | state |
373 * There's a hole, we need to insert something in it and
374 * ignore the extent we found.
376 if (state->start > start) {
378 if (end < last_start)
381 this_end = last_start -1;
382 err = insert_state(tree, prealloc, start, this_end,
384 BUG_ON(err == -EEXIST);
388 start = this_end + 1;
392 * | ---- desired range ---- |
393 * | ---------- state ---------- |
394 * We need to split the extent, and set the bit
397 err = split_state(tree, state, prealloc, end + 1);
398 BUG_ON(err == -EEXIST);
400 state->state |= bits;
401 merge_state(tree, prealloc);
405 btrfs_free_extent_state(prealloc);
413 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
416 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
419 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
422 return clear_extent_bits(tree, start, end, EXTENT_DIRTY);
425 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
426 u64 *start_ret, u64 *end_ret, int bits)
428 struct cache_extent *node;
429 struct extent_state *state;
433 * this search will find all the extents that end after
436 node = search_cache_extent(&tree->state, start);
441 state = container_of(node, struct extent_state, cache_node);
442 if (state->end >= start && (state->state & bits)) {
443 *start_ret = state->start;
444 *end_ret = state->end;
448 node = next_cache_extent(node);
456 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
457 int bits, int filled)
459 struct extent_state *state = NULL;
460 struct cache_extent *node;
463 node = search_cache_extent(&tree->state, start);
464 while (node && start <= end) {
465 state = container_of(node, struct extent_state, cache_node);
467 if (filled && state->start > start) {
471 if (state->start > end)
473 if (state->state & bits) {
481 start = state->end + 1;
484 node = next_cache_extent(node);
494 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
496 struct cache_extent *node;
497 struct extent_state *state;
500 node = search_cache_extent(&tree->state, start);
505 state = container_of(node, struct extent_state, cache_node);
506 if (state->start != start) {
510 state->xprivate = private;
515 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
517 struct cache_extent *node;
518 struct extent_state *state;
521 node = search_cache_extent(&tree->state, start);
526 state = container_of(node, struct extent_state, cache_node);
527 if (state->start != start) {
531 *private = state->xprivate;
536 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
537 u64 bytenr, u32 blocksize)
539 struct extent_buffer *eb;
541 eb = calloc(1, sizeof(struct extent_buffer) + blocksize);
551 eb->dev_bytenr = (u64)-1;
552 eb->cache_node.start = bytenr;
553 eb->cache_node.size = blocksize;
554 INIT_LIST_HEAD(&eb->recow);
559 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
561 struct extent_buffer *new;
563 new = __alloc_extent_buffer(NULL, src->start, src->len);
567 copy_extent_buffer(new, src, 0, 0, src->len);
568 new->flags |= EXTENT_BUFFER_DUMMY;
573 void free_extent_buffer(struct extent_buffer *eb)
575 if (!eb || IS_ERR(eb))
579 BUG_ON(eb->refs < 0);
581 struct extent_io_tree *tree = eb->tree;
582 BUG_ON(eb->flags & EXTENT_DIRTY);
583 list_del_init(&eb->lru);
584 list_del_init(&eb->recow);
585 if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
586 BUG_ON(tree->cache_size < eb->len);
587 remove_cache_extent(&tree->cache, &eb->cache_node);
588 tree->cache_size -= eb->len;
594 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
595 u64 bytenr, u32 blocksize)
597 struct extent_buffer *eb = NULL;
598 struct cache_extent *cache;
600 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
601 if (cache && cache->start == bytenr &&
602 cache->size == blocksize) {
603 eb = container_of(cache, struct extent_buffer, cache_node);
604 list_move_tail(&eb->lru, &tree->lru);
610 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
613 struct extent_buffer *eb = NULL;
614 struct cache_extent *cache;
616 cache = search_cache_extent(&tree->cache, start);
618 eb = container_of(cache, struct extent_buffer, cache_node);
619 list_move_tail(&eb->lru, &tree->lru);
625 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
626 u64 bytenr, u32 blocksize)
628 struct extent_buffer *eb;
629 struct cache_extent *cache;
631 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
632 if (cache && cache->start == bytenr &&
633 cache->size == blocksize) {
634 eb = container_of(cache, struct extent_buffer, cache_node);
635 list_move_tail(&eb->lru, &tree->lru);
641 eb = container_of(cache, struct extent_buffer,
643 free_extent_buffer(eb);
645 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
648 ret = insert_cache_extent(&tree->cache, &eb->cache_node);
653 list_add_tail(&eb->lru, &tree->lru);
654 tree->cache_size += blocksize;
659 int read_extent_from_disk(struct extent_buffer *eb,
660 unsigned long offset, unsigned long len)
663 ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
677 int write_extent_to_disk(struct extent_buffer *eb)
680 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
683 if (ret != eb->len) {
692 int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
693 u64 bytes, int mirror)
695 struct btrfs_multi_bio *multi = NULL;
696 struct btrfs_device *device;
697 u64 bytes_left = bytes;
703 read_len = bytes_left;
704 ret = btrfs_map_block(&info->mapping_tree, READ, offset,
705 &read_len, &multi, mirror, NULL);
707 fprintf(stderr, "Couldn't map the block %Lu\n",
711 device = multi->stripes[0].dev;
713 read_len = min(bytes_left, read_len);
714 if (device->fd <= 0) {
719 ret = pread(device->fd, buf + total_read, read_len,
720 multi->stripes[0].physical);
723 fprintf(stderr, "Error reading %Lu, %d\n", offset,
727 if (ret != read_len) {
728 fprintf(stderr, "Short read for %Lu, read %d, "
729 "read_len %Lu\n", offset, ret, read_len);
733 bytes_left -= read_len;
735 total_read += read_len;
741 int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
742 u64 bytes, int mirror)
744 struct btrfs_multi_bio *multi = NULL;
745 struct btrfs_device *device;
746 u64 bytes_left = bytes;
749 u64 *raid_map = NULL;
754 while (bytes_left > 0) {
755 this_len = bytes_left;
758 ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
759 &this_len, &multi, mirror, &raid_map);
761 fprintf(stderr, "Couldn't map the block %Lu\n",
767 struct extent_buffer *eb;
768 u64 stripe_len = this_len;
770 this_len = min(this_len, bytes_left);
771 this_len = min(this_len, (u64)info->tree_root->nodesize);
773 eb = malloc(sizeof(struct extent_buffer) + this_len);
775 fprintf(stderr, "cannot allocate memory for eb\n");
780 memset(eb, 0, sizeof(struct extent_buffer) + this_len);
784 memcpy(eb->data, buf + total_write, this_len);
785 ret = write_raid56_with_parity(info, eb, multi,
786 stripe_len, raid_map);
792 } else while (dev_nr < multi->num_stripes) {
793 device = multi->stripes[dev_nr].dev;
794 if (device->fd <= 0) {
799 dev_bytenr = multi->stripes[dev_nr].physical;
800 this_len = min(this_len, bytes_left);
803 ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
804 if (ret != this_len) {
806 fprintf(stderr, "Error writing to "
807 "device %d\n", errno);
812 fprintf(stderr, "Short write\n");
819 BUG_ON(bytes_left < this_len);
821 bytes_left -= this_len;
823 total_write += this_len;
835 int set_extent_buffer_dirty(struct extent_buffer *eb)
837 struct extent_io_tree *tree = eb->tree;
838 if (!(eb->flags & EXTENT_DIRTY)) {
839 eb->flags |= EXTENT_DIRTY;
840 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
841 extent_buffer_get(eb);
846 int clear_extent_buffer_dirty(struct extent_buffer *eb)
848 struct extent_io_tree *tree = eb->tree;
849 if (eb->flags & EXTENT_DIRTY) {
850 eb->flags &= ~EXTENT_DIRTY;
851 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
852 free_extent_buffer(eb);
857 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
858 unsigned long start, unsigned long len)
860 return memcmp(eb->data + start, ptrv, len);
863 void read_extent_buffer(struct extent_buffer *eb, void *dst,
864 unsigned long start, unsigned long len)
866 memcpy(dst, eb->data + start, len);
869 void write_extent_buffer(struct extent_buffer *eb, const void *src,
870 unsigned long start, unsigned long len)
872 memcpy(eb->data + start, src, len);
875 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
876 unsigned long dst_offset, unsigned long src_offset,
879 memcpy(dst->data + dst_offset, src->data + src_offset, len);
882 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
883 unsigned long src_offset, unsigned long len)
885 memmove(dst->data + dst_offset, dst->data + src_offset, len);
888 void memset_extent_buffer(struct extent_buffer *eb, char c,
889 unsigned long start, unsigned long len)
891 memset(eb->data + start, c, len);
894 int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
897 return le_test_bit(nr, (u8 *)eb->data + start);