3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
19 #define _XOPEN_SOURCE 600
23 #include <sys/types.h>
27 #include "kerncompat.h"
28 #include "extent_io.h"
33 u64 cache_soft_max = 1024 * 1024 * 256;
34 u64 cache_hard_max = 1 * 1024 * 1024 * 1024;
36 void extent_io_tree_init(struct extent_io_tree *tree)
38 cache_tree_init(&tree->state);
39 cache_tree_init(&tree->cache);
40 INIT_LIST_HEAD(&tree->lru);
44 static struct extent_state *alloc_extent_state(void)
46 struct extent_state *state;
48 state = malloc(sizeof(*state));
51 state->cache_node.objectid = 0;
58 static void btrfs_free_extent_state(struct extent_state *state)
61 BUG_ON(state->refs < 0);
66 static void free_extent_state_func(struct cache_extent *cache)
68 struct extent_state *es;
70 es = container_of(cache, struct extent_state, cache_node);
71 btrfs_free_extent_state(es);
/*
 * Tear down an extent_io_tree.  Any extent_buffer still on the LRU at
 * this point is a leak by some caller: it is reported to stderr and
 * then released anyway.  Finally all extent_state records are freed.
 */
74 void extent_io_tree_cleanup(struct extent_io_tree *tree)
76 struct extent_buffer *eb;
78 while(!list_empty(&tree->lru)) {
79 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
81 fprintf(stderr, "extent buffer leak: "
82 "start %llu len %u\n",
83 (unsigned long long)eb->start, eb->len);
86 free_extent_buffer(eb);
/* Release every remaining extent_state via the free callback. */
89 cache_tree_free_extents(&tree->state, free_extent_state_func);
92 static inline void update_extent_state(struct extent_state *state)
94 state->cache_node.start = state->start;
95 state->cache_node.size = state->end + 1 - state->start;
99 * Utility function to look for merge candidates inside a given range.
100 * Any extents with matching state are merged together into a single
101 * extent in the tree. Extents with EXTENT_IO in their state field are
 * not merged, since I/O may still be in flight on them.
104 static int merge_state(struct extent_io_tree *tree,
105 struct extent_state *state)
107 struct extent_state *other;
108 struct cache_extent *other_node;
/* Never merge ranges with in-flight I/O bits set. */
110 if (state->state & EXTENT_IOBITS)
/* Absorb the previous node when it is adjacent and bits match exactly. */
113 other_node = prev_cache_extent(&state->cache_node);
115 other = container_of(other_node, struct extent_state,
117 if (other->end == state->start - 1 &&
118 other->state == state->state) {
119 state->start = other->start;
120 update_extent_state(state);
121 remove_cache_extent(&tree->state, &other->cache_node);
122 btrfs_free_extent_state(other);
/* Likewise fold @state into an adjacent, identical next node. */
125 other_node = next_cache_extent(&state->cache_node);
127 other = container_of(other_node, struct extent_state,
129 if (other->start == state->end + 1 &&
130 other->state == state->state) {
131 other->start = state->start;
132 update_extent_state(other);
/* NOTE: @state itself is removed and freed here; caller must not reuse it. */
133 remove_cache_extent(&tree->state, &state->cache_node);
134 btrfs_free_extent_state(state);
141 * insert an extent_state struct into the tree. 'bits' are set on the
142 * struct before it is inserted.
144 static int insert_state(struct extent_io_tree *tree,
145 struct extent_state *state, u64 start, u64 end,
/* OR in the requested bits and record the inclusive [start, end] range. */
151 state->state |= bits;
152 state->start = start;
154 update_extent_state(state);
155 ret = insert_cache_extent(&tree->state, &state->cache_node);
/* Coalesce with identical neighbors after a successful insert. */
157 merge_state(tree, state);
162 * split a given extent state struct in two, inserting the preallocated
163 * struct 'prealloc' as the newly created second half. 'split' indicates an
164 * offset inside 'orig' where it should be split.
 *
 * After the split, 'prealloc' covers [orig->start, split - 1] and 'orig'
 * is shrunk to begin at 'split'; both carry the original state bits.
166 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
167 struct extent_state *prealloc, u64 split)
170 prealloc->start = orig->start;
171 prealloc->end = split - 1;
172 prealloc->state = orig->state;
173 update_extent_state(prealloc);
175 update_extent_state(orig);
176 ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
182 * clear some bits on a range in the tree.
 *
 * Returns the bits that were actually set (and so cleared).  If no bits
 * remain, the state record is removed from the tree and freed; otherwise
 * an attempt is made to merge it with its neighbors.
184 static int clear_state_bit(struct extent_io_tree *tree,
185 struct extent_state *state, int bits)
187 int ret = state->state & bits;
189 state->state &= ~bits;
190 if (state->state == 0) {
191 remove_cache_extent(&tree->state, &state->cache_node);
192 btrfs_free_extent_state(state);
194 merge_state(tree, state);
200 * clear some bits on a range in the tree.
202 int clear_extent_bits(struct extent_io_tree *tree, u64 start,
203 u64 end, int bits, gfp_t mask)
205 struct extent_state *state;
206 struct extent_state *prealloc = NULL;
207 struct cache_extent *node;
/* Preallocate a state record so splits cannot fail mid-update. */
213 prealloc = alloc_extent_state();
218 * this search will find the extents that end after
 * our range starts
221 node = search_cache_extent(&tree->state, start);
224 state = container_of(node, struct extent_state, cache_node);
225 if (state->start > end)
227 last_end = state->end;
230 * | ---- desired range ---- |
232 * | ------------- state -------------- |
234 * We need to split the extent we found, and may flip
235 * bits on second half.
237 * If the extent we found extends past our range, we
238 * just split and search again. It'll get split again
239 * the next time though.
241 * If the extent we found is inside our range, we clear
242 * the desired bit on it.
244 if (state->start < start) {
245 err = split_state(tree, state, prealloc, start);
246 BUG_ON(err == -EEXIST);
250 if (state->end <= end) {
251 set |= clear_state_bit(tree, state, bits);
252 if (last_end == (u64)-1)
254 start = last_end + 1;
256 start = state->start;
261 * | ---- desired range ---- |
 * |  state  |
263 * We need to split the extent, and clear the bit
 * on the first half.
266 if (state->start <= end && state->end > end) {
267 err = split_state(tree, state, prealloc, end + 1);
268 BUG_ON(err == -EEXIST);
270 set |= clear_state_bit(tree, prealloc, bits);
/* State fully inside the range: clear and advance past it. */
275 start = state->end + 1;
276 set |= clear_state_bit(tree, state, bits);
277 if (last_end == (u64)-1)
279 start = last_end + 1;
/* Drop the preallocated record if it was never consumed by a split. */
283 btrfs_free_extent_state(prealloc);
293 * set some bits on a range in the tree.
295 int set_extent_bits(struct extent_io_tree *tree, u64 start,
296 u64 end, int bits, gfp_t mask)
298 struct extent_state *state;
299 struct extent_state *prealloc = NULL;
300 struct cache_extent *node;
/* Preallocate a state record so inserts/splits cannot fail mid-update. */
306 prealloc = alloc_extent_state();
312 * this search will find the extents that end after
 * our range starts
315 node = search_cache_extent(&tree->state, start);
/* Nothing overlaps: insert a fresh state covering the whole range. */
317 err = insert_state(tree, prealloc, start, end, bits);
318 BUG_ON(err == -EEXIST);
323 state = container_of(node, struct extent_state, cache_node);
324 last_start = state->start;
325 last_end = state->end;
328 * | ---- desired range ---- |
 * | state |
331 * Just lock what we found and keep going
333 if (state->start == start && state->end <= end) {
334 state->state |= bits;
335 merge_state(tree, state);
336 if (last_end == (u64)-1)
338 start = last_end + 1;
342 * | ---- desired range ---- |
 * | state |
 *   or
345 * | ------------- state -------------- |
347 * We need to split the extent we found, and may flip bits on
 * the second half.
350 * If the extent we found extends past our
351 * range, we just split and search again. It'll get split
352 * again the next time though.
354 * If the extent we found is inside our range, we set the
 * desired bit on it.
357 if (state->start < start) {
358 err = split_state(tree, state, prealloc, start);
359 BUG_ON(err == -EEXIST);
363 if (state->end <= end) {
364 state->state |= bits;
365 start = state->end + 1;
366 merge_state(tree, state);
367 if (last_end == (u64)-1)
369 start = last_end + 1;
371 start = state->start;
376 * | ---- desired range ---- |
377 * | state | or | state |
379 * There's a hole, we need to insert something in it and
380 * ignore the extent we found.
382 if (state->start > start) {
384 if (end < last_start)
387 this_end = last_start -1;
388 err = insert_state(tree, prealloc, start, this_end,
390 BUG_ON(err == -EEXIST);
394 start = this_end + 1;
398 * | ---- desired range ---- |
399 * | ---------- state ---------- |
400 * We need to split the extent, and set the bit
 * on the first half.
403 err = split_state(tree, state, prealloc, end + 1);
404 BUG_ON(err == -EEXIST);
406 state->state |= bits;
407 merge_state(tree, prealloc);
/* Drop the preallocated record if it was never consumed. */
411 btrfs_free_extent_state(prealloc);
419 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
422 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
425 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
428 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
/*
 * Walk forward from @start looking for the first extent state that has
 * any of @bits set; on a hit, *start_ret/*end_ret receive its range.
 * (Return-value lines are not visible here; presumably 0 on success and
 * non-zero when nothing matches — confirm against the header.)
 */
431 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
432 u64 *start_ret, u64 *end_ret, int bits)
434 struct cache_extent *node;
435 struct extent_state *state;
439 * this search will find all the extents that end after
 * our range starts
442 node = search_cache_extent(&tree->state, start);
447 state = container_of(node, struct extent_state, cache_node);
448 if (state->end >= start && (state->state & bits)) {
449 *start_ret = state->start;
450 *end_ret = state->end;
454 node = next_cache_extent(node);
/*
 * Test whether @bits are set over [start, end].  With @filled non-zero
 * the whole range must be covered (a gap fails the test); otherwise any
 * single state with a matching bit is enough.
 */
462 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
463 int bits, int filled)
465 struct extent_state *state = NULL;
466 struct cache_extent *node;
469 node = search_cache_extent(&tree->state, start);
470 while (node && start <= end) {
471 state = container_of(node, struct extent_state, cache_node);
/* In "filled" mode a hole before the next state means failure. */
473 if (filled && state->start > start) {
477 if (state->start > end)
479 if (state->state & bits) {
/* Advance past this state and keep scanning the range. */
487 start = state->end + 1;
490 node = next_cache_extent(node);
500 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
502 struct cache_extent *node;
503 struct extent_state *state;
506 node = search_cache_extent(&tree->state, start);
511 state = container_of(node, struct extent_state, cache_node);
512 if (state->start != start) {
516 state->xprivate = private;
521 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
523 struct cache_extent *node;
524 struct extent_state *state;
527 node = search_cache_extent(&tree->state, start);
532 state = container_of(node, struct extent_state, cache_node);
533 if (state->start != start) {
537 *private = state->xprivate;
/*
 * Best-effort LRU trimming.  Nothing happens below the soft cache limit;
 * above it, buffers are scanned from the LRU head, freeing what can be
 * freed and rotating the rest, bounded by a scan budget unless the hard
 * limit has been exceeded.
 */
542 static int free_some_buffers(struct extent_io_tree *tree)
545 struct extent_buffer *eb;
546 struct list_head *node, *next;
/* Under the soft limit: no reclaim needed. */
548 if (tree->cache_size < cache_soft_max)
551 list_for_each_safe(node, next, &tree->lru) {
552 eb = list_entry(node, struct extent_buffer, lru);
554 free_extent_buffer(eb);
555 if (tree->cache_size < cache_hard_max)
/* Could not free this one now; move it to the LRU tail. */
558 list_move_tail(&eb->lru, &tree->lru);
/* Cap the scan effort unless we are past the hard limit. */
560 if (nrscan++ > 64 && tree->cache_size < cache_hard_max)
/*
 * Allocate a zeroed extent_buffer with @blocksize bytes of inline data,
 * register it in the tree's cache and LRU, and account its size.
 * Reclaim (free_some_buffers) runs first to keep the cache bounded.
 */
566 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
567 u64 bytenr, u32 blocksize)
569 struct extent_buffer *eb;
/* Header plus the data payload in a single allocation. */
572 eb = malloc(sizeof(struct extent_buffer) + blocksize);
577 memset(eb, 0, sizeof(struct extent_buffer) + blocksize);
/* -1 marks "no device location assigned yet". */
585 eb->dev_bytenr = (u64)-1;
586 eb->cache_node.start = bytenr;
587 eb->cache_node.size = blocksize;
589 free_some_buffers(tree);
590 ret = insert_cache_extent(&tree->cache, &eb->cache_node);
595 list_add_tail(&eb->lru, &tree->lru);
596 tree->cache_size += blocksize;
/*
 * Drop one reference on @eb.  On the final put the buffer is unlinked
 * from its tree's LRU and cache, its size is subtracted from the cache
 * accounting, and it is freed.  Freeing a still-dirty buffer is a bug.
 */
600 void free_extent_buffer(struct extent_buffer *eb)
606 BUG_ON(eb->refs < 0);
608 struct extent_io_tree *tree = eb->tree;
609 BUG_ON(eb->flags & EXTENT_DIRTY);
610 list_del_init(&eb->lru);
611 remove_cache_extent(&tree->cache, &eb->cache_node);
/* cache_size must never underflow; eb->len was added at alloc time. */
612 BUG_ON(tree->cache_size < eb->len);
613 tree->cache_size -= eb->len;
618 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
619 u64 bytenr, u32 blocksize)
621 struct extent_buffer *eb = NULL;
622 struct cache_extent *cache;
624 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
625 if (cache && cache->start == bytenr &&
626 cache->size == blocksize) {
627 eb = container_of(cache, struct extent_buffer, cache_node);
628 list_move_tail(&eb->lru, &tree->lru);
634 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
637 struct extent_buffer *eb = NULL;
638 struct cache_extent *cache;
640 cache = search_cache_extent(&tree->cache, start);
642 eb = container_of(cache, struct extent_buffer, cache_node);
643 list_move_tail(&eb->lru, &tree->lru);
/*
 * Get an extent_buffer for @bytenr/@blocksize: return the cached one
 * when it matches exactly; evict a stale overlapping buffer if present;
 * otherwise allocate a fresh buffer via __alloc_extent_buffer().
 */
649 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
650 u64 bytenr, u32 blocksize)
652 struct extent_buffer *eb;
653 struct cache_extent *cache;
655 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
656 if (cache && cache->start == bytenr &&
657 cache->size == blocksize) {
/* Exact hit: refresh LRU position and reuse the cached buffer. */
658 eb = container_of(cache, struct extent_buffer, cache_node);
659 list_move_tail(&eb->lru, &tree->lru);
/* Overlapping but mismatched buffer: drop it before reallocating. */
663 eb = container_of(cache, struct extent_buffer,
665 free_extent_buffer(eb);
667 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
/*
 * Read @len bytes from the buffer's device location (eb->dev_bytenr on
 * eb->fd) into eb->data at @offset.  Error handling lines are not
 * visible here; presumably short reads map to an error return — confirm.
 */
672 int read_extent_from_disk(struct extent_buffer *eb,
673 unsigned long offset, unsigned long len)
676 ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
/*
 * Write the whole buffer (eb->len bytes of eb->data) to its device
 * location.  A short pwrite() is treated as failure.
 */
688 int write_extent_to_disk(struct extent_buffer *eb)
691 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
694 if (ret != eb->len) {
/*
 * Read @bytes of logical data at @offset into @buf, mapping each chunk
 * through the logical->physical block mapper and issuing pread() against
 * the first stripe's device.  @mirror selects which copy to map.
 */
703 int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
704 u64 bytes, int mirror)
706 struct btrfs_multi_bio *multi = NULL;
707 struct btrfs_device *device;
708 u64 bytes_left = bytes;
/* Map as much as possible in one go; read_len is clamped below. */
714 read_len = bytes_left;
715 ret = btrfs_map_block(&info->mapping_tree, READ, offset,
716 &read_len, &multi, mirror, NULL);
718 fprintf(stderr, "Couldn't map the block %Lu\n",
722 device = multi->stripes[0].dev;
724 read_len = min(bytes_left, read_len);
/* fd == 0 is used as the "device not open" sentinel here. */
725 if (device->fd == 0) {
730 ret = pread(device->fd, buf + total_read, read_len,
731 multi->stripes[0].physical);
734 fprintf(stderr, "Error reading %Lu, %d\n", offset,
/* Short reads are reported and treated as failure. */
738 if (ret != read_len) {
739 fprintf(stderr, "Short read for %Lu, read %d, "
740 "read_len %Lu\n", offset, ret, read_len);
744 bytes_left -= read_len;
746 total_read += read_len;
752 int set_extent_buffer_uptodate(struct extent_buffer *eb)
754 eb->flags |= EXTENT_UPTODATE;
758 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
759 struct extent_buffer *eb)
761 eb->flags &= ~EXTENT_UPTODATE;
/* Non-zero when the buffer's contents are flagged valid. */
765 int extent_buffer_uptodate(struct extent_buffer *eb)
770 if (eb->flags & EXTENT_UPTODATE)
775 int set_extent_buffer_dirty(struct extent_buffer *eb)
777 struct extent_io_tree *tree = eb->tree;
778 if (!(eb->flags & EXTENT_DIRTY)) {
779 eb->flags |= EXTENT_DIRTY;
780 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
781 extent_buffer_get(eb);
786 int clear_extent_buffer_dirty(struct extent_buffer *eb)
788 struct extent_io_tree *tree = eb->tree;
789 if (eb->flags & EXTENT_DIRTY) {
790 eb->flags &= ~EXTENT_DIRTY;
791 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
792 free_extent_buffer(eb);
797 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
798 unsigned long start, unsigned long len)
800 return memcmp(eb->data + start, ptrv, len);
803 void read_extent_buffer(struct extent_buffer *eb, void *dst,
804 unsigned long start, unsigned long len)
806 memcpy(dst, eb->data + start, len);
809 void write_extent_buffer(struct extent_buffer *eb, const void *src,
810 unsigned long start, unsigned long len)
812 memcpy(eb->data + start, src, len);
815 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
816 unsigned long dst_offset, unsigned long src_offset,
819 memcpy(dst->data + dst_offset, src->data + src_offset, len);
822 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
823 unsigned long src_offset, unsigned long len)
825 memcpy(dst->data + dst_offset, dst->data + src_offset, len);
828 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
829 unsigned long src_offset, unsigned long len)
831 memmove(dst->data + dst_offset, dst->data + src_offset, len);
834 void memset_extent_buffer(struct extent_buffer *eb, char c,
835 unsigned long start, unsigned long len)
837 memset(eb->data + start, c, len);