3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
21 #include <sys/types.h>
26 #include "kerncompat.h"
27 #include "extent_io.h"
/*
 * Initialize an extent_io_tree: sets up the 'state' and 'cache' cache trees
 * and the LRU list head, then derives the default cache limit from
 * total_memory().
 */
34 void extent_io_tree_init(struct extent_io_tree *tree)
36 cache_tree_init(&tree->state);
37 cache_tree_init(&tree->cache);
38 INIT_LIST_HEAD(&tree->lru);
/* default cache limit: one quarter of the value returned by total_memory() */
40 tree->max_cache_size = (u64)total_memory() / 4;
/*
 * Same as extent_io_tree_init(), except that the default cache limit is
 * replaced afterwards with the caller supplied max_cache_size (the parameter
 * is declared on a signature line not visible in this excerpt).
 */
43 void extent_io_tree_init_cache_max(struct extent_io_tree *tree,
/* run the common initialization first, then override its default limit */
46 extent_io_tree_init(tree);
47 tree->max_cache_size = max_cache_size;
/*
 * Allocate a new extent_state with malloc() and reset the objectid of its
 * embedded cache node.
 * NOTE(review): any allocation-failure check, the initialization of the
 * remaining fields and the return statement are on lines not visible in this
 * excerpt -- confirm against the full source.
 */
50 static struct extent_state *alloc_extent_state(void)
52 struct extent_state *state;
/* malloc() does not zero the memory, so fields have to be set explicitly */
54 state = malloc(sizeof(*state));
57 state->cache_node.objectid = 0;
/*
 * Release an extent_state.
 * NOTE(review): only the sanity check below is visible in this excerpt;
 * presumably a reference is dropped and the memory is freed once unused --
 * confirm against the full source.
 */
64 static void btrfs_free_extent_state(struct extent_state *state)
/* the reference count must never be negative when a state is released */
67 BUG_ON(state->refs < 0);
/*
 * Callback operating on a cache_extent: recovers the extent_state embedding
 * the given cache node and hands it to btrfs_free_extent_state().
 * extent_io_tree_cleanup() passes it to cache_tree_free_extents().
 */
72 static void free_extent_state_func(struct cache_extent *cache)
74 struct extent_state *es;
/* 'cache' is the cache_node member of an extent_state */
76 es = container_of(cache, struct extent_state, cache_node);
77 btrfs_free_extent_state(es);
/* forward declaration: the definition follows later in this file */
80 static void free_extent_buffer_final(struct extent_buffer *eb);
/*
 * Tear down an extent_io_tree: every extent buffer still linked on
 * tree->lru is released, then all entries of tree->state are freed through
 * free_extent_state_func().
 * NOTE(review): the condition guarding the "extent buffer leak" report, and
 * how the two free calls in the loop relate to it, is on lines not visible
 * in this excerpt.
 */
81 void extent_io_tree_cleanup(struct extent_io_tree *tree)
83 struct extent_buffer *eb;
/* keep taking the first buffer on the LRU list until the list is empty */
85 while(!list_empty(&tree->lru)) {
86 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
89 "extent buffer leak: start %llu len %u\n",
90 (unsigned long long)eb->start, eb->len);
91 free_extent_buffer_nocache(eb);
93 free_extent_buffer_final(eb);
/* release every extent_state left in the state tree */
97 cache_tree_free_extents(&tree->state, free_extent_state_func);
/*
 * Bring the embedded cache node in sync with the state's range: the node
 * starts at state->start and spans up to and including state->end.
 */
100 static inline void update_extent_state(struct extent_state *state)
102 state->cache_node.start = state->start;
/* 'end' is inclusive, hence the + 1 when converting the range to a size */
103 state->cache_node.size = state->end + 1 - state->start;
107 * Utility function to look for merge candidates inside a given range.
108 * Any extents with matching state are merged together into a single
109 * extent in the tree. Extents with EXTENT_IO in their state field are
112 static int merge_state(struct extent_io_tree *tree,
113 struct extent_state *state)
/*
 * Tries to merge 'state' with its previous and its next neighbour in
 * tree->state.  A neighbour qualifies when its range is directly adjacent
 * and its state bits are identical.
 */
115 struct extent_state *other;
116 struct cache_extent *other_node;
/*
 * NOTE(review): the action taken for states carrying any EXTENT_IOBITS is
 * on a line not visible here; presumably such states are left unmerged.
 */
118 if (state->state & EXTENT_IOBITS)
/* first candidate: the neighbour located right before 'state' */
121 other_node = prev_cache_extent(&state->cache_node);
123 other = container_of(other_node, struct extent_state,
125 if (other->end == state->start - 1 &&
126 other->state == state->state) {
/* grow 'state' backwards over 'other', then drop 'other' from the tree */
127 state->start = other->start;
128 update_extent_state(state);
129 remove_cache_extent(&tree->state, &other->cache_node);
130 btrfs_free_extent_state(other);
/* second candidate: the neighbour located right after 'state' */
133 other_node = next_cache_extent(&state->cache_node);
135 other = container_of(other_node, struct extent_state,
137 if (other->start == state->end + 1 &&
138 other->state == state->state) {
/* grow 'other' backwards over 'state', then drop 'state' from the tree */
139 other->start = state->start;
140 update_extent_state(other);
141 remove_cache_extent(&tree->state, &state->cache_node);
142 btrfs_free_extent_state(state);
149 * insert an extent_state struct into the tree. 'bits' are set on the
150 * struct before it is inserted.
152 static int insert_state(struct extent_io_tree *tree,
153 struct extent_state *state, u64 start, u64 end,
/*
 * NOTE(review): the rest of the signature, the assignment of the end of the
 * range and the handling of 'ret' are on lines not visible in this excerpt.
 */
/* record the requested bits and the start of the range on the state */
159 state->state |= bits;
160 state->start = start;
/* the cache node must mirror the state's range before it is inserted */
162 update_extent_state(state);
163 ret = insert_cache_extent(&tree->state, &state->cache_node);
/* the freshly inserted state may be mergeable with its neighbours */
165 merge_state(tree, state);
170 * split a given extent state struct in two, inserting the preallocated
171 * struct 'prealloc' as the newly created first half. 'split' indicates an
172 * offset inside 'orig' where it should be split.
174 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
175 struct extent_state *prealloc, u64 split)
/*
 * 'prealloc' takes over the front part of the range, from orig->start up to
 * and including split - 1, and inherits the state bits of 'orig'.
 */
178 prealloc->start = orig->start;
179 prealloc->end = split - 1;
180 prealloc->state = orig->state;
181 update_extent_state(prealloc);
/*
 * NOTE(review): the line adjusting orig's own range is not visible here;
 * presumably orig->start becomes 'split' before its cache node is resynced.
 */
183 update_extent_state(orig);
/* only 'prealloc' needs inserting; 'orig' is not inserted again here */
184 ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
190 * clear some bits on a single extent state in the tree.
192 static int clear_state_bit(struct extent_io_tree *tree,
193 struct extent_state *state, int bits)
/* remember which of the requested bits were actually set beforehand */
195 int ret = state->state & bits;
197 state->state &= ~bits;
/* no bits left at all: the state is dropped from the tree and released */
198 if (state->state == 0) {
199 remove_cache_extent(&tree->state, &state->cache_node);
200 btrfs_free_extent_state(state);
/*
 * NOTE(review): presumably the else branch (the branch keyword is on a line
 * not visible here): a state that keeps some bits may now be mergeable.
 */
202 merge_state(tree, state);
208 * clear some bits on a range in the tree.
210 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
/*
 * Clears 'bits' on the extent states overlapping the range from 'start' up
 * to and including 'end', splitting states that straddle either boundary.
 * The previously set bits reported by clear_state_bit() are accumulated in
 * 'set'.
 * NOTE(review): local declarations, loop/goto structure and the return
 * statement are partly on lines not visible in this excerpt.
 */
212 struct extent_state *state;
213 struct extent_state *prealloc = NULL;
214 struct cache_extent *node;
/* spare state, handed to split_state() whenever a split is required */
221 prealloc = alloc_extent_state();
227 * this search will find the extents that end after
230 node = search_cache_extent(&tree->state, start);
233 state = container_of(node, struct extent_state, cache_node);
/* the state found begins past the requested range (action not visible) */
234 if (state->start > end)
236 last_end = state->end;
239 * | ---- desired range ---- |
241 * | ------------- state -------------- |
243 * We need to split the extent we found, and may flip
244 * bits on second half.
246 * If the extent we found extends past our range, we
247 * just split and search again. It'll get split again
248 * the next time though.
250 * If the extent we found is inside our range, we clear
251 * the desired bit on it.
/* cut the state at 'start'; -EEXIST from the insertion is treated as fatal */
253 if (state->start < start) {
254 err = split_state(tree, state, prealloc, start);
255 BUG_ON(err == -EEXIST);
259 if (state->end <= end) {
260 set |= clear_state_bit(tree, state, bits);
/* last_end + 1 would wrap around for a state ending at the u64 maximum */
261 if (last_end == (u64)-1)
263 start = last_end + 1;
265 start = state->start;
270 * | ---- desired range ---- |
272 * We need to split the extent, and clear the bit
/*
 * cut the state at end + 1; split_state() gives 'prealloc' the front part,
 * which is the part inside the range, so the bits are cleared on 'prealloc'
 */
275 if (state->start <= end && state->end > end) {
276 err = split_state(tree, state, prealloc, end + 1);
277 BUG_ON(err == -EEXIST);
279 set |= clear_state_bit(tree, prealloc, bits);
/* the state lies fully inside the range: clear the bits and move past it */
284 start = state->end + 1;
285 set |= clear_state_bit(tree, state, bits);
286 if (last_end == (u64)-1)
288 start = last_end + 1;
/*
 * release the spare state; NOTE(review): any condition guarding this call
 * (e.g. the spare being unused) is on a line not visible here
 */
292 btrfs_free_extent_state(prealloc);
302 * set some bits on a range in the tree.
304 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
/*
 * Sets 'bits' on the range from 'start' up to and including 'end': existing
 * states inside the range get the bits OR-ed in, states straddling a
 * boundary are split first, and holes are filled by inserting new states.
 * NOTE(review): local declarations, loop/goto structure and the return
 * statement are partly on lines not visible in this excerpt.
 */
306 struct extent_state *state;
307 struct extent_state *prealloc = NULL;
308 struct cache_extent *node;
/* spare state, consumed by insert_state() or split_state() below */
314 prealloc = alloc_extent_state();
320 * this search will find the extents that end after
323 node = search_cache_extent(&tree->state, start);
/*
 * the whole range is inserted as one new state; NOTE(review): presumably
 * only when the search found nothing -- the condition is not visible here
 */
325 err = insert_state(tree, prealloc, start, end, bits);
326 BUG_ON(err == -EEXIST);
331 state = container_of(node, struct extent_state, cache_node);
/* remember the bounds now: merge_state() below may free or resize 'state' */
332 last_start = state->start;
333 last_end = state->end;
336 * | ---- desired range ---- |
339 * Just lock what we found and keep going
341 if (state->start == start && state->end <= end) {
342 state->state |= bits;
343 merge_state(tree, state);
/* last_end + 1 would wrap around for a state ending at the u64 maximum */
344 if (last_end == (u64)-1)
346 start = last_end + 1;
350 * | ---- desired range ---- |
353 * | ------------- state -------------- |
355 * We need to split the extent we found, and may flip bits on
358 * If the extent we found extends past our
359 * range, we just split and search again. It'll get split
360 * again the next time though.
362 * If the extent we found is inside our range, we set the
/* cut the state at 'start'; -EEXIST from the insertion is treated as fatal */
365 if (state->start < start) {
366 err = split_state(tree, state, prealloc, start);
367 BUG_ON(err == -EEXIST);
371 if (state->end <= end) {
372 state->state |= bits;
373 start = state->end + 1;
374 merge_state(tree, state);
375 if (last_end == (u64)-1)
377 start = last_end + 1;
379 start = state->start;
384 * | ---- desired range ---- |
385 * | state | or | state |
387 * There's a hole, we need to insert something in it and
388 * ignore the extent we found.
390 if (state->start > start) {
/* the new state covers the hole only, i.e. it stops right before 'state' */
392 if (end < last_start)
395 this_end = last_start -1;
396 err = insert_state(tree, prealloc, start, this_end,
398 BUG_ON(err == -EEXIST);
402 start = this_end + 1;
406 * | ---- desired range ---- |
407 * | ---------- state ---------- |
408 * We need to split the extent, and set the bit
411 err = split_state(tree, state, prealloc, end + 1);
412 BUG_ON(err == -EEXIST);
/*
 * NOTE(review): split_state() gives 'prealloc' the front part ending at
 * 'end', and clear_extent_bits() acts on 'prealloc' in the matching case,
 * yet the bits are OR-ed into 'state' here -- verify that this targets the
 * intended half of the split.
 */
414 state->state |= bits;
415 merge_state(tree, prealloc);
/*
 * release the spare state; NOTE(review): any condition guarding this call
 * (e.g. the spare being unused) is on a line not visible here
 */
419 btrfs_free_extent_state(prealloc);
/*
 * Convenience wrapper: sets EXTENT_DIRTY on the given range and returns
 * whatever set_extent_bits() returns.
 */
427 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
429 return set_extent_bits(tree, start, end, EXTENT_DIRTY);
/*
 * Convenience wrapper: clears EXTENT_DIRTY on the given range and returns
 * whatever clear_extent_bits() returns.
 */
432 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
434 return clear_extent_bits(tree, start, end, EXTENT_DIRTY);
/*
 * Walks the states of tree->state beginning with the one returned by
 * search_cache_extent() for 'start' and looks for a state that ends at or
 * after 'start' and has at least one of 'bits' set.  The range of that
 * state is reported through start_ret and end_ret.
 * NOTE(review): the loop construct and the return values are on lines not
 * visible in this excerpt.
 */
437 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
438 u64 *start_ret, u64 *end_ret, int bits)
440 struct cache_extent *node;
441 struct extent_state *state;
445 * this search will find all the extents that end after
448 node = search_cache_extent(&tree->state, start);
453 state = container_of(node, struct extent_state, cache_node);
/* match: report the bounds of the whole state, not just the searched part */
454 if (state->end >= start && (state->state & bits)) {
455 *start_ret = state->start;
456 *end_ret = state->end;
/* no match on this state, continue with the following one */
460 node = next_cache_extent(node);
/*
 * Tests the states overlapping the range from 'start' up to and including
 * 'end' against 'bits'.  'filled' selects a stricter mode in which a gap in
 * front of a state is handled separately.
 * NOTE(review): how the result is computed and returned is on lines not
 * visible in this excerpt.
 */
468 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
469 int bits, int filled)
471 struct extent_state *state = NULL;
472 struct cache_extent *node;
475 node = search_cache_extent(&tree->state, start);
/* 'start' advances past each visited state until the range is consumed */
476 while (node && start <= end) {
477 state = container_of(node, struct extent_state, cache_node);
/* in 'filled' mode a state beginning after 'start' means there is a hole */
479 if (filled && state->start > start) {
/* the state begins past the range that is being tested */
483 if (state->start > end)
485 if (state->state & bits) {
493 start = state->end + 1;
496 node = next_cache_extent(node);
/*
 * Stores 'private' in the xprivate field of the state found for 'start'.
 * NOTE(review): the handling of a failed lookup, of a state that does not
 * begin exactly at 'start', and the return value are on lines not visible
 * in this excerpt (presumably error paths).
 */
506 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
508 struct cache_extent *node;
509 struct extent_state *state;
512 node = search_cache_extent(&tree->state, start);
517 state = container_of(node, struct extent_state, cache_node);
/* the state has to begin exactly at 'start' to be accepted */
518 if (state->start != start) {
522 state->xprivate = private;
/*
 * Reads the xprivate field of the state found for 'start' into *private.
 * NOTE(review): the handling of a failed lookup, of a state that does not
 * begin exactly at 'start', and the return value are on lines not visible
 * in this excerpt (presumably error paths).
 */
527 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
529 struct cache_extent *node;
530 struct extent_state *state;
533 node = search_cache_extent(&tree->state, start);
538 state = container_of(node, struct extent_state, cache_node);
/* the state has to begin exactly at 'start' to be accepted */
539 if (state->start != start) {
543 *private = state->xprivate;
/*
 * Allocates a zero-filled extent_buffer followed by 'blocksize' additional
 * bytes, keys its cache node by bytenr/blocksize and attaches it to
 * info->extent_cache.  Both list heads start out empty.
 * NOTE(review): the allocation-failure check, further field assignments and
 * the return statement are on lines not visible in this excerpt.
 */
548 static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *info,
549 u64 bytenr, u32 blocksize)
551 struct extent_buffer *eb;
/* one allocation holds the struct plus 'blocksize' trailing bytes */
553 eb = calloc(1, sizeof(struct extent_buffer) + blocksize);
/* presumably (u64)-1 marks the device offset as not assigned yet -- confirm */
562 eb->dev_bytenr = (u64)-1;
563 eb->cache_node.start = bytenr;
564 eb->cache_node.size = blocksize;
566 eb->tree = &info->extent_cache;
567 INIT_LIST_HEAD(&eb->recow);
568 INIT_LIST_HEAD(&eb->lru);
/*
 * Creates a private copy of 'src': a new buffer with the same start and
 * length is allocated, the content of 'src' is copied into it and the copy
 * is flagged EXTENT_BUFFER_DUMMY.
 * NOTE(review): the allocation-failure check and the return statement are
 * on lines not visible in this excerpt.
 */
573 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
575 struct extent_buffer *new;
577 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
580 /* cloned eb is not linked into fs_info->extent_cache */
/* copy all src->len bytes, starting at offset 0 on both sides */
583 copy_extent_buffer(new, src, 0, 0, src->len);
584 new->flags |= EXTENT_BUFFER_DUMMY;
/*
 * Final teardown of an extent buffer: unlinks it from the LRU list and, for
 * buffers not flagged EXTENT_BUFFER_DUMMY, removes it from the tree's cache
 * and subtracts its length from the cache size accounting.
 * NOTE(review): the release of the memory itself is on lines not visible in
 * this excerpt.
 */
589 static void free_extent_buffer_final(struct extent_buffer *eb)
591 struct extent_io_tree *tree = eb->tree;
/* the accounted cache size can never be smaller than a cached buffer */
594 BUG_ON(tree && tree->cache_size < eb->len);
595 list_del_init(&eb->lru);
/*
 * NOTE(review): 'tree' is checked for NULL in the BUG_ON above but is
 * dereferenced unguarded below for non-dummy buffers -- confirm that such
 * buffers always carry a tree.
 */
596 if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
597 remove_cache_extent(&tree->cache, &eb->cache_node);
598 tree->cache_size -= eb->len;
/*
 * Common implementation behind free_extent_buffer() and
 * free_extent_buffer_nocache().  NULL and error-encoded pointers are
 * handled up front.  Buffers flagged EXTENT_BUFFER_DUMMY, and any buffer
 * when 'free_now' is set, go through free_extent_buffer_final().
 * NOTE(review): the reference count handling and the condition enclosing
 * the final steps are on lines not visible in this excerpt.
 */
603 static void free_extent_buffer_internal(struct extent_buffer *eb, bool free_now)
605 if (!eb || IS_ERR(eb))
609 BUG_ON(eb->refs < 0);
/* a buffer must not be torn down while it is still flagged dirty */
611 BUG_ON(eb->flags & EXTENT_DIRTY);
612 list_del_init(&eb->recow);
613 if (eb->flags & EXTENT_BUFFER_DUMMY || free_now)
614 free_extent_buffer_final(eb);
/*
 * Releases an extent buffer through free_extent_buffer_internal() with
 * free_now disabled.
 */
618 void free_extent_buffer(struct extent_buffer *eb)
620 free_extent_buffer_internal(eb, 0);
/*
 * Releases an extent buffer through free_extent_buffer_internal() with
 * free_now enabled, which selects the final teardown path there even for
 * buffers not flagged EXTENT_BUFFER_DUMMY.
 */
623 void free_extent_buffer_nocache(struct extent_buffer *eb)
625 free_extent_buffer_internal(eb, 1);
/*
 * Looks up a cached extent buffer matching bytenr and blocksize exactly.
 * A hit is moved to the tail of the tree's LRU list.
 * NOTE(review): the reference handling and the return statement are on
 * lines not visible in this excerpt; 'eb' starts out as NULL.
 */
628 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
629 u64 bytenr, u32 blocksize)
631 struct extent_buffer *eb = NULL;
632 struct cache_extent *cache;
634 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
/* the entry found is only accepted when start and size both match exactly */
635 if (cache && cache->start == bytenr &&
636 cache->size == blocksize) {
637 eb = container_of(cache, struct extent_buffer, cache_node);
638 list_move_tail(&eb->lru, &tree->lru);
/*
 * Returns the extent buffer that search_cache_extent() yields for 'start'
 * in the tree's cache, moving it to the tail of the LRU list.
 * NOTE(review): the rest of the signature, the check of the search result,
 * the reference handling and the return statement are on lines not visible
 * in this excerpt; 'eb' starts out as NULL.
 */
644 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
647 struct extent_buffer *eb = NULL;
648 struct cache_extent *cache;
650 cache = search_cache_extent(&tree->cache, start);
652 eb = container_of(cache, struct extent_buffer, cache_node);
653 list_move_tail(&eb->lru, &tree->lru);
/*
 * Shrinks the extent buffer cache: walks the LRU list from its head and
 * finally frees buffers, checking after each one whether cache_size has
 * dropped to 90% of max_cache_size or below.
 * NOTE(review): any condition selecting which buffers may be freed, and the
 * action taken once the threshold is reached (presumably stopping), are on
 * lines not visible in this excerpt.
 */
659 static void trim_extent_buffer_cache(struct extent_io_tree *tree)
661 struct extent_buffer *eb, *tmp;
/* the _safe iterator is needed because entries are unlinked while walking */
663 list_for_each_entry_safe(eb, tmp, &tree->lru, lru) {
665 free_extent_buffer_final(eb);
666 if (tree->cache_size <= ((tree->max_cache_size * 9) / 10))
/*
 * Returns the extent buffer for bytenr/blocksize from fs_info->extent_cache.
 * An exactly matching cached buffer is reused and moved to the LRU tail.
 * Otherwise a new buffer is allocated, inserted into the cache and added to
 * the LRU list; the cache is trimmed when it reaches its size limit.
 * NOTE(review): reference handling, error paths, the branch structure
 * around the non-matching case and the return statement are on lines not
 * visible in this excerpt.
 */
671 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
672 u64 bytenr, u32 blocksize)
674 struct extent_buffer *eb;
675 struct extent_io_tree *tree = &fs_info->extent_cache;
676 struct cache_extent *cache;
678 cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
/* exact hit: same start and same size */
679 if (cache && cache->start == bytenr &&
680 cache->size == blocksize) {
681 eb = container_of(cache, struct extent_buffer, cache_node);
682 list_move_tail(&eb->lru, &tree->lru);
688 eb = container_of(cache, struct extent_buffer,
/* presumably an overlapping entry that does not match exactly -- confirm */
690 free_extent_buffer(eb);
692 eb = __alloc_extent_buffer(fs_info, bytenr, blocksize);
695 ret = insert_cache_extent(&tree->cache, &eb->cache_node);
/* account the new buffer and trim once the cache reaches its size limit */
700 list_add_tail(&eb->lru, &tree->lru);
701 tree->cache_size += blocksize;
702 if (tree->cache_size >= tree->max_cache_size)
703 trim_extent_buffer_cache(tree);
/*
 * Reads 'len' bytes from eb->fd at file position eb->dev_bytenr into the
 * buffer's data area at 'offset'.
 * NOTE(review): result checking and the return value are on lines not
 * visible in this excerpt.
 */
708 int read_extent_from_disk(struct extent_buffer *eb,
709 unsigned long offset, unsigned long len)
/* 'offset' applies to the destination only, not to the file position */
712 ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
/*
 * Writes the complete data area of the buffer (eb->len bytes) to eb->fd at
 * file position eb->dev_bytenr.
 * NOTE(review): error handling and the return value are on lines not
 * visible in this excerpt.
 */
726 int write_extent_to_disk(struct extent_buffer *eb)
729 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
/* anything other than a complete write takes a separate (unseen) path */
732 if (ret != eb->len) {
/*
 * Reads 'bytes' bytes at logical address 'offset' into 'buf'.  Each piece is
 * mapped with btrfs_map_block() and read from the first stripe of the
 * returned mapping.  Failures are reported on stderr.
 * NOTE(review): the loop construct, the use of 'mirror', the advancing of
 * 'offset', cleanup and return values are on lines not visible in this
 * excerpt.
 */
741 int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
742 u64 bytes, int mirror)
744 struct btrfs_multi_bio *multi = NULL;
745 struct btrfs_device *device;
746 u64 bytes_left = bytes;
/* ask the mapping for everything that is left; it may shorten read_len */
752 read_len = bytes_left;
753 ret = btrfs_map_block(info, READ, offset, &read_len, &multi,
756 fprintf(stderr, "Couldn't map the block %Lu\n",
/* only the first stripe of the mapping is read from */
760 device = multi->stripes[0].dev;
/* never read more than what is still outstanding */
762 read_len = min(bytes_left, read_len);
/* a descriptor of 0 or below is treated as "device not open" here */
763 if (device->fd <= 0) {
768 ret = pread(device->fd, buf + total_read, read_len,
769 multi->stripes[0].physical);
772 fprintf(stderr, "Error reading %Lu, %d\n", offset,
/* a partial read is reported instead of being retried here */
776 if (ret != read_len) {
777 fprintf(stderr, "Short read for %Lu, read %d, "
778 "read_len %Lu\n", offset, ret, read_len);
/* account for the piece just read */
782 bytes_left -= read_len;
784 total_read += read_len;
/*
 * Writes 'bytes' bytes from 'buf' to logical address 'offset'.  Each piece
 * is mapped with btrfs_map_block().  One path wraps the data in a temporary
 * extent buffer and hands it to write_raid56_with_parity(); the other path
 * writes the same data to every stripe of the mapping with pwrite().
 * NOTE(review): the condition selecting the RAID5/6 path (presumably a
 * non-NULL raid_map), the use of 'mirror', the advancing of 'offset',
 * cleanup and return values are on lines not visible in this excerpt.
 */
790 int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
791 u64 bytes, int mirror)
793 struct btrfs_multi_bio *multi = NULL;
794 struct btrfs_device *device;
795 u64 bytes_left = bytes;
798 u64 *raid_map = NULL;
803 while (bytes_left > 0) {
/* ask the mapping for everything that is left; it may shorten this_len */
804 this_len = bytes_left;
807 ret = btrfs_map_block(info, WRITE, offset, &this_len, &multi,
810 fprintf(stderr, "Couldn't map the block %Lu\n",
816 struct extent_buffer *eb;
/* keep the mapped length before this_len is clamped below */
817 u64 stripe_len = this_len;
/* one temporary buffer carries at most info->nodesize bytes */
819 this_len = min(this_len, bytes_left);
820 this_len = min(this_len, (u64)info->nodesize);
822 eb = malloc(sizeof(struct extent_buffer) + this_len);
824 fprintf(stderr, "cannot allocate memory for eb\n");
/* zero the struct and the trailing data area before filling it in */
829 memset(eb, 0, sizeof(struct extent_buffer) + this_len);
833 memcpy(eb->data, buf + total_write, this_len);
834 ret = write_raid56_with_parity(info, eb, multi,
835 stripe_len, raid_map);
841 } else while (dev_nr < multi->num_stripes) {
/* non-parity path: the same piece is written to each stripe in turn */
842 device = multi->stripes[dev_nr].dev;
/* a descriptor of 0 or below is treated as "device not open" here */
843 if (device->fd <= 0) {
848 dev_bytenr = multi->stripes[dev_nr].physical;
849 this_len = min(this_len, bytes_left);
852 ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
/* both failed and partial writes end up in this branch */
853 if (ret != this_len) {
855 fprintf(stderr, "Error writing to "
856 "device %d\n", errno);
861 fprintf(stderr, "Short write\n");
/* the piece written can never exceed what was still outstanding */
868 BUG_ON(bytes_left < this_len);
870 bytes_left -= this_len;
872 total_write += this_len;
/*
 * Marks an extent buffer dirty.  Nothing is done when EXTENT_DIRTY is
 * already set in eb->flags.  Otherwise the flag is set, the byte range of
 * the buffer is marked dirty in its tree and extent_buffer_get() is called
 * on it.
 * NOTE(review): the return value is on a line not visible in this excerpt.
 */
884 int set_extent_buffer_dirty(struct extent_buffer *eb)
886 struct extent_io_tree *tree = eb->tree;
887 if (!(eb->flags & EXTENT_DIRTY)) {
888 eb->flags |= EXTENT_DIRTY;
/* the range end is inclusive, hence the - 1 */
889 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
/* presumably takes a reference held while dirty; see clear_extent_buffer_dirty() */
890 extent_buffer_get(eb);
/*
 * Clears the dirty state of an extent buffer.  Nothing is done unless
 * EXTENT_DIRTY is set in eb->flags.  Otherwise the flag is removed, the
 * byte range of the buffer is cleared in its tree and free_extent_buffer()
 * is called on it.
 * NOTE(review): the return value is on a line not visible in this excerpt.
 */
895 int clear_extent_buffer_dirty(struct extent_buffer *eb)
897 struct extent_io_tree *tree = eb->tree;
898 if (eb->flags & EXTENT_DIRTY) {
899 eb->flags &= ~EXTENT_DIRTY;
/* the range end is inclusive, hence the - 1 */
900 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
/* counterpart of the extent_buffer_get() call in set_extent_buffer_dirty() */
901 free_extent_buffer(eb);
/*
 * Compares 'len' bytes of the buffer's data, beginning at offset 'start',
 * with the memory at 'ptrv' and returns the memcmp() result.  No range check
 * against the buffer length is visible here.
 */
906 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
907 unsigned long start, unsigned long len)
909 return memcmp(eb->data + start, ptrv, len);
/*
 * Copies 'len' bytes out of the buffer's data, beginning at offset 'start',
 * into 'dst'.  No range check against the buffer length is visible here.
 */
912 void read_extent_buffer(struct extent_buffer *eb, void *dst,
913 unsigned long start, unsigned long len)
915 memcpy(dst, eb->data + start, len);
/*
 * Copies 'len' bytes from 'src' into the buffer's data, beginning at offset
 * 'start'.  No range check against the buffer length is visible here.
 */
918 void write_extent_buffer(struct extent_buffer *eb, const void *src,
919 unsigned long start, unsigned long len)
921 memcpy(eb->data + start, src, len);
/*
 * Copies 'len' bytes from src's data at src_offset to dst's data at
 * dst_offset ('len' is declared on a signature line not visible in this
 * excerpt).  memcpy() is used, so the two regions must not overlap.
 */
924 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
925 unsigned long dst_offset, unsigned long src_offset,
928 memcpy(dst->data + dst_offset, src->data + src_offset, len);
/*
 * Moves 'len' bytes within one buffer, from src_offset to dst_offset.
 * memmove() is used, so the source and destination regions may overlap.
 */
931 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
932 unsigned long src_offset, unsigned long len)
934 memmove(dst->data + dst_offset, dst->data + src_offset, len);
/*
 * Fills 'len' bytes of the buffer's data, beginning at offset 'start', with
 * the byte value 'c'.  No range check against the buffer length is visible
 * here.
 */
937 void memset_extent_buffer(struct extent_buffer *eb, char c,
938 unsigned long start, unsigned long len)
940 memset(eb->data + start, c, len);
/*
 * Tests bit 'nr' of the bitmap located at byte offset 'start' in the
 * buffer's data and returns the le_test_bit() result ('nr' is declared on a
 * signature line not visible in this excerpt).
 */
943 int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
946 return le_test_bit(nr, (u8 *)eb->data + start);