3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
#define _XOPEN_SOURCE 600
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include "kerncompat.h"
#include "extent_io.h"
31 u64 cache_soft_max = 1024 * 1024 * 256;
32 u64 cache_hard_max = 1 * 1024 * 1024 * 1024;
34 void extent_io_tree_init(struct extent_io_tree *tree)
36 cache_tree_init(&tree->state);
37 cache_tree_init(&tree->cache);
38 INIT_LIST_HEAD(&tree->lru);
42 static struct extent_state *alloc_extent_state(void)
44 struct extent_state *state;
46 state = malloc(sizeof(*state));
55 static void free_extent_state(struct extent_state *state)
58 BUG_ON(state->refs < 0);
63 void extent_io_tree_cleanup(struct extent_io_tree *tree)
65 struct extent_state *es;
66 struct extent_buffer *eb;
67 struct cache_extent *cache;
69 while(!list_empty(&tree->lru)) {
70 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
72 fprintf(stderr, "extent buffer leak: "
73 "start %llu len %u\n",
74 (unsigned long long)eb->start, eb->len);
77 free_extent_buffer(eb);
80 cache = find_first_cache_extent(&tree->state, 0);
83 es = container_of(cache, struct extent_state, cache_node);
84 remove_cache_extent(&tree->state, &es->cache_node);
85 free_extent_state(es);
89 static inline void update_extent_state(struct extent_state *state)
91 state->cache_node.start = state->start;
92 state->cache_node.size = state->end + 1 - state->start;
96 * Utility function to look for merge candidates inside a given range.
97 * Any extents with matching state are merged together into a single
98 * extent in the tree. Extents with EXTENT_IO in their state field are
101 static int merge_state(struct extent_io_tree *tree,
102 struct extent_state *state)
104 struct extent_state *other;
105 struct cache_extent *other_node;
107 if (state->state & EXTENT_IOBITS)
110 other_node = prev_cache_extent(&state->cache_node);
112 other = container_of(other_node, struct extent_state,
114 if (other->end == state->start - 1 &&
115 other->state == state->state) {
116 state->start = other->start;
117 update_extent_state(state);
118 remove_cache_extent(&tree->state, &other->cache_node);
119 free_extent_state(other);
122 other_node = next_cache_extent(&state->cache_node);
124 other = container_of(other_node, struct extent_state,
126 if (other->start == state->end + 1 &&
127 other->state == state->state) {
128 other->start = state->start;
129 update_extent_state(other);
130 remove_cache_extent(&tree->state, &state->cache_node);
131 free_extent_state(state);
138 * insert an extent_state struct into the tree. 'bits' are set on the
139 * struct before it is inserted.
141 static int insert_state(struct extent_io_tree *tree,
142 struct extent_state *state, u64 start, u64 end,
148 state->state |= bits;
149 state->start = start;
151 update_extent_state(state);
152 ret = insert_existing_cache_extent(&tree->state, &state->cache_node);
154 merge_state(tree, state);
159 * split a given extent state struct in two, inserting the preallocated
160 * struct 'prealloc' as the newly created second half. 'split' indicates an
161 * offset inside 'orig' where it should be split.
163 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
164 struct extent_state *prealloc, u64 split)
167 prealloc->start = orig->start;
168 prealloc->end = split - 1;
169 prealloc->state = orig->state;
170 update_extent_state(prealloc);
172 update_extent_state(orig);
173 ret = insert_existing_cache_extent(&tree->state,
174 &prealloc->cache_node);
180 * clear some bits on a range in the tree.
182 static int clear_state_bit(struct extent_io_tree *tree,
183 struct extent_state *state, int bits)
185 int ret = state->state & bits;
187 state->state &= ~bits;
188 if (state->state == 0) {
189 remove_cache_extent(&tree->state, &state->cache_node);
190 free_extent_state(state);
192 merge_state(tree, state);
198 * set some bits on a range in the tree.
200 int clear_extent_bits(struct extent_io_tree *tree, u64 start,
201 u64 end, int bits, gfp_t mask)
203 struct extent_state *state;
204 struct extent_state *prealloc = NULL;
205 struct cache_extent *node;
211 prealloc = alloc_extent_state();
216 * this search will find the extents that end after
219 node = find_first_cache_extent(&tree->state, start);
222 state = container_of(node, struct extent_state, cache_node);
223 if (state->start > end)
225 last_end = state->end;
228 * | ---- desired range ---- |
230 * | ------------- state -------------- |
232 * We need to split the extent we found, and may flip
233 * bits on second half.
235 * If the extent we found extends past our range, we
236 * just split and search again. It'll get split again
237 * the next time though.
239 * If the extent we found is inside our range, we clear
240 * the desired bit on it.
242 if (state->start < start) {
243 err = split_state(tree, state, prealloc, start);
244 BUG_ON(err == -EEXIST);
248 if (state->end <= end) {
249 set |= clear_state_bit(tree, state, bits);
250 if (last_end == (u64)-1)
252 start = last_end + 1;
254 start = state->start;
259 * | ---- desired range ---- |
261 * We need to split the extent, and clear the bit
264 if (state->start <= end && state->end > end) {
265 err = split_state(tree, state, prealloc, end + 1);
266 BUG_ON(err == -EEXIST);
268 set |= clear_state_bit(tree, prealloc, bits);
273 start = state->end + 1;
274 set |= clear_state_bit(tree, state, bits);
275 if (last_end == (u64)-1)
277 start = last_end + 1;
281 free_extent_state(prealloc);
291 * set some bits on a range in the tree.
293 int set_extent_bits(struct extent_io_tree *tree, u64 start,
294 u64 end, int bits, gfp_t mask)
296 struct extent_state *state;
297 struct extent_state *prealloc = NULL;
298 struct cache_extent *node;
304 prealloc = alloc_extent_state();
310 * this search will find the extents that end after
313 node = find_first_cache_extent(&tree->state, start);
315 err = insert_state(tree, prealloc, start, end, bits);
316 BUG_ON(err == -EEXIST);
321 state = container_of(node, struct extent_state, cache_node);
322 last_start = state->start;
323 last_end = state->end;
326 * | ---- desired range ---- |
329 * Just lock what we found and keep going
331 if (state->start == start && state->end <= end) {
332 state->state |= bits;
333 merge_state(tree, state);
334 if (last_end == (u64)-1)
336 start = last_end + 1;
340 * | ---- desired range ---- |
343 * | ------------- state -------------- |
345 * We need to split the extent we found, and may flip bits on
348 * If the extent we found extends past our
349 * range, we just split and search again. It'll get split
350 * again the next time though.
352 * If the extent we found is inside our range, we set the
355 if (state->start < start) {
356 err = split_state(tree, state, prealloc, start);
357 BUG_ON(err == -EEXIST);
361 if (state->end <= end) {
362 state->state |= bits;
363 start = state->end + 1;
364 merge_state(tree, state);
365 if (last_end == (u64)-1)
367 start = last_end + 1;
369 start = state->start;
374 * | ---- desired range ---- |
375 * | state | or | state |
377 * There's a hole, we need to insert something in it and
378 * ignore the extent we found.
380 if (state->start > start) {
382 if (end < last_start)
385 this_end = last_start -1;
386 err = insert_state(tree, prealloc, start, this_end,
388 BUG_ON(err == -EEXIST);
392 start = this_end + 1;
396 * | ---- desired range ---- |
397 * | ---------- state ---------- |
398 * We need to split the extent, and set the bit
401 err = split_state(tree, state, prealloc, end + 1);
402 BUG_ON(err == -EEXIST);
404 state->state |= bits;
405 merge_state(tree, prealloc);
409 free_extent_state(prealloc);
417 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
420 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
423 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
426 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
429 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
430 u64 *start_ret, u64 *end_ret, int bits)
432 struct cache_extent *node;
433 struct extent_state *state;
437 * this search will find all the extents that end after
440 node = find_first_cache_extent(&tree->state, start);
445 state = container_of(node, struct extent_state, cache_node);
446 if (state->end >= start && (state->state & bits)) {
447 *start_ret = state->start;
448 *end_ret = state->end;
452 node = next_cache_extent(node);
460 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
461 int bits, int filled)
463 struct extent_state *state = NULL;
464 struct cache_extent *node;
467 node = find_first_cache_extent(&tree->state, start);
468 while (node && start <= end) {
469 state = container_of(node, struct extent_state, cache_node);
471 if (filled && state->start > start) {
475 if (state->start > end)
477 if (state->state & bits) {
485 start = state->end + 1;
488 node = next_cache_extent(node);
498 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
500 struct cache_extent *node;
501 struct extent_state *state;
504 node = find_first_cache_extent(&tree->state, start);
509 state = container_of(node, struct extent_state, cache_node);
510 if (state->start != start) {
514 state->xprivate = private;
519 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
521 struct cache_extent *node;
522 struct extent_state *state;
525 node = find_first_cache_extent(&tree->state, start);
530 state = container_of(node, struct extent_state, cache_node);
531 if (state->start != start) {
535 *private = state->xprivate;
540 static int free_some_buffers(struct extent_io_tree *tree)
543 struct extent_buffer *eb;
544 struct list_head *node, *next;
546 if (tree->cache_size < cache_soft_max)
549 list_for_each_safe(node, next, &tree->lru) {
550 eb = list_entry(node, struct extent_buffer, lru);
552 free_extent_buffer(eb);
553 if (tree->cache_size < cache_hard_max)
556 list_move_tail(&eb->lru, &tree->lru);
558 if (nrscan++ > 64 && tree->cache_size < cache_hard_max)
564 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
565 u64 bytenr, u32 blocksize)
567 struct extent_buffer *eb;
570 eb = malloc(sizeof(struct extent_buffer) + blocksize);
575 memset(eb, 0, sizeof(struct extent_buffer) + blocksize);
583 eb->dev_bytenr = (u64)-1;
584 eb->cache_node.start = bytenr;
585 eb->cache_node.size = blocksize;
587 free_some_buffers(tree);
588 ret = insert_existing_cache_extent(&tree->cache, &eb->cache_node);
593 list_add_tail(&eb->lru, &tree->lru);
594 tree->cache_size += blocksize;
598 void free_extent_buffer(struct extent_buffer *eb)
604 BUG_ON(eb->refs < 0);
606 struct extent_io_tree *tree = eb->tree;
607 BUG_ON(eb->flags & EXTENT_DIRTY);
608 list_del_init(&eb->lru);
609 remove_cache_extent(&tree->cache, &eb->cache_node);
610 BUG_ON(tree->cache_size < eb->len);
611 tree->cache_size -= eb->len;
616 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
617 u64 bytenr, u32 blocksize)
619 struct extent_buffer *eb = NULL;
620 struct cache_extent *cache;
622 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
623 if (cache && cache->start == bytenr && cache->size == blocksize) {
624 eb = container_of(cache, struct extent_buffer, cache_node);
625 list_move_tail(&eb->lru, &tree->lru);
631 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
634 struct extent_buffer *eb = NULL;
635 struct cache_extent *cache;
637 cache = find_first_cache_extent(&tree->cache, start);
639 eb = container_of(cache, struct extent_buffer, cache_node);
640 list_move_tail(&eb->lru, &tree->lru);
646 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
647 u64 bytenr, u32 blocksize)
649 struct extent_buffer *eb;
650 struct cache_extent *cache;
652 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
653 if (cache && cache->start == bytenr && cache->size == blocksize) {
654 eb = container_of(cache, struct extent_buffer, cache_node);
655 list_move_tail(&eb->lru, &tree->lru);
659 eb = container_of(cache, struct extent_buffer,
661 free_extent_buffer(eb);
663 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
668 int read_extent_from_disk(struct extent_buffer *eb,
669 unsigned long offset, unsigned long len)
672 ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
684 int write_extent_to_disk(struct extent_buffer *eb)
687 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
690 if (ret != eb->len) {
699 int set_extent_buffer_uptodate(struct extent_buffer *eb)
701 eb->flags |= EXTENT_UPTODATE;
705 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
706 struct extent_buffer *eb)
708 eb->flags &= ~EXTENT_UPTODATE;
712 int extent_buffer_uptodate(struct extent_buffer *eb)
717 if (eb->flags & EXTENT_UPTODATE)
722 int set_extent_buffer_dirty(struct extent_buffer *eb)
724 struct extent_io_tree *tree = eb->tree;
725 if (!(eb->flags & EXTENT_DIRTY)) {
726 eb->flags |= EXTENT_DIRTY;
727 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
728 extent_buffer_get(eb);
733 int clear_extent_buffer_dirty(struct extent_buffer *eb)
735 struct extent_io_tree *tree = eb->tree;
736 if (eb->flags & EXTENT_DIRTY) {
737 eb->flags &= ~EXTENT_DIRTY;
738 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
739 free_extent_buffer(eb);
744 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
745 unsigned long start, unsigned long len)
747 return memcmp(eb->data + start, ptrv, len);
750 void read_extent_buffer(struct extent_buffer *eb, void *dst,
751 unsigned long start, unsigned long len)
753 memcpy(dst, eb->data + start, len);
756 void write_extent_buffer(struct extent_buffer *eb, const void *src,
757 unsigned long start, unsigned long len)
759 memcpy(eb->data + start, src, len);
762 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
763 unsigned long dst_offset, unsigned long src_offset,
766 memcpy(dst->data + dst_offset, src->data + src_offset, len);
769 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
770 unsigned long src_offset, unsigned long len)
772 memcpy(dst->data + dst_offset, dst->data + src_offset, len);
775 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
776 unsigned long src_offset, unsigned long len)
778 memmove(dst->data + dst_offset, dst->data + src_offset, len);
781 void memset_extent_buffer(struct extent_buffer *eb, char c,
782 unsigned long start, unsigned long len)
784 memset(eb->data + start, c, len);