3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
19 #define _XOPEN_SOURCE 600
23 #include <sys/types.h>
27 #include "kerncompat.h"
28 #include "extent_io.h"
/*
 * Upper bound (32 * 1024 * 1024) that free_some_buffers() compares
 * tree->cache_size against; cache_size grows by each buffer's blocksize.
 */
31 u64 cache_max = 1024 * 1024 * 32;
/*
 * Initialize an extent_io_tree: both cache trees (tree->state for extent
 * states, tree->cache for extent buffers) and the empty LRU list head.
 */
33 void extent_io_tree_init(struct extent_io_tree *tree)
35 cache_tree_init(&tree->state);
36 cache_tree_init(&tree->cache);
37 INIT_LIST_HEAD(&tree->lru);
/*
 * Allocate a new extent_state with malloc().
 *
 * NOTE(review): the lines following the malloc() call are missing from this
 * listing, so the NULL check and field initialization cannot be confirmed
 * here - verify against the full file.
 */
41 static struct extent_state *alloc_extent_state(void)
43 struct extent_state *state;
45 state = malloc(sizeof(*state));
/*
 * Release an extent_state.
 *
 * The only visible statement asserts that the reference count has not gone
 * negative. NOTE(review): the decrement and the actual free are presumably on
 * lines missing from this listing - confirm.
 */
54 static void free_extent_state(struct extent_state *state)
57 BUG_ON(state->refs < 0);
/*
 * Tear down an extent_io_tree.
 *
 * First drains tree->lru, handing each remaining extent_buffer to
 * free_extent_buffer(); a "leak" diagnostic with the buffer's start and
 * length is written to stderr on this path. Afterwards the first entry of
 * tree->state is looked up, unlinked from the tree and freed.
 *
 * NOTE(review): the condition guarding the leak message and the loop
 * construct around the state removal are not visible in this listing -
 * confirm in the full file.
 */
62 void extent_io_tree_cleanup(struct extent_io_tree *tree)
64 struct extent_state *es;
65 struct extent_buffer *eb;
66 struct cache_extent *cache;
/* Always inspect the list head; each pass is expected to unlink that entry. */
68 while(!list_empty(&tree->lru)) {
69 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
71 fprintf(stderr, "extent buffer leak: "
72 "start %llu len %u\n",
73 (unsigned long long)eb->start, eb->len);
76 free_extent_buffer(eb);
/* Searching from offset 0 yields whatever state is still left in the tree. */
79 cache = find_first_cache_extent(&tree->state, 0);
82 es = container_of(cache, struct extent_state, cache_node);
83 remove_cache_extent(&tree->state, &es->cache_node);
84 free_extent_state(es);
/*
 * Re-derive the cache_node key of 'state' from its inclusive [start, end]
 * range: the node starts at state->start and spans end - start + 1.
 */
88 static inline void update_extent_state(struct extent_state *state)
90 state->cache_node.start = state->start;
91 state->cache_node.size = state->end + 1 - state->start;
95 * Utility function to look for merge candidates inside a given range.
96 * Any extents with matching state are merged together into a single
97 * extent in the tree. Extents with EXTENT_IO in their state field are
/*
 * Try to merge 'state' with its immediate neighbours in tree->state.
 *
 * A neighbour is merged when it is directly adjacent (no gap) and carries
 * exactly the same state bits. States with any EXTENT_IOBITS set are tested
 * for first; NOTE(review): the action taken for them is on a line missing
 * from this listing (presumably an early return) - confirm.
 *
 * Ownership: when merging with the NEXT neighbour, 'state' itself is removed
 * from the tree and freed, so callers must not touch 'state' after this call.
 */
100 static int merge_state(struct extent_io_tree *tree,
101 struct extent_state *state)
103 struct extent_state *other;
104 struct cache_extent *other_node;
106 if (state->state & EXTENT_IOBITS)
/* Previous neighbour: 'state' grows backwards and swallows it. */
109 other_node = prev_cache_extent(&state->cache_node);
111 other = container_of(other_node, struct extent_state,
113 if (other->end == state->start - 1 &&
114 other->state == state->state) {
115 state->start = other->start;
116 update_extent_state(state);
117 remove_cache_extent(&tree->state, &other->cache_node);
118 free_extent_state(other);
/* Next neighbour: it grows backwards over 'state', which is then dropped. */
121 other_node = next_cache_extent(&state->cache_node);
123 other = container_of(other_node, struct extent_state,
125 if (other->start == state->end + 1 &&
126 other->state == state->state) {
127 other->start = state->start;
128 update_extent_state(other);
129 remove_cache_extent(&tree->state, &state->cache_node);
130 free_extent_state(state);
137 * insert an extent_state struct into the tree. 'bits' are set on the
138 * struct before it is inserted.
/*
 * Insert 'state' into tree->state after OR-ing 'bits' into it and setting
 * its start, refresh its cache_node key, then attempt to merge it with
 * adjacent states.
 *
 * NOTE(review): the assignment of state->end and the handling of 'ret'
 * between the insert and the merge are on lines missing from this listing.
 * Callers compare the result against -EEXIST.
 */
140 static int insert_state(struct extent_io_tree *tree,
141 struct extent_state *state, u64 start, u64 end,
147 state->state |= bits;
148 state->start = start;
150 update_extent_state(state);
151 ret = insert_existing_cache_extent(&tree->state, &state->cache_node);
/* merge_state() may free 'state'; it must not be used after this point. */
153 merge_state(tree, state);
158 * split a given extent state struct in two, inserting the preallocated
159 * struct 'prealloc' as the newly created second half. 'split' indicates an
160 * offset inside 'orig' where it should be split.
/*
 * Split 'orig' at offset 'split'.
 *
 * 'prealloc' becomes the FIRST half: it covers [orig->start, split - 1] and
 * inherits orig's state bits, and is inserted into tree->state as a new
 * entry. 'orig' has its cache_node key refreshed; NOTE(review): the line
 * that moves orig->start to 'split' is missing from this listing - confirm.
 */
162 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
163 struct extent_state *prealloc, u64 split)
166 prealloc->start = orig->start;
167 prealloc->end = split - 1;
168 prealloc->state = orig->state;
169 update_extent_state(prealloc);
171 update_extent_state(orig);
172 ret = insert_existing_cache_extent(&tree->state,
173 &prealloc->cache_node);
179 * clear some bits on a range in the tree.
/*
 * Clear 'bits' on a single extent_state.
 *
 * 'ret' captures which of the requested bits were set before clearing. If no
 * bits remain afterwards, the state is removed from the tree and freed, so
 * the caller must not use 'state' again. The merge_state() call appears
 * after that branch; NOTE(review): the 'else' and the final return are on
 * lines missing from this listing - confirm.
 */
181 static int clear_state_bit(struct extent_io_tree *tree,
182 struct extent_state *state, int bits)
184 int ret = state->state & bits;
186 state->state &= ~bits;
187 if (state->state == 0) {
188 remove_cache_extent(&tree->state, &state->cache_node);
189 free_extent_state(state);
191 merge_state(tree, state);
197 * clear some bits on a range in the tree.
/*
 * Clear 'bits' on every extent_state overlapping the inclusive range
 * [start, end], splitting states that straddle either boundary.
 *
 * 'set' accumulates the results of clear_state_bit(), i.e. which of the
 * requested bits were actually set somewhere in the range. 'mask' is not
 * used on any visible line.
 *
 * NOTE(review): the loop/goto structure, the re-allocation of 'prealloc'
 * after it is consumed by a split, and the return statement are on lines
 * missing from this listing.
 */
199 int clear_extent_bits(struct extent_io_tree *tree, u64 start,
200 u64 end, int bits, gfp_t mask)
202 struct extent_state *state;
203 struct extent_state *prealloc = NULL;
204 struct cache_extent *node;
/* Spare state that becomes the new half whenever a split is needed. */
210 prealloc = alloc_extent_state();
215 * this search will find the extents that end after
218 node = find_first_cache_extent(&tree->state, start);
221 state = container_of(node, struct extent_state, cache_node);
/* The state found lies entirely past the requested range. */
222 if (state->start > end)
/* Remember the end now: clear_state_bit() below may free 'state'. */
224 last_end = state->end;
227 * | ---- desired range ---- |
229 * | ------------- state -------------- |
231 * We need to split the extent we found, and may flip
232 * bits on second half.
234 * If the extent we found extends past our range, we
235 * just split and search again. It'll get split again
236 * the next time though.
238 * If the extent we found is inside our range, we clear
239 * the desired bit on it.
241 if (state->start < start) {
242 err = split_state(tree, state, prealloc, start);
243 BUG_ON(err == -EEXIST);
247 if (state->end <= end) {
248 set |= clear_state_bit(tree, state, bits);
/* Avoid wrapping around when the state ends at the maximum offset. */
249 if (last_end == (u64)-1)
251 start = last_end + 1;
253 start = state->start;
258 * | ---- desired range ---- |
260 * We need to split the extent, and clear the bit
263 if (state->start <= end && state->end > end) {
264 err = split_state(tree, state, prealloc, end + 1);
265 BUG_ON(err == -EEXIST);
/* After the split, prealloc is the half that ends at 'end'. */
267 set |= clear_state_bit(tree, prealloc, bits);
272 start = state->end + 1;
273 set |= clear_state_bit(tree, state, bits);
274 if (last_end == (u64)-1)
276 start = last_end + 1;
280 free_extent_state(prealloc);
290 * set some bits on a range in the tree.
/*
 * Set 'bits' on the inclusive range [start, end]: existing states inside the
 * range get the bits OR-ed in, states straddling a boundary are split first,
 * and holes are filled by inserting new states. 'mask' is not used on any
 * visible line.
 *
 * NOTE(review): the loop/goto structure, the re-allocation of 'prealloc'
 * after it is consumed, and the return statement are on lines missing from
 * this listing.
 *
 * NOTE(review): in the final case below, split_state() gives 'prealloc' the
 * half ending at 'end' (the part inside the requested range), yet the bits
 * are OR-ed into 'state', which is the remainder beyond 'end'.
 * clear_extent_bits() operates on 'prealloc' in the matching case. This looks
 * like the bits landing on the wrong half - confirm and fix in the full file.
 */
292 int set_extent_bits(struct extent_io_tree *tree, u64 start,
293 u64 end, int bits, gfp_t mask)
295 struct extent_state *state;
296 struct extent_state *prealloc = NULL;
297 struct cache_extent *node;
/* Spare state used for splits and for filling holes. */
302 prealloc = alloc_extent_state();
307 * this search will find the extents that end after
310 node = find_first_cache_extent(&tree->state, start);
/* Inserts prealloc as a brand-new state covering the whole range. */
312 err = insert_state(tree, prealloc, start, end, bits);
313 BUG_ON(err == -EEXIST);
318 state = container_of(node, struct extent_state, cache_node);
/* Snapshot the bounds: merge_state() below may free 'state'. */
319 last_start = state->start;
320 last_end = state->end;
323 * | ---- desired range ---- |
326 * Just lock what we found and keep going
328 if (state->start == start && state->end <= end) {
329 state->state |= bits;
330 merge_state(tree, state);
/* Avoid wrapping around when the state ends at the maximum offset. */
331 if (last_end == (u64)-1)
333 start = last_end + 1;
337 * | ---- desired range ---- |
340 * | ------------- state -------------- |
342 * We need to split the extent we found, and may flip bits on
345 * If the extent we found extends past our
346 * range, we just split and search again. It'll get split
347 * again the next time though.
349 * If the extent we found is inside our range, we set the
352 if (state->start < start) {
353 err = split_state(tree, state, prealloc, start);
354 BUG_ON(err == -EEXIST);
358 if (state->end <= end) {
359 state->state |= bits;
360 start = state->end + 1;
361 merge_state(tree, state);
362 if (last_end == (u64)-1)
364 start = last_end + 1;
366 start = state->start;
371 * | ---- desired range ---- |
372 * | state | or | state |
374 * There's a hole, we need to insert something in it and
375 * ignore the extent we found.
377 if (state->start > start) {
/* The hole ends at 'end' or just before the state found, whichever is first. */
379 if (end < last_start)
382 this_end = last_start -1;
383 err = insert_state(tree, prealloc, start, this_end,
385 BUG_ON(err == -EEXIST);
389 start = this_end + 1;
393 * | ---- desired range ---- |
394 * | ---------- state ---------- |
395 * We need to split the extent, and set the bit
398 err = split_state(tree, state, prealloc, end + 1);
399 BUG_ON(err == -EEXIST);
/* NOTE(review): bits go to 'state' (beyond 'end'), not to 'prealloc'. */
401 state->state |= bits;
402 merge_state(tree, prealloc);
406 free_extent_state(prealloc);
/* Convenience wrapper: set EXTENT_DIRTY on [start, end] via set_extent_bits(). */
414 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
417 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
/* Convenience wrapper: clear EXTENT_DIRTY on [start, end] via clear_extent_bits(). */
420 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
423 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
/*
 * Starting from the state found for 'start', walk forward through
 * tree->state until a state is found that ends at or after 'start' and has
 * at least one of 'bits' set; its inclusive range is stored in *start_ret
 * and *end_ret.
 *
 * NOTE(review): the loop construct and the return values for the found /
 * not-found cases are on lines missing from this listing.
 */
426 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
427 u64 *start_ret, u64 *end_ret, int bits)
429 struct cache_extent *node;
430 struct extent_state *state;
434 * this search will find all the extents that end after
437 node = find_first_cache_extent(&tree->state, start);
442 state = container_of(node, struct extent_state, cache_node);
443 if (state->end >= start && (state->state & bits)) {
444 *start_ret = state->start;
445 *end_ret = state->end;
449 node = next_cache_extent(node);
/*
 * Test whether 'bits' are set within the inclusive range [start, end].
 *
 * Walks the states from the one found for 'start' while start <= end,
 * advancing 'start' past each state examined. With 'filled' non-zero, a
 * state that begins after the current 'start' (i.e. a gap in coverage) is
 * handled specially.
 *
 * NOTE(review): the result assignments, break statements and the return are
 * on lines missing from this listing, so the exact any-vs-all semantics of
 * 'filled' cannot be confirmed here.
 */
457 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
458 int bits, int filled)
460 struct extent_state *state = NULL;
461 struct cache_extent *node;
464 node = find_first_cache_extent(&tree->state, start);
465 while (node && start <= end) {
466 state = container_of(node, struct extent_state, cache_node);
/* A gap precedes this state. */
468 if (filled && state->start > start) {
/* This state lies entirely past the requested range. */
472 if (state->start > end)
474 if (state->state & bits) {
482 start = state->end + 1;
485 node = next_cache_extent(node);
/*
 * Store 'private' in the extent_state found for 'start'. The state's start
 * is compared against 'start'; the store is reached after that check.
 *
 * NOTE(review): the handling of a missing node or a start mismatch, and the
 * return values, are on lines missing from this listing.
 */
495 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
497 struct cache_extent *node;
498 struct extent_state *state;
501 node = find_first_cache_extent(&tree->state, start);
506 state = container_of(node, struct extent_state, cache_node);
507 if (state->start != start) {
511 state->private = private;
/*
 * Read the private value of the extent_state found for 'start' into
 * *private. The state's start is compared against 'start' first.
 *
 * NOTE(review): the handling of a missing node or a start mismatch, and the
 * return values, are on lines missing from this listing.
 */
516 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
518 struct cache_extent *node;
519 struct extent_state *state;
522 node = find_first_cache_extent(&tree->state, start);
527 state = container_of(node, struct extent_state, cache_node);
528 if (state->start != start) {
532 *private = state->private;
/*
 * Shrink the buffer cache when tree->cache_size has reached cache_max.
 *
 * Scans tree->lru from the head using the _safe iterator, because entries
 * are unlinked or moved during the walk. Buffers are passed to
 * free_extent_buffer() and the size check is repeated afterwards; other
 * buffers are moved to the list tail.
 *
 * NOTE(review): the condition deciding between freeing and re-queuing a
 * buffer, and any scan limit, are on lines missing from this listing.
 */
537 static int free_some_buffers(struct extent_io_tree *tree)
540 struct extent_buffer *eb;
541 struct list_head *node, *next;
543 if (tree->cache_size < cache_max)
545 list_for_each_safe(node, next, &tree->lru) {
546 eb = list_entry(node, struct extent_buffer, lru);
548 free_extent_buffer(eb);
549 if (tree->cache_size < cache_max)
552 list_move_tail(&eb->lru, &tree->lru);
/*
 * Allocate and register a new extent_buffer for [bytenr, bytenr + blocksize).
 *
 * Header and data are obtained with a single malloc() of
 * sizeof(struct extent_buffer) + blocksize and zeroed. The buffer is keyed
 * in tree->cache by (bytenr, blocksize), appended to the LRU tail, and
 * blocksize is added to tree->cache_size.
 *
 * NOTE(review): the malloc() failure check, the remaining field
 * initialization, the handling of an insert failure and the return are on
 * lines missing from this listing.
 */
560 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
561 u64 bytenr, u32 blocksize)
563 struct extent_buffer *eb;
566 eb = malloc(sizeof(struct extent_buffer) + blocksize);
571 memset(eb, 0, sizeof(struct extent_buffer) + blocksize);
/* (u64)-1: no device offset assigned yet - presumably a sentinel; confirm. */
579 eb->dev_bytenr = (u64)-1;
580 eb->cache_node.start = bytenr;
581 eb->cache_node.size = blocksize;
/* Trim the cache before the new buffer is linked in. */
583 free_some_buffers(tree);
584 ret = insert_existing_cache_extent(&tree->cache, &eb->cache_node);
589 list_add_tail(&eb->lru, &tree->lru);
590 tree->cache_size += blocksize;
/*
 * Drop a reference on an extent_buffer.
 *
 * The teardown path asserts the buffer is not dirty, unlinks it from the
 * LRU list and from tree->cache, and subtracts eb->len from
 * tree->cache_size (asserting the accounting cannot underflow).
 *
 * NOTE(review): the NULL check, the refcount decrement, the condition that
 * selects the teardown path and the final free() are on lines missing from
 * this listing.
 */
594 void free_extent_buffer(struct extent_buffer *eb)
600 BUG_ON(eb->refs < 0);
602 struct extent_io_tree *tree = eb->tree;
603 BUG_ON(eb->flags & EXTENT_DIRTY);
604 list_del_init(&eb->lru);
605 remove_cache_extent(&tree->cache, &eb->cache_node);
606 BUG_ON(tree->cache_size < eb->len);
607 tree->cache_size -= eb->len;
/*
 * Look up a cached extent_buffer that matches (bytenr, blocksize) exactly.
 * On a hit the buffer is moved to the LRU tail, the end that
 * free_some_buffers() reaches last.
 *
 * NOTE(review): reference counting on the hit path and the return are on
 * lines missing from this listing.
 */
612 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
613 u64 bytenr, u32 blocksize)
615 struct extent_buffer *eb = NULL;
616 struct cache_extent *cache;
618 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
/* An overlapping entry with a different start or size does not count. */
619 if (cache && cache->start == bytenr && cache->size == blocksize) {
620 eb = container_of(cache, struct extent_buffer, cache_node);
621 list_move_tail(&eb->lru, &tree->lru);
/*
 * Return the extent_buffer that find_first_cache_extent() yields for
 * 'start' in tree->cache, moving it to the LRU tail.
 *
 * NOTE(review): the NULL check on 'cache', reference counting and the
 * return are on lines missing from this listing.
 */
627 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
630 struct extent_buffer *eb = NULL;
631 struct cache_extent *cache;
633 cache = find_first_cache_extent(&tree->cache, start);
635 eb = container_of(cache, struct extent_buffer, cache_node);
636 list_move_tail(&eb->lru, &tree->lru);
/*
 * Get the extent_buffer for (bytenr, blocksize), creating it if needed.
 *
 * An exact cache hit is reused and moved to the LRU tail. Otherwise a new
 * buffer is created with __alloc_extent_buffer(); before that, a cache entry
 * found by the lookup is passed to free_extent_buffer().
 *
 * NOTE(review): the 'else' branches, the condition guarding the
 * free_extent_buffer() call, reference counting and the return are on lines
 * missing from this listing.
 */
642 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
643 u64 bytenr, u32 blocksize)
645 struct extent_buffer *eb;
646 struct cache_extent *cache;
648 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
649 if (cache && cache->start == bytenr && cache->size == blocksize) {
650 eb = container_of(cache, struct extent_buffer, cache_node);
651 list_move_tail(&eb->lru, &tree->lru);
/* Non-matching entry returned by the lookup: release it first. */
655 eb = container_of(cache, struct extent_buffer,
657 free_extent_buffer(eb);
659 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
/*
 * Fill eb->data by reading eb->len bytes from eb->fd at offset
 * eb->dev_bytenr with a single pread(). A transfer count different from
 * eb->len (short read) is detected.
 *
 * NOTE(review): the handling of a negative pread() result and the returned
 * error codes are on lines missing from this listing.
 */
664 int read_extent_from_disk(struct extent_buffer *eb)
667 ret = pread(eb->fd, eb->data, eb->len, eb->dev_bytenr);
670 if (ret != eb->len) {
/*
 * Write eb->len bytes of eb->data to eb->fd at offset eb->dev_bytenr with a
 * single pwrite(). A transfer count different from eb->len (short write) is
 * detected.
 *
 * NOTE(review): the handling of a negative pwrite() result and the returned
 * error codes are on lines missing from this listing.
 */
679 int write_extent_to_disk(struct extent_buffer *eb)
682 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
685 if (ret != eb->len) {
/* Mark the buffer's contents as valid by setting EXTENT_UPTODATE in eb->flags. */
694 int set_extent_buffer_uptodate(struct extent_buffer *eb)
696 eb->flags |= EXTENT_UPTODATE;
/*
 * Clear EXTENT_UPTODATE in eb->flags. 'tree' is not referenced on the
 * visible line.
 */
700 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
701 struct extent_buffer *eb)
703 eb->flags &= ~EXTENT_UPTODATE;
/*
 * Test whether EXTENT_UPTODATE is set in eb->flags.
 * NOTE(review): the returned values are on lines missing from this listing.
 */
707 int extent_buffer_uptodate(struct extent_buffer *eb)
709 if (eb->flags & EXTENT_UPTODATE)
/*
 * Mark a buffer dirty. If it is not dirty already: set EXTENT_DIRTY in
 * eb->flags, mark the buffer's inclusive byte range
 * [eb->start, eb->start + eb->len - 1] dirty in the owning tree, and take an
 * extra reference via extent_buffer_get(). That reference is dropped again
 * by clear_extent_buffer_dirty().
 */
714 int set_extent_buffer_dirty(struct extent_buffer *eb)
716 struct extent_io_tree *tree = eb->tree;
717 if (!(eb->flags & EXTENT_DIRTY)) {
718 eb->flags |= EXTENT_DIRTY;
719 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
720 extent_buffer_get(eb);
/*
 * Undo set_extent_buffer_dirty(). If the buffer is dirty: clear EXTENT_DIRTY
 * in eb->flags, clear the dirty bit on the buffer's inclusive byte range in
 * the owning tree, and drop one reference via free_extent_buffer(). The flag
 * is cleared before that call because free_extent_buffer() asserts on
 * EXTENT_DIRTY in its teardown path.
 */
725 int clear_extent_buffer_dirty(struct extent_buffer *eb)
727 struct extent_io_tree *tree = eb->tree;
728 if (eb->flags & EXTENT_DIRTY) {
729 eb->flags &= ~EXTENT_DIRTY;
730 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
731 free_extent_buffer(eb);
/*
 * Compare 'len' bytes of the buffer's data, beginning at byte offset
 * 'start', with the memory at 'ptrv'. Returns the memcmp() result. No bounds
 * check against the buffer length is performed here.
 */
736 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
737 unsigned long start, unsigned long len)
739 return memcmp(eb->data + start, ptrv, len);
/*
 * Copy 'len' bytes out of the buffer's data, beginning at byte offset
 * 'start', into 'dst'. No bounds check against the buffer length is
 * performed here.
 */
742 void read_extent_buffer(struct extent_buffer *eb, void *dst,
743 unsigned long start, unsigned long len)
745 memcpy(dst, eb->data + start, len);
/*
 * Copy 'len' bytes from 'src' into the buffer's data at byte offset 'start'.
 * No bounds check against the buffer length is performed here.
 */
748 void write_extent_buffer(struct extent_buffer *eb, const void *src,
749 unsigned long start, unsigned long len)
751 memcpy(eb->data + start, src, len);
/*
 * Copy 'len' bytes from src's data at 'src_offset' to dst's data at
 * 'dst_offset'. Uses memcpy(), so the two regions must not overlap; no
 * bounds checks are performed here.
 */
754 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
755 unsigned long dst_offset, unsigned long src_offset,
758 memcpy(dst->data + dst_offset, src->data + src_offset, len);
/*
 * Copy 'len' bytes within a single buffer, from 'src_offset' to
 * 'dst_offset'. Source and destination live in the same data area and
 * memcpy() has undefined behavior for overlapping regions, so callers must
 * guarantee the two ranges are disjoint; use memmove_extent_buffer() when
 * they may overlap.
 */
761 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
762 unsigned long src_offset, unsigned long len)
764 memcpy(dst->data + dst_offset, dst->data + src_offset, len);
/*
 * Move 'len' bytes within a single buffer, from 'src_offset' to
 * 'dst_offset'. Uses memmove(), so the ranges may overlap. No bounds checks
 * are performed here.
 */
767 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
768 unsigned long src_offset, unsigned long len)
770 memmove(dst->data + dst_offset, dst->data + src_offset, len);
/*
 * Fill 'len' bytes of the buffer's data, beginning at byte offset 'start',
 * with the byte value 'c'. No bounds check against the buffer length is
 * performed here.
 */
773 void memset_extent_buffer(struct extent_buffer *eb, char c,
774 unsigned long start, unsigned long len)
776 memset(eb->data + start, c, len);