2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
18 #define _XOPEN_SOURCE 600
22 #include <sys/types.h>
26 #include "kerncompat.h"
27 #include "extent_io.h"
30 u64 cache_max = 1024 * 1024 * 32;
32 void extent_io_tree_init(struct extent_io_tree *tree)
34 cache_tree_init(&tree->state);
35 cache_tree_init(&tree->cache);
36 INIT_LIST_HEAD(&tree->lru);
40 static struct extent_state *alloc_extent_state(void)
42 struct extent_state *state;
44 state = malloc(sizeof(*state));
53 static void free_extent_state(struct extent_state *state)
56 BUG_ON(state->refs < 0);
61 void extent_io_tree_cleanup(struct extent_io_tree *tree)
63 struct extent_state *es;
64 struct extent_buffer *eb;
65 struct cache_extent *cache;
67 while(!list_empty(&tree->lru)) {
68 eb = list_entry(tree->lru.next, struct extent_buffer, lru);
70 fprintf(stderr, "extent buffer leak: "
71 "start %Lu len %u\n", eb->start, eb->len);
74 free_extent_buffer(eb);
77 cache = find_first_cache_extent(&tree->state, 0);
80 es = container_of(cache, struct extent_state, cache_node);
81 remove_cache_extent(&tree->state, &es->cache_node);
82 free_extent_state(es);
86 static inline void update_extent_state(struct extent_state *state)
88 state->cache_node.start = state->start;
89 state->cache_node.size = state->end + 1 - state->start;
93 * Utility function to look for merge candidates inside a given range.
94 * Any extents with matching state are merged together into a single
95 * extent in the tree. Extents with EXTENT_IO in their state field are
98 static int merge_state(struct extent_io_tree *tree,
99 struct extent_state *state)
101 struct extent_state *other;
102 struct cache_extent *other_node;
104 if (state->state & EXTENT_IOBITS)
107 other_node = prev_cache_extent(&state->cache_node);
109 other = container_of(other_node, struct extent_state,
111 if (other->end == state->start - 1 &&
112 other->state == state->state) {
113 state->start = other->start;
114 update_extent_state(state);
115 remove_cache_extent(&tree->state, &other->cache_node);
116 free_extent_state(other);
119 other_node = next_cache_extent(&state->cache_node);
121 other = container_of(other_node, struct extent_state,
123 if (other->start == state->end + 1 &&
124 other->state == state->state) {
125 other->start = state->start;
126 update_extent_state(other);
127 remove_cache_extent(&tree->state, &state->cache_node);
128 free_extent_state(state);
135 * insert an extent_state struct into the tree. 'bits' are set on the
136 * struct before it is inserted.
138 static int insert_state(struct extent_io_tree *tree,
139 struct extent_state *state, u64 start, u64 end,
145 state->state |= bits;
146 state->start = start;
148 update_extent_state(state);
149 ret = insert_existing_cache_extent(&tree->state, &state->cache_node);
151 merge_state(tree, state);
156 * split a given extent state struct in two, inserting the preallocated
157 * struct 'prealloc' as the newly created second half. 'split' indicates an
158 * offset inside 'orig' where it should be split.
160 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
161 struct extent_state *prealloc, u64 split)
164 prealloc->start = orig->start;
165 prealloc->end = split - 1;
166 prealloc->state = orig->state;
167 update_extent_state(prealloc);
169 update_extent_state(orig);
170 ret = insert_existing_cache_extent(&tree->state,
171 &prealloc->cache_node);
177 * clear some bits on a range in the tree.
179 static int clear_state_bit(struct extent_io_tree *tree,
180 struct extent_state *state, int bits)
182 int ret = state->state & bits;
184 state->state &= ~bits;
185 if (state->state == 0) {
186 remove_cache_extent(&tree->state, &state->cache_node);
187 free_extent_state(state);
189 merge_state(tree, state);
195 * clear some bits on a range in the tree.
197 int clear_extent_bits(struct extent_io_tree *tree, u64 start,
198 u64 end, int bits, gfp_t mask)
200 struct extent_state *state;
201 struct extent_state *prealloc = NULL;
202 struct cache_extent *node;
207 prealloc = alloc_extent_state();
212 * this search will find the extents that end after
215 node = find_first_cache_extent(&tree->state, start);
218 state = container_of(node, struct extent_state, cache_node);
219 if (state->start > end)
223 * | ---- desired range ---- |
225 * | ------------- state -------------- |
227 * We need to split the extent we found, and may flip
228 * bits on second half.
230 * If the extent we found extends past our range, we
231 * just split and search again. It'll get split again
232 * the next time through.
234 * If the extent we found is inside our range, we clear
235 * the desired bit on it.
237 if (state->start < start) {
238 err = split_state(tree, state, prealloc, start);
239 BUG_ON(err == -EEXIST);
243 if (state->end <= end) {
244 start = state->end + 1;
245 set |= clear_state_bit(tree, state, bits);
247 start = state->start;
252 * | ---- desired range ---- |
254 * We need to split the extent, and clear the bit
257 if (state->start <= end && state->end > end) {
258 err = split_state(tree, state, prealloc, end + 1);
259 BUG_ON(err == -EEXIST);
261 set |= clear_state_bit(tree, prealloc, bits);
266 start = state->end + 1;
267 set |= clear_state_bit(tree, state, bits);
271 free_extent_state(prealloc);
281 * set some bits on a range in the tree.
283 int set_extent_bits(struct extent_io_tree *tree, u64 start,
284 u64 end, int bits, gfp_t mask)
286 struct extent_state *state;
287 struct extent_state *prealloc = NULL;
288 struct cache_extent *node;
294 prealloc = alloc_extent_state();
299 * this search will find the extents that end after
302 node = find_first_cache_extent(&tree->state, start);
304 err = insert_state(tree, prealloc, start, end, bits);
305 BUG_ON(err == -EEXIST);
310 state = container_of(node, struct extent_state, cache_node);
311 last_start = state->start;
312 last_end = state->end;
315 * | ---- desired range ---- |
318 * Just lock what we found and keep going
320 if (state->start == start && state->end <= end) {
321 set = state->state & bits;
322 state->state |= bits;
323 start = state->end + 1;
324 merge_state(tree, state);
328 * | ---- desired range ---- |
331 * | ------------- state -------------- |
333 * We need to split the extent we found, and may flip bits on
336 * If the extent we found extends past our
337 * range, we just split and search again. It'll get split
338 * again the next time through.
340 * If the extent we found is inside our range, we set the
343 if (state->start < start) {
344 set = state->state & bits;
345 err = split_state(tree, state, prealloc, start);
346 BUG_ON(err == -EEXIST);
350 if (state->end <= end) {
351 state->state |= bits;
352 start = state->end + 1;
353 merge_state(tree, state);
355 start = state->start;
360 * | ---- desired range ---- |
361 * | state | or | state |
363 * There's a hole, we need to insert something in it and
364 * ignore the extent we found.
366 if (state->start > start) {
368 if (end < last_start)
371 this_end = last_start -1;
372 err = insert_state(tree, prealloc, start, this_end,
374 BUG_ON(err == -EEXIST);
378 start = this_end + 1;
382 * | ---- desired range ---- |
383 * | ---------- state ---------- |
384 * We need to split the extent, and set the bit
387 set = state->state & bits;
388 err = split_state(tree, state, prealloc, end + 1);
389 BUG_ON(err == -EEXIST);
391 state->state |= bits;
392 merge_state(tree, prealloc);
396 free_extent_state(prealloc);
404 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
407 return set_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
410 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
413 return clear_extent_bits(tree, start, end, EXTENT_DIRTY, mask);
416 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
417 u64 *start_ret, u64 *end_ret, int bits)
419 struct cache_extent *node;
420 struct extent_state *state;
424 * this search will find all the extents that end after
427 node = find_first_cache_extent(&tree->state, start);
432 state = container_of(node, struct extent_state, cache_node);
433 if (state->end >= start && (state->state & bits)) {
434 *start_ret = state->start;
435 *end_ret = state->end;
439 node = next_cache_extent(node);
447 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
448 int bits, int filled)
450 struct extent_state *state = NULL;
451 struct cache_extent *node;
454 node = find_first_cache_extent(&tree->state, start);
455 while (node && start <= end) {
456 state = container_of(node, struct extent_state, cache_node);
458 if (filled && state->start > start) {
462 if (state->start > end)
464 if (state->state & bits) {
472 start = state->end + 1;
475 node = next_cache_extent(node);
485 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
487 struct cache_extent *node;
488 struct extent_state *state;
491 node = find_first_cache_extent(&tree->state, start);
496 state = container_of(node, struct extent_state, cache_node);
497 if (state->start != start) {
501 state->private = private;
506 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
508 struct cache_extent *node;
509 struct extent_state *state;
512 node = find_first_cache_extent(&tree->state, start);
517 state = container_of(node, struct extent_state, cache_node);
518 if (state->start != start) {
522 *private = state->private;
527 static int free_some_buffers(struct extent_io_tree *tree)
530 struct extent_buffer *eb;
531 struct list_head *node, *next;
533 if (tree->cache_size < cache_max)
535 list_for_each_safe(node, next, &tree->lru) {
536 eb = list_entry(node, struct extent_buffer, lru);
538 free_extent_buffer(eb);
539 if (tree->cache_size < cache_max)
548 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
549 u64 bytenr, u32 blocksize)
551 struct extent_buffer *eb;
554 eb = malloc(sizeof(struct extent_buffer) + blocksize);
566 eb->dev_bytenr = (u64)-1;
567 eb->cache_node.start = bytenr;
568 eb->cache_node.size = blocksize;
570 free_some_buffers(tree);
571 ret = insert_existing_cache_extent(&tree->cache, &eb->cache_node);
576 list_add_tail(&eb->lru, &tree->lru);
577 tree->cache_size += blocksize;
581 void free_extent_buffer(struct extent_buffer *eb)
587 BUG_ON(eb->refs < 0);
589 struct extent_io_tree *tree = eb->tree;
590 BUG_ON(eb->flags & EXTENT_DIRTY);
591 list_del_init(&eb->lru);
592 remove_cache_extent(&tree->cache, &eb->cache_node);
593 BUG_ON(tree->cache_size < eb->len);
594 tree->cache_size -= eb->len;
599 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
600 u64 bytenr, u32 blocksize)
602 struct extent_buffer *eb = NULL;
603 struct cache_extent *cache;
605 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
606 if (cache && cache->start == bytenr && cache->size == blocksize) {
607 eb = container_of(cache, struct extent_buffer, cache_node);
608 list_move_tail(&eb->lru, &tree->lru);
614 struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
617 struct extent_buffer *eb = NULL;
618 struct cache_extent *cache;
620 cache = find_first_cache_extent(&tree->cache, start);
622 eb = container_of(cache, struct extent_buffer, cache_node);
623 list_move_tail(&eb->lru, &tree->lru);
629 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
630 u64 bytenr, u32 blocksize)
632 struct extent_buffer *eb;
633 struct cache_extent *cache;
635 cache = find_cache_extent(&tree->cache, bytenr, blocksize);
636 if (cache && cache->start == bytenr && cache->size == blocksize) {
637 eb = container_of(cache, struct extent_buffer, cache_node);
638 list_move_tail(&eb->lru, &tree->lru);
642 eb = container_of(cache, struct extent_buffer,
644 BUG_ON(eb->refs != 1);
645 free_extent_buffer(eb);
647 eb = __alloc_extent_buffer(tree, bytenr, blocksize);
652 int read_extent_from_disk(struct extent_buffer *eb)
655 ret = pread(eb->fd, eb->data, eb->len, eb->dev_bytenr);
658 if (ret != eb->len) {
667 int write_extent_to_disk(struct extent_buffer *eb)
670 ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
673 if (ret != eb->len) {
682 int set_extent_buffer_uptodate(struct extent_buffer *eb)
684 eb->flags |= EXTENT_UPTODATE;
688 int extent_buffer_uptodate(struct extent_buffer *eb)
690 if (eb->flags & EXTENT_UPTODATE)
695 int set_extent_buffer_dirty(struct extent_buffer *eb)
697 struct extent_io_tree *tree = eb->tree;
698 if (!(eb->flags & EXTENT_DIRTY)) {
699 eb->flags |= EXTENT_DIRTY;
700 set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
701 extent_buffer_get(eb);
706 int clear_extent_buffer_dirty(struct extent_buffer *eb)
708 struct extent_io_tree *tree = eb->tree;
709 if (eb->flags & EXTENT_DIRTY) {
710 eb->flags &= ~EXTENT_DIRTY;
711 clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1, 0);
712 free_extent_buffer(eb);
717 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
718 unsigned long start, unsigned long len)
720 return memcmp(eb->data + start, ptrv, len);
723 void read_extent_buffer(struct extent_buffer *eb, void *dst,
724 unsigned long start, unsigned long len)
726 memcpy(dst, eb->data + start, len);
729 void write_extent_buffer(struct extent_buffer *eb, const void *src,
730 unsigned long start, unsigned long len)
732 memcpy(eb->data + start, src, len);
735 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
736 unsigned long dst_offset, unsigned long src_offset,
739 memcpy(dst->data + dst_offset, src->data + src_offset, len);
742 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
743 unsigned long src_offset, unsigned long len)
745 memcpy(dst->data + dst_offset, dst->data + src_offset, len);
748 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
749 unsigned long src_offset, unsigned long len)
751 memmove(dst->data + dst_offset, dst->data + src_offset, len);
754 void memset_extent_buffer(struct extent_buffer *eb, char c,
755 unsigned long start, unsigned long len)
757 memset(eb->data + start, c, len);