platform/adaptation/renesas_rcar/renesas_kernel.git: fs/btrfs/free-space-cache.c (commit 4772f3a8e7e59ef00856dc44364c2c30723160ce)
1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30
31 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35                            struct btrfs_free_space *info);
36 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
37                               struct btrfs_free_space *info);
38
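/*
 * Look up the inode that backs the free space cache keyed at @offset: find
 * the free space header item in @root, read the inode location stored in it
 * and iget the inode itself.
 */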
39 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
40                                                struct btrfs_path *path,
41                                                u64 offset)
42 {
43         struct btrfs_key key;
44         struct btrfs_key location;
45         struct btrfs_disk_key disk_key;
46         struct btrfs_free_space_header *header;
47         struct extent_buffer *leaf;
48         struct inode *inode = NULL;
49         int ret;
50
51         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
52         key.offset = offset;
53         key.type = 0;
54
55         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
56         if (ret < 0)
57                 return ERR_PTR(ret);
58         if (ret > 0) {
59                 btrfs_release_path(path);
60                 return ERR_PTR(-ENOENT);
61         }
62
63         leaf = path->nodes[0];
64         header = btrfs_item_ptr(leaf, path->slots[0],
65                                 struct btrfs_free_space_header);
66         btrfs_free_space_key(leaf, header, &disk_key);
67         btrfs_disk_key_to_cpu(&location, &disk_key);
68         btrfs_release_path(path);
69
70         inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
71         if (!inode)
72                 return ERR_PTR(-ENOENT);
73         if (IS_ERR(inode))
74                 return inode;
75         if (is_bad_inode(inode)) {
76                 iput(inode);
77                 return ERR_PTR(-ENOENT);
78         }
79
80         mapping_set_gfp_mask(inode->i_mapping,
81                         mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
82
83         return inode;
84 }
85
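/*
 * Return the free space cache inode for @block_group, using the reference
 * cached in the block group when possible.  Old style inodes (missing
 * NODATASUM/NODATACOW) are converted and the on-disk cache is marked to be
 * cleared.
 */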
86 struct inode *lookup_free_space_inode(struct btrfs_root *root,
87                                       struct btrfs_block_group_cache
88                                       *block_group, struct btrfs_path *path)
89 {
90         struct inode *inode = NULL;
91         u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
92
93         spin_lock(&block_group->lock);
94         if (block_group->inode)
95                 inode = igrab(block_group->inode);
96         spin_unlock(&block_group->lock);
97         if (inode)
98                 return inode;
99
100         inode = __lookup_free_space_inode(root, path,
101                                           block_group->key.objectid);
102         if (IS_ERR(inode))
103                 return inode;
104
105         spin_lock(&block_group->lock);
106         if (!((BTRFS_I(inode)->flags & flags) == flags)) {
107                 btrfs_info(root->fs_info,
108                         "Old style space inode found, converting.");
109                 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
110                         BTRFS_INODE_NODATACOW;
111                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
112         }
113
114         if (!block_group->iref) {
115                 block_group->inode = igrab(inode);
116                 block_group->iref = 1;
117         }
118         spin_unlock(&block_group->lock);
119
120         return inode;
121 }
122
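/*
 * Insert the inode item and free space header item that back a free space
 * cache.  The inode is a plain 0600 regular file with NOCOMPRESS and PREALLOC
 * set; block group caches additionally get NODATASUM and NODATACOW.
 */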
123 static int __create_free_space_inode(struct btrfs_root *root,
124                                      struct btrfs_trans_handle *trans,
125                                      struct btrfs_path *path,
126                                      u64 ino, u64 offset)
127 {
128         struct btrfs_key key;
129         struct btrfs_disk_key disk_key;
130         struct btrfs_free_space_header *header;
131         struct btrfs_inode_item *inode_item;
132         struct extent_buffer *leaf;
133         u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
134         int ret;
135
136         ret = btrfs_insert_empty_inode(trans, root, path, ino);
137         if (ret)
138                 return ret;
139
140         /* We inline crc's for the free disk space cache */
141         if (ino != BTRFS_FREE_INO_OBJECTID)
142                 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
143
144         leaf = path->nodes[0];
145         inode_item = btrfs_item_ptr(leaf, path->slots[0],
146                                     struct btrfs_inode_item);
147         btrfs_item_key(leaf, &disk_key, path->slots[0]);
148         memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
149                              sizeof(*inode_item));
150         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
151         btrfs_set_inode_size(leaf, inode_item, 0);
152         btrfs_set_inode_nbytes(leaf, inode_item, 0);
153         btrfs_set_inode_uid(leaf, inode_item, 0);
154         btrfs_set_inode_gid(leaf, inode_item, 0);
155         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
156         btrfs_set_inode_flags(leaf, inode_item, flags);
157         btrfs_set_inode_nlink(leaf, inode_item, 1);
158         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
159         btrfs_set_inode_block_group(leaf, inode_item, offset);
160         btrfs_mark_buffer_dirty(leaf);
161         btrfs_release_path(path);
162
163         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
164         key.offset = offset;
165         key.type = 0;
166
167         ret = btrfs_insert_empty_item(trans, root, path, &key,
168                                       sizeof(struct btrfs_free_space_header));
169         if (ret < 0) {
170                 btrfs_release_path(path);
171                 return ret;
172         }
173         leaf = path->nodes[0];
174         header = btrfs_item_ptr(leaf, path->slots[0],
175                                 struct btrfs_free_space_header);
176         memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
177         btrfs_set_free_space_key(leaf, header, &disk_key);
178         btrfs_mark_buffer_dirty(leaf);
179         btrfs_release_path(path);
180
181         return 0;
182 }
183
184 int create_free_space_inode(struct btrfs_root *root,
185                             struct btrfs_trans_handle *trans,
186                             struct btrfs_block_group_cache *block_group,
187                             struct btrfs_path *path)
188 {
189         int ret;
190         u64 ino;
191
192         ret = btrfs_find_free_objectid(root, &ino);
193         if (ret < 0)
194                 return ret;
195
196         return __create_free_space_inode(root, trans, path, ino,
197                                          block_group->key.objectid);
198 }
199
200 int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
201                                        struct btrfs_block_rsv *rsv)
202 {
203         u64 needed_bytes;
204         int ret;
205
206         /* 1 for slack space, 1 for updating the inode */
207         needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
208                 btrfs_calc_trans_metadata_size(root, 1);
209
210         spin_lock(&rsv->lock);
211         if (rsv->reserved < needed_bytes)
212                 ret = -ENOSPC;
213         else
214                 ret = 0;
215         spin_unlock(&rsv->lock);
216         return ret;
217 }
218
219 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
220                                     struct btrfs_trans_handle *trans,
221                                     struct inode *inode)
222 {
223         int ret = 0;
224
225         btrfs_i_size_write(inode, 0);
226         truncate_pagecache(inode, 0);
227
228         /*
229          * We don't need an orphan item because truncating the free space cache
230          * will never be split across transactions.
231          */
232         ret = btrfs_truncate_inode_items(trans, root, inode,
233                                          0, BTRFS_EXTENT_DATA_KEY);
234         if (ret) {
235                 btrfs_abort_transaction(trans, root, ret);
236                 return ret;
237         }
238
239         ret = btrfs_update_inode(trans, root, inode);
240         if (ret)
241                 btrfs_abort_transaction(trans, root, ret);
242
243         return ret;
244 }
245
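/* Start readahead on every page of the space cache inode before parsing it. */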
246 static int readahead_cache(struct inode *inode)
247 {
248         struct file_ra_state *ra;
249         unsigned long last_index;
250
251         ra = kzalloc(sizeof(*ra), GFP_NOFS);
252         if (!ra)
253                 return -ENOMEM;
254
255         file_ra_state_init(ra, inode->i_mapping);
256         last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
257
258         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
259
260         kfree(ra);
261
262         return 0;
263 }
264
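/*
 * State used while streaming a space cache to or from its page cache pages.
 * One page is kmapped at a time: cur/orig track the cursor and the start of
 * the mapped page, size is the room left in it, and when check_crcs is set a
 * crc32c for each page is stored at the front of the first page.
 */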
265 struct io_ctl {
266         void *cur, *orig;
267         struct page *page;
268         struct page **pages;
269         struct btrfs_root *root;
270         unsigned long size;
271         int index;
272         int num_pages;
273         unsigned check_crcs:1;
274 };
275
276 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
277                        struct btrfs_root *root)
278 {
279         memset(io_ctl, 0, sizeof(struct io_ctl));
280         io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
281                 PAGE_CACHE_SHIFT;
282         io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
283                                 GFP_NOFS);
284         if (!io_ctl->pages)
285                 return -ENOMEM;
286         io_ctl->root = root;
287         if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
288                 io_ctl->check_crcs = 1;
289         return 0;
290 }
291
292 static void io_ctl_free(struct io_ctl *io_ctl)
293 {
294         kfree(io_ctl->pages);
295 }
296
297 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
298 {
299         if (io_ctl->cur) {
300                 kunmap(io_ctl->page);
301                 io_ctl->cur = NULL;
302                 io_ctl->orig = NULL;
303         }
304 }
305
306 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
307 {
308         ASSERT(io_ctl->index < io_ctl->num_pages);
309         io_ctl->page = io_ctl->pages[io_ctl->index++];
310         io_ctl->cur = kmap(io_ctl->page);
311         io_ctl->orig = io_ctl->cur;
312         io_ctl->size = PAGE_CACHE_SIZE;
313         if (clear)
314                 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
315 }
316
317 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
318 {
319         int i;
320
321         io_ctl_unmap_page(io_ctl);
322
323         for (i = 0; i < io_ctl->num_pages; i++) {
324                 if (io_ctl->pages[i]) {
325                         ClearPageChecked(io_ctl->pages[i]);
326                         unlock_page(io_ctl->pages[i]);
327                         page_cache_release(io_ctl->pages[i]);
328                 }
329         }
330 }
331
332 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
333                                 int uptodate)
334 {
335         struct page *page;
336         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
337         int i;
338
339         for (i = 0; i < io_ctl->num_pages; i++) {
340                 page = find_or_create_page(inode->i_mapping, i, mask);
341                 if (!page) {
342                         io_ctl_drop_pages(io_ctl);
343                         return -ENOMEM;
344                 }
345                 io_ctl->pages[i] = page;
346                 if (uptodate && !PageUptodate(page)) {
347                         btrfs_readpage(NULL, page);
348                         lock_page(page);
349                         if (!PageUptodate(page)) {
350                                 printk(KERN_ERR "btrfs: error reading free "
351                                        "space cache\n");
352                                 io_ctl_drop_pages(io_ctl);
353                                 return -EIO;
354                         }
355                 }
356         }
357
358         for (i = 0; i < io_ctl->num_pages; i++) {
359                 clear_page_dirty_for_io(io_ctl->pages[i]);
360                 set_page_extent_mapped(io_ctl->pages[i]);
361         }
362
363         return 0;
364 }
365
366 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
367 {
368         __le64 *val;
369
370         io_ctl_map_page(io_ctl, 1);
371
372         /*
373          * Skip the csum areas.  If we don't check crcs then we just have a
374          * 64bit chunk at the front of the first page.
375          */
376         if (io_ctl->check_crcs) {
377                 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
378                 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
379         } else {
380                 io_ctl->cur += sizeof(u64);
381                 io_ctl->size -= sizeof(u64) * 2;
382         }
383
384         val = io_ctl->cur;
385         *val = cpu_to_le64(generation);
386         io_ctl->cur += sizeof(u64);
387 }
388
389 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
390 {
391         __le64 *gen;
392
393         /*
394          * Skip the crc area.  If we don't check crcs then we just have a 64bit
395          * chunk at the front of the first page.
396          */
397         if (io_ctl->check_crcs) {
398                 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
399                 io_ctl->size -= sizeof(u64) +
400                         (sizeof(u32) * io_ctl->num_pages);
401         } else {
402                 io_ctl->cur += sizeof(u64);
403                 io_ctl->size -= sizeof(u64) * 2;
404         }
405
406         gen = io_ctl->cur;
407         if (le64_to_cpu(*gen) != generation) {
408                 printk_ratelimited(KERN_ERR "btrfs: space cache generation "
409                                    "(%Lu) does not match inode (%Lu)\n",
410                                    le64_to_cpu(*gen), generation);
411                 io_ctl_unmap_page(io_ctl);
412                 return -EIO;
413         }
414         io_ctl->cur += sizeof(u64);
415         return 0;
416 }
417
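/*
 * Checksum the page that was just filled (skipping the crc array itself on
 * the first page), unmap it, and store the result in slot @index of the crc
 * array at the start of page 0.
 */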
418 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
419 {
420         u32 *tmp;
421         u32 crc = ~(u32)0;
422         unsigned offset = 0;
423
424         if (!io_ctl->check_crcs) {
425                 io_ctl_unmap_page(io_ctl);
426                 return;
427         }
428
429         if (index == 0)
430                 offset = sizeof(u32) * io_ctl->num_pages;
431
432         crc = btrfs_csum_data(io_ctl->orig + offset, crc,
433                               PAGE_CACHE_SIZE - offset);
434         btrfs_csum_final(crc, (char *)&crc);
435         io_ctl_unmap_page(io_ctl);
436         tmp = kmap(io_ctl->pages[0]);
437         tmp += index;
438         *tmp = crc;
439         kunmap(io_ctl->pages[0]);
440 }
441
442 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
443 {
444         u32 *tmp, val;
445         u32 crc = ~(u32)0;
446         unsigned offset = 0;
447
448         if (!io_ctl->check_crcs) {
449                 io_ctl_map_page(io_ctl, 0);
450                 return 0;
451         }
452
453         if (index == 0)
454                 offset = sizeof(u32) * io_ctl->num_pages;
455
456         tmp = kmap(io_ctl->pages[0]);
457         tmp += index;
458         val = *tmp;
459         kunmap(io_ctl->pages[0]);
460
461         io_ctl_map_page(io_ctl, 0);
462         crc = btrfs_csum_data(io_ctl->orig + offset, crc,
463                               PAGE_CACHE_SIZE - offset);
464         btrfs_csum_final(crc, (char *)&crc);
465         if (val != crc) {
466                 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
467                                    "space cache\n");
468                 io_ctl_unmap_page(io_ctl);
469                 return -EIO;
470         }
471
472         return 0;
473 }
474
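/*
 * Append one extent or bitmap entry to the current page, checksumming the
 * page and mapping the next one once it fills up.
 */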
475 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
476                             void *bitmap)
477 {
478         struct btrfs_free_space_entry *entry;
479
480         if (!io_ctl->cur)
481                 return -ENOSPC;
482
483         entry = io_ctl->cur;
484         entry->offset = cpu_to_le64(offset);
485         entry->bytes = cpu_to_le64(bytes);
486         entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
487                 BTRFS_FREE_SPACE_EXTENT;
488         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
489         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
490
491         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
492                 return 0;
493
494         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
495
496         /* No more pages to map */
497         if (io_ctl->index >= io_ctl->num_pages)
498                 return 0;
499
500         /* map the next page */
501         io_ctl_map_page(io_ctl, 1);
502         return 0;
503 }
504
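/*
 * Bitmaps occupy a full page each: finish off the current entry page if it
 * has been written to, then copy the bitmap into a page of its own.
 */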
505 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
506 {
507         if (!io_ctl->cur)
508                 return -ENOSPC;
509
510         /*
511          * If we aren't at the start of the current page, unmap this one and
512          * map the next one if there is any left.
513          */
514         if (io_ctl->cur != io_ctl->orig) {
515                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
516                 if (io_ctl->index >= io_ctl->num_pages)
517                         return -ENOSPC;
518                 io_ctl_map_page(io_ctl, 0);
519         }
520
521         memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
522         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
523         if (io_ctl->index < io_ctl->num_pages)
524                 io_ctl_map_page(io_ctl, 0);
525         return 0;
526 }
527
528 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
529 {
530         /*
531          * If we're not on the boundary we know we've modified the page and we
532          * need to crc the page.
533          */
534         if (io_ctl->cur != io_ctl->orig)
535                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
536         else
537                 io_ctl_unmap_page(io_ctl);
538
539         while (io_ctl->index < io_ctl->num_pages) {
540                 io_ctl_map_page(io_ctl, 1);
541                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
542         }
543 }
544
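/*
 * Read the next on-disk entry, verifying the crc of each new page as it is
 * mapped.
 */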
545 static int io_ctl_read_entry(struct io_ctl *io_ctl,
546                             struct btrfs_free_space *entry, u8 *type)
547 {
548         struct btrfs_free_space_entry *e;
549         int ret;
550
551         if (!io_ctl->cur) {
552                 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
553                 if (ret)
554                         return ret;
555         }
556
557         e = io_ctl->cur;
558         entry->offset = le64_to_cpu(e->offset);
559         entry->bytes = le64_to_cpu(e->bytes);
560         *type = e->type;
561         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
562         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
563
564         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
565                 return 0;
566
567         io_ctl_unmap_page(io_ctl);
568
569         return 0;
570 }
571
572 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
573                               struct btrfs_free_space *entry)
574 {
575         int ret;
576
577         ret = io_ctl_check_crc(io_ctl, io_ctl->index);
578         if (ret)
579                 return ret;
580
581         memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
582         io_ctl_unmap_page(io_ctl);
583
584         return 0;
585 }
586
587 /*
588  * Since we attach pinned extents after the fact, we can end up with
589  * contiguous sections of free space that are split across entries.  This is
590  * a problem for tree logging: an allocation may have spanned what now looks
591  * like two entries, because the entries were merged when the pinned extents
592  * were added back to the free space cache.  So run through the space cache
593  * we just loaded and merge contiguous entries.  This keeps log replay from
594  * blowing up and makes for nicer allocator behavior.
595  */
596 static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
597 {
598         struct btrfs_free_space *e, *prev = NULL;
599         struct rb_node *n;
600
601 again:
602         spin_lock(&ctl->tree_lock);
603         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
604                 e = rb_entry(n, struct btrfs_free_space, offset_index);
605                 if (!prev)
606                         goto next;
607                 if (e->bitmap || prev->bitmap)
608                         goto next;
609                 if (prev->offset + prev->bytes == e->offset) {
610                         unlink_free_space(ctl, prev);
611                         unlink_free_space(ctl, e);
612                         prev->bytes += e->bytes;
613                         kmem_cache_free(btrfs_free_space_cachep, e);
614                         link_free_space(ctl, prev);
615                         prev = NULL;
616                         spin_unlock(&ctl->tree_lock);
617                         goto again;
618                 }
619 next:
620                 prev = e;
621         }
622         spin_unlock(&ctl->tree_lock);
623 }
624
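/*
 * Parse the on-disk cache for the block group at @offset into @ctl.  Returns
 * 1 if the cache was loaded, 0 if it should simply be ignored, and a negative
 * errno on error.
 */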
625 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
626                                    struct btrfs_free_space_ctl *ctl,
627                                    struct btrfs_path *path, u64 offset)
628 {
629         struct btrfs_free_space_header *header;
630         struct extent_buffer *leaf;
631         struct io_ctl io_ctl;
632         struct btrfs_key key;
633         struct btrfs_free_space *e, *n;
634         struct list_head bitmaps;
635         u64 num_entries;
636         u64 num_bitmaps;
637         u64 generation;
638         u8 type;
639         int ret = 0;
640
641         INIT_LIST_HEAD(&bitmaps);
642
643         /* Nothing in the space cache, goodbye */
644         if (!i_size_read(inode))
645                 return 0;
646
647         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
648         key.offset = offset;
649         key.type = 0;
650
651         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
652         if (ret < 0)
653                 return 0;
654         else if (ret > 0) {
655                 btrfs_release_path(path);
656                 return 0;
657         }
658
659         ret = -1;
660
661         leaf = path->nodes[0];
662         header = btrfs_item_ptr(leaf, path->slots[0],
663                                 struct btrfs_free_space_header);
664         num_entries = btrfs_free_space_entries(leaf, header);
665         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
666         generation = btrfs_free_space_generation(leaf, header);
667         btrfs_release_path(path);
668
669         if (BTRFS_I(inode)->generation != generation) {
670                 btrfs_err(root->fs_info,
671                         "free space inode generation (%llu) "
672                         "did not match free space cache generation (%llu)",
673                         BTRFS_I(inode)->generation, generation);
674                 return 0;
675         }
676
677         if (!num_entries)
678                 return 0;
679
680         ret = io_ctl_init(&io_ctl, inode, root);
681         if (ret)
682                 return ret;
683
684         ret = readahead_cache(inode);
685         if (ret)
686                 goto out;
687
688         ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
689         if (ret)
690                 goto out;
691
692         ret = io_ctl_check_crc(&io_ctl, 0);
693         if (ret)
694                 goto free_cache;
695
696         ret = io_ctl_check_generation(&io_ctl, generation);
697         if (ret)
698                 goto free_cache;
699
700         while (num_entries) {
701                 e = kmem_cache_zalloc(btrfs_free_space_cachep,
702                                       GFP_NOFS);
703                 if (!e)
704                         goto free_cache;
705
706                 ret = io_ctl_read_entry(&io_ctl, e, &type);
707                 if (ret) {
708                         kmem_cache_free(btrfs_free_space_cachep, e);
709                         goto free_cache;
710                 }
711
712                 if (!e->bytes) {
713                         kmem_cache_free(btrfs_free_space_cachep, e);
714                         goto free_cache;
715                 }
716
717                 if (type == BTRFS_FREE_SPACE_EXTENT) {
718                         spin_lock(&ctl->tree_lock);
719                         ret = link_free_space(ctl, e);
720                         spin_unlock(&ctl->tree_lock);
721                         if (ret) {
722                                 btrfs_err(root->fs_info,
723                                         "Duplicate entries in free space cache, dumping");
724                                 kmem_cache_free(btrfs_free_space_cachep, e);
725                                 goto free_cache;
726                         }
727                 } else {
728                         ASSERT(num_bitmaps);
729                         num_bitmaps--;
730                         e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
731                         if (!e->bitmap) {
732                                 kmem_cache_free(
733                                         btrfs_free_space_cachep, e);
734                                 goto free_cache;
735                         }
736                         spin_lock(&ctl->tree_lock);
737                         ret = link_free_space(ctl, e);
738                         ctl->total_bitmaps++;
739                         ctl->op->recalc_thresholds(ctl);
740                         spin_unlock(&ctl->tree_lock);
741                         if (ret) {
742                                 btrfs_err(root->fs_info,
743                                         "Duplicate entries in free space cache, dumping");
744                                 kmem_cache_free(btrfs_free_space_cachep, e);
745                                 goto free_cache;
746                         }
747                         list_add_tail(&e->list, &bitmaps);
748                 }
749
750                 num_entries--;
751         }
752
753         io_ctl_unmap_page(&io_ctl);
754
755         /*
756          * The bitmap pages follow all of the entries on disk, in the same
757          * order in which the bitmap entries were added to the cache above.
758          */
759         list_for_each_entry_safe(e, n, &bitmaps, list) {
760                 list_del_init(&e->list);
761                 ret = io_ctl_read_bitmap(&io_ctl, e);
762                 if (ret)
763                         goto free_cache;
764         }
765
766         io_ctl_drop_pages(&io_ctl);
767         merge_space_tree(ctl);
768         ret = 1;
769 out:
770         io_ctl_free(&io_ctl);
771         return ret;
772 free_cache:
773         io_ctl_drop_pages(&io_ctl);
774         __btrfs_remove_free_space_cache(ctl);
775         goto out;
776 }
777
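/*
 * Load the on-disk free space cache for @block_group, then sanity check the
 * loaded amount of free space against the used bytes recorded in the block
 * group item.  A bogus cache is dropped and marked to be rebuilt.
 */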
778 int load_free_space_cache(struct btrfs_fs_info *fs_info,
779                           struct btrfs_block_group_cache *block_group)
780 {
781         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
782         struct btrfs_root *root = fs_info->tree_root;
783         struct inode *inode;
784         struct btrfs_path *path;
785         int ret = 0;
786         bool matched;
787         u64 used = btrfs_block_group_used(&block_group->item);
788
789         /*
790          * If this block group has been marked to be cleared for one reason or
791          * another then we can't trust the on disk cache, so just return.
792          */
793         spin_lock(&block_group->lock);
794         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
795                 spin_unlock(&block_group->lock);
796                 return 0;
797         }
798         spin_unlock(&block_group->lock);
799
800         path = btrfs_alloc_path();
801         if (!path)
802                 return 0;
803         path->search_commit_root = 1;
804         path->skip_locking = 1;
805
806         inode = lookup_free_space_inode(root, block_group, path);
807         if (IS_ERR(inode)) {
808                 btrfs_free_path(path);
809                 return 0;
810         }
811
812         /* We may have converted the inode and made the cache invalid. */
813         spin_lock(&block_group->lock);
814         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
815                 spin_unlock(&block_group->lock);
816                 btrfs_free_path(path);
817                 goto out;
818         }
819         spin_unlock(&block_group->lock);
820
821         ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
822                                       path, block_group->key.objectid);
823         btrfs_free_path(path);
824         if (ret <= 0)
825                 goto out;
826
827         spin_lock(&ctl->tree_lock);
828         matched = (ctl->free_space == (block_group->key.offset - used -
829                                        block_group->bytes_super));
830         spin_unlock(&ctl->tree_lock);
831
832         if (!matched) {
833                 __btrfs_remove_free_space_cache(ctl);
834                 btrfs_err(fs_info, "block group %llu has wrong amount of free space",
835                         block_group->key.objectid);
836                 ret = -1;
837         }
838 out:
839         if (ret < 0) {
840                 /* This cache is bogus, make sure it gets cleared */
841                 spin_lock(&block_group->lock);
842                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
843                 spin_unlock(&block_group->lock);
844                 ret = 0;
845
846                 btrfs_err(fs_info, "failed to load free space cache for block group %llu",
847                         block_group->key.objectid);
848         }
849
850         iput(inode);
851         return ret;
852 }
853
854 /**
855  * __btrfs_write_out_cache - write out cached info to an inode
856  * @root - the root the inode belongs to
857  * @ctl - the free space cache we are going to write out
858  * @block_group - the block_group for this cache if it belongs to a block_group
859  * @trans - the trans handle
860  * @path - the path to use
861  * @offset - the offset for the key we'll insert
862  *
863  * This function writes out a free space cache struct to disk for quick recovery
864  * on mount.  This will return 0 if it was successful in writing the cache out,
865  * and -1 if it was not.
866  */
867 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
868                                    struct btrfs_free_space_ctl *ctl,
869                                    struct btrfs_block_group_cache *block_group,
870                                    struct btrfs_trans_handle *trans,
871                                    struct btrfs_path *path, u64 offset)
872 {
873         struct btrfs_free_space_header *header;
874         struct extent_buffer *leaf;
875         struct rb_node *node;
876         struct list_head *pos, *n;
877         struct extent_state *cached_state = NULL;
878         struct btrfs_free_cluster *cluster = NULL;
879         struct extent_io_tree *unpin = NULL;
880         struct io_ctl io_ctl;
881         struct list_head bitmap_list;
882         struct btrfs_key key;
883         u64 start, extent_start, extent_end, len;
884         int entries = 0;
885         int bitmaps = 0;
886         int ret;
887         int err = -1;
888
889         INIT_LIST_HEAD(&bitmap_list);
890
891         if (!i_size_read(inode))
892                 return -1;
893
894         ret = io_ctl_init(&io_ctl, inode, root);
895         if (ret)
896                 return -1;
897
898         /* Get the cluster for this block_group if it exists */
899         if (block_group && !list_empty(&block_group->cluster_list))
900                 cluster = list_entry(block_group->cluster_list.next,
901                                      struct btrfs_free_cluster,
902                                      block_group_list);
903
904         /* Lock all pages first so we can lock the extent safely. */
905         io_ctl_prepare_pages(&io_ctl, inode, 0);
906
907         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
908                          0, &cached_state);
909
910         node = rb_first(&ctl->free_space_offset);
911         if (!node && cluster) {
912                 node = rb_first(&cluster->root);
913                 cluster = NULL;
914         }
915
916         /* Make sure we can fit our crcs into the first page */
917         if (io_ctl.check_crcs &&
918             (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
919                 goto out_nospc;
920
921         io_ctl_set_generation(&io_ctl, trans->transid);
922
923         /* Write out the extent entries */
924         while (node) {
925                 struct btrfs_free_space *e;
926
927                 e = rb_entry(node, struct btrfs_free_space, offset_index);
928                 entries++;
929
930                 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
931                                        e->bitmap);
932                 if (ret)
933                         goto out_nospc;
934
935                 if (e->bitmap) {
936                         list_add_tail(&e->list, &bitmap_list);
937                         bitmaps++;
938                 }
939                 node = rb_next(node);
940                 if (!node && cluster) {
941                         node = rb_first(&cluster->root);
942                         cluster = NULL;
943                 }
944         }
945
946         /*
947          * We want to add any pinned extents to our free space cache
948          * so we don't leak the space
949          */
950
951         /*
952          * We shouldn't have switched the pinned extents yet so this is the
953          * right one
954          */
955         unpin = root->fs_info->pinned_extents;
956
957         if (block_group)
958                 start = block_group->key.objectid;
959
960         while (block_group && (start < block_group->key.objectid +
961                                block_group->key.offset)) {
962                 ret = find_first_extent_bit(unpin, start,
963                                             &extent_start, &extent_end,
964                                             EXTENT_DIRTY, NULL);
965                 if (ret) {
966                         ret = 0;
967                         break;
968                 }
969
970                 /* This pinned extent is out of our range */
971                 if (extent_start >= block_group->key.objectid +
972                     block_group->key.offset)
973                         break;
974
975                 extent_start = max(extent_start, start);
976                 extent_end = min(block_group->key.objectid +
977                                  block_group->key.offset, extent_end + 1);
978                 len = extent_end - extent_start;
979
980                 entries++;
981                 ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
982                 if (ret)
983                         goto out_nospc;
984
985                 start = extent_end;
986         }
987
988         /* Write out the bitmaps */
989         list_for_each_safe(pos, n, &bitmap_list) {
990                 struct btrfs_free_space *entry =
991                         list_entry(pos, struct btrfs_free_space, list);
992
993                 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
994                 if (ret)
995                         goto out_nospc;
996                 list_del_init(&entry->list);
997         }
998
999         /* Zero out the rest of the pages just to make sure */
1000         io_ctl_zero_remaining_pages(&io_ctl);
1001
1002         ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
1003                                 0, i_size_read(inode), &cached_state);
1004         io_ctl_drop_pages(&io_ctl);
1005         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1006                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1007
1008         if (ret)
1009                 goto out;
1010
1011
1012         btrfs_wait_ordered_range(inode, 0, (u64)-1);
1013
1014         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
1015         key.offset = offset;
1016         key.type = 0;
1017
1018         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1019         if (ret < 0) {
1020                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1021                                  EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
1022                                  GFP_NOFS);
1023                 goto out;
1024         }
1025         leaf = path->nodes[0];
1026         if (ret > 0) {
1027                 struct btrfs_key found_key;
1028                 ASSERT(path->slots[0]);
1029                 path->slots[0]--;
1030                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1031                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
1032                     found_key.offset != offset) {
1033                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1034                                          inode->i_size - 1,
1035                                          EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1036                                          NULL, GFP_NOFS);
1037                         btrfs_release_path(path);
1038                         goto out;
1039                 }
1040         }
1041
1042         BTRFS_I(inode)->generation = trans->transid;
1043         header = btrfs_item_ptr(leaf, path->slots[0],
1044                                 struct btrfs_free_space_header);
1045         btrfs_set_free_space_entries(leaf, header, entries);
1046         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1047         btrfs_set_free_space_generation(leaf, header, trans->transid);
1048         btrfs_mark_buffer_dirty(leaf);
1049         btrfs_release_path(path);
1050
1051         err = 0;
1052 out:
1053         io_ctl_free(&io_ctl);
1054         if (err) {
1055                 invalidate_inode_pages2(inode->i_mapping);
1056                 BTRFS_I(inode)->generation = 0;
1057         }
1058         btrfs_update_inode(trans, root, inode);
1059         return err;
1060
1061 out_nospc:
1062         list_for_each_safe(pos, n, &bitmap_list) {
1063                 struct btrfs_free_space *entry =
1064                         list_entry(pos, struct btrfs_free_space, list);
1065                 list_del_init(&entry->list);
1066         }
1067         io_ctl_drop_pages(&io_ctl);
1068         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1069                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1070         goto out;
1071 }
1072
1073 int btrfs_write_out_cache(struct btrfs_root *root,
1074                           struct btrfs_trans_handle *trans,
1075                           struct btrfs_block_group_cache *block_group,
1076                           struct btrfs_path *path)
1077 {
1078         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1079         struct inode *inode;
1080         int ret = 0;
1081
1082         root = root->fs_info->tree_root;
1083
1084         spin_lock(&block_group->lock);
1085         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1086                 spin_unlock(&block_group->lock);
1087                 return 0;
1088         }
1089         spin_unlock(&block_group->lock);
1090
1091         inode = lookup_free_space_inode(root, block_group, path);
1092         if (IS_ERR(inode))
1093                 return 0;
1094
1095         ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1096                                       path, block_group->key.objectid);
1097         if (ret) {
1098                 spin_lock(&block_group->lock);
1099                 block_group->disk_cache_state = BTRFS_DC_ERROR;
1100                 spin_unlock(&block_group->lock);
1101                 ret = 0;
1102 #ifdef DEBUG
1103                 btrfs_err(root->fs_info,
1104                         "failed to write free space cache for block group %llu",
1105                         block_group->key.objectid);
1106 #endif
1107         }
1108
1109         iput(inode);
1110         return ret;
1111 }
1112
1113 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1114                                           u64 offset)
1115 {
1116         ASSERT(offset >= bitmap_start);
1117         offset -= bitmap_start;
1118         return (unsigned long)(div_u64(offset, unit));
1119 }
1120
1121 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1122 {
1123         return (unsigned long)(div_u64(bytes, unit));
1124 }
1125
1126 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1127                                    u64 offset)
1128 {
1129         u64 bitmap_start;
1130         u64 bytes_per_bitmap;
1131
1132         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1133         bitmap_start = offset - ctl->start;
1134         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1135         bitmap_start *= bytes_per_bitmap;
1136         bitmap_start += ctl->start;
1137
1138         return bitmap_start;
1139 }
1140
1141 static int tree_insert_offset(struct rb_root *root, u64 offset,
1142                               struct rb_node *node, int bitmap)
1143 {
1144         struct rb_node **p = &root->rb_node;
1145         struct rb_node *parent = NULL;
1146         struct btrfs_free_space *info;
1147
1148         while (*p) {
1149                 parent = *p;
1150                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1151
1152                 if (offset < info->offset) {
1153                         p = &(*p)->rb_left;
1154                 } else if (offset > info->offset) {
1155                         p = &(*p)->rb_right;
1156                 } else {
1157                         /*
1158                          * we could have a bitmap entry and an extent entry
1159                          * share the same offset.  If this is the case, we want
1160                          * the extent entry to always be found first if we do a
1161                          * linear search through the tree, since we want to have
1162                          * the quickest allocation time, and allocating from an
1163                          * extent is faster than allocating from a bitmap.  So
1164                          * if we're inserting a bitmap and we find an entry at
1165                          * this offset, we want to go right, or after this entry
1166                          * logically.  If we are inserting an extent and we've
1167                          * found a bitmap, we want to go left, or before
1168                          * logically.
1169                          */
1170                         if (bitmap) {
1171                                 if (info->bitmap) {
1172                                         WARN_ON_ONCE(1);
1173                                         return -EEXIST;
1174                                 }
1175                                 p = &(*p)->rb_right;
1176                         } else {
1177                                 if (!info->bitmap) {
1178                                         WARN_ON_ONCE(1);
1179                                         return -EEXIST;
1180                                 }
1181                                 p = &(*p)->rb_left;
1182                         }
1183                 }
1184         }
1185
1186         rb_link_node(node, parent, p);
1187         rb_insert_color(node, root);
1188
1189         return 0;
1190 }
1191
1192 /*
1193  * searches the tree for the given offset.
1194  *
1195  * fuzzy - If this is set, then we are trying to make an allocation, and we just
1196  * want a section that is at least 'bytes' in size and comes at or after the given
1197  * offset.
1198  */
1199 static struct btrfs_free_space *
1200 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1201                    u64 offset, int bitmap_only, int fuzzy)
1202 {
1203         struct rb_node *n = ctl->free_space_offset.rb_node;
1204         struct btrfs_free_space *entry, *prev = NULL;
1205
1206         /* find entry that is closest to the 'offset' */
1207         while (1) {
1208                 if (!n) {
1209                         entry = NULL;
1210                         break;
1211                 }
1212
1213                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1214                 prev = entry;
1215
1216                 if (offset < entry->offset)
1217                         n = n->rb_left;
1218                 else if (offset > entry->offset)
1219                         n = n->rb_right;
1220                 else
1221                         break;
1222         }
1223
1224         if (bitmap_only) {
1225                 if (!entry)
1226                         return NULL;
1227                 if (entry->bitmap)
1228                         return entry;
1229
1230                 /*
1231                  * A bitmap entry and an extent entry may share the same
1232                  * offset; in that case the bitmap entry comes after the extent entry.
1233                  */
1234                 n = rb_next(n);
1235                 if (!n)
1236                         return NULL;
1237                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1238                 if (entry->offset != offset)
1239                         return NULL;
1240
1241                 WARN_ON(!entry->bitmap);
1242                 return entry;
1243         } else if (entry) {
1244                 if (entry->bitmap) {
1245                         /*
1246                          * if previous extent entry covers the offset,
1247                          * we should return it instead of the bitmap entry
1248                          */
1249                         n = rb_prev(&entry->offset_index);
1250                         if (n) {
1251                                 prev = rb_entry(n, struct btrfs_free_space,
1252                                                 offset_index);
1253                                 if (!prev->bitmap &&
1254                                     prev->offset + prev->bytes > offset)
1255                                         entry = prev;
1256                         }
1257                 }
1258                 return entry;
1259         }
1260
1261         if (!prev)
1262                 return NULL;
1263
1264         /* find last entry before the 'offset' */
1265         entry = prev;
1266         if (entry->offset > offset) {
1267                 n = rb_prev(&entry->offset_index);
1268                 if (n) {
1269                         entry = rb_entry(n, struct btrfs_free_space,
1270                                         offset_index);
1271                         ASSERT(entry->offset <= offset);
1272                 } else {
1273                         if (fuzzy)
1274                                 return entry;
1275                         else
1276                                 return NULL;
1277                 }
1278         }
1279
1280         if (entry->bitmap) {
1281                 n = rb_prev(&entry->offset_index);
1282                 if (n) {
1283                         prev = rb_entry(n, struct btrfs_free_space,
1284                                         offset_index);
1285                         if (!prev->bitmap &&
1286                             prev->offset + prev->bytes > offset)
1287                                 return prev;
1288                 }
1289                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1290                         return entry;
1291         } else if (entry->offset + entry->bytes > offset)
1292                 return entry;
1293
1294         if (!fuzzy)
1295                 return NULL;
1296
1297         while (1) {
1298                 if (entry->bitmap) {
1299                         if (entry->offset + BITS_PER_BITMAP *
1300                             ctl->unit > offset)
1301                                 break;
1302                 } else {
1303                         if (entry->offset + entry->bytes > offset)
1304                                 break;
1305                 }
1306
1307                 n = rb_next(&entry->offset_index);
1308                 if (!n)
1309                         return NULL;
1310                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1311         }
1312         return entry;
1313 }
1314
1315 static inline void
1316 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1317                     struct btrfs_free_space *info)
1318 {
1319         rb_erase(&info->offset_index, &ctl->free_space_offset);
1320         ctl->free_extents--;
1321 }
1322
1323 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1324                               struct btrfs_free_space *info)
1325 {
1326         __unlink_free_space(ctl, info);
1327         ctl->free_space -= info->bytes;
1328 }
1329
1330 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1331                            struct btrfs_free_space *info)
1332 {
1333         int ret = 0;
1334
1335         ASSERT(info->bytes || info->bitmap);
1336         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1337                                  &info->offset_index, (info->bitmap != NULL));
1338         if (ret)
1339                 return ret;
1340
1341         ctl->free_space += info->bytes;
1342         ctl->free_extents++;
1343         return ret;
1344 }
1345
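/*
 * Recompute extents_thresh, the number of extent entries allowed before new
 * free space gets tracked in bitmaps, so that the cache stays near
 * MAX_CACHE_BYTES_PER_GIG of memory per gigabyte of block group space.
 */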
1346 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1347 {
1348         struct btrfs_block_group_cache *block_group = ctl->private;
1349         u64 max_bytes;
1350         u64 bitmap_bytes;
1351         u64 extent_bytes;
1352         u64 size = block_group->key.offset;
1353         u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
1354         int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1355
1356         max_bitmaps = max(max_bitmaps, 1);
1357
1358         ASSERT(ctl->total_bitmaps <= max_bitmaps);
1359
1360         /*
1361          * The goal is to keep the total amount of memory used per 1GB of space
1362          * at or below 32KB, so we need to adjust how much memory we allow to be
1363          * used by extent-based free space tracking.
1364          */
1365         if (size < 1024 * 1024 * 1024)
1366                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1367         else
1368                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1369                         div64_u64(size, 1024 * 1024 * 1024);
1370
1371         /*
1372          * we want to account for 1 more bitmap than what we have so we can make
1373          * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1374          * we add more bitmaps.
1375          */
1376         bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1377
1378         if (bitmap_bytes >= max_bytes) {
1379                 ctl->extents_thresh = 0;
1380                 return;
1381         }
1382
1383         /*
1384          * we want the extent entry threshold to always be at most 1/2 the max
1385          * bytes we can have, or whatever is less than that.
1386          */
1387         extent_bytes = max_bytes - bitmap_bytes;
1388         extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1389
1390         ctl->extents_thresh =
1391                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1392 }
1393
1394 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1395                                        struct btrfs_free_space *info,
1396                                        u64 offset, u64 bytes)
1397 {
1398         unsigned long start, count;
1399
1400         start = offset_to_bit(info->offset, ctl->unit, offset);
1401         count = bytes_to_bits(bytes, ctl->unit);
1402         ASSERT(start + count <= BITS_PER_BITMAP);
1403
1404         bitmap_clear(info->bitmap, start, count);
1405
1406         info->bytes -= bytes;
1407 }
1408
1409 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1410                               struct btrfs_free_space *info, u64 offset,
1411                               u64 bytes)
1412 {
1413         __bitmap_clear_bits(ctl, info, offset, bytes);
1414         ctl->free_space -= bytes;
1415 }
1416
1417 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1418                             struct btrfs_free_space *info, u64 offset,
1419                             u64 bytes)
1420 {
1421         unsigned long start, count;
1422
1423         start = offset_to_bit(info->offset, ctl->unit, offset);
1424         count = bytes_to_bits(bytes, ctl->unit);
1425         ASSERT(start + count <= BITS_PER_BITMAP);
1426
1427         bitmap_set(info->bitmap, start, count);
1428
1429         info->bytes += bytes;
1430         ctl->free_space += bytes;
1431 }
1432
1433 /*
1434  * If we cannot find a suitable extent, we use *bytes to record
1435  * the size of the largest free extent found.
1436  */
1437 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1438                          struct btrfs_free_space *bitmap_info, u64 *offset,
1439                          u64 *bytes)
1440 {
1441         unsigned long found_bits = 0;
1442         unsigned long max_bits = 0;
1443         unsigned long bits, i;
1444         unsigned long next_zero;
1445         unsigned long extent_bits;
1446
1447         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1448                           max_t(u64, *offset, bitmap_info->offset));
1449         bits = bytes_to_bits(*bytes, ctl->unit);
1450
1451         for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1452                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1453                                                BITS_PER_BITMAP, i);
1454                 extent_bits = next_zero - i;
1455                 if (extent_bits >= bits) {
1456                         found_bits = extent_bits;
1457                         break;
1458                 } else if (extent_bits > max_bits) {
1459                         max_bits = extent_bits;
1460                 }
1461                 i = next_zero;
1462         }
1463
1464         if (found_bits) {
1465                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1466                 *bytes = (u64)(found_bits) * ctl->unit;
1467                 return 0;
1468         }
1469
1470         *bytes = (u64)(max_bits) * ctl->unit;
1471         return -1;
1472 }
1473
1474 /* Caches the size of the largest free extent found, in bytes, in *max_extent_size */
1475 static struct btrfs_free_space *
1476 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1477                 unsigned long align, u64 *max_extent_size)
1478 {
1479         struct btrfs_free_space *entry;
1480         struct rb_node *node;
1481         u64 tmp;
1482         u64 align_off;
1483         int ret;
1484
1485         if (!ctl->free_space_offset.rb_node)
1486                 goto out;
1487
1488         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1489         if (!entry)
1490                 goto out;
1491
1492         for (node = &entry->offset_index; node; node = rb_next(node)) {
1493                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1494                 if (entry->bytes < *bytes) {
1495                         if (entry->bytes > *max_extent_size)
1496                                 *max_extent_size = entry->bytes;
1497                         continue;
1498                 }
1499
1500                 /* make sure the space returned is big enough
1501                  * to match our requested alignment
1502                  */
1503                 if (*bytes >= align) {
1504                         tmp = entry->offset - ctl->start + align - 1;
1505                         do_div(tmp, align);
1506                         tmp = tmp * align + ctl->start;
1507                         align_off = tmp - entry->offset;
1508                 } else {
1509                         align_off = 0;
1510                         tmp = entry->offset;
1511                 }
1512
1513                 if (entry->bytes < *bytes + align_off) {
1514                         if (entry->bytes > *max_extent_size)
1515                                 *max_extent_size = entry->bytes;
1516                         continue;
1517                 }
1518
1519                 if (entry->bitmap) {
1520                         u64 size = *bytes;
1521
1522                         ret = search_bitmap(ctl, entry, &tmp, &size);
1523                         if (!ret) {
1524                                 *offset = tmp;
1525                                 *bytes = size;
1526                                 return entry;
1527                         } else if (size > *max_extent_size) {
1528                                 *max_extent_size = size;
1529                         }
1530                         continue;
1531                 }
1532
1533                 *offset = tmp;
1534                 *bytes = entry->bytes - align_off;
1535                 return entry;
1536         }
1537 out:
1538         return NULL;
1539 }
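
/*
 * Worked example for the alignment rounding above (illustrative, assuming
 * align = 64 KiB): with ctl->start = 1 GiB and entry->offset = 1 GiB + 12 KiB,
 * tmp becomes (12 KiB + 64 KiB - 1) / 64 KiB = 1, so the aligned offset is
 * 1 GiB + 64 KiB and align_off = 52 KiB.  The entry therefore has to hold at
 * least *bytes + 52 KiB before it can satisfy the request.
 */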
1540
1541 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1542                            struct btrfs_free_space *info, u64 offset)
1543 {
1544         info->offset = offset_to_bitmap(ctl, offset);
1545         info->bytes = 0;
1546         INIT_LIST_HEAD(&info->list);
1547         link_free_space(ctl, info);
1548         ctl->total_bitmaps++;
1549
1550         ctl->op->recalc_thresholds(ctl);
1551 }
1552
1553 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1554                         struct btrfs_free_space *bitmap_info)
1555 {
1556         unlink_free_space(ctl, bitmap_info);
1557         kfree(bitmap_info->bitmap);
1558         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1559         ctl->total_bitmaps--;
1560         ctl->op->recalc_thresholds(ctl);
1561 }
1562
1563 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1564                               struct btrfs_free_space *bitmap_info,
1565                               u64 *offset, u64 *bytes)
1566 {
1567         u64 end;
1568         u64 search_start, search_bytes;
1569         int ret;
1570
1571 again:
1572         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1573
1574         /*
1575          * We need to search for bits in this bitmap.  We could only cover some
1576          * of the extent in this bitmap thanks to how we add space, so we need
1577          * to search for as much of it as we can and clear that amount, and then
1578          * go searching for the next bit.
1579          */
1580         search_start = *offset;
1581         search_bytes = ctl->unit;
1582         search_bytes = min(search_bytes, end - search_start + 1);
1583         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1584         if (ret < 0 || search_start != *offset)
1585                 return -EINVAL;
1586
1587         /* We may have found more bits than what we need */
1588         search_bytes = min(search_bytes, *bytes);
1589
1590         /* Cannot clear past the end of the bitmap */
1591         search_bytes = min(search_bytes, end - search_start + 1);
1592
1593         bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
1594         *offset += search_bytes;
1595         *bytes -= search_bytes;
1596
1597         if (*bytes) {
1598                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1599                 if (!bitmap_info->bytes)
1600                         free_bitmap(ctl, bitmap_info);
1601
1602                 /*
1603                  * no entry after this bitmap, but we still have bytes to
1604                  * remove, so something has gone wrong.
1605                  */
1606                 if (!next)
1607                         return -EINVAL;
1608
1609                 bitmap_info = rb_entry(next, struct btrfs_free_space,
1610                                        offset_index);
1611
1612                 /*
1613                  * if the next entry isn't a bitmap we need to return to let the
1614                  * extent stuff do its work.
1615                  */
1616                 if (!bitmap_info->bitmap)
1617                         return -EAGAIN;
1618
1619                 /*
1620                  * Ok the next item is a bitmap, but it may not actually cover
1621                  * the rest of the space we need to remove, so look for the
1622                  * remainder there, and if we don't find it return -EAGAIN so
1623                  * the caller can try everything over again.
1624                  */
1625                 search_start = *offset;
1626                 search_bytes = ctl->unit;
1627                 ret = search_bitmap(ctl, bitmap_info, &search_start,
1628                                     &search_bytes);
1629                 if (ret < 0 || search_start != *offset)
1630                         return -EAGAIN;
1631
1632                 goto again;
1633         } else if (!bitmap_info->bytes)
1634                 free_bitmap(ctl, bitmap_info);
1635
1636         return 0;
1637 }
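
/*
 * Worked example (illustrative): removing a range that starts near the end of
 * one bitmap and runs into the next.  The first pass through the loop above
 * clears everything up to 'end' of the current bitmap, advances *offset and
 * shrinks *bytes accordingly, then walks to the next entry in the rbtree.  If
 * that entry is another bitmap covering *offset the loop repeats; if it is a
 * plain extent entry, -EAGAIN is returned so btrfs_remove_free_space() can
 * fall back to the extent removal path for the remainder.
 */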
1638
1639 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1640                                struct btrfs_free_space *info, u64 offset,
1641                                u64 bytes)
1642 {
1643         u64 bytes_to_set = 0;
1644         u64 end;
1645
1646         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1647
1648         bytes_to_set = min(end - offset, bytes);
1649
1650         bitmap_set_bits(ctl, info, offset, bytes_to_set);
1651
1652         return bytes_to_set;
1653
1654 }
1655
1656 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1657                       struct btrfs_free_space *info)
1658 {
1659         struct btrfs_block_group_cache *block_group = ctl->private;
1660
1661         /*
1662          * If we are below the extents threshold then we can add this as an
1663          * extent, and don't have to deal with the bitmap
1664          */
1665         if (ctl->free_extents < ctl->extents_thresh) {
1666                 /*
1667                  * If this block group has some small extents we don't want to
1668                  * use up all of our free slots in the cache with them, we want
1669                  * to reserve them for larger extents, however if we have plenty
1670                  * of cache left then go ahead and add them, no sense in adding
1671                  * the overhead of a bitmap if we don't have to.
1672                  */
1673                 if (info->bytes <= block_group->sectorsize * 4) {
1674                         if (ctl->free_extents * 2 <= ctl->extents_thresh)
1675                                 return false;
1676                 } else {
1677                         return false;
1678                 }
1679         }
1680
1681         /*
1682          * The original block groups from mkfs can be really small, like 8
1683          * megabytes, so don't bother with a bitmap for those entries.  However
1684          * some block groups can be smaller than what a bitmap would cover but
1685          * are still large enough that they could overflow the 32k memory limit,
1686          * so allow those block groups to still be allowed to have a bitmap
1687          * entry.
1688          */
1689         if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
1690                 return false;
1691
1692         return true;
1693 }
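
/*
 * Worked numbers for the checks above (illustrative, assuming 4K pages and a
 * 4K sectorsize): BITS_PER_BITMAP is 32768, so a single bitmap entry covers
 * 32768 * 4 KiB = 128 MiB, and block groups smaller than 64 MiB (half of
 * that) never get bitmap entries.  Below the extents threshold, extents
 * larger than 4 * sectorsize = 16 KiB always stay as extent entries, while
 * smaller ones are only pushed into bitmaps once more than half of the
 * threshold slots are already in use.
 */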
1694
1695 static struct btrfs_free_space_op free_space_op = {
1696         .recalc_thresholds      = recalculate_thresholds,
1697         .use_bitmap             = use_bitmap,
1698 };
1699
1700 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1701                               struct btrfs_free_space *info)
1702 {
1703         struct btrfs_free_space *bitmap_info;
1704         struct btrfs_block_group_cache *block_group = NULL;
1705         int added = 0;
1706         u64 bytes, offset, bytes_added;
1707         int ret;
1708
1709         bytes = info->bytes;
1710         offset = info->offset;
1711
1712         if (!ctl->op->use_bitmap(ctl, info))
1713                 return 0;
1714
1715         if (ctl->op == &free_space_op)
1716                 block_group = ctl->private;
1717 again:
1718         /*
1719          * Since we link bitmaps right into the cluster we need to see if we
1720          * have a cluster here, and if so and it has our bitmap we need to add
1721          * the free space to that bitmap.
1722          */
1723         if (block_group && !list_empty(&block_group->cluster_list)) {
1724                 struct btrfs_free_cluster *cluster;
1725                 struct rb_node *node;
1726                 struct btrfs_free_space *entry;
1727
1728                 cluster = list_entry(block_group->cluster_list.next,
1729                                      struct btrfs_free_cluster,
1730                                      block_group_list);
1731                 spin_lock(&cluster->lock);
1732                 node = rb_first(&cluster->root);
1733                 if (!node) {
1734                         spin_unlock(&cluster->lock);
1735                         goto no_cluster_bitmap;
1736                 }
1737
1738                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1739                 if (!entry->bitmap) {
1740                         spin_unlock(&cluster->lock);
1741                         goto no_cluster_bitmap;
1742                 }
1743
1744                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1745                         bytes_added = add_bytes_to_bitmap(ctl, entry,
1746                                                           offset, bytes);
1747                         bytes -= bytes_added;
1748                         offset += bytes_added;
1749                 }
1750                 spin_unlock(&cluster->lock);
1751                 if (!bytes) {
1752                         ret = 1;
1753                         goto out;
1754                 }
1755         }
1756
1757 no_cluster_bitmap:
1758         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1759                                          1, 0);
1760         if (!bitmap_info) {
1761                 ASSERT(added == 0);
1762                 goto new_bitmap;
1763         }
1764
1765         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1766         bytes -= bytes_added;
1767         offset += bytes_added;
1768         added = 0;
1769
1770         if (!bytes) {
1771                 ret = 1;
1772                 goto out;
1773         } else
1774                 goto again;
1775
1776 new_bitmap:
1777         if (info && info->bitmap) {
1778                 add_new_bitmap(ctl, info, offset);
1779                 added = 1;
1780                 info = NULL;
1781                 goto again;
1782         } else {
1783                 spin_unlock(&ctl->tree_lock);
1784
1785                 /* no pre-allocated info, allocate a new one */
1786                 if (!info) {
1787                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
1788                                                  GFP_NOFS);
1789                         if (!info) {
1790                                 spin_lock(&ctl->tree_lock);
1791                                 ret = -ENOMEM;
1792                                 goto out;
1793                         }
1794                 }
1795
1796                 /* allocate the bitmap */
1797                 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1798                 spin_lock(&ctl->tree_lock);
1799                 if (!info->bitmap) {
1800                         ret = -ENOMEM;
1801                         goto out;
1802                 }
1803                 goto again;
1804         }
1805
1806 out:
1807         if (info) {
1808                 if (info->bitmap)
1809                         kfree(info->bitmap);
1810                 kmem_cache_free(btrfs_free_space_cachep, info);
1811         }
1812
1813         return ret;
1814 }
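
/*
 * Worked example (illustrative, assuming 4K pages and sectorsize so one
 * bitmap spans 128 MiB): inserting free space that straddles a 128 MiB
 * bitmap boundary is handled by the 'again' loop above.  add_bytes_to_bitmap()
 * only sets bits up to the end of the bitmap it was given, so the first pass
 * consumes the head of the range, offset/bytes are advanced, and the next
 * pass either finds the following bitmap in the tree or allocates a fresh one
 * via the new_bitmap path.
 */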
1815
1816 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1817                           struct btrfs_free_space *info, bool update_stat)
1818 {
1819         struct btrfs_free_space *left_info;
1820         struct btrfs_free_space *right_info;
1821         bool merged = false;
1822         u64 offset = info->offset;
1823         u64 bytes = info->bytes;
1824
1825         /*
1826          * first we want to see if there is free space adjacent to the range we
1827          * are adding; if there is, remove that entry and fold its space into
1828          * the one we are adding so a single entry covers the entire range
1829          */
1830         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1831         if (right_info && rb_prev(&right_info->offset_index))
1832                 left_info = rb_entry(rb_prev(&right_info->offset_index),
1833                                      struct btrfs_free_space, offset_index);
1834         else
1835                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1836
1837         if (right_info && !right_info->bitmap) {
1838                 if (update_stat)
1839                         unlink_free_space(ctl, right_info);
1840                 else
1841                         __unlink_free_space(ctl, right_info);
1842                 info->bytes += right_info->bytes;
1843                 kmem_cache_free(btrfs_free_space_cachep, right_info);
1844                 merged = true;
1845         }
1846
1847         if (left_info && !left_info->bitmap &&
1848             left_info->offset + left_info->bytes == offset) {
1849                 if (update_stat)
1850                         unlink_free_space(ctl, left_info);
1851                 else
1852                         __unlink_free_space(ctl, left_info);
1853                 info->offset = left_info->offset;
1854                 info->bytes += left_info->bytes;
1855                 kmem_cache_free(btrfs_free_space_cachep, left_info);
1856                 merged = true;
1857         }
1858
1859         return merged;
1860 }
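
/*
 * Worked example (illustrative): if extent entries [960 KiB, 1 MiB) and
 * [1088 KiB, 1152 KiB) already exist and we add [1 MiB, 1088 KiB), both
 * neighbours are unlinked and folded into the new info, which ends up as a
 * single entry covering [960 KiB, 1152 KiB).  Bitmap entries are never merged
 * this way; they are handled by insert_into_bitmap() instead.
 */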
1861
1862 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1863                            u64 offset, u64 bytes)
1864 {
1865         struct btrfs_free_space *info;
1866         int ret = 0;
1867
1868         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1869         if (!info)
1870                 return -ENOMEM;
1871
1872         info->offset = offset;
1873         info->bytes = bytes;
1874
1875         spin_lock(&ctl->tree_lock);
1876
1877         if (try_merge_free_space(ctl, info, true))
1878                 goto link;
1879
1880         /*
1881          * There was no extent directly to the left or right of this new
1882          * extent, so we know we're going to have to allocate a new extent;
1883          * before we do that, see if we need to drop this into a bitmap
1884          */
1885         ret = insert_into_bitmap(ctl, info);
1886         if (ret < 0) {
1887                 goto out;
1888         } else if (ret) {
1889                 ret = 0;
1890                 goto out;
1891         }
1892 link:
1893         ret = link_free_space(ctl, info);
1894         if (ret)
1895                 kmem_cache_free(btrfs_free_space_cachep, info);
1896 out:
1897         spin_unlock(&ctl->tree_lock);
1898
1899         if (ret) {
1900                 printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
1901                 ASSERT(ret != -EEXIST);
1902         }
1903
1904         return ret;
1905 }
1906
1907 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1908                             u64 offset, u64 bytes)
1909 {
1910         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1911         struct btrfs_free_space *info;
1912         int ret;
1913         bool re_search = false;
1914
1915         spin_lock(&ctl->tree_lock);
1916
1917 again:
1918         ret = 0;
1919         if (!bytes)
1920                 goto out_lock;
1921
1922         info = tree_search_offset(ctl, offset, 0, 0);
1923         if (!info) {
1924                 /*
1925                  * oops didn't find an extent that matched the space we wanted
1926                  * to remove, look for a bitmap instead
1927                  */
1928                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1929                                           1, 0);
1930                 if (!info) {
1931                         /*
1932                          * If we found a partial bit of our free space in a
1933                          * bitmap but then couldn't find the other part this may
1934                          * be a problem, so WARN about it.
1935                          */
1936                         WARN_ON(re_search);
1937                         goto out_lock;
1938                 }
1939         }
1940
1941         re_search = false;
1942         if (!info->bitmap) {
1943                 unlink_free_space(ctl, info);
1944                 if (offset == info->offset) {
1945                         u64 to_free = min(bytes, info->bytes);
1946
1947                         info->bytes -= to_free;
1948                         info->offset += to_free;
1949                         if (info->bytes) {
1950                                 ret = link_free_space(ctl, info);
1951                                 WARN_ON(ret);
1952                         } else {
1953                                 kmem_cache_free(btrfs_free_space_cachep, info);
1954                         }
1955
1956                         offset += to_free;
1957                         bytes -= to_free;
1958                         goto again;
1959                 } else {
1960                         u64 old_end = info->bytes + info->offset;
1961
1962                         info->bytes = offset - info->offset;
1963                         ret = link_free_space(ctl, info);
1964                         WARN_ON(ret);
1965                         if (ret)
1966                                 goto out_lock;
1967
1968                         /* Not enough bytes in this entry to satisfy us */
1969                         if (old_end < offset + bytes) {
1970                                 bytes -= old_end - offset;
1971                                 offset = old_end;
1972                                 goto again;
1973                         } else if (old_end == offset + bytes) {
1974                                 /* all done */
1975                                 goto out_lock;
1976                         }
1977                         spin_unlock(&ctl->tree_lock);
1978
1979                         ret = btrfs_add_free_space(block_group, offset + bytes,
1980                                                    old_end - (offset + bytes));
1981                         WARN_ON(ret);
1982                         goto out;
1983                 }
1984         }
1985
1986         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1987         if (ret == -EAGAIN) {
1988                 re_search = true;
1989                 goto again;
1990         }
1991 out_lock:
1992         spin_unlock(&ctl->tree_lock);
1993 out:
1994         return ret;
1995 }
1996
1997 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1998                            u64 bytes)
1999 {
2000         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2001         struct btrfs_free_space *info;
2002         struct rb_node *n;
2003         int count = 0;
2004
2005         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
2006                 info = rb_entry(n, struct btrfs_free_space, offset_index);
2007                 if (info->bytes >= bytes && !block_group->ro)
2008                         count++;
2009                 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
2010                        info->offset, info->bytes,
2011                        (info->bitmap) ? "yes" : "no");
2012         }
2013         printk(KERN_INFO "block group has cluster?: %s\n",
2014                list_empty(&block_group->cluster_list) ? "no" : "yes");
2015         printk(KERN_INFO "%d blocks of free space at or bigger than %llu bytes\n",
2016                count, bytes);
2017 }
2018
2019 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
2020 {
2021         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2022
2023         spin_lock_init(&ctl->tree_lock);
2024         ctl->unit = block_group->sectorsize;
2025         ctl->start = block_group->key.objectid;
2026         ctl->private = block_group;
2027         ctl->op = &free_space_op;
2028
2029         /*
2030          * we only want to have 32k of ram per block group for keeping
2031          * track of free space, and if we pass 1/2 of that we want to
2032          * start converting things over to using bitmaps
2033          */
2034         ctl->extents_thresh = ((1024 * 32) / 2) /
2035                                 sizeof(struct btrfs_free_space);
2036 }
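
/*
 * Worked example (illustrative): the threshold above is 16 KiB worth of
 * entries.  On a 64-bit build where struct btrfs_free_space is 64 bytes,
 * that comes to roughly 256 extent entries per block group before
 * use_bitmap() starts steering small extents into bitmaps; the exact number
 * depends on the structure size for the architecture in question.
 */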
2037
2038 /*
2039  * for a given cluster, put all of its extents back into the free
2040  * space cache.  If the block group passed doesn't match the block group
2041  * pointed to by the cluster, someone else raced in and freed the
2042  * cluster already.  In that case, we just return without changing anything
2043  */
2044 static int
2045 __btrfs_return_cluster_to_free_space(
2046                              struct btrfs_block_group_cache *block_group,
2047                              struct btrfs_free_cluster *cluster)
2048 {
2049         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2050         struct btrfs_free_space *entry;
2051         struct rb_node *node;
2052
2053         spin_lock(&cluster->lock);
2054         if (cluster->block_group != block_group)
2055                 goto out;
2056
2057         cluster->block_group = NULL;
2058         cluster->window_start = 0;
2059         list_del_init(&cluster->block_group_list);
2060
2061         node = rb_first(&cluster->root);
2062         while (node) {
2063                 bool bitmap;
2064
2065                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2066                 node = rb_next(&entry->offset_index);
2067                 rb_erase(&entry->offset_index, &cluster->root);
2068
2069                 bitmap = (entry->bitmap != NULL);
2070                 if (!bitmap)
2071                         try_merge_free_space(ctl, entry, false);
2072                 tree_insert_offset(&ctl->free_space_offset,
2073                                    entry->offset, &entry->offset_index, bitmap);
2074         }
2075         cluster->root = RB_ROOT;
2076
2077 out:
2078         spin_unlock(&cluster->lock);
2079         btrfs_put_block_group(block_group);
2080         return 0;
2081 }
2082
2083 static void __btrfs_remove_free_space_cache_locked(
2084                                 struct btrfs_free_space_ctl *ctl)
2085 {
2086         struct btrfs_free_space *info;
2087         struct rb_node *node;
2088
2089         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2090                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2091                 if (!info->bitmap) {
2092                         unlink_free_space(ctl, info);
2093                         kmem_cache_free(btrfs_free_space_cachep, info);
2094                 } else {
2095                         free_bitmap(ctl, info);
2096                 }
2097                 if (need_resched()) {
2098                         spin_unlock(&ctl->tree_lock);
2099                         cond_resched();
2100                         spin_lock(&ctl->tree_lock);
2101                 }
2102         }
2103 }
2104
2105 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2106 {
2107         spin_lock(&ctl->tree_lock);
2108         __btrfs_remove_free_space_cache_locked(ctl);
2109         spin_unlock(&ctl->tree_lock);
2110 }
2111
2112 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2113 {
2114         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2115         struct btrfs_free_cluster *cluster;
2116         struct list_head *head;
2117
2118         spin_lock(&ctl->tree_lock);
2119         while ((head = block_group->cluster_list.next) !=
2120                &block_group->cluster_list) {
2121                 cluster = list_entry(head, struct btrfs_free_cluster,
2122                                      block_group_list);
2123
2124                 WARN_ON(cluster->block_group != block_group);
2125                 __btrfs_return_cluster_to_free_space(block_group, cluster);
2126                 if (need_resched()) {
2127                         spin_unlock(&ctl->tree_lock);
2128                         cond_resched();
2129                         spin_lock(&ctl->tree_lock);
2130                 }
2131         }
2132         __btrfs_remove_free_space_cache_locked(ctl);
2133         spin_unlock(&ctl->tree_lock);
2134
2135 }
2136
2137 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2138                                u64 offset, u64 bytes, u64 empty_size,
2139                                u64 *max_extent_size)
2140 {
2141         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2142         struct btrfs_free_space *entry = NULL;
2143         u64 bytes_search = bytes + empty_size;
2144         u64 ret = 0;
2145         u64 align_gap = 0;
2146         u64 align_gap_len = 0;
2147
2148         spin_lock(&ctl->tree_lock);
2149         entry = find_free_space(ctl, &offset, &bytes_search,
2150                                 block_group->full_stripe_len, max_extent_size);
2151         if (!entry)
2152                 goto out;
2153
2154         ret = offset;
2155         if (entry->bitmap) {
2156                 bitmap_clear_bits(ctl, entry, offset, bytes);
2157                 if (!entry->bytes)
2158                         free_bitmap(ctl, entry);
2159         } else {
2160                 unlink_free_space(ctl, entry);
2161                 align_gap_len = offset - entry->offset;
2162                 align_gap = entry->offset;
2163
2164                 entry->offset = offset + bytes;
2165                 WARN_ON(entry->bytes < bytes + align_gap_len);
2166
2167                 entry->bytes -= bytes + align_gap_len;
2168                 if (!entry->bytes)
2169                         kmem_cache_free(btrfs_free_space_cachep, entry);
2170                 else
2171                         link_free_space(ctl, entry);
2172         }
2173 out:
2174         spin_unlock(&ctl->tree_lock);
2175
2176         if (align_gap_len)
2177                 __btrfs_add_free_space(ctl, align_gap, align_gap_len);
2178         return ret;
2179 }
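
/*
 * Illustrative caller sketch (an assumption for documentation purposes, not
 * part of the original file): the extent allocator asks a block group for
 * 'bytes' of space starting its search at 'search_start'.  A return of 0
 * means nothing suitable was found, and max_extent_size then reports the
 * largest extent seen so the caller can avoid retrying this block group
 * with the same size.
 */
static inline u64 example_alloc_from_group(struct btrfs_block_group_cache *bg,
					   u64 search_start, u64 bytes)
{
	u64 max_extent_size = 0;
	u64 found;

	found = btrfs_find_space_for_alloc(bg, search_start, bytes,
					   0 /* empty_size */,
					   &max_extent_size);
	if (!found)
		return 0;	/* fall back to another block group */

	return found;		/* logical start of the reserved space */
}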
2180
2181 /*
2182  * given a cluster, put all of its extents back into the free space
2183  * cache.  If a block group is passed, this function will only free
2184  * a cluster that belongs to the passed block group.
2185  *
2186  * Otherwise, it'll get a reference on the block group pointed to by the
2187  * cluster and remove the cluster from it.
2188  */
2189 int btrfs_return_cluster_to_free_space(
2190                                struct btrfs_block_group_cache *block_group,
2191                                struct btrfs_free_cluster *cluster)
2192 {
2193         struct btrfs_free_space_ctl *ctl;
2194         int ret;
2195
2196         /* first, get a safe pointer to the block group */
2197         spin_lock(&cluster->lock);
2198         if (!block_group) {
2199                 block_group = cluster->block_group;
2200                 if (!block_group) {
2201                         spin_unlock(&cluster->lock);
2202                         return 0;
2203                 }
2204         } else if (cluster->block_group != block_group) {
2205                 /* someone else has already freed it, don't redo their work */
2206                 spin_unlock(&cluster->lock);
2207                 return 0;
2208         }
2209         atomic_inc(&block_group->count);
2210         spin_unlock(&cluster->lock);
2211
2212         ctl = block_group->free_space_ctl;
2213
2214         /* now return any extents the cluster had on it */
2215         spin_lock(&ctl->tree_lock);
2216         ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2217         spin_unlock(&ctl->tree_lock);
2218
2219         /* finally drop our ref */
2220         btrfs_put_block_group(block_group);
2221         return ret;
2222 }
2223
2224 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2225                                    struct btrfs_free_cluster *cluster,
2226                                    struct btrfs_free_space *entry,
2227                                    u64 bytes, u64 min_start,
2228                                    u64 *max_extent_size)
2229 {
2230         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2231         int err;
2232         u64 search_start = cluster->window_start;
2233         u64 search_bytes = bytes;
2234         u64 ret = 0;
2235
2236         search_start = min_start;
2237         search_bytes = bytes;
2238
2239         err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2240         if (err) {
2241                 if (search_bytes > *max_extent_size)
2242                         *max_extent_size = search_bytes;
2243                 return 0;
2244         }
2245
2246         ret = search_start;
2247         __bitmap_clear_bits(ctl, entry, ret, bytes);
2248
2249         return ret;
2250 }
2251
2252 /*
2253  * given a cluster, try to allocate 'bytes' from it, returns 0
2254  * if it couldn't find anything suitably large, or a logical disk offset
2255  * if things worked out
2256  */
2257 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2258                              struct btrfs_free_cluster *cluster, u64 bytes,
2259                              u64 min_start, u64 *max_extent_size)
2260 {
2261         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2262         struct btrfs_free_space *entry = NULL;
2263         struct rb_node *node;
2264         u64 ret = 0;
2265
2266         spin_lock(&cluster->lock);
2267         if (bytes > cluster->max_size)
2268                 goto out;
2269
2270         if (cluster->block_group != block_group)
2271                 goto out;
2272
2273         node = rb_first(&cluster->root);
2274         if (!node)
2275                 goto out;
2276
2277         entry = rb_entry(node, struct btrfs_free_space, offset_index);
2278         while (1) {
2279                 if (entry->bytes < bytes && entry->bytes > *max_extent_size)
2280                         *max_extent_size = entry->bytes;
2281
2282                 if (entry->bytes < bytes ||
2283                     (!entry->bitmap && entry->offset < min_start)) {
2284                         node = rb_next(&entry->offset_index);
2285                         if (!node)
2286                                 break;
2287                         entry = rb_entry(node, struct btrfs_free_space,
2288                                          offset_index);
2289                         continue;
2290                 }
2291
2292                 if (entry->bitmap) {
2293                         ret = btrfs_alloc_from_bitmap(block_group,
2294                                                       cluster, entry, bytes,
2295                                                       cluster->window_start,
2296                                                       max_extent_size);
2297                         if (ret == 0) {
2298                                 node = rb_next(&entry->offset_index);
2299                                 if (!node)
2300                                         break;
2301                                 entry = rb_entry(node, struct btrfs_free_space,
2302                                                  offset_index);
2303                                 continue;
2304                         }
2305                         cluster->window_start += bytes;
2306                 } else {
2307                         ret = entry->offset;
2308
2309                         entry->offset += bytes;
2310                         entry->bytes -= bytes;
2311                 }
2312
2313                 if (entry->bytes == 0)
2314                         rb_erase(&entry->offset_index, &cluster->root);
2315                 break;
2316         }
2317 out:
2318         spin_unlock(&cluster->lock);
2319
2320         if (!ret)
2321                 return 0;
2322
2323         spin_lock(&ctl->tree_lock);
2324
2325         ctl->free_space -= bytes;
2326         if (entry->bytes == 0) {
2327                 ctl->free_extents--;
2328                 if (entry->bitmap) {
2329                         kfree(entry->bitmap);
2330                         ctl->total_bitmaps--;
2331                         ctl->op->recalc_thresholds(ctl);
2332                 }
2333                 kmem_cache_free(btrfs_free_space_cachep, entry);
2334         }
2335
2336         spin_unlock(&ctl->tree_lock);
2337
2338         return ret;
2339 }
2340
2341 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2342                                 struct btrfs_free_space *entry,
2343                                 struct btrfs_free_cluster *cluster,
2344                                 u64 offset, u64 bytes,
2345                                 u64 cont1_bytes, u64 min_bytes)
2346 {
2347         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2348         unsigned long next_zero;
2349         unsigned long i;
2350         unsigned long want_bits;
2351         unsigned long min_bits;
2352         unsigned long found_bits;
2353         unsigned long start = 0;
2354         unsigned long total_found = 0;
2355         int ret;
2356
2357         i = offset_to_bit(entry->offset, ctl->unit,
2358                           max_t(u64, offset, entry->offset));
2359         want_bits = bytes_to_bits(bytes, ctl->unit);
2360         min_bits = bytes_to_bits(min_bytes, ctl->unit);
2361
2362 again:
2363         found_bits = 0;
2364         for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
2365                 next_zero = find_next_zero_bit(entry->bitmap,
2366                                                BITS_PER_BITMAP, i);
2367                 if (next_zero - i >= min_bits) {
2368                         found_bits = next_zero - i;
2369                         break;
2370                 }
2371                 i = next_zero;
2372         }
2373
2374         if (!found_bits)
2375                 return -ENOSPC;
2376
2377         if (!total_found) {
2378                 start = i;
2379                 cluster->max_size = 0;
2380         }
2381
2382         total_found += found_bits;
2383
2384         if (cluster->max_size < found_bits * ctl->unit)
2385                 cluster->max_size = found_bits * ctl->unit;
2386
2387         if (total_found < want_bits || cluster->max_size < cont1_bytes) {
2388                 i = next_zero + 1;
2389                 goto again;
2390         }
2391
2392         cluster->window_start = start * ctl->unit + entry->offset;
2393         rb_erase(&entry->offset_index, &ctl->free_space_offset);
2394         ret = tree_insert_offset(&cluster->root, entry->offset,
2395                                  &entry->offset_index, 1);
2396         ASSERT(!ret); /* -EEXIST; Logic error */
2397
2398         trace_btrfs_setup_cluster(block_group, cluster,
2399                                   total_found * ctl->unit, 1);
2400         return 0;
2401 }
2402
2403 /*
2404  * This searches the block group for just extents to fill the cluster with.
2405  * Try to find a cluster with at least bytes total bytes, at least one
2406  * extent of cont1_bytes, and other clusters of at least min_bytes.
2407  */
2408 static noinline int
2409 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2410                         struct btrfs_free_cluster *cluster,
2411                         struct list_head *bitmaps, u64 offset, u64 bytes,
2412                         u64 cont1_bytes, u64 min_bytes)
2413 {
2414         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2415         struct btrfs_free_space *first = NULL;
2416         struct btrfs_free_space *entry = NULL;
2417         struct btrfs_free_space *last;
2418         struct rb_node *node;
2419         u64 window_start;
2420         u64 window_free;
2421         u64 max_extent;
2422         u64 total_size = 0;
2423
2424         entry = tree_search_offset(ctl, offset, 0, 1);
2425         if (!entry)
2426                 return -ENOSPC;
2427
2428         /*
2429          * We don't want bitmaps, so just move along until we find a normal
2430          * extent entry.
2431          */
2432         while (entry->bitmap || entry->bytes < min_bytes) {
2433                 if (entry->bitmap && list_empty(&entry->list))
2434                         list_add_tail(&entry->list, bitmaps);
2435                 node = rb_next(&entry->offset_index);
2436                 if (!node)
2437                         return -ENOSPC;
2438                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2439         }
2440
2441         window_start = entry->offset;
2442         window_free = entry->bytes;
2443         max_extent = entry->bytes;
2444         first = entry;
2445         last = entry;
2446
2447         for (node = rb_next(&entry->offset_index); node;
2448              node = rb_next(&entry->offset_index)) {
2449                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2450
2451                 if (entry->bitmap) {
2452                         if (list_empty(&entry->list))
2453                                 list_add_tail(&entry->list, bitmaps);
2454                         continue;
2455                 }
2456
2457                 if (entry->bytes < min_bytes)
2458                         continue;
2459
2460                 last = entry;
2461                 window_free += entry->bytes;
2462                 if (entry->bytes > max_extent)
2463                         max_extent = entry->bytes;
2464         }
2465
2466         if (window_free < bytes || max_extent < cont1_bytes)
2467                 return -ENOSPC;
2468
2469         cluster->window_start = first->offset;
2470
2471         node = &first->offset_index;
2472
2473         /*
2474          * now we've found our entries, pull them out of the free space
2475          * cache and put them into the cluster rbtree
2476          */
2477         do {
2478                 int ret;
2479
2480                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2481                 node = rb_next(&entry->offset_index);
2482                 if (entry->bitmap || entry->bytes < min_bytes)
2483                         continue;
2484
2485                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2486                 ret = tree_insert_offset(&cluster->root, entry->offset,
2487                                          &entry->offset_index, 0);
2488                 total_size += entry->bytes;
2489                 ASSERT(!ret); /* -EEXIST; Logic error */
2490         } while (node && entry != last);
2491
2492         cluster->max_size = max_extent;
2493         trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
2494         return 0;
2495 }
2496
2497 /*
2498  * This specifically looks for bitmaps that may work in the cluster; we assume
2499  * that we have already failed to find extents that will work.
2500  */
2501 static noinline int
2502 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2503                      struct btrfs_free_cluster *cluster,
2504                      struct list_head *bitmaps, u64 offset, u64 bytes,
2505                      u64 cont1_bytes, u64 min_bytes)
2506 {
2507         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2508         struct btrfs_free_space *entry;
2509         int ret = -ENOSPC;
2510         u64 bitmap_offset = offset_to_bitmap(ctl, offset);
2511
2512         if (ctl->total_bitmaps == 0)
2513                 return -ENOSPC;
2514
2515         /*
2516          * The bitmap that covers offset won't be in the list unless offset
2517          * is just its start offset.
2518          */
2519         entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2520         if (entry->offset != bitmap_offset) {
2521                 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2522                 if (entry && list_empty(&entry->list))
2523                         list_add(&entry->list, bitmaps);
2524         }
2525
2526         list_for_each_entry(entry, bitmaps, list) {
2527                 if (entry->bytes < bytes)
2528                         continue;
2529                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2530                                            bytes, cont1_bytes, min_bytes);
2531                 if (!ret)
2532                         return 0;
2533         }
2534
2535         /*
2536          * The bitmaps list has all the bitmaps that record free space
2537          * starting after offset, so no more search is required.
2538          */
2539         return -ENOSPC;
2540 }
2541
2542 /*
2543  * here we try to find a cluster of blocks in a block group.  The goal
2544  * is to find at least bytes+empty_size.
2545  * We might not find them all in one contiguous area.
2546  *
2547  * returns zero and sets up cluster if things worked out, otherwise
2548  * it returns -enospc
2549  */
2550 int btrfs_find_space_cluster(struct btrfs_root *root,
2551                              struct btrfs_block_group_cache *block_group,
2552                              struct btrfs_free_cluster *cluster,
2553                              u64 offset, u64 bytes, u64 empty_size)
2554 {
2555         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2556         struct btrfs_free_space *entry, *tmp;
2557         LIST_HEAD(bitmaps);
2558         u64 min_bytes;
2559         u64 cont1_bytes;
2560         int ret;
2561
2562         /*
2563          * Choose the minimum extent size we'll require for this
2564          * cluster.  For SSD_SPREAD, don't allow any fragmentation.
2565          * For metadata, allow allocates with smaller extents.  For
2566          * data, keep it dense.
2567          */
2568         if (btrfs_test_opt(root, SSD_SPREAD)) {
2569                 cont1_bytes = min_bytes = bytes + empty_size;
2570         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2571                 cont1_bytes = bytes;
2572                 min_bytes = block_group->sectorsize;
2573         } else {
2574                 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
2575                 min_bytes = block_group->sectorsize;
2576         }
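
	/*
	 * Worked numbers (illustrative, assuming a 4K sectorsize and a request
	 * of bytes = 256 KiB with empty_size = 1792 KiB, i.e. 2 MiB total):
	 * with SSD_SPREAD both cont1_bytes and min_bytes become 2 MiB, so only
	 * a fully contiguous window qualifies; for metadata cont1_bytes is
	 * 256 KiB and min_bytes is 4 KiB; for data cont1_bytes is
	 * max(256 KiB, 2 MiB >> 2) = 512 KiB with min_bytes of 4 KiB.
	 */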
2577
2578         spin_lock(&ctl->tree_lock);
2579
2580         /*
2581          * If we know we don't have enough space to make a cluster don't even
2582          * bother doing all the work to try and find one.
2583          */
2584         if (ctl->free_space < bytes) {
2585                 spin_unlock(&ctl->tree_lock);
2586                 return -ENOSPC;
2587         }
2588
2589         spin_lock(&cluster->lock);
2590
2591         /* someone already found a cluster, hooray */
2592         if (cluster->block_group) {
2593                 ret = 0;
2594                 goto out;
2595         }
2596
2597         trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
2598                                  min_bytes);
2599
2600         INIT_LIST_HEAD(&bitmaps);
2601         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2602                                       bytes + empty_size,
2603                                       cont1_bytes, min_bytes);
2604         if (ret)
2605                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2606                                            offset, bytes + empty_size,
2607                                            cont1_bytes, min_bytes);
2608
2609         /* Clear our temporary list */
2610         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2611                 list_del_init(&entry->list);
2612
2613         if (!ret) {
2614                 atomic_inc(&block_group->count);
2615                 list_add_tail(&cluster->block_group_list,
2616                               &block_group->cluster_list);
2617                 cluster->block_group = block_group;
2618         } else {
2619                 trace_btrfs_failed_cluster_setup(block_group);
2620         }
2621 out:
2622         spin_unlock(&cluster->lock);
2623         spin_unlock(&ctl->tree_lock);
2624
2625         return ret;
2626 }
2627
2628 /*
2629  * simple code to zero out a cluster
2630  */
2631 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2632 {
2633         spin_lock_init(&cluster->lock);
2634         spin_lock_init(&cluster->refill_lock);
2635         cluster->root = RB_ROOT;
2636         cluster->max_size = 0;
2637         INIT_LIST_HEAD(&cluster->block_group_list);
2638         cluster->block_group = NULL;
2639 }
2640
2641 static int do_trimming(struct btrfs_block_group_cache *block_group,
2642                        u64 *total_trimmed, u64 start, u64 bytes,
2643                        u64 reserved_start, u64 reserved_bytes)
2644 {
2645         struct btrfs_space_info *space_info = block_group->space_info;
2646         struct btrfs_fs_info *fs_info = block_group->fs_info;
2647         int ret;
2648         int update = 0;
2649         u64 trimmed = 0;
2650
2651         spin_lock(&space_info->lock);
2652         spin_lock(&block_group->lock);
2653         if (!block_group->ro) {
2654                 block_group->reserved += reserved_bytes;
2655                 space_info->bytes_reserved += reserved_bytes;
2656                 update = 1;
2657         }
2658         spin_unlock(&block_group->lock);
2659         spin_unlock(&space_info->lock);
2660
2661         ret = btrfs_error_discard_extent(fs_info->extent_root,
2662                                          start, bytes, &trimmed);
2663         if (!ret)
2664                 *total_trimmed += trimmed;
2665
2666         btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
2667
2668         if (update) {
2669                 spin_lock(&space_info->lock);
2670                 spin_lock(&block_group->lock);
2671                 if (block_group->ro)
2672                         space_info->bytes_readonly += reserved_bytes;
2673                 block_group->reserved -= reserved_bytes;
2674                 space_info->bytes_reserved -= reserved_bytes;
2675                 spin_unlock(&space_info->lock);
2676                 spin_unlock(&block_group->lock);
2677         }
2678
2679         return ret;
2680 }
2681
2682 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
2683                           u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2684 {
2685         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2686         struct btrfs_free_space *entry;
2687         struct rb_node *node;
2688         int ret = 0;
2689         u64 extent_start;
2690         u64 extent_bytes;
2691         u64 bytes;
2692
2693         while (start < end) {
2694                 spin_lock(&ctl->tree_lock);
2695
2696                 if (ctl->free_space < minlen) {
2697                         spin_unlock(&ctl->tree_lock);
2698                         break;
2699                 }
2700
2701                 entry = tree_search_offset(ctl, start, 0, 1);
2702                 if (!entry) {
2703                         spin_unlock(&ctl->tree_lock);
2704                         break;
2705                 }
2706
2707                 /* skip bitmaps */
2708                 while (entry->bitmap) {
2709                         node = rb_next(&entry->offset_index);
2710                         if (!node) {
2711                                 spin_unlock(&ctl->tree_lock);
2712                                 goto out;
2713                         }
2714                         entry = rb_entry(node, struct btrfs_free_space,
2715                                          offset_index);
2716                 }
2717
2718                 if (entry->offset >= end) {
2719                         spin_unlock(&ctl->tree_lock);
2720                         break;
2721                 }
2722
2723                 extent_start = entry->offset;
2724                 extent_bytes = entry->bytes;
2725                 start = max(start, extent_start);
2726                 bytes = min(extent_start + extent_bytes, end) - start;
2727                 if (bytes < minlen) {
2728                         spin_unlock(&ctl->tree_lock);
2729                         goto next;
2730                 }
2731
2732                 unlink_free_space(ctl, entry);
2733                 kmem_cache_free(btrfs_free_space_cachep, entry);
2734
2735                 spin_unlock(&ctl->tree_lock);
2736
2737                 ret = do_trimming(block_group, total_trimmed, start, bytes,
2738                                   extent_start, extent_bytes);
2739                 if (ret)
2740                         break;
2741 next:
2742                 start += bytes;
2743
2744                 if (fatal_signal_pending(current)) {
2745                         ret = -ERESTARTSYS;
2746                         break;
2747                 }
2748
2749                 cond_resched();
2750         }
2751 out:
2752         return ret;
2753 }
2754
2755 static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
2756                         u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2757 {
2758         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2759         struct btrfs_free_space *entry;
2760         int ret = 0;
2761         int ret2;
2762         u64 bytes;
2763         u64 offset = offset_to_bitmap(ctl, start);
2764
2765         while (offset < end) {
2766                 bool next_bitmap = false;
2767
2768                 spin_lock(&ctl->tree_lock);
2769
2770                 if (ctl->free_space < minlen) {
2771                         spin_unlock(&ctl->tree_lock);
2772                         break;
2773                 }
2774
2775                 entry = tree_search_offset(ctl, offset, 1, 0);
2776                 if (!entry) {
2777                         spin_unlock(&ctl->tree_lock);
2778                         next_bitmap = true;
2779                         goto next;
2780                 }
2781
2782                 bytes = minlen;
2783                 ret2 = search_bitmap(ctl, entry, &start, &bytes);
2784                 if (ret2 || start >= end) {
2785                         spin_unlock(&ctl->tree_lock);
2786                         next_bitmap = true;
2787                         goto next;
2788                 }
2789
2790                 bytes = min(bytes, end - start);
2791                 if (bytes < minlen) {
2792                         spin_unlock(&ctl->tree_lock);
2793                         goto next;
2794                 }
2795
2796                 bitmap_clear_bits(ctl, entry, start, bytes);
2797                 if (entry->bytes == 0)
2798                         free_bitmap(ctl, entry);
2799
2800                 spin_unlock(&ctl->tree_lock);
2801
2802                 ret = do_trimming(block_group, total_trimmed, start, bytes,
2803                                   start, bytes);
2804                 if (ret)
2805                         break;
2806 next:
2807                 if (next_bitmap) {
2808                         offset += BITS_PER_BITMAP * ctl->unit;
2809                 } else {
2810                         start += bytes;
2811                         if (start >= offset + BITS_PER_BITMAP * ctl->unit)
2812                                 offset += BITS_PER_BITMAP * ctl->unit;
2813                 }
2814
2815                 if (fatal_signal_pending(current)) {
2816                         ret = -ERESTARTSYS;
2817                         break;
2818                 }
2819
2820                 cond_resched();
2821         }
2822
2823         return ret;
2824 }
2825
2826 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2827                            u64 *trimmed, u64 start, u64 end, u64 minlen)
2828 {
2829         int ret;
2830
2831         *trimmed = 0;
2832
2833         ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
2834         if (ret)
2835                 return ret;
2836
2837         ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
2838
2839         return ret;
2840 }
2841
2842 /*
2843  * Find the left-most item in the cache tree, and then return the
2844  * smallest inode number in the item.
2845  *
2846  * Note: the returned inode number may not be the smallest one in
2847  * the tree, if the left-most item is a bitmap.
2848  */
2849 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2850 {
2851         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2852         struct btrfs_free_space *entry = NULL;
2853         u64 ino = 0;
2854
2855         spin_lock(&ctl->tree_lock);
2856
2857         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2858                 goto out;
2859
2860         entry = rb_entry(rb_first(&ctl->free_space_offset),
2861                          struct btrfs_free_space, offset_index);
2862
2863         if (!entry->bitmap) {
2864                 ino = entry->offset;
2865
2866                 unlink_free_space(ctl, entry);
2867                 entry->offset++;
2868                 entry->bytes--;
2869                 if (!entry->bytes)
2870                         kmem_cache_free(btrfs_free_space_cachep, entry);
2871                 else
2872                         link_free_space(ctl, entry);
2873         } else {
2874                 u64 offset = 0;
2875                 u64 count = 1;
2876                 int ret;
2877
2878                 ret = search_bitmap(ctl, entry, &offset, &count);
2879                 /* Logic error; Should be empty if it can't find anything */
2880                 ASSERT(!ret);
2881
2882                 ino = offset;
2883                 bitmap_clear_bits(ctl, entry, offset, 1);
2884                 if (entry->bytes == 0)
2885                         free_bitmap(ctl, entry);
2886         }
2887 out:
2888         spin_unlock(&ctl->tree_lock);
2889
2890         return ino;
2891 }
2892
2893 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2894                                     struct btrfs_path *path)
2895 {
2896         struct inode *inode = NULL;
2897
2898         spin_lock(&root->cache_lock);
2899         if (root->cache_inode)
2900                 inode = igrab(root->cache_inode);
2901         spin_unlock(&root->cache_lock);
2902         if (inode)
2903                 return inode;
2904
2905         inode = __lookup_free_space_inode(root, path, 0);
2906         if (IS_ERR(inode))
2907                 return inode;
2908
2909         spin_lock(&root->cache_lock);
2910         if (!btrfs_fs_closing(root->fs_info))
2911                 root->cache_inode = igrab(inode);
2912         spin_unlock(&root->cache_lock);
2913
2914         return inode;
2915 }
2916
2917 int create_free_ino_inode(struct btrfs_root *root,
2918                           struct btrfs_trans_handle *trans,
2919                           struct btrfs_path *path)
2920 {
2921         return __create_free_space_inode(root, trans, path,
2922                                          BTRFS_FREE_INO_OBJECTID, 0);
2923 }
2924
2925 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2926 {
2927         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2928         struct btrfs_path *path;
2929         struct inode *inode;
2930         int ret = 0;
2931         u64 root_gen = btrfs_root_generation(&root->root_item);
2932
2933         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2934                 return 0;
2935
2936         /*
2937          * If we're unmounting then just return, since this searches the normal
2938          * root rather than the commit root and we could deadlock.
2939          */
2940         if (btrfs_fs_closing(fs_info))
2941                 return 0;
2942
2943         path = btrfs_alloc_path();
2944         if (!path)
2945                 return 0;
2946
2947         inode = lookup_free_ino_inode(root, path);
2948         if (IS_ERR(inode))
2949                 goto out;
2950
2951         if (root_gen != BTRFS_I(inode)->generation)
2952                 goto out_put;
2953
2954         ret = __load_free_space_cache(root, inode, ctl, path, 0);
2955
2956         if (ret < 0)
2957                 btrfs_err(fs_info,
2958                         "failed to load free ino cache for root %llu",
2959                         root->root_key.objectid);
2960 out_put:
2961         iput(inode);
2962 out:
2963         btrfs_free_path(path);
2964         return ret;
2965 }
2966
2967 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2968                               struct btrfs_trans_handle *trans,
2969                               struct btrfs_path *path,
2970                               struct inode *inode)
2971 {
2972         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2973         int ret;
2974
2975         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2976                 return 0;
2977
2978         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2979         if (ret) {
2980                 btrfs_delalloc_release_metadata(inode, inode->i_size);
2981 #ifdef DEBUG
2982                 btrfs_err(root->fs_info,
2983                         "failed to write free ino cache for root %llu",
2984                         root->root_key.objectid);
2985 #endif
2986         }
2987
2988         return ret;
2989 }
2990
2991 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
2992 /*
2993  * Use this if you need to make a bitmap or extent entry specifically.  It
2994  * doesn't do any of the merging that add_free_space() does; it behaves much
2995  * like the free space cache loading code, so you can build really weird
2996  * configurations.  An illustrative usage sketch follows test_check_exists().
2997  */
2998 int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
2999                               u64 offset, u64 bytes, bool bitmap)
3000 {
3001         struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3002         struct btrfs_free_space *info = NULL, *bitmap_info;
3003         void *map = NULL;
3004         u64 bytes_added;
3005         int ret;
3006
3007 again:
3008         if (!info) {
3009                 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
3010                 if (!info)
3011                         return -ENOMEM;
3012         }
3013
3014         if (!bitmap) {
3015                 spin_lock(&ctl->tree_lock);
3016                 info->offset = offset;
3017                 info->bytes = bytes;
3018                 ret = link_free_space(ctl, info);
3019                 spin_unlock(&ctl->tree_lock);
3020                 if (ret)
3021                         kmem_cache_free(btrfs_free_space_cachep, info);
3022                 return ret;
3023         }
3024
3025         if (!map) {
3026                 map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
3027                 if (!map) {
3028                         kmem_cache_free(btrfs_free_space_cachep, info);
3029                         return -ENOMEM;
3030                 }
3031         }
3032
3033         spin_lock(&ctl->tree_lock);
3034         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3035                                          1, 0);
3036         if (!bitmap_info) {
3037                 info->bitmap = map;
3038                 map = NULL;
3039                 add_new_bitmap(ctl, info, offset);
3040                 bitmap_info = info;
3041         }
3042
3043         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
3044         bytes -= bytes_added;
3045         offset += bytes_added;
3046         spin_unlock(&ctl->tree_lock);
3047
3048         if (bytes)
3049                 goto again;
3050
3051         /* kfree(NULL) is a no-op, so no check is needed */
3052         kfree(map);
3053         return 0;
3054 }
3055
3056 /*
3057  * Check whether any part of the given range is in the free space cache.  This
3058  * is really just used to verify the absence of space, so if there is any free
3059  * space in the range at all we return 1.
3060  */
3061 int test_check_exists(struct btrfs_block_group_cache *cache,
3062                       u64 offset, u64 bytes)
3063 {
3064         struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3065         struct btrfs_free_space *info;
3066         int ret = 0;
3067
3068         spin_lock(&ctl->tree_lock);
3069         info = tree_search_offset(ctl, offset, 0, 0);
3070         if (!info) {
3071                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3072                                           1, 0);
3073                 if (!info)
3074                         goto out;
3075         }
3076
3077 have_info:
3078         if (info->bitmap) {
3079                 u64 bit_off, bit_bytes;
3080                 struct rb_node *n;
3081                 struct btrfs_free_space *tmp;
3082
3083                 bit_off = offset;
3084                 bit_bytes = ctl->unit;
3085                 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes);
3086                 if (!ret) {
3087                         if (bit_off == offset) {
3088                                 ret = 1;
3089                                 goto out;
3090                         } else if (bit_off > offset &&
3091                                    offset + bytes > bit_off) {
3092                                 ret = 1;
3093                                 goto out;
3094                         }
3095                 }
3096
3097                 n = rb_prev(&info->offset_index);
3098                 while (n) {
3099                         tmp = rb_entry(n, struct btrfs_free_space,
3100                                        offset_index);
3101                         if (tmp->offset + tmp->bytes < offset)
3102                                 break;
3103                         if (offset + bytes < tmp->offset) {
3104                                 n = rb_prev(&tmp->offset_index);
3105                                 continue;
3106                         }
3107                         info = tmp;
3108                         goto have_info;
3109                 }
3110
3111                 n = rb_next(&info->offset_index);
3112                 while (n) {
3113                         tmp = rb_entry(n, struct btrfs_free_space,
3114                                        offset_index);
3115                         if (offset + bytes < tmp->offset)
3116                                 break;
3117                         if (tmp->offset + tmp->bytes < offset) {
3118                                 n = rb_next(&tmp->offset_index);
3119                                 continue;
3120                         }
3121                         info = tmp;
3122                         goto have_info;
3123                 }
3124
3125                 goto out;
3126         }
3127
3128         if (info->offset == offset) {
3129                 ret = 1;
3130                 goto out;
3131         }
3132
3133         if (offset > info->offset && offset < info->offset + info->bytes)
3134                 ret = 1;
3135 out:
3136         spin_unlock(&ctl->tree_lock);
3137         return ret;
3138 }
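
/*
 * Illustrative only (not part of the original file): a minimal sketch of how a
 * sanity test might combine test_add_free_space_entry() and
 * test_check_exists().  The block group pointer and the chosen offsets are
 * assumptions (the group is assumed to start at logical offset 0 and to be
 * large enough); the real self-tests live in the btrfs test suite.
 */
static int __maybe_unused example_free_space_sanity_check(
				struct btrfs_block_group_cache *cache)
{
	int ret;

	/* Add a 1MB extent entry at offset 0, with no merging performed. */
	ret = test_add_free_space_entry(cache, 0, 1024 * 1024, false);
	if (ret)
		return ret;

	/* Add 4MB at offset 4MB as bitmap bits rather than an extent entry. */
	ret = test_add_free_space_entry(cache, 4 * 1024 * 1024,
					4 * 1024 * 1024, true);
	if (ret)
		return ret;

	/* Both ranges should now be reported as present in the cache. */
	if (!test_check_exists(cache, 0, 1024 * 1024))
		return -EINVAL;
	if (!test_check_exists(cache, 4 * 1024 * 1024, 4 * 1024 * 1024))
		return -EINVAL;

	/* The hole between the two entries should be reported as absent. */
	if (test_check_exists(cache, 2 * 1024 * 1024, 1024 * 1024))
		return -EINVAL;

	return 0;
}
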
3139 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */