/*
 * alloc.c - NILFS dat/inode allocator
 *
 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Original code was written by Koji Sato <koji@osrg.net>.
 * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>,
 *                                Amagai Yoshiji <amagai@osrg.net>.
 */
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/bitops.h>
#include <linux/slab.h>
35 * nilfs_palloc_groups_per_desc_block - get the number of groups that a group
36 * descriptor block can maintain
37 * @inode: inode of metadata file using this allocator
39 static inline unsigned long
40 nilfs_palloc_groups_per_desc_block(const struct inode *inode)
42 return (1UL << inode->i_blkbits) /
43 sizeof(struct nilfs_palloc_group_desc);
47 * nilfs_palloc_groups_count - get maximum number of groups
48 * @inode: inode of metadata file using this allocator
50 static inline unsigned long
51 nilfs_palloc_groups_count(const struct inode *inode)
53 return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */));
57 * nilfs_palloc_init_blockgroup - initialize private variables for allocator
58 * @inode: inode of metadata file using this allocator
59 * @entry_size: size of the persistent object
61 int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
63 struct nilfs_mdt_info *mi = NILFS_MDT(inode);
65 mi->mi_bgl = kmalloc(sizeof(*mi->mi_bgl), GFP_NOFS);
69 bgl_lock_init(mi->mi_bgl);
71 nilfs_mdt_set_entry_size(inode, entry_size, 0);
73 mi->mi_blocks_per_group =
74 DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode),
75 mi->mi_entries_per_block) + 1;
76 /* Number of blocks in a group including entry blocks and
78 mi->mi_blocks_per_desc_block =
79 nilfs_palloc_groups_per_desc_block(inode) *
80 mi->mi_blocks_per_group + 1;
81 /* Number of blocks per descriptor including the
87 * nilfs_palloc_group - get group number and offset from an entry number
88 * @inode: inode of metadata file using this allocator
89 * @nr: serial number of the entry (e.g. inode number)
90 * @offset: pointer to store offset number in the group
92 static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
93 unsigned long *offset)
97 *offset = do_div(group, nilfs_palloc_entries_per_group(inode));
102 * nilfs_palloc_desc_blkoff - get block offset of a group descriptor block
103 * @inode: inode of metadata file using this allocator
104 * @group: group number
106 * nilfs_palloc_desc_blkoff() returns block offset of the descriptor
107 * block which contains a descriptor of the specified group.
110 nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
112 unsigned long desc_block =
113 group / nilfs_palloc_groups_per_desc_block(inode);
114 return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block;
118 * nilfs_palloc_bitmap_blkoff - get block offset of a bitmap block
119 * @inode: inode of metadata file using this allocator
120 * @group: group number
122 * nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap
123 * block used to allocate/deallocate entries in the specified group.
126 nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
128 unsigned long desc_offset =
129 group % nilfs_palloc_groups_per_desc_block(inode);
130 return nilfs_palloc_desc_blkoff(inode, group) + 1 +
131 desc_offset * NILFS_MDT(inode)->mi_blocks_per_group;
135 * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group
136 * @inode: inode of metadata file using this allocator
137 * @group: group number
138 * @desc: pointer to descriptor structure for the group
141 nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group,
142 const struct nilfs_palloc_group_desc *desc)
146 spin_lock(nilfs_mdt_bgl_lock(inode, group));
147 nfree = le32_to_cpu(desc->pg_nfrees);
148 spin_unlock(nilfs_mdt_bgl_lock(inode, group));
153 * nilfs_palloc_group_desc_add_entries - adjust count of free entries
154 * @inode: inode of metadata file using this allocator
155 * @group: group number
156 * @desc: pointer to descriptor structure for the group
157 * @n: delta to be added
160 nilfs_palloc_group_desc_add_entries(struct inode *inode,
162 struct nilfs_palloc_group_desc *desc,
165 spin_lock(nilfs_mdt_bgl_lock(inode, group));
166 le32_add_cpu(&desc->pg_nfrees, n);
167 spin_unlock(nilfs_mdt_bgl_lock(inode, group));
171 * nilfs_palloc_entry_blkoff - get block offset of an entry block
172 * @inode: inode of metadata file using this allocator
173 * @nr: serial number of the entry (e.g. inode number)
176 nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
178 unsigned long group, group_offset;
180 group = nilfs_palloc_group(inode, nr, &group_offset);
182 return nilfs_palloc_bitmap_blkoff(inode, group) + 1 +
183 group_offset / NILFS_MDT(inode)->mi_entries_per_block;
187 * nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block
188 * @inode: inode of metadata file
189 * @bh: buffer head of the buffer to be initialized
190 * @kaddr: kernel address mapped for the page including the buffer
192 static void nilfs_palloc_desc_block_init(struct inode *inode,
193 struct buffer_head *bh, void *kaddr)
195 struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh);
196 unsigned long n = nilfs_palloc_groups_per_desc_block(inode);
199 nfrees = cpu_to_le32(nilfs_palloc_entries_per_group(inode));
201 desc->pg_nfrees = nfrees;
206 static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
208 void (*init_block)(struct inode *,
209 struct buffer_head *,
211 struct buffer_head **bhp,
212 struct nilfs_bh_assoc *prev,
218 if (prev->bh && blkoff == prev->blkoff) {
226 ret = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp);
230 * The following code must be safe for change of the
231 * cache contents during the get block call.
236 prev->blkoff = blkoff;
243 * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block
244 * @inode: inode of metadata file using this allocator
245 * @group: group number
246 * @create: create flag
247 * @bhp: pointer to store the resultant buffer head
249 static int nilfs_palloc_get_desc_block(struct inode *inode,
251 int create, struct buffer_head **bhp)
253 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
255 return nilfs_palloc_get_block(inode,
256 nilfs_palloc_desc_blkoff(inode, group),
257 create, nilfs_palloc_desc_block_init,
258 bhp, &cache->prev_desc, &cache->lock);
262 * nilfs_palloc_get_bitmap_block - get buffer head of a bitmap block
263 * @inode: inode of metadata file using this allocator
264 * @group: group number
265 * @create: create flag
266 * @bhp: pointer to store the resultant buffer head
268 static int nilfs_palloc_get_bitmap_block(struct inode *inode,
270 int create, struct buffer_head **bhp)
272 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
274 return nilfs_palloc_get_block(inode,
275 nilfs_palloc_bitmap_blkoff(inode, group),
277 &cache->prev_bitmap, &cache->lock);
281 * nilfs_palloc_get_entry_block - get buffer head of an entry block
282 * @inode: inode of metadata file using this allocator
283 * @nr: serial number of the entry (e.g. inode number)
284 * @create: create flag
285 * @bhp: pointer to store the resultant buffer head
287 int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
288 int create, struct buffer_head **bhp)
290 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
292 return nilfs_palloc_get_block(inode,
293 nilfs_palloc_entry_blkoff(inode, nr),
295 &cache->prev_entry, &cache->lock);
299 * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor
300 * @inode: inode of metadata file using this allocator
301 * @group: group number
302 * @bh: buffer head of the buffer storing the group descriptor block
303 * @kaddr: kernel address mapped for the page including the buffer
305 static struct nilfs_palloc_group_desc *
306 nilfs_palloc_block_get_group_desc(const struct inode *inode,
308 const struct buffer_head *bh, void *kaddr)
310 return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) +
311 group % nilfs_palloc_groups_per_desc_block(inode);
315 * nilfs_palloc_block_get_entry - get kernel address of an entry
316 * @inode: inode of metadata file using this allocator
317 * @nr: serial number of the entry (e.g. inode number)
318 * @bh: buffer head of the buffer storing the entry block
319 * @kaddr: kernel address mapped for the page including the buffer
321 void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
322 const struct buffer_head *bh, void *kaddr)
324 unsigned long entry_offset, group_offset;
326 nilfs_palloc_group(inode, nr, &group_offset);
327 entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block;
329 return kaddr + bh_offset(bh) +
330 entry_offset * NILFS_MDT(inode)->mi_entry_size;
334 * nilfs_palloc_find_available_slot - find available slot in a group
335 * @inode: inode of metadata file using this allocator
336 * @group: group number
337 * @target: offset number of an entry in the group (start point)
338 * @bitmap: bitmap of the group
339 * @bsize: size in bits
341 static int nilfs_palloc_find_available_slot(struct inode *inode,
343 unsigned long target,
344 unsigned char *bitmap,
347 int curr, pos, end, i;
350 end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1);
353 pos = nilfs_find_next_zero_bit(bitmap, end, target);
355 !nilfs_set_bit_atomic(
356 nilfs_mdt_bgl_lock(inode, group), pos, bitmap))
361 for (i = 0, curr = end;
363 i += BITS_PER_LONG, curr += BITS_PER_LONG) {
367 while (*((unsigned long *)bitmap + curr / BITS_PER_LONG)
369 end = curr + BITS_PER_LONG;
372 pos = nilfs_find_next_zero_bit(bitmap, end, curr);
374 !nilfs_set_bit_atomic(
375 nilfs_mdt_bgl_lock(inode, group), pos,
384 * nilfs_palloc_rest_groups_in_desc_block - get the remaining number of groups
385 * in a group descriptor block
386 * @inode: inode of metadata file using this allocator
387 * @curr: current group number
388 * @max: maximum number of groups
391 nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
392 unsigned long curr, unsigned long max)
394 return min_t(unsigned long,
395 nilfs_palloc_groups_per_desc_block(inode) -
396 curr % nilfs_palloc_groups_per_desc_block(inode),
401 * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
402 * @inode: inode of metadata file using this allocator
403 * @req: nilfs_palloc_req structure exchanged for the allocation
405 int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
406 struct nilfs_palloc_req *req)
408 struct buffer_head *desc_bh, *bitmap_bh;
409 struct nilfs_palloc_group_desc *desc;
410 unsigned char *bitmap;
411 void *desc_kaddr, *bitmap_kaddr;
412 unsigned long group, maxgroup, ngroups;
413 unsigned long group_offset, maxgroup_offset;
414 unsigned long n, entries_per_group, groups_per_desc_block;
418 ngroups = nilfs_palloc_groups_count(inode);
419 maxgroup = ngroups - 1;
420 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
421 entries_per_group = nilfs_palloc_entries_per_group(inode);
422 groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode);
424 for (i = 0; i < ngroups; i += n) {
425 if (group >= ngroups) {
428 maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr,
429 &maxgroup_offset) - 1;
431 ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
434 desc_kaddr = kmap(desc_bh->b_page);
435 desc = nilfs_palloc_block_get_group_desc(
436 inode, group, desc_bh, desc_kaddr);
437 n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
439 for (j = 0; j < n; j++, desc++, group++) {
440 if (nilfs_palloc_group_desc_nfrees(inode, group, desc)
442 ret = nilfs_palloc_get_bitmap_block(
443 inode, group, 1, &bitmap_bh);
446 bitmap_kaddr = kmap(bitmap_bh->b_page);
447 bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
448 pos = nilfs_palloc_find_available_slot(
449 inode, group, group_offset, bitmap,
452 /* found a free entry */
453 nilfs_palloc_group_desc_add_entries(
454 inode, group, desc, -1);
456 entries_per_group * group + pos;
457 kunmap(desc_bh->b_page);
458 kunmap(bitmap_bh->b_page);
460 req->pr_desc_bh = desc_bh;
461 req->pr_bitmap_bh = bitmap_bh;
464 kunmap(bitmap_bh->b_page);
471 kunmap(desc_bh->b_page);
475 /* no entries left */
479 kunmap(desc_bh->b_page);
485 * nilfs_palloc_commit_alloc_entry - finish allocation of a persistent object
486 * @inode: inode of metadata file using this allocator
487 * @req: nilfs_palloc_req structure exchanged for the allocation
489 void nilfs_palloc_commit_alloc_entry(struct inode *inode,
490 struct nilfs_palloc_req *req)
492 mark_buffer_dirty(req->pr_bitmap_bh);
493 mark_buffer_dirty(req->pr_desc_bh);
494 nilfs_mdt_mark_dirty(inode);
496 brelse(req->pr_bitmap_bh);
497 brelse(req->pr_desc_bh);
501 * nilfs_palloc_commit_free_entry - finish deallocating a persistent object
502 * @inode: inode of metadata file using this allocator
503 * @req: nilfs_palloc_req structure exchanged for the removal
505 void nilfs_palloc_commit_free_entry(struct inode *inode,
506 struct nilfs_palloc_req *req)
508 struct nilfs_palloc_group_desc *desc;
509 unsigned long group, group_offset;
510 unsigned char *bitmap;
511 void *desc_kaddr, *bitmap_kaddr;
513 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
514 desc_kaddr = kmap(req->pr_desc_bh->b_page);
515 desc = nilfs_palloc_block_get_group_desc(inode, group,
516 req->pr_desc_bh, desc_kaddr);
517 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
518 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
520 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
521 group_offset, bitmap))
522 printk(KERN_WARNING "%s: entry number %llu already freed\n",
523 __func__, (unsigned long long)req->pr_entry_nr);
525 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
527 kunmap(req->pr_bitmap_bh->b_page);
528 kunmap(req->pr_desc_bh->b_page);
530 mark_buffer_dirty(req->pr_desc_bh);
531 mark_buffer_dirty(req->pr_bitmap_bh);
532 nilfs_mdt_mark_dirty(inode);
534 brelse(req->pr_bitmap_bh);
535 brelse(req->pr_desc_bh);
539 * nilfs_palloc_abort_alloc_entry - cancel allocation of a persistent object
540 * @inode: inode of metadata file using this allocator
541 * @req: nilfs_palloc_req structure exchanged for the allocation
543 void nilfs_palloc_abort_alloc_entry(struct inode *inode,
544 struct nilfs_palloc_req *req)
546 struct nilfs_palloc_group_desc *desc;
547 void *desc_kaddr, *bitmap_kaddr;
548 unsigned char *bitmap;
549 unsigned long group, group_offset;
551 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
552 desc_kaddr = kmap(req->pr_desc_bh->b_page);
553 desc = nilfs_palloc_block_get_group_desc(inode, group,
554 req->pr_desc_bh, desc_kaddr);
555 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
556 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
557 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
558 group_offset, bitmap))
559 printk(KERN_WARNING "%s: entry number %llu already freed\n",
560 __func__, (unsigned long long)req->pr_entry_nr);
562 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
564 kunmap(req->pr_bitmap_bh->b_page);
565 kunmap(req->pr_desc_bh->b_page);
567 brelse(req->pr_bitmap_bh);
568 brelse(req->pr_desc_bh);
570 req->pr_entry_nr = 0;
571 req->pr_bitmap_bh = NULL;
572 req->pr_desc_bh = NULL;
576 * nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object
577 * @inode: inode of metadata file using this allocator
578 * @req: nilfs_palloc_req structure exchanged for the removal
580 int nilfs_palloc_prepare_free_entry(struct inode *inode,
581 struct nilfs_palloc_req *req)
583 struct buffer_head *desc_bh, *bitmap_bh;
584 unsigned long group, group_offset;
587 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
588 ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
591 ret = nilfs_palloc_get_bitmap_block(inode, group, 1, &bitmap_bh);
597 req->pr_desc_bh = desc_bh;
598 req->pr_bitmap_bh = bitmap_bh;
603 * nilfs_palloc_abort_free_entry - cancel deallocating a persistent object
604 * @inode: inode of metadata file using this allocator
605 * @req: nilfs_palloc_req structure exchanged for the removal
607 void nilfs_palloc_abort_free_entry(struct inode *inode,
608 struct nilfs_palloc_req *req)
610 brelse(req->pr_bitmap_bh);
611 brelse(req->pr_desc_bh);
613 req->pr_entry_nr = 0;
614 req->pr_bitmap_bh = NULL;
615 req->pr_desc_bh = NULL;
619 * nilfs_palloc_group_is_in - judge if an entry is in a group
620 * @inode: inode of metadata file using this allocator
621 * @group: group number
622 * @nr: serial number of the entry (e.g. inode number)
625 nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
629 first = group * nilfs_palloc_entries_per_group(inode);
630 last = first + nilfs_palloc_entries_per_group(inode) - 1;
631 return (nr >= first) && (nr <= last);
635 * nilfs_palloc_freev - deallocate a set of persistent objects
636 * @inode: inode of metadata file using this allocator
637 * @entry_nrs: array of entry numbers to be deallocated
638 * @nitems: number of entries stored in @entry_nrs
640 int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
642 struct buffer_head *desc_bh, *bitmap_bh;
643 struct nilfs_palloc_group_desc *desc;
644 unsigned char *bitmap;
645 void *desc_kaddr, *bitmap_kaddr;
646 unsigned long group, group_offset;
649 for (i = 0; i < nitems; i = j) {
650 group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
651 ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
654 ret = nilfs_palloc_get_bitmap_block(inode, group, 0,
660 desc_kaddr = kmap(desc_bh->b_page);
661 desc = nilfs_palloc_block_get_group_desc(
662 inode, group, desc_bh, desc_kaddr);
663 bitmap_kaddr = kmap(bitmap_bh->b_page);
664 bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
666 (j < nitems) && nilfs_palloc_group_is_in(inode, group,
669 nilfs_palloc_group(inode, entry_nrs[j], &group_offset);
670 if (!nilfs_clear_bit_atomic(
671 nilfs_mdt_bgl_lock(inode, group),
672 group_offset, bitmap)) {
674 "%s: entry number %llu already freed\n",
676 (unsigned long long)entry_nrs[j]);
681 nilfs_palloc_group_desc_add_entries(inode, group, desc, n);
683 kunmap(bitmap_bh->b_page);
684 kunmap(desc_bh->b_page);
686 mark_buffer_dirty(desc_bh);
687 mark_buffer_dirty(bitmap_bh);
688 nilfs_mdt_mark_dirty(inode);
696 void nilfs_palloc_setup_cache(struct inode *inode,
697 struct nilfs_palloc_cache *cache)
699 NILFS_MDT(inode)->mi_palloc_cache = cache;
700 spin_lock_init(&cache->lock);
703 void nilfs_palloc_clear_cache(struct inode *inode)
705 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
707 spin_lock(&cache->lock);
708 brelse(cache->prev_desc.bh);
709 brelse(cache->prev_bitmap.bh);
710 brelse(cache->prev_entry.bh);
711 cache->prev_desc.bh = NULL;
712 cache->prev_bitmap.bh = NULL;
713 cache->prev_entry.bh = NULL;
714 spin_unlock(&cache->lock);
717 void nilfs_palloc_destroy_cache(struct inode *inode)
719 nilfs_palloc_clear_cache(inode);
720 NILFS_MDT(inode)->mi_palloc_cache = NULL;