1 // SPDX-License-Identifier: GPL-2.0-only
3 * This file is part of UBIFS.
5 * Copyright (C) 2006-2008 Nokia Corporation.
7 * Authors: Adrian Hunter
8 * Artem Bityutskiy (Битюцкий Артём)
12 * This file implements the budgeting sub-system which is responsible for UBIFS
15 * Factors such as compression, wasted space at the ends of LEBs, space in other
16 * journal heads, the effect of updates on the index, and so on, make it
17 * impossible to accurately predict the amount of space needed. Consequently
18 * approximations are used.
22 #include <linux/writeback.h>
23 #include <linux/math64.h>
26 * When pessimistic budget calculations say that there is not enough space,
27 * UBIFS starts writing back dirty inodes and pages, doing garbage collection,
28 * or committing. The below constant defines maximum number of times UBIFS
29 * repeats the operations.
31 #define MAX_MKSPC_RETRIES 3
34 * The below constant defines the number of dirty pages which should be written
35 * back when trying to shrink the liability.
37 #define NR_TO_WRITE 16
40 * shrink_liability - write-back some dirty pages/inodes.
41 * @c: UBIFS file-system description object
42 * @nr_to_write: how many dirty pages to write-back
44 * This function shrinks UBIFS liability by means of writing back some amount
45 * of dirty inodes and their pages.
47 * Note, this function synchronizes even VFS inodes which are locked
48 * (@i_mutex) by the caller of the budgeting function, because write-back does
51 static void shrink_liability(struct ubifs_info *c, int nr_to_write)
53 down_read(&c->vfs_sb->s_umount);
54 writeback_inodes_sb_nr(c->vfs_sb, nr_to_write, WB_REASON_FS_FREE_SPACE);
55 up_read(&c->vfs_sb->s_umount);
59 * run_gc - run garbage collector.
60 * @c: UBIFS file-system description object
62 * This function runs garbage collector to make some more free space. Returns
63 * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a
64 * negative error code in case of failure.
66 static int run_gc(struct ubifs_info *c)
70 /* Make some free space by garbage-collecting dirty space */
71 down_read(&c->commit_sem);
72 lnum = ubifs_garbage_collect(c, 1);
73 up_read(&c->commit_sem);
77 /* GC freed one LEB, return it to lprops */
78 dbg_budg("GC freed LEB %d", lnum);
79 return ubifs_return_leb(c, lnum);
83 * get_liability - calculate current liability.
84 * @c: UBIFS file-system description object
86 * This function calculates and returns current UBIFS liability, i.e. the
87 * amount of bytes UBIFS has "promised" to write to the media.
89 static long long get_liability(struct ubifs_info *c)
93 spin_lock(&c->space_lock);
94 liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
95 spin_unlock(&c->space_lock);
100 * make_free_space - make more free space on the file-system.
101 * @c: UBIFS file-system description object
103 * This function is called when an operation cannot be budgeted because there
104 * is supposedly no free space. But in most cases there is some free space:
105 * o budgeting is pessimistic, so it always budgets more than is actually
106 * needed, so shrinking the liability is one way to make free space - the
107 * cached data will take less space than it was budgeted for;
108 * o GC may turn some dark space into free space (budgeting treats dark space
110 * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs.
112 * So this function tries to do the above. Returns %-EAGAIN if some free space
113 * was presumably made and the caller has to re-try budgeting the operation.
114 * Returns %-ENOSPC if it couldn't make more free space, and other negative error
117 static int make_free_space(struct ubifs_info *c)
119 int err, retries = 0;
120 long long liab1, liab2;
123 liab1 = get_liability(c);
125 * We probably have some dirty pages or inodes (liability), try
126 * to write them back.
128 dbg_budg("liability %lld, run write-back", liab1);
129 shrink_liability(c, NR_TO_WRITE);
131 liab2 = get_liability(c);
135 dbg_budg("new liability %lld (not shrunk)", liab2);
137 /* Liability did not shrink again, try GC */
143 if (err != -EAGAIN && err != -ENOSPC)
144 /* Some real error happened */
147 dbg_budg("Run commit (retries %d)", retries);
148 err = ubifs_run_commit(c);
151 } while (retries++ < MAX_MKSPC_RETRIES);
157 * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index.
158 * @c: UBIFS file-system description object
160 * This function calculates and returns the number of LEBs which should be kept
163 int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
168 idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
169 /* And make sure we have thrice the index size of space reserved */
170 idx_size += idx_size << 1;
172 * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
173 * pair, nor similarly the two variables for the new index size, so we
174 * have to do this costly 64-bit division on fast-path.
176 idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size);
178 * The index head is not available for the in-the-gaps method, so add an
179 * extra LEB to compensate.
182 if (idx_lebs < MIN_INDEX_LEBS)
183 idx_lebs = MIN_INDEX_LEBS;
188 * ubifs_calc_available - calculate available FS space.
189 * @c: UBIFS file-system description object
190 * @min_idx_lebs: minimum number of LEBs reserved for the index
192 * This function calculates and returns amount of FS space available for use.
194 long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
199 available = c->main_bytes - c->lst.total_used;
202 * Now 'available' contains theoretically available flash space
203 * assuming there is no index, so we have to subtract the space which
204 * is reserved for the index.
206 subtract_lebs = min_idx_lebs;
208 /* Take into account that GC reserves one LEB for its own needs */
212 * The GC journal head LEB is not really accessible. And since
213 * different write types go to different heads, we may count only on
216 subtract_lebs += c->jhead_cnt - 1;
218 /* We also reserve one LEB for deletions, which bypass budgeting */
221 available -= (long long)subtract_lebs * c->leb_size;
223 /* Subtract the dead space which is not available for use */
224 available -= c->lst.total_dead;
227 * Subtract dark space, which might or might not be usable - it depends
228 * on the data which we have on the media and which will be written. If
229 * this is a lot of uncompressed or not-compressible data, the dark
230 * space cannot be used.
232 available -= c->lst.total_dark;
235 * However, there is more dark space. The index may be bigger than
236 * @min_idx_lebs. Those extra LEBs are assumed to be available, but
237 * their dark space is not included in total_dark, so it is subtracted
240 if (c->lst.idx_lebs > min_idx_lebs) {
241 subtract_lebs = c->lst.idx_lebs - min_idx_lebs;
242 available -= subtract_lebs * c->dark_wm;
245 /* The calculations are rough and may end up with a negative number */
246 return available > 0 ? available : 0;
250 * can_use_rp - check whether the user is allowed to use reserved pool.
251 * @c: UBIFS file-system description object
253 * UBIFS has a so-called "reserved pool", which is flash space reserved
254 * for the superuser and for users whose UID/GID is recorded in UBIFS superblock.
255 * This function checks whether the current user is allowed to use the reserved pool.
256 * Returns %1 if the current user is allowed to use the reserved pool and %0 otherwise.
258 static int can_use_rp(struct ubifs_info *c)
260 if (uid_eq(current_fsuid(), c->rp_uid) || capable(CAP_SYS_RESOURCE) ||
261 (!gid_eq(c->rp_gid, GLOBAL_ROOT_GID) && in_group_p(c->rp_gid)))
267 * do_budget_space - reserve flash space for index and data growth.
268 * @c: UBIFS file-system description object
270 * This function makes sure UBIFS has enough free LEBs for index growth and
273 * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
274 * would take if it was consolidated and written to the flash. This guarantees
275 * that the "in-the-gaps" commit method always succeeds and UBIFS will always
276 * be able to commit dirty index. So this function basically adds amount of
277 * budgeted index space to the size of the current index, multiplies this by 3,
278 * and makes sure this does not exceed the amount of free LEBs.
280 * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
281 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
282 * be large, because UBIFS does not do any index consolidation as long as
283 * there is free space. IOW, the index may take a lot of LEBs, but the LEBs
284 * will contain a lot of dirt.
285 * o @c->bi.min_idx_lebs is the number of LEBs the index presumably takes. IOW,
286 * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
288 * This function returns zero in case of success, and %-ENOSPC in case of
291 static int do_budget_space(struct ubifs_info *c)
293 long long outstanding, available;
294 int lebs, rsvd_idx_lebs, min_idx_lebs;
296 /* First budget index space */
297 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
299 /* Now 'min_idx_lebs' contains number of LEBs to reserve */
300 if (min_idx_lebs > c->lst.idx_lebs)
301 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
306 * The number of LEBs that are available to be used by the index is:
308 * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
309 * @c->lst.taken_empty_lebs
311 * @c->lst.empty_lebs are available because they are empty.
312 * @c->freeable_cnt are available because they contain only free and
313 * dirty space, @c->idx_gc_cnt are available because they are index
314 * LEBs that have been garbage collected and are awaiting the commit
315 * before they can be used. And the in-the-gaps method will grab these
316 * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have
317 * already been allocated for some purpose.
319 * Note, @c->idx_gc_cnt is included in both @c->lst.empty_lebs (because
320 * these LEBs are empty) and in @c->lst.taken_empty_lebs (because they
321 * are taken until after the commit).
323 * Note, @c->lst.taken_empty_lebs may temporarily be higher by one
324 * because of the way we serialize LEB allocations and budgeting. See a
325 * comment in 'ubifs_find_free_space()'.
327 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
328 c->lst.taken_empty_lebs;
329 if (unlikely(rsvd_idx_lebs > lebs)) {
330 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), rsvd_idx_lebs %d",
331 min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs);
335 available = ubifs_calc_available(c, min_idx_lebs);
336 outstanding = c->bi.data_growth + c->bi.dd_growth;
338 if (unlikely(available < outstanding)) {
339 dbg_budg("out of data space: available %lld, outstanding %lld",
340 available, outstanding);
344 if (available - outstanding <= c->rp_size && !can_use_rp(c))
347 c->bi.min_idx_lebs = min_idx_lebs;
352 * calc_idx_growth - calculate approximate index growth from budgeting request.
353 * @c: UBIFS file-system description object
354 * @req: budgeting request
356 * For now we assume each new node adds one znode. This is a rather poor
357 * approximation, though.
359 static int calc_idx_growth(const struct ubifs_info *c,
360 const struct ubifs_budget_req *req)
364 znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) +
366 return znodes * c->max_idx_node_sz;
370 * calc_data_growth - calculate approximate amount of new data from budgeting
372 * @c: UBIFS file-system description object
373 * @req: budgeting request
375 static int calc_data_growth(const struct ubifs_info *c,
376 const struct ubifs_budget_req *req)
380 data_growth = req->new_ino ? c->bi.inode_budget : 0;
382 data_growth += c->bi.page_budget;
384 data_growth += c->bi.dent_budget;
385 data_growth += req->new_ino_d;
390 * calc_dd_growth - calculate approximate amount of data which makes other data
391 * dirty from budgeting request.
392 * @c: UBIFS file-system description object
393 * @req: budgeting request
395 static int calc_dd_growth(const struct ubifs_info *c,
396 const struct ubifs_budget_req *req)
400 dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
402 if (req->dirtied_ino)
403 dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
405 dd_growth += c->bi.dent_budget;
406 dd_growth += req->dirtied_ino_d;
411 * ubifs_budget_space - ensure there is enough space to complete an operation.
412 * @c: UBIFS file-system description object
413 * @req: budget request
415 * This function allocates budget for an operation. It uses pessimistic
416 * approximation of how much flash space the operation needs. The goal of this
417 * function is to make sure UBIFS always has flash space to flush all dirty
418 * pages, dirty inodes, and dirty znodes (liability). This function may force
419 * commit, garbage-collection or write-back. Returns zero in case of success,
420 * %-ENOSPC if there is no free space and other negative error codes in case of
423 int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
425 int err, idx_growth, data_growth, dd_growth, retried = 0;
427 ubifs_assert(c, req->new_page <= 1);
428 ubifs_assert(c, req->dirtied_page <= 1);
429 ubifs_assert(c, req->new_dent <= 1);
430 ubifs_assert(c, req->mod_dent <= 1);
431 ubifs_assert(c, req->new_ino <= 1);
432 ubifs_assert(c, req->new_ino_d <= UBIFS_MAX_INO_DATA);
433 ubifs_assert(c, req->dirtied_ino <= 4);
434 ubifs_assert(c, req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
435 ubifs_assert(c, !(req->new_ino_d & 7));
436 ubifs_assert(c, !(req->dirtied_ino_d & 7));
438 data_growth = calc_data_growth(c, req);
439 dd_growth = calc_dd_growth(c, req);
440 if (!data_growth && !dd_growth)
442 idx_growth = calc_idx_growth(c, req);
445 spin_lock(&c->space_lock);
446 ubifs_assert(c, c->bi.idx_growth >= 0);
447 ubifs_assert(c, c->bi.data_growth >= 0);
448 ubifs_assert(c, c->bi.dd_growth >= 0);
450 if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
451 dbg_budg("no space");
452 spin_unlock(&c->space_lock);
456 c->bi.idx_growth += idx_growth;
457 c->bi.data_growth += data_growth;
458 c->bi.dd_growth += dd_growth;
460 err = do_budget_space(c);
462 req->idx_growth = idx_growth;
463 req->data_growth = data_growth;
464 req->dd_growth = dd_growth;
465 spin_unlock(&c->space_lock);
469 /* Restore the old values */
470 c->bi.idx_growth -= idx_growth;
471 c->bi.data_growth -= data_growth;
472 c->bi.dd_growth -= dd_growth;
473 spin_unlock(&c->space_lock);
476 dbg_budg("no space for fast budgeting");
480 err = make_free_space(c);
482 if (err == -EAGAIN) {
483 dbg_budg("try again");
485 } else if (err == -ENOSPC) {
488 dbg_budg("-ENOSPC, but anyway try once again");
491 dbg_budg("FS is full, -ENOSPC");
493 if (can_use_rp(c) || c->rp_size == 0)
494 c->bi.nospace_rp = 1;
497 ubifs_err(c, "cannot budget space, error %d", err);
502 * ubifs_release_budget - release budgeted free space.
503 * @c: UBIFS file-system description object
504 * @req: budget request
506 * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
507 * since the index changes (which were budgeted for in @req->idx_growth) will
508 * only be written to the media on commit, this function moves the index budget
509 * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
510 * by the commit operation.
512 void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
514 ubifs_assert(c, req->new_page <= 1);
515 ubifs_assert(c, req->dirtied_page <= 1);
516 ubifs_assert(c, req->new_dent <= 1);
517 ubifs_assert(c, req->mod_dent <= 1);
518 ubifs_assert(c, req->new_ino <= 1);
519 ubifs_assert(c, req->new_ino_d <= UBIFS_MAX_INO_DATA);
520 ubifs_assert(c, req->dirtied_ino <= 4);
521 ubifs_assert(c, req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
522 ubifs_assert(c, !(req->new_ino_d & 7));
523 ubifs_assert(c, !(req->dirtied_ino_d & 7));
524 if (!req->recalculate) {
525 ubifs_assert(c, req->idx_growth >= 0);
526 ubifs_assert(c, req->data_growth >= 0);
527 ubifs_assert(c, req->dd_growth >= 0);
530 if (req->recalculate) {
531 req->data_growth = calc_data_growth(c, req);
532 req->dd_growth = calc_dd_growth(c, req);
533 req->idx_growth = calc_idx_growth(c, req);
536 if (!req->data_growth && !req->dd_growth)
539 c->bi.nospace = c->bi.nospace_rp = 0;
542 spin_lock(&c->space_lock);
543 c->bi.idx_growth -= req->idx_growth;
544 c->bi.uncommitted_idx += req->idx_growth;
545 c->bi.data_growth -= req->data_growth;
546 c->bi.dd_growth -= req->dd_growth;
547 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
549 ubifs_assert(c, c->bi.idx_growth >= 0);
550 ubifs_assert(c, c->bi.data_growth >= 0);
551 ubifs_assert(c, c->bi.dd_growth >= 0);
552 ubifs_assert(c, c->bi.min_idx_lebs < c->main_lebs);
553 ubifs_assert(c, !(c->bi.idx_growth & 7));
554 ubifs_assert(c, !(c->bi.data_growth & 7));
555 ubifs_assert(c, !(c->bi.dd_growth & 7));
556 spin_unlock(&c->space_lock);
560 * ubifs_convert_page_budget - convert budget of a new page.
561 * @c: UBIFS file-system description object
563 * This function converts budget which was allocated for a new page of data to
564 * the budget of changing an existing page of data. The latter is smaller than
565 * the former, so this function only does simple re-calculation and does not
566 * involve any write-back.
568 void ubifs_convert_page_budget(struct ubifs_info *c)
570 spin_lock(&c->space_lock);
571 /* Release the index growth reservation */
572 c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
573 /* Release the data growth reservation */
574 c->bi.data_growth -= c->bi.page_budget;
575 /* Increase the dirty data growth reservation instead */
576 c->bi.dd_growth += c->bi.page_budget;
577 /* And re-calculate the indexing space reservation */
578 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
579 spin_unlock(&c->space_lock);
583 * ubifs_release_dirty_inode_budget - release dirty inode budget.
584 * @c: UBIFS file-system description object
585 * @ui: UBIFS inode to release the budget for
587 * This function releases budget corresponding to a dirty inode. It is usually
588 * called after the inode has been written to the media and marked as
589 * clean. It also causes the "no space" flags to be cleared.
591 void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
592 struct ubifs_inode *ui)
594 struct ubifs_budget_req req;
596 memset(&req, 0, sizeof(struct ubifs_budget_req));
597 /* The "no space" flags will be cleared because dd_growth is > 0 */
598 req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
599 ubifs_release_budget(c, &req);
603 * ubifs_reported_space - calculate reported free space.
604 * @c: the UBIFS file-system description object
605 * @free: amount of free space
607 * This function calculates amount of free space which will be reported to
608 * user-space. User-space applications tend to expect that if the file-system
609 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
610 * are able to write a file of size N. UBIFS attaches node headers to each data
611 * node and it has to write indexing nodes as well. This introduces additional
612 * overhead, and UBIFS has to report slightly less free space to meet the above
615 * This function assumes free space is made up of uncompressed data nodes and
616 * full index nodes (one per data node, tripled because we always allow enough
617 * space to write the index thrice).
619 * Note, the calculation is pessimistic, which means that most of the time
620 * UBIFS reports less space than it actually has.
622 long long ubifs_reported_space(const struct ubifs_info *c, long long free)
624 int divisor, factor, f;
627 * Reported space size is @free * X, where X is UBIFS block size
628 * divided by UBIFS block size + all overhead one data block
629 * introduces. The overhead is the node header + indexing overhead.
631 * Indexing overhead calculations are based on the following formula:
632 * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number
633 * of data nodes, f - fanout. Because the effective UBIFS fanout is half
634 * the maximum fanout, we assume that each data node
635 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
636 * Note, the multiplier 3 is because UBIFS reserves thrice as much space
639 f = c->fanout > 3 ? c->fanout >> 1 : 2;
640 factor = UBIFS_BLOCK_SIZE;
641 divisor = UBIFS_MAX_DATA_NODE_SZ;
642 divisor += (c->max_idx_node_sz * 3) / (f - 1);
644 return div_u64(free, divisor);
648 * ubifs_get_free_space_nolock - return amount of free space.
649 * @c: UBIFS file-system description object
651 * This function calculates amount of free space to report to user-space.
653 * Because UBIFS may introduce substantial overhead (the index, node headers,
654 * alignment, wastage at the end of LEBs, etc), it cannot report real amount of
655 * free flash space it has (well, because not all dirty space is reclaimable,
656 * UBIFS does not actually know the real amount). If UBIFS did so, it would
657 * break user expectations about what free space is. Users seem to be accustomed
658 * to assume that if the file-system reports N bytes of free space, they would
659 * be able to fit a file of N bytes to the FS. This almost works for
660 * traditional file-systems, because they have way less overhead than UBIFS.
661 * So, to keep users happy, UBIFS tries to take the overhead into account.
663 long long ubifs_get_free_space_nolock(struct ubifs_info *c)
665 int rsvd_idx_lebs, lebs;
666 long long available, outstanding, free;
668 ubifs_assert(c, c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
669 outstanding = c->bi.data_growth + c->bi.dd_growth;
670 available = ubifs_calc_available(c, c->bi.min_idx_lebs);
673 * When reporting free space to user-space, UBIFS guarantees that it is
674 * possible to write a file of free space size. This means that for
675 * empty LEBs we may use more precise calculations than
676 * 'ubifs_calc_available()' is using. Namely, we know that in empty
677 * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
678 * Thus, amend the available space.
680 * Note, the calculations below are similar to what we have in
681 * 'do_budget_space()', so refer there for comments.
683 if (c->bi.min_idx_lebs > c->lst.idx_lebs)
684 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
687 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
688 c->lst.taken_empty_lebs;
689 lebs -= rsvd_idx_lebs;
690 available += lebs * (c->dark_wm - c->leb_overhead);
692 if (available > outstanding)
693 free = ubifs_reported_space(c, available - outstanding);
700 * ubifs_get_free_space - return amount of free space.
701 * @c: UBIFS file-system description object
703 * This function calculates and returns amount of free space to report to
706 long long ubifs_get_free_space(struct ubifs_info *c)
710 spin_lock(&c->space_lock);
711 free = ubifs_get_free_space_nolock(c);
712 spin_unlock(&c->space_lock);