xfs: cross-reference rmap records with free space btrees
fs/xfs/scrub/rmap.c [platform/kernel/linux-rpi.git]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "xfs_ag.h"
#include "xfs_bit.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/bitmap.h"

/*
 * Set us up to scrub reverse mapping btrees.
 */
int
xchk_setup_ag_rmapbt(
        struct xfs_scrub        *sc)
{
        if (xchk_need_intent_drain(sc))
                xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);

        return xchk_setup_ag_btree(sc, false);
}

/* Reverse-mapping scrubber. */

struct xchk_rmap {
        /*
         * The furthest-reaching of the rmapbt records that we've already
         * processed.  This enables us to detect overlapping records for space
         * allocations that cannot be shared.
         */
        struct xfs_rmap_irec    overlap_rec;

        /*
         * The previous rmapbt record, so that we can check for two records
         * that could be one.
         */
        struct xfs_rmap_irec    prev_rec;

        /* Bitmaps containing all blocks for each type of AG metadata. */
        struct xagb_bitmap      fs_owned;
        struct xagb_bitmap      log_owned;
        struct xagb_bitmap      ag_owned;

        /* Did we complete the AG space metadata bitmaps? */
        bool                    bitmaps_complete;
};

/* Cross-reference a rmap against the refcount btree. */
STATIC void
xchk_rmapbt_xref_refc(
        struct xfs_scrub        *sc,
        struct xfs_rmap_irec    *irec)
{
        xfs_agblock_t           fbno;
        xfs_extlen_t            flen;
        bool                    non_inode;
        bool                    is_bmbt;
        bool                    is_attr;
        bool                    is_unwritten;
        int                     error;

        if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
                return;

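        /* Note the mapping attributes that would rule out block sharing. */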
        non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
        is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK;
        is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK;
        is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN;

        /* If this is shared, must be a data fork extent. */
        error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock,
                        irec->rm_blockcount, &fbno, &flen, false);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
                return;
        if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten))
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}

/* Cross-reference with the other btrees. */
STATIC void
xchk_rmapbt_xref(
        struct xfs_scrub        *sc,
        struct xfs_rmap_irec    *irec)
{
        xfs_agblock_t           agbno = irec->rm_startblock;
        xfs_extlen_t            len = irec->rm_blockcount;

        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                return;

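        /*
         * Every reverse mapping must point at space that the free space
         * btrees consider allocated.  Inode chunk and CoW staging owners
         * get their own dedicated cross-checks below.
         */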
        xchk_xref_is_used_space(sc, agbno, len);
        if (irec->rm_owner == XFS_RMAP_OWN_INODES)
                xchk_xref_is_inode_chunk(sc, agbno, len);
        else
                xchk_xref_is_not_inode_chunk(sc, agbno, len);
        if (irec->rm_owner == XFS_RMAP_OWN_COW)
                xchk_xref_is_cow_staging(sc, irec->rm_startblock,
                                irec->rm_blockcount);
        else
                xchk_rmapbt_xref_refc(sc, irec);
}

/*
 * Check for bogus UNWRITTEN flags in the rmapbt node block keys.
 *
 * In reverse mapping records, the file mapping extent state
 * (XFS_RMAP_OFF_UNWRITTEN) is a record attribute, not a key field.  It is not
 * involved in lookups in any way.  In older kernels, the functions that
 * convert rmapbt records to keys forgot to filter out the extent state bit,
 * even though the key comparison functions have filtered the flag correctly.
 * If we spot an rmap key with the unwritten bit set in rm_offset, we should
 * mark the btree as needing optimization to rebuild the btree without those
 * flags.
 */
STATIC void
xchk_rmapbt_check_unwritten_in_keyflags(
        struct xchk_btree       *bs)
{
        struct xfs_scrub        *sc = bs->sc;
        struct xfs_btree_cur    *cur = bs->cur;
        struct xfs_btree_block  *keyblock;
        union xfs_btree_key     *lkey, *hkey;
        __be64                  badflag = cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
        unsigned int            level;

        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_PREEN)
                return;

        for (level = 1; level < cur->bc_nlevels; level++) {
                struct xfs_buf  *bp;
                unsigned int    ptr;

                /* Only check the first time we've seen this node block. */
                if (cur->bc_levels[level].ptr > 1)
                        continue;

                keyblock = xfs_btree_get_block(cur, level, &bp);
                for (ptr = 1; ptr <= be16_to_cpu(keyblock->bb_numrecs); ptr++) {
                        lkey = xfs_btree_key_addr(cur, ptr, keyblock);

                        if (lkey->rmap.rm_offset & badflag) {
                                xchk_btree_set_preen(sc, cur, level);
                                break;
                        }

                        hkey = xfs_btree_high_key_addr(cur, ptr, keyblock);
                        if (hkey->rmap.rm_offset & badflag) {
                                xchk_btree_set_preen(sc, cur, level);
                                break;
                        }
                }
        }
}

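/*
 * Decide if this reverse mapping could describe a shareable extent: reflink
 * must be enabled, and the mapping must be a written data fork extent
 * belonging to a file.
 */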
static inline bool
xchk_rmapbt_is_shareable(
        struct xfs_scrub                *sc,
        const struct xfs_rmap_irec      *irec)
{
        if (!xfs_has_reflink(sc->mp))
                return false;
        if (XFS_RMAP_NON_INODE_OWNER(irec->rm_owner))
                return false;
        if (irec->rm_flags & (XFS_RMAP_BMBT_BLOCK | XFS_RMAP_ATTR_FORK |
                              XFS_RMAP_UNWRITTEN))
                return false;
        return true;
}

/* Flag failures for records that overlap but cannot. */
STATIC void
xchk_rmapbt_check_overlapping(
        struct xchk_btree               *bs,
        struct xchk_rmap                *cr,
        const struct xfs_rmap_irec      *irec)
{
        xfs_agblock_t                   pnext, inext;

        if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                return;

        /* No previous record? */
        if (cr->overlap_rec.rm_blockcount == 0)
                goto set_prev;

        /* Do overlap_rec and irec overlap? */
        pnext = cr->overlap_rec.rm_startblock + cr->overlap_rec.rm_blockcount;
        if (pnext <= irec->rm_startblock)
                goto set_prev;

        /* Overlap is only allowed if both records are data fork mappings. */
        if (!xchk_rmapbt_is_shareable(bs->sc, &cr->overlap_rec) ||
            !xchk_rmapbt_is_shareable(bs->sc, irec))
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

        /* Save whichever rmap record extends furthest. */
        inext = irec->rm_startblock + irec->rm_blockcount;
        if (pnext > inext)
                return;

set_prev:
        memcpy(&cr->overlap_rec, irec, sizeof(struct xfs_rmap_irec));
}

/* Decide if two reverse-mapping records can be merged. */
static inline bool
xchk_rmap_mergeable(
        struct xchk_rmap                *cr,
        const struct xfs_rmap_irec      *r2)
{
        const struct xfs_rmap_irec      *r1 = &cr->prev_rec;

        /* Ignore if prev_rec is not yet initialized. */
        if (cr->prev_rec.rm_blockcount == 0)
                return false;

        if (r1->rm_owner != r2->rm_owner)
                return false;
        if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
                return false;
        if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
            XFS_RMAP_LEN_MAX)
                return false;
        if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
                return true;
        /* must be an inode owner below here */
        if (r1->rm_flags != r2->rm_flags)
                return false;
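        /* bmbt block mappings carry no fork offset, so adjacency is enough. */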
        if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
                return true;
        return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
}

/* Flag failures for records that could be merged. */
STATIC void
xchk_rmapbt_check_mergeable(
        struct xchk_btree               *bs,
        struct xchk_rmap                *cr,
        const struct xfs_rmap_irec      *irec)
{
        if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                return;

        if (xchk_rmap_mergeable(cr, irec))
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

        memcpy(&cr->prev_rec, irec, sizeof(struct xfs_rmap_irec));
}

/* Compare an rmap for AG metadata against the metadata walk. */
STATIC int
xchk_rmapbt_mark_bitmap(
        struct xchk_btree               *bs,
        struct xchk_rmap                *cr,
        const struct xfs_rmap_irec      *irec)
{
        struct xfs_scrub                *sc = bs->sc;
        struct xagb_bitmap              *bmp = NULL;
        xfs_extlen_t                    fsbcount = irec->rm_blockcount;

        /*
         * Skip corrupt records.  It is essential that we detect records in the
         * btree that cannot overlap but do, flag those as CORRUPT, and skip
         * the bitmap comparison to avoid generating false XCORRUPT reports.
         */
        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                return 0;

        /*
         * If the AG metadata walk didn't complete, there's no point in
         * comparing against partial results.
         */
        if (!cr->bitmaps_complete)
                return 0;

        switch (irec->rm_owner) {
        case XFS_RMAP_OWN_FS:
                bmp = &cr->fs_owned;
                break;
        case XFS_RMAP_OWN_LOG:
                bmp = &cr->log_owned;
                break;
        case XFS_RMAP_OWN_AG:
                bmp = &cr->ag_owned;
                break;
        }

        if (!bmp)
                return 0;

        if (xagb_bitmap_test(bmp, irec->rm_startblock, &fsbcount)) {
                /*
                 * The start of this reverse mapping corresponds to a set
                 * region in the bitmap.  If the mapping covers more area than
                 * the set region, then it covers space that wasn't found by
                 * the AG metadata walk.
                 */
                if (fsbcount < irec->rm_blockcount)
                        xchk_btree_xref_set_corrupt(bs->sc,
                                        bs->sc->sa.rmap_cur, 0);
        } else {
                /*
                 * The start of this reverse mapping does not correspond to a
                 * completely set region in the bitmap.  The region wasn't
                 * fully set by walking the AG metadata, so this is a
                 * cross-referencing corruption.
                 */
                xchk_btree_xref_set_corrupt(bs->sc, bs->sc->sa.rmap_cur, 0);
        }

        /* Unset the region so that we can detect missing rmap records. */
        return xagb_bitmap_clear(bmp, irec->rm_startblock, irec->rm_blockcount);
}

/* Scrub an rmapbt record. */
STATIC int
xchk_rmapbt_rec(
        struct xchk_btree       *bs,
        const union xfs_btree_rec *rec)
{
        struct xchk_rmap        *cr = bs->private;
        struct xfs_rmap_irec    irec;

        if (xfs_rmap_btrec_to_irec(rec, &irec) != NULL ||
            xfs_rmap_check_irec(bs->cur, &irec) != NULL) {
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
                return 0;
        }

        xchk_rmapbt_check_unwritten_in_keyflags(bs);
        xchk_rmapbt_check_mergeable(bs, cr, &irec);
        xchk_rmapbt_check_overlapping(bs, cr, &irec);
        xchk_rmapbt_xref(bs->sc, &irec);

        return xchk_rmapbt_mark_bitmap(bs, cr, &irec);
}

/* Add an AGFL block to the rmap list. */
STATIC int
xchk_rmapbt_walk_agfl(
        struct xfs_mount        *mp,
        xfs_agblock_t           agbno,
        void                    *priv)
{
        struct xagb_bitmap      *bitmap = priv;

        return xagb_bitmap_set(bitmap, agbno, 1);
}

/*
 * Set up bitmaps mapping all the AG metadata to compare with the rmapbt
 * records.
 *
 * Grab our own btree cursors here if the scrub setup function didn't give us a
 * btree cursor due to reports of poor health.  We need to find out if the
 * rmapbt disagrees with primary metadata btrees to tag the rmapbt as being
 * XCORRUPT.
 */
STATIC int
xchk_rmapbt_walk_ag_metadata(
        struct xfs_scrub        *sc,
        struct xchk_rmap        *cr)
{
        struct xfs_mount        *mp = sc->mp;
        struct xfs_buf          *agfl_bp;
        struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
        struct xfs_btree_cur    *cur;
        int                     error;

        /* OWN_FS: AG headers */
        error = xagb_bitmap_set(&cr->fs_owned, XFS_SB_BLOCK(mp),
                        XFS_AGFL_BLOCK(mp) - XFS_SB_BLOCK(mp) + 1);
        if (error)
                goto out;

        /* OWN_LOG: Internal log */
        if (xfs_ag_contains_log(mp, sc->sa.pag->pag_agno)) {
                error = xagb_bitmap_set(&cr->log_owned,
                                XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart),
                                mp->m_sb.sb_logblocks);
                if (error)
                        goto out;
        }

        /* OWN_AG: bnobt, cntbt, rmapbt, and AGFL */
        cur = sc->sa.bno_cur;
        if (!cur)
                cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
                                sc->sa.pag, XFS_BTNUM_BNO);
        error = xagb_bitmap_set_btblocks(&cr->ag_owned, cur);
        if (cur != sc->sa.bno_cur)
                xfs_btree_del_cursor(cur, error);
        if (error)
                goto out;

        cur = sc->sa.cnt_cur;
        if (!cur)
                cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
                                sc->sa.pag, XFS_BTNUM_CNT);
        error = xagb_bitmap_set_btblocks(&cr->ag_owned, cur);
        if (cur != sc->sa.cnt_cur)
                xfs_btree_del_cursor(cur, error);
        if (error)
                goto out;

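        /*
         * The rmapbt's own blocks are OWN_AG metadata too; record them using
         * the cursor for the btree under scrub.
         */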
        error = xagb_bitmap_set_btblocks(&cr->ag_owned, sc->sa.rmap_cur);
        if (error)
                goto out;

        error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
        if (error)
                goto out;

        error = xfs_agfl_walk(sc->mp, agf, agfl_bp, xchk_rmapbt_walk_agfl,
                        &cr->ag_owned);
        xfs_trans_brelse(sc->tp, agfl_bp);

out:
        /*
         * If there's an error, set XFAIL and disable the bitmap
         * cross-referencing checks, but proceed with the scrub anyway.
         */
        if (error)
                xchk_btree_xref_process_error(sc, sc->sa.rmap_cur,
                                sc->sa.rmap_cur->bc_nlevels - 1, &error);
        else
                cr->bitmaps_complete = true;
        return 0;
}

/*
 * Check for set regions in the bitmaps; if there are any, the rmap records do
 * not describe all the AG metadata.
 */
STATIC void
xchk_rmapbt_check_bitmaps(
        struct xfs_scrub        *sc,
        struct xchk_rmap        *cr)
{
        struct xfs_btree_cur    *cur = sc->sa.rmap_cur;
        unsigned int            level;

        if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
                                XFS_SCRUB_OFLAG_XFAIL))
                return;
        if (!cur)
                return;
        level = cur->bc_nlevels - 1;

        /*
         * Any bitmap with bits still set indicates that the reverse mapping
         * doesn't cover the entire primary structure.
         */
        if (xagb_bitmap_hweight(&cr->fs_owned) != 0)
                xchk_btree_xref_set_corrupt(sc, cur, level);

        if (xagb_bitmap_hweight(&cr->log_owned) != 0)
                xchk_btree_xref_set_corrupt(sc, cur, level);

        if (xagb_bitmap_hweight(&cr->ag_owned) != 0)
                xchk_btree_xref_set_corrupt(sc, cur, level);
}

/* Scrub the rmap btree for some AG. */
int
xchk_rmapbt(
        struct xfs_scrub        *sc)
{
        struct xchk_rmap        *cr;
        int                     error;

        cr = kzalloc(sizeof(struct xchk_rmap), XCHK_GFP_FLAGS);
        if (!cr)
                return -ENOMEM;

        xagb_bitmap_init(&cr->fs_owned);
        xagb_bitmap_init(&cr->log_owned);
        xagb_bitmap_init(&cr->ag_owned);

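        /*
         * Walk the AG metadata to build the ownership bitmaps, check each
         * rmapbt record against them, then flag any metadata blocks that no
         * rmap record described.
         */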
        error = xchk_rmapbt_walk_ag_metadata(sc, cr);
        if (error)
                goto out;

        error = xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec,
                        &XFS_RMAP_OINFO_AG, cr);
        if (error)
                goto out;

        xchk_rmapbt_check_bitmaps(sc, cr);

out:
        xagb_bitmap_destroy(&cr->ag_owned);
        xagb_bitmap_destroy(&cr->log_owned);
        xagb_bitmap_destroy(&cr->fs_owned);
        kfree(cr);
        return error;
}

/* xref check that the extent is owned only by a given owner */
void
xchk_xref_is_only_owned_by(
        struct xfs_scrub                *sc,
        xfs_agblock_t                   bno,
        xfs_extlen_t                    len,
        const struct xfs_owner_info     *oinfo)
{
        struct xfs_rmap_matches         res;
        int                             error;

        if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
                return;

        error = xfs_rmap_count_owners(sc->sa.rmap_cur, bno, len, oinfo, &res);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
                return;
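        /*
         * The extent should be mapped to this owner exactly once, and no
         * other owners should have rmaps covering it.
         */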
        if (res.matches != 1)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
        if (res.bad_non_owner_matches)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
        if (res.non_owner_matches)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}

/* xref check that the extent is not owned by a given owner */
void
xchk_xref_is_not_owned_by(
        struct xfs_scrub                *sc,
        xfs_agblock_t                   bno,
        xfs_extlen_t                    len,
        const struct xfs_owner_info     *oinfo)
{
        struct xfs_rmap_matches         res;
        int                             error;

        if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
                return;

        error = xfs_rmap_count_owners(sc->sa.rmap_cur, bno, len, oinfo, &res);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
                return;
        if (res.matches != 0)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
        if (res.bad_non_owner_matches)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}

/* xref check that the extent has no reverse mapping at all */
void
xchk_xref_has_no_owner(
        struct xfs_scrub        *sc,
        xfs_agblock_t           bno,
        xfs_extlen_t            len)
{
        enum xbtree_recpacking  outcome;
        int                     error;

        if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
                return;

        error = xfs_rmap_has_records(sc->sa.rmap_cur, bno, len, &outcome);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
                return;
        if (outcome != XBTREE_RECPACKING_EMPTY)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}