xfs: always succeed at setting the reserve pool size
[platform/kernel/linux-rpi.git] / fs / xfs / xfs_fsmap.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_trans.h"
15 #include "xfs_btree.h"
16 #include "xfs_rmap_btree.h"
17 #include "xfs_trace.h"
18 #include "xfs_rmap.h"
19 #include "xfs_alloc.h"
20 #include "xfs_bit.h"
21 #include <linux/fsmap.h>
22 #include "xfs_fsmap.h"
23 #include "xfs_refcount.h"
24 #include "xfs_refcount_btree.h"
25 #include "xfs_alloc_btree.h"
26 #include "xfs_rtalloc.h"
27 #include "xfs_ag.h"
28
29 /* Convert an xfs_fsmap to an fsmap. */
30 static void
31 xfs_fsmap_from_internal(
32         struct fsmap            *dest,
33         struct xfs_fsmap        *src)
34 {
35         dest->fmr_device = src->fmr_device;
36         dest->fmr_flags = src->fmr_flags;
37         dest->fmr_physical = BBTOB(src->fmr_physical);
38         dest->fmr_owner = src->fmr_owner;
39         dest->fmr_offset = BBTOB(src->fmr_offset);
40         dest->fmr_length = BBTOB(src->fmr_length);
41         dest->fmr_reserved[0] = 0;
42         dest->fmr_reserved[1] = 0;
43         dest->fmr_reserved[2] = 0;
44 }
45
46 /* Convert an fsmap to an xfs_fsmap. */
47 void
48 xfs_fsmap_to_internal(
49         struct xfs_fsmap        *dest,
50         struct fsmap            *src)
51 {
52         dest->fmr_device = src->fmr_device;
53         dest->fmr_flags = src->fmr_flags;
54         dest->fmr_physical = BTOBBT(src->fmr_physical);
55         dest->fmr_owner = src->fmr_owner;
56         dest->fmr_offset = BTOBBT(src->fmr_offset);
57         dest->fmr_length = BTOBBT(src->fmr_length);
58 }
59
60 /* Convert an fsmap owner into an rmapbt owner. */
61 static int
62 xfs_fsmap_owner_to_rmap(
63         struct xfs_rmap_irec    *dest,
64         const struct xfs_fsmap  *src)
65 {
66         if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
67                 dest->rm_owner = src->fmr_owner;
68                 return 0;
69         }
70
71         switch (src->fmr_owner) {
72         case 0:                 /* "lowest owner id possible" */
73         case -1ULL:             /* "highest owner id possible" */
74                 dest->rm_owner = 0;
75                 break;
76         case XFS_FMR_OWN_FREE:
77                 dest->rm_owner = XFS_RMAP_OWN_NULL;
78                 break;
79         case XFS_FMR_OWN_UNKNOWN:
80                 dest->rm_owner = XFS_RMAP_OWN_UNKNOWN;
81                 break;
82         case XFS_FMR_OWN_FS:
83                 dest->rm_owner = XFS_RMAP_OWN_FS;
84                 break;
85         case XFS_FMR_OWN_LOG:
86                 dest->rm_owner = XFS_RMAP_OWN_LOG;
87                 break;
88         case XFS_FMR_OWN_AG:
89                 dest->rm_owner = XFS_RMAP_OWN_AG;
90                 break;
91         case XFS_FMR_OWN_INOBT:
92                 dest->rm_owner = XFS_RMAP_OWN_INOBT;
93                 break;
94         case XFS_FMR_OWN_INODES:
95                 dest->rm_owner = XFS_RMAP_OWN_INODES;
96                 break;
97         case XFS_FMR_OWN_REFC:
98                 dest->rm_owner = XFS_RMAP_OWN_REFC;
99                 break;
100         case XFS_FMR_OWN_COW:
101                 dest->rm_owner = XFS_RMAP_OWN_COW;
102                 break;
103         case XFS_FMR_OWN_DEFECTIVE:     /* not implemented */
104                 /* fall through */
105         default:
106                 return -EINVAL;
107         }
108         return 0;
109 }
110
111 /* Convert an rmapbt owner into an fsmap owner. */
112 static int
113 xfs_fsmap_owner_from_rmap(
114         struct xfs_fsmap                *dest,
115         const struct xfs_rmap_irec      *src)
116 {
117         dest->fmr_flags = 0;
118         if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) {
119                 dest->fmr_owner = src->rm_owner;
120                 return 0;
121         }
122         dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;
123
124         switch (src->rm_owner) {
125         case XFS_RMAP_OWN_FS:
126                 dest->fmr_owner = XFS_FMR_OWN_FS;
127                 break;
128         case XFS_RMAP_OWN_LOG:
129                 dest->fmr_owner = XFS_FMR_OWN_LOG;
130                 break;
131         case XFS_RMAP_OWN_AG:
132                 dest->fmr_owner = XFS_FMR_OWN_AG;
133                 break;
134         case XFS_RMAP_OWN_INOBT:
135                 dest->fmr_owner = XFS_FMR_OWN_INOBT;
136                 break;
137         case XFS_RMAP_OWN_INODES:
138                 dest->fmr_owner = XFS_FMR_OWN_INODES;
139                 break;
140         case XFS_RMAP_OWN_REFC:
141                 dest->fmr_owner = XFS_FMR_OWN_REFC;
142                 break;
143         case XFS_RMAP_OWN_COW:
144                 dest->fmr_owner = XFS_FMR_OWN_COW;
145                 break;
146         case XFS_RMAP_OWN_NULL: /* "free" */
147                 dest->fmr_owner = XFS_FMR_OWN_FREE;
148                 break;
149         default:
150                 ASSERT(0);
151                 return -EFSCORRUPTED;
152         }
153         return 0;
154 }
155
156 /*
 * getfsmap query state.  A pointer to one of these is handed to the
 * per-device query functions and on to the record helpers, which use it to
 * track where the output goes and how far the scan has progressed.
 */
157 struct xfs_getfsmap_info {
158         struct xfs_fsmap_head   *head;          /* request header (fmh_count, fmh_entries) */
159         struct fsmap            *fsmap_recs;    /* output mapping records, indexed by fmh_entries */
160         struct xfs_buf          *agf_bp;        /* AGF, for refcount queries */
161         struct xfs_perag        *pag;           /* AG info, if applicable; NULL for rt/log queries */
162         xfs_daddr_t             next_daddr;     /* next daddr we expect; a later start means a gap */
163         u64                     missing_owner;  /* owner reported for holes between records */
164         u32                     dev;            /* device id, copied into each fmr_device */
165         struct xfs_rmap_irec    low;            /* low rmap key */
166         struct xfs_rmap_irec    high;           /* high rmap key */
167         bool                    last;           /* last extent? set for the final gap-reporting call */
168 };
169
170 /*
 * Associate a device with a getfsmap handler.  Entries are ordered by @dev
 * using xfs_getfsmap_dev_compare().
 */
171 struct xfs_getfsmap_dev {
        /* Device id this handler answers for. */
172         u32                     dev;
        /*
         * Query function for the device.  It receives the transaction, the
         * low/high key pair (keys[0], keys[1]) and the shared query state,
         * and returns 0 or a negative errno.
         */
173         int                     (*fn)(struct xfs_trans *tp,
174                                       const struct xfs_fsmap *keys,
175                                       struct xfs_getfsmap_info *info);
176 };
177
178 /* Compare two getfsmap device handlers. */
179 static int
180 xfs_getfsmap_dev_compare(
181         const void                      *p1,
182         const void                      *p2)
183 {
184         const struct xfs_getfsmap_dev   *d1 = p1;
185         const struct xfs_getfsmap_dev   *d2 = p2;
186
187         return d1->dev - d2->dev;
188 }
189
190 /* Decide if this mapping is shared. */
191 STATIC int
192 xfs_getfsmap_is_shared(
193         struct xfs_trans                *tp,
194         struct xfs_getfsmap_info        *info,
195         const struct xfs_rmap_irec      *rec,
196         bool                            *stat)
197 {
198         struct xfs_mount                *mp = tp->t_mountp;
199         struct xfs_btree_cur            *cur;
200         xfs_agblock_t                   fbno;
201         xfs_extlen_t                    flen;
202         int                             error;
203
204         *stat = false;
205         if (!xfs_has_reflink(mp))
206                 return 0;
207         /* rt files will have no perag structure */
208         if (!info->pag)
209                 return 0;
210
211         /* Are there any shared blocks here? */
212         flen = 0;
213         cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, info->pag);
214
215         error = xfs_refcount_find_shared(cur, rec->rm_startblock,
216                         rec->rm_blockcount, &fbno, &flen, false);
217
218         xfs_btree_del_cursor(cur, error);
219         if (error)
220                 return error;
221
222         *stat = flen > 0;
223         return 0;
224 }
225
226 static inline void
227 xfs_getfsmap_format(
228         struct xfs_mount                *mp,
229         struct xfs_fsmap                *xfm,
230         struct xfs_getfsmap_info        *info)
231 {
232         struct fsmap                    *rec;
233
234         trace_xfs_getfsmap_mapping(mp, xfm);
235
236         rec = &info->fsmap_recs[info->head->fmh_entries++];
237         xfs_fsmap_from_internal(rec, xfm);
238 }
239
240 /*
241  * Format a reverse mapping for getfsmap, having translated rm_startblock
242  * into the appropriate daddr units.
243  */
244 STATIC int
245 xfs_getfsmap_helper(
246         struct xfs_trans                *tp,
247         struct xfs_getfsmap_info        *info,
248         const struct xfs_rmap_irec      *rec,
249         xfs_daddr_t                     rec_daddr)
250 {
251         struct xfs_fsmap                fmr;
252         struct xfs_mount                *mp = tp->t_mountp;
253         bool                            shared;
254         int                             error;
255
256         if (fatal_signal_pending(current))
257                 return -EINTR;
258
259         /*
260          * Filter out records that start before our startpoint, if the
261          * caller requested that.
262          */
263         if (xfs_rmap_compare(rec, &info->low) < 0) {
264                 rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
265                 if (info->next_daddr < rec_daddr)
266                         info->next_daddr = rec_daddr;
267                 return 0;
268         }
269
270         /* Are we just counting mappings? */
271         if (info->head->fmh_count == 0) {
272                 if (info->head->fmh_entries == UINT_MAX)
273                         return -ECANCELED;
274
275                 if (rec_daddr > info->next_daddr)
276                         info->head->fmh_entries++;
277
278                 if (info->last)
279                         return 0;
280
281                 info->head->fmh_entries++;
282
283                 rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
284                 if (info->next_daddr < rec_daddr)
285                         info->next_daddr = rec_daddr;
286                 return 0;
287         }
288
289         /*
290          * If the record starts past the last physical block we saw,
291          * then we've found a gap.  Report the gap as being owned by
292          * whatever the caller specified is the missing owner.
293          */
294         if (rec_daddr > info->next_daddr) {
295                 if (info->head->fmh_entries >= info->head->fmh_count)
296                         return -ECANCELED;
297
298                 fmr.fmr_device = info->dev;
299                 fmr.fmr_physical = info->next_daddr;
300                 fmr.fmr_owner = info->missing_owner;
301                 fmr.fmr_offset = 0;
302                 fmr.fmr_length = rec_daddr - info->next_daddr;
303                 fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
304                 xfs_getfsmap_format(mp, &fmr, info);
305         }
306
307         if (info->last)
308                 goto out;
309
310         /* Fill out the extent we found */
311         if (info->head->fmh_entries >= info->head->fmh_count)
312                 return -ECANCELED;
313
314         trace_xfs_fsmap_mapping(mp, info->dev,
315                         info->pag ? info->pag->pag_agno : NULLAGNUMBER, rec);
316
317         fmr.fmr_device = info->dev;
318         fmr.fmr_physical = rec_daddr;
319         error = xfs_fsmap_owner_from_rmap(&fmr, rec);
320         if (error)
321                 return error;
322         fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
323         fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
324         if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
325                 fmr.fmr_flags |= FMR_OF_PREALLOC;
326         if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
327                 fmr.fmr_flags |= FMR_OF_ATTR_FORK;
328         if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
329                 fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
330         if (fmr.fmr_flags == 0) {
331                 error = xfs_getfsmap_is_shared(tp, info, rec, &shared);
332                 if (error)
333                         return error;
334                 if (shared)
335                         fmr.fmr_flags |= FMR_OF_SHARED;
336         }
337
338         xfs_getfsmap_format(mp, &fmr, info);
339 out:
340         rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
341         if (info->next_daddr < rec_daddr)
342                 info->next_daddr = rec_daddr;
343         return 0;
344 }
345
346 /* Transform a rmapbt irec into a fsmap */
347 STATIC int
348 xfs_getfsmap_datadev_helper(
349         struct xfs_btree_cur            *cur,
350         const struct xfs_rmap_irec      *rec,
351         void                            *priv)
352 {
353         struct xfs_mount                *mp = cur->bc_mp;
354         struct xfs_getfsmap_info        *info = priv;
355         xfs_fsblock_t                   fsb;
356         xfs_daddr_t                     rec_daddr;
357
358         fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock);
359         rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
360
361         return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
362 }
363
364 /* Transform a bnobt irec into a fsmap */
365 STATIC int
366 xfs_getfsmap_datadev_bnobt_helper(
367         struct xfs_btree_cur            *cur,
368         const struct xfs_alloc_rec_incore *rec,
369         void                            *priv)
370 {
371         struct xfs_mount                *mp = cur->bc_mp;
372         struct xfs_getfsmap_info        *info = priv;
373         struct xfs_rmap_irec            irec;
374         xfs_daddr_t                     rec_daddr;
375
376         rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_ag.pag->pag_agno,
377                         rec->ar_startblock);
378
379         irec.rm_startblock = rec->ar_startblock;
380         irec.rm_blockcount = rec->ar_blockcount;
381         irec.rm_owner = XFS_RMAP_OWN_NULL;      /* "free" */
382         irec.rm_offset = 0;
383         irec.rm_flags = 0;
384
385         return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr);
386 }
387
388 /* Set rmap flags based on the getfsmap flags */
389 static void
390 xfs_getfsmap_set_irec_flags(
391         struct xfs_rmap_irec    *irec,
392         const struct xfs_fsmap  *fmr)
393 {
394         irec->rm_flags = 0;
395         if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
396                 irec->rm_flags |= XFS_RMAP_ATTR_FORK;
397         if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
398                 irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
399         if (fmr->fmr_flags & FMR_OF_PREALLOC)
400                 irec->rm_flags |= XFS_RMAP_UNWRITTEN;
401 }
402
403 /* Execute a getfsmap query against the log device. */
404 STATIC int
405 xfs_getfsmap_logdev(
406         struct xfs_trans                *tp,
407         const struct xfs_fsmap          *keys,
408         struct xfs_getfsmap_info        *info)
409 {
410         struct xfs_mount                *mp = tp->t_mountp;
411         struct xfs_rmap_irec            rmap;
412         int                             error;
413
414         /* Set up search keys */
415         info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
416         info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
417         error = xfs_fsmap_owner_to_rmap(&info->low, keys);
418         if (error)
419                 return error;
420         info->low.rm_blockcount = 0;
421         xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
422
423         error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1);
424         if (error)
425                 return error;
426         info->high.rm_startblock = -1U;
427         info->high.rm_owner = ULLONG_MAX;
428         info->high.rm_offset = ULLONG_MAX;
429         info->high.rm_blockcount = 0;
430         info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
431         info->missing_owner = XFS_FMR_OWN_FREE;
432
433         trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low);
434         trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high);
435
436         if (keys[0].fmr_physical > 0)
437                 return 0;
438
439         /* Fabricate an rmap entry for the external log device. */
440         rmap.rm_startblock = 0;
441         rmap.rm_blockcount = mp->m_sb.sb_logblocks;
442         rmap.rm_owner = XFS_RMAP_OWN_LOG;
443         rmap.rm_offset = 0;
444         rmap.rm_flags = 0;
445
446         return xfs_getfsmap_helper(tp, info, &rmap, 0);
447 }
448
449 #ifdef CONFIG_XFS_RT
450 /* Transform a rtbitmap "record" into a fsmap */
451 STATIC int
452 xfs_getfsmap_rtdev_rtbitmap_helper(
453         struct xfs_trans                *tp,
454         const struct xfs_rtalloc_rec    *rec,
455         void                            *priv)
456 {
457         struct xfs_mount                *mp = tp->t_mountp;
458         struct xfs_getfsmap_info        *info = priv;
459         struct xfs_rmap_irec            irec;
460         xfs_daddr_t                     rec_daddr;
461
462         irec.rm_startblock = rec->ar_startext * mp->m_sb.sb_rextsize;
463         rec_daddr = XFS_FSB_TO_BB(mp, irec.rm_startblock);
464         irec.rm_blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize;
465         irec.rm_owner = XFS_RMAP_OWN_NULL;      /* "free" */
466         irec.rm_offset = 0;
467         irec.rm_flags = 0;
468
469         return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
470 }
471
472 /* Execute a getfsmap query against the realtime device. */
473 STATIC int
474 __xfs_getfsmap_rtdev(
475         struct xfs_trans                *tp,
476         const struct xfs_fsmap          *keys,
477         int                             (*query_fn)(struct xfs_trans *,
478                                                     struct xfs_getfsmap_info *),
479         struct xfs_getfsmap_info        *info)
480 {
481         struct xfs_mount                *mp = tp->t_mountp;
482         xfs_fsblock_t                   start_fsb;
483         xfs_fsblock_t                   end_fsb;
484         uint64_t                        eofs;
485         int                             error = 0;
486
487         eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
488         if (keys[0].fmr_physical >= eofs)
489                 return 0;
490         start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
491         end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
492
493         /* Set up search keys */
494         info->low.rm_startblock = start_fsb;
495         error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
496         if (error)
497                 return error;
498         info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
499         info->low.rm_blockcount = 0;
500         xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
501
502         info->high.rm_startblock = end_fsb;
503         error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
504         if (error)
505                 return error;
506         info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset);
507         info->high.rm_blockcount = 0;
508         xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
509
510         trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low);
511         trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high);
512
513         return query_fn(tp, info);
514 }
515
516 /* Actually query the realtime bitmap. */
517 STATIC int
518 xfs_getfsmap_rtdev_rtbitmap_query(
519         struct xfs_trans                *tp,
520         struct xfs_getfsmap_info        *info)
521 {
522         struct xfs_rtalloc_rec          alow = { 0 };
523         struct xfs_rtalloc_rec          ahigh = { 0 };
524         struct xfs_mount                *mp = tp->t_mountp;
525         int                             error;
526
527         xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED);
528
529         /*
530          * Set up query parameters to return free rtextents covering the range
531          * we want.
532          */
533         alow.ar_startext = info->low.rm_startblock;
534         ahigh.ar_startext = info->high.rm_startblock;
535         do_div(alow.ar_startext, mp->m_sb.sb_rextsize);
536         if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize))
537                 ahigh.ar_startext++;
538         error = xfs_rtalloc_query_range(tp, &alow, &ahigh,
539                         xfs_getfsmap_rtdev_rtbitmap_helper, info);
540         if (error)
541                 goto err;
542
543         /*
544          * Report any gaps at the end of the rtbitmap by simulating a null
545          * rmap starting at the block after the end of the query range.
546          */
547         info->last = true;
548         ahigh.ar_startext = min(mp->m_sb.sb_rextents, ahigh.ar_startext);
549
550         error = xfs_getfsmap_rtdev_rtbitmap_helper(tp, &ahigh, info);
551         if (error)
552                 goto err;
553 err:
554         xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED);
555         return error;
556 }
557
558 /* Execute a getfsmap query against the realtime device rtbitmap. */
559 STATIC int
560 xfs_getfsmap_rtdev_rtbitmap(
561         struct xfs_trans                *tp,
562         const struct xfs_fsmap          *keys,
563         struct xfs_getfsmap_info        *info)
564 {
565         info->missing_owner = XFS_FMR_OWN_UNKNOWN;
566         return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query,
567                         info);
568 }
569 #endif /* CONFIG_XFS_RT */
570
571 /*
 * Execute a getfsmap query against the regular data device.
 *
 * @tp:       transaction the query runs under.
 * @keys:     low (keys[0]) and high (keys[1]) fsmap keys for this device.
 * @info:     shared query state; low/high/pag/agf_bp/last are updated here.
 * @query_fn: per-AG query; called once for every AG in range and one extra
 *            time, with info->last set, for the final AG.
 * @priv:     opaque pointer passed through to @query_fn.
 *
 * Returns 0 on success (including when the low key lies at or beyond the end
 * of the device), or the first error from key conversion, reading the AGF,
 * or @query_fn.
 */
572 STATIC int
573 __xfs_getfsmap_datadev(
574         struct xfs_trans                *tp,
575         const struct xfs_fsmap          *keys,
576         struct xfs_getfsmap_info        *info,
577         int                             (*query_fn)(struct xfs_trans *,
578                                                     struct xfs_getfsmap_info *,
579                                                     struct xfs_btree_cur **,
580                                                     void *),
581         void                            *priv)
582 {
583         struct xfs_mount                *mp = tp->t_mountp;
584         struct xfs_perag                *pag;
585         struct xfs_btree_cur            *bt_cur = NULL;
586         xfs_fsblock_t                   start_fsb;
587         xfs_fsblock_t                   end_fsb;
588         xfs_agnumber_t                  start_ag;
589         xfs_agnumber_t                  end_ag;
590         uint64_t                        eofs;
591         int                             error = 0;
592
        /*
         * eofs is sb_dblocks converted with XFS_FSB_TO_BB.  A low key at or
         * past it has nothing to report; the high key is clamped to eofs - 1.
         */
593         eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
594         if (keys[0].fmr_physical >= eofs)
595                 return 0;
596         start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
597         end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
598
599         /*
600          * Convert the fsmap low/high keys to AG based keys.  Initialize
601          * low to the fsmap low key and max out the high key to the end
602          * of the AG.
603          */
604         info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
605         info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
606         error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
607         if (error)
608                 return error;
609         info->low.rm_blockcount = 0;
610         xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
611
612         info->high.rm_startblock = -1U;
613         info->high.rm_owner = ULLONG_MAX;
614         info->high.rm_offset = ULLONG_MAX;
615         info->high.rm_blockcount = 0;
616         info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
617
618         start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
619         end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
620
        /*
         * NOTE(review): for_each_perag_range() is defined elsewhere; the
         * cleanup below assumes it holds a perag reference for the current
         * iteration that must be dropped by us when we break out early.
         * Confirm against the macro definition.
         */
621         for_each_perag_range(mp, start_ag, end_ag, pag) {
622                 /*
623                  * Set the AG high key from the fsmap high key if this
624                  * is the last AG that we're querying.
625                  */
626                 info->pag = pag;
627                 if (pag->pag_agno == end_ag) {
628                         info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
629                                         end_fsb);
630                         info->high.rm_offset = XFS_BB_TO_FSBT(mp,
631                                         keys[1].fmr_offset);
632                         error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
633                         if (error)
634                                 break;
635                         xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
636                 }
637
                /*
                 * Release the cursor and AGF buffer left over from the
                 * previous AG before reading the next AGF.
                 */
638                 if (bt_cur) {
639                         xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
640                         bt_cur = NULL;
641                         xfs_trans_brelse(tp, info->agf_bp);
642                         info->agf_bp = NULL;
643                 }
644
645                 error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0,
646                                 &info->agf_bp);
647                 if (error)
648                         break;
649
650                 trace_xfs_fsmap_low_key(mp, info->dev, pag->pag_agno,
651                                 &info->low);
652                 trace_xfs_fsmap_high_key(mp, info->dev, pag->pag_agno,
653                                 &info->high);
654
                /* Run the per-AG query; it hands back its cursor in bt_cur. */
655                 error = query_fn(tp, info, &bt_cur, priv);
656                 if (error)
657                         break;
658
659                 /*
660                  * Set the AG low key to the start of the AG prior to
661                  * moving on to the next AG.
662                  */
663                 if (pag->pag_agno == start_ag) {
664                         info->low.rm_startblock = 0;
665                         info->low.rm_owner = 0;
666                         info->low.rm_offset = 0;
667                         info->low.rm_flags = 0;
668                 }
669
670                 /*
671                  * If this is the last AG, report any gap at the end of it
672                  * before we drop the reference to the perag when the loop
673                  * terminates.
674                  */
675                 if (pag->pag_agno == end_ag) {
676                         info->last = true;
677                         error = query_fn(tp, info, &bt_cur, priv);
678                         if (error)
679                                 break;
680                 }
681                 info->pag = NULL;
682         }
683
        /*
         * Common cleanup for both normal completion and early exit.  The
         * cursor is torn down in error mode only when error is negative.
         */
684         if (bt_cur)
685                 xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
686                                                          XFS_BTREE_NOERROR);
687         if (info->agf_bp) {
688                 xfs_trans_brelse(tp, info->agf_bp);
689                 info->agf_bp = NULL;
690         }
        /*
         * info->pag is still set only if we broke out of the loop body
         * before clearing it; drop that reference.  Otherwise drop whatever
         * pag the loop left behind, if any.
         */
691         if (info->pag) {
692                 xfs_perag_put(info->pag);
693                 info->pag = NULL;
694         } else if (pag) {
695                 /* loop termination case */
696                 xfs_perag_put(pag);
697         }
698
699         return error;
700 }
701
702 /* Actually query the rmap btree. */
703 STATIC int
704 xfs_getfsmap_datadev_rmapbt_query(
705         struct xfs_trans                *tp,
706         struct xfs_getfsmap_info        *info,
707         struct xfs_btree_cur            **curpp,
708         void                            *priv)
709 {
710         /* Report any gap at the end of the last AG. */
711         if (info->last)
712                 return xfs_getfsmap_datadev_helper(*curpp, &info->high, info);
713
714         /* Allocate cursor for this AG and query_range it. */
715         *curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
716                         info->pag);
717         return xfs_rmap_query_range(*curpp, &info->low, &info->high,
718                         xfs_getfsmap_datadev_helper, info);
719 }
720
721 /* Execute a getfsmap query against the regular data device rmapbt. */
722 STATIC int
723 xfs_getfsmap_datadev_rmapbt(
724         struct xfs_trans                *tp,
725         const struct xfs_fsmap          *keys,
726         struct xfs_getfsmap_info        *info)
727 {
728         info->missing_owner = XFS_FMR_OWN_FREE;
729         return __xfs_getfsmap_datadev(tp, keys, info,
730                         xfs_getfsmap_datadev_rmapbt_query, NULL);
731 }
732
733 /* Actually query the bno btree. */
734 STATIC int
735 xfs_getfsmap_datadev_bnobt_query(
736         struct xfs_trans                *tp,
737         struct xfs_getfsmap_info        *info,
738         struct xfs_btree_cur            **curpp,
739         void                            *priv)
740 {
741         struct xfs_alloc_rec_incore     *key = priv;
742
743         /* Report any gap at the end of the last AG. */
744         if (info->last)
745                 return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info);
746
747         /* Allocate cursor for this AG and query_range it. */
748         *curpp = xfs_allocbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
749                         info->pag, XFS_BTNUM_BNO);
750         key->ar_startblock = info->low.rm_startblock;
751         key[1].ar_startblock = info->high.rm_startblock;
752         return xfs_alloc_query_range(*curpp, key, &key[1],
753                         xfs_getfsmap_datadev_bnobt_helper, info);
754 }
755
756 /* Execute a getfsmap query against the regular data device's bnobt. */
757 STATIC int
758 xfs_getfsmap_datadev_bnobt(
759         struct xfs_trans                *tp,
760         const struct xfs_fsmap          *keys,
761         struct xfs_getfsmap_info        *info)
762 {
763         struct xfs_alloc_rec_incore     akeys[2];
764
765         info->missing_owner = XFS_FMR_OWN_UNKNOWN;
766         return __xfs_getfsmap_datadev(tp, keys, info,
767                         xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
768 }
769
770 /* Do we recognize the device? */
771 STATIC bool
772 xfs_getfsmap_is_valid_device(
773         struct xfs_mount        *mp,
774         struct xfs_fsmap        *fm)
775 {
776         if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
777             fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
778                 return true;
779         if (mp->m_logdev_targp &&
780             fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
781                 return true;
782         if (mp->m_rtdev_targp &&
783             fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
784                 return true;
785         return false;
786 }
787
788 /* Ensure that the low key is less than the high key. */
789 STATIC bool
790 xfs_getfsmap_check_keys(
791         struct xfs_fsmap                *low_key,
792         struct xfs_fsmap                *high_key)
793 {
794         if (low_key->fmr_device > high_key->fmr_device)
795                 return false;
796         if (low_key->fmr_device < high_key->fmr_device)
797                 return true;
798
799         if (low_key->fmr_physical > high_key->fmr_physical)
800                 return false;
801         if (low_key->fmr_physical < high_key->fmr_physical)
802                 return true;
803
804         if (low_key->fmr_owner > high_key->fmr_owner)
805                 return false;
806         if (low_key->fmr_owner < high_key->fmr_owner)
807                 return true;
808
809         if (low_key->fmr_offset > high_key->fmr_offset)
810                 return false;
811         if (low_key->fmr_offset < high_key->fmr_offset)
812                 return true;
813
814         return false;
815 }
816
/*
 * Number of device handler slots used by xfs_getfsmap(): two when realtime
 * support (CONFIG_XFS_RT) is not configured at build time, three when it is.
 */
#ifndef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS	2
#else
#define XFS_GETFSMAP_DEVS	3
#endif /* !CONFIG_XFS_RT */
825
826 /*
827  * Get filesystem's extents as described in head, and format for output. Fills
828  * in the supplied records array until there are no more reverse mappings to
829  * return or head.fmh_entries == head.fmh_count.  In the second case, this
830  * function returns -ECANCELED to indicate that more records would have been
831  * returned.
832  *
833  * Key to Confusion
834  * ----------------
835  * There are multiple levels of keys and counters at work here:
836  * xfs_fsmap_head.fmh_keys      -- low and high fsmap keys passed in;
837  *                                 these reflect fs-wide sector addrs.
838  * dkeys                        -- fmh_keys used to query each device;
839  *                                 these are fmh_keys but w/ the low key
840  *                                 bumped up by fmr_length.
841  * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this
842  *                                 is how we detect gaps in the fsmap
843                                    records and report them.
844  * xfs_getfsmap_info.low/high   -- per-AG low/high keys computed from
845  *                                 dkeys; used to query the metadata.
846  */
847 int
848 xfs_getfsmap(
849         struct xfs_mount                *mp,
850         struct xfs_fsmap_head           *head,
851         struct fsmap                    *fsmap_recs)
852 {
853         struct xfs_trans                *tp = NULL;
854         struct xfs_fsmap                dkeys[2];       /* per-dev keys */
855         struct xfs_getfsmap_dev         handlers[XFS_GETFSMAP_DEVS];
856         struct xfs_getfsmap_info        info = { NULL };
857         bool                            use_rmap;
858         int                             i;
859         int                             error = 0;
860
861         if (head->fmh_iflags & ~FMH_IF_VALID)
862                 return -EINVAL;
863         if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
864             !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
865                 return -EINVAL;
866
867         use_rmap = xfs_has_rmapbt(mp) &&
868                    has_capability_noaudit(current, CAP_SYS_ADMIN);
869         head->fmh_entries = 0;
870
871         /* Set up our device handlers. */
872         memset(handlers, 0, sizeof(handlers));
873         handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
874         if (use_rmap)
875                 handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
876         else
877                 handlers[0].fn = xfs_getfsmap_datadev_bnobt;
878         if (mp->m_logdev_targp != mp->m_ddev_targp) {
879                 handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
880                 handlers[1].fn = xfs_getfsmap_logdev;
881         }
882 #ifdef CONFIG_XFS_RT
883         if (mp->m_rtdev_targp) {
884                 handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
885                 handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
886         }
887 #endif /* CONFIG_XFS_RT */
888
889         xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
890                         xfs_getfsmap_dev_compare);
891
892         /*
893          * To continue where we left off, we allow userspace to use the
894          * last mapping from a previous call as the low key of the next.
895          * This is identified by a non-zero length in the low key. We
896          * have to increment the low key in this scenario to ensure we
897          * don't return the same mapping again, and instead return the
898          * very next mapping.
899          *
900          * If the low key mapping refers to file data, the same physical
901          * blocks could be mapped to several other files/offsets.
902          * According to rmapbt record ordering, the minimal next
903          * possible record for the block range is the next starting
904          * offset in the same inode. Therefore, bump the file offset to
905          * continue the search appropriately.  For all other low key
906          * mapping types (attr blocks, metadata), bump the physical
907          * offset as there can be no other mapping for the same physical
908          * block range.
909          */
910         dkeys[0] = head->fmh_keys[0];
911         if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
912                 dkeys[0].fmr_physical += dkeys[0].fmr_length;
913                 dkeys[0].fmr_owner = 0;
914                 if (dkeys[0].fmr_offset)
915                         return -EINVAL;
916         } else
917                 dkeys[0].fmr_offset += dkeys[0].fmr_length;
918         dkeys[0].fmr_length = 0;
919         memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
920
921         if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1]))
922                 return -EINVAL;
923
924         info.next_daddr = head->fmh_keys[0].fmr_physical +
925                           head->fmh_keys[0].fmr_length;
926         info.fsmap_recs = fsmap_recs;
927         info.head = head;
928
929         /* For each device we support... */
930         for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
931                 /* Is this device within the range the user asked for? */
932                 if (!handlers[i].fn)
933                         continue;
934                 if (head->fmh_keys[0].fmr_device > handlers[i].dev)
935                         continue;
936                 if (head->fmh_keys[1].fmr_device < handlers[i].dev)
937                         break;
938
939                 /*
940                  * If this device number matches the high key, we have
941                  * to pass the high key to the handler to limit the
942                  * query results.  If the device number exceeds the
943                  * low key, zero out the low key so that we get
944                  * everything from the beginning.
945                  */
946                 if (handlers[i].dev == head->fmh_keys[1].fmr_device)
947                         dkeys[1] = head->fmh_keys[1];
948                 if (handlers[i].dev > head->fmh_keys[0].fmr_device)
949                         memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
950
951                 /*
952                  * Grab an empty transaction so that we can use its recursive
953                  * buffer locking abilities to detect cycles in the rmapbt
954                  * without deadlocking.
955                  */
956                 error = xfs_trans_alloc_empty(mp, &tp);
957                 if (error)
958                         break;
959
960                 info.dev = handlers[i].dev;
961                 info.last = false;
962                 info.pag = NULL;
963                 error = handlers[i].fn(tp, dkeys, &info);
964                 if (error)
965                         break;
966                 xfs_trans_cancel(tp);
967                 tp = NULL;
968                 info.next_daddr = 0;
969         }
970
971         if (tp)
972                 xfs_trans_cancel(tp);
973         head->fmh_oflags = FMH_OF_DEV_T;
974         return error;
975 }