xfs: convert growfs AG header init to use buffer lists
[platform/kernel/linux-rpi.git] / fs / xfs / xfs_fsops.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_sb.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_inode.h"
30 #include "xfs_trans.h"
31 #include "xfs_inode_item.h"
32 #include "xfs_error.h"
33 #include "xfs_btree.h"
34 #include "xfs_alloc_btree.h"
35 #include "xfs_alloc.h"
36 #include "xfs_rmap_btree.h"
37 #include "xfs_ialloc.h"
38 #include "xfs_fsops.h"
39 #include "xfs_itable.h"
40 #include "xfs_trans_space.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_trace.h"
43 #include "xfs_log.h"
44 #include "xfs_filestream.h"
45 #include "xfs_rmap.h"
46 #include "xfs_ag_resv.h"
47
48 /*
49  * File system operations
50  */
51
52 static struct xfs_buf *
53 xfs_growfs_get_hdr_buf(
54         struct xfs_mount        *mp,
55         xfs_daddr_t             blkno,
56         size_t                  numblks,
57         int                     flags,
58         const struct xfs_buf_ops *ops)
59 {
60         struct xfs_buf          *bp;
61
62         bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
63         if (!bp)
64                 return NULL;
65
66         xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
67         bp->b_bn = blkno;
68         bp->b_maps[0].bm_bn = blkno;
69         bp->b_ops = ops;
70
71         return bp;
72 }
73
/*
 * Initialise the on-disk headers for a new AG and stage them for writing.
 *
 * Builds the AGF, AGFL and AGI headers plus the root blocks of the BNO,
 * CNT and INO btrees — and, when the corresponding features are enabled,
 * the RMAP, FINO and refcount btree roots — for AG @agno of @agsize
 * blocks.  Each buffer is queued on @buffer_list as a delayed write;
 * nothing is submitted here.  Writes are non-transactional but must be
 * submitted and completed by the caller before the growfs transaction is
 * logged.  The free space recorded in the new AG is added to @nfree.
 *
 * Returns 0 or -ENOMEM if a header buffer cannot be allocated.  On error,
 * buffers already queued remain on @buffer_list and must be cancelled by
 * the caller.
 */
static int
xfs_grow_ag_headers(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_extlen_t		agsize,
	xfs_rfsblock_t		*nfree,
	struct list_head	*buffer_list)
{
	struct xfs_agf		*agf;
	struct xfs_agi		*agi;
	struct xfs_agfl		*agfl;
	__be32			*agfl_bno;
	xfs_alloc_rec_t		*arec;
	struct xfs_buf		*bp;
	int			bucket;
	xfs_extlen_t		tmpsize;
	int			error = 0;

	/*
	 * AG freespace header block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0,
			&xfs_agf_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	agf = XFS_BUF_TO_AGF(bp);
	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
	agf->agf_seqno = cpu_to_be32(agno);
	agf->agf_length = cpu_to_be32(agsize);
	/* Both free space btrees start out as single-level trees. */
	agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		agf->agf_roots[XFS_BTNUM_RMAPi] =
					cpu_to_be32(XFS_RMAP_BLOCK(mp));
		agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
		agf->agf_rmap_blocks = cpu_to_be32(1);
	}

	/* The AG free list starts out empty (flcount == 0). */
	agf->agf_flfirst = cpu_to_be32(1);
	agf->agf_fllast = 0;
	agf->agf_flcount = 0;
	/* Everything beyond the preallocated header area is free space. */
	tmpsize = agsize - mp->m_ag_prealloc_blocks;
	agf->agf_freeblks = cpu_to_be32(tmpsize);
	agf->agf_longest = cpu_to_be32(tmpsize);
	if (xfs_sb_version_hascrc(&mp->m_sb))
		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
		agf->agf_refcount_root = cpu_to_be32(
				xfs_refc_block(mp));
		agf->agf_refcount_level = cpu_to_be32(1);
		agf->agf_refcount_blocks = cpu_to_be32(1);
	}
	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * AG freelist header block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0,
			&xfs_agfl_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	agfl = XFS_BUF_TO_AGFL(bp);
	/* Only v5 (CRC) filesystems carry a header in the AGFL block. */
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
		agfl->agfl_seqno = cpu_to_be32(agno);
		uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
	}

	/* Mark every freelist slot empty. */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
	for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
		agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * AG inode header block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0,
			&xfs_agi_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	agi = XFS_BUF_TO_AGI(bp);
	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
	agi->agi_seqno = cpu_to_be32(agno);
	agi->agi_length = cpu_to_be32(agsize);
	agi->agi_count = 0;
	agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
	agi->agi_level = cpu_to_be32(1);
	agi->agi_freecount = 0;
	agi->agi_newino = cpu_to_be32(NULLAGINO);
	agi->agi_dirino = cpu_to_be32(NULLAGINO);
	if (xfs_sb_version_hascrc(&mp->m_sb))
		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
		agi->agi_free_level = cpu_to_be32(1);
	}
	/* No unlinked inodes in a new AG: empty all hash buckets. */
	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * BNO btree root block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_allocbt_buf_ops);

	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, agno, 0);

	/* One record covering all free space after the static headers. */
	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
	arec->ar_blockcount = cpu_to_be32(
		agsize - be32_to_cpu(arec->ar_startblock));

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * CNT btree root block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_allocbt_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, agno, 0);

	/* Same single free extent as the BNO tree, keyed by size. */
	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
	arec->ar_blockcount = cpu_to_be32(
		agsize - be32_to_cpu(arec->ar_startblock));
	/* Account the AG's free space exactly once, from this tree. */
	*nfree += be32_to_cpu(arec->ar_blockcount);

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/* RMAP btree root block */
	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		struct xfs_rmap_rec	*rrec;
		struct xfs_btree_block	*block;

		bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_rmapbt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto out_error;
		}

		xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 0,
					agno, 0);
		block = XFS_BUF_TO_BLOCK(bp);

		/*
		 * mark the AG header regions as static metadata. The BNO
		 * btree block is the first block after the headers, so
		 * its location defines the size of region the static
		 * metadata consumes.
		 *
		 * Note: unlike mkfs, we never have to account for log
		 * space when growing the data regions
		 */
		rrec = XFS_RMAP_REC_ADDR(block, 1);
		rrec->rm_startblock = 0;
		rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account freespace btree root blocks */
		rrec = XFS_RMAP_REC_ADDR(block, 2);
		rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
		rrec->rm_blockcount = cpu_to_be32(2);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account inode btree root blocks */
		rrec = XFS_RMAP_REC_ADDR(block, 3);
		rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
		rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
						XFS_IBT_BLOCK(mp));
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account for rmap btree root */
		rrec = XFS_RMAP_REC_ADDR(block, 4);
		rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
		rrec->rm_blockcount = cpu_to_be32(1);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account for refc btree root */
		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
			rrec = XFS_RMAP_REC_ADDR(block, 5);
			rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
			rrec->rm_blockcount = cpu_to_be32(1);
			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
			rrec->rm_offset = 0;
			be16_add_cpu(&block->bb_numrecs, 1);
		}

		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	}

	/*
	 * INO btree root block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_inobt_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	xfs_btree_init_block(mp, bp, XFS_BTNUM_INO , 0, 0, agno, 0);
	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * FINO btree root block
	 */
	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_inobt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto out_error;
		}

		xfs_btree_init_block(mp, bp, XFS_BTNUM_FINO, 0, 0, agno, 0);
		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	}

	/*
	 * refcount btree root block
	 */
	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
		bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, xfs_refc_block(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_refcountbt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto out_error;
		}

		xfs_btree_init_block(mp, bp, XFS_BTNUM_REFC, 0, 0, agno, 0);
		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	}

out_error:
	return error;
}
378
/*
 * Grow the data device to in->newblocks filesystem blocks and/or change
 * the maximum inode percentage to in->imaxpct.
 *
 * New AG headers are written outside the transaction via a delayed write
 * buffer list, then the old last AG is extended, the superblock counters
 * are updated transactionally, and finally the secondary superblocks are
 * rewritten.  Errors updating secondary superblocks are reported but do
 * not undo the grow — the new geometry is already live by that point.
 */
static int
xfs_growfs_data_private(
	xfs_mount_t		*mp,		/* mount point for filesystem */
	xfs_growfs_data_t	*in)		/* growfs data input struct */
{
	xfs_agf_t		*agf;
	xfs_agi_t		*agi;
	xfs_agnumber_t		agno;
	xfs_extlen_t		agsize;
	xfs_buf_t		*bp;
	int			dpct;
	int			error, saved_error = 0;
	xfs_agnumber_t		nagcount;
	xfs_agnumber_t		nagimax = 0;
	xfs_rfsblock_t		nb, nb_mod;
	xfs_rfsblock_t		new;
	xfs_rfsblock_t		nfree;
	xfs_agnumber_t		oagcount;
	int			pct;
	xfs_trans_t		*tp;
	LIST_HEAD		(buffer_list);

	/* Shrinking is not supported; imaxpct must be a valid percentage. */
	nb = in->newblocks;
	pct = in->imaxpct;
	if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
		return -EINVAL;
	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
		return error;
	dpct = pct - mp->m_sb.sb_imax_pct;
	/*
	 * Probe-read the last sector of the requested size to verify the
	 * underlying device really is that large before committing.
	 */
	error = xfs_buf_read_uncached(mp->m_ddev_targp,
				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
	if (error)
		return error;
	xfs_buf_relse(bp);

	/*
	 * Compute the new AG count.  A trailing runt AG smaller than
	 * XFS_MIN_AG_BLOCKS is dropped, truncating nb accordingly.
	 */
	new = nb;	/* use new as a temporary here */
	nb_mod = do_div(new, mp->m_sb.sb_agblocks);
	nagcount = new + (nb_mod != 0);
	if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
		nagcount--;
		nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
		if (nb < mp->m_sb.sb_dblocks)
			return -EINVAL;
	}
	new = nb - mp->m_sb.sb_dblocks;		/* blocks actually added */
	oagcount = mp->m_sb.sb_agcount;

	/* allocate the new per-ag structures */
	if (nagcount > oagcount) {
		error = xfs_initialize_perag(mp, nagcount, &nagimax);
		if (error)
			return error;
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	/*
	 * Write new AG headers to disk. Non-transactional, but need to be
	 * written and completed prior to the growfs transaction being logged.
	 * To do this, we use a delayed write buffer list and wait for
	 * submission and IO completion of the list as a whole. This allows the
	 * IO subsystem to merge all the AG headers in a single AG into a single
	 * IO and hide most of the latency of the IO from us.
	 *
	 * This also means that if we get an error whilst building the buffer
	 * list to write, we can cancel the entire list without having written
	 * anything.
	 */
	nfree = 0;
	for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {

		/* The very last AG may be shorter than a full AG. */
		if (agno == nagcount - 1)
			agsize = nb -
				(agno * (xfs_rfsblock_t)mp->m_sb.sb_agblocks);
		else
			agsize = mp->m_sb.sb_agblocks;

		error = xfs_grow_ag_headers(mp, agno, agsize, &nfree,
					    &buffer_list);
		if (error) {
			xfs_buf_delwri_cancel(&buffer_list);
			goto error0;
		}
	}
	error = xfs_buf_delwri_submit(&buffer_list);
	if (error)
		goto error0;

	xfs_trans_agblocks_delta(tp, nfree);

	/*
	 * There are new blocks in the old last a.g.  After the loop above,
	 * agno is the old last AG (oagcount - 1) and new is the number of
	 * blocks being added to it.
	 */
	if (new) {
		struct xfs_owner_info	oinfo;

		/*
		 * Change the agi length.
		 */
		error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
		if (error) {
			goto error0;
		}
		ASSERT(bp);
		agi = XFS_BUF_TO_AGI(bp);
		be32_add_cpu(&agi->agi_length, new);
		ASSERT(nagcount == oagcount ||
		       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
		xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
		/*
		 * Change agf length.
		 */
		error = xfs_alloc_read_agf(mp, tp, agno, 0, &bp);
		if (error) {
			goto error0;
		}
		ASSERT(bp);
		agf = XFS_BUF_TO_AGF(bp);
		be32_add_cpu(&agf->agf_length, new);
		ASSERT(be32_to_cpu(agf->agf_length) ==
		       be32_to_cpu(agi->agi_length));

		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);

		/*
		 * Free the new space.
		 *
		 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
		 * this doesn't actually exist in the rmap btree.
		 */
		xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
		error = xfs_rmap_free(tp, bp, agno,
				be32_to_cpu(agf->agf_length) - new,
				new, &oinfo);
		if (error)
			goto error0;
		error = xfs_free_extent(tp,
				XFS_AGB_TO_FSB(mp, agno,
					be32_to_cpu(agf->agf_length) - new),
				new, &oinfo, XFS_AG_RESV_NONE);
		if (error)
			goto error0;
	}

	/*
	 * Update changed superblock fields transactionally. These are not
	 * seen by the rest of the world until the transaction commit applies
	 * them atomically to the superblock.
	 */
	if (nagcount > oagcount)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
	if (nb > mp->m_sb.sb_dblocks)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS,
				 nb - mp->m_sb.sb_dblocks);
	if (nfree)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree);
	if (dpct)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
	/* Growfs must be durable before we report success. */
	xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp);
	if (error)
		return error;

	/* New allocation groups fully initialized, so update mount struct */
	if (nagimax)
		mp->m_maxagi = nagimax;
	if (mp->m_sb.sb_imax_pct) {
		uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
		do_div(icount, 100);
		mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
	} else
		mp->m_maxicount = 0;
	xfs_set_low_space_thresholds(mp);
	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);

	/*
	 * If we expanded the last AG, free the per-AG reservation
	 * so we can reinitialize it with the new size.
	 */
	if (new) {
		struct xfs_perag	*pag;

		pag = xfs_perag_get(mp, agno);
		error = xfs_ag_resv_free(pag);
		xfs_perag_put(pag);
		if (error)
			goto out;
	}

	/* Reserve AG metadata blocks. */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error != -ENOSPC)
		goto out;

	/* update secondary superblocks. */
	for (agno = 1; agno < nagcount; agno++) {
		error = 0;
		/*
		 * new secondary superblocks need to be zeroed, not read from
		 * disk as the contents of the new area we are growing into is
		 * completely unknown.
		 */
		if (agno < oagcount) {
			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
				  XFS_FSS_TO_BB(mp, 1), 0, &bp,
				  &xfs_sb_buf_ops);
		} else {
			bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
				  XFS_FSS_TO_BB(mp, 1), 0);
			if (bp) {
				bp->b_ops = &xfs_sb_buf_ops;
				xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
			} else
				error = -ENOMEM;
		}

		/*
		 * If we get an error reading or writing alternate superblocks,
		 * continue.  xfs_repair chooses the "best" superblock based
		 * on most matches; if we break early, we'll leave more
		 * superblocks un-updated than updated, and xfs_repair may
		 * pick them over the properly-updated primary.
		 */
		if (error) {
			xfs_warn(mp,
		"error %d reading secondary superblock for ag %d",
				error, agno);
			saved_error = error;
			continue;
		}
		xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);

		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
		if (error) {
			xfs_warn(mp,
		"write error %d updating secondary superblock for ag %d",
				error, agno);
			saved_error = error;
			continue;
		}
	}

 out:
	return saved_error ? saved_error : error;

 error0:
	xfs_trans_cancel(tp);
	return error;
}
635
636 static int
637 xfs_growfs_log_private(
638         xfs_mount_t             *mp,    /* mount point for filesystem */
639         xfs_growfs_log_t        *in)    /* growfs log input struct */
640 {
641         xfs_extlen_t            nb;
642
643         nb = in->newblocks;
644         if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
645                 return -EINVAL;
646         if (nb == mp->m_sb.sb_logblocks &&
647             in->isint == (mp->m_sb.sb_logstart != 0))
648                 return -EINVAL;
649         /*
650          * Moving the log is hard, need new interfaces to sync
651          * the log first, hold off all activity while moving it.
652          * Can have shorter or longer log in the same space,
653          * or transform internal to external log or vice versa.
654          */
655         return -ENOSYS;
656 }
657
658 /*
659  * protected versions of growfs function acquire and release locks on the mount
660  * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG,
661  * XFS_IOC_FSGROWFSRT
662  */
663
664
665 int
666 xfs_growfs_data(
667         xfs_mount_t             *mp,
668         xfs_growfs_data_t       *in)
669 {
670         int error;
671
672         if (!capable(CAP_SYS_ADMIN))
673                 return -EPERM;
674         if (!mutex_trylock(&mp->m_growlock))
675                 return -EWOULDBLOCK;
676         error = xfs_growfs_data_private(mp, in);
677         /*
678          * Increment the generation unconditionally, the error could be from
679          * updating the secondary superblocks, in which case the new size
680          * is live already.
681          */
682         mp->m_generation++;
683         mutex_unlock(&mp->m_growlock);
684         return error;
685 }
686
687 int
688 xfs_growfs_log(
689         xfs_mount_t             *mp,
690         xfs_growfs_log_t        *in)
691 {
692         int error;
693
694         if (!capable(CAP_SYS_ADMIN))
695                 return -EPERM;
696         if (!mutex_trylock(&mp->m_growlock))
697                 return -EWOULDBLOCK;
698         error = xfs_growfs_log_private(mp, in);
699         mutex_unlock(&mp->m_growlock);
700         return error;
701 }
702
703 /*
704  * exported through ioctl XFS_IOC_FSCOUNTS
705  */
706
707 int
708 xfs_fs_counts(
709         xfs_mount_t             *mp,
710         xfs_fsop_counts_t       *cnt)
711 {
712         cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
713         cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
714         cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
715                                                 mp->m_alloc_set_aside;
716
717         spin_lock(&mp->m_sb_lock);
718         cnt->freertx = mp->m_sb.sb_frextents;
719         spin_unlock(&mp->m_sb_lock);
720         return 0;
721 }
722
723 /*
724  * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS
725  *
726  * xfs_reserve_blocks is called to set m_resblks
727  * in the in-core mount table. The number of unused reserved blocks
728  * is kept in m_resblks_avail.
729  *
730  * Reserve the requested number of blocks if available. Otherwise return
731  * as many as possible to satisfy the request. The actual number
732  * reserved are returned in outval
733  *
734  * A null inval pointer indicates that only the current reserved blocks
735  * available  should  be returned no settings are changed.
736  */
737
738 int
739 xfs_reserve_blocks(
740         xfs_mount_t             *mp,
741         uint64_t              *inval,
742         xfs_fsop_resblks_t      *outval)
743 {
744         int64_t                 lcounter, delta;
745         int64_t                 fdblks_delta = 0;
746         uint64_t                request;
747         int64_t                 free;
748         int                     error = 0;
749
750         /* If inval is null, report current values and return */
751         if (inval == (uint64_t *)NULL) {
752                 if (!outval)
753                         return -EINVAL;
754                 outval->resblks = mp->m_resblks;
755                 outval->resblks_avail = mp->m_resblks_avail;
756                 return 0;
757         }
758
759         request = *inval;
760
761         /*
762          * With per-cpu counters, this becomes an interesting problem. we need
763          * to work out if we are freeing or allocation blocks first, then we can
764          * do the modification as necessary.
765          *
766          * We do this under the m_sb_lock so that if we are near ENOSPC, we will
767          * hold out any changes while we work out what to do. This means that
768          * the amount of free space can change while we do this, so we need to
769          * retry if we end up trying to reserve more space than is available.
770          */
771         spin_lock(&mp->m_sb_lock);
772
773         /*
774          * If our previous reservation was larger than the current value,
775          * then move any unused blocks back to the free pool. Modify the resblks
776          * counters directly since we shouldn't have any problems unreserving
777          * space.
778          */
779         if (mp->m_resblks > request) {
780                 lcounter = mp->m_resblks_avail - request;
781                 if (lcounter  > 0) {            /* release unused blocks */
782                         fdblks_delta = lcounter;
783                         mp->m_resblks_avail -= lcounter;
784                 }
785                 mp->m_resblks = request;
786                 if (fdblks_delta) {
787                         spin_unlock(&mp->m_sb_lock);
788                         error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
789                         spin_lock(&mp->m_sb_lock);
790                 }
791
792                 goto out;
793         }
794
795         /*
796          * If the request is larger than the current reservation, reserve the
797          * blocks before we update the reserve counters. Sample m_fdblocks and
798          * perform a partial reservation if the request exceeds free space.
799          */
800         error = -ENOSPC;
801         do {
802                 free = percpu_counter_sum(&mp->m_fdblocks) -
803                                                 mp->m_alloc_set_aside;
804                 if (!free)
805                         break;
806
807                 delta = request - mp->m_resblks;
808                 lcounter = free - delta;
809                 if (lcounter < 0)
810                         /* We can't satisfy the request, just get what we can */
811                         fdblks_delta = free;
812                 else
813                         fdblks_delta = delta;
814
815                 /*
816                  * We'll either succeed in getting space from the free block
817                  * count or we'll get an ENOSPC. If we get a ENOSPC, it means
818                  * things changed while we were calculating fdblks_delta and so
819                  * we should try again to see if there is anything left to
820                  * reserve.
821                  *
822                  * Don't set the reserved flag here - we don't want to reserve
823                  * the extra reserve blocks from the reserve.....
824                  */
825                 spin_unlock(&mp->m_sb_lock);
826                 error = xfs_mod_fdblocks(mp, -fdblks_delta, 0);
827                 spin_lock(&mp->m_sb_lock);
828         } while (error == -ENOSPC);
829
830         /*
831          * Update the reserve counters if blocks have been successfully
832          * allocated.
833          */
834         if (!error && fdblks_delta) {
835                 mp->m_resblks += fdblks_delta;
836                 mp->m_resblks_avail += fdblks_delta;
837         }
838
839 out:
840         if (outval) {
841                 outval->resblks = mp->m_resblks;
842                 outval->resblks_avail = mp->m_resblks_avail;
843         }
844
845         spin_unlock(&mp->m_sb_lock);
846         return error;
847 }
848
849 int
850 xfs_fs_goingdown(
851         xfs_mount_t     *mp,
852         uint32_t        inflags)
853 {
854         switch (inflags) {
855         case XFS_FSOP_GOING_FLAGS_DEFAULT: {
856                 struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
857
858                 if (sb && !IS_ERR(sb)) {
859                         xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
860                         thaw_bdev(sb->s_bdev, sb);
861                 }
862
863                 break;
864         }
865         case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
866                 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
867                 break;
868         case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH:
869                 xfs_force_shutdown(mp,
870                                 SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
871                 break;
872         default:
873                 return -EINVAL;
874         }
875
876         return 0;
877 }
878
879 /*
880  * Force a shutdown of the filesystem instantly while keeping the filesystem
881  * consistent. We don't do an unmount here; just shutdown the shop, make sure
882  * that absolutely nothing persistent happens to this filesystem after this
883  * point.
884  */
void
xfs_do_force_shutdown(
	xfs_mount_t	*mp,
	int		flags,	/* SHUTDOWN_* reason flags */
	char		*fname,	/* caller's source file, for the log message */
	int		lnnum)	/* caller's source line, for the log message */
{
	int		logerror;	/* nonzero if the log itself failed */

	logerror = flags & SHUTDOWN_LOG_IO_ERROR;

	/*
	 * Record who asked for the shutdown, unless this is the silent
	 * forced-unmount path (e.g. triggered from the goingdown ioctl).
	 */
	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
		xfs_notice(mp,
	"%s(0x%x) called from line %d of file %s.  Return address = "PTR_FMT,
			__func__, flags, lnnum, fname, __return_address);
	}
	/*
	 * No need to duplicate efforts.
	 */
	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
		return;

	/*
	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
	 * queue up anybody new on the log reservations, and wakes up
	 * everybody who's sleeping on log reservations to tell them
	 * the bad news.
	 */
	if (xfs_log_force_umount(mp, logerror))
		return;

	/* Log an alert describing why we went down, by decreasing severity. */
	if (flags & SHUTDOWN_CORRUPT_INCORE) {
		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
    "Corruption of in-memory data detected.  Shutting down filesystem");
		/* At high error levels, also dump a stack trace for debugging. */
		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
			xfs_stack_trace();
	} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
		if (logerror) {
			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
		"Log I/O Error Detected.  Shutting down filesystem");
		} else if (flags & SHUTDOWN_DEVICE_REQ) {
			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
		"All device paths lost.  Shutting down filesystem");
		} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
		"I/O Error Detected. Shutting down filesystem");
		}
	}
	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
		xfs_alert(mp,
	"Please umount the filesystem and rectify the problem(s)");
	}
}
938
939 /*
940  * Reserve free space for per-AG metadata.
941  */
942 int
943 xfs_fs_reserve_ag_blocks(
944         struct xfs_mount        *mp)
945 {
946         xfs_agnumber_t          agno;
947         struct xfs_perag        *pag;
948         int                     error = 0;
949         int                     err2;
950
951         for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
952                 pag = xfs_perag_get(mp, agno);
953                 err2 = xfs_ag_resv_init(pag);
954                 xfs_perag_put(pag);
955                 if (err2 && !error)
956                         error = err2;
957         }
958
959         if (error && error != -ENOSPC) {
960                 xfs_warn(mp,
961         "Error %d reserving per-AG metadata reserve pool.", error);
962                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
963         }
964
965         return error;
966 }
967
968 /*
969  * Free space reserved for per-AG metadata.
970  */
971 int
972 xfs_fs_unreserve_ag_blocks(
973         struct xfs_mount        *mp)
974 {
975         xfs_agnumber_t          agno;
976         struct xfs_perag        *pag;
977         int                     error = 0;
978         int                     err2;
979
980         for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
981                 pag = xfs_perag_get(mp, agno);
982                 err2 = xfs_ag_resv_free(pag);
983                 xfs_perag_put(pag);
984                 if (err2 && !error)
985                         error = err2;
986         }
987
988         if (error)
989                 xfs_warn(mp,
990         "Error %d freeing per-AG metadata reserve pool.", error);
991
992         return error;
993 }