xfs: convert growfs AG header init to use buffer lists
[platform/kernel/linux-rpi.git] / fs / xfs / xfs_fsops.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_sb.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_inode.h"
30 #include "xfs_trans.h"
31 #include "xfs_inode_item.h"
32 #include "xfs_error.h"
33 #include "xfs_btree.h"
34 #include "xfs_alloc_btree.h"
35 #include "xfs_alloc.h"
36 #include "xfs_rmap_btree.h"
37 #include "xfs_ialloc.h"
38 #include "xfs_fsops.h"
39 #include "xfs_itable.h"
40 #include "xfs_trans_space.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_trace.h"
43 #include "xfs_log.h"
44 #include "xfs_filestream.h"
45 #include "xfs_rmap.h"
46 #include "xfs_ag_resv.h"
47
48 /*
49  * File system operations
50  */
51
52 static struct xfs_buf *
53 xfs_growfs_get_hdr_buf(
54         struct xfs_mount        *mp,
55         xfs_daddr_t             blkno,
56         size_t                  numblks,
57         int                     flags,
58         const struct xfs_buf_ops *ops)
59 {
60         struct xfs_buf          *bp;
61
62         bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
63         if (!bp)
64                 return NULL;
65
66         xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
67         bp->b_bn = blkno;
68         bp->b_maps[0].bm_bn = blkno;
69         bp->b_ops = ops;
70
71         return bp;
72 }
73
/*
 * Initialise the on-disk headers for a new AG and stage them for writing.
 *
 * Builds the AGF, AGFL and AGI headers plus the root blocks of the BNO,
 * CNT and INO btrees — and, when the corresponding features are enabled,
 * the RMAP, FINO and refcount btree roots — for AG @agno of @agsize
 * blocks.  Each buffer is queued on @buffer_list as a delayed write;
 * nothing is submitted here.  Writes are non-transactional but must be
 * submitted and completed by the caller before the growfs transaction is
 * logged.  The free space recorded in the new AG is added to @nfree.
 *
 * Returns 0 or -ENOMEM if a header buffer cannot be allocated.  On error,
 * buffers already queued remain on @buffer_list and must be cancelled by
 * the caller.
 */
static int
xfs_grow_ag_headers(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_extlen_t		agsize,
	xfs_rfsblock_t		*nfree,
	struct list_head	*buffer_list)
{
	struct xfs_agf		*agf;
	struct xfs_agi		*agi;
	struct xfs_agfl		*agfl;
	__be32			*agfl_bno;
	xfs_alloc_rec_t		*arec;
	struct xfs_buf		*bp;
	int			bucket;
	xfs_extlen_t		tmpsize;
	int			error = 0;

	/*
	 * AG freespace header block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0,
			&xfs_agf_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	agf = XFS_BUF_TO_AGF(bp);
	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
	agf->agf_seqno = cpu_to_be32(agno);
	agf->agf_length = cpu_to_be32(agsize);
	/* Both free space btrees start out as single-level trees. */
	agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		agf->agf_roots[XFS_BTNUM_RMAPi] =
					cpu_to_be32(XFS_RMAP_BLOCK(mp));
		agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
		agf->agf_rmap_blocks = cpu_to_be32(1);
	}

	/* The AG free list starts out empty (flcount == 0). */
	agf->agf_flfirst = cpu_to_be32(1);
	agf->agf_fllast = 0;
	agf->agf_flcount = 0;
	/* Everything beyond the preallocated header area is free space. */
	tmpsize = agsize - mp->m_ag_prealloc_blocks;
	agf->agf_freeblks = cpu_to_be32(tmpsize);
	agf->agf_longest = cpu_to_be32(tmpsize);
	if (xfs_sb_version_hascrc(&mp->m_sb))
		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
		agf->agf_refcount_root = cpu_to_be32(
				xfs_refc_block(mp));
		agf->agf_refcount_level = cpu_to_be32(1);
		agf->agf_refcount_blocks = cpu_to_be32(1);
	}
	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * AG freelist header block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0,
			&xfs_agfl_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	agfl = XFS_BUF_TO_AGFL(bp);
	/* Only v5 (CRC) filesystems carry a header in the AGFL block. */
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
		agfl->agfl_seqno = cpu_to_be32(agno);
		uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
	}

	/* Mark every freelist slot empty. */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
	for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
		agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * AG inode header block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0,
			&xfs_agi_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	agi = XFS_BUF_TO_AGI(bp);
	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
	agi->agi_seqno = cpu_to_be32(agno);
	agi->agi_length = cpu_to_be32(agsize);
	agi->agi_count = 0;
	agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
	agi->agi_level = cpu_to_be32(1);
	agi->agi_freecount = 0;
	agi->agi_newino = cpu_to_be32(NULLAGINO);
	agi->agi_dirino = cpu_to_be32(NULLAGINO);
	if (xfs_sb_version_hascrc(&mp->m_sb))
		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
		agi->agi_free_level = cpu_to_be32(1);
	}
	/* No unlinked inodes in a new AG: empty all hash buckets. */
	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * BNO btree root block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_allocbt_buf_ops);

	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, agno, 0);

	/* One record covering all free space after the static headers. */
	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
	arec->ar_blockcount = cpu_to_be32(
		agsize - be32_to_cpu(arec->ar_startblock));

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * CNT btree root block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_allocbt_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, agno, 0);

	/* Same single free extent as the BNO tree, keyed by size. */
	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
	arec->ar_blockcount = cpu_to_be32(
		agsize - be32_to_cpu(arec->ar_startblock));
	/* Account the AG's free space exactly once, from this tree. */
	*nfree += be32_to_cpu(arec->ar_blockcount);

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/* RMAP btree root block */
	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		struct xfs_rmap_rec	*rrec;
		struct xfs_btree_block	*block;

		bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_rmapbt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto out_error;
		}

		xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 0,
					agno, 0);
		block = XFS_BUF_TO_BLOCK(bp);

		/*
		 * mark the AG header regions as static metadata. The BNO
		 * btree block is the first block after the headers, so
		 * its location defines the size of region the static
		 * metadata consumes.
		 *
		 * Note: unlike mkfs, we never have to account for log
		 * space when growing the data regions
		 */
		rrec = XFS_RMAP_REC_ADDR(block, 1);
		rrec->rm_startblock = 0;
		rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account freespace btree root blocks */
		rrec = XFS_RMAP_REC_ADDR(block, 2);
		rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
		rrec->rm_blockcount = cpu_to_be32(2);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account inode btree root blocks */
		rrec = XFS_RMAP_REC_ADDR(block, 3);
		rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
		rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
						XFS_IBT_BLOCK(mp));
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account for rmap btree root */
		rrec = XFS_RMAP_REC_ADDR(block, 4);
		rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
		rrec->rm_blockcount = cpu_to_be32(1);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account for refc btree root */
		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
			rrec = XFS_RMAP_REC_ADDR(block, 5);
			rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
			rrec->rm_blockcount = cpu_to_be32(1);
			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
			rrec->rm_offset = 0;
			be16_add_cpu(&block->bb_numrecs, 1);
		}

		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	}

	/*
	 * INO btree root block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_inobt_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	xfs_btree_init_block(mp, bp, XFS_BTNUM_INO , 0, 0, agno, 0);
	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * FINO btree root block
	 */
	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_inobt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto out_error;
		}

		xfs_btree_init_block(mp, bp, XFS_BTNUM_FINO, 0, 0, agno, 0);
		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	}

	/*
	 * refcount btree root block
	 */
	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
		bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, xfs_refc_block(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_refcountbt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto out_error;
		}

		xfs_btree_init_block(mp, bp, XFS_BTNUM_REFC, 0, 0, agno, 0);
		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	}

out_error:
	return error;
}
378
/*
 * Grow the data device to in->newblocks filesystem blocks and/or change
 * the maximum inode percentage to in->imaxpct.
 *
 * New AG headers are written outside the transaction via a delayed write
 * buffer list, then the old last AG is extended, the superblock counters
 * are updated transactionally, and finally the secondary superblocks are
 * rewritten.  Errors updating secondary superblocks are reported but do
 * not undo the grow — the new geometry is already live by that point.
 */
static int
xfs_growfs_data_private(
	xfs_mount_t		*mp,		/* mount point for filesystem */
	xfs_growfs_data_t	*in)		/* growfs data input struct */
{
	xfs_agf_t		*agf;
	xfs_agi_t		*agi;
	xfs_agnumber_t		agno;
	xfs_extlen_t		agsize;
	xfs_buf_t		*bp;
	int			dpct;
	int			error, saved_error = 0;
	xfs_agnumber_t		nagcount;
	xfs_agnumber_t		nagimax = 0;
	xfs_rfsblock_t		nb, nb_mod;
	xfs_rfsblock_t		new;
	xfs_rfsblock_t		nfree;
	xfs_agnumber_t		oagcount;
	int			pct;
	xfs_trans_t		*tp;
	LIST_HEAD		(buffer_list);

	/* Shrinking is not supported; imaxpct must be a valid percentage. */
	nb = in->newblocks;
	pct = in->imaxpct;
	if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
		return -EINVAL;
	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
		return error;
	dpct = pct - mp->m_sb.sb_imax_pct;
	/*
	 * Probe-read the last sector of the requested size to verify the
	 * underlying device really is that large before committing.
	 */
	error = xfs_buf_read_uncached(mp->m_ddev_targp,
				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
	if (error)
		return error;
	xfs_buf_relse(bp);

	/*
	 * Compute the new AG count.  A trailing runt AG smaller than
	 * XFS_MIN_AG_BLOCKS is dropped, truncating nb accordingly.
	 */
	new = nb;	/* use new as a temporary here */
	nb_mod = do_div(new, mp->m_sb.sb_agblocks);
	nagcount = new + (nb_mod != 0);
	if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
		nagcount--;
		nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
		if (nb < mp->m_sb.sb_dblocks)
			return -EINVAL;
	}
	new = nb - mp->m_sb.sb_dblocks;		/* blocks actually added */
	oagcount = mp->m_sb.sb_agcount;

	/* allocate the new per-ag structures */
	if (nagcount > oagcount) {
		error = xfs_initialize_perag(mp, nagcount, &nagimax);
		if (error)
			return error;
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	/*
	 * Write new AG headers to disk. Non-transactional, but need to be
	 * written and completed prior to the growfs transaction being logged.
	 * To do this, we use a delayed write buffer list and wait for
	 * submission and IO completion of the list as a whole. This allows the
	 * IO subsystem to merge all the AG headers in a single AG into a single
	 * IO and hide most of the latency of the IO from us.
	 *
	 * This also means that if we get an error whilst building the buffer
	 * list to write, we can cancel the entire list without having written
	 * anything.
	 */
	nfree = 0;
	for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {

		/* The very last AG may be shorter than a full AG. */
		if (agno == nagcount - 1)
			agsize = nb -
				(agno * (xfs_rfsblock_t)mp->m_sb.sb_agblocks);
		else
			agsize = mp->m_sb.sb_agblocks;

		error = xfs_grow_ag_headers(mp, agno, agsize, &nfree,
					    &buffer_list);
		if (error) {
			xfs_buf_delwri_cancel(&buffer_list);
			goto error0;
		}
	}
	error = xfs_buf_delwri_submit(&buffer_list);
	if (error)
		goto error0;

	xfs_trans_agblocks_delta(tp, nfree);

	/*
	 * There are new blocks in the old last a.g.  After the loop above,
	 * agno is the old last AG (oagcount - 1) and new is the number of
	 * blocks being added to it.
	 */
	if (new) {
		struct xfs_owner_info	oinfo;

		/*
		 * Change the agi length.
		 */
		error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
		if (error) {
			goto error0;
		}
		ASSERT(bp);
		agi = XFS_BUF_TO_AGI(bp);
		be32_add_cpu(&agi->agi_length, new);
		ASSERT(nagcount == oagcount ||
		       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
		xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
		/*
		 * Change agf length.
		 */
		error = xfs_alloc_read_agf(mp, tp, agno, 0, &bp);
		if (error) {
			goto error0;
		}
		ASSERT(bp);
		agf = XFS_BUF_TO_AGF(bp);
		be32_add_cpu(&agf->agf_length, new);
		ASSERT(be32_to_cpu(agf->agf_length) ==
		       be32_to_cpu(agi->agi_length));

		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);

		/*
		 * Free the new space.
		 *
		 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
		 * this doesn't actually exist in the rmap btree.
		 */
		xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
		error = xfs_rmap_free(tp, bp, agno,
				be32_to_cpu(agf->agf_length) - new,
				new, &oinfo);
		if (error)
			goto error0;
		error = xfs_free_extent(tp,
				XFS_AGB_TO_FSB(mp, agno,
					be32_to_cpu(agf->agf_length) - new),
				new, &oinfo, XFS_AG_RESV_NONE);
		if (error)
			goto error0;
	}

	/*
	 * Update changed superblock fields transactionally. These are not
	 * seen by the rest of the world until the transaction commit applies
	 * them atomically to the superblock.
	 */
	if (nagcount > oagcount)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
	if (nb > mp->m_sb.sb_dblocks)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS,
				 nb - mp->m_sb.sb_dblocks);
	if (nfree)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree);
	if (dpct)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
	/* Growfs must be durable before we report success. */
	xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp);
	if (error)
		return error;

	/* New allocation groups fully initialized, so update mount struct */
	if (nagimax)
		mp->m_maxagi = nagimax;
	if (mp->m_sb.sb_imax_pct) {
		uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
		do_div(icount, 100);
		mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
	} else
		mp->m_maxicount = 0;
	xfs_set_low_space_thresholds(mp);
	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);

	/*
	 * If we expanded the last AG, free the per-AG reservation
	 * so we can reinitialize it with the new size.
	 */
	if (new) {
		struct xfs_perag	*pag;

		pag = xfs_perag_get(mp, agno);
		error = xfs_ag_resv_free(pag);
		xfs_perag_put(pag);
		if (error)
			goto out;
	}

	/* Reserve AG metadata blocks. */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error != -ENOSPC)
		goto out;

	/* update secondary superblocks. */
	for (agno = 1; agno < nagcount; agno++) {
		error = 0;
		/*
		 * new secondary superblocks need to be zeroed, not read from
		 * disk as the contents of the new area we are growing into is
		 * completely unknown.
		 */
		if (agno < oagcount) {
			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
				  XFS_FSS_TO_BB(mp, 1), 0, &bp,
				  &xfs_sb_buf_ops);
		} else {
			bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
				  XFS_FSS_TO_BB(mp, 1), 0);
			if (bp) {
				bp->b_ops = &xfs_sb_buf_ops;
				xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
			} else
				error = -ENOMEM;
		}

		/*
		 * If we get an error reading or writing alternate superblocks,
		 * continue.  xfs_repair chooses the "best" superblock based
		 * on most matches; if we break early, we'll leave more
		 * superblocks un-updated than updated, and xfs_repair may
		 * pick them over the properly-updated primary.
		 */
		if (error) {
			xfs_warn(mp,
		"error %d reading secondary superblock for ag %d",
				error, agno);
			saved_error = error;
			continue;
		}
		xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);

		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
		if (error) {
			xfs_warn(mp,
		"write error %d updating secondary superblock for ag %d",
				error, agno);
			saved_error = error;
			continue;
		}
	}

 out:
	return saved_error ? saved_error : error;

 error0:
	xfs_trans_cancel(tp);
	return error;
}
635
636 static int
637 xfs_growfs_log_private(
638         xfs_mount_t             *mp,    /* mount point for filesystem */
639         xfs_growfs_log_t        *in)    /* growfs log input struct */
640 {
641         xfs_extlen_t            nb;
642
643         nb = in->newblocks;
644         if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
645                 return -EINVAL;
646         if (nb == mp->m_sb.sb_logblocks &&
647             in->isint == (mp->m_sb.sb_logstart != 0))
648                 return -EINVAL;
649         /*
650          * Moving the log is hard, need new interfaces to sync
651          * the log first, hold off all activity while moving it.
652          * Can have shorter or longer log in the same space,
653          * or transform internal to external log or vice versa.
654          */
655         return -ENOSYS;
656 }
657
658 /*
659  * protected versions of growfs function acquire and release locks on the mount
660  * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG,
661  * XFS_IOC_FSGROWFSRT
662  */
663
664
665 int
666 xfs_growfs_data(
667         xfs_mount_t             *mp,
668         xfs_growfs_data_t       *in)
669 {
670         int error;
671
672         if (!capable(CAP_SYS_ADMIN))
673                 return -EPERM;
674         if (!mutex_trylock(&mp->m_growlock))
675                 return -EWOULDBLOCK;
676         error = xfs_growfs_data_private(mp, in);
677         /*
678          * Increment the generation unconditionally, the error could be from
679          * updating the secondary superblocks, in which case the new size
680          * is live already.
681          */
682         mp->m_generation++;
683         mutex_unlock(&mp->m_growlock);
684         return error;
685 }
686
687 int
688 xfs_growfs_log(
689         xfs_mount_t             *mp,
690         xfs_growfs_log_t        *in)
691 {
692         int error;
693
694         if (!capable(CAP_SYS_ADMIN))
695                 return -EPERM;
696         if (!mutex_trylock(&mp->m_growlock))
697                 return -EWOULDBLOCK;
698         error = xfs_growfs_log_private(mp, in);
699         mutex_unlock(&mp->m_growlock);
700         return error;
701 }
702
703 /*
704  * exported through ioctl XFS_IOC_FSCOUNTS
705  */
706
707 int
708 xfs_fs_counts(
709         xfs_mount_t             *mp,
710         xfs_fsop_counts_t       *cnt)
711 {
712         cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
713         cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
714         cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
715                                                 mp->m_alloc_set_aside;
716
717         spin_lock(&mp->m_sb_lock);
718         cnt->freertx = mp->m_sb.sb_frextents;
719         spin_unlock(&mp->m_sb_lock);
720         return 0;
721 }
722
723 /*
724  * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS
725  *
726  * xfs_reserve_blocks is called to set m_resblks
727  * in the in-core mount table. The number of unused reserved blocks
728  * is kept in m_resblks_avail.
729  *
730  * Reserve the requested number of blocks if available. Otherwise return
731  * as many as possible to satisfy the request. The actual number
732  * reserved are returned in outval
733  *
734  * A null inval pointer indicates that only the current reserved blocks
735  * available  should  be returned no settings are changed.
736  */
737
738 int
739 xfs_reserve_blocks(
740         xfs_mount_t             *mp,
741         uint64_t              *inval,
742         xfs_fsop_resblks_t      *outval)
743 {
744         int64_t                 lcounter, delta;
745         int64_t                 fdblks_delta = 0;
746         uint64_t                request;
747         int64_t                 free;
748         int                     error = 0;
749
750         /* If inval is null, report current values and return */
751         if (inval == (uint64_t *)NULL) {
752                 if (!outval)
753                         return -EINVAL;
754                 outval->resblks = mp->m_resblks;
755                 outval->resblks_avail = mp->m_resblks_avail;
756                 return 0;
757         }
758
759         request = *inval;
760
761         /*
762          * With per-cpu counters, this becomes an interesting problem. we need
763          * to work out if we are freeing or allocation blocks first, then we can
764          * do the modification as necessary.
765          *
766          * We do this under the m_sb_lock so that if we are near ENOSPC, we will
767          * hold out any changes while we work out what to do. This means that
768          * the amount of free space can change while we do this, so we need to
769          * retry if we end up trying to reserve more space than is available.
770          */
771         spin_lock(&mp->m_sb_lock);
772
773         /*
774          * If our previous reservation was larger than the current value,
775          * then move any unused blocks back to the free pool. Modify the resblks
776          * counters directly since we shouldn't have any problems unreserving
777          * space.
778          */
779         if (mp->m_resblks > request) {
780                 lcounter = mp->m_resblks_avail - request;
781                 if (lcounter  > 0) {            /* release unused blocks */
782                         fdblks_delta = lcounter;
783                         mp->m_resblks_avail -= lcounter;
784                 }
785                 mp->m_resblks = request;
786                 if (fdblks_delta) {
787                         spin_unlock(&mp->m_sb_lock);
788                         error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
789                         spin_lock(&mp->m_sb_lock);
790                 }
791
792                 goto out;
793         }
794
795         /*
796          * If the request is larger than the current reservation, reserve the
797          * blocks before we update the reserve counters. Sample m_fdblocks and
798          * perform a partial reservation if the request exceeds free space.
799          */
800         error = -ENOSPC;
801         do {
802                 free = percpu_counter_sum(&mp->m_fdblocks) -
803                                                 mp->m_alloc_set_aside;
804                 if (!free)
805                         break;
806
807                 delta = request - mp->m_resblks;
808                 lcounter = free - delta;
809                 if (lcounter < 0)
810                         /* We can't satisfy the request, just get what we can */
811                         fdblks_delta = free;
812                 else
813                         fdblks_delta = delta;
814
815                 /*
816                  * We'll either succeed in getting space from the free block
817                  * count or we'll get an ENOSPC. If we get a ENOSPC, it means
818                  * things changed while we were calculating fdblks_delta and so
819                  * we should try again to see if there is anything left to
820                  * reserve.
821                  *
822                  * Don't set the reserved flag here - we don't want to reserve
823                  * the extra reserve blocks from the reserve.....
824                  */
825                 spin_unlock(&mp->m_sb_lock);
826                 error = xfs_mod_fdblocks(mp, -fdblks_delta, 0);
827                 spin_lock(&mp->m_sb_lock);
828         } while (error == -ENOSPC);
829
830         /*
831          * Update the reserve counters if blocks have been successfully
832          * allocated.
833          */
834         if (!error && fdblks_delta) {
835                 mp->m_resblks += fdblks_delta;
836                 mp->m_resblks_avail += fdblks_delta;
837         }
838
839 out:
840         if (outval) {
841                 outval->resblks = mp->m_resblks;
842                 outval->resblks_avail = mp->m_resblks_avail;
843         }
844
845         spin_unlock(&mp->m_sb_lock);
846         return error;
847 }
848
849 int
850 xfs_fs_goingdown(
851         xfs_mount_t     *mp,
852         uint32_t        inflags)
853 {
854         switch (inflags) {
855         case XFS_FSOP_GOING_FLAGS_DEFAULT: {
856                 struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
857
858                 if (sb && !IS_ERR(sb)) {
859                         xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
860                         thaw_bdev(sb->s_bdev, sb);
861                 }
862
863                 break;
864         }
865         case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
866                 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
867                 break;
868         case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH:
869                 xfs_force_shutdown(mp,
870                                 SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
871                 break;
872         default:
873                 return -EINVAL;
874         }
875
876         return 0;
877 }
878
879 /*
880  * Force a shutdown of the filesystem instantly while keeping the filesystem
881  * consistent. We don't do an unmount here; just shutdown the shop, make sure
882  * that absolutely nothing persistent happens to this filesystem after this
883  * point.
884  */
void
xfs_do_force_shutdown(
	xfs_mount_t	*mp,
	int		flags,	/* SHUTDOWN_* reason flags */
	char		*fname,	/* caller's source file, for the log message */
	int		lnnum)	/* caller's source line, for the log message */
{
	int		logerror;	/* nonzero if the log itself failed */

	logerror = flags & SHUTDOWN_LOG_IO_ERROR;

	/*
	 * Record who asked for the shutdown, unless this is the silent
	 * forced-unmount path (e.g. triggered from the goingdown ioctl).
	 */
	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
		xfs_notice(mp,
	"%s(0x%x) called from line %d of file %s.  Return address = "PTR_FMT,
			__func__, flags, lnnum, fname, __return_address);
	}
	/*
	 * No need to duplicate efforts.
	 */
	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
		return;

	/*
	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
	 * queue up anybody new on the log reservations, and wakes up
	 * everybody who's sleeping on log reservations to tell them
	 * the bad news.
	 */
	if (xfs_log_force_umount(mp, logerror))
		return;

	/* Log an alert describing why we went down, by decreasing severity. */
	if (flags & SHUTDOWN_CORRUPT_INCORE) {
		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
    "Corruption of in-memory data detected.  Shutting down filesystem");
		/* At high error levels, also dump a stack trace for debugging. */
		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
			xfs_stack_trace();
	} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
		if (logerror) {
			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
		"Log I/O Error Detected.  Shutting down filesystem");
		} else if (flags & SHUTDOWN_DEVICE_REQ) {
			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
		"All device paths lost.  Shutting down filesystem");
		} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
		"I/O Error Detected. Shutting down filesystem");
		}
	}
	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
		xfs_alert(mp,
	"Please umount the filesystem and rectify the problem(s)");
	}
}
938
939 /*
940  * Reserve free space for per-AG metadata.
941  */
942 int
943 xfs_fs_reserve_ag_blocks(
944         struct xfs_mount        *mp)
945 {
946         xfs_agnumber_t          agno;
947         struct xfs_perag        *pag;
948         int                     error = 0;
949         int                     err2;
950
951         for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
952                 pag = xfs_perag_get(mp, agno);
953                 err2 = xfs_ag_resv_init(pag);
954                 xfs_perag_put(pag);
955                 if (err2 && !error)
956                         error = err2;
957         }
958
959         if (error && error != -ENOSPC) {
960                 xfs_warn(mp,
961         "Error %d reserving per-AG metadata reserve pool.", error);
962                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
963         }
964
965         return error;
966 }
967
968 /*
969  * Free space reserved for per-AG metadata.
970  */
971 int
972 xfs_fs_unreserve_ag_blocks(
973         struct xfs_mount        *mp)
974 {
975         xfs_agnumber_t          agno;
976         struct xfs_perag        *pag;
977         int                     error = 0;
978         int                     err2;
979
980         for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
981                 pag = xfs_perag_get(mp, agno);
982                 err2 = xfs_ag_resv_free(pag);
983                 xfs_perag_put(pag);
984                 if (err2 && !error)
985                         error = err2;
986         }
987
988         if (error)
989                 xfs_warn(mp,
990         "Error %d freeing per-AG metadata reserve pool.", error);
991
992         return error;
993 }