Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
author	Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Jan 2013 18:55:21 +0000 (10:55 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Jan 2013 18:55:21 +0000 (10:55 -0800)
Pull btrfs fixes from Chris Mason:
 "It turns out that we had two crc bugs when running fsx-linux in a
  loop.  Many thanks to Josef, Miao Xie, and Dave Sterba for nailing it
  all down.  Miao also has a new OOM fix in this v2 pull.

  Ilya fixed a regression Liu Bo found in the balance ioctls for pausing
  and resuming a running balance across drives.

  Josef's orphan truncate patch fixes an obscure corruption we'd see
  during xfstests.

  Arne's patches address problems with subvolume quotas.  If the user
  destroys quota groups incorrectly, the FS will refuse to mount.

  The rest are smaller fixes and plugs for memory leaks."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (30 commits)
  Btrfs: fix repeated delalloc work allocation
  Btrfs: fix wrong max device number for single profile
  Btrfs: fix missed transaction->aborted check
  Btrfs: Add ACCESS_ONCE() to transaction->abort accesses
  Btrfs: put csums on the right ordered extent
  Btrfs: use right range to find checksum for compressed extents
  Btrfs: fix panic when recovering tree log
  Btrfs: do not allow logged extents to be merged or removed
  Btrfs: fix a regression in balance usage filter
  Btrfs: prevent qgroup destroy when there are still relations
  Btrfs: ignore orphan qgroup relations
  Btrfs: reorder locks and sanity checks in btrfs_ioctl_defrag
  Btrfs: fix unlock order in btrfs_ioctl_rm_dev
  Btrfs: fix unlock order in btrfs_ioctl_resize
  Btrfs: fix "mutually exclusive op is running" error code
  Btrfs: bring back balance pause/resume logic
  btrfs: update timestamps on truncate()
  btrfs: fix btrfs_cont_expand() freeing IS_ERR em
  Btrfs: fix a bug when llseek for delalloc bytes behind prealloc extents
  Btrfs: fix off-by-one in lseek
  ...
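
A note on the ACCESS_ONCE() fix in the list above: below is a minimal sketch of the reload hazard it closes, with a hypothetical caller. Only ACCESS_ONCE() itself and the transaction's aborted field come from the kernel of this era; the function is illustrative.

#include <linux/compiler.h>	/* ACCESS_ONCE() */

/* Hypothetical caller, for illustration only. Without ACCESS_ONCE()
 * the compiler may legally reload trans->aborted between the check
 * and the use, so the two reads can disagree under a concurrent
 * abort. ACCESS_ONCE() forces a single volatile read. */
static int check_aborted_sketch(struct btrfs_transaction *trans)
{
	int aborted = ACCESS_ONCE(trans->aborted);

	if (aborted)
		return aborted;	/* act on the one snapshot */
	return 0;
}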

fs/btrfs/extent-tree.c
fs/btrfs/extent_map.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c

diff --combined fs/btrfs/extent-tree.c
@@@ -3898,7 -3898,7 +3898,7 @@@ static int flush_space(struct btrfs_roo
   * @root - the root we're allocating for
   * @block_rsv - the block_rsv we're allocating for
   * @orig_bytes - the number of bytes we want
 - * @flush - wether or not we can flush to make our reservation
 + * @flush - whether or not we can flush to make our reservation
   *
   * This will reserve orig_bytes number of bytes from the space info associated
   * with the block_rsv.  If there is not enough space it will make an attempt to
@@@ -3997,7 -3997,7 +3997,7 @@@ again
         * We make the other tasks wait for the flush only when we can flush
         * all things.
         */
-       if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) {
+       if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
                flushing = true;
                space_info->flush = 1;
        }
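
For reference, the flush argument tested by the changed condition is a three-state enum; this is its shape in the btrfs headers of this era (reproduced as a sketch, not quoted from this merge). The hunk widens the wait from "full flush only" to "any flushing mode":

/* Sketch of enum btrfs_reserve_flush_enum from fs/btrfs/ctree.h of
 * this period; with the change above, FLUSH_LIMIT reservations now
 * also set space_info->flush and make other reservers wait. */
enum btrfs_reserve_flush_enum {
	/* don't flush anything to satisfy the reservation */
	BTRFS_RESERVE_NO_FLUSH,
	/* flush a bounded amount, safe for deadlock-prone contexts */
	BTRFS_RESERVE_FLUSH_LIMIT,
	/* flush anything that might free space */
	BTRFS_RESERVE_FLUSH_ALL,
};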
@@@ -5560,7 -5560,7 +5560,7 @@@ static noinline int find_free_extent(st
        int empty_cluster = 2 * 1024 * 1024;
        struct btrfs_space_info *space_info;
        int loop = 0;
-       int index = 0;
+       int index = __get_raid_index(data);
        int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
                RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
        bool found_uncached_bg = false;
@@@ -6788,11 -6788,13 +6788,13 @@@ static noinline int walk_up_proc(struc
                                                       &wc->flags[level]);
                        if (ret < 0) {
                                btrfs_tree_unlock_rw(eb, path->locks[level]);
+                               path->locks[level] = 0;
                                return ret;
                        }
                        BUG_ON(wc->refs[level] == 0);
                        if (wc->refs[level] == 1) {
                                btrfs_tree_unlock_rw(eb, path->locks[level]);
+                               path->locks[level] = 0;
                                return 1;
                        }
                }
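
The two added path->locks[level] = 0 lines keep the path's lock bookkeeping truthful on early returns; a hedged sketch of the invariant follows (the helper is hypothetical):

/* Hypothetical helper illustrating the invariant: path->locks[level]
 * records which lock, if any, is held on that level's buffer, and
 * btrfs_release_path() unlocks based on it. Zeroing the slot right
 * after btrfs_tree_unlock_rw() prevents a second unlock of eb when
 * the caller later releases the path. */
static void unlock_level_sketch(struct btrfs_path *path,
				struct extent_buffer *eb, int level)
{
	btrfs_tree_unlock_rw(eb, path->locks[level]);
	path->locks[level] = 0;	/* nothing held here any more */
}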
diff --combined fs/btrfs/extent_map.c
@@@ -171,6 -171,10 +171,10 @@@ static int mergable_maps(struct extent_
        if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
                return 0;
  
+       if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
+           test_bit(EXTENT_FLAG_LOGGING, &next->flags))
+               return 0;
        if (extent_map_end(prev) == next->start &&
            prev->flags == next->flags &&
            prev->bdev == next->bdev &&
@@@ -230,11 -234,12 +234,11 @@@ static void try_merge_map(struct extent
  }
  
  /**
 - * unpint_extent_cache - unpin an extent from the cache
 + * unpin_extent_cache - unpin an extent from the cache
   * @tree:     tree to unpin the extent in
   * @start:    logical offset in the file
   * @len:      length of the extent
   * @gen:      generation that this extent has been modified in
 - * @prealloc: if this is set we need to clear the prealloc flag
   *
   * Called after an extent has been written to disk properly.  Set the generation
   * to the generation that actually added the file item to the inode so we know
@@@ -255,7 -260,8 +259,8 @@@ int unpin_extent_cache(struct extent_ma
        if (!em)
                goto out;
  
-       list_move(&em->list, &tree->modified_extents);
+       if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
+               list_move(&em->list, &tree->modified_extents);
        em->generation = gen;
        clear_bit(EXTENT_FLAG_PINNED, &em->flags);
        em->mod_start = em->start;
@@@ -280,6 -286,12 +285,12 @@@ out
  
  }
  
+ void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
+ {
+       clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
+       try_merge_map(tree, em);
+ }
  /**
   * add_extent_mapping - add new extent map to the extent tree
   * @tree:     tree to insert new map in
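
Taken together, the extent_map.c hunks give EXTENT_FLAG_LOGGING a simple lifecycle: while the bit is set, mergable_maps() refuses to merge the map and unpin_extent_cache() leaves it off modified_extents; clear_em_logging() then drops the bit and retries the deferred merge. A hedged sketch of the caller side (the function and its locking are simplified and hypothetical):

/* Hypothetical tree-log caller, heavily simplified. */
static void log_one_extent_sketch(struct extent_map_tree *tree,
				  struct extent_map *em)
{
	/* pin the map against merging while its item is logged */
	set_bit(EXTENT_FLAG_LOGGING, &em->flags);

	/* ... copy the extent into the log tree ... */

	/* drop the bit and perform the merge deferred above */
	clear_em_logging(tree, em);
}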
diff --combined fs/btrfs/file.c
@@@ -1412,7 -1412,8 +1412,7 @@@ static noinline ssize_t __btrfs_buffere
  
                cond_resched();
  
 -              balance_dirty_pages_ratelimited_nr(inode->i_mapping,
 -                                                 dirty_pages);
 +              balance_dirty_pages_ratelimited(inode->i_mapping);
                if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
                        btrfs_btree_balance_dirty(root);
  
@@@ -2224,7 -2225,7 +2224,7 @@@ out
        return ret;
  }
  
 -static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 +static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_map *em;
        if (lockend <= lockstart)
                lockend = lockstart + root->sectorsize;
  
+       lockend--;
        len = lockend - lockstart + 1;
  
        len = max_t(u64, len, root->sectorsize);
         * before the position we want in case there is outstanding delalloc
         * going on here.
         */
 -      if (origin == SEEK_HOLE && start != 0) {
 +      if (whence == SEEK_HOLE && start != 0) {
                if (start <= root->sectorsize)
                        em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
                                                     root->sectorsize, 0);
                                }
                        }
  
 -                      if (origin == SEEK_HOLE) {
 +                      if (whence == SEEK_HOLE) {
                                *offset = start;
                                free_extent_map(em);
                                break;
                        }
                } else {
 -                      if (origin == SEEK_DATA) {
 +                      if (whence == SEEK_DATA) {
                                if (em->block_start == EXTENT_MAP_DELALLOC) {
                                        if (start >= inode->i_size) {
                                                free_extent_map(em);
                                        }
                                }
  
-                               *offset = start;
-                               free_extent_map(em);
-                               break;
+                               if (!test_bit(EXTENT_FLAG_PREALLOC,
+                                             &em->flags)) {
+                                       *offset = start;
+                                       free_extent_map(em);
+                                       break;
+                               }
                        }
                }
  
        return ret;
  }
  
 -static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
 +static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
  {
        struct inode *inode = file->f_mapping->host;
        int ret;
  
        mutex_lock(&inode->i_mutex);
 -      switch (origin) {
 +      switch (whence) {
        case SEEK_END:
        case SEEK_CUR:
 -              offset = generic_file_llseek(file, offset, origin);
 +              offset = generic_file_llseek(file, offset, whence);
                goto out;
        case SEEK_DATA:
        case SEEK_HOLE:
                        return -ENXIO;
                }
  
 -              ret = find_desired_extent(inode, &offset, origin);
 +              ret = find_desired_extent(inode, &offset, whence);
                if (ret) {
                        mutex_unlock(&inode->i_mutex);
                        return ret;
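
For context, the SEEK_HOLE/SEEK_DATA interface these hunks repair (notably for preallocated extents with delalloc behind them) is exercised from userspace like this; a minimal, self-contained sketch:

#define _GNU_SOURCE		/* SEEK_DATA/SEEK_HOLE on older glibc */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	off_t data = 0, hole;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;
	/* walk the data regions; lseek() fails with ENXIO after the
	 * last one, which ends the loop */
	while ((data = lseek(fd, data, SEEK_DATA)) >= 0) {
		hole = lseek(fd, data, SEEK_HOLE);
		printf("data %lld..%lld\n", (long long)data,
		       (long long)hole);
		data = hole;
	}
	close(fd);
	return 0;
}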
diff --combined fs/btrfs/inode.c
@@@ -88,7 -88,7 +88,7 @@@ static unsigned char btrfs_type_by_mode
        [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
  };
  
- static int btrfs_setsize(struct inode *inode, loff_t newsize);
+ static int btrfs_setsize(struct inode *inode, struct iattr *attr);
  static int btrfs_truncate(struct inode *inode);
  static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
  static noinline int cow_file_range(struct inode *inode,
@@@ -2478,6 -2478,18 +2478,18 @@@ int btrfs_orphan_cleanup(struct btrfs_r
                                continue;
                        }
                        nr_truncate++;
+                       /* 1 for the orphan item deletion. */
+                       trans = btrfs_start_transaction(root, 1);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               goto out;
+                       }
+                       ret = btrfs_orphan_add(trans, inode);
+                       btrfs_end_transaction(trans, root);
+                       if (ret)
+                               goto out;
                        ret = btrfs_truncate(inode);
                } else {
                        nr_unlink++;
@@@ -3665,6 -3677,7 +3677,7 @@@ int btrfs_cont_expand(struct inode *ino
                                block_end - cur_offset, 0);
                if (IS_ERR(em)) {
                        err = PTR_ERR(em);
+                       em = NULL;
                        break;
                }
                last_byte = min(extent_map_end(em), block_end);
@@@ -3748,16 -3761,27 +3761,27 @@@ next
        return err;
  }
  
- static int btrfs_setsize(struct inode *inode, loff_t newsize)
+ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
        loff_t oldsize = i_size_read(inode);
+       loff_t newsize = attr->ia_size;
+       int mask = attr->ia_valid;
        int ret;
  
        if (newsize == oldsize)
                return 0;
  
+       /*
+        * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+        * special case where we need to update the times despite not having
+        * these flags set.  For all other operations the VFS set these flags
+        * explicitly if it wants a timestamp update.
+        */
+       if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
+               inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
        if (newsize > oldsize) {
                truncate_pagecache(inode, oldsize, newsize);
                ret = btrfs_cont_expand(inode, oldsize, newsize);
                        set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
                                &BTRFS_I(inode)->runtime_flags);
  
+               /*
+                * 1 for the orphan item we're going to add
+                * 1 for the orphan item deletion.
+                */
+               trans = btrfs_start_transaction(root, 2);
+               if (IS_ERR(trans))
+                       return PTR_ERR(trans);
+               /*
+                * We need to do this in case we fail at _any_ point during the
+                * actual truncate.  Once we do the truncate_setsize we could
+                * invalidate pages which forces any outstanding ordered io to
+                * be instantly completed which will give us extents that need
+                * to be truncated.  If we fail to get an orphan inode down we
+                * could have left over extents that were never meant to live,
+                * so we need to guarantee from this point on that everything
+                * will be consistent.
+                */
+               ret = btrfs_orphan_add(trans, inode);
+               btrfs_end_transaction(trans, root);
+               if (ret)
+                       return ret;
                /* we don't support swapfiles, so vmtruncate shouldn't fail */
                truncate_setsize(inode, newsize);
                ret = btrfs_truncate(inode);
+               if (ret && inode->i_nlink)
+                       btrfs_orphan_del(NULL, inode);
        }
  
        return ret;
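
The timestamp comment above encodes a userspace-visible rule: truncate(2) must bump ctime/mtime even though the VFS does not pass ATTR_CTIME/ATTR_MTIME down this path. A hypothetical check, assuming ./testfile already exists and is non-empty (otherwise the size does not change and no update is due):

#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat before, after;

	if (stat("testfile", &before))
		return 1;
	sleep(1);			/* ensure a visible delta */
	if (truncate("testfile", 0))	/* size change, no utimes() */
		return 1;
	if (stat("testfile", &after))
		return 1;
	printf("mtime bumped: %s\n",
	       after.st_mtime > before.st_mtime ? "yes" : "no (bug)");
	return 0;
}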
@@@ -3805,7 -3854,7 +3854,7 @@@ static int btrfs_setattr(struct dentry 
                return err;
  
        if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
-               err = btrfs_setsize(inode, attr->ia_size);
+               err = btrfs_setsize(inode, attr);
                if (err)
                        return err;
        }
@@@ -4262,7 -4311,16 +4311,7 @@@ struct inode *btrfs_lookup_dentry(struc
        if (dentry->d_name.len > BTRFS_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
  
 -      if (unlikely(d_need_lookup(dentry))) {
 -              memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
 -              kfree(dentry->d_fsdata);
 -              dentry->d_fsdata = NULL;
 -              /* This thing is hashed, drop it for now */
 -              d_drop(dentry);
 -      } else {
 -              ret = btrfs_inode_by_name(dir, dentry, &location);
 -      }
 -
 +      ret = btrfs_inode_by_name(dir, dentry, &location);
        if (ret < 0)
                return ERR_PTR(ret);
  
@@@ -4332,6 -4390,11 +4381,6 @@@ static struct dentry *btrfs_lookup(stru
        struct dentry *ret;
  
        ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
 -      if (unlikely(d_need_lookup(dentry))) {
 -              spin_lock(&dentry->d_lock);
 -              dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
 -              spin_unlock(&dentry->d_lock);
 -      }
        return ret;
  }
  
@@@ -5572,10 -5635,13 +5621,13 @@@ struct extent_map *btrfs_get_extent_fie
                return em;
        if (em) {
                /*
-                * if our em maps to a hole, there might
-                * actually be delalloc bytes behind it
+                * if our em maps to
+                * -  a hole or
+                * -  a pre-alloc extent,
+                * there might actually be delalloc bytes behind it.
                 */
-               if (em->block_start != EXTENT_MAP_HOLE)
+               if (em->block_start != EXTENT_MAP_HOLE &&
+                   !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
                        return em;
                else
                        hole_em = em;
                         */
                        em->block_start = hole_em->block_start;
                        em->block_len = hole_len;
+                       if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
+                               set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
                } else {
                        em->start = range_start;
                        em->len = found;
@@@ -6915,11 -6983,9 +6969,9 @@@ static int btrfs_truncate(struct inode 
  
        /*
         * 1 for the truncate slack space
-        * 1 for the orphan item we're going to add
-        * 1 for the orphan item deletion
         * 1 for updating the inode.
         */
-       trans = btrfs_start_transaction(root, 4);
+       trans = btrfs_start_transaction(root, 2);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
                goto out;
                                      min_size);
        BUG_ON(ret);
  
-       ret = btrfs_orphan_add(trans, inode);
-       if (ret) {
-               btrfs_end_transaction(trans, root);
-               goto out;
-       }
        /*
         * setattr is responsible for setting the ordered_data_close flag,
         * but that is only tested during the last file release.  That
                ret = btrfs_orphan_del(trans, inode);
                if (ret)
                        err = ret;
-       } else if (ret && inode->i_nlink > 0) {
-               /*
-                * Failed to do the truncate, remove us from the in memory
-                * orphan list.
-                */
-               ret = btrfs_orphan_del(NULL, inode);
        }
  
        if (trans) {
@@@ -7531,41 -7585,61 +7571,61 @@@ void btrfs_wait_and_free_delalloc_work(
   */
  int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
  {
-       struct list_head *head = &root->fs_info->delalloc_inodes;
        struct btrfs_inode *binode;
        struct inode *inode;
        struct btrfs_delalloc_work *work, *next;
        struct list_head works;
+       struct list_head splice;
        int ret = 0;
  
        if (root->fs_info->sb->s_flags & MS_RDONLY)
                return -EROFS;
  
        INIT_LIST_HEAD(&works);
+       INIT_LIST_HEAD(&splice);
+ again:
        spin_lock(&root->fs_info->delalloc_lock);
-       while (!list_empty(head)) {
-               binode = list_entry(head->next, struct btrfs_inode,
+       list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+       while (!list_empty(&splice)) {
+               binode = list_entry(splice.next, struct btrfs_inode,
                                    delalloc_inodes);
+               list_del_init(&binode->delalloc_inodes);
                inode = igrab(&binode->vfs_inode);
                if (!inode)
-                       list_del_init(&binode->delalloc_inodes);
+                       continue;
+               list_add_tail(&binode->delalloc_inodes,
+                             &root->fs_info->delalloc_inodes);
                spin_unlock(&root->fs_info->delalloc_lock);
-               if (inode) {
-                       work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
-                       if (!work) {
-                               ret = -ENOMEM;
-                               goto out;
-                       }
-                       list_add_tail(&work->list, &works);
-                       btrfs_queue_worker(&root->fs_info->flush_workers,
-                                          &work->work);
+               work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+               if (unlikely(!work)) {
+                       ret = -ENOMEM;
+                       goto out;
                }
+               list_add_tail(&work->list, &works);
+               btrfs_queue_worker(&root->fs_info->flush_workers,
+                                  &work->work);
                cond_resched();
                spin_lock(&root->fs_info->delalloc_lock);
        }
        spin_unlock(&root->fs_info->delalloc_lock);
  
+       list_for_each_entry_safe(work, next, &works, list) {
+               list_del_init(&work->list);
+               btrfs_wait_and_free_delalloc_work(work);
+       }
+       spin_lock(&root->fs_info->delalloc_lock);
+       if (!list_empty(&root->fs_info->delalloc_inodes)) {
+               spin_unlock(&root->fs_info->delalloc_lock);
+               goto again;
+       }
+       spin_unlock(&root->fs_info->delalloc_lock);
        /* the filemap_flush will queue IO into the worker threads, but
         * we have to make sure the IO is actually started and that
         * ordered extents get created before we return
                    atomic_read(&root->fs_info->async_delalloc_pages) == 0));
        }
        atomic_dec(&root->fs_info->async_submit_draining);
+       return 0;
  out:
        list_for_each_entry_safe(work, next, &works, list) {
                list_del_init(&work->list);
                btrfs_wait_and_free_delalloc_work(work);
        }
+       if (!list_empty_careful(&splice)) {
+               spin_lock(&root->fs_info->delalloc_lock);
+               list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
+               spin_unlock(&root->fs_info->delalloc_lock);
+       }
        return ret;
  }
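
The reworked loop above is the standard pattern for walking a lock-protected list whose entries may be requeued underneath you: splice the whole list away under the lock, work on the private copy, and re-splice leftovers on error so nothing is lost. A generic sketch with hypothetical names:

#include <linux/list.h>
#include <linux/spinlock.h>

/* Hypothetical drain helper mirroring the shape of the
 * btrfs_start_delalloc_inodes() rework above. */
static void drain_list_sketch(spinlock_t *lock, struct list_head *global)
{
	LIST_HEAD(splice);
	struct list_head *entry;

	spin_lock(lock);
	list_splice_init(global, &splice);	/* global is now empty */
	spin_unlock(lock);

	while (!list_empty(&splice)) {
		entry = splice.next;
		list_del_init(entry);
		/* ... process the entry with the lock dropped ... */
	}
}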
  
diff --combined fs/btrfs/ioctl.c
@@@ -1243,7 -1243,7 +1243,7 @@@ int btrfs_defrag_file(struct inode *ino
                }
  
                defrag_count += ret;
 -              balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
 +              balance_dirty_pages_ratelimited(inode->i_mapping);
                mutex_unlock(&inode->i_mutex);
  
                if (newer_than) {
@@@ -1339,7 -1339,8 +1339,8 @@@ static noinline int btrfs_ioctl_resize(
        if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
                        1)) {
                pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
-               return -EINPROGRESS;
+               mnt_drop_write_file(file);
+               return -EINVAL;
        }
  
        mutex_lock(&root->fs_info->volume_mutex);
                printk(KERN_INFO "btrfs: resizing devid %llu\n",
                       (unsigned long long)devid);
        }
        device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
        if (!device) {
                printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
                ret = -EINVAL;
                goto out_free;
        }
-       if (device->fs_devices && device->fs_devices->seeding) {
+       if (!device->writeable) {
                printk(KERN_INFO "btrfs: resizer unable to apply on "
-                      "seeding device %llu\n",
+                      "readonly device %llu\n",
                       (unsigned long long)devid);
                ret = -EINVAL;
                goto out_free;
@@@ -1443,8 -1446,8 +1446,8 @@@ out_free
        kfree(vol_args);
  out:
        mutex_unlock(&root->fs_info->volume_mutex);
-       mnt_drop_write_file(file);
        atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+       mnt_drop_write_file(file);
        return ret;
  }
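
The unlock-order hunks in this file (resize, defrag, rm_dev) all restore the same rule: release in the reverse order of acquisition, so mnt_drop_write_file() comes after clearing the mutually-exclusive-op flag it was taken before. A hedged skeleton; the ioctl body and the excl_op_running variable are stand-ins:

#include <linux/atomic.h>
#include <linux/fs.h>
#include <linux/mount.h>

static atomic_t excl_op_running = ATOMIC_INIT(0);	/* stand-in */

static long sketch_ioctl(struct file *file)
{
	long ret = mnt_want_write_file(file);		/* acquire 1st */

	if (ret)
		return ret;
	if (atomic_xchg(&excl_op_running, 1)) {		/* acquire 2nd */
		mnt_drop_write_file(file);		/* unwind 1st */
		return -EINVAL;
	}

	/* ... the actual resize/defrag/remove work ... */
	ret = 0;

	atomic_set(&excl_op_running, 0);		/* release 2nd */
	mnt_drop_write_file(file);			/* release 1st, last */
	return ret;
}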
  
@@@ -2095,13 -2098,13 +2098,13 @@@ static noinline int btrfs_ioctl_snap_de
                err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
                if (err)
                        goto out_dput;
-               /* check if subvolume may be deleted by a non-root user */
-               err = btrfs_may_delete(dir, dentry, 1);
-               if (err)
-                       goto out_dput;
        }
  
+       /* check if subvolume may be deleted by a user */
+       err = btrfs_may_delete(dir, dentry, 1);
+       if (err)
+               goto out_dput;
        if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
                err = -EINVAL;
                goto out_dput;
@@@ -2183,19 -2186,20 +2186,20 @@@ static int btrfs_ioctl_defrag(struct fi
        struct btrfs_ioctl_defrag_range_args *range;
        int ret;
  
-       if (btrfs_root_readonly(root))
-               return -EROFS;
+       ret = mnt_want_write_file(file);
+       if (ret)
+               return ret;
  
        if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
                        1)) {
                pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
-               return -EINPROGRESS;
+               mnt_drop_write_file(file);
+               return -EINVAL;
        }
-       ret = mnt_want_write_file(file);
-       if (ret) {
-               atomic_set(&root->fs_info->mutually_exclusive_operation_running,
-                          0);
-               return ret;
+       if (btrfs_root_readonly(root)) {
+               ret = -EROFS;
+               goto out;
        }
  
        switch (inode->i_mode & S_IFMT) {
                ret = -EINVAL;
        }
  out:
-       mnt_drop_write_file(file);
        atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+       mnt_drop_write_file(file);
        return ret;
  }
  
@@@ -2263,7 -2267,7 +2267,7 @@@ static long btrfs_ioctl_add_dev(struct 
        if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
                        1)) {
                pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
-               return -EINPROGRESS;
+               return -EINVAL;
        }
  
        mutex_lock(&root->fs_info->volume_mutex);
@@@ -2300,7 -2304,7 +2304,7 @@@ static long btrfs_ioctl_rm_dev(struct f
                        1)) {
                pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
                mnt_drop_write_file(file);
-               return -EINPROGRESS;
+               return -EINVAL;
        }
  
        mutex_lock(&root->fs_info->volume_mutex);
        kfree(vol_args);
  out:
        mutex_unlock(&root->fs_info->volume_mutex);
-       mnt_drop_write_file(file);
        atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+       mnt_drop_write_file(file);
        return ret;
  }
  
@@@ -3437,8 -3441,8 +3441,8 @@@ static long btrfs_ioctl_balance(struct 
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_ioctl_balance_args *bargs;
        struct btrfs_balance_control *bctl;
+       bool need_unlock; /* for mut. excl. ops lock */
        int ret;
-       int need_to_clear_lock = 0;
  
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (ret)
                return ret;
  
-       mutex_lock(&fs_info->volume_mutex);
+ again:
+       if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
+               mutex_lock(&fs_info->volume_mutex);
+               mutex_lock(&fs_info->balance_mutex);
+               need_unlock = true;
+               goto locked;
+       }
+       /*
+        * mut. excl. ops lock is locked.  Three possibilities:
+        *   (1) some other op is running
+        *   (2) balance is running
+        *   (3) balance is paused -- special case (think resume)
+        */
        mutex_lock(&fs_info->balance_mutex);
+       if (fs_info->balance_ctl) {
+               /* this is either (2) or (3) */
+               if (!atomic_read(&fs_info->balance_running)) {
+                       mutex_unlock(&fs_info->balance_mutex);
+                       if (!mutex_trylock(&fs_info->volume_mutex))
+                               goto again;
+                       mutex_lock(&fs_info->balance_mutex);
+                       if (fs_info->balance_ctl &&
+                           !atomic_read(&fs_info->balance_running)) {
+                               /* this is (3) */
+                               need_unlock = false;
+                               goto locked;
+                       }
+                       mutex_unlock(&fs_info->balance_mutex);
+                       mutex_unlock(&fs_info->volume_mutex);
+                       goto again;
+               } else {
+                       /* this is (2) */
+                       mutex_unlock(&fs_info->balance_mutex);
+                       ret = -EINPROGRESS;
+                       goto out;
+               }
+       } else {
+               /* this is (1) */
+               mutex_unlock(&fs_info->balance_mutex);
+               pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
+               ret = -EINVAL;
+               goto out;
+       }
+ locked:
+       BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
  
        if (arg) {
                bargs = memdup_user(arg, sizeof(*bargs));
                if (IS_ERR(bargs)) {
                        ret = PTR_ERR(bargs);
-                       goto out;
+                       goto out_unlock;
                }
  
                if (bargs->flags & BTRFS_BALANCE_RESUME) {
                bargs = NULL;
        }
  
-       if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
-                       1)) {
-               pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
+       if (fs_info->balance_ctl) {
                ret = -EINPROGRESS;
                goto out_bargs;
        }
-       need_to_clear_lock = 1;
  
        bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
        if (!bctl) {
        }
  
  do_balance:
-       ret = btrfs_balance(bctl, bargs);
        /*
-        * bctl is freed in __cancel_balance or in free_fs_info if
-        * restriper was paused all the way until unmount
+        * Ownership of bctl and mutually_exclusive_operation_running
+        * goes to btrfs_balance.  bctl is freed in __cancel_balance,
+        * or, if restriper was paused all the way until unmount, in
+        * free_fs_info.  mutually_exclusive_operation_running is
+        * cleared in __cancel_balance.
         */
+       need_unlock = false;
+       ret = btrfs_balance(bctl, bargs);
        if (arg) {
                if (copy_to_user(arg, bargs, sizeof(*bargs)))
                        ret = -EFAULT;
  
  out_bargs:
        kfree(bargs);
- out:
-       if (need_to_clear_lock)
-               atomic_set(&root->fs_info->mutually_exclusive_operation_running,
-                          0);
+ out_unlock:
        mutex_unlock(&fs_info->balance_mutex);
        mutex_unlock(&fs_info->volume_mutex);
+       if (need_unlock)
+               atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+ out:
        mnt_drop_write_file(file);
        return ret;
  }
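
The again/trylock dance above exists because the canonical order is volume_mutex then balance_mutex, yet the resume path must inspect balance state (under the inner lock) before deciding to take both. A hedged sketch of the pattern, with generic lock names and a bool standing in for the balance state:

#include <linux/mutex.h>

/* Hypothetical: end up holding outer then inner (the canonical
 * order) when the decision to proceed requires peeking under inner
 * first -- drop inner, trylock outer, retry on failure, exactly the
 * shape of the goto again loop above. */
static bool lock_both_sketch(struct mutex *outer, struct mutex *inner,
			     bool *resumable)
{
again:
	mutex_lock(inner);
	if (!*resumable) {		/* state check needs inner */
		mutex_unlock(inner);
		return false;
	}
	mutex_unlock(inner);
	if (!mutex_trylock(outer))	/* never block on outer while
					 * inner was just held: ABBA */
		goto again;
	mutex_lock(inner);
	if (*resumable)
		return true;		/* both held, canonical order */
	mutex_unlock(inner);		/* state changed meanwhile */
	mutex_unlock(outer);
	goto again;
}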
@@@ -3698,6 -3752,11 +3752,11 @@@ static long btrfs_ioctl_qgroup_create(s
                goto drop_write;
        }
  
+       if (!sa->qgroupid) {
+               ret = -EINVAL;
+               goto out;
+       }
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);