Merge branch 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 28 Mar 2011 22:31:05 +0000 (15:31 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 28 Mar 2011 22:31:05 +0000 (15:31 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Mar 2011 22:31:05 +0000 (15:31 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Mar 2011 22:31:05 +0000 (15:31 -0700)
diff --combined fs/btrfs/disk-io.c

index 830d261,5cf3aa7..d7a7315
--- 1/fs/btrfs/disk-io.c
--- 2/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@@ -29,6 -29,7 +29,7 @@@
   #include <linux/crc32c.h>
   #include <linux/slab.h>
   #include <linux/migrate.h>
+ #include <asm/unaligned.h>
   #include "compat.h"
   #include "ctree.h"
   #include "disk-io.h"
@@@ -198,7 -199,7 +199,7 @@@ u32 btrfs_csum_data(struct btrfs_root *
   
   void btrfs_csum_final(u32 crc, char *result)
   {
-       *(__le32 *)result = ~cpu_to_le32(crc);
+       put_unaligned_le32(~crc, result);
   }
   
   /*
@@@ -323,6 -324,7 +324,7 @@@ static int btree_read_extent_buffer_pag
         int num_copies = 0;
         int mirror_num = 0;
   
+       clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
         io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
         while (1) {
                 ret = read_extent_buffer_pages(io_tree, eb, start, 1,
@@@ -331,6 -333,14 +333,14 @@@
                     !verify_parent_transid(io_tree, eb, parent_transid))
                         return ret;
   
+               /*
+                * This buffer's crc is fine, but its contents are corrupted, so
+                * there is no reason to read the other copies, they won't be
+                * any less wrong.
+                */
+               if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
+                       return ret;
+ 
                 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
                                               eb->start, eb->len);
                 if (num_copies == 1)
@@@ -419,6 -429,73 +429,73 @@@ static int check_tree_block_fsid(struc
         return ret;
   }
   
+ #define CORRUPT(reason, eb, root, slot)                               \
+       printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
+              "root=%llu, slot=%d\n", reason,                  \
+              (unsigned long long)btrfs_header_bytenr(eb),     \
+              (unsigned long long)root->objectid, slot)
+ 
+ static noinline int check_leaf(struct btrfs_root *root,
+                              struct extent_buffer *leaf)
+ {
+       struct btrfs_key key;
+       struct btrfs_key leaf_key;
+       u32 nritems = btrfs_header_nritems(leaf);
+       int slot;
+ 
+       if (nritems == 0)
+               return 0;
+ 
+       /* Check the 0 item */
+       if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
+           BTRFS_LEAF_DATA_SIZE(root)) {
+               CORRUPT("invalid item offset size pair", leaf, root, 0);
+               return -EIO;
+       }
+ 
+       /*
+        * Check to make sure each item's keys are in the correct order and their
+        * offsets make sense.  We only have to loop through nritems-1 because
+        * we check the current slot against the next slot, which verifies the
+        * next slot's offset+size makes sense and that the current slot's
+        * offset is correct.
+        */
+       for (slot = 0; slot < nritems - 1; slot++) {
+               btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
+               btrfs_item_key_to_cpu(leaf, &key, slot + 1);
+ 
+               /* Make sure the keys are in the right order */
+               if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
+                       CORRUPT("bad key order", leaf, root, slot);
+                       return -EIO;
+               }
+ 
+               /*
+                * Make sure the offset and ends are right, remember that the
+                * item data starts at the end of the leaf and grows towards the
+                * front.
+                */
+               if (btrfs_item_offset_nr(leaf, slot) !=
+                       btrfs_item_end_nr(leaf, slot + 1)) {
+                       CORRUPT("slot offset bad", leaf, root, slot);
+                       return -EIO;
+               }
+ 
+               /*
+                * Check to make sure that we don't point outside of the leaf,
+                * just in case all the items are consistent with each other, but
+                * all point outside of the leaf.
+                */
+               if (btrfs_item_end_nr(leaf, slot) >
+                   BTRFS_LEAF_DATA_SIZE(root)) {
+                       CORRUPT("slot end outside of leaf", leaf, root, slot);
+                       return -EIO;
+               }
+       }
+ 
+       return 0;
+ }
+ 
   #ifdef CONFIG_DEBUG_LOCK_ALLOC
   void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
   {
@@@ -485,8 -562,20 +562,20 @@@ static int btree_readpage_end_io_hook(s
         btrfs_set_buffer_lockdep_class(eb, found_level);
   
         ret = csum_tree_block(root, eb, 1);
-       if (ret)
+       if (ret) {
                 ret = -EIO;
+               goto err;
+       }
+ 
+       /*
+        * If this is a leaf block and it is corrupt, set the corrupt bit so
+        * that we don't try and read the other copies of this block, just
+        * return -EIO.
+        */
+       if (found_level == 0 && check_leaf(root, eb)) {
+               set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
+               ret = -EIO;
+       }
   
         end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
         end = eb->start + end - 1;
@@@ -847,6 -936,7 +936,6 @@@ static const struct address_space_opera
         .writepages     = btree_writepages,
         .releasepage    = btree_releasepage,
         .invalidatepage = btree_invalidatepage,
- -      .sync_page      = block_sync_page,
   #ifdef CONFIG_MIGRATION
         .migratepage    = btree_migratepage,
   #endif
@@@ -1159,7 -1249,10 +1248,10 @@@ struct btrfs_root *btrfs_read_fs_root_n
                      root, fs_info, location->objectid);
   
         path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path) {
+               kfree(root);
+               return ERR_PTR(-ENOMEM);
+       }
         ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
         if (ret == 0) {
                 l = path->nodes[0];
@@@ -1330,6 -1423,82 +1422,6 @@@ static int btrfs_congested_fn(void *con
   }
   
   /*
- - * this unplugs every device on the box, and it is only used when page
- - * is null
- - */
- -static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
- -{
- -      struct btrfs_device *device;
- -      struct btrfs_fs_info *info;
- -
- -      info = (struct btrfs_fs_info *)bdi->unplug_io_data;
- -      list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
- -              if (!device->bdev)
- -                      continue;
- -
- -              bdi = blk_get_backing_dev_info(device->bdev);
- -              if (bdi->unplug_io_fn)
- -                      bdi->unplug_io_fn(bdi, page);
- -      }
- -}
- -
- -static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
- -{
- -      struct inode *inode;
- -      struct extent_map_tree *em_tree;
- -      struct extent_map *em;
- -      struct address_space *mapping;
- -      u64 offset;
- -
- -      /* the generic O_DIRECT read code does this */
- -      if (1 || !page) {
- -              __unplug_io_fn(bdi, page);
- -              return;
- -      }
- -
- -      /*
- -       * page->mapping may change at any time.  Get a consistent copy
- -       * and use that for everything below
- -       */
- -      smp_mb();
- -      mapping = page->mapping;
- -      if (!mapping)
- -              return;
- -
- -      inode = mapping->host;
- -
- -      /*
- -       * don't do the expensive searching for a small number of
- -       * devices
- -       */
- -      if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
- -              __unplug_io_fn(bdi, page);
- -              return;
- -      }
- -
- -      offset = page_offset(page);
- -
- -      em_tree = &BTRFS_I(inode)->extent_tree;
- -      read_lock(&em_tree->lock);
- -      em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
- -      read_unlock(&em_tree->lock);
- -      if (!em) {
- -              __unplug_io_fn(bdi, page);
- -              return;
- -      }
- -
- -      if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- -              free_extent_map(em);
- -              __unplug_io_fn(bdi, page);
- -              return;
- -      }
- -      offset = offset - em->start;
- -      btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
- -                        em->block_start + offset, page);
- -      free_extent_map(em);
- -}
- -
- -/*
    * If this fails, caller must call bdi_destroy() to get rid of the
    * bdi again.
    */
@@@ -1343,6 -1512,8 +1435,6 @@@ static int setup_bdi(struct btrfs_fs_in
                 return err;
   
         bdi->ra_pages   = default_backing_dev_info.ra_pages;
- -      bdi->unplug_io_fn       = btrfs_unplug_io_fn;
- -      bdi->unplug_io_data     = info;
         bdi->congested_fn       = btrfs_congested_fn;
         bdi->congested_data     = info;
         return 0;
@@@ -1553,6 -1724,8 +1645,8 @@@ struct btrfs_root *open_ctree(struct su
                 goto fail_bdi;
         }
   
+       fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+ 
         INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
         INIT_LIST_HEAD(&fs_info->trans_list);
         INIT_LIST_HEAD(&fs_info->dead_roots);
@@@ -1683,6 -1856,12 +1777,12 @@@
   
         btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
   
+       /*
+        * In the long term, we'll store the compression type in the super
+        * block, and it'll be used for per file compression control.
+        */
+       fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
+ 
         ret = btrfs_parse_options(tree_root, options);
         if (ret) {
                 err = ret;
@@@ -1888,6 -2067,12 +1988,12 @@@
         fs_info->metadata_alloc_profile = (u64)-1;
         fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
   
+       ret = btrfs_init_space_info(fs_info);
+       if (ret) {
+               printk(KERN_ERR "Failed to initial space info: %d\n", ret);
+               goto fail_block_groups;
+       }
+ 
         ret = btrfs_read_block_groups(extent_root);
         if (ret) {
                 printk(KERN_ERR "Failed to read block groups: %d\n", ret);
@@@ -1979,9 -2164,14 +2085,14 @@@
   
         if (!(sb->s_flags & MS_RDONLY)) {
                 down_read(&fs_info->cleanup_work_sem);
-               btrfs_orphan_cleanup(fs_info->fs_root);
-               btrfs_orphan_cleanup(fs_info->tree_root);
+               err = btrfs_orphan_cleanup(fs_info->fs_root);
+               if (!err)
+                       err = btrfs_orphan_cleanup(fs_info->tree_root);
                 up_read(&fs_info->cleanup_work_sem);
+               if (err) {
+                       close_ctree(tree_root);
+                       return ERR_PTR(err);
+               }
         }
   
         return tree_root;
@@@ -2356,8 -2546,12 +2467,12 @@@ int btrfs_cleanup_fs_roots(struct btrfs
   
                 root_objectid = gang[ret - 1]->root_key.objectid + 1;
                 for (i = 0; i < ret; i++) {
+                       int err;
+ 
                         root_objectid = gang[i]->root_key.objectid;
-                       btrfs_orphan_cleanup(gang[i]);
+                       err = btrfs_orphan_cleanup(gang[i]);
+                       if (err)
+                               return err;
                 }
                 root_objectid++;
         }
@@@ -2414,7 -2608,7 +2529,7 @@@ int close_ctree(struct btrfs_root *root
          * ERROR state on disk.
          *
          * 2. when btrfs flips readonly just in btrfs_commit_super,
- -       * and in such case, btrfs cannnot write sb via btrfs_commit_super,
+ +       * and in such case, btrfs cannot write sb via btrfs_commit_super,
          * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
          * btrfs will cleanup all FS resources first and write sb then.
          */
@@@ -2868,7 -3062,10 +2983,10 @@@ static int btrfs_destroy_pinned_extent(
                         break;
   
                 /* opt_discard */
-               ret = btrfs_error_discard_extent(root, start, end + 1 - start);
+               if (btrfs_test_opt(root, DISCARD))
+                       ret = btrfs_error_discard_extent(root, start,
+                                                        end + 1 - start,
+                                                        NULL);
   
                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
                 btrfs_error_unpin_extent_range(root, start, end);
diff --combined fs/btrfs/extent_io.c

index b5b9282,77c65a0..20ddb28
--- 1/fs/btrfs/extent_io.c
--- 2/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@@ -2188,10 -2188,12 +2188,12 @@@ static int __extent_writepage(struct pa
         unsigned long nr_written = 0;
   
         if (wbc->sync_mode == WB_SYNC_ALL)
- -              write_flags = WRITE_SYNC_PLUG;
+ +              write_flags = WRITE_SYNC;
         else
                 write_flags = WRITE;
   
+       trace___extent_writepage(page, inode, wbc);
+ 
         WARN_ON(!PageLocked(page));
         pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
         if (page->index > end_index ||
@@@ -3690,6 -3692,7 +3692,7 @@@ int map_private_extent_buffer(struct ex
                        "wanted %lu %lu\n", (unsigned long long)eb->start,
                        eb->len, start, min_len);
                 WARN_ON(1);
+               return -EINVAL;
         }
   
         p = extent_buffer_page(eb, i);
diff --combined fs/btrfs/inode.c

index 119520b,0627418..93c28a1
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -50,6 -50,7 +50,7 @@@
   #include "tree-log.h"
   #include "compression.h"
   #include "locking.h"
+ #include "free-space-cache.h"
   
   struct btrfs_iget_args {
         u64 ino;
@@@ -70,6 -71,7 +71,7 @@@ static struct kmem_cache *btrfs_inode_c
   struct kmem_cache *btrfs_trans_handle_cachep;
   struct kmem_cache *btrfs_transaction_cachep;
   struct kmem_cache *btrfs_path_cachep;
+ struct kmem_cache *btrfs_free_space_cachep;
   
   #define S_SHIFT 12
   static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
@@@ -82,7 -84,8 +84,8 @@@
         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
   };
   
- static void btrfs_truncate(struct inode *inode);
+ static int btrfs_setsize(struct inode *inode, loff_t newsize);
+ static int btrfs_truncate(struct inode *inode);
   static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
   static noinline int cow_file_range(struct inode *inode,
                                    struct page *locked_page,
@@@ -90,14 -93,13 +93,14 @@@
                                    unsigned long *nr_written, int unlock);
   
   static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
- -                                   struct inode *inode,  struct inode *dir)
+ +                                   struct inode *inode,  struct inode *dir,
+ +                                   const struct qstr *qstr)
   {
         int err;
   
         err = btrfs_init_acl(trans, inode, dir);
         if (!err)
- -              err = btrfs_xattr_security_init(trans, inode, dir);
+ +              err = btrfs_xattr_security_init(trans, inode, dir, qstr);
         return err;
   }
   
@@@ -288,6 -290,7 +291,7 @@@ static noinline int add_async_extent(st
         struct async_extent *async_extent;
   
         async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
+       BUG_ON(!async_extent);
         async_extent->start = start;
         async_extent->ram_size = ram_size;
         async_extent->compressed_size = compressed_size;
@@@ -382,9 -385,11 +386,11 @@@ again
          */
         if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
             (btrfs_test_opt(root, COMPRESS) ||
-            (BTRFS_I(inode)->force_compress))) {
+            (BTRFS_I(inode)->force_compress) ||
+            (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) {
                 WARN_ON(pages);
                 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+               BUG_ON(!pages);
   
                 if (BTRFS_I(inode)->force_compress)
                         compress_type = BTRFS_I(inode)->force_compress;
@@@ -1254,7 -1259,8 +1260,8 @@@ static int run_delalloc_range(struct in
                 ret = run_delalloc_nocow(inode, locked_page, start, end,
                                          page_started, 0, nr_written);
         else if (!btrfs_test_opt(root, COMPRESS) &&
-                !(BTRFS_I(inode)->force_compress))
+                !(BTRFS_I(inode)->force_compress) &&
+                !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))
                 ret = cow_file_range(inode, locked_page, start, end,
                                       page_started, nr_written, 1);
         else
@@@ -1461,8 -1467,11 +1468,11 @@@ static int btrfs_submit_bio_hook(struc
                 if (bio_flags & EXTENT_BIO_COMPRESSED) {
                         return btrfs_submit_compressed_read(inode, bio,
                                                     mirror_num, bio_flags);
-               } else if (!skip_sum)
-                       btrfs_lookup_bio_sums(root, inode, bio, NULL);
+               } else if (!skip_sum) {
+                       ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
+                       if (ret)
+                               return ret;
+               }
                 goto mapit;
         } else if (!skip_sum) {
                 /* csum items have already been cloned */
@@@ -1785,6 -1794,8 +1795,8 @@@ out
   static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
                                 struct extent_state *state, int uptodate)
   {
+       trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
+ 
         ClearPagePrivate2(page);
         return btrfs_finish_ordered_io(page->mapping->host, start, end);
   }
@@@ -1895,10 -1906,10 +1907,10 @@@ static int btrfs_io_failed_hook(struct 
         else
                 rw = READ;
   
-       BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
+       ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
                                                       failrec->last_mirror,
                                                       failrec->bio_flags, 0);
-       return 0;
+       return ret;
   }
   
   /*
@@@ -2282,7 -2293,7 +2294,7 @@@ int btrfs_orphan_del(struct btrfs_trans
    * this cleans up any orphans that may be left on the list from the last use
    * of this root.
    */
- void btrfs_orphan_cleanup(struct btrfs_root *root)
+ int btrfs_orphan_cleanup(struct btrfs_root *root)
   {
         struct btrfs_path *path;
         struct extent_buffer *leaf;
@@@ -2292,10 -2303,13 +2304,13 @@@
         int ret = 0, nr_unlink = 0, nr_truncate = 0;
   
         if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
-               return;
+               return 0;
   
         path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
         path->reada = -1;
   
         key.objectid = BTRFS_ORPHAN_OBJECTID;
@@@ -2304,11 -2318,8 +2319,8 @@@
   
         while (1) {
                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-               if (ret < 0) {
-                       printk(KERN_ERR "Error searching slot for orphan: %d"
-                              "\n", ret);
-                       break;
-               }
+               if (ret < 0)
+                       goto out;
   
                 /*
                  * if ret == 0 means we found what we were searching for, which
@@@ -2316,6 -2327,7 +2328,7 @@@
                  * find the key and see if we have stuff that matches
                  */
                 if (ret > 0) {
+                       ret = 0;
                         if (path->slots[0] == 0)
                                 break;
                         path->slots[0]--;
@@@ -2343,7 -2355,10 +2356,10 @@@
                 found_key.type = BTRFS_INODE_ITEM_KEY;
                 found_key.offset = 0;
                 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
-               BUG_ON(IS_ERR(inode));
+               if (IS_ERR(inode)) {
+                       ret = PTR_ERR(inode);
+                       goto out;
+               }
   
                 /*
                  * add this inode to the orphan list so btrfs_orphan_del does
@@@ -2361,7 -2376,10 +2377,10 @@@
                  */
                 if (is_bad_inode(inode)) {
                         trans = btrfs_start_transaction(root, 0);
-                       BUG_ON(IS_ERR(trans));
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               goto out;
+                       }
                         btrfs_orphan_del(trans, inode);
                         btrfs_end_transaction(trans, root);
                         iput(inode);
@@@ -2370,17 -2388,22 +2389,22 @@@
   
                 /* if we have links, this was a truncate, lets do that */
                 if (inode->i_nlink) {
+                       if (!S_ISREG(inode->i_mode)) {
+                               WARN_ON(1);
+                               iput(inode);
+                               continue;
+                       }
                         nr_truncate++;
-                       btrfs_truncate(inode);
+                       ret = btrfs_truncate(inode);
                 } else {
                         nr_unlink++;
                 }
   
                 /* this will do delete_inode and everything for us */
                 iput(inode);
+               if (ret)
+                       goto out;
         }
-       btrfs_free_path(path);
- 
         root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
   
         if (root->orphan_block_rsv)
@@@ -2389,14 -2412,20 +2413,20 @@@
   
         if (root->orphan_block_rsv || root->orphan_item_inserted) {
                 trans = btrfs_join_transaction(root, 1);
-               BUG_ON(IS_ERR(trans));
-               btrfs_end_transaction(trans, root);
+               if (!IS_ERR(trans))
+                       btrfs_end_transaction(trans, root);
         }
   
         if (nr_unlink)
                 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
         if (nr_truncate)
                 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
+ 
+ out:
+       if (ret)
+               printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret);
+       btrfs_free_path(path);
+       return ret;
   }
   
   /*
@@@ -2507,6 -2536,8 +2537,8 @@@ static void btrfs_read_locked_inode(str
         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
   
         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
+       if (location.objectid == BTRFS_FREE_SPACE_OBJECTID)
+               inode->i_mapping->flags &= ~__GFP_FS;
   
         /*
          * try to precache a NULL acl entry for files that don't have
@@@ -2635,10 -2666,10 +2667,10 @@@ failed
    * recovery code.  It remove a link in a directory with a given name, and
    * also drops the back refs in the inode to the directory
    */
- int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
-                      struct btrfs_root *root,
-                      struct inode *dir, struct inode *inode,
-                      const char *name, int name_len)
+ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               struct inode *dir, struct inode *inode,
+                               const char *name, int name_len)
   {
         struct btrfs_path *path;
         int ret = 0;
@@@ -2710,12 -2741,25 +2742,25 @@@ err
         btrfs_i_size_write(dir, dir->i_size - name_len * 2);
         inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
         btrfs_update_inode(trans, root, dir);
-       btrfs_drop_nlink(inode);
-       ret = btrfs_update_inode(trans, root, inode);
   out:
         return ret;
   }
   
+ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root,
+                      struct inode *dir, struct inode *inode,
+                      const char *name, int name_len)
+ {
+       int ret;
+       ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
+       if (!ret) {
+               btrfs_drop_nlink(inode);
+               ret = btrfs_update_inode(trans, root, inode);
+       }
+       return ret;
+ }
+               
+ 
   /* helper to check if there is any shared block in the path */
   static int check_path_shared(struct btrfs_root *root,
                              struct btrfs_path *path)
@@@ -3537,7 -3581,13 +3582,13 @@@ out
         return ret;
   }
   
- int btrfs_cont_expand(struct inode *inode, loff_t size)
+ /*
+  * This function puts in dummy file extents for the area we're creating a hole
+  * for.  So if we are truncating this file to a larger size we need to insert
+  * these file extents so that btrfs_get_extent will return an EXTENT_MAP_HOLE for
+  * the range between oldsize and size
+  */
+ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
   {
         struct btrfs_trans_handle *trans;
         struct btrfs_root *root = BTRFS_I(inode)->root;
@@@ -3545,7 -3595,7 +3596,7 @@@
         struct extent_map *em = NULL;
         struct extent_state *cached_state = NULL;
         u64 mask = root->sectorsize - 1;
-       u64 hole_start = (inode->i_size + mask) & ~mask;
+       u64 hole_start = (oldsize + mask) & ~mask;
         u64 block_end = (size + mask) & ~mask;
         u64 last_byte;
         u64 cur_offset;
@@@ -3590,13 -3640,15 +3641,15 @@@
                         err = btrfs_drop_extents(trans, inode, cur_offset,
                                                  cur_offset + hole_size,
                                                  &hint_byte, 1);
-                       BUG_ON(err);
+                       if (err)
+                               break;
   
                         err = btrfs_insert_file_extent(trans, root,
                                         inode->i_ino, cur_offset, 0,
                                         0, hole_size, 0, hole_size,
                                         0, 0, 0);
-                       BUG_ON(err);
+                       if (err)
+                               break;
   
                         btrfs_drop_extent_cache(inode, hole_start,
                                         last_byte - 1, 0);
@@@ -3616,81 -3668,41 +3669,41 @@@
         return err;
   }
   
- static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
+ static int btrfs_setsize(struct inode *inode, loff_t newsize)
   {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
-       unsigned long nr;
+       loff_t oldsize = i_size_read(inode);
         int ret;
   
-       if (attr->ia_size == inode->i_size)
+       if (newsize == oldsize)
                 return 0;
   
-       if (attr->ia_size > inode->i_size) {
-               unsigned long limit;
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (attr->ia_size > inode->i_sb->s_maxbytes)
-                       return -EFBIG;
-               if (limit != RLIM_INFINITY && attr->ia_size > limit) {
-                       send_sig(SIGXFSZ, current, 0);
-                       return -EFBIG;
-               }
-       }
- 
-       trans = btrfs_start_transaction(root, 5);
-       if (IS_ERR(trans))
-               return PTR_ERR(trans);
- 
-       btrfs_set_trans_block_group(trans, inode);
- 
-       ret = btrfs_orphan_add(trans, inode);
-       BUG_ON(ret);
- 
-       nr = trans->blocks_used;
-       btrfs_end_transaction(trans, root);
-       btrfs_btree_balance_dirty(root, nr);
- 
-       if (attr->ia_size > inode->i_size) {
-               ret = btrfs_cont_expand(inode, attr->ia_size);
+       if (newsize > oldsize) {
+               i_size_write(inode, newsize);
+               btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+               truncate_pagecache(inode, oldsize, newsize);
+               ret = btrfs_cont_expand(inode, oldsize, newsize);
                 if (ret) {
-                       btrfs_truncate(inode);
+                       btrfs_setsize(inode, oldsize);
                         return ret;
                 }
   
-               i_size_write(inode, attr->ia_size);
-               btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+               mark_inode_dirty(inode);
+       } else {
   
-               trans = btrfs_start_transaction(root, 0);
-               BUG_ON(IS_ERR(trans));
-               btrfs_set_trans_block_group(trans, inode);
-               trans->block_rsv = root->orphan_block_rsv;
-               BUG_ON(!trans->block_rsv);
+               /*
+                * We're truncating a file that used to have good data down to
+                * zero. Make sure it gets into the ordered flush list so that
+                * any new writes get down to disk quickly.
+                */
+               if (newsize == 0)
+                       BTRFS_I(inode)->ordered_data_close = 1;
   
-               ret = btrfs_update_inode(trans, root, inode);
-               BUG_ON(ret);
-               if (inode->i_nlink > 0) {
-                       ret = btrfs_orphan_del(trans, inode);
-                       BUG_ON(ret);
-               }
-               nr = trans->blocks_used;
-               btrfs_end_transaction(trans, root);
-               btrfs_btree_balance_dirty(root, nr);
-               return 0;
+               /* we don't support swapfiles, so vmtruncate shouldn't fail */
+               truncate_setsize(inode, newsize);
+               ret = btrfs_truncate(inode);
         }
   
-       /*
-        * We're truncating a file that used to have good data down to
-        * zero. Make sure it gets into the ordered flush list so that
-        * any new writes get down to disk quickly.
-        */
-       if (attr->ia_size == 0)
-               BTRFS_I(inode)->ordered_data_close = 1;
- 
-       /* we don't support swapfiles, so vmtruncate shouldn't fail */
-       ret = vmtruncate(inode, attr->ia_size);
-       BUG_ON(ret);
- 
-       return 0;
+       return ret;
   }
   
   static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
@@@ -3707,7 -3719,7 +3720,7 @@@
                 return err;
   
         if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
-               err = btrfs_setattr_size(inode, attr);
+               err = btrfs_setsize(inode, attr->ia_size);
                 if (err)
                         return err;
         }
@@@ -3730,6 -3742,8 +3743,8 @@@ void btrfs_evict_inode(struct inode *in
         unsigned long nr;
         int ret;
   
+       trace_btrfs_inode_evict(inode);
+ 
         truncate_inode_pages(&inode->i_data, 0);
         if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
                                root == root->fs_info->tree_root))
@@@ -4072,7 -4086,6 +4087,6 @@@ struct inode *btrfs_iget(struct super_b
                 BTRFS_I(inode)->root = root;
                 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
                 btrfs_read_locked_inode(inode);
- 
                 inode_tree_add(inode);
                 unlock_new_inode(inode);
                 if (new)
@@@ -4147,8 -4160,10 +4161,10 @@@ struct inode *btrfs_lookup_dentry(struc
         if (!IS_ERR(inode) && root != sub_root) {
                 down_read(&root->fs_info->cleanup_work_sem);
                 if (!(inode->i_sb->s_flags & MS_RDONLY))
-                       btrfs_orphan_cleanup(sub_root);
+                       ret = btrfs_orphan_cleanup(sub_root);
                 up_read(&root->fs_info->cleanup_work_sem);
+               if (ret)
+                       inode = ERR_PTR(ret);
         }
   
         return inode;
@@@ -4282,6 -4297,9 +4298,9 @@@ static int btrfs_real_readdir(struct fi
                 while (di_cur < di_total) {
                         struct btrfs_key location;
   
+                       if (verify_dir_item(root, leaf, di))
+                               break;
+ 
                         name_len = btrfs_dir_name_len(leaf, di);
                         if (name_len <= sizeof(tmp_name)) {
                                 name_ptr = tmp_name;
@@@ -4517,6 -4535,8 +4536,8 @@@ static struct inode *btrfs_new_inode(st
                 return ERR_PTR(-ENOMEM);
   
         if (dir) {
+               trace_btrfs_inode_request(dir);
+ 
                 ret = btrfs_set_inode_index(dir, index);
                 if (ret) {
                         iput(inode);
@@@ -4585,12 -4605,16 +4606,16 @@@
         if ((mode & S_IFREG)) {
                 if (btrfs_test_opt(root, NODATASUM))
                         BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
-               if (btrfs_test_opt(root, NODATACOW))
+               if (btrfs_test_opt(root, NODATACOW) ||
+                   (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
                         BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
         }
   
         insert_inode_hash(inode);
         inode_tree_add(inode);
+ 
+       trace_btrfs_inode_new(inode);
+ 
         return inode;
   fail:
         if (dir)
@@@ -4705,7 -4729,7 +4730,7 @@@ static int btrfs_mknod(struct inode *di
         if (IS_ERR(inode))
                 goto out_unlock;
   
- -      err = btrfs_init_inode_security(trans, inode, dir);
+ +      err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
         if (err) {
                 drop_inode = 1;
                 goto out_unlock;
@@@ -4766,7 -4790,7 +4791,7 @@@ static int btrfs_create(struct inode *d
         if (IS_ERR(inode))
                 goto out_unlock;
   
- -      err = btrfs_init_inode_security(trans, inode, dir);
+ +      err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
         if (err) {
                 drop_inode = 1;
                 goto out_unlock;
@@@ -4807,9 -4831,15 +4832,12 @@@ static int btrfs_link(struct dentry *ol
         int err;
         int drop_inode = 0;
   
- -      if (inode->i_nlink == 0)
- -              return -ENOENT;
- -
         /* do not allow sys_link's with other subvols of the same device */
         if (root->objectid != BTRFS_I(inode)->root->objectid)
-               return -EPERM;
+               return -EXDEV;
+ 
+       if (inode->i_nlink == ~0U)
+               return -EMLINK;
   
         btrfs_inc_nlink(inode);
         inode->i_ctime = CURRENT_TIME;
@@@ -4892,7 -4922,7 +4920,7 @@@ static int btrfs_mkdir(struct inode *di
   
         drop_on_err = 1;
   
- -      err = btrfs_init_inode_security(trans, inode, dir);
+ +      err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
         if (err)
                 goto out_fail;
   
@@@ -5265,6 -5295,9 +5293,9 @@@ insert
         }
         write_unlock(&em_tree->lock);
   out:
+ 
+       trace_btrfs_get_extent(root, em);
+ 
         if (path)
                 btrfs_free_path(path);
         if (trans) {
@@@ -5748,6 -5781,10 +5779,10 @@@ static void btrfs_endio_direct_read(str
   
         kfree(dip->csums);
         kfree(dip);
+ 
+       /* If we had a csum failure make sure to clear the uptodate flag */
+       if (err)
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
         dio_end_io(bio, err);
   }
   
@@@ -5849,6 -5886,10 +5884,10 @@@ out_done
   
         kfree(dip->csums);
         kfree(dip);
+ 
+       /* If we had an error make sure to clear the uptodate flag */
+       if (err)
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
         dio_end_io(bio, err);
   }
   
@@@ -5922,9 -5963,12 +5961,12 @@@ static inline int __btrfs_submit_dio_bi
                                    __btrfs_submit_bio_start_direct_io,
                                    __btrfs_submit_bio_done);
                 goto err;
-       } else if (!skip_sum)
-               btrfs_lookup_bio_sums_dio(root, inode, bio,
+       } else if (!skip_sum) {
+               ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
                                           file_offset, csums);
+               if (ret)
+                       goto err;
+       }
   
         ret = btrfs_map_bio(root, rw, bio, 0, 1);
   err:
@@@ -5948,6 -5992,7 +5990,7 @@@ static int btrfs_submit_direct_hook(in
         int nr_pages = 0;
         u32 *csums = dip->csums;
         int ret = 0;
+       int write = rw & REQ_WRITE;
   
         bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
         if (!bio)
@@@ -5984,7 -6029,8 +6027,8 @@@
                                 goto out_err;
                         }
   
-                       if (!skip_sum)
+                       /* Writes use the ordered csums */
+                       if (!write && !skip_sum)
                                 csums = csums + nr_pages;
                         start_sector += submit_len >> 9;
                         file_offset += submit_len;
@@@ -6052,7 -6098,8 +6096,8 @@@ static void btrfs_submit_direct(int rw
         }
         dip->csums = NULL;
   
-       if (!skip_sum) {
+       /* Writes use the ordered csum stuff, so we don't need dip->csums */
+       if (!write && !skip_sum) {
                 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
                 if (!dip->csums) {
                         kfree(dip);
@@@ -6474,28 -6521,42 +6519,42 @@@ out
         return ret;
   }
   
- static void btrfs_truncate(struct inode *inode)
+ static int btrfs_truncate(struct inode *inode)
   {
         struct btrfs_root *root = BTRFS_I(inode)->root;
         int ret;
+       int err = 0;
         struct btrfs_trans_handle *trans;
         unsigned long nr;
         u64 mask = root->sectorsize - 1;
   
-       if (!S_ISREG(inode->i_mode)) {
-               WARN_ON(1);
-               return;
-       }
- 
         ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
         if (ret)
-               return;
+               return ret;
   
         btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
         btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
   
+       trans = btrfs_start_transaction(root, 5);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+ 
+       btrfs_set_trans_block_group(trans, inode);
+ 
+       ret = btrfs_orphan_add(trans, inode);
+       if (ret) {
+               btrfs_end_transaction(trans, root);
+               return ret;
+       }
+ 
+       nr = trans->blocks_used;
+       btrfs_end_transaction(trans, root);
+       btrfs_btree_balance_dirty(root, nr);
+ 
+       /* Now start a transaction for the truncate */
         trans = btrfs_start_transaction(root, 0);
-       BUG_ON(IS_ERR(trans));
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
         btrfs_set_trans_block_group(trans, inode);
         trans->block_rsv = root->orphan_block_rsv;
   
@@@ -6522,29 -6583,38 +6581,38 @@@
         while (1) {
                 if (!trans) {
                         trans = btrfs_start_transaction(root, 0);
-                       BUG_ON(IS_ERR(trans));
+                       if (IS_ERR(trans))
+                               return PTR_ERR(trans);
                         btrfs_set_trans_block_group(trans, inode);
                         trans->block_rsv = root->orphan_block_rsv;
                 }
   
                 ret = btrfs_block_rsv_check(trans, root,
                                             root->orphan_block_rsv, 0, 5);
-               if (ret) {
-                       BUG_ON(ret != -EAGAIN);
+               if (ret == -EAGAIN) {
                         ret = btrfs_commit_transaction(trans, root);
-                       BUG_ON(ret);
+                       if (ret)
+                               return ret;
                         trans = NULL;
                         continue;
+               } else if (ret) {
+                       err = ret;
+                       break;
                 }
   
                 ret = btrfs_truncate_inode_items(trans, root, inode,
                                                  inode->i_size,
                                                  BTRFS_EXTENT_DATA_KEY);
-               if (ret != -EAGAIN)
+               if (ret != -EAGAIN) {
+                       err = ret;
                         break;
+               }
   
                 ret = btrfs_update_inode(trans, root, inode);
-               BUG_ON(ret);
+               if (ret) {
+                       err = ret;
+                       break;
+               }
   
                 nr = trans->blocks_used;
                 btrfs_end_transaction(trans, root);
@@@ -6554,16 -6624,27 +6622,27 @@@
   
         if (ret == 0 && inode->i_nlink > 0) {
                 ret = btrfs_orphan_del(trans, inode);
-               BUG_ON(ret);
+               if (ret)
+                       err = ret;
+       } else if (ret && inode->i_nlink > 0) {
+               /*
+                * Failed to do the truncate, remove us from the in memory
+                * orphan list.
+                */
+               ret = btrfs_orphan_del(NULL, inode);
         }
   
         ret = btrfs_update_inode(trans, root, inode);
-       BUG_ON(ret);
+       if (ret && !err)
+               err = ret;
   
         nr = trans->blocks_used;
         ret = btrfs_end_transaction_throttle(trans, root);
-       BUG_ON(ret);
+       if (ret && !err)
+               err = ret;
         btrfs_btree_balance_dirty(root, nr);
+ 
+       return err;
   }
   
   /*
@@@ -6630,9 -6711,8 +6709,8 @@@ struct inode *btrfs_alloc_inode(struct 
         ei->index_cnt = (u64)-1;
         ei->last_unlink_trans = 0;
   
-       spin_lock_init(&ei->accounting_lock);
         atomic_set(&ei->outstanding_extents, 0);
-       ei->reserved_extents = 0;
+       atomic_set(&ei->reserved_extents, 0);
   
         ei->ordered_data_close = 0;
         ei->orphan_meta_reserved = 0;
@@@ -6668,7 -6748,7 +6746,7 @@@ void btrfs_destroy_inode(struct inode *
         WARN_ON(!list_empty(&inode->i_dentry));
         WARN_ON(inode->i_data.nrpages);
         WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
-       WARN_ON(BTRFS_I(inode)->reserved_extents);
+       WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
   
         /*
          * This can happen where we create an inode, but somebody else also
@@@ -6760,6 -6840,8 +6838,8 @@@ void btrfs_destroy_cachep(void
                 kmem_cache_destroy(btrfs_transaction_cachep);
         if (btrfs_path_cachep)
                 kmem_cache_destroy(btrfs_path_cachep);
+       if (btrfs_free_space_cachep)
+               kmem_cache_destroy(btrfs_free_space_cachep);
   }
   
   int btrfs_init_cachep(void)
@@@ -6788,6 -6870,12 +6868,12 @@@
         if (!btrfs_path_cachep)
                 goto fail;
   
+       btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache",
+                       sizeof(struct btrfs_free_space), 0,
+                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+       if (!btrfs_free_space_cachep)
+               goto fail;
+ 
         return 0;
   fail:
         btrfs_destroy_cachep();
@@@ -6806,6 -6894,26 +6892,26 @@@ static int btrfs_getattr(struct vfsmoun
         return 0;
   }
   
+ /*
+  * If a file is moved, it will inherit the cow and compression flags of the new
+  * directory.
+  */
+ static void fixup_inode_flags(struct inode *dir, struct inode *inode)
+ {
+       struct btrfs_inode *b_dir = BTRFS_I(dir);
+       struct btrfs_inode *b_inode = BTRFS_I(inode);
+ 
+       if (b_dir->flags & BTRFS_INODE_NODATACOW)
+               b_inode->flags |= BTRFS_INODE_NODATACOW;
+       else
+               b_inode->flags &= ~BTRFS_INODE_NODATACOW;
+ 
+       if (b_dir->flags & BTRFS_INODE_COMPRESS)
+               b_inode->flags |= BTRFS_INODE_COMPRESS;
+       else
+               b_inode->flags &= ~BTRFS_INODE_COMPRESS;
+ }
+ 
   static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                            struct inode *new_dir, struct dentry *new_dentry)
   {
@@@ -6908,11 -7016,12 +7014,12 @@@
                                         old_dentry->d_name.name,
                                         old_dentry->d_name.len);
         } else {
-               btrfs_inc_nlink(old_dentry->d_inode);
-               ret = btrfs_unlink_inode(trans, root, old_dir,
-                                        old_dentry->d_inode,
-                                        old_dentry->d_name.name,
-                                        old_dentry->d_name.len);
+               ret = __btrfs_unlink_inode(trans, root, old_dir,
+                                       old_dentry->d_inode,
+                                       old_dentry->d_name.name,
+                                       old_dentry->d_name.len);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, root, old_inode);
         }
         BUG_ON(ret);
   
@@@ -6939,6 -7048,8 +7046,8 @@@
                 }
         }
   
+       fixup_inode_flags(new_dir, old_inode);
+ 
         ret = btrfs_add_link(trans, new_dir, old_inode,
                              new_dentry->d_name.name,
                              new_dentry->d_name.len, 0, index);
@@@ -7104,7 -7215,7 +7213,7 @@@ static int btrfs_symlink(struct inode *
         if (IS_ERR(inode))
                 goto out_unlock;
   
- -      err = btrfs_init_inode_security(trans, inode, dir);
+ +      err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
         if (err) {
                 drop_inode = 1;
                 goto out_unlock;
@@@ -7340,6 -7451,7 +7449,6 @@@ static const struct address_space_opera
         .writepage      = btrfs_writepage,
         .writepages     = btrfs_writepages,
         .readpages      = btrfs_readpages,
- -      .sync_page      = block_sync_page,
         .direct_IO      = btrfs_direct_IO,
         .invalidatepage = btrfs_invalidatepage,
         .releasepage    = btrfs_releasepage,
@@@ -7355,7 -7467,6 +7464,6 @@@ static const struct address_space_opera
   };
   
   static const struct inode_operations btrfs_file_inode_operations = {
-       .truncate       = btrfs_truncate,
         .getattr        = btrfs_getattr,
         .setattr        = btrfs_setattr,
         .setxattr       = btrfs_setxattr,
diff --combined fs/btrfs/ioctl.c

index d1bace3,6b70e0e..7c07fe2
--- 1/fs/btrfs/ioctl.c
--- 2/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@@ -40,6 -40,7 +40,7 @@@
   #include <linux/xattr.h>
   #include <linux/vmalloc.h>
   #include <linux/slab.h>
+ #include <linux/blkdev.h>
   #include "compat.h"
   #include "ctree.h"
   #include "disk-io.h"
@@@ -138,6 -139,24 +139,24 @@@ static int btrfs_ioctl_getflags(struct 
         return 0;
   }
   
+ static int check_flags(unsigned int flags)
+ {
+       if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+                     FS_NOATIME_FL | FS_NODUMP_FL | \
+                     FS_SYNC_FL | FS_DIRSYNC_FL | \
+                     FS_NOCOMP_FL | FS_COMPR_FL | \
+                     FS_NOCOW_FL | FS_COW_FL))
+               return -EOPNOTSUPP;
+ 
+       if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
+               return -EINVAL;
+ 
+       if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
+               return -EINVAL;
+ 
+       return 0;
+ }
+ 
   static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
   {
         struct inode *inode = file->f_path.dentry->d_inode;
@@@ -153,12 -172,11 +172,11 @@@
         if (copy_from_user(&flags, arg, sizeof(flags)))
                 return -EFAULT;
   
-       if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
-                     FS_NOATIME_FL | FS_NODUMP_FL | \
-                     FS_SYNC_FL | FS_DIRSYNC_FL))
-               return -EOPNOTSUPP;
+       ret = check_flags(flags);
+       if (ret)
+               return ret;
   
- -      if (!is_owner_or_cap(inode))
+ +      if (!inode_owner_or_capable(inode))
                 return -EACCES;
   
         mutex_lock(&inode->i_mutex);
@@@ -201,6 -219,22 +219,22 @@@
         else
                 ip->flags &= ~BTRFS_INODE_DIRSYNC;
   
+       /*
+        * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
+        * flag may be changed automatically if compression code won't make
+        * things smaller.
+        */
+       if (flags & FS_NOCOMP_FL) {
+               ip->flags &= ~BTRFS_INODE_COMPRESS;
+               ip->flags |= BTRFS_INODE_NOCOMPRESS;
+       } else if (flags & FS_COMPR_FL) {
+               ip->flags |= BTRFS_INODE_COMPRESS;
+               ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+       }
+       if (flags & FS_NOCOW_FL)
+               ip->flags |= BTRFS_INODE_NODATACOW;
+       else if (flags & FS_COW_FL)
+               ip->flags &= ~BTRFS_INODE_NODATACOW;
   
         trans = btrfs_join_transaction(root, 1);
         BUG_ON(IS_ERR(trans));
@@@ -213,9 -247,11 +247,11 @@@
         btrfs_end_transaction(trans, root);
   
         mnt_drop_write(file->f_path.mnt);
+ 
+       ret = 0;
    out_unlock:
         mutex_unlock(&inode->i_mutex);
-       return 0;
+       return ret;
   }
   
   static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
@@@ -225,6 -261,49 +261,49 @@@
         return put_user(inode->i_generation, arg);
   }
   
+ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
+ {
+       struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_device *device;
+       struct request_queue *q;
+       struct fstrim_range range;
+       u64 minlen = ULLONG_MAX;
+       u64 num_devices = 0;
+       int ret;
+ 
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+ 
+       mutex_lock(&fs_info->fs_devices->device_list_mutex);
+       list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+               if (!device->bdev)
+                       continue;
+               q = bdev_get_queue(device->bdev);
+               if (blk_queue_discard(q)) {
+                       num_devices++;
+                       minlen = min((u64)q->limits.discard_granularity,
+                                    minlen);
+               }
+       }
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+       if (!num_devices)
+               return -EOPNOTSUPP;
+ 
+       if (copy_from_user(&range, arg, sizeof(range)))
+               return -EFAULT;
+ 
+       range.minlen = max(range.minlen, minlen);
+       ret = btrfs_trim_fs(root, &range);
+       if (ret < 0)
+               return ret;
+ 
+       if (copy_to_user(arg, &range, sizeof(range)))
+               return -EFAULT;
+ 
+       return 0;
+ }
+ 
   static noinline int create_subvol(struct btrfs_root *root,
                                   struct dentry *dentry,
                                   char *name, int namelen,
@@@ -409,7 -488,9 +488,9 @@@ static int create_snapshot(struct btrfs
         if (ret)
                 goto fail;
   
-       btrfs_orphan_cleanup(pending_snapshot->snap);
+       ret = btrfs_orphan_cleanup(pending_snapshot->snap);
+       if (ret)
+               goto fail;
   
         parent = dget_parent(dentry);
         inode = btrfs_lookup_dentry(parent->d_inode, dentry);
@@@ -1077,7 -1158,7 +1158,7 @@@ static noinline int btrfs_ioctl_subvol_
         if (flags & ~BTRFS_SUBVOL_RDONLY)
                 return -EOPNOTSUPP;
   
- -      if (!is_owner_or_cap(inode))
+ +      if (!inode_owner_or_capable(inode))
                 return -EACCES;
   
         down_write(&root->fs_info->subvol_sem);
@@@ -2348,12 -2429,15 +2429,15 @@@ static noinline long btrfs_ioctl_start_
         struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
         struct btrfs_trans_handle *trans;
         u64 transid;
+       int ret;
   
         trans = btrfs_start_transaction(root, 0);
         if (IS_ERR(trans))
                 return PTR_ERR(trans);
         transid = trans->transid;
-       btrfs_commit_transaction_async(trans, root, 0);
+       ret = btrfs_commit_transaction_async(trans, root, 0);
+       if (ret)
+               return ret;
   
         if (argp)
                 if (copy_to_user(argp, &transid, sizeof(transid)))
@@@ -2388,6 -2472,8 +2472,8 @@@ long btrfs_ioctl(struct file *file, uns
                 return btrfs_ioctl_setflags(file, argp);
         case FS_IOC_GETVERSION:
                 return btrfs_ioctl_getversion(file, argp);
+       case FITRIM:
+               return btrfs_ioctl_fitrim(file, argp);
         case BTRFS_IOC_SNAP_CREATE:
                 return btrfs_ioctl_snap_create(file, argp, 0);
         case BTRFS_IOC_SNAP_CREATE_V2:
diff --combined fs/btrfs/volumes.c

index 9d554e8,8b9fb8c..309a57b
--- 1/fs/btrfs/volumes.c
--- 2/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@@ -33,17 -33,6 +33,6 @@@
   #include "volumes.h"
   #include "async-thread.h"
   
- struct map_lookup {
-       u64 type;
-       int io_align;
-       int io_width;
-       int stripe_len;
-       int sector_size;
-       int num_stripes;
-       int sub_stripes;
-       struct btrfs_bio_stripe stripes[];
- };
- 
   static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_device *device);
@@@ -162,6 -151,7 +151,6 @@@ static noinline int run_scheduled_bios(
         struct bio *cur;
         int again = 0;
         unsigned long num_run;
- -      unsigned long num_sync_run;
         unsigned long batch_run = 0;
         unsigned long limit;
         unsigned long last_waited = 0;
@@@ -172,6 -162,11 +161,6 @@@
         limit = btrfs_async_submit_limit(fs_info);
         limit = limit * 2 / 3;
   
- -      /* we want to make sure that every time we switch from the sync
- -       * list to the normal list, we unplug
- -       */
- -      num_sync_run = 0;
- -
   loop:
         spin_lock(&device->io_lock);
   
@@@ -217,6 -212,15 +206,6 @@@ loop_lock
   
         spin_unlock(&device->io_lock);
   
- -      /*
- -       * if we're doing the regular priority list, make sure we unplug
- -       * for any high prio bios we've sent down
- -       */
- -      if (pending_bios == &device->pending_bios && num_sync_run > 0) {
- -              num_sync_run = 0;
- -              blk_run_backing_dev(bdi, NULL);
- -      }
- -
         while (pending) {
   
                 rmb();
@@@ -244,11 -248,19 +233,11 @@@
   
                 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
   
- -              if (cur->bi_rw & REQ_SYNC)
- -                      num_sync_run++;
- -
                 submit_bio(cur->bi_rw, cur);
                 num_run++;
                 batch_run++;
- -              if (need_resched()) {
- -                      if (num_sync_run) {
- -                              blk_run_backing_dev(bdi, NULL);
- -                              num_sync_run = 0;
- -                      }
+ +              if (need_resched())
                         cond_resched();
- -              }
   
                 /*
                  * we made progress, there is more work to do and the bdi
@@@ -281,8 -293,13 +270,8 @@@
                                  * against it before looping
                                  */
                                 last_waited = ioc->last_waited;
- -                              if (need_resched()) {
- -                                      if (num_sync_run) {
- -                                              blk_run_backing_dev(bdi, NULL);
- -                                              num_sync_run = 0;
- -                                      }
+ +                              if (need_resched())
                                         cond_resched();
- -                              }
                                 continue;
                         }
                         spin_lock(&device->io_lock);
@@@ -295,6 -312,22 +284,6 @@@
                 }
         }
   
- -      if (num_sync_run) {
- -              num_sync_run = 0;
- -              blk_run_backing_dev(bdi, NULL);
- -      }
- -      /*
- -       * IO has already been through a long path to get here.  Checksumming,
- -       * async helper threads, perhaps compression.  We've done a pretty
- -       * good job of collecting a batch of IO and should just unplug
- -       * the device right away.
- -       *
- -       * This will help anyone who is waiting on the IO, they might have
- -       * already unplugged, but managed to do so before the bio they
- -       * cared about found its way down here.
- -       */
- -      blk_run_backing_dev(bdi, NULL);
- -
         cond_resched();
         if (again)
                 goto loop;
@@@ -1879,6 -1912,8 +1868,8 @@@ static int btrfs_relocate_chunk(struct 
   
         BUG_ON(ret);
   
+       trace_btrfs_chunk_free(root, map, chunk_offset, em->len);
+ 
         if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
                 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
                 BUG_ON(ret);
@@@ -2606,6 -2641,8 +2597,8 @@@ static int __btrfs_alloc_chunk(struct b
         *num_bytes = chunk_bytes_by_type(type, calc_size,
                                          map->num_stripes, sub_stripes);
   
+       trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
+ 
         em = alloc_extent_map(GFP_NOFS);
         if (!em) {
                 ret = -ENOMEM;
@@@ -2714,6 -2751,7 +2707,7 @@@ static int __finish_chunk_alloc(struct 
                                              item_size);
                 BUG_ON(ret);
         }
+ 
         kfree(chunk);
         return 0;
   }
@@@ -2911,14 -2949,17 +2905,17 @@@ static int find_live_mirror(struct map_
   static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
                              u64 logical, u64 *length,
                              struct btrfs_multi_bio **multi_ret,
- -                           int mirror_num, struct page *unplug_page)
+ +                           int mirror_num)
   {
         struct extent_map *em;
         struct map_lookup *map;
         struct extent_map_tree *em_tree = &map_tree->map_tree;
         u64 offset;
         u64 stripe_offset;
+       u64 stripe_end_offset;
         u64 stripe_nr;
+       u64 stripe_nr_orig;
+       u64 stripe_nr_end;
         int stripes_allocated = 8;
         int stripes_required = 1;
         int stripe_index;
@@@ -2927,7 -2968,7 +2924,7 @@@
         int max_errors = 0;
         struct btrfs_multi_bio *multi = NULL;
   
-       if (multi_ret && !(rw & REQ_WRITE))
+       if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
                 stripes_allocated = 1;
   again:
         if (multi_ret) {
@@@ -2943,6 -2984,11 +2940,6 @@@
         em = lookup_extent_mapping(em_tree, logical, *length);
         read_unlock(&em_tree->lock);
   
- -      if (!em && unplug_page) {
- -              kfree(multi);
- -              return 0;
- -      }
- -
         if (!em) {
                 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
                        (unsigned long long)logical,
@@@ -2968,7 -3014,15 +2965,15 @@@
                         max_errors = 1;
                 }
         }
-       if (multi_ret && (rw & REQ_WRITE) &&
+       if (rw & REQ_DISCARD) {
+               if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+                                BTRFS_BLOCK_GROUP_RAID1 |
+                                BTRFS_BLOCK_GROUP_DUP |
+                                BTRFS_BLOCK_GROUP_RAID10)) {
+                       stripes_required = map->num_stripes;
+               }
+       }
+       if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
             stripes_allocated < stripes_required) {
                 stripes_allocated = map->num_stripes;
                 free_extent_map(em);
@@@ -2988,23 -3042,37 +2993,37 @@@
         /* stripe_offset is the offset of this block in its stripe*/
         stripe_offset = offset - stripe_offset;
   
-       if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
-                        BTRFS_BLOCK_GROUP_RAID10 |
-                        BTRFS_BLOCK_GROUP_DUP)) {
+       if (rw & REQ_DISCARD)
+               *length = min_t(u64, em->len - offset, *length);
+       else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+                             BTRFS_BLOCK_GROUP_RAID1 |
+                             BTRFS_BLOCK_GROUP_RAID10 |
+                             BTRFS_BLOCK_GROUP_DUP)) {
                 /* we limit the length of each bio to what fits in a stripe */
                 *length = min_t(u64, em->len - offset,
-                             map->stripe_len - stripe_offset);
+                               map->stripe_len - stripe_offset);
         } else {
                 *length = em->len - offset;
         }
   
- -      if (!multi_ret && !unplug_page)
+ +      if (!multi_ret)
                 goto out;
   
         num_stripes = 1;
         stripe_index = 0;
-       if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-               if (rw & REQ_WRITE)
+       stripe_nr_orig = stripe_nr;
+       stripe_nr_end = (offset + *length + map->stripe_len - 1) &
+                       (~(map->stripe_len - 1));
+       do_div(stripe_nr_end, map->stripe_len);
+       stripe_end_offset = stripe_nr_end * map->stripe_len -
+                           (offset + *length);
+       if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+               if (rw & REQ_DISCARD)
+                       num_stripes = min_t(u64, map->num_stripes,
+                                           stripe_nr_end - stripe_nr_orig);
+               stripe_index = do_div(stripe_nr, map->num_stripes);
+       } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
- -              if (unplug_page || (rw & (REQ_WRITE | REQ_DISCARD)))
++              if (rw & (REQ_WRITE | REQ_DISCARD))
                         num_stripes = map->num_stripes;
                 else if (mirror_num)
                         stripe_index = mirror_num - 1;
@@@ -3015,7 -3083,7 +3034,7 @@@
                 }
   
         } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-               if (rw & REQ_WRITE)
+               if (rw & (REQ_WRITE | REQ_DISCARD))
                         num_stripes = map->num_stripes;
                 else if (mirror_num)
                         stripe_index = mirror_num - 1;
@@@ -3026,8 -3094,12 +3045,12 @@@
                 stripe_index = do_div(stripe_nr, factor);
                 stripe_index *= map->sub_stripes;
   
- -              if (unplug_page || (rw & REQ_WRITE))
+ +              if (rw & REQ_WRITE)
                         num_stripes = map->sub_stripes;
+               else if (rw & REQ_DISCARD)
+                       num_stripes = min_t(u64, map->sub_stripes *
+                                           (stripe_nr_end - stripe_nr_orig),
+                                           map->num_stripes);
                 else if (mirror_num)
                         stripe_index += mirror_num - 1;
                 else {
@@@ -3045,12 -3117,115 +3068,101 @@@
         }
         BUG_ON(stripe_index >= map->num_stripes);
   
-       for (i = 0; i < num_stripes; i++) {
-               multi->stripes[i].physical =
-                       map->stripes[stripe_index].physical +
-                       stripe_offset + stripe_nr * map->stripe_len;
-               multi->stripes[i].dev = map->stripes[stripe_index].dev;
-               stripe_index++;
+       if (rw & REQ_DISCARD) {
+               for (i = 0; i < num_stripes; i++) {
+                       multi->stripes[i].physical =
+                               map->stripes[stripe_index].physical +
+                               stripe_offset + stripe_nr * map->stripe_len;
+                       multi->stripes[i].dev = map->stripes[stripe_index].dev;
+ 
+                       if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+                               u64 stripes;
+                               u32 last_stripe = 0;
+                               int j;
+ 
+                               div_u64_rem(stripe_nr_end - 1,
+                                           map->num_stripes,
+                                           &last_stripe);
+ 
+                               for (j = 0; j < map->num_stripes; j++) {
+                                       u32 test;
+ 
+                                       div_u64_rem(stripe_nr_end - 1 - j,
+                                                   map->num_stripes, &test);
+                                       if (test == stripe_index)
+                                               break;
+                               }
+                               stripes = stripe_nr_end - 1 - j;
+                               do_div(stripes, map->num_stripes);
+                               multi->stripes[i].length = map->stripe_len *
+                                       (stripes - stripe_nr + 1);
+ 
+                               if (i == 0) {
+                                       multi->stripes[i].length -=
+                                               stripe_offset;
+                                       stripe_offset = 0;
+                               }
+                               if (stripe_index == last_stripe)
+                                       multi->stripes[i].length -=
+                                               stripe_end_offset;
+                       } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+                               u64 stripes;
+                               int j;
+                               int factor = map->num_stripes /
+                                            map->sub_stripes;
+                               u32 last_stripe = 0;
+ 
+                               div_u64_rem(stripe_nr_end - 1,
+                                           factor, &last_stripe);
+                               last_stripe *= map->sub_stripes;
+ 
+                               for (j = 0; j < factor; j++) {
+                                       u32 test;
+ 
+                                       div_u64_rem(stripe_nr_end - 1 - j,
+                                                   factor, &test);
+ 
+                                       if (test ==
+                                           stripe_index / map->sub_stripes)
+                                               break;
+                               }
+                               stripes = stripe_nr_end - 1 - j;
+                               do_div(stripes, factor);
+                               multi->stripes[i].length = map->stripe_len *
+                                       (stripes - stripe_nr + 1);
+ 
+                               if (i < map->sub_stripes) {
+                                       multi->stripes[i].length -=
+                                               stripe_offset;
+                                       if (i == map->sub_stripes - 1)
+                                               stripe_offset = 0;
+                               }
+                               if (stripe_index >= last_stripe &&
+                                   stripe_index <= (last_stripe +
+                                                    map->sub_stripes - 1)) {
+                                       multi->stripes[i].length -=
+                                               stripe_end_offset;
+                               }
+                       } else
+                               multi->stripes[i].length = *length;
+ 
+                       stripe_index++;
+                       if (stripe_index == map->num_stripes) {
+                               /* This could only happen for RAID0/10 */
+                               stripe_index = 0;
+                               stripe_nr++;
+                       }
+               }
+       } else {
+               for (i = 0; i < num_stripes; i++) {
- -                      if (unplug_page) {
- -                              struct btrfs_device *device;
- -                              struct backing_dev_info *bdi;
- -
- -                              device = map->stripes[stripe_index].dev;
- -                              if (device->bdev) {
- -                                      bdi = blk_get_backing_dev_info(device->
- -                                                                     bdev);
- -                                      if (bdi->unplug_io_fn)
- -                                              bdi->unplug_io_fn(bdi,
- -                                                                unplug_page);
- -                              }
- -                      } else {
- -                              multi->stripes[i].physical =
- -                                      map->stripes[stripe_index].physical +
- -                                      stripe_offset +
- -                                      stripe_nr * map->stripe_len;
- -                              multi->stripes[i].dev =
- -                                      map->stripes[stripe_index].dev;
- -                      }
++                      multi->stripes[i].physical =
++                              map->stripes[stripe_index].physical +
++                              stripe_offset +
++                              stripe_nr * map->stripe_len;
++                      multi->stripes[i].dev =
++                              map->stripes[stripe_index].dev;
+                       stripe_index++;
+               }
         }
         if (multi_ret) {
                 *multi_ret = multi;
@@@ -3067,7 -3242,7 +3179,7 @@@ int btrfs_map_block(struct btrfs_mappin
                       struct btrfs_multi_bio **multi_ret, int mirror_num)
   {
         return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
- -                               mirror_num, NULL);
+ +                               mirror_num);
   }
   
   int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@@ -3133,6 -3308,14 +3245,6 @@@
   
         free_extent_map(em);
         return 0;
- -}
- -
- -int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
- -                    u64 logical, struct page *page)
- -{
- -      u64 length = PAGE_CACHE_SIZE;
- -      return __btrfs_map_block(map_tree, READ, logical, &length,
- -                               NULL, 0, page);
   }
   
   static void end_bio_multi_stripe(struct bio *bio, int err)
diff --combined fs/btrfs/xattr.c

index d779cef,e5d22f2..a5303b8
--- 1/fs/btrfs/xattr.c
--- 2/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@@ -242,6 -242,8 +242,8 @@@ ssize_t btrfs_listxattr(struct dentry *
                         break;
   
                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+               if (verify_dir_item(root, leaf, di))
+                       continue;
   
                 name_len = btrfs_dir_name_len(leaf, di);
                 total_size += name_len + 1;
@@@ -370,8 -372,7 +372,8 @@@ int btrfs_removexattr(struct dentry *de
   }
   
   int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
- -                            struct inode *inode, struct inode *dir)
+ +                            struct inode *inode, struct inode *dir,
+ +                            const struct qstr *qstr)
   {
         int err;
         size_t len;
@@@ -379,8 -380,7 +381,8 @@@
         char *suffix;
         char *name;
   
- -      err = security_inode_init_security(inode, dir, &suffix, &value, &len);
+ +      err = security_inode_init_security(inode, dir, qstr, &suffix, &value,
+ +                                         &len);
         if (err) {
                 if (err == -EOPNOTSUPP)
                         return 0;
diff --combined include/linux/fs.h

index b677bd7,de9dd81..52f283c
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -102,9 -102,6 +102,9 @@@ struct inodes_stat_t 
   /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
   #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
   
+ +/* File is opened with O_PATH; almost nothing can be done with it */
+ +#define FMODE_PATH            ((__force fmode_t)0x4000)
+ +
   /* File was opened by fanotify and shouldn't generate fanotify events */
   #define FMODE_NONOTIFY                ((__force fmode_t)0x1000000)
   
@@@ -138,10 -135,16 +138,10 @@@
    *                    block layer could (in theory) choose to ignore this
    *                    request if it runs into resource problems.
    * WRITE              A normal async write. Device will be plugged.
- - * WRITE_SYNC_PLUG    Synchronous write. Identical to WRITE, but passes down
+ + * WRITE_SYNC         Synchronous write. Identical to WRITE, but passes down
    *                    the hint that someone will be waiting on this IO
- - *                    shortly. The device must still be unplugged explicitly,
- - *                    WRITE_SYNC_PLUG does not do this as we could be
- - *                    submitting more writes before we actually wait on any
- - *                    of them.
- - * WRITE_SYNC         Like WRITE_SYNC_PLUG, but also unplugs the device
- - *                    immediately after submission. The write equivalent
- - *                    of READ_SYNC.
- - * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only.
+ + *                    shortly. The write equivalent of READ_SYNC.
+ + * WRITE_ODIRECT      Special case write for O_DIRECT only.
    * WRITE_FLUSH                Like WRITE_SYNC but with preceding cache flush.
    * WRITE_FUA          Like WRITE_SYNC but data is guaranteed to be on
    *                    non-volatile media on completion.
@@@ -157,14 -160,18 +157,14 @@@
   #define WRITE                 RW_MASK
   #define READA                 RWA_MASK
   
- -#define READ_SYNC             (READ | REQ_SYNC | REQ_UNPLUG)
+ +#define READ_SYNC             (READ | REQ_SYNC)
   #define READ_META             (READ | REQ_META)
- -#define WRITE_SYNC_PLUG               (WRITE | REQ_SYNC | REQ_NOIDLE)
- -#define WRITE_SYNC            (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
- -#define WRITE_ODIRECT_PLUG    (WRITE | REQ_SYNC)
+ +#define WRITE_SYNC            (WRITE | REQ_SYNC | REQ_NOIDLE)
+ +#define WRITE_ODIRECT         (WRITE | REQ_SYNC)
   #define WRITE_META            (WRITE | REQ_META)
- -#define WRITE_FLUSH           (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
- -                               REQ_FLUSH)
- -#define WRITE_FUA             (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
- -                               REQ_FUA)
- -#define WRITE_FLUSH_FUA               (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
- -                               REQ_FLUSH | REQ_FUA)
+ +#define WRITE_FLUSH           (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
+ +#define WRITE_FUA             (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
+ +#define WRITE_FLUSH_FUA               (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
   
   #define SEL_IN                1
   #define SEL_OUT               2
@@@ -357,6 -364,8 +357,8 @@@
   #define FS_TOPDIR_FL                  0x00020000 /* Top of directory hierarchies*/
   #define FS_EXTENT_FL                  0x00080000 /* Extents */
   #define FS_DIRECTIO_FL                        0x00100000 /* Use direct i/o */
+ #define FS_NOCOW_FL                   0x00800000 /* Do not cow file */
+ #define FS_COW_FL                     0x02000000 /* Cow file */
   #define FS_RESERVED_FL                        0x80000000 /* reserved for ext2 lib */
   
   #define FS_FL_USER_VISIBLE            0x0003DFFF /* User visible flags */
@@@ -576,6 -585,7 +578,6 @@@ typedef int (*read_actor_t)(read_descri
   struct address_space_operations {
         int (*writepage)(struct page *page, struct writeback_control *wbc);
         int (*readpage)(struct file *, struct page *);
- -      void (*sync_page)(struct page *);
   
         /* Write back some dirty pages from this mapping. */
         int (*writepages)(struct address_space *, struct writeback_control *);
@@@ -651,9 -661,9 +653,9 @@@ struct address_space 
   
   struct block_device {
         dev_t                   bd_dev;  /* not a kdev_t - it's a search key */
+ +      int                     bd_openers;
         struct inode *          bd_inode;       /* will die */
         struct super_block *    bd_super;
- -      int                     bd_openers;
         struct mutex            bd_mutex;       /* open/close mutex */
         struct list_head        bd_inodes;
         void *                  bd_claiming;
@@@ -790,7 -800,8 +792,7 @@@ struct inode 
   #endif
   
   #ifdef CONFIG_IMA
- -      /* protected by i_lock */
- -      unsigned int            i_readcount; /* struct files open RO */
+ +      atomic_t                i_readcount; /* struct files open RO */
   #endif
         atomic_t                i_writecount;
   #ifdef CONFIG_SECURITY
@@@ -969,13 -980,6 +971,13 @@@ struct file 
   #endif
   };
   
+ +struct file_handle {
+ +      __u32 handle_bytes;
+ +      int handle_type;
+ +      /* file identifier */
+ +      unsigned char f_handle[0];
+ +};
+ +
   #define get_file(x)   atomic_long_inc(&(x)->f_count)
   #define fput_atomic(x)        atomic_long_add_unless(&(x)->f_count, -1, 1)
   #define file_count(x) atomic_long_read(&(x)->f_count)
@@@ -1399,7 -1403,6 +1401,7 @@@ struct super_block 
         wait_queue_head_t       s_wait_unfrozen;
   
         char s_id[32];                          /* Informational name */
+ +      u8 s_uuid[16];                          /* UUID */
   
         void                    *s_fs_info;     /* Filesystem private info */
         fmode_t                 s_mode;
@@@ -1446,13 -1449,8 +1448,13 @@@ enum 
   #define put_fs_excl() atomic_dec(&current->fs_excl)
   #define has_fs_excl() atomic_read(&current->fs_excl)
   
- -#define is_owner_or_cap(inode)        \
- -      ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER))
+ +/*
+ + * until VFS tracks user namespaces for inodes, just make all files
+ + * belong to init_user_ns
+ + */
+ +extern struct user_namespace init_user_ns;
+ +#define inode_userns(inode) (&init_user_ns)
+ +extern bool inode_owner_or_capable(const struct inode *inode);
   
   /* not quite ready to be deprecated, but... */
   extern void lock_super(struct super_block *);
@@@ -1625,8 -1623,6 +1627,8 @@@ struct super_operations 
         void (*umount_begin) (struct super_block *);
   
         int (*show_options)(struct seq_file *, struct vfsmount *);
+ +      int (*show_devname)(struct seq_file *, struct vfsmount *);
+ +      int (*show_path)(struct seq_file *, struct vfsmount *);
         int (*show_stats)(struct seq_file *, struct vfsmount *);
   #ifdef CONFIG_QUOTA
         ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
@@@ -1636,7 -1632,7 +1638,7 @@@
   };
   
   /*
- - * Inode state bits.  Protected by inode_lock.
+ + * Inode state bits.  Protected by inode->i_lock
    *
    * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
    * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
@@@ -1790,6 -1786,8 +1792,6 @@@ int sync_inode_metadata(struct inode *i
   struct file_system_type {
         const char *name;
         int fs_flags;
- -      int (*get_sb) (struct file_system_type *, int,
- -                     const char *, void *, struct vfsmount *);
         struct dentry *(*mount) (struct file_system_type *, int,
                        const char *, void *);
         void (*kill_sb) (struct super_block *);
@@@ -1812,12 -1810,24 +1814,12 @@@ extern struct dentry *mount_ns(struct f
   extern struct dentry *mount_bdev(struct file_system_type *fs_type,
         int flags, const char *dev_name, void *data,
         int (*fill_super)(struct super_block *, void *, int));
- -extern int get_sb_bdev(struct file_system_type *fs_type,
- -      int flags, const char *dev_name, void *data,
- -      int (*fill_super)(struct super_block *, void *, int),
- -      struct vfsmount *mnt);
   extern struct dentry *mount_single(struct file_system_type *fs_type,
         int flags, void *data,
         int (*fill_super)(struct super_block *, void *, int));
- -extern int get_sb_single(struct file_system_type *fs_type,
- -      int flags, void *data,
- -      int (*fill_super)(struct super_block *, void *, int),
- -      struct vfsmount *mnt);
   extern struct dentry *mount_nodev(struct file_system_type *fs_type,
         int flags, void *data,
         int (*fill_super)(struct super_block *, void *, int));
- -extern int get_sb_nodev(struct file_system_type *fs_type,
- -      int flags, void *data,
- -      int (*fill_super)(struct super_block *, void *, int),
- -      struct vfsmount *mnt);
   void generic_shutdown_super(struct super_block *sb);
   void kill_block_super(struct super_block *sb);
   void kill_anon_super(struct super_block *sb);
@@@ -1833,6 -1843,7 +1835,6 @@@ extern struct dentry *mount_pseudo(stru
         const struct super_operations *ops,
         const struct dentry_operations *dops,
         unsigned long);
- -extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
   
   static inline void sb_mark_dirty(struct super_block *sb)
   {
@@@ -1865,8 -1876,6 +1867,8 @@@ extern void drop_collected_mounts(struc
   extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
                           struct vfsmount *);
   extern int vfs_statfs(struct path *, struct kstatfs *);
+ +extern int user_statfs(const char __user *, struct kstatfs *);
+ +extern int fd_statfs(int, struct kstatfs *);
   extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
   extern int freeze_super(struct super_block *super);
   extern int thaw_super(struct super_block *super);
@@@ -1983,8 -1992,6 +1985,8 @@@ extern int do_fallocate(struct file *fi
   extern long do_sys_open(int dfd, const char __user *filename, int flags,
                         int mode);
   extern struct file *filp_open(const char *, int, int);
+ +extern struct file *file_open_root(struct dentry *, struct vfsmount *,
+ +                                 const char *, int);
   extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
                                  const struct cred *);
   extern int filp_close(struct file *, fl_owner_t id);
@@@ -2195,31 -2202,15 +2197,31 @@@ static inline void allow_write_access(s
         if (file)
                 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
   }
+ +#ifdef CONFIG_IMA
+ +static inline void i_readcount_dec(struct inode *inode)
+ +{
+ +      BUG_ON(!atomic_read(&inode->i_readcount));
+ +      atomic_dec(&inode->i_readcount);
+ +}
+ +static inline void i_readcount_inc(struct inode *inode)
+ +{
+ +      atomic_inc(&inode->i_readcount);
+ +}
+ +#else
+ +static inline void i_readcount_dec(struct inode *inode)
+ +{
+ +      return;
+ +}
+ +static inline void i_readcount_inc(struct inode *inode)
+ +{
+ +      return;
+ +}
+ +#endif
   extern int do_pipe_flags(int *, int);
   extern struct file *create_read_pipe(struct file *f, int flags);
   extern struct file *create_write_pipe(int flags);
   extern void free_write_pipe(struct file *);
   
- -extern struct file *do_filp_open(int dfd, const char *pathname,
- -              int open_flag, int mode, int acc_mode);
- -extern int may_open(struct path *, int, int);
- -
   extern int kernel_read(struct file *, loff_t, char *, unsigned long);
   extern struct file * open_exec(const char *);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 28 Mar 2011 22:31:05 +0000 (15:31 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 28 Mar 2011 22:31:05 +0000 (15:31 -0700)
		1	2
fs/btrfs/disk-io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/extent_io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/ioctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/volumes.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/xattr.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history