Btrfs: Back port to 2.6.18-el kernels
author Chris Mason <chris.mason@oracle.com>
Tue, 18 Dec 2007 21:15:09 +0000 (16:15 -0500)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:03:58 +0000 (11:03 -0400)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent_map.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/super.c
fs/btrfs/sysfs.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h

index 32b2446..2559f5d 100644 (file)
@@ -19,6 +19,7 @@
 #ifndef __BTRFS__
 #define __BTRFS__
 
+#include <linux/version.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/fs.h>
@@ -334,7 +335,11 @@ struct btrfs_fs_info {
        struct list_head trans_list;
        struct list_head hashers;
        struct list_head dead_roots;
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+       struct work_struct trans_work;
+#else
        struct delayed_work trans_work;
+#endif
        struct kobject super_kobj;
        struct completion kobj_unregister;
        int do_barriers;
@@ -916,6 +921,14 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) {
        ((unsigned long)(btrfs_leaf_data(leaf) + \
        btrfs_item_offset_nr(leaf, slot)))
 
+static inline struct dentry *fdentry(struct file *file) {
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+       return file->f_dentry;          /* kernels up to 2.6.18: direct field */
+#else
+       return file->f_path.dentry;     /* later kernels: dentry lives in f_path */
+#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
+}
+
 /* extent-tree.c */
 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
                                  struct btrfs_path *count_path,
index 7213012..ebb2db6 100644 (file)
@@ -593,8 +593,11 @@ struct btrfs_root *open_ctree(struct super_block *sb)
        fs_info->do_barriers = 1;
        fs_info->closing = 0;
        fs_info->total_pinned = 0;
-
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+       INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
+#else
        INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
+#endif
        BTRFS_I(fs_info->btree_inode)->root = tree_root;
        memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
               sizeof(struct btrfs_key));
index 8326a18..0d1e59a 100644 (file)
@@ -1585,8 +1585,18 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 
 static int submit_one_bio(int rw, struct bio *bio)
 {
+       unsigned long long maxsector;   /* matches the %llu used below */
        int ret = 0;
+
        bio_get(bio);
+
+       maxsector = bio->bi_bdev->bd_inode->i_size >> 9; /* size in 512-byte units */
+       if (maxsector < bio->bi_sector) {
+               printk("sector too large max %llu got %llu\n", maxsector,
+                       (unsigned long long)bio->bi_sector);
+               WARN_ON(1);
+       }
+
        submit_bio(rw, bio);
        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
@@ -1678,8 +1688,12 @@ static int __extent_read_full_page(struct extent_map_tree *tree,
 
        while (cur <= end) {
                if (cur >= last_byte) {
+                       char *userpage;
                        iosize = PAGE_CACHE_SIZE - page_offset;
-                       zero_user_page(page, page_offset, iosize, KM_USER0);
+                       userpage = kmap_atomic(page, KM_USER0);
+                       memset(userpage + page_offset, 0, iosize);
+                       flush_dcache_page(page);
+                       kunmap_atomic(userpage, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            GFP_NOFS);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
@@ -1707,7 +1721,12 @@ static int __extent_read_full_page(struct extent_map_tree *tree,
 
                /* we've found a hole, just zero and go on */
                if (block_start == EXTENT_MAP_HOLE) {
-                       zero_user_page(page, page_offset, iosize, KM_USER0);
+                       char *userpage;
+                       userpage = kmap_atomic(page, KM_USER0);
+                       memset(userpage + page_offset, 0, iosize);
+                       flush_dcache_page(page);
+                       kunmap_atomic(userpage, KM_USER0);
+
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            GFP_NOFS);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
@@ -1804,9 +1823,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        }
 
        if (page->index == end_index) {
+               char *userpage;
+
                size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
-               zero_user_page(page, offset,
-                              PAGE_CACHE_SIZE - offset, KM_USER0);
+
+               userpage = kmap_atomic(page, KM_USER0);
+               memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
+               flush_dcache_page(page);
+               kunmap_atomic(userpage, KM_USER0);
        }
 
        set_page_extent_mapped(page);
@@ -1921,6 +1945,129 @@ done:
        return 0;
 }
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+
+/* Copy of the 2.6.23 write_cache_pages(), built only for kernels <= 2.6.18 */
+typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
+                                void *data);
+
+/**
+ * write_cache_pages - walk the list of dirty pages of the given address space
+ * and write all of them.
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @writepage: function called for each page
+ * @data: data passed to writepage function
+ *
+ * If a page is already under I/O, write_cache_pages() skips it, even if it's
+ * dirty.  This is desirable behaviour for memory-cleaning writeback, but it is
+ * INCORRECT for data-integrity system calls such as fsync().  fsync() and
+ * msync() need to guarantee that all the data which was dirty at the time the
+ * call was made get new I/O started against them, so unless wbc->sync_mode is
+ * WB_SYNC_NONE we wait for existing IO on each page to complete first.
+ * Returns 0 or the last @writepage result (AOP_WRITEPAGE_ACTIVATE becomes 0).
+ */
+static int write_cache_pages(struct address_space *mapping,
+                     struct writeback_control *wbc, writepage_t writepage,
+                     void *data)
+{
+       struct backing_dev_info *bdi = mapping->backing_dev_info;
+       int ret = 0;
+       int done = 0;
+       struct pagevec pvec;
+       int nr_pages;
+       pgoff_t index;
+       pgoff_t end;            /* Inclusive */
+       int scanned = 0;
+       int range_whole = 0;    /* set when wbc spans 0..LLONG_MAX */
+
+       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+               wbc->encountered_congestion = 1;
+               return 0;
+       }
+
+       pagevec_init(&pvec, 0);
+       if (wbc->range_cyclic) {
+               index = mapping->writeback_index; /* Start from prev offset */
+               end = -1;
+       } else {
+               index = wbc->range_start >> PAGE_CACHE_SHIFT;
+               end = wbc->range_end >> PAGE_CACHE_SHIFT;
+               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+                       range_whole = 1;
+               scanned = 1;
+       }
+retry:
+       while (!done && (index <= end) &&
+              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                                             PAGECACHE_TAG_DIRTY,
+                                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+               unsigned i;
+
+               scanned = 1;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       /*
+                        * At this point we hold neither mapping->tree_lock nor
+                        * lock on the page itself: the page may be truncated or
+                        * invalidated (changing page->mapping to NULL), or even
+                        * swizzled back from swapper_space to tmpfs file
+                        * mapping
+                        */
+                       lock_page(page);
+
+                       if (unlikely(page->mapping != mapping)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (!wbc->range_cyclic && page->index > end) {
+                               done = 1;
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (wbc->sync_mode != WB_SYNC_NONE)
+                               wait_on_page_writeback(page);
+
+                       if (PageWriteback(page) ||
+                           !clear_page_dirty_for_io(page)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       ret = (*writepage)(page, wbc, data);
+
+                       if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
+                               unlock_page(page);
+                               ret = 0;
+                       }
+                       if (ret || (--(wbc->nr_to_write) <= 0))
+                               done = 1;
+                       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+                               wbc->encountered_congestion = 1;
+                               done = 1;
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       if (!scanned && !done) {
+               /*
+                * We hit the last page and there is more work to be done: wrap
+                * back to the start of the file
+                */
+               scanned = 1;
+               index = 0;
+               goto retry;
+       }
+       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+               mapping->writeback_index = index;
+       return ret;
+}
+#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
+
 int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                          get_extent_t *get_extent,
                          struct writeback_control *wbc)
@@ -1945,18 +2092,20 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
        ret = __extent_writepage(page, wbc, &epd);
 
        write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
-       if (epd.bio)
+       if (epd.bio) {
                submit_one_bio(WRITE, epd.bio);
+       }
        return ret;
 }
 EXPORT_SYMBOL(extent_write_full_page);
 
+
 int extent_writepages(struct extent_map_tree *tree,
                      struct address_space *mapping,
                      get_extent_t *get_extent,
                      struct writeback_control *wbc)
 {
-       int ret;
+       int ret = 0;
        struct extent_page_data epd = {
                .bio = NULL,
                .tree = tree,
@@ -1964,8 +2113,9 @@ int extent_writepages(struct extent_map_tree *tree,
        };
 
        ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
-       if (epd.bio)
+       if (epd.bio) {
                submit_one_bio(WRITE, epd.bio);
+       }
        return ret;
 }
 EXPORT_SYMBOL(extent_writepages);
@@ -2106,7 +2256,9 @@ int extent_prepare_write(struct extent_map_tree *tree,
                        flush_dcache_page(page);
                        kunmap_atomic(kaddr, KM_USER0);
                }
-               if (!isnew && !PageUptodate(page) &&
+               if ((em->block_start != EXTENT_MAP_HOLE &&
+                    em->block_start != EXTENT_MAP_INLINE) && 
+                   !isnew && !PageUptodate(page) &&
                    (block_off_end > to || block_off_start < from) &&
                    !test_range_bit(tree, block_start, cur_end,
                                    EXTENT_UPTODATE, 1)) {
index 62fcd79..461b096 100644 (file)
@@ -231,7 +231,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
 {
        int err = 0;
        int i;
-       struct inode *inode = file->f_path.dentry->d_inode;
+       struct inode *inode = fdentry(file)->d_inode;
        struct extent_map *em;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        u64 hint_byte;
@@ -652,7 +652,7 @@ static int prepare_pages(struct btrfs_root *root,
 {
        int i;
        unsigned long index = pos >> PAGE_CACHE_SHIFT;
-       struct inode *inode = file->f_path.dentry->d_inode;
+       struct inode *inode = fdentry(file)->d_inode;
        int err = 0;
        u64 start_pos;
 
@@ -666,7 +666,11 @@ static int prepare_pages(struct btrfs_root *root,
                        err = -ENOMEM;
                        BUG_ON(1);
                }
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+               ClearPageDirty(pages[i]);
+#else
                cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
+#endif
                wait_on_page_writeback(pages[i]);
                set_page_extent_mapped(pages[i]);
                WARN_ON(!PageLocked(pages[i]));
@@ -682,7 +686,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
        ssize_t num_written = 0;
        ssize_t err = 0;
        int ret = 0;
-       struct inode *inode = file->f_path.dentry->d_inode;
+       struct inode *inode = fdentry(file)->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct page **pages = NULL;
        int nrptrs;
@@ -707,7 +711,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                goto out;
        if (count == 0)
                goto out;
-       err = remove_suid(file->f_path.dentry);
+       err = remove_suid(fdentry(file));
        if (err)
                goto out;
        file_update_time(file);
@@ -862,6 +866,9 @@ struct file_operations btrfs_file_operations = {
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
        .splice_read    = generic_file_splice_read,
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+       .sendfile       = generic_file_sendfile,
+#endif
        .write          = btrfs_file_write,
        .mmap           = btrfs_file_mmap,
        .open           = generic_file_open,
index 686dd03..6a7d916 100644 (file)
@@ -532,7 +532,11 @@ err:
                dir->i_size -= name_len * 2;
                dir->i_mtime = dir->i_ctime = CURRENT_TIME;
                btrfs_update_inode(trans, root, dir);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+               dentry->d_inode->i_nlink--;
+#else
                drop_nlink(dentry->d_inode);
+#endif
                ret = btrfs_update_inode(trans, root, dentry->d_inode);
                dir->i_sb->s_dirt = 1;
        }
@@ -1139,7 +1143,7 @@ static unsigned char btrfs_filetype_table[] = {
 
 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-       struct inode *inode = filp->f_path.dentry->d_inode;
+       struct inode *inode = fdentry(filp)->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_item *item;
        struct btrfs_dir_item *di;
@@ -1554,7 +1558,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        if (inode->i_nlink == 0)
                return -ENOENT;
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+       inode->i_nlink++;
+#else
        inc_nlink(inode);
+#endif
        mutex_lock(&root->fs_info->fs_mutex);
        trans = btrfs_start_transaction(root, 1);
 
@@ -1825,6 +1833,9 @@ insert:
        if (ret == -EEXIST) {
                free_extent_map(em);
                em = NULL;
+               if (0 && failed_insert == 1) {
+                       btrfs_drop_extent_cache(inode, start, end);
+               }
                failed_insert++;
                if (failed_insert > 5) {
                        printk("failing to insert %Lu %Lu\n", start, end);
@@ -1942,7 +1953,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
  */
 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 {
-       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       struct inode *inode = fdentry(vma->vm_file)->d_inode;
        unsigned long end;
        loff_t size;
        int ret = -EINVAL;
@@ -2243,7 +2254,7 @@ static unsigned long force_ra(struct address_space *mapping,
 }
 
 int btrfs_defrag_file(struct file *file) {
-       struct inode *inode = file->f_path.dentry->d_inode;
+       struct inode *inode = fdentry(file)->d_inode;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct page *page;
        unsigned long last_index;
@@ -2329,7 +2340,7 @@ static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg)
 
 static int btrfs_ioctl_defrag(struct file *file)
 {
-       struct inode *inode = file->f_path.dentry->d_inode;
+       struct inode *inode = fdentry(file)->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
        switch (inode->i_mode & S_IFMT) {
@@ -2350,7 +2361,7 @@ static int btrfs_ioctl_defrag(struct file *file)
 long btrfs_ioctl(struct file *file, unsigned int
                cmd, unsigned long arg)
 {
-       struct btrfs_root *root = BTRFS_I(file->f_path.dentry->d_inode)->root;
+       struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
 
        switch (cmd) {
        case BTRFS_IOC_SNAP_CREATE:
index b732391..7cab680 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/compat.h>
 #include <linux/parser.h>
 #include <linux/ctype.h>
+#include <linux/namei.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
index 1671c73..cd673ca 100644 (file)
@@ -189,12 +189,29 @@ static struct kset btrfs_kset;
 int btrfs_sysfs_add_super(struct btrfs_fs_info *fs)
 {
        int error;
+       char *name;
+       char c;
+       int len = strlen(fs->sb->s_id) + 1;
+       int i;
+
+       name = kmalloc(len, GFP_NOFS);
+       if (!name) {
+               error = -ENOMEM;
+               goto fail;
+       }
+
+       /* len includes the NUL, so this loop copies the terminator too */
+       for (i = 0; i < len; i++) {
+               c = fs->sb->s_id[i];
+               if (c == '/' || c == '\\')
+                       c = '!';
+               name[i] = c;
+       }
 
        fs->super_kobj.kset = &btrfs_kset;
        fs->super_kobj.ktype = &btrfs_super_ktype;
 
-       error = kobject_set_name(&fs->super_kobj, "%s",
-                                fs->sb->s_id);
+       error = kobject_set_name(&fs->super_kobj, "%s", name);
        if (error)
                goto fail;
 
@@ -202,9 +219,11 @@ int btrfs_sysfs_add_super(struct btrfs_fs_info *fs)
        if (error)
                goto fail;
 
+       kfree(name);
        return 0;
 
 fail:
+       kfree(name);
        printk(KERN_ERR "btrfs: sysfs creation for super failed\n");
        return error;
 }
index 0229e69..1ad611b 100644 (file)
@@ -614,12 +614,20 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
        }
        return 0;
 }
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+void btrfs_transaction_cleaner(void *p)
+#else
 void btrfs_transaction_cleaner(struct work_struct *work)
+#endif
 {
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+       struct btrfs_fs_info *fs_info = p;
+#else
        struct btrfs_fs_info *fs_info = container_of(work,
                                                     struct btrfs_fs_info,
                                                     trans_work.work);
 
+#endif
        struct btrfs_root *root = fs_info->tree_root;
        struct btrfs_transaction *cur;
        struct btrfs_trans_handle *trans;
index ae39fcf..eef840b 100644 (file)
@@ -73,7 +73,12 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root);
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+void btrfs_transaction_cleaner(void *p);
+#else
 void btrfs_transaction_cleaner(struct work_struct *work);
+#endif
+
 void btrfs_transaction_flush_work(struct btrfs_root *root);
 void btrfs_transaction_queue_work(struct btrfs_root *root, int delay);
 void btrfs_init_transaction_sys(void);