ceph: factor out libceph from Ceph file system
authorYehuda Sadeh <yehuda@hq.newdream.net>
Tue, 6 Apr 2010 22:14:15 +0000 (15:14 -0700)
committerSage Weil <sage@newdream.net>
Wed, 20 Oct 2010 22:37:28 +0000 (15:37 -0700)
This factors out protocol and low-level storage parts of ceph into a
separate libceph module living in net/ceph and include/linux/ceph.  This
is mostly a matter of moving files around.  However, a few key pieces
of the interface change as well:

 - ceph_client becomes ceph_fs_client and ceph_client, where the latter
   captures the mon and osd clients, and the fs_client gets the mds client
   and file system specific pieces.
 - Mount option parsing and debugfs setup is correspondingly broken into
   two pieces.
 - The mon client gets a generic handler callback for otherwise unknown
   messages (mds map, in this case).
 - The basic supported/required feature bits can be expanded (and are by
   ceph_fs_client).

No functional change, aside from some subtle error handling cases that got
cleaned up in the refactoring process.

Signed-off-by: Sage Weil <sage@newdream.net>
73 files changed:
MAINTAINERS
fs/ceph/Kconfig
fs/ceph/Makefile
fs/ceph/README [deleted file]
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/ceph_frag.c
fs/ceph/debugfs.c
fs/ceph/dir.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/ioctl.c
fs/ceph/locks.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/mdsmap.c
fs/ceph/snap.c
fs/ceph/strings.c [moved from fs/ceph/ceph_strings.c with 59% similarity]
fs/ceph/super.c
fs/ceph/super.h
fs/ceph/xattr.c
include/linux/ceph/auth.h [moved from fs/ceph/auth.h with 97% similarity]
include/linux/ceph/buffer.h [moved from fs/ceph/buffer.h with 100% similarity]
include/linux/ceph/ceph_debug.h [moved from fs/ceph/ceph_debug.h with 86% similarity]
include/linux/ceph/ceph_frag.h [moved from fs/ceph/ceph_frag.h with 100% similarity]
include/linux/ceph/ceph_fs.h [moved from fs/ceph/ceph_fs.h with 100% similarity]
include/linux/ceph/ceph_hash.h [moved from fs/ceph/ceph_hash.h with 100% similarity]
include/linux/ceph/debugfs.h [new file with mode: 0644]
include/linux/ceph/decode.h [moved from fs/ceph/decode.h with 100% similarity]
include/linux/ceph/libceph.h [new file with mode: 0644]
include/linux/ceph/mdsmap.h [moved from fs/ceph/mdsmap.h with 100% similarity]
include/linux/ceph/messenger.h [moved from fs/ceph/messenger.h with 97% similarity]
include/linux/ceph/mon_client.h [moved from fs/ceph/mon_client.h with 99% similarity]
include/linux/ceph/msgpool.h [moved from fs/ceph/msgpool.h with 100% similarity]
include/linux/ceph/msgr.h [moved from fs/ceph/msgr.h with 100% similarity]
include/linux/ceph/osd_client.h [moved from fs/ceph/osd_client.h with 99% similarity]
include/linux/ceph/osdmap.h [moved from fs/ceph/osdmap.h with 99% similarity]
include/linux/ceph/pagelist.h [moved from fs/ceph/pagelist.h with 100% similarity]
include/linux/ceph/rados.h [moved from fs/ceph/rados.h with 100% similarity]
include/linux/ceph/types.h [moved from fs/ceph/types.h with 100% similarity]
include/linux/crush/crush.h [moved from fs/ceph/crush/crush.h with 100% similarity]
include/linux/crush/hash.h [moved from fs/ceph/crush/hash.h with 100% similarity]
include/linux/crush/mapper.h [moved from fs/ceph/crush/mapper.h with 100% similarity]
net/Kconfig
net/Makefile
net/ceph/Kconfig [new file with mode: 0644]
net/ceph/Makefile [new file with mode: 0644]
net/ceph/armor.c [moved from fs/ceph/armor.c with 100% similarity]
net/ceph/auth.c [moved from fs/ceph/auth.c with 97% similarity]
net/ceph/auth_none.c [moved from fs/ceph/auth_none.c with 96% similarity]
net/ceph/auth_none.h [moved from fs/ceph/auth_none.h with 94% similarity]
net/ceph/auth_x.c [moved from fs/ceph/auth_x.c with 99% similarity]
net/ceph/auth_x.h [moved from fs/ceph/auth_x.h with 96% similarity]
net/ceph/auth_x_protocol.h [moved from fs/ceph/auth_x_protocol.h with 100% similarity]
net/ceph/buffer.c [moved from fs/ceph/buffer.c with 86% similarity]
net/ceph/ceph_common.c [new file with mode: 0644]
net/ceph/ceph_fs.c [moved from fs/ceph/ceph_fs.c with 92% similarity]
net/ceph/ceph_hash.c [moved from fs/ceph/ceph_hash.c with 98% similarity]
net/ceph/ceph_strings.c [new file with mode: 0644]
net/ceph/crush/crush.c [moved from fs/ceph/crush/crush.c with 99% similarity]
net/ceph/crush/hash.c [moved from fs/ceph/crush/hash.c with 99% similarity]
net/ceph/crush/mapper.c [moved from fs/ceph/crush/mapper.c with 99% similarity]
net/ceph/crypto.c [moved from fs/ceph/crypto.c with 99% similarity]
net/ceph/crypto.h [moved from fs/ceph/crypto.h with 95% similarity]
net/ceph/debugfs.c [new file with mode: 0644]
net/ceph/messenger.c [moved from fs/ceph/messenger.c with 96% similarity]
net/ceph/mon_client.c [moved from fs/ceph/mon_client.c with 94% similarity]
net/ceph/msgpool.c [moved from fs/ceph/msgpool.c with 95% similarity]
net/ceph/osd_client.c [moved from fs/ceph/osd_client.c with 97% similarity]
net/ceph/osdmap.c [moved from fs/ceph/osdmap.c with 98% similarity]
net/ceph/pagelist.c [moved from fs/ceph/pagelist.c with 90% similarity]
net/ceph/pagevec.c [new file with mode: 0644]

index 7679bf3..48d0565 100644 (file)
@@ -1527,6 +1527,8 @@ T:        git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 S:     Supported
 F:     Documentation/filesystems/ceph.txt
 F:     fs/ceph
+F:     net/ceph
+F:     include/linux/ceph
 
 CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
 M:     David Vrabel <david.vrabel@csr.com>
index 0fcd264..9eb134e 100644 (file)
@@ -1,9 +1,11 @@
 config CEPH_FS
         tristate "Ceph distributed file system (EXPERIMENTAL)"
        depends on INET && EXPERIMENTAL
+       select CEPH_LIB
        select LIBCRC32C
        select CRYPTO_AES
        select CRYPTO
+       default n
        help
          Choose Y or M here to include support for mounting the
          experimental Ceph distributed file system.  Ceph is an extremely
@@ -14,15 +16,3 @@ config CEPH_FS
 
          If unsure, say N.
 
-config CEPH_FS_PRETTYDEBUG
-       bool "Include file:line in ceph debug output"
-       depends on CEPH_FS
-       default n
-       help
-         If you say Y here, debug output will include a filename and
-         line to aid debugging.  This icnreases kernel size and slows
-         execution slightly when debug call sites are enabled (e.g.,
-         via CONFIG_DYNAMIC_DEBUG).
-
-         If unsure, say N.
-
index 278e117..9e6c4f2 100644 (file)
@@ -8,15 +8,8 @@ obj-$(CONFIG_CEPH_FS) += ceph.o
 
 ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
        export.o caps.o snap.o xattr.o \
-       messenger.o msgpool.o buffer.o pagelist.o \
-       mds_client.o mdsmap.o \
-       mon_client.o \
-       osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
-       debugfs.o \
-       auth.o auth_none.o \
-       crypto.o armor.o \
-       auth_x.o \
-       ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o
+       mds_client.o mdsmap.o strings.o ceph_frag.o \
+       debugfs.o
 
 else
 #Otherwise we were called directly from the command
diff --git a/fs/ceph/README b/fs/ceph/README
deleted file mode 100644 (file)
index 18352fa..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-#
-# The following files are shared by (and manually synchronized
-# between) the Ceph userland and kernel client.
-#
-# userland                  kernel
-src/include/ceph_fs.h      fs/ceph/ceph_fs.h
-src/include/ceph_fs.cc     fs/ceph/ceph_fs.c
-src/include/msgr.h         fs/ceph/msgr.h
-src/include/rados.h        fs/ceph/rados.h
-src/include/ceph_strings.cc fs/ceph/ceph_strings.c
-src/include/ceph_frag.h            fs/ceph/ceph_frag.h
-src/include/ceph_frag.cc    fs/ceph/ceph_frag.c
-src/include/ceph_hash.h            fs/ceph/ceph_hash.h
-src/include/ceph_hash.cc    fs/ceph/ceph_hash.c
-src/crush/crush.c          fs/ceph/crush/crush.c
-src/crush/crush.h          fs/ceph/crush/crush.h
-src/crush/mapper.c         fs/ceph/crush/mapper.c
-src/crush/mapper.h         fs/ceph/crush/mapper.h
-src/crush/hash.h           fs/ceph/crush/hash.h
-src/crush/hash.c           fs/ceph/crush/hash.c
index efbc604..51bcc5c 100644 (file)
@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/backing-dev.h>
 #include <linux/fs.h>
@@ -10,7 +10,8 @@
 #include <linux/task_io_accounting_ops.h>
 
 #include "super.h"
-#include "osd_client.h"
+#include "mds_client.h"
+#include <linux/ceph/osd_client.h>
 
 /*
  * Ceph address space ops.
@@ -193,7 +194,8 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 {
        struct inode *inode = filp->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
+       struct ceph_osd_client *osdc = 
+               &ceph_inode_to_client(inode)->client->osdc;
        int err = 0;
        u64 len = PAGE_CACHE_SIZE;
 
@@ -265,7 +267,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 {
        struct inode *inode = file->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
+       struct ceph_osd_client *osdc =
+               &ceph_inode_to_client(inode)->client->osdc;
        int rc = 0;
        struct page **pages;
        loff_t offset;
@@ -365,7 +368,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 {
        struct inode *inode;
        struct ceph_inode_info *ci;
-       struct ceph_client *client;
+       struct ceph_fs_client *fsc;
        struct ceph_osd_client *osdc;
        loff_t page_off = page->index << PAGE_CACHE_SHIFT;
        int len = PAGE_CACHE_SIZE;
@@ -383,8 +386,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        }
        inode = page->mapping->host;
        ci = ceph_inode(inode);
-       client = ceph_inode_to_client(inode);
-       osdc = &client->osdc;
+       fsc = ceph_inode_to_client(inode);
+       osdc = &fsc->client->osdc;
 
        /* verify this is a writeable snap context */
        snapc = (void *)page->private;
@@ -414,10 +417,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
             inode, page, page->index, page_off, len, snapc);
 
-       writeback_stat = atomic_long_inc_return(&client->writeback_count);
+       writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
        if (writeback_stat >
-           CONGESTION_ON_THRESH(client->mount_args->congestion_kb))
-               set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC);
+           CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
+               set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
 
        set_page_writeback(page);
        err = ceph_osdc_writepages(osdc, ceph_vino(inode),
@@ -496,7 +499,7 @@ static void writepages_finish(struct ceph_osd_request *req,
        struct address_space *mapping = inode->i_mapping;
        __s32 rc = -EIO;
        u64 bytes = 0;
-       struct ceph_client *client = ceph_inode_to_client(inode);
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        long writeback_stat;
        unsigned issued = ceph_caps_issued(ci);
 
@@ -529,10 +532,10 @@ static void writepages_finish(struct ceph_osd_request *req,
                WARN_ON(!PageUptodate(page));
 
                writeback_stat =
-                       atomic_long_dec_return(&client->writeback_count);
+                       atomic_long_dec_return(&fsc->writeback_count);
                if (writeback_stat <
-                   CONGESTION_OFF_THRESH(client->mount_args->congestion_kb))
-                       clear_bdi_congested(&client->backing_dev_info,
+                   CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
+                       clear_bdi_congested(&fsc->backing_dev_info,
                                            BLK_RW_ASYNC);
 
                ceph_put_snap_context((void *)page->private);
@@ -569,13 +572,13 @@ static void writepages_finish(struct ceph_osd_request *req,
  * mempool.  we avoid the mempool if we can because req->r_num_pages
  * may be less than the maximum write size.
  */
-static void alloc_page_vec(struct ceph_client *client,
+static void alloc_page_vec(struct ceph_fs_client *fsc,
                           struct ceph_osd_request *req)
 {
        req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages,
                               GFP_NOFS);
        if (!req->r_pages) {
-               req->r_pages = mempool_alloc(client->wb_pagevec_pool, GFP_NOFS);
+               req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS);
                req->r_pages_from_pool = 1;
                WARN_ON(!req->r_pages);
        }
@@ -590,7 +593,7 @@ static int ceph_writepages_start(struct address_space *mapping,
        struct inode *inode = mapping->host;
        struct backing_dev_info *bdi = mapping->backing_dev_info;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_client *client;
+       struct ceph_fs_client *fsc;
        pgoff_t index, start, end;
        int range_whole = 0;
        int should_loop = 1;
@@ -617,13 +620,13 @@ static int ceph_writepages_start(struct address_space *mapping,
             wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
             (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
 
-       client = ceph_inode_to_client(inode);
-       if (client->mount_state == CEPH_MOUNT_SHUTDOWN) {
+       fsc = ceph_inode_to_client(inode);
+       if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
                pr_warning("writepage_start %p on forced umount\n", inode);
                return -EIO; /* we're in a forced umount, don't write! */
        }
-       if (client->mount_args->wsize && client->mount_args->wsize < wsize)
-               wsize = client->mount_args->wsize;
+       if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
+               wsize = fsc->mount_options->wsize;
        if (wsize < PAGE_CACHE_SIZE)
                wsize = PAGE_CACHE_SIZE;
        max_pages_ever = wsize >> PAGE_CACHE_SHIFT;
@@ -769,7 +772,7 @@ get_more_pages:
                                offset = (unsigned long long)page->index
                                        << PAGE_CACHE_SHIFT;
                                len = wsize;
-                               req = ceph_osdc_new_request(&client->osdc,
+                               req = ceph_osdc_new_request(&fsc->client->osdc,
                                            &ci->i_layout,
                                            ceph_vino(inode),
                                            offset, &len,
@@ -782,7 +785,7 @@ get_more_pages:
                                            &inode->i_mtime, true, 1);
                                max_pages = req->r_num_pages;
 
-                               alloc_page_vec(client, req);
+                               alloc_page_vec(fsc, req);
                                req->r_callback = writepages_finish;
                                req->r_inode = inode;
                        }
@@ -794,10 +797,10 @@ get_more_pages:
                             inode, page, page->index);
 
                        writeback_stat =
-                              atomic_long_inc_return(&client->writeback_count);
+                              atomic_long_inc_return(&fsc->writeback_count);
                        if (writeback_stat > CONGESTION_ON_THRESH(
-                                   client->mount_args->congestion_kb)) {
-                               set_bdi_congested(&client->backing_dev_info,
+                                   fsc->mount_options->congestion_kb)) {
+                               set_bdi_congested(&fsc->backing_dev_info,
                                                  BLK_RW_ASYNC);
                        }
 
@@ -846,7 +849,7 @@ get_more_pages:
                op->payload_len = cpu_to_le32(len);
                req->r_request->hdr.data_len = cpu_to_le32(len);
 
-               ceph_osdc_start_request(&client->osdc, req, true);
+               ceph_osdc_start_request(&fsc->client->osdc, req, true);
                req = NULL;
 
                /* continue? */
@@ -915,7 +918,7 @@ static int ceph_update_writeable_page(struct file *file,
 {
        struct inode *inode = file->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
        loff_t page_off = pos & PAGE_CACHE_MASK;
        int pos_in_page = pos & ~PAGE_CACHE_MASK;
        int end_in_page = pos_in_page + len;
@@ -1053,8 +1056,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
                          struct page *page, void *fsdata)
 {
        struct inode *inode = file->f_dentry->d_inode;
-       struct ceph_client *client = ceph_inode_to_client(inode);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        unsigned from = pos & (PAGE_CACHE_SIZE - 1);
        int check_cap = 0;
 
@@ -1123,7 +1126,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        struct inode *inode = vma->vm_file->f_dentry->d_inode;
        struct page *page = vmf->page;
-       struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
        loff_t off = page->index << PAGE_CACHE_SHIFT;
        loff_t size, len;
        int ret;
index 5e9da99..3cff67c 100644 (file)
@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/fs.h>
 #include <linux/kernel.h>
@@ -9,8 +9,9 @@
 #include <linux/writeback.h>
 
 #include "super.h"
-#include "decode.h"
-#include "messenger.h"
+#include "mds_client.h"
+#include <linux/ceph/decode.h>
+#include <linux/ceph/messenger.h>
 
 /*
  * Capability management
@@ -287,11 +288,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
        spin_unlock(&mdsc->caps_list_lock);
 }
 
-void ceph_reservation_status(struct ceph_client *client,
+void ceph_reservation_status(struct ceph_fs_client *fsc,
                             int *total, int *avail, int *used, int *reserved,
                             int *min)
 {
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_mds_client *mdsc = fsc->mdsc;
 
        if (total)
                *total = mdsc->caps_total_count;
@@ -399,7 +400,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci,
 static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
                               struct ceph_inode_info *ci)
 {
-       struct ceph_mount_args *ma = mdsc->client->mount_args;
+       struct ceph_mount_options *ma = mdsc->fsc->mount_options;
 
        ci->i_hold_caps_min = round_jiffies(jiffies +
                                            ma->caps_wanted_delay_min * HZ);
@@ -515,7 +516,7 @@ int ceph_add_cap(struct inode *inode,
                 unsigned seq, unsigned mseq, u64 realmino, int flags,
                 struct ceph_cap_reservation *caps_reservation)
 {
-       struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_cap *new_cap = NULL;
        struct ceph_cap *cap;
@@ -873,7 +874,7 @@ void __ceph_remove_cap(struct ceph_cap *cap)
        struct ceph_mds_session *session = cap->session;
        struct ceph_inode_info *ci = cap->ci;
        struct ceph_mds_client *mdsc =
-               &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+               ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
        int removed = 0;
 
        dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
@@ -1210,7 +1211,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
        int mds;
        struct ceph_cap_snap *capsnap;
        u32 mseq;
-       struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
        struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
                                                    session->s_mutex */
        u64 next_follows = 0;  /* keep track of how far we've gotten through the
@@ -1336,7 +1337,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
 void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 {
        struct ceph_mds_client *mdsc =
-               &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+               ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
        struct inode *inode = &ci->vfs_inode;
        int was = ci->i_dirty_caps;
        int dirty = 0;
@@ -1378,7 +1379,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 static int __mark_caps_flushing(struct inode *inode,
                                 struct ceph_mds_session *session)
 {
-       struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
        int flushing;
 
@@ -1462,8 +1463,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
 void ceph_check_caps(struct ceph_inode_info *ci, int flags,
                     struct ceph_mds_session *session)
 {
-       struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct inode *inode = &ci->vfs_inode;
        struct ceph_cap *cap;
        int file_wanted, used;
@@ -1706,7 +1707,7 @@ ack:
 static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
                          unsigned *flush_tid)
 {
-       struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
        int unlock_session = session ? 0 : 1;
        int flushing = 0;
@@ -1872,7 +1873,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
                                       caps_are_flushed(inode, flush_tid));
        } else {
                struct ceph_mds_client *mdsc =
-                       &ceph_sb_to_client(inode->i_sb)->mdsc;
+                       ceph_sb_to_client(inode->i_sb)->mdsc;
 
                spin_lock(&inode->i_lock);
                if (__ceph_caps_dirty(ci))
@@ -2465,7 +2466,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
        __releases(inode->i_lock)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        unsigned seq = le32_to_cpu(m->seq);
        int dirty = le32_to_cpu(m->dirty);
        int cleaned = 0;
@@ -2713,7 +2714,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
                      struct ceph_msg *msg)
 {
        struct ceph_mds_client *mdsc = session->s_mdsc;
-       struct super_block *sb = mdsc->client->sb;
+       struct super_block *sb = mdsc->fsc->sb;
        struct inode *inode;
        struct ceph_cap *cap;
        struct ceph_mds_caps *h;
index ab6cf35..bdce8b1 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Ceph 'frag' type
  */
-#include "types.h"
+#include <linux/module.h>
+#include <linux/ceph/types.h>
 
 int ceph_frag_compare(__u32 a, __u32 b)
 {
index 6fd8b20..94fe1d2 100644 (file)
@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
-#include "super.h"
-#include "mds_client.h"
-#include "mon_client.h"
-#include "auth.h"
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
 
 #ifdef CONFIG_DEBUG_FS
 
-/*
- * Implement /sys/kernel/debug/ceph fun
- *
- * /sys/kernel/debug/ceph/client*  - an instance of the ceph client
- *      .../osdmap      - current osdmap
- *      .../mdsmap      - current mdsmap
- *      .../monmap      - current monmap
- *      .../osdc        - active osd requests
- *      .../mdsc        - active mds requests
- *      .../monc        - mon client state
- *      .../dentry_lru  - dump contents of dentry lru
- *      .../caps        - expose cap (reservation) stats
- *      .../bdi         - symlink to ../../bdi/something
- */
-
-static struct dentry *ceph_debugfs_dir;
-
-static int monmap_show(struct seq_file *s, void *p)
-{
-       int i;
-       struct ceph_client *client = s->private;
-
-       if (client->monc.monmap == NULL)
-               return 0;
-
-       seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
-       for (i = 0; i < client->monc.monmap->num_mon; i++) {
-               struct ceph_entity_inst *inst =
-                       &client->monc.monmap->mon_inst[i];
-
-               seq_printf(s, "\t%s%lld\t%s\n",
-                          ENTITY_NAME(inst->name),
-                          pr_addr(&inst->addr.in_addr));
-       }
-       return 0;
-}
+#include "super.h"
+#include "mds_client.h"
 
 static int mdsmap_show(struct seq_file *s, void *p)
 {
        int i;
-       struct ceph_client *client = s->private;
+       struct ceph_fs_client *fsc = s->private;
 
-       if (client->mdsc.mdsmap == NULL)
+       if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
                return 0;
-       seq_printf(s, "epoch %d\n", client->mdsc.mdsmap->m_epoch);
-       seq_printf(s, "root %d\n", client->mdsc.mdsmap->m_root);
+       seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
+       seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
        seq_printf(s, "session_timeout %d\n",
-                      client->mdsc.mdsmap->m_session_timeout);
+                      fsc->mdsc->mdsmap->m_session_timeout);
        seq_printf(s, "session_autoclose %d\n",
-                      client->mdsc.mdsmap->m_session_autoclose);
-       for (i = 0; i < client->mdsc.mdsmap->m_max_mds; i++) {
+                      fsc->mdsc->mdsmap->m_session_autoclose);
+       for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
                struct ceph_entity_addr *addr =
-                       &client->mdsc.mdsmap->m_info[i].addr;
-               int state = client->mdsc.mdsmap->m_info[i].state;
+                       &fsc->mdsc->mdsmap->m_info[i].addr;
+               int state = fsc->mdsc->mdsmap->m_info[i].state;
 
-               seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, pr_addr(&addr->in_addr),
+               seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
+                              ceph_pr_addr(&addr->in_addr),
                               ceph_mds_state_name(state));
        }
        return 0;
 }
 
-static int osdmap_show(struct seq_file *s, void *p)
-{
-       int i;
-       struct ceph_client *client = s->private;
-       struct rb_node *n;
-
-       if (client->osdc.osdmap == NULL)
-               return 0;
-       seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
-       seq_printf(s, "flags%s%s\n",
-                  (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
-                  " NEARFULL" : "",
-                  (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
-                  " FULL" : "");
-       for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
-               struct ceph_pg_pool_info *pool =
-                       rb_entry(n, struct ceph_pg_pool_info, node);
-               seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
-                          pool->id, pool->v.pg_num, pool->pg_num_mask,
-                          pool->v.lpg_num, pool->lpg_num_mask);
-       }
-       for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
-               struct ceph_entity_addr *addr =
-                       &client->osdc.osdmap->osd_addr[i];
-               int state = client->osdc.osdmap->osd_state[i];
-               char sb[64];
-
-               seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
-                          i, pr_addr(&addr->in_addr),
-                          ((client->osdc.osdmap->osd_weight[i]*100) >> 16),
-                          ceph_osdmap_state_str(sb, sizeof(sb), state));
-       }
-       return 0;
-}
-
-static int monc_show(struct seq_file *s, void *p)
-{
-       struct ceph_client *client = s->private;
-       struct ceph_mon_generic_request *req;
-       struct ceph_mon_client *monc = &client->monc;
-       struct rb_node *rp;
-
-       mutex_lock(&monc->mutex);
-
-       if (monc->have_mdsmap)
-               seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap);
-       if (monc->have_osdmap)
-               seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap);
-       if (monc->want_next_osdmap)
-               seq_printf(s, "want next osdmap\n");
-
-       for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
-               __u16 op;
-               req = rb_entry(rp, struct ceph_mon_generic_request, node);
-               op = le16_to_cpu(req->request->hdr.type);
-               if (op == CEPH_MSG_STATFS)
-                       seq_printf(s, "%lld statfs\n", req->tid);
-               else
-                       seq_printf(s, "%lld unknown\n", req->tid);
-       }
-
-       mutex_unlock(&monc->mutex);
-       return 0;
-}
-
+/*
+ * mdsc debugfs
+ */
 static int mdsc_show(struct seq_file *s, void *p)
 {
-       struct ceph_client *client = s->private;
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = s->private;
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
        struct rb_node *rp;
        int pathlen;
@@ -214,61 +119,12 @@ static int mdsc_show(struct seq_file *s, void *p)
        return 0;
 }
 
-static int osdc_show(struct seq_file *s, void *pp)
-{
-       struct ceph_client *client = s->private;
-       struct ceph_osd_client *osdc = &client->osdc;
-       struct rb_node *p;
-
-       mutex_lock(&osdc->request_mutex);
-       for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
-               struct ceph_osd_request *req;
-               struct ceph_osd_request_head *head;
-               struct ceph_osd_op *op;
-               int num_ops;
-               int opcode, olen;
-               int i;
-
-               req = rb_entry(p, struct ceph_osd_request, r_node);
-
-               seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
-                          req->r_osd ? req->r_osd->o_osd : -1,
-                          le32_to_cpu(req->r_pgid.pool),
-                          le16_to_cpu(req->r_pgid.ps));
-
-               head = req->r_request->front.iov_base;
-               op = (void *)(head + 1);
-
-               num_ops = le16_to_cpu(head->num_ops);
-               olen = le32_to_cpu(head->object_len);
-               seq_printf(s, "%.*s", olen,
-                          (const char *)(head->ops + num_ops));
-
-               if (req->r_reassert_version.epoch)
-                       seq_printf(s, "\t%u'%llu",
-                          (unsigned)le32_to_cpu(req->r_reassert_version.epoch),
-                          le64_to_cpu(req->r_reassert_version.version));
-               else
-                       seq_printf(s, "\t");
-
-               for (i = 0; i < num_ops; i++) {
-                       opcode = le16_to_cpu(op->op);
-                       seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
-                       op++;
-               }
-
-               seq_printf(s, "\n");
-       }
-       mutex_unlock(&osdc->request_mutex);
-       return 0;
-}
-
 static int caps_show(struct seq_file *s, void *p)
 {
-       struct ceph_client *client = s->private;
+       struct ceph_fs_client *fsc = s->private;
        int total, avail, used, reserved, min;
 
-       ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
+       ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
        seq_printf(s, "total\t\t%d\n"
                   "avail\t\t%d\n"
                   "used\t\t%d\n"
@@ -280,8 +136,8 @@ static int caps_show(struct seq_file *s, void *p)
 
 static int dentry_lru_show(struct seq_file *s, void *ptr)
 {
-       struct ceph_client *client = s->private;
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = s->private;
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_dentry_info *di;
 
        spin_lock(&mdsc->dentry_lru_lock);
@@ -295,199 +151,124 @@ static int dentry_lru_show(struct seq_file *s, void *ptr)
        return 0;
 }
 
-#define DEFINE_SHOW_FUNC(name)                                         \
-static int name##_open(struct inode *inode, struct file *file)         \
-{                                                                      \
-       struct seq_file *sf;                                            \
-       int ret;                                                        \
-                                                                       \
-       ret = single_open(file, name, NULL);                            \
-       sf = file->private_data;                                        \
-       sf->private = inode->i_private;                                 \
-       return ret;                                                     \
-}                                                                      \
-                                                                       \
-static const struct file_operations name##_fops = {                    \
-       .open           = name##_open,                                  \
-       .read           = seq_read,                                     \
-       .llseek         = seq_lseek,                                    \
-       .release        = single_release,                               \
-};
-
-DEFINE_SHOW_FUNC(monmap_show)
-DEFINE_SHOW_FUNC(mdsmap_show)
-DEFINE_SHOW_FUNC(osdmap_show)
-DEFINE_SHOW_FUNC(monc_show)
-DEFINE_SHOW_FUNC(mdsc_show)
-DEFINE_SHOW_FUNC(osdc_show)
-DEFINE_SHOW_FUNC(dentry_lru_show)
-DEFINE_SHOW_FUNC(caps_show)
+CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
+CEPH_DEFINE_SHOW_FUNC(mdsc_show)
+CEPH_DEFINE_SHOW_FUNC(caps_show)
+CEPH_DEFINE_SHOW_FUNC(dentry_lru_show)
+
 
+/*
+ * debugfs
+ */
 static int congestion_kb_set(void *data, u64 val)
 {
-       struct ceph_client *client = (struct ceph_client *)data;
-
-       if (client)
-               client->mount_args->congestion_kb = (int)val;
+       struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
 
+       fsc->mount_options->congestion_kb = (int)val;
        return 0;
 }
 
 static int congestion_kb_get(void *data, u64 *val)
 {
-       struct ceph_client *client = (struct ceph_client *)data;
-
-       if (client)
-               *val = (u64)client->mount_args->congestion_kb;
+       struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
 
+       *val = (u64)fsc->mount_options->congestion_kb;
        return 0;
 }
 
-
 DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
                        congestion_kb_set, "%llu\n");
 
-int __init ceph_debugfs_init(void)
-{
-       ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
-       if (!ceph_debugfs_dir)
-               return -ENOMEM;
-       return 0;
-}
 
-void ceph_debugfs_cleanup(void)
+void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 {
-       debugfs_remove(ceph_debugfs_dir);
+       dout("ceph_fs_debugfs_cleanup\n");
+       debugfs_remove(fsc->debugfs_bdi);
+       debugfs_remove(fsc->debugfs_congestion_kb);
+       debugfs_remove(fsc->debugfs_mdsmap);
+       debugfs_remove(fsc->debugfs_caps);
+       debugfs_remove(fsc->debugfs_mdsc);
+       debugfs_remove(fsc->debugfs_dentry_lru);
 }
 
-int ceph_debugfs_client_init(struct ceph_client *client)
+int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 {
-       int ret = 0;
-       char name[80];
-
-       snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
-                client->monc.auth->global_id);
-
-       client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
-       if (!client->debugfs_dir)
-               goto out;
+       char name[100];
+       int err = -ENOMEM;
 
-       client->monc.debugfs_file = debugfs_create_file("monc",
-                                                     0600,
-                                                     client->debugfs_dir,
-                                                     client,
-                                                     &monc_show_fops);
-       if (!client->monc.debugfs_file)
-               goto out;
-
-       client->mdsc.debugfs_file = debugfs_create_file("mdsc",
-                                                     0600,
-                                                     client->debugfs_dir,
-                                                     client,
-                                                     &mdsc_show_fops);
-       if (!client->mdsc.debugfs_file)
+       dout("ceph_fs_debugfs_init\n");
+       fsc->debugfs_congestion_kb =
+               debugfs_create_file("writeback_congestion_kb",
+                                   0600,
+                                   fsc->client->debugfs_dir,
+                                   fsc,
+                                   &congestion_kb_fops);
+       if (!fsc->debugfs_congestion_kb)
                goto out;
 
-       client->osdc.debugfs_file = debugfs_create_file("osdc",
-                                                     0600,
-                                                     client->debugfs_dir,
-                                                     client,
-                                                     &osdc_show_fops);
-       if (!client->osdc.debugfs_file)
-               goto out;
+       dout("a\n");
 
-       client->debugfs_monmap = debugfs_create_file("monmap",
-                                       0600,
-                                       client->debugfs_dir,
-                                       client,
-                                       &monmap_show_fops);
-       if (!client->debugfs_monmap)
+       snprintf(name, sizeof(name), "../../bdi/%s",
+                dev_name(fsc->backing_dev_info.dev));
+       fsc->debugfs_bdi =
+               debugfs_create_symlink("bdi",
+                                      fsc->client->debugfs_dir,
+                                      name);
+       if (!fsc->debugfs_bdi)
                goto out;
 
-       client->debugfs_mdsmap = debugfs_create_file("mdsmap",
+       dout("b\n");
+       fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
                                        0600,
-                                       client->debugfs_dir,
-                                       client,
+                                       fsc->client->debugfs_dir,
+                                       fsc,
                                        &mdsmap_show_fops);
-       if (!client->debugfs_mdsmap)
+       if (!fsc->debugfs_mdsmap)
                goto out;
 
-       client->debugfs_osdmap = debugfs_create_file("osdmap",
-                                       0600,
-                                       client->debugfs_dir,
-                                       client,
-                                       &osdmap_show_fops);
-       if (!client->debugfs_osdmap)
+       dout("ca\n");
+       fsc->debugfs_mdsc = debugfs_create_file("mdsc",
+                                               0600,
+                                               fsc->client->debugfs_dir,
+                                               fsc,
+                                               &mdsc_show_fops);
+       if (!fsc->debugfs_mdsc)
                goto out;
 
-       client->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
-                                       0600,
-                                       client->debugfs_dir,
-                                       client,
-                                       &dentry_lru_show_fops);
-       if (!client->debugfs_dentry_lru)
-               goto out;
-
-       client->debugfs_caps = debugfs_create_file("caps",
+       dout("da\n");
+       fsc->debugfs_caps = debugfs_create_file("caps",
                                                   0400,
-                                                  client->debugfs_dir,
-                                                  client,
+                                                  fsc->client->debugfs_dir,
+                                                  fsc,
                                                   &caps_show_fops);
-       if (!client->debugfs_caps)
+       if (!fsc->debugfs_caps)
                goto out;
 
-       client->debugfs_congestion_kb =
-               debugfs_create_file("writeback_congestion_kb",
-                                   0600,
-                                   client->debugfs_dir,
-                                   client,
-                                   &congestion_kb_fops);
-       if (!client->debugfs_congestion_kb)
+       dout("ea\n");
+       fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
+                                       0600,
+                                       fsc->client->debugfs_dir,
+                                       fsc,
+                                       &dentry_lru_show_fops);
+       if (!fsc->debugfs_dentry_lru)
                goto out;
 
-       sprintf(name, "../../bdi/%s", dev_name(client->sb->s_bdi->dev));
-       client->debugfs_bdi = debugfs_create_symlink("bdi", client->debugfs_dir,
-                                                    name);
-
        return 0;
 
 out:
-       ceph_debugfs_client_cleanup(client);
-       return ret;
+       ceph_fs_debugfs_cleanup(fsc);
+       return err;
 }
 
-void ceph_debugfs_client_cleanup(struct ceph_client *client)
-{
-       debugfs_remove(client->debugfs_bdi);
-       debugfs_remove(client->debugfs_caps);
-       debugfs_remove(client->debugfs_dentry_lru);
-       debugfs_remove(client->debugfs_osdmap);
-       debugfs_remove(client->debugfs_mdsmap);
-       debugfs_remove(client->debugfs_monmap);
-       debugfs_remove(client->osdc.debugfs_file);
-       debugfs_remove(client->mdsc.debugfs_file);
-       debugfs_remove(client->monc.debugfs_file);
-       debugfs_remove(client->debugfs_congestion_kb);
-       debugfs_remove(client->debugfs_dir);
-}
 
 #else  /* CONFIG_DEBUG_FS */
 
-int __init ceph_debugfs_init(void)
-{
-       return 0;
-}
-
-void ceph_debugfs_cleanup(void)
-{
-}
-
-int ceph_debugfs_client_init(struct ceph_client *client)
+int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 {
        return 0;
 }
 
-void ceph_debugfs_client_cleanup(struct ceph_client *client)
+void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 {
 }
 
index a1986eb..a8d2aac 100644 (file)
@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/spinlock.h>
 #include <linux/fs_struct.h>
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 
 #include "super.h"
+#include "mds_client.h"
 
 /*
  * Directory operations: readdir, lookup, create, link, unlink,
@@ -227,15 +228,15 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
        struct ceph_file_info *fi = filp->private_data;
        struct inode *inode = filp->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_client *client = ceph_inode_to_client(inode);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        unsigned frag = fpos_frag(filp->f_pos);
        int off = fpos_off(filp->f_pos);
        int err;
        u32 ftype;
        struct ceph_mds_reply_info_parsed *rinfo;
-       const int max_entries = client->mount_args->max_readdir;
-       const int max_bytes = client->mount_args->max_readdir_bytes;
+       const int max_entries = fsc->mount_options->max_readdir;
+       const int max_bytes = fsc->mount_options->max_readdir_bytes;
 
        dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
        if (fi->at_end)
@@ -267,7 +268,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
        /* can we use the dcache? */
        spin_lock(&inode->i_lock);
        if ((filp->f_pos == 2 || fi->dentry) &&
-           !ceph_test_opt(client, NOASYNCREADDIR) &&
+           !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
            ceph_snap(inode) != CEPH_SNAPDIR &&
            (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
            __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
@@ -487,14 +488,14 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
                                  struct dentry *dentry, int err)
 {
-       struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
+       struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
        struct inode *parent = dentry->d_parent->d_inode;
 
        /* .snap dir? */
        if (err == -ENOENT &&
            ceph_vino(parent).ino != CEPH_INO_ROOT && /* no .snap in root dir */
            strcmp(dentry->d_name.name,
-                  client->mount_args->snapdir_name) == 0) {
+                  fsc->mount_options->snapdir_name) == 0) {
                struct inode *inode = ceph_get_snapdir(parent);
                dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
                     dentry, dentry->d_name.len, dentry->d_name.name, inode);
@@ -539,8 +540,8 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                                  struct nameidata *nd)
 {
-       struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
        int op;
        int err;
@@ -572,7 +573,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                spin_lock(&dir->i_lock);
                dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
                if (strncmp(dentry->d_name.name,
-                           client->mount_args->snapdir_name,
+                           fsc->mount_options->snapdir_name,
                            dentry->d_name.len) &&
                    !is_root_ceph_dentry(dir, dentry) &&
                    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
@@ -629,8 +630,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
 static int ceph_mknod(struct inode *dir, struct dentry *dentry,
                      int mode, dev_t rdev)
 {
-       struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
        int err;
 
@@ -685,8 +686,8 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, int mode,
 static int ceph_symlink(struct inode *dir, struct dentry *dentry,
                            const char *dest)
 {
-       struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
        int err;
 
@@ -716,8 +717,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 
 static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
-       struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
        int err = -EROFS;
        int op;
@@ -758,8 +759,8 @@ out:
 static int ceph_link(struct dentry *old_dentry, struct inode *dir,
                     struct dentry *dentry)
 {
-       struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
        int err;
 
@@ -813,8 +814,8 @@ static int drop_caps_for_unlink(struct inode *inode)
  */
 static int ceph_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct inode *inode = dentry->d_inode;
        struct ceph_mds_request *req;
        int err = -EROFS;
@@ -854,8 +855,8 @@ out:
 static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
                       struct inode *new_dir, struct dentry *new_dentry)
 {
-       struct ceph_client *client = ceph_sb_to_client(old_dir->i_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
        int err;
 
@@ -1076,7 +1077,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
        struct ceph_inode_info *ci = ceph_inode(inode);
        int left;
 
-       if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
+       if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
                return -EISDIR;
 
        if (!cf->dir_info) {
@@ -1177,7 +1178,7 @@ void ceph_dentry_lru_add(struct dentry *dn)
        dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
             dn->d_name.len, dn->d_name.name);
        if (di) {
-               mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
+               mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
                spin_lock(&mdsc->dentry_lru_lock);
                list_add_tail(&di->lru, &mdsc->dentry_lru);
                mdsc->num_dentry++;
@@ -1193,7 +1194,7 @@ void ceph_dentry_lru_touch(struct dentry *dn)
        dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
             dn->d_name.len, dn->d_name.name, di->offset);
        if (di) {
-               mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
+               mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
                spin_lock(&mdsc->dentry_lru_lock);
                list_move_tail(&di->lru, &mdsc->dentry_lru);
                spin_unlock(&mdsc->dentry_lru_lock);
@@ -1208,7 +1209,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
        dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
             dn->d_name.len, dn->d_name.name);
        if (di) {
-               mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
+               mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
                spin_lock(&mdsc->dentry_lru_lock);
                list_del_init(&di->lru);
                mdsc->num_dentry--;
index e38423e..2297d94 100644 (file)
@@ -1,10 +1,11 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/exportfs.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
 
 #include "super.h"
+#include "mds_client.h"
 
 /*
  * NFS export support
@@ -120,7 +121,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 static struct dentry *__cfh_to_dentry(struct super_block *sb,
                                      struct ceph_nfs_confh *cfh)
 {
-       struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
        struct inode *inode;
        struct dentry *dentry;
        struct ceph_vino vino;
index 66e4da6..e77c28c 100644 (file)
@@ -1,5 +1,6 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/file.h>
@@ -38,8 +39,8 @@
 static struct ceph_mds_request *
 prepare_open_request(struct super_block *sb, int flags, int create_mode)
 {
-       struct ceph_client *client = ceph_sb_to_client(sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
        int want_auth = USE_ANY_MDS;
        int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN;
@@ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 int ceph_open(struct inode *inode, struct file *file)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_client *client = ceph_sb_to_client(inode->i_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
        struct ceph_file_info *cf = file->private_data;
        struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
@@ -216,8 +217,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
                                struct nameidata *nd, int mode,
                                int locked_dir)
 {
-       struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct file *file = nd->intent.open.file;
        struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
        struct ceph_mds_request *req;
@@ -270,163 +271,6 @@ int ceph_release(struct inode *inode, struct file *file)
 }
 
 /*
- * build a vector of user pages
- */
-static struct page **get_direct_page_vector(const char __user *data,
-                                           int num_pages,
-                                           loff_t off, size_t len)
-{
-       struct page **pages;
-       int rc;
-
-       pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
-       if (!pages)
-               return ERR_PTR(-ENOMEM);
-
-       down_read(&current->mm->mmap_sem);
-       rc = get_user_pages(current, current->mm, (unsigned long)data,
-                           num_pages, 0, 0, pages, NULL);
-       up_read(&current->mm->mmap_sem);
-       if (rc < 0)
-               goto fail;
-       return pages;
-
-fail:
-       kfree(pages);
-       return ERR_PTR(rc);
-}
-
-static void put_page_vector(struct page **pages, int num_pages)
-{
-       int i;
-
-       for (i = 0; i < num_pages; i++)
-               put_page(pages[i]);
-       kfree(pages);
-}
-
-void ceph_release_page_vector(struct page **pages, int num_pages)
-{
-       int i;
-
-       for (i = 0; i < num_pages; i++)
-               __free_pages(pages[i], 0);
-       kfree(pages);
-}
-
-/*
- * allocate a vector new pages
- */
-static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
-{
-       struct page **pages;
-       int i;
-
-       pages = kmalloc(sizeof(*pages) * num_pages, flags);
-       if (!pages)
-               return ERR_PTR(-ENOMEM);
-       for (i = 0; i < num_pages; i++) {
-               pages[i] = __page_cache_alloc(flags);
-               if (pages[i] == NULL) {
-                       ceph_release_page_vector(pages, i);
-                       return ERR_PTR(-ENOMEM);
-               }
-       }
-       return pages;
-}
-
-/*
- * copy user data into a page vector
- */
-static int copy_user_to_page_vector(struct page **pages,
-                                   const char __user *data,
-                                   loff_t off, size_t len)
-{
-       int i = 0;
-       int po = off & ~PAGE_CACHE_MASK;
-       int left = len;
-       int l, bad;
-
-       while (left > 0) {
-               l = min_t(int, PAGE_CACHE_SIZE-po, left);
-               bad = copy_from_user(page_address(pages[i]) + po, data, l);
-               if (bad == l)
-                       return -EFAULT;
-               data += l - bad;
-               left -= l - bad;
-               po += l - bad;
-               if (po == PAGE_CACHE_SIZE) {
-                       po = 0;
-                       i++;
-               }
-       }
-       return len;
-}
-
-/*
- * copy user data from a page vector into a user pointer
- */
-static int copy_page_vector_to_user(struct page **pages, char __user *data,
-                                   loff_t off, size_t len)
-{
-       int i = 0;
-       int po = off & ~PAGE_CACHE_MASK;
-       int left = len;
-       int l, bad;
-
-       while (left > 0) {
-               l = min_t(int, left, PAGE_CACHE_SIZE-po);
-               bad = copy_to_user(data, page_address(pages[i]) + po, l);
-               if (bad == l)
-                       return -EFAULT;
-               data += l - bad;
-               left -= l - bad;
-               if (po) {
-                       po += l - bad;
-                       if (po == PAGE_CACHE_SIZE)
-                               po = 0;
-               }
-               i++;
-       }
-       return len;
-}
-
-/*
- * Zero an extent within a page vector.  Offset is relative to the
- * start of the first page.
- */
-static void zero_page_vector_range(int off, int len, struct page **pages)
-{
-       int i = off >> PAGE_CACHE_SHIFT;
-
-       off &= ~PAGE_CACHE_MASK;
-
-       dout("zero_page_vector_page %u~%u\n", off, len);
-
-       /* leading partial page? */
-       if (off) {
-               int end = min((int)PAGE_CACHE_SIZE, off + len);
-               dout("zeroing %d %p head from %d\n", i, pages[i],
-                    (int)off);
-               zero_user_segment(pages[i], off, end);
-               len -= (end - off);
-               i++;
-       }
-       while (len >= PAGE_CACHE_SIZE) {
-               dout("zeroing %d %p len=%d\n", i, pages[i], len);
-               zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
-               len -= PAGE_CACHE_SIZE;
-               i++;
-       }
-       /* trailing partial page? */
-       if (len) {
-               dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len);
-               zero_user_segment(pages[i], 0, len);
-       }
-}
-
-
-/*
  * Read a range of bytes striped over one or more objects.  Iterate over
  * objects we stripe over.  (That's not atomic, but good enough for now.)
  *
@@ -438,7 +282,7 @@ static int striped_read(struct inode *inode,
                        struct page **pages, int num_pages,
                        int *checkeof)
 {
-       struct ceph_client *client = ceph_inode_to_client(inode);
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_inode_info *ci = ceph_inode(inode);
        u64 pos, this_len;
        int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
@@ -459,7 +303,7 @@ static int striped_read(struct inode *inode,
 
 more:
        this_len = left;
-       ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode),
+       ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
                                  &ci->i_layout, pos, &this_len,
                                  ci->i_truncate_seq,
                                  ci->i_truncate_size,
@@ -477,8 +321,8 @@ more:
 
                if (read < pos - off) {
                        dout(" zero gap %llu to %llu\n", off + read, pos);
-                       zero_page_vector_range(page_off + read,
-                                              pos - off - read, pages);
+                       ceph_zero_page_vector_range(page_off + read,
+                                                   pos - off - read, pages);
                }
                pos += ret;
                read = pos - off;
@@ -495,8 +339,8 @@ more:
                /* was original extent fully inside i_size? */
                if (pos + left <= inode->i_size) {
                        dout("zero tail\n");
-                       zero_page_vector_range(page_off + read, len - read,
-                                              pages);
+                       ceph_zero_page_vector_range(page_off + read, len - read,
+                                                   pages);
                        read = len;
                        goto out;
                }
@@ -531,7 +375,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
             (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
        if (file->f_flags & O_DIRECT) {
-               pages = get_direct_page_vector(data, num_pages, off, len);
+               pages = ceph_get_direct_page_vector(data, num_pages, off, len);
 
                /*
                 * flush any page cache pages in this range.  this
@@ -552,13 +396,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
        ret = striped_read(inode, off, len, pages, num_pages, checkeof);
 
        if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
-               ret = copy_page_vector_to_user(pages, data, off, ret);
+               ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
        if (ret >= 0)
                *poff = off + ret;
 
 done:
        if (file->f_flags & O_DIRECT)
-               put_page_vector(pages, num_pages);
+               ceph_put_page_vector(pages, num_pages);
        else
                ceph_release_page_vector(pages, num_pages);
        dout("sync_read result %d\n", ret);
@@ -594,7 +438,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 {
        struct inode *inode = file->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_client *client = ceph_inode_to_client(inode);
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_osd_request *req;
        struct page **pages;
        int num_pages;
@@ -642,7 +486,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
         */
 more:
        len = left;
-       req = ceph_osdc_new_request(&client->osdc, &ci->i_layout,
+       req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                    ceph_vino(inode), pos, &len,
                                    CEPH_OSD_OP_WRITE, flags,
                                    ci->i_snap_realm->cached_context,
@@ -655,7 +499,7 @@ more:
        num_pages = calc_pages_for(pos, len);
 
        if (file->f_flags & O_DIRECT) {
-               pages = get_direct_page_vector(data, num_pages, pos, len);
+               pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
                if (IS_ERR(pages)) {
                        ret = PTR_ERR(pages);
                        goto out;
@@ -673,7 +517,7 @@ more:
                        ret = PTR_ERR(pages);
                        goto out;
                }
-               ret = copy_user_to_page_vector(pages, data, pos, len);
+               ret = ceph_copy_user_to_page_vector(pages, data, pos, len);
                if (ret < 0) {
                        ceph_release_page_vector(pages, num_pages);
                        goto out;
@@ -689,7 +533,7 @@ more:
        req->r_num_pages = num_pages;
        req->r_inode = inode;
 
-       ret = ceph_osdc_start_request(&client->osdc, req, false);
+       ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
        if (!ret) {
                if (req->r_safe_callback) {
                        /*
@@ -701,11 +545,11 @@ more:
                        spin_unlock(&ci->i_unsafe_lock);
                        ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
                }
-               ret = ceph_osdc_wait_request(&client->osdc, req);
+               ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
        }
 
        if (file->f_flags & O_DIRECT)
-               put_page_vector(pages, num_pages);
+               ceph_put_page_vector(pages, num_pages);
        else if (file->f_flags & O_SYNC)
                ceph_release_page_vector(pages, num_pages);
 
@@ -814,7 +658,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
        struct ceph_file_info *fi = file->private_data;
        struct inode *inode = file->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
+       struct ceph_osd_client *osdc =
+               &ceph_sb_to_client(inode->i_sb)->client->osdc;
        loff_t endoff = pos + iov->iov_len;
        int want, got = 0;
        int ret, err;
index 62377ec..1d6a45b 100644 (file)
@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/module.h>
 #include <linux/fs.h>
@@ -13,7 +13,8 @@
 #include <linux/pagevec.h>
 
 #include "super.h"
-#include "decode.h"
+#include "mds_client.h"
+#include <linux/ceph/decode.h>
 
 /*
  * Ceph inode operations
@@ -384,7 +385,7 @@ void ceph_destroy_inode(struct inode *inode)
         */
        if (ci->i_snap_realm) {
                struct ceph_mds_client *mdsc =
-                       &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+                       ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
                struct ceph_snap_realm *realm = ci->i_snap_realm;
 
                dout(" dropping residual ref to snap realm %p\n", realm);
@@ -685,7 +686,7 @@ static int fill_inode(struct inode *inode,
                }
 
                /* it may be better to set st_size in getattr instead? */
-               if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
+               if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
                        inode->i_size = ci->i_rbytes;
                break;
        default:
@@ -901,7 +902,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
        struct inode *in = NULL;
        struct ceph_mds_reply_inode *ininfo;
        struct ceph_vino vino;
-       struct ceph_client *client = ceph_sb_to_client(sb);
+       struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
        int i = 0;
        int err = 0;
 
@@ -965,7 +966,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
         */
        if (rinfo->head->is_dentry && !req->r_aborted &&
            (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
-                                              client->mount_args->snapdir_name,
+                                              fsc->mount_options->snapdir_name,
                                               req->r_dentry->d_name.len))) {
                /*
                 * lookup link rename   : null -> possibly existing inode
@@ -1533,7 +1534,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        struct inode *parent_inode = dentry->d_parent->d_inode;
        const unsigned int ia_valid = attr->ia_valid;
        struct ceph_mds_request *req;
-       struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
        int issued;
        int release = 0, dirtied = 0;
        int mask = 0;
@@ -1728,8 +1729,8 @@ out:
  */
 int ceph_do_getattr(struct inode *inode, int mask)
 {
-       struct ceph_client *client = ceph_sb_to_client(inode->i_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
        int err;
 
index 76e307d..899578b 100644 (file)
@@ -1,8 +1,10 @@
 #include <linux/in.h>
 
-#include "ioctl.h"
 #include "super.h"
-#include "ceph_debug.h"
+#include "mds_client.h"
+#include <linux/ceph/ceph_debug.h>
+
+#include "ioctl.h"
 
 
 /*
@@ -37,7 +39,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 {
        struct inode *inode = file->f_dentry->d_inode;
        struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
-       struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_mds_request *req;
        struct ceph_ioctl_layout l;
        int err, i;
@@ -98,7 +100,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
        struct ceph_ioctl_dataloc dl;
        struct inode *inode = file->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
+       struct ceph_osd_client *osdc =
+               &ceph_sb_to_client(inode->i_sb)->client->osdc;
        u64 len = 1, olen;
        u64 tmp;
        struct ceph_object_layout ol;
index ff4e753..087afe9 100644 (file)
@@ -1,11 +1,11 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/file.h>
 #include <linux/namei.h>
 
 #include "super.h"
 #include "mds_client.h"
-#include "pagelist.h"
+#include <linux/ceph/pagelist.h>
 
 /**
  * Implement fcntl and flock locking functions.
@@ -16,7 +16,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
 {
        struct inode *inode = file->f_dentry->d_inode;
        struct ceph_mds_client *mdsc =
-               &ceph_sb_to_client(inode->i_sb)->mdsc;
+               ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_mds_request *req;
        int err;
 
index fad95f8..3356823 100644 (file)
@@ -1,17 +1,20 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/wait.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/smp_lock.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
-#include "mds_client.h"
-#include "mon_client.h"
 #include "super.h"
-#include "messenger.h"
-#include "decode.h"
-#include "auth.h"
-#include "pagelist.h"
+#include "mds_client.h"
+
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/pagelist.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
 
 /*
  * A cluster of MDS (metadata server) daemons is responsible for
@@ -286,8 +289,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
             atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
        if (atomic_dec_and_test(&s->s_ref)) {
                if (s->s_authorizer)
-                       s->s_mdsc->client->monc.auth->ops->destroy_authorizer(
-                               s->s_mdsc->client->monc.auth, s->s_authorizer);
+                    s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
+                            s->s_mdsc->fsc->client->monc.auth,
+                            s->s_authorizer);
                kfree(s);
        }
 }
@@ -344,7 +348,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
        s->s_seq = 0;
        mutex_init(&s->s_mutex);
 
-       ceph_con_init(mdsc->client->msgr, &s->s_con);
+       ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
        s->s_con.private = s;
        s->s_con.ops = &mds_con_ops;
        s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
@@ -599,7 +603,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
        } else if (req->r_dentry) {
                struct inode *dir = req->r_dentry->d_parent->d_inode;
 
-               if (dir->i_sb != mdsc->client->sb) {
+               if (dir->i_sb != mdsc->fsc->sb) {
                        /* not this fs! */
                        inode = req->r_dentry->d_inode;
                } else if (ceph_snap(dir) != CEPH_NOSNAP) {
@@ -884,7 +888,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
        __ceph_remove_cap(cap);
        if (!__ceph_is_any_real_caps(ci)) {
                struct ceph_mds_client *mdsc =
-                       &ceph_sb_to_client(inode->i_sb)->mdsc;
+                       ceph_sb_to_client(inode->i_sb)->mdsc;
 
                spin_lock(&mdsc->cap_dirty_lock);
                if (!list_empty(&ci->i_dirty_item)) {
@@ -1146,7 +1150,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
        struct ceph_msg *msg, *partial = NULL;
        struct ceph_mds_cap_release *head;
        int err = -ENOMEM;
-       int extra = mdsc->client->mount_args->cap_release_safety;
+       int extra = mdsc->fsc->mount_options->cap_release_safety;
        int num;
 
        dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds,
@@ -2085,7 +2089,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 
        /* insert trace into our cache */
        mutex_lock(&req->r_fill_mutex);
-       err = ceph_fill_trace(mdsc->client->sb, req, req->r_session);
+       err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
        if (err == 0) {
                if (result == 0 && rinfo->dir_nr)
                        ceph_readdir_prepopulate(req, req->r_session);
@@ -2613,7 +2617,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
                         struct ceph_mds_session *session,
                         struct ceph_msg *msg)
 {
-       struct super_block *sb = mdsc->client->sb;
+       struct super_block *sb = mdsc->fsc->sb;
        struct inode *inode;
        struct ceph_inode_info *ci;
        struct dentry *parent, *dentry;
@@ -2891,10 +2895,16 @@ static void delayed_work(struct work_struct *work)
        schedule_delayed(mdsc);
 }
 
+int ceph_mdsc_init(struct ceph_fs_client *fsc)
 
-int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
 {
-       mdsc->client = client;
+       struct ceph_mds_client *mdsc;
+
+       mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
+       if (!mdsc)
+               return -ENOMEM;
+       mdsc->fsc = fsc;
+       fsc->mdsc = mdsc;
        mutex_init(&mdsc->mutex);
        mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
        if (mdsc->mdsmap == NULL)
@@ -2927,7 +2937,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
        INIT_LIST_HEAD(&mdsc->dentry_lru);
 
        ceph_caps_init(mdsc);
-       ceph_adjust_min_caps(mdsc, client->min_caps);
+       ceph_adjust_min_caps(mdsc, fsc->min_caps);
 
        return 0;
 }
@@ -2939,7 +2949,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
 static void wait_requests(struct ceph_mds_client *mdsc)
 {
        struct ceph_mds_request *req;
-       struct ceph_client *client = mdsc->client;
+       struct ceph_fs_client *fsc = mdsc->fsc;
 
        mutex_lock(&mdsc->mutex);
        if (__get_oldest_req(mdsc)) {
@@ -2947,7 +2957,7 @@ static void wait_requests(struct ceph_mds_client *mdsc)
 
                dout("wait_requests waiting for requests\n");
                wait_for_completion_timeout(&mdsc->safe_umount_waiters,
-                                   client->mount_args->mount_timeout * HZ);
+                                   fsc->client->options->mount_timeout * HZ);
 
                /* tear down remaining requests */
                mutex_lock(&mdsc->mutex);
@@ -3030,7 +3040,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 {
        u64 want_tid, want_flush;
 
-       if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
+       if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
                return;
 
        dout("sync\n");
@@ -3053,7 +3063,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc)
 {
        int i, n = 0;
 
-       if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
+       if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
                return true;
 
        mutex_lock(&mdsc->mutex);
@@ -3071,8 +3081,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
 {
        struct ceph_mds_session *session;
        int i;
-       struct ceph_client *client = mdsc->client;
-       unsigned long timeout = client->mount_args->mount_timeout * HZ;
+       struct ceph_fs_client *fsc = mdsc->fsc;
+       unsigned long timeout = fsc->client->options->mount_timeout * HZ;
 
        dout("close_sessions\n");
 
@@ -3119,7 +3129,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
        dout("stopped\n");
 }
 
-void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
+static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
 {
        dout("stop\n");
        cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
@@ -3129,6 +3139,15 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
        ceph_caps_finalize(mdsc);
 }
 
+void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
+{
+       struct ceph_mds_client *mdsc = fsc->mdsc;
+
+       ceph_mdsc_stop(mdsc);
+       fsc->mdsc = NULL;
+       kfree(mdsc);
+}
+
 
 /*
  * handle mds map update.
@@ -3145,14 +3164,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 
        ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad);
        ceph_decode_copy(&p, &fsid, sizeof(fsid));
-       if (ceph_check_fsid(mdsc->client, &fsid) < 0)
+       if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0)
                return;
        epoch = ceph_decode_32(&p);
        maplen = ceph_decode_32(&p);
        dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
 
        /* do we need it? */
-       ceph_monc_got_mdsmap(&mdsc->client->monc, epoch);
+       ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
        mutex_lock(&mdsc->mutex);
        if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
                dout("handle_map epoch %u <= our %u\n",
@@ -3176,7 +3195,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
        } else {
                mdsc->mdsmap = newmap;  /* first mds map */
        }
-       mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
+       mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
 
        __wake_requests(mdsc, &mdsc->waiting_for_map);
 
@@ -3277,7 +3296,7 @@ static int get_authorizer(struct ceph_connection *con,
 {
        struct ceph_mds_session *s = con->private;
        struct ceph_mds_client *mdsc = s->s_mdsc;
-       struct ceph_auth_client *ac = mdsc->client->monc.auth;
+       struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
        int ret = 0;
 
        if (force_new && s->s_authorizer) {
@@ -3311,7 +3330,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
 {
        struct ceph_mds_session *s = con->private;
        struct ceph_mds_client *mdsc = s->s_mdsc;
-       struct ceph_auth_client *ac = mdsc->client->monc.auth;
+       struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
 
        return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
 }
@@ -3320,12 +3339,12 @@ static int invalidate_authorizer(struct ceph_connection *con)
 {
        struct ceph_mds_session *s = con->private;
        struct ceph_mds_client *mdsc = s->s_mdsc;
-       struct ceph_auth_client *ac = mdsc->client->monc.auth;
+       struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
 
        if (ac->ops->invalidate_authorizer)
                ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS);
 
-       return ceph_monc_validate_auth(&mdsc->client->monc);
+       return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
 }
 
 static const struct ceph_connection_operations mds_con_ops = {
@@ -3338,7 +3357,4 @@ static const struct ceph_connection_operations mds_con_ops = {
        .peer_reset = peer_reset,
 };
 
-
-
-
 /* eof */
index c98267c..d66d63c 100644 (file)
@@ -8,9 +8,9 @@
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 
-#include "types.h"
-#include "messenger.h"
-#include "mdsmap.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/mdsmap.h>
 
 /*
  * Some lock dependencies:
@@ -26,7 +26,7 @@
  *
  */
 
-struct ceph_client;
+struct ceph_fs_client;
 struct ceph_cap;
 
 /*
@@ -230,7 +230,7 @@ struct ceph_mds_request {
  * mds client state
  */
 struct ceph_mds_client {
-       struct ceph_client      *client;
+       struct ceph_fs_client  *fsc;
        struct mutex            mutex;         /* all nested structures */
 
        struct ceph_mdsmap      *mdsmap;
@@ -289,11 +289,6 @@ struct ceph_mds_client {
        int             caps_avail_count;    /* unused, unreserved */
        int             caps_min_count;      /* keep at least this many
                                                (unreserved) */
-
-#ifdef CONFIG_DEBUG_FS
-       struct dentry     *debugfs_file;
-#endif
-
        spinlock_t        dentry_lru_lock;
        struct list_head  dentry_lru;
        int               num_dentry;
@@ -316,10 +311,9 @@ extern void ceph_put_mds_session(struct ceph_mds_session *s);
 extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
                             struct ceph_msg *msg, int mds);
 
-extern int ceph_mdsc_init(struct ceph_mds_client *mdsc,
-                          struct ceph_client *client);
+extern int ceph_mdsc_init(struct ceph_fs_client *fsc);
 extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
-extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc);
+extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc);
 
 extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
 
index 040be6d..73b7d44 100644 (file)
@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/bug.h>
 #include <linux/err.h>
@@ -6,9 +6,9 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
-#include "mdsmap.h"
-#include "messenger.h"
-#include "decode.h"
+#include <linux/ceph/mdsmap.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
 
 #include "super.h"
 
@@ -117,7 +117,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
                }
 
                dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
-                    i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr),
+                    i+1, n, global_id, mds, inc,
+                    ceph_pr_addr(&addr.in_addr),
                     ceph_mds_state_name(state));
                if (mds >= 0 && mds < m->m_max_mds && state > 0) {
                        m->m_info[mds].global_id = global_id;
index 190b6c4..39c243a 100644 (file)
@@ -1,10 +1,12 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/sort.h>
 #include <linux/slab.h>
 
 #include "super.h"
-#include "decode.h"
+#include "mds_client.h"
+
+#include <linux/ceph/decode.h>
 
 /*
  * Snapshots in ceph are driven in large part by cooperation from the
@@ -526,7 +528,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
                            struct ceph_cap_snap *capsnap)
 {
        struct inode *inode = &ci->vfs_inode;
-       struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 
        BUG_ON(capsnap->writing);
        capsnap->size = inode->i_size;
@@ -747,7 +749,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
                      struct ceph_mds_session *session,
                      struct ceph_msg *msg)
 {
-       struct super_block *sb = mdsc->client->sb;
+       struct super_block *sb = mdsc->fsc->sb;
        int mds = session->s_mds;
        u64 split;
        int op;
similarity index 59%
rename from fs/ceph/ceph_strings.c
rename to fs/ceph/strings.c
index c6179d3..cd5097d 100644 (file)
@@ -1,71 +1,9 @@
 /*
- * Ceph string constants
+ * Ceph fs string constants
  */
-#include "types.h"
+#include <linux/module.h>
+#include <linux/ceph/types.h>
 
-const char *ceph_entity_type_name(int type)
-{
-       switch (type) {
-       case CEPH_ENTITY_TYPE_MDS: return "mds";
-       case CEPH_ENTITY_TYPE_OSD: return "osd";
-       case CEPH_ENTITY_TYPE_MON: return "mon";
-       case CEPH_ENTITY_TYPE_CLIENT: return "client";
-       case CEPH_ENTITY_TYPE_AUTH: return "auth";
-       default: return "unknown";
-       }
-}
-
-const char *ceph_osd_op_name(int op)
-{
-       switch (op) {
-       case CEPH_OSD_OP_READ: return "read";
-       case CEPH_OSD_OP_STAT: return "stat";
-
-       case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
-
-       case CEPH_OSD_OP_WRITE: return "write";
-       case CEPH_OSD_OP_DELETE: return "delete";
-       case CEPH_OSD_OP_TRUNCATE: return "truncate";
-       case CEPH_OSD_OP_ZERO: return "zero";
-       case CEPH_OSD_OP_WRITEFULL: return "writefull";
-       case CEPH_OSD_OP_ROLLBACK: return "rollback";
-
-       case CEPH_OSD_OP_APPEND: return "append";
-       case CEPH_OSD_OP_STARTSYNC: return "startsync";
-       case CEPH_OSD_OP_SETTRUNC: return "settrunc";
-       case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc";
-
-       case CEPH_OSD_OP_TMAPUP: return "tmapup";
-       case CEPH_OSD_OP_TMAPGET: return "tmapget";
-       case CEPH_OSD_OP_TMAPPUT: return "tmapput";
-
-       case CEPH_OSD_OP_GETXATTR: return "getxattr";
-       case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
-       case CEPH_OSD_OP_SETXATTR: return "setxattr";
-       case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
-       case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
-       case CEPH_OSD_OP_RMXATTR: return "rmxattr";
-       case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
-
-       case CEPH_OSD_OP_PULL: return "pull";
-       case CEPH_OSD_OP_PUSH: return "push";
-       case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
-       case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
-       case CEPH_OSD_OP_SCRUB: return "scrub";
-
-       case CEPH_OSD_OP_WRLOCK: return "wrlock";
-       case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
-       case CEPH_OSD_OP_RDLOCK: return "rdlock";
-       case CEPH_OSD_OP_RDUNLOCK: return "rdunlock";
-       case CEPH_OSD_OP_UPLOCK: return "uplock";
-       case CEPH_OSD_OP_DNLOCK: return "dnlock";
-
-       case CEPH_OSD_OP_CALL: return "call";
-
-       case CEPH_OSD_OP_PGLS: return "pgls";
-       }
-       return "???";
-}
 
 const char *ceph_mds_state_name(int s)
 {
@@ -177,17 +115,3 @@ const char *ceph_snap_op_name(int o)
        }
        return "???";
 }
-
-const char *ceph_pool_op_name(int op)
-{
-       switch (op) {
-       case POOL_OP_CREATE: return "create";
-       case POOL_OP_DELETE: return "delete";
-       case POOL_OP_AUID_CHANGE: return "auid change";
-       case POOL_OP_CREATE_SNAP: return "create snap";
-       case POOL_OP_DELETE_SNAP: return "delete snap";
-       case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
-       case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
-       }
-       return "???";
-}
index 9922628..d6e0e04 100644 (file)
@@ -1,5 +1,5 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/backing-dev.h>
 #include <linux/ctype.h>
 #include <linux/statfs.h>
 #include <linux/string.h>
 
-#include "decode.h"
 #include "super.h"
-#include "mon_client.h"
-#include "auth.h"
+#include "mds_client.h"
+
+#include <linux/ceph/decode.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
 
 /*
  * Ceph superblock operations
  * Handle the basics of mounting, unmounting.
  */
 
-
-/*
- * find filename portion of a path (/foo/bar/baz -> baz)
- */
-const char *ceph_file_part(const char *s, int len)
-{
-       const char *e = s + len;
-
-       while (e != s && *(e-1) != '/')
-               e--;
-       return e;
-}
-
-
 /*
  * super ops
  */
 static void ceph_put_super(struct super_block *s)
 {
-       struct ceph_client *client = ceph_sb_to_client(s);
+       struct ceph_fs_client *fsc = ceph_sb_to_client(s);
 
        dout("put_super\n");
-       ceph_mdsc_close_sessions(&client->mdsc);
+       ceph_mdsc_close_sessions(fsc->mdsc);
 
        /*
         * ensure we release the bdi before put_anon_super releases
         * the device name.
         */
-       if (s->s_bdi == &client->backing_dev_info) {
-               bdi_unregister(&client->backing_dev_info);
+       if (s->s_bdi == &fsc->backing_dev_info) {
+               bdi_unregister(&fsc->backing_dev_info);
                s->s_bdi = NULL;
        }
 
@@ -64,14 +53,14 @@ static void ceph_put_super(struct super_block *s)
 
 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-       struct ceph_client *client = ceph_inode_to_client(dentry->d_inode);
-       struct ceph_monmap *monmap = client->monc.monmap;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
+       struct ceph_monmap *monmap = fsc->client->monc.monmap;
        struct ceph_statfs st;
        u64 fsid;
        int err;
 
        dout("statfs\n");
-       err = ceph_monc_do_statfs(&client->monc, &st);
+       err = ceph_monc_do_statfs(&fsc->client->monc, &st);
        if (err < 0)
                return err;
 
@@ -104,238 +93,28 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 static int ceph_sync_fs(struct super_block *sb, int wait)
 {
-       struct ceph_client *client = ceph_sb_to_client(sb);
+       struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 
        if (!wait) {
                dout("sync_fs (non-blocking)\n");
-               ceph_flush_dirty_caps(&client->mdsc);
+               ceph_flush_dirty_caps(fsc->mdsc);
                dout("sync_fs (non-blocking) done\n");
                return 0;
        }
 
        dout("sync_fs (blocking)\n");
-       ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc);
-       ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc);
+       ceph_osdc_sync(&fsc->client->osdc);
+       ceph_mdsc_sync(fsc->mdsc);
        dout("sync_fs (blocking) done\n");
        return 0;
 }
 
-static int default_congestion_kb(void)
-{
-       int congestion_kb;
-
-       /*
-        * Copied from NFS
-        *
-        * congestion size, scale with available memory.
-        *
-        *  64MB:    8192k
-        * 128MB:   11585k
-        * 256MB:   16384k
-        * 512MB:   23170k
-        *   1GB:   32768k
-        *   2GB:   46340k
-        *   4GB:   65536k
-        *   8GB:   92681k
-        *  16GB:  131072k
-        *
-        * This allows larger machines to have larger/more transfers.
-        * Limit the default to 256M
-        */
-       congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
-       if (congestion_kb > 256*1024)
-               congestion_kb = 256*1024;
-
-       return congestion_kb;
-}
-
-/**
- * ceph_show_options - Show mount options in /proc/mounts
- * @m: seq_file to write to
- * @mnt: mount descriptor
- */
-static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
-{
-       struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb);
-       struct ceph_mount_args *args = client->mount_args;
-
-       if (args->flags & CEPH_OPT_FSID)
-               seq_printf(m, ",fsid=%pU", &args->fsid);
-       if (args->flags & CEPH_OPT_NOSHARE)
-               seq_puts(m, ",noshare");
-       if (args->flags & CEPH_OPT_DIRSTAT)
-               seq_puts(m, ",dirstat");
-       if ((args->flags & CEPH_OPT_RBYTES) == 0)
-               seq_puts(m, ",norbytes");
-       if (args->flags & CEPH_OPT_NOCRC)
-               seq_puts(m, ",nocrc");
-       if (args->flags & CEPH_OPT_NOASYNCREADDIR)
-               seq_puts(m, ",noasyncreaddir");
-
-       if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
-               seq_printf(m, ",mount_timeout=%d", args->mount_timeout);
-       if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
-               seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl);
-       if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
-               seq_printf(m, ",osdtimeout=%d", args->osd_timeout);
-       if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
-               seq_printf(m, ",osdkeepalivetimeout=%d",
-                        args->osd_keepalive_timeout);
-       if (args->wsize)
-               seq_printf(m, ",wsize=%d", args->wsize);
-       if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
-               seq_printf(m, ",rsize=%d", args->rsize);
-       if (args->congestion_kb != default_congestion_kb())
-               seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb);
-       if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
-               seq_printf(m, ",caps_wanted_delay_min=%d",
-                        args->caps_wanted_delay_min);
-       if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
-               seq_printf(m, ",caps_wanted_delay_max=%d",
-                          args->caps_wanted_delay_max);
-       if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
-               seq_printf(m, ",cap_release_safety=%d",
-                          args->cap_release_safety);
-       if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT)
-               seq_printf(m, ",readdir_max_entries=%d", args->max_readdir);
-       if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
-               seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes);
-       if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
-               seq_printf(m, ",snapdirname=%s", args->snapdir_name);
-       if (args->name)
-               seq_printf(m, ",name=%s", args->name);
-       if (args->secret)
-               seq_puts(m, ",secret=<hidden>");
-       return 0;
-}
-
-/*
- * caches
- */
-struct kmem_cache *ceph_inode_cachep;
-struct kmem_cache *ceph_cap_cachep;
-struct kmem_cache *ceph_dentry_cachep;
-struct kmem_cache *ceph_file_cachep;
-
-static void ceph_inode_init_once(void *foo)
-{
-       struct ceph_inode_info *ci = foo;
-       inode_init_once(&ci->vfs_inode);
-}
-
-static int __init init_caches(void)
-{
-       ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
-                                     sizeof(struct ceph_inode_info),
-                                     __alignof__(struct ceph_inode_info),
-                                     (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
-                                     ceph_inode_init_once);
-       if (ceph_inode_cachep == NULL)
-               return -ENOMEM;
-
-       ceph_cap_cachep = KMEM_CACHE(ceph_cap,
-                                    SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-       if (ceph_cap_cachep == NULL)
-               goto bad_cap;
-
-       ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
-                                       SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-       if (ceph_dentry_cachep == NULL)
-               goto bad_dentry;
-
-       ceph_file_cachep = KMEM_CACHE(ceph_file_info,
-                                     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-       if (ceph_file_cachep == NULL)
-               goto bad_file;
-
-       return 0;
-
-bad_file:
-       kmem_cache_destroy(ceph_dentry_cachep);
-bad_dentry:
-       kmem_cache_destroy(ceph_cap_cachep);
-bad_cap:
-       kmem_cache_destroy(ceph_inode_cachep);
-       return -ENOMEM;
-}
-
-static void destroy_caches(void)
-{
-       kmem_cache_destroy(ceph_inode_cachep);
-       kmem_cache_destroy(ceph_cap_cachep);
-       kmem_cache_destroy(ceph_dentry_cachep);
-       kmem_cache_destroy(ceph_file_cachep);
-}
-
-
-/*
- * ceph_umount_begin - initiate forced umount.  Tear down down the
- * mount, skipping steps that may hang while waiting for server(s).
- */
-static void ceph_umount_begin(struct super_block *sb)
-{
-       struct ceph_client *client = ceph_sb_to_client(sb);
-
-       dout("ceph_umount_begin - starting forced umount\n");
-       if (!client)
-               return;
-       client->mount_state = CEPH_MOUNT_SHUTDOWN;
-       return;
-}
-
-static const struct super_operations ceph_super_ops = {
-       .alloc_inode    = ceph_alloc_inode,
-       .destroy_inode  = ceph_destroy_inode,
-       .write_inode    = ceph_write_inode,
-       .sync_fs        = ceph_sync_fs,
-       .put_super      = ceph_put_super,
-       .show_options   = ceph_show_options,
-       .statfs         = ceph_statfs,
-       .umount_begin   = ceph_umount_begin,
-};
-
-
-const char *ceph_msg_type_name(int type)
-{
-       switch (type) {
-       case CEPH_MSG_SHUTDOWN: return "shutdown";
-       case CEPH_MSG_PING: return "ping";
-       case CEPH_MSG_AUTH: return "auth";
-       case CEPH_MSG_AUTH_REPLY: return "auth_reply";
-       case CEPH_MSG_MON_MAP: return "mon_map";
-       case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
-       case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
-       case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
-       case CEPH_MSG_STATFS: return "statfs";
-       case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
-       case CEPH_MSG_MDS_MAP: return "mds_map";
-       case CEPH_MSG_CLIENT_SESSION: return "client_session";
-       case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
-       case CEPH_MSG_CLIENT_REQUEST: return "client_request";
-       case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
-       case CEPH_MSG_CLIENT_REPLY: return "client_reply";
-       case CEPH_MSG_CLIENT_CAPS: return "client_caps";
-       case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
-       case CEPH_MSG_CLIENT_SNAP: return "client_snap";
-       case CEPH_MSG_CLIENT_LEASE: return "client_lease";
-       case CEPH_MSG_OSD_MAP: return "osd_map";
-       case CEPH_MSG_OSD_OP: return "osd_op";
-       case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
-       default: return "unknown";
-       }
-}
-
-
 /*
  * mount options
  */
 enum {
        Opt_wsize,
        Opt_rsize,
-       Opt_osdtimeout,
-       Opt_osdkeepalivetimeout,
-       Opt_mount_timeout,
-       Opt_osd_idle_ttl,
        Opt_caps_wanted_delay_min,
        Opt_caps_wanted_delay_max,
        Opt_cap_release_safety,
@@ -344,29 +123,19 @@ enum {
        Opt_congestion_kb,
        Opt_last_int,
        /* int args above */
-       Opt_fsid,
        Opt_snapdirname,
-       Opt_name,
-       Opt_secret,
        Opt_last_string,
        /* string args above */
-       Opt_ip,
-       Opt_noshare,
        Opt_dirstat,
        Opt_nodirstat,
        Opt_rbytes,
        Opt_norbytes,
-       Opt_nocrc,
        Opt_noasyncreaddir,
 };
 
-static match_table_t arg_tokens = {
+static match_table_t fsopt_tokens = {
        {Opt_wsize, "wsize=%d"},
        {Opt_rsize, "rsize=%d"},
-       {Opt_osdtimeout, "osdtimeout=%d"},
-       {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
-       {Opt_mount_timeout, "mount_timeout=%d"},
-       {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
        {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
        {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
        {Opt_cap_release_safety, "cap_release_safety=%d"},
@@ -374,403 +143,459 @@ static match_table_t arg_tokens = {
        {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
        {Opt_congestion_kb, "write_congestion_kb=%d"},
        /* int args above */
-       {Opt_fsid, "fsid=%s"},
        {Opt_snapdirname, "snapdirname=%s"},
-       {Opt_name, "name=%s"},
-       {Opt_secret, "secret=%s"},
        /* string args above */
-       {Opt_ip, "ip=%s"},
-       {Opt_noshare, "noshare"},
        {Opt_dirstat, "dirstat"},
        {Opt_nodirstat, "nodirstat"},
        {Opt_rbytes, "rbytes"},
        {Opt_norbytes, "norbytes"},
-       {Opt_nocrc, "nocrc"},
        {Opt_noasyncreaddir, "noasyncreaddir"},
        {-1, NULL}
 };
 
-static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+static int parse_fsopt_token(char *c, void *private)
 {
-       int i = 0;
-       char tmp[3];
-       int err = -EINVAL;
-       int d;
-
-       dout("parse_fsid '%s'\n", str);
-       tmp[2] = 0;
-       while (*str && i < 16) {
-               if (ispunct(*str)) {
-                       str++;
-                       continue;
+       struct ceph_mount_options *fsopt = private;
+       substring_t argstr[MAX_OPT_ARGS];
+       int token, intval, ret;
+
+       token = match_token((char *)c, fsopt_tokens, argstr);
+       if (token < 0)
+               return -EINVAL;
+
+       if (token < Opt_last_int) {
+               ret = match_int(&argstr[0], &intval);
+               if (ret < 0) {
+                       pr_err("bad mount option arg (not int) "
+                              "at '%s'\n", c);
+                       return ret;
                }
-               if (!isxdigit(str[0]) || !isxdigit(str[1]))
-                       break;
-               tmp[0] = str[0];
-               tmp[1] = str[1];
-               if (sscanf(tmp, "%x", &d) < 1)
-                       break;
-               fsid->fsid[i] = d & 0xff;
-               i++;
-               str += 2;
+               dout("got int token %d val %d\n", token, intval);
+       } else if (token > Opt_last_int && token < Opt_last_string) {
+               dout("got string token %d val %s\n", token,
+                    argstr[0].from);
+       } else {
+               dout("got token %d\n", token);
        }
 
-       if (i == 16)
-               err = 0;
-       dout("parse_fsid ret %d got fsid %pU", err, fsid);
-       return err;
+       switch (token) {
+       case Opt_snapdirname:
+               kfree(fsopt->snapdir_name);
+               fsopt->snapdir_name = kstrndup(argstr[0].from,
+                                              argstr[0].to-argstr[0].from,
+                                              GFP_KERNEL);
+               if (!fsopt->snapdir_name)
+                       return -ENOMEM;
+               break;
+
+               /* misc */
+       case Opt_wsize:
+               fsopt->wsize = intval;
+               break;
+       case Opt_rsize:
+               fsopt->rsize = intval;
+               break;
+       case Opt_caps_wanted_delay_min:
+               fsopt->caps_wanted_delay_min = intval;
+               break;
+       case Opt_caps_wanted_delay_max:
+               fsopt->caps_wanted_delay_max = intval;
+               break;
+       case Opt_readdir_max_entries:
+               fsopt->max_readdir = intval;
+               break;
+       case Opt_readdir_max_bytes:
+               fsopt->max_readdir_bytes = intval;
+               break;
+       case Opt_congestion_kb:
+               fsopt->congestion_kb = intval;
+               break;
+       case Opt_dirstat:
+               fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
+               break;
+       case Opt_nodirstat:
+               fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
+               break;
+       case Opt_rbytes:
+               fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
+               break;
+       case Opt_norbytes:
+               fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
+               break;
+       case Opt_noasyncreaddir:
+               fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
+               break;
+       default:
+               BUG_ON(token);
+       }
+       return 0;
 }
 
-static struct ceph_mount_args *parse_mount_args(int flags, char *options,
-                                               const char *dev_name,
-                                               const char **path)
+static void destroy_mount_options(struct ceph_mount_options *args)
 {
-       struct ceph_mount_args *args;
-       const char *c;
-       int err = -ENOMEM;
-       substring_t argstr[MAX_OPT_ARGS];
+       dout("destroy_mount_options %p\n", args);
+       kfree(args->snapdir_name);
+       kfree(args);
+}
 
-       args = kzalloc(sizeof(*args), GFP_KERNEL);
-       if (!args)
-               return ERR_PTR(-ENOMEM);
-       args->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*args->mon_addr),
-                                GFP_KERNEL);
-       if (!args->mon_addr)
-               goto out;
+static int strcmp_null(const char *s1, const char *s2)
+{
+       if (!s1 && !s2)
+               return 0;
+       if (s1 && !s2)
+               return -1;
+       if (!s1 && s2)
+               return 1;
+       return strcmp(s1, s2);
+}
 
-       dout("parse_mount_args %p, dev_name '%s'\n", args, dev_name);
-
-       /* start with defaults */
-       args->sb_flags = flags;
-       args->flags = CEPH_OPT_DEFAULT;
-       args->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
-       args->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
-       args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
-       args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */
-       args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
-       args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
-       args->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
-       args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
-       args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
-       args->max_readdir = CEPH_MAX_READDIR_DEFAULT;
-       args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
-       args->congestion_kb = default_congestion_kb();
-
-       /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
-       err = -EINVAL;
-       if (!dev_name)
-               goto out;
-       *path = strstr(dev_name, ":/");
-       if (*path == NULL) {
-               pr_err("device name is missing path (no :/ in %s)\n",
-                      dev_name);
-               goto out;
-       }
+static int compare_mount_options(struct ceph_mount_options *new_fsopt,
+                                struct ceph_options *new_opt,
+                                struct ceph_fs_client *fsc)
+{
+       struct ceph_mount_options *fsopt1 = new_fsopt;
+       struct ceph_mount_options *fsopt2 = fsc->mount_options;
+       int ofs = offsetof(struct ceph_mount_options, snapdir_name);
+       int ret;
 
-       /* get mon ip(s) */
-       err = ceph_parse_ips(dev_name, *path, args->mon_addr,
-                            CEPH_MAX_MON, &args->num_mon);
-       if (err < 0)
-               goto out;
+       ret = memcmp(fsopt1, fsopt2, ofs);
+       if (ret)
+               return ret;
+
+       ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
+       if (ret)
+               return ret;
+
+       return ceph_compare_options(new_opt, fsc->client);
+}
+
+static int parse_mount_options(struct ceph_mount_options **pfsopt,
+                              struct ceph_options **popt,
+                              int flags, char *options,
+                              const char *dev_name,
+                              const char **path)
+{
+       struct ceph_mount_options *fsopt;
+       const char *dev_name_end;
+       int err = -ENOMEM;
+
+       fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
+       if (!fsopt)
+               return -ENOMEM;
+
+       dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
+
+        fsopt->sb_flags = flags;
+        fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
+
+        fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
+        fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+        fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
+        fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
+        fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
+        fsopt->congestion_kb = default_congestion_kb();
+       
+        /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
+        err = -EINVAL;
+        if (!dev_name)
+                goto out;
+        *path = strstr(dev_name, ":/");
+        if (*path == NULL) {
+                pr_err("device name is missing path (no :/ in %s)\n",
+                       dev_name);
+                goto out;
+        }
+       dev_name_end = *path;
+       dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
 
        /* path on server */
        *path += 2;
        dout("server path '%s'\n", *path);
 
-       /* parse mount options */
-       while ((c = strsep(&options, ",")) != NULL) {
-               int token, intval, ret;
-               if (!*c)
-                       continue;
-               err = -EINVAL;
-               token = match_token((char *)c, arg_tokens, argstr);
-               if (token < 0) {
-                       pr_err("bad mount option at '%s'\n", c);
-                       goto out;
-               }
-               if (token < Opt_last_int) {
-                       ret = match_int(&argstr[0], &intval);
-                       if (ret < 0) {
-                               pr_err("bad mount option arg (not int) "
-                                      "at '%s'\n", c);
-                               continue;
-                       }
-                       dout("got int token %d val %d\n", token, intval);
-               } else if (token > Opt_last_int && token < Opt_last_string) {
-                       dout("got string token %d val %s\n", token,
-                            argstr[0].from);
-               } else {
-                       dout("got token %d\n", token);
-               }
-               switch (token) {
-               case Opt_ip:
-                       err = ceph_parse_ips(argstr[0].from,
-                                            argstr[0].to,
-                                            &args->my_addr,
-                                            1, NULL);
-                       if (err < 0)
-                               goto out;
-                       args->flags |= CEPH_OPT_MYIP;
-                       break;
-
-               case Opt_fsid:
-                       err = parse_fsid(argstr[0].from, &args->fsid);
-                       if (err == 0)
-                               args->flags |= CEPH_OPT_FSID;
-                       break;
-               case Opt_snapdirname:
-                       kfree(args->snapdir_name);
-                       args->snapdir_name = kstrndup(argstr[0].from,
-                                             argstr[0].to-argstr[0].from,
-                                             GFP_KERNEL);
-                       break;
-               case Opt_name:
-                       args->name = kstrndup(argstr[0].from,
-                                             argstr[0].to-argstr[0].from,
-                                             GFP_KERNEL);
-                       break;
-               case Opt_secret:
-                       args->secret = kstrndup(argstr[0].from,
-                                               argstr[0].to-argstr[0].from,
-                                               GFP_KERNEL);
-                       break;
-
-                       /* misc */
-               case Opt_wsize:
-                       args->wsize = intval;
-                       break;
-               case Opt_rsize:
-                       args->rsize = intval;
-                       break;
-               case Opt_osdtimeout:
-                       args->osd_timeout = intval;
-                       break;
-               case Opt_osdkeepalivetimeout:
-                       args->osd_keepalive_timeout = intval;
-                       break;
-               case Opt_osd_idle_ttl:
-                       args->osd_idle_ttl = intval;
-                       break;
-               case Opt_mount_timeout:
-                       args->mount_timeout = intval;
-                       break;
-               case Opt_caps_wanted_delay_min:
-                       args->caps_wanted_delay_min = intval;
-                       break;
-               case Opt_caps_wanted_delay_max:
-                       args->caps_wanted_delay_max = intval;
-                       break;
-               case Opt_readdir_max_entries:
-                       args->max_readdir = intval;
-                       break;
-               case Opt_readdir_max_bytes:
-                       args->max_readdir_bytes = intval;
-                       break;
-               case Opt_congestion_kb:
-                       args->congestion_kb = intval;
-                       break;
-
-               case Opt_noshare:
-                       args->flags |= CEPH_OPT_NOSHARE;
-                       break;
-
-               case Opt_dirstat:
-                       args->flags |= CEPH_OPT_DIRSTAT;
-                       break;
-               case Opt_nodirstat:
-                       args->flags &= ~CEPH_OPT_DIRSTAT;
-                       break;
-               case Opt_rbytes:
-                       args->flags |= CEPH_OPT_RBYTES;
-                       break;
-               case Opt_norbytes:
-                       args->flags &= ~CEPH_OPT_RBYTES;
-                       break;
-               case Opt_nocrc:
-                       args->flags |= CEPH_OPT_NOCRC;
-                       break;
-               case Opt_noasyncreaddir:
-                       args->flags |= CEPH_OPT_NOASYNCREADDIR;
-                       break;
-
-               default:
-                       BUG_ON(token);
-               }
-       }
-       return args;
+       err = ceph_parse_options(popt, options, dev_name, dev_name_end,
+                                parse_fsopt_token, (void *)fsopt);
+       if (err)
+               goto out;
+
+       /* success */
+       *pfsopt = fsopt;
+       return 0;
 
 out:
-       kfree(args->mon_addr);
-       kfree(args);
-       return ERR_PTR(err);
+       destroy_mount_options(fsopt);
+       return err;
 }
 
-static void destroy_mount_args(struct ceph_mount_args *args)
+/**
+ * ceph_show_options - Show mount options in /proc/mounts
+ * @m: seq_file to write to
+ * @mnt: mount descriptor
+ */
+static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
 {
-       dout("destroy_mount_args %p\n", args);
-       kfree(args->snapdir_name);
-       args->snapdir_name = NULL;
-       kfree(args->name);
-       args->name = NULL;
-       kfree(args->secret);
-       args->secret = NULL;
-       kfree(args);
+       struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb);
+       struct ceph_mount_options *fsopt = fsc->mount_options;
+       struct ceph_options *opt = fsc->client->options;
+
+       if (opt->flags & CEPH_OPT_FSID)
+               seq_printf(m, ",fsid=%pU", &opt->fsid);
+       if (opt->flags & CEPH_OPT_NOSHARE)
+               seq_puts(m, ",noshare");
+       if (opt->flags & CEPH_OPT_NOCRC)
+               seq_puts(m, ",nocrc");
+
+       if (opt->name)
+               seq_printf(m, ",name=%s", opt->name);
+       if (opt->secret)
+               seq_puts(m, ",secret=<hidden>");
+
+       if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+               seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
+       if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+               seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
+       if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
+               seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
+       if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+               seq_printf(m, ",osdkeepalivetimeout=%d",
+                          opt->osd_keepalive_timeout);
+
+       if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
+               seq_puts(m, ",dirstat");
+       if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
+               seq_puts(m, ",norbytes");
+       if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
+               seq_puts(m, ",noasyncreaddir");
+
+       if (fsopt->wsize)
+               seq_printf(m, ",wsize=%d", fsopt->wsize);
+       if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
+               seq_printf(m, ",rsize=%d", fsopt->rsize);
+       if (fsopt->congestion_kb != default_congestion_kb())
+               seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
+       if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
+               seq_printf(m, ",caps_wanted_delay_min=%d",
+                        fsopt->caps_wanted_delay_min);
+       if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
+               seq_printf(m, ",caps_wanted_delay_max=%d",
+                          fsopt->caps_wanted_delay_max);
+       if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
+               seq_printf(m, ",cap_release_safety=%d",
+                          fsopt->cap_release_safety);
+       if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
+               seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
+       if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
+               seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
+       if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
+               seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+       return 0;
 }
 
 /*
- * create a fresh client instance
+ * handle any mon messages the standard library doesn't understand.
+ * return error if we don't either.
  */
-static struct ceph_client *ceph_create_client(struct ceph_mount_args *args)
+static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
 {
-       struct ceph_client *client;
+       struct ceph_fs_client *fsc = client->private;
+       int type = le16_to_cpu(msg->hdr.type);
+
+       switch (type) {
+       case CEPH_MSG_MDS_MAP:
+               ceph_mdsc_handle_map(fsc->mdsc, msg);
+               return 0;
+
+       default:
+               return -1;
+       }
+}
+
+/*
+ * create a new fs client
+ */
+struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
+                                       struct ceph_options *opt)
+{
+       struct ceph_fs_client *fsc;
        int err = -ENOMEM;
 
-       client = kzalloc(sizeof(*client), GFP_KERNEL);
-       if (client == NULL)
+       fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
+       if (!fsc)
                return ERR_PTR(-ENOMEM);
 
-       mutex_init(&client->mount_mutex);
-
-       init_waitqueue_head(&client->auth_wq);
+       fsc->client = ceph_create_client(opt, fsc);
+       if (IS_ERR(fsc->client)) {
+               err = PTR_ERR(fsc->client);
+               goto fail;
+       }
+       fsc->client->extra_mon_dispatch = extra_mon_dispatch;
+       fsc->client->supported_features |= CEPH_FEATURE_FLOCK;
+       fsc->client->monc.want_mdsmap = 1;
 
-       client->sb = NULL;
-       client->mount_state = CEPH_MOUNT_MOUNTING;
-       client->mount_args = args;
+       fsc->mount_options = fsopt;
 
-       client->msgr = NULL;
+       fsc->sb = NULL;
+       fsc->mount_state = CEPH_MOUNT_MOUNTING;
 
-       client->auth_err = 0;
-       atomic_long_set(&client->writeback_count, 0);
+       atomic_long_set(&fsc->writeback_count, 0);
 
-       err = bdi_init(&client->backing_dev_info);
+       err = bdi_init(&fsc->backing_dev_info);
        if (err < 0)
-               goto fail;
+               goto fail_client;
 
        err = -ENOMEM;
-       client->wb_wq = create_workqueue("ceph-writeback");
-       if (client->wb_wq == NULL)
+       fsc->wb_wq = create_workqueue("ceph-writeback");
+       if (fsc->wb_wq == NULL)
                goto fail_bdi;
-       client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid");
-       if (client->pg_inv_wq == NULL)
+       fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid");
+       if (fsc->pg_inv_wq == NULL)
                goto fail_wb_wq;
-       client->trunc_wq = create_singlethread_workqueue("ceph-trunc");
-       if (client->trunc_wq == NULL)
+       fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc");
+       if (fsc->trunc_wq == NULL)
                goto fail_pg_inv_wq;
 
        /* set up mempools */
        err = -ENOMEM;
-       client->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
-                             client->mount_args->wsize >> PAGE_CACHE_SHIFT);
-       if (!client->wb_pagevec_pool)
+       fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
+                             fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
+       if (!fsc->wb_pagevec_pool)
                goto fail_trunc_wq;
 
        /* caps */
-       client->min_caps = args->max_readdir;
+       fsc->min_caps = fsopt->max_readdir;
+
+       return fsc;
 
-       /* subsystems */
-       err = ceph_monc_init(&client->monc, client);
-       if (err < 0)
-               goto fail_mempool;
-       err = ceph_osdc_init(&client->osdc, client);
-       if (err < 0)
-               goto fail_monc;
-       err = ceph_mdsc_init(&client->mdsc, client);
-       if (err < 0)
-               goto fail_osdc;
-       return client;
-
-fail_osdc:
-       ceph_osdc_stop(&client->osdc);
-fail_monc:
-       ceph_monc_stop(&client->monc);
-fail_mempool:
-       mempool_destroy(client->wb_pagevec_pool);
 fail_trunc_wq:
-       destroy_workqueue(client->trunc_wq);
+       destroy_workqueue(fsc->trunc_wq);
 fail_pg_inv_wq:
-       destroy_workqueue(client->pg_inv_wq);
+       destroy_workqueue(fsc->pg_inv_wq);
 fail_wb_wq:
-       destroy_workqueue(client->wb_wq);
+       destroy_workqueue(fsc->wb_wq);
 fail_bdi:
-       bdi_destroy(&client->backing_dev_info);
+       bdi_destroy(&fsc->backing_dev_info);
+fail_client:
+       ceph_destroy_client(fsc->client);
 fail:
-       kfree(client);
+       kfree(fsc);
        return ERR_PTR(err);
 }
 
-static void ceph_destroy_client(struct ceph_client *client)
+void destroy_fs_client(struct ceph_fs_client *fsc)
 {
-       dout("destroy_client %p\n", client);
+       dout("destroy_fs_client %p\n", fsc);
 
-       /* unmount */
-       ceph_mdsc_stop(&client->mdsc);
-       ceph_osdc_stop(&client->osdc);
+       destroy_workqueue(fsc->wb_wq);
+       destroy_workqueue(fsc->pg_inv_wq);
+       destroy_workqueue(fsc->trunc_wq);
 
-       /*
-        * make sure mds and osd connections close out before destroying
-        * the auth module, which is needed to free those connections'
-        * ceph_authorizers.
-        */
-       ceph_msgr_flush();
-
-       ceph_monc_stop(&client->monc);
+       bdi_destroy(&fsc->backing_dev_info);
 
-       ceph_debugfs_client_cleanup(client);
-       destroy_workqueue(client->wb_wq);
-       destroy_workqueue(client->pg_inv_wq);
-       destroy_workqueue(client->trunc_wq);
+       mempool_destroy(fsc->wb_pagevec_pool);
 
-       bdi_destroy(&client->backing_dev_info);
+       destroy_mount_options(fsc->mount_options);
 
-       if (client->msgr)
-               ceph_messenger_destroy(client->msgr);
-       mempool_destroy(client->wb_pagevec_pool);
+       ceph_fs_debugfs_cleanup(fsc);
 
-       destroy_mount_args(client->mount_args);
+       ceph_destroy_client(fsc->client);
 
-       kfree(client);
-       dout("destroy_client %p done\n", client);
+       kfree(fsc);
+       dout("destroy_fs_client %p done\n", fsc);
 }
 
 /*
- * Initially learn our fsid, or verify an fsid matches.
+ * caches
  */
-int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
+struct kmem_cache *ceph_inode_cachep;
+struct kmem_cache *ceph_cap_cachep;
+struct kmem_cache *ceph_dentry_cachep;
+struct kmem_cache *ceph_file_cachep;
+
+static void ceph_inode_init_once(void *foo)
 {
-       if (client->have_fsid) {
-               if (ceph_fsid_compare(&client->fsid, fsid)) {
-                       pr_err("bad fsid, had %pU got %pU",
-                              &client->fsid, fsid);
-                       return -1;
-               }
-       } else {
-               pr_info("client%lld fsid %pU\n", client->monc.auth->global_id,
-                       fsid);
-               memcpy(&client->fsid, fsid, sizeof(*fsid));
-               ceph_debugfs_client_init(client);
-               client->have_fsid = true;
-       }
+       struct ceph_inode_info *ci = foo;
+       inode_init_once(&ci->vfs_inode);
+}
+
+static int __init init_caches(void)
+{
+       ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
+                                     sizeof(struct ceph_inode_info),
+                                     __alignof__(struct ceph_inode_info),
+                                     (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+                                     ceph_inode_init_once);
+       if (ceph_inode_cachep == NULL)
+               return -ENOMEM;
+
+       ceph_cap_cachep = KMEM_CACHE(ceph_cap,
+                                    SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+       if (ceph_cap_cachep == NULL)
+               goto bad_cap;
+
+       ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
+                                       SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+       if (ceph_dentry_cachep == NULL)
+               goto bad_dentry;
+
+       ceph_file_cachep = KMEM_CACHE(ceph_file_info,
+                                     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+       if (ceph_file_cachep == NULL)
+               goto bad_file;
+
        return 0;
+
+bad_file:
+       kmem_cache_destroy(ceph_dentry_cachep);
+bad_dentry:
+       kmem_cache_destroy(ceph_cap_cachep);
+bad_cap:
+       kmem_cache_destroy(ceph_inode_cachep);
+       return -ENOMEM;
 }
 
+static void destroy_caches(void)
+{
+       kmem_cache_destroy(ceph_inode_cachep);
+       kmem_cache_destroy(ceph_cap_cachep);
+       kmem_cache_destroy(ceph_dentry_cachep);
+       kmem_cache_destroy(ceph_file_cachep);
+}
+
+
 /*
- * true if we have the mon map (and have thus joined the cluster)
+ * ceph_umount_begin - initiate forced umount.  Tear down down the
+ * mount, skipping steps that may hang while waiting for server(s).
  */
-static int have_mon_and_osd_map(struct ceph_client *client)
+static void ceph_umount_begin(struct super_block *sb)
 {
-       return client->monc.monmap && client->monc.monmap->epoch &&
-              client->osdc.osdmap && client->osdc.osdmap->epoch;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+
+       dout("ceph_umount_begin - starting forced umount\n");
+       if (!fsc)
+               return;
+       fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
+       return;
 }
 
+static const struct super_operations ceph_super_ops = {
+       .alloc_inode    = ceph_alloc_inode,
+       .destroy_inode  = ceph_destroy_inode,
+       .write_inode    = ceph_write_inode,
+       .sync_fs        = ceph_sync_fs,
+       .put_super      = ceph_put_super,
+       .show_options   = ceph_show_options,
+       .statfs         = ceph_statfs,
+       .umount_begin   = ceph_umount_begin,
+};
+
 /*
  * Bootstrap mount by opening the root directory.  Note the mount
  * @started time from caller, and time out if this takes too long.
  */
-static struct dentry *open_root_dentry(struct ceph_client *client,
+static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
                                       const char *path,
                                       unsigned long started)
 {
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req = NULL;
        int err;
        struct dentry *root;
@@ -784,14 +609,14 @@ static struct dentry *open_root_dentry(struct ceph_client *client,
        req->r_ino1.ino = CEPH_INO_ROOT;
        req->r_ino1.snap = CEPH_NOSNAP;
        req->r_started = started;
-       req->r_timeout = client->mount_args->mount_timeout * HZ;
+       req->r_timeout = fsc->client->options->mount_timeout * HZ;
        req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
        req->r_num_caps = 2;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
        if (err == 0) {
                dout("open_root_inode success\n");
                if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
-                   client->sb->s_root == NULL)
+                   fsc->sb->s_root == NULL)
                        root = d_alloc_root(req->r_target_inode);
                else
                        root = d_obtain_alias(req->r_target_inode);
@@ -804,105 +629,86 @@ static struct dentry *open_root_dentry(struct ceph_client *client,
        return root;
 }
 
+
+
+
 /*
  * mount: join the ceph cluster, and open root directory.
  */
-static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
+static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
                      const char *path)
 {
-       struct ceph_entity_addr *myaddr = NULL;
        int err;
-       unsigned long timeout = client->mount_args->mount_timeout * HZ;
        unsigned long started = jiffies;  /* note the start time */
        struct dentry *root;
+       int first = 0;   /* first vfsmount for this super_block */
 
        dout("mount start\n");
-       mutex_lock(&client->mount_mutex);
-
-       /* initialize the messenger */
-       if (client->msgr == NULL) {
-               if (ceph_test_opt(client, MYIP))
-                       myaddr = &client->mount_args->my_addr;
-               client->msgr = ceph_messenger_create(myaddr);
-               if (IS_ERR(client->msgr)) {
-                       err = PTR_ERR(client->msgr);
-                       client->msgr = NULL;
-                       goto out;
-               }
-               client->msgr->nocrc = ceph_test_opt(client, NOCRC);
-       }
+       mutex_lock(&fsc->client->mount_mutex);
 
-       /* open session, and wait for mon, mds, and osd maps */
-       err = ceph_monc_open_session(&client->monc);
+       err = __ceph_open_session(fsc->client, started);
        if (err < 0)
                goto out;
 
-       while (!have_mon_and_osd_map(client)) {
-               err = -EIO;
-               if (timeout && time_after_eq(jiffies, started + timeout))
-                       goto out;
-
-               /* wait */
-               dout("mount waiting for mon_map\n");
-               err = wait_event_interruptible_timeout(client->auth_wq,
-                      have_mon_and_osd_map(client) || (client->auth_err < 0),
-                      timeout);
-               if (err == -EINTR || err == -ERESTARTSYS)
-                       goto out;
-               if (client->auth_err < 0) {
-                       err = client->auth_err;
-                       goto out;
-               }
-       }
-
        dout("mount opening root\n");
-       root = open_root_dentry(client, "", started);
+       root = open_root_dentry(fsc, "", started);
        if (IS_ERR(root)) {
                err = PTR_ERR(root);
                goto out;
        }
-       if (client->sb->s_root)
+       if (fsc->sb->s_root) {
                dput(root);
-       else
-               client->sb->s_root = root;
+       } else {
+               fsc->sb->s_root = root;
+               first = 1;
+
+               err = ceph_fs_debugfs_init(fsc);
+               if (err < 0)
+                       goto fail;
+       }
 
        if (path[0] == 0) {
                dget(root);
        } else {
                dout("mount opening base mountpoint\n");
-               root = open_root_dentry(client, path, started);
+               root = open_root_dentry(fsc, path, started);
                if (IS_ERR(root)) {
                        err = PTR_ERR(root);
-                       dput(client->sb->s_root);
-                       client->sb->s_root = NULL;
-                       goto out;
+                       goto fail;
                }
        }
 
        mnt->mnt_root = root;
-       mnt->mnt_sb = client->sb;
+       mnt->mnt_sb = fsc->sb;
 
-       client->mount_state = CEPH_MOUNT_MOUNTED;
+       fsc->mount_state = CEPH_MOUNT_MOUNTED;
        dout("mount success\n");
        err = 0;
 
 out:
-       mutex_unlock(&client->mount_mutex);
+       mutex_unlock(&fsc->client->mount_mutex);
        return err;
+
+fail:
+       if (first) {
+               dput(fsc->sb->s_root);
+               fsc->sb->s_root = NULL;
+       }
+       goto out;
 }
 
 static int ceph_set_super(struct super_block *s, void *data)
 {
-       struct ceph_client *client = data;
+       struct ceph_fs_client *fsc = data;
        int ret;
 
        dout("set_super %p data %p\n", s, data);
 
-       s->s_flags = client->mount_args->sb_flags;
+       s->s_flags = fsc->mount_options->sb_flags;
        s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
 
-       s->s_fs_info = client;
-       client->sb = s;
+       s->s_fs_info = fsc;
+       fsc->sb = s;
 
        s->s_op = &ceph_super_ops;
        s->s_export_op = &ceph_export_ops;
@@ -917,7 +723,7 @@ static int ceph_set_super(struct super_block *s, void *data)
 
 fail:
        s->s_fs_info = NULL;
-       client->sb = NULL;
+       fsc->sb = NULL;
        return ret;
 }
 
@@ -926,30 +732,23 @@ fail:
  */
 static int ceph_compare_super(struct super_block *sb, void *data)
 {
-       struct ceph_client *new = data;
-       struct ceph_mount_args *args = new->mount_args;
-       struct ceph_client *other = ceph_sb_to_client(sb);
-       int i;
+       struct ceph_fs_client *new = data;
+       struct ceph_mount_options *fsopt = new->mount_options;
+       struct ceph_options *opt = new->client->options;
+       struct ceph_fs_client *other = ceph_sb_to_client(sb);
 
        dout("ceph_compare_super %p\n", sb);
-       if (args->flags & CEPH_OPT_FSID) {
-               if (ceph_fsid_compare(&args->fsid, &other->fsid)) {
-                       dout("fsid doesn't match\n");
-                       return 0;
-               }
-       } else {
-               /* do we share (a) monitor? */
-               for (i = 0; i < new->monc.monmap->num_mon; i++)
-                       if (ceph_monmap_contains(other->monc.monmap,
-                                        &new->monc.monmap->mon_inst[i].addr))
-                               break;
-               if (i == new->monc.monmap->num_mon) {
-                       dout("mon ip not part of monmap\n");
-                       return 0;
-               }
-               dout("mon ip matches existing sb %p\n", sb);
+
+       if (compare_mount_options(fsopt, opt, other)) {
+               dout("monitor(s)/mount options don't match\n");
+               return 0;
        }
-       if (args->sb_flags != other->mount_args->sb_flags) {
+       if ((opt->flags & CEPH_OPT_FSID) &&
+           ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
+               dout("fsid doesn't match\n");
+               return 0;
+       }
+       if (fsopt->sb_flags != other->mount_options->sb_flags) {
                dout("flags differ\n");
                return 0;
        }
@@ -961,19 +760,20 @@ static int ceph_compare_super(struct super_block *sb, void *data)
  */
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
-static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
+static int ceph_register_bdi(struct super_block *sb,
+                            struct ceph_fs_client *fsc)
 {
        int err;
 
        /* set ra_pages based on rsize mount option? */
-       if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
-               client->backing_dev_info.ra_pages =
-                       (client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
+       if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
+               fsc->backing_dev_info.ra_pages =
+                       (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
                        >> PAGE_SHIFT;
-       err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d",
+       err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
                           atomic_long_inc_return(&bdi_seq));
        if (!err)
-               sb->s_bdi = &client->backing_dev_info;
+               sb->s_bdi = &fsc->backing_dev_info;
        return err;
 }
 
@@ -982,46 +782,52 @@ static int ceph_get_sb(struct file_system_type *fs_type,
                       struct vfsmount *mnt)
 {
        struct super_block *sb;
-       struct ceph_client *client;
+       struct ceph_fs_client *fsc;
        int err;
        int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
        const char *path = NULL;
-       struct ceph_mount_args *args;
+       struct ceph_mount_options *fsopt = NULL;
+       struct ceph_options *opt = NULL;
 
        dout("ceph_get_sb\n");
-       args = parse_mount_args(flags, data, dev_name, &path);
-       if (IS_ERR(args)) {
-               err = PTR_ERR(args);
+       err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
+       if (err < 0)
                goto out_final;
-       }
 
        /* create client (which we may/may not use) */
-       client = ceph_create_client(args);
-       if (IS_ERR(client)) {
-               err = PTR_ERR(client);
+       fsc = create_fs_client(fsopt, opt);
+       if (IS_ERR(fsc)) {
+               err = PTR_ERR(fsc);
+               kfree(fsopt);
+               kfree(opt);
                goto out_final;
        }
 
-       if (client->mount_args->flags & CEPH_OPT_NOSHARE)
+       err = ceph_mdsc_init(fsc);
+       if (err < 0)
+               goto out;
+
+       if (ceph_test_opt(fsc->client, NOSHARE))
                compare_super = NULL;
-       sb = sget(fs_type, compare_super, ceph_set_super, client);
+       sb = sget(fs_type, compare_super, ceph_set_super, fsc);
        if (IS_ERR(sb)) {
                err = PTR_ERR(sb);
                goto out;
        }
 
-       if (ceph_sb_to_client(sb) != client) {
-               ceph_destroy_client(client);
-               client = ceph_sb_to_client(sb);
-               dout("get_sb got existing client %p\n", client);
+       if (ceph_sb_to_client(sb) != fsc) {
+               ceph_mdsc_destroy(fsc);
+               destroy_fs_client(fsc);
+               fsc = ceph_sb_to_client(sb);
+               dout("get_sb got existing client %p\n", fsc);
        } else {
-               dout("get_sb using new client %p\n", client);
-               err = ceph_register_bdi(sb, client);
+               dout("get_sb using new client %p\n", fsc);
+               err = ceph_register_bdi(sb, fsc);
                if (err < 0)
                        goto out_splat;
        }
 
-       err = ceph_mount(client, mnt, path);
+       err = ceph_mount(fsc, mnt, path);
        if (err < 0)
                goto out_splat;
        dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root,
@@ -1029,12 +835,13 @@ static int ceph_get_sb(struct file_system_type *fs_type,
        return 0;
 
 out_splat:
-       ceph_mdsc_close_sessions(&client->mdsc);
+       ceph_mdsc_close_sessions(fsc->mdsc);
        deactivate_locked_super(sb);
        goto out_final;
 
 out:
-       ceph_destroy_client(client);
+       ceph_mdsc_destroy(fsc);
+       destroy_fs_client(fsc);
 out_final:
        dout("ceph_get_sb fail %d\n", err);
        return err;
@@ -1042,11 +849,12 @@ out_final:
 
 static void ceph_kill_sb(struct super_block *s)
 {
-       struct ceph_client *client = ceph_sb_to_client(s);
+       struct ceph_fs_client *fsc = ceph_sb_to_client(s);
        dout("kill_sb %p\n", s);
-       ceph_mdsc_pre_umount(&client->mdsc);
+       ceph_mdsc_pre_umount(fsc->mdsc);
        kill_anon_super(s);    /* will call put_super after sb is r/o */
-       ceph_destroy_client(client);
+       ceph_mdsc_destroy(fsc);
+       destroy_fs_client(fsc);
 }
 
 static struct file_system_type ceph_fs_type = {
@@ -1062,36 +870,20 @@ static struct file_system_type ceph_fs_type = {
 
 static int __init init_ceph(void)
 {
-       int ret = 0;
-
-       ret = ceph_debugfs_init();
-       if (ret < 0)
-               goto out;
-
-       ret = ceph_msgr_init();
-       if (ret < 0)
-               goto out_debugfs;
-
-       ret = init_caches();
+       int ret = init_caches();
        if (ret)
-               goto out_msgr;
+               goto out;
 
        ret = register_filesystem(&ceph_fs_type);
        if (ret)
                goto out_icache;
 
-       pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
-               CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
-               CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
-               CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
+       pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
+
        return 0;
 
 out_icache:
        destroy_caches();
-out_msgr:
-       ceph_msgr_exit();
-out_debugfs:
-       ceph_debugfs_cleanup();
 out:
        return ret;
 }
@@ -1101,8 +893,6 @@ static void __exit exit_ceph(void)
        dout("exit_ceph\n");
        unregister_filesystem(&ceph_fs_type);
        destroy_caches();
-       ceph_msgr_exit();
-       ceph_debugfs_cleanup();
 }
 
 module_init(init_ceph);
index b87638e..e2e9044 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _FS_CEPH_SUPER_H
 #define _FS_CEPH_SUPER_H
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <asm/unaligned.h>
 #include <linux/backing-dev.h>
 #include <linux/writeback.h>
 #include <linux/slab.h>
 
-#include "types.h"
-#include "messenger.h"
-#include "msgpool.h"
-#include "mon_client.h"
-#include "mds_client.h"
-#include "osd_client.h"
-#include "ceph_fs.h"
+#include <linux/ceph/libceph.h>
 
 /* f_type in struct statfs */
 #define CEPH_SUPER_MAGIC 0x00c36400
 #define CEPH_BLOCK_SHIFT   20  /* 1 MB */
 #define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT)
 
-/*
- * Supported features
- */
-#define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK
-#define CEPH_FEATURE_REQUIRED  CEPH_FEATURE_NOSRCADDR
+#define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
+#define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */
+#define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
 
-/*
- * mount options
- */
-#define CEPH_OPT_FSID             (1<<0)
-#define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */
-#define CEPH_OPT_MYIP             (1<<2) /* specified my ip */
-#define CEPH_OPT_DIRSTAT          (1<<4) /* funky `cat dirname` for stats */
-#define CEPH_OPT_RBYTES           (1<<5) /* dir st_bytes = rbytes */
-#define CEPH_OPT_NOCRC            (1<<6) /* no data crc on writes */
-#define CEPH_OPT_NOASYNCREADDIR   (1<<7) /* no dcache readdir */
+#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
 
-#define CEPH_OPT_DEFAULT   (CEPH_OPT_RBYTES)
+#define ceph_set_mount_opt(fsc, opt) \
+       (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
+#define ceph_test_mount_opt(fsc, opt) \
+       (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
 
-#define ceph_set_opt(client, opt) \
-       (client)->mount_args->flags |= CEPH_OPT_##opt;
-#define ceph_test_opt(client, opt) \
-       (!!((client)->mount_args->flags & CEPH_OPT_##opt))
+#define CEPH_MAX_READDIR_DEFAULT        1024
+#define CEPH_MAX_READDIR_BYTES_DEFAULT  (512*1024)
+#define CEPH_SNAPDIRNAME_DEFAULT        ".snap"
 
-
-struct ceph_mount_args {
-       int sb_flags;
+struct ceph_mount_options {
        int flags;
-       struct ceph_fsid fsid;
-       struct ceph_entity_addr my_addr;
-       int num_mon;
-       struct ceph_entity_addr *mon_addr;
-       int mount_timeout;
-       int osd_idle_ttl;
-       int osd_timeout;
-       int osd_keepalive_timeout;
+       int sb_flags;
+
        int wsize;
        int rsize;            /* max readahead */
        int congestion_kb;    /* max writeback in flight */
@@ -73,82 +50,25 @@ struct ceph_mount_args {
        int cap_release_safety;
        int max_readdir;       /* max readdir result (entires) */
        int max_readdir_bytes; /* max readdir result (bytes) */
-       char *snapdir_name;   /* default ".snap" */
-       char *name;
-       char *secret;
-};
-
-/*
- * defaults
- */
-#define CEPH_MOUNT_TIMEOUT_DEFAULT  60
-#define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */
-#define CEPH_OSD_KEEPALIVE_DEFAULT  5
-#define CEPH_OSD_IDLE_TTL_DEFAULT    60
-#define CEPH_MOUNT_RSIZE_DEFAULT    (512*1024) /* readahead */
-#define CEPH_MAX_READDIR_DEFAULT    1024
-#define CEPH_MAX_READDIR_BYTES_DEFAULT    (512*1024)
-
-#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
-#define CEPH_MSG_MAX_DATA_LEN  (16*1024*1024)
-
-#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
-#define CEPH_AUTH_NAME_DEFAULT   "guest"
-/*
- * Delay telling the MDS we no longer want caps, in case we reopen
- * the file.  Delay a minimum amount of time, even if we send a cap
- * message for some other reason.  Otherwise, take the oppotunity to
- * update the mds to avoid sending another message later.
- */
-#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */
-#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */
-
-#define CEPH_CAP_RELEASE_SAFETY_DEFAULT        (CEPH_CAPS_PER_RELEASE * 4)
-
-/* mount state */
-enum {
-       CEPH_MOUNT_MOUNTING,
-       CEPH_MOUNT_MOUNTED,
-       CEPH_MOUNT_UNMOUNTING,
-       CEPH_MOUNT_UNMOUNTED,
-       CEPH_MOUNT_SHUTDOWN,
-};
 
-/*
- * subtract jiffies
- */
-static inline unsigned long time_sub(unsigned long a, unsigned long b)
-{
-       BUG_ON(time_after(b, a));
-       return (long)a - (long)b;
-}
-
-/*
- * per-filesystem client state
- *
- * possibly shared by multiple mount points, if they are
- * mounting the same ceph filesystem/cluster.
- */
-struct ceph_client {
-       struct ceph_fsid fsid;
-       bool have_fsid;
+       /*
+        * everything above this point can be memcmp'd; everything below
+        * is handled in compare_mount_options()
+        */
 
-       struct mutex mount_mutex;       /* serialize mount attempts */
-       struct ceph_mount_args *mount_args;
+       char *snapdir_name;   /* default ".snap" */
+};
 
+struct ceph_fs_client {
        struct super_block *sb;
 
-       unsigned long mount_state;
-       wait_queue_head_t auth_wq;
-
-       int auth_err;
+       struct ceph_mount_options *mount_options;
+       struct ceph_client *client;
 
+       unsigned long mount_state;
        int min_caps;                  /* min caps i added */
 
-       struct ceph_messenger *msgr;   /* messenger instance */
-       struct ceph_mon_client monc;
-       struct ceph_mds_client mdsc;
-       struct ceph_osd_client osdc;
+       struct ceph_mds_client *mdsc;
 
        /* writeback */
        mempool_t *wb_pagevec_pool;
@@ -160,14 +80,14 @@ struct ceph_client {
        struct backing_dev_info backing_dev_info;
 
 #ifdef CONFIG_DEBUG_FS
-       struct dentry *debugfs_monmap;
-       struct dentry *debugfs_mdsmap, *debugfs_osdmap;
-       struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps;
+       struct dentry *debugfs_dentry_lru, *debugfs_caps;
        struct dentry *debugfs_congestion_kb;
        struct dentry *debugfs_bdi;
+       struct dentry *debugfs_mdsc, *debugfs_mdsmap;
 #endif
 };
 
+
 /*
  * File i/o capability.  This tracks shared state with the metadata
  * server that allows us to cache or writeback attributes or to read
@@ -275,6 +195,20 @@ struct ceph_inode_xattr {
        int should_free_val;
 };
 
+/*
+ * Ceph dentry state
+ */
+struct ceph_dentry_info {
+       struct ceph_mds_session *lease_session;
+       u32 lease_gen, lease_shared_gen;
+       u32 lease_seq;
+       unsigned long lease_renew_after, lease_renew_from;
+       struct list_head lru;
+       struct dentry *dentry;
+       u64 time;
+       u64 offset;
+};
+
 struct ceph_inode_xattrs_info {
        /*
         * (still encoded) xattr blob. we avoid the overhead of parsing
@@ -296,11 +230,6 @@ struct ceph_inode_xattrs_info {
 /*
  * Ceph inode.
  */
-#define CEPH_I_COMPLETE  1  /* we have complete directory cached */
-#define CEPH_I_NODELAY   4  /* do not delay cap release */
-#define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
-
 struct ceph_inode_info {
        struct ceph_vino i_vino;   /* ceph ino + snap */
 
@@ -391,6 +320,63 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
        return container_of(inode, struct ceph_inode_info, vfs_inode);
 }
 
+static inline struct ceph_vino ceph_vino(struct inode *inode)
+{
+       return ceph_inode(inode)->i_vino;
+}
+
+/*
+ * ino_t is <64 bits on many architectures, blech.
+ *
+ * don't include snap in ino hash, at least for now.
+ */
+static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
+{
+       ino_t ino = (ino_t)vino.ino;  /* ^ (vino.snap << 20); */
+#if BITS_PER_LONG == 32
+       ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8;
+       if (!ino)
+               ino = 1;
+#endif
+       return ino;
+}
+
+/* for printf-style formatting */
+#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
+
+static inline u64 ceph_ino(struct inode *inode)
+{
+       return ceph_inode(inode)->i_vino.ino;
+}
+static inline u64 ceph_snap(struct inode *inode)
+{
+       return ceph_inode(inode)->i_vino.snap;
+}
+
+static inline int ceph_ino_compare(struct inode *inode, void *data)
+{
+       struct ceph_vino *pvino = (struct ceph_vino *)data;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       return ci->i_vino.ino == pvino->ino &&
+               ci->i_vino.snap == pvino->snap;
+}
+
+static inline struct inode *ceph_find_inode(struct super_block *sb,
+                                           struct ceph_vino vino)
+{
+       ino_t t = ceph_vino_to_ino(vino);
+       return ilookup5(sb, t, ceph_ino_compare, &vino);
+}
+
+
+/*
+ * Ceph inode.
+ */
+#define CEPH_I_COMPLETE  1  /* we have complete directory cached */
+#define CEPH_I_NODELAY   4  /* do not delay cap release */
+#define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
+#define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
+
 static inline void ceph_i_clear(struct inode *inode, unsigned mask)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
@@ -432,20 +418,6 @@ extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
                            struct ceph_inode_frag *pfrag,
                            int *found);
 
-/*
- * Ceph dentry state
- */
-struct ceph_dentry_info {
-       struct ceph_mds_session *lease_session;
-       u32 lease_gen, lease_shared_gen;
-       u32 lease_seq;
-       unsigned long lease_renew_after, lease_renew_from;
-       struct list_head lru;
-       struct dentry *dentry;
-       u64 time;
-       u64 offset;
-};
-
 static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry)
 {
        return (struct ceph_dentry_info *)dentry->d_fsdata;
@@ -456,22 +428,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
        return ((loff_t)frag << 32) | (loff_t)off;
 }
 
-/*
- * ino_t is <64 bits on many architectures, blech.
- *
- * don't include snap in ino hash, at least for now.
- */
-static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
-{
-       ino_t ino = (ino_t)vino.ino;  /* ^ (vino.snap << 20); */
-#if BITS_PER_LONG == 32
-       ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8;
-       if (!ino)
-               ino = 1;
-#endif
-       return ino;
-}
-
 static inline int ceph_set_ino_cb(struct inode *inode, void *data)
 {
        ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
@@ -479,39 +435,6 @@ static inline int ceph_set_ino_cb(struct inode *inode, void *data)
        return 0;
 }
 
-static inline struct ceph_vino ceph_vino(struct inode *inode)
-{
-       return ceph_inode(inode)->i_vino;
-}
-
-/* for printf-style formatting */
-#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
-
-static inline u64 ceph_ino(struct inode *inode)
-{
-       return ceph_inode(inode)->i_vino.ino;
-}
-static inline u64 ceph_snap(struct inode *inode)
-{
-       return ceph_inode(inode)->i_vino.snap;
-}
-
-static inline int ceph_ino_compare(struct inode *inode, void *data)
-{
-       struct ceph_vino *pvino = (struct ceph_vino *)data;
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       return ci->i_vino.ino == pvino->ino &&
-               ci->i_vino.snap == pvino->snap;
-}
-
-static inline struct inode *ceph_find_inode(struct super_block *sb,
-                                           struct ceph_vino vino)
-{
-       ino_t t = ceph_vino_to_ino(vino);
-       return ilookup5(sb, t, ceph_ino_compare, &vino);
-}
-
-
 /*
  * caps helpers
  */
@@ -576,18 +499,18 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
                             struct ceph_cap_reservation *ctx, int need);
 extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
                               struct ceph_cap_reservation *ctx);
-extern void ceph_reservation_status(struct ceph_client *client,
+extern void ceph_reservation_status(struct ceph_fs_client *client,
                                    int *total, int *avail, int *used,
                                    int *reserved, int *min);
 
-static inline struct ceph_client *ceph_inode_to_client(struct inode *inode)
+static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
 {
-       return (struct ceph_client *)inode->i_sb->s_fs_info;
+       return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
 }
 
-static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb)
+static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
 {
-       return (struct ceph_client *)sb->s_fs_info;
+       return (struct ceph_fs_client *)sb->s_fs_info;
 }
 
 
@@ -617,51 +540,6 @@ struct ceph_file_info {
 
 
 /*
- * snapshots
- */
-
-/*
- * A "snap context" is the set of existing snapshots when we
- * write data.  It is used by the OSD to guide its COW behavior.
- *
- * The ceph_snap_context is refcounted, and attached to each dirty
- * page, indicating which context the dirty data belonged when it was
- * dirtied.
- */
-struct ceph_snap_context {
-       atomic_t nref;
-       u64 seq;
-       int num_snaps;
-       u64 snaps[];
-};
-
-static inline struct ceph_snap_context *
-ceph_get_snap_context(struct ceph_snap_context *sc)
-{
-       /*
-       printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
-              atomic_read(&sc->nref)+1);
-       */
-       if (sc)
-               atomic_inc(&sc->nref);
-       return sc;
-}
-
-static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
-{
-       if (!sc)
-               return;
-       /*
-       printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
-              atomic_read(&sc->nref)-1);
-       */
-       if (atomic_dec_and_test(&sc->nref)) {
-               /*printk(" deleting snap_context %p\n", sc);*/
-               kfree(sc);
-       }
-}
-
-/*
  * A "snap realm" describes a subset of the file hierarchy sharing
  * the same set of snapshots that apply to it.  The realms themselves
  * are organized into a hierarchy, such that children inherit (some of)
@@ -699,16 +577,33 @@ struct ceph_snap_realm {
        spinlock_t inodes_with_caps_lock;
 };
 
-
-
-/*
- * calculate the number of pages a given length and offset map onto,
- * if we align the data.
- */
-static inline int calc_pages_for(u64 off, u64 len)
+static inline int default_congestion_kb(void)
 {
-       return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
-               (off >> PAGE_CACHE_SHIFT);
+       int congestion_kb;
+
+       /*
+        * Copied from NFS
+        *
+        * congestion size, scale with available memory.
+        *
+        *  64MB:    8192k
+        * 128MB:   11585k
+        * 256MB:   16384k
+        * 512MB:   23170k
+        *   1GB:   32768k
+        *   2GB:   46340k
+        *   4GB:   65536k
+        *   8GB:   92681k
+        *  16GB:  131072k
+        *
+        * This allows larger machines to have larger/more transfers.
+        * Limit the default to 256M
+        */
+       congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
+       if (congestion_kb > 256*1024)
+               congestion_kb = 256*1024;
+
+       return congestion_kb;
 }
 
 
@@ -741,16 +636,6 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
                           ci_item)->writing;
 }
 
-
-/* super.c */
-extern struct kmem_cache *ceph_inode_cachep;
-extern struct kmem_cache *ceph_cap_cachep;
-extern struct kmem_cache *ceph_dentry_cachep;
-extern struct kmem_cache *ceph_file_cachep;
-
-extern const char *ceph_msg_type_name(int type);
-extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
-
 /* inode.c */
 extern const struct inode_operations ceph_file_iops;
 
@@ -857,12 +742,18 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
 /* file.c */
 extern const struct file_operations ceph_file_fops;
 extern const struct address_space_operations ceph_aops;
+extern int ceph_copy_to_page_vector(struct page **pages,
+                                   const char *data,
+                                   loff_t off, size_t len);
+extern int ceph_copy_from_page_vector(struct page **pages,
+                                   char *data,
+                                   loff_t off, size_t len);
+extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
                                       struct nameidata *nd, int mode,
                                       int locked_dir);
 extern int ceph_release(struct inode *inode, struct file *filp);
-extern void ceph_release_page_vector(struct page **pages, int num_pages);
 
 /* dir.c */
 extern const struct file_operations ceph_dir_fops;
@@ -892,12 +783,6 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 /* export.c */
 extern const struct export_operations ceph_export_ops;
 
-/* debugfs.c */
-extern int ceph_debugfs_init(void);
-extern void ceph_debugfs_cleanup(void);
-extern int ceph_debugfs_client_init(struct ceph_client *client);
-extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
-
 /* locks.c */
 extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
 extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
@@ -914,4 +799,8 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry)
        return NULL;
 }
 
+/* debugfs.c */
+extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
+extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
+
 #endif /* _FS_CEPH_SUPER_H */
index 9578af6..70e9199 100644 (file)
@@ -1,6 +1,9 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
+
 #include "super.h"
-#include "decode.h"
+#include "mds_client.h"
+
+#include <linux/ceph/decode.h>
 
 #include <linux/xattr.h>
 #include <linux/slab.h>
@@ -620,12 +623,12 @@ out:
 static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
                              const char *value, size_t size, int flags)
 {
-       struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
+       struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
        struct inode *inode = dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct inode *parent_inode = dentry->d_parent->d_inode;
        struct ceph_mds_request *req;
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        int err;
        int i, nr_pages;
        struct page **pages = NULL;
@@ -777,8 +780,8 @@ out:
 
 static int ceph_send_removexattr(struct dentry *dentry, const char *name)
 {
-       struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
-       struct ceph_mds_client *mdsc = &client->mdsc;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct inode *inode = dentry->d_inode;
        struct inode *parent_inode = dentry->d_parent->d_inode;
        struct ceph_mds_request *req;
similarity index 97%
rename from fs/ceph/auth.h
rename to include/linux/ceph/auth.h
index d38a2fb..7fff521 100644 (file)
@@ -1,8 +1,8 @@
 #ifndef _FS_CEPH_AUTH_H
 #define _FS_CEPH_AUTH_H
 
-#include "types.h"
-#include "buffer.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
 
 /*
  * Abstract interface for communicating with the authenticate module.
similarity index 100%
rename from fs/ceph/buffer.h
rename to include/linux/ceph/buffer.h
similarity index 86%
rename from fs/ceph/ceph_debug.h
rename to include/linux/ceph/ceph_debug.h
index 1818c23..aa2e191 100644 (file)
@@ -3,7 +3,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#ifdef CONFIG_CEPH_FS_PRETTYDEBUG
+#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
 
 /*
  * wrap pr_debug to include a filename:lineno prefix on each line.
@@ -14,7 +14,8 @@
 # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
 extern const char *ceph_file_part(const char *s, int len);
 #  define dout(fmt, ...)                                               \
-       pr_debug(" %12.12s:%-4d : " fmt,                                \
+       pr_debug("%.*s %12.12s:%-4d : " fmt,                            \
+                8 - (int)sizeof(KBUILD_MODNAME), "    ",               \
                 ceph_file_part(__FILE__, sizeof(__FILE__)),            \
                 __LINE__, ##__VA_ARGS__)
 # else
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h
new file mode 100644 (file)
index 0000000..2a79702
--- /dev/null
@@ -0,0 +1,33 @@
+#ifndef _FS_CEPH_DEBUGFS_H
+#define _FS_CEPH_DEBUGFS_H
+
+#include "ceph_debug.h"
+#include "types.h"
+
+#define CEPH_DEFINE_SHOW_FUNC(name)                                    \
+static int name##_open(struct inode *inode, struct file *file)         \
+{                                                                      \
+       struct seq_file *sf;                                            \
+       int ret;                                                        \
+                                                                       \
+       ret = single_open(file, name, NULL);                            \
+       sf = file->private_data;                                        \
+       sf->private = inode->i_private;                                 \
+       return ret;                                                     \
+}                                                                      \
+                                                                       \
+static const struct file_operations name##_fops = {                    \
+       .open           = name##_open,                                  \
+       .read           = seq_read,                                     \
+       .llseek         = seq_lseek,                                    \
+       .release        = single_release,                               \
+};
+
+/* debugfs.c */
+extern int ceph_debugfs_init(void);
+extern void ceph_debugfs_cleanup(void);
+extern int ceph_debugfs_client_init(struct ceph_client *client);
+extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
+
+#endif
+
similarity index 100%
rename from fs/ceph/decode.h
rename to include/linux/ceph/decode.h
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
new file mode 100644 (file)
index 0000000..f22b2e9
--- /dev/null
@@ -0,0 +1,249 @@
+#ifndef _FS_CEPH_LIBCEPH_H
+#define _FS_CEPH_LIBCEPH_H
+
+#include "ceph_debug.h"
+
+#include <asm/unaligned.h>
+#include <linux/backing-dev.h>
+#include <linux/completion.h>
+#include <linux/exportfs.h>
+#include <linux/fs.h>
+#include <linux/mempool.h>
+#include <linux/pagemap.h>
+#include <linux/wait.h>
+#include <linux/writeback.h>
+#include <linux/slab.h>
+
+#include "types.h"
+#include "messenger.h"
+#include "msgpool.h"
+#include "mon_client.h"
+#include "osd_client.h"
+#include "ceph_fs.h"
+
+/*
+ * Supported features
+ */
+#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_DEFAULT  CEPH_FEATURE_NOSRCADDR
+
+/*
+ * mount options
+ */
+#define CEPH_OPT_FSID             (1<<0)
+#define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */
+#define CEPH_OPT_MYIP             (1<<2) /* specified my ip */
+#define CEPH_OPT_NOCRC            (1<<3) /* no data crc on writes */
+
+#define CEPH_OPT_DEFAULT   (0);
+
+#define ceph_set_opt(client, opt) \
+       (client)->options->flags |= CEPH_OPT_##opt;
+#define ceph_test_opt(client, opt) \
+       (!!((client)->options->flags & CEPH_OPT_##opt))
+
+struct ceph_options {
+       int flags;
+       struct ceph_fsid fsid;
+       struct ceph_entity_addr my_addr;
+       int mount_timeout;
+       int osd_idle_ttl;
+       int osd_timeout;
+       int osd_keepalive_timeout;
+
+       /*
+        * any type that can't be simply compared or doesn't need need
+        * to be compared should go beyond this point,
+        * ceph_compare_options() should be updated accordingly
+        */
+
+       struct ceph_entity_addr *mon_addr; /* should be the first
+                                             pointer type of args */
+       int num_mon;
+       char *name;
+       char *secret;
+};
+
+/*
+ * defaults
+ */
+#define CEPH_MOUNT_TIMEOUT_DEFAULT  60
+#define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */
+#define CEPH_OSD_KEEPALIVE_DEFAULT  5
+#define CEPH_OSD_IDLE_TTL_DEFAULT    60
+#define CEPH_MOUNT_RSIZE_DEFAULT    (512*1024) /* readahead */
+
+#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
+#define CEPH_MSG_MAX_DATA_LEN  (16*1024*1024)
+
+#define CEPH_AUTH_NAME_DEFAULT   "guest"
+
+/*
+ * Delay telling the MDS we no longer want caps, in case we reopen
+ * the file.  Delay a minimum amount of time, even if we send a cap
+ * message for some other reason.  Otherwise, take the oppotunity to
+ * update the mds to avoid sending another message later.
+ */
+#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */
+#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */
+
+#define CEPH_CAP_RELEASE_SAFETY_DEFAULT        (CEPH_CAPS_PER_RELEASE * 4)
+
+/* mount state */
+enum {
+       CEPH_MOUNT_MOUNTING,
+       CEPH_MOUNT_MOUNTED,
+       CEPH_MOUNT_UNMOUNTING,
+       CEPH_MOUNT_UNMOUNTED,
+       CEPH_MOUNT_SHUTDOWN,
+};
+
+/*
+ * subtract jiffies
+ */
+static inline unsigned long time_sub(unsigned long a, unsigned long b)
+{
+       BUG_ON(time_after(b, a));
+       return (long)a - (long)b;
+}
+
+struct ceph_mds_client;
+
+/*
+ * per client state
+ *
+ * possibly shared by multiple mount points, if they are
+ * mounting the same ceph filesystem/cluster.
+ */
+struct ceph_client {
+       struct ceph_fsid fsid;
+       bool have_fsid;
+
+       void *private;
+
+       struct ceph_options *options;
+
+       struct mutex mount_mutex;      /* serialize mount attempts */
+       wait_queue_head_t auth_wq;
+       int auth_err;
+
+       int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
+
+       u32 supported_features;
+       u32 required_features;
+
+       struct ceph_messenger *msgr;   /* messenger instance */
+       struct ceph_mon_client monc;
+       struct ceph_osd_client osdc;
+
+#ifdef CONFIG_DEBUG_FS
+       struct dentry *debugfs_dir;
+       struct dentry *debugfs_monmap;
+       struct dentry *debugfs_osdmap;
+#endif
+};
+
+
+
+/*
+ * snapshots
+ */
+
+/*
+ * A "snap context" is the set of existing snapshots when we
+ * write data.  It is used by the OSD to guide its COW behavior.
+ *
+ * The ceph_snap_context is refcounted, and attached to each dirty
+ * page, indicating which context the dirty data belonged when it was
+ * dirtied.
+ */
+struct ceph_snap_context {
+       atomic_t nref;
+       u64 seq;
+       int num_snaps;
+       u64 snaps[];
+};
+
+static inline struct ceph_snap_context *
+ceph_get_snap_context(struct ceph_snap_context *sc)
+{
+       /*
+       printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
+              atomic_read(&sc->nref)+1);
+       */
+       if (sc)
+               atomic_inc(&sc->nref);
+       return sc;
+}
+
+static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
+{
+       if (!sc)
+               return;
+       /*
+       printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
+              atomic_read(&sc->nref)-1);
+       */
+       if (atomic_dec_and_test(&sc->nref)) {
+               /*printk(" deleting snap_context %p\n", sc);*/
+               kfree(sc);
+       }
+}
+
+/*
+ * calculate the number of pages a given length and offset map onto,
+ * if we align the data.
+ */
+static inline int calc_pages_for(u64 off, u64 len)
+{
+       return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
+               (off >> PAGE_CACHE_SHIFT);
+}
+
+/* ceph_common.c */
+extern const char *ceph_msg_type_name(int type);
+extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
+extern struct kmem_cache *ceph_inode_cachep;
+extern struct kmem_cache *ceph_cap_cachep;
+extern struct kmem_cache *ceph_dentry_cachep;
+extern struct kmem_cache *ceph_file_cachep;
+
+extern int ceph_parse_options(struct ceph_options **popt, char *options,
+                             const char *dev_name, const char *dev_name_end,
+                             int (*parse_extra_token)(char *c, void *private),
+                             void *private);
+extern void ceph_destroy_options(struct ceph_options *opt);
+extern int ceph_compare_options(struct ceph_options *new_opt,
+                               struct ceph_client *client);
+extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
+                                             void *private);
+extern u64 ceph_client_id(struct ceph_client *client);
+extern void ceph_destroy_client(struct ceph_client *client);
+extern int __ceph_open_session(struct ceph_client *client,
+                              unsigned long started);
+extern int ceph_open_session(struct ceph_client *client);
+
+/* pagevec.c */
+extern void ceph_release_page_vector(struct page **pages, int num_pages);
+
+extern struct page **ceph_get_direct_page_vector(const char __user *data,
+                                           int num_pages,
+                                           loff_t off, size_t len);
+extern void ceph_put_page_vector(struct page **pages, int num_pages);
+extern void ceph_release_page_vector(struct page **pages, int num_pages);
+extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
+extern int ceph_copy_user_to_page_vector(struct page **pages,
+                                        const char __user *data,
+                                        loff_t off, size_t len);
+extern int ceph_copy_to_page_vector(struct page **pages,
+                                   const char *data,
+                                   loff_t off, size_t len);
+extern int ceph_copy_from_page_vector(struct page **pages,
+                                   char *data,
+                                   loff_t off, size_t len);
+extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data,
+                                   loff_t off, size_t len);
+extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
+
+
+#endif /* _FS_CEPH_SUPER_H */
similarity index 100%
rename from fs/ceph/mdsmap.h
rename to include/linux/ceph/mdsmap.h
similarity index 97%
rename from fs/ceph/messenger.h
rename to include/linux/ceph/messenger.h
index 5a79450..5956d62 100644 (file)
@@ -65,6 +65,9 @@ struct ceph_messenger {
         */
        u32 global_seq;
        spinlock_t global_seq_lock;
+
+       u32 supported_features;
+       u32 required_features;
 };
 
 /*
@@ -209,7 +212,7 @@ struct ceph_connection {
 };
 
 
-extern const char *pr_addr(const struct sockaddr_storage *ss);
+extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
 extern int ceph_parse_ips(const char *c, const char *end,
                          struct ceph_entity_addr *addr,
                          int max_count, int *count);
@@ -220,7 +223,8 @@ extern void ceph_msgr_exit(void);
 extern void ceph_msgr_flush(void);
 
 extern struct ceph_messenger *ceph_messenger_create(
-       struct ceph_entity_addr *myaddr);
+       struct ceph_entity_addr *myaddr,
+       u32 features, u32 required);
 extern void ceph_messenger_destroy(struct ceph_messenger *);
 
 extern void ceph_con_init(struct ceph_messenger *msgr,
similarity index 99%
rename from fs/ceph/mon_client.h
rename to include/linux/ceph/mon_client.h
index 8e396f2..545f859 100644 (file)
@@ -79,6 +79,7 @@ struct ceph_mon_client {
        u64 last_tid;
 
        /* mds/osd map */
+       int want_mdsmap;
        int want_next_osdmap; /* 1 = want, 2 = want+asked */
        u32 have_osdmap, have_mdsmap;
 
similarity index 100%
rename from fs/ceph/msgr.h
rename to include/linux/ceph/msgr.h
similarity index 99%
rename from fs/ceph/osd_client.h
rename to include/linux/ceph/osd_client.h
index 65c9c56..6c91fb0 100644 (file)
@@ -69,6 +69,7 @@ struct ceph_osd_request {
        struct list_head  r_unsafe_item;
 
        struct inode *r_inode;                /* for use by callbacks */
+       void *r_priv;                         /* ditto */
 
        char              r_oid[40];          /* object name */
        int               r_oid_len;
similarity index 99%
rename from fs/ceph/osdmap.h
rename to include/linux/ceph/osdmap.h
index a592b21..ba4c205 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/rbtree.h>
 #include "types.h"
 #include "ceph_fs.h"
-#include "crush/crush.h"
+#include <linux/crush/crush.h>
 
 /*
  * The osd map describes the current membership of the osd cluster and
similarity index 100%
rename from fs/ceph/rados.h
rename to include/linux/ceph/rados.h
similarity index 100%
rename from fs/ceph/types.h
rename to include/linux/ceph/types.h
index e926884..55fd82e 100644 (file)
@@ -293,6 +293,7 @@ source "net/wimax/Kconfig"
 source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
 source "net/caif/Kconfig"
+source "net/ceph/Kconfig"
 
 
 endif   # if NET
index ea60fbc..6b7bfd7 100644 (file)
@@ -68,3 +68,4 @@ obj-$(CONFIG_SYSCTL)          += sysctl_net.o
 endif
 obj-$(CONFIG_WIMAX)            += wimax/
 obj-$(CONFIG_DNS_RESOLVER)     += dns_resolver/
+obj-$(CONFIG_CEPH_LIB)         += ceph/
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig
new file mode 100644 (file)
index 0000000..ad42404
--- /dev/null
@@ -0,0 +1,28 @@
+config CEPH_LIB
+        tristate "Ceph core library (EXPERIMENTAL)"
+       depends on INET && EXPERIMENTAL
+       select LIBCRC32C
+       select CRYPTO_AES
+       select CRYPTO
+       default n
+       help
+         Choose Y or M here to include cephlib, which provides the
+         common functionality to both the Ceph filesystem and
+         to the rados block device (rbd).
+
+         More information at http://ceph.newdream.net/.
+
+         If unsure, say N.
+
+config CEPH_LIB_PRETTYDEBUG
+       bool "Include file:line in ceph debug output"
+       depends on CEPH_LIB
+       default n
+       help
+         If you say Y here, debug output will include a filename and
+         line to aid debugging.  This increases kernel size and slows
+         execution slightly when debug call sites are enabled (e.g.,
+         via CONFIG_DYNAMIC_DEBUG).
+
+         If unsure, say N.
+
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
new file mode 100644 (file)
index 0000000..aab1cab
--- /dev/null
@@ -0,0 +1,37 @@
+#
+# Makefile for CEPH filesystem.
+#
+
+ifneq ($(KERNELRELEASE),)
+
+obj-$(CONFIG_CEPH_LIB) += libceph.o
+
+libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
+       mon_client.o \
+       osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+       debugfs.o \
+       auth.o auth_none.o \
+       crypto.o armor.o \
+       auth_x.o \
+       ceph_fs.o ceph_strings.o ceph_hash.o \
+       pagevec.o
+
+else
+#Otherwise we were called directly from the command
+# line; invoke the kernel build system.
+
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+
+default: all
+
+all:
+       $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules
+
+modules_install:
+       $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install
+
+clean:
+       $(MAKE) -C $(KERNELDIR) M=$(PWD) clean
+
+endif
similarity index 100%
rename from fs/ceph/armor.c
rename to net/ceph/armor.c
similarity index 97%
rename from fs/ceph/auth.c
rename to net/ceph/auth.c
index 6d2e306..549c1f4 100644 (file)
@@ -1,16 +1,16 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 
-#include "types.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/messenger.h>
 #include "auth_none.h"
 #include "auth_x.h"
-#include "decode.h"
-#include "super.h"
 
-#include "messenger.h"
 
 /*
  * get protocol handler
similarity index 96%
rename from fs/ceph/auth_none.c
rename to net/ceph/auth_none.c
index ad1dc21..214c2bb 100644 (file)
@@ -1,14 +1,15 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+
 #include "auth_none.h"
-#include "auth.h"
-#include "decode.h"
 
 static void reset(struct ceph_auth_client *ac)
 {
similarity index 94%
rename from fs/ceph/auth_none.h
rename to net/ceph/auth_none.h
index 8164df1..ed7d088 100644 (file)
@@ -2,8 +2,7 @@
 #define _FS_CEPH_AUTH_NONE_H
 
 #include <linux/slab.h>
-
-#include "auth.h"
+#include <linux/ceph/auth.h>
 
 /*
  * null security mode.
similarity index 99%
rename from fs/ceph/auth_x.c
rename to net/ceph/auth_x.c
index a2d002c..7fd5dfc 100644 (file)
@@ -1,16 +1,17 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+
+#include "crypto.h"
 #include "auth_x.h"
 #include "auth_x_protocol.h"
-#include "crypto.h"
-#include "auth.h"
-#include "decode.h"
 
 #define TEMP_TICKET_BUF_LEN    256
 
similarity index 96%
rename from fs/ceph/auth_x.h
rename to net/ceph/auth_x.h
index ff6f818..e02da7a 100644 (file)
@@ -3,8 +3,9 @@
 
 #include <linux/rbtree.h>
 
+#include <linux/ceph/auth.h>
+
 #include "crypto.h"
-#include "auth.h"
 #include "auth_x_protocol.h"
 
 /*
similarity index 86%
rename from fs/ceph/buffer.c
rename to net/ceph/buffer.c
index cd39f17..53d8abf 100644 (file)
@@ -1,10 +1,11 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/module.h>
 #include <linux/slab.h>
 
-#include "buffer.h"
-#include "decode.h"
+#include <linux/ceph/buffer.h>
+#include <linux/ceph/decode.h>
 
 struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
 {
@@ -32,6 +33,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
        dout("buffer_new %p\n", b);
        return b;
 }
+EXPORT_SYMBOL(ceph_buffer_new);
 
 void ceph_buffer_release(struct kref *kref)
 {
@@ -46,6 +48,7 @@ void ceph_buffer_release(struct kref *kref)
        }
        kfree(b);
 }
+EXPORT_SYMBOL(ceph_buffer_release);
 
 int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end)
 {
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
new file mode 100644 (file)
index 0000000..f6f2eeb
--- /dev/null
@@ -0,0 +1,529 @@
+
+#include <linux/ceph/ceph_debug.h>
+#include <linux/backing-dev.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/inet.h>
+#include <linux/in6.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/statfs.h>
+#include <linux/string.h>
+
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/debugfs.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+
+
+
+/*
+ * find filename portion of a path (/foo/bar/baz -> baz)
+ */
+const char *ceph_file_part(const char *s, int len)
+{
+       const char *e = s + len;
+
+       while (e != s && *(e-1) != '/')
+               e--;
+       return e;
+}
+EXPORT_SYMBOL(ceph_file_part);
+
+const char *ceph_msg_type_name(int type)
+{
+       switch (type) {
+       case CEPH_MSG_SHUTDOWN: return "shutdown";
+       case CEPH_MSG_PING: return "ping";
+       case CEPH_MSG_AUTH: return "auth";
+       case CEPH_MSG_AUTH_REPLY: return "auth_reply";
+       case CEPH_MSG_MON_MAP: return "mon_map";
+       case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
+       case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
+       case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
+       case CEPH_MSG_STATFS: return "statfs";
+       case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+       case CEPH_MSG_MDS_MAP: return "mds_map";
+       case CEPH_MSG_CLIENT_SESSION: return "client_session";
+       case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
+       case CEPH_MSG_CLIENT_REQUEST: return "client_request";
+       case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
+       case CEPH_MSG_CLIENT_REPLY: return "client_reply";
+       case CEPH_MSG_CLIENT_CAPS: return "client_caps";
+       case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
+       case CEPH_MSG_CLIENT_SNAP: return "client_snap";
+       case CEPH_MSG_CLIENT_LEASE: return "client_lease";
+       case CEPH_MSG_OSD_MAP: return "osd_map";
+       case CEPH_MSG_OSD_OP: return "osd_op";
+       case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
+       default: return "unknown";
+       }
+}
+EXPORT_SYMBOL(ceph_msg_type_name);
+
+/*
+ * Initially learn our fsid, or verify an fsid matches.
+ */
+int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
+{
+       if (client->have_fsid) {
+               if (ceph_fsid_compare(&client->fsid, fsid)) {
+                       pr_err("bad fsid, had %pU got %pU",
+                              &client->fsid, fsid);
+                       return -1;
+               }
+       } else {
+               pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
+               memcpy(&client->fsid, fsid, sizeof(*fsid));
+               ceph_debugfs_client_init(client);
+               client->have_fsid = true;
+       }
+       return 0;
+}
+EXPORT_SYMBOL(ceph_check_fsid);
+
+static int strcmp_null(const char *s1, const char *s2)
+{
+       if (!s1 && !s2)
+               return 0;
+       if (s1 && !s2)
+               return -1;
+       if (!s1 && s2)
+               return 1;
+       return strcmp(s1, s2);
+}
+
+int ceph_compare_options(struct ceph_options *new_opt,
+                        struct ceph_client *client)
+{
+       struct ceph_options *opt1 = new_opt;
+       struct ceph_options *opt2 = client->options;
+       int ofs = offsetof(struct ceph_options, mon_addr);
+       int i;
+       int ret;
+
+       ret = memcmp(opt1, opt2, ofs);
+       if (ret)
+               return ret;
+
+       ret = strcmp_null(opt1->name, opt2->name);
+       if (ret)
+               return ret;
+
+       ret = strcmp_null(opt1->secret, opt2->secret);
+       if (ret)
+               return ret;
+
+       /* any matching mon ip implies a match */
+       for (i = 0; i < opt1->num_mon; i++) {
+               if (ceph_monmap_contains(client->monc.monmap,
+                                &opt1->mon_addr[i]))
+                       return 0;
+       }
+       return -1;
+}
+EXPORT_SYMBOL(ceph_compare_options);
+
+
+static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+{
+       int i = 0;
+       char tmp[3];
+       int err = -EINVAL;
+       int d;
+
+       dout("parse_fsid '%s'\n", str);
+       tmp[2] = 0;
+       while (*str && i < 16) {
+               if (ispunct(*str)) {
+                       str++;
+                       continue;
+               }
+               if (!isxdigit(str[0]) || !isxdigit(str[1]))
+                       break;
+               tmp[0] = str[0];
+               tmp[1] = str[1];
+               if (sscanf(tmp, "%x", &d) < 1)
+                       break;
+               fsid->fsid[i] = d & 0xff;
+               i++;
+               str += 2;
+       }
+
+       if (i == 16)
+               err = 0;
+       dout("parse_fsid ret %d got fsid %pU", err, fsid);
+       return err;
+}
+
+/*
+ * ceph options
+ */
+enum {
+       Opt_osdtimeout,
+       Opt_osdkeepalivetimeout,
+       Opt_mount_timeout,
+       Opt_osd_idle_ttl,
+       Opt_last_int,
+       /* int args above */
+       Opt_fsid,
+       Opt_name,
+       Opt_secret,
+       Opt_ip,
+       Opt_last_string,
+       /* string args above */
+       Opt_noshare,
+       Opt_nocrc,
+};
+
+static match_table_t opt_tokens = {
+       {Opt_osdtimeout, "osdtimeout=%d"},
+       {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
+       {Opt_mount_timeout, "mount_timeout=%d"},
+       {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+       /* int args above */
+       {Opt_fsid, "fsid=%s"},
+       {Opt_name, "name=%s"},
+       {Opt_secret, "secret=%s"},
+       {Opt_ip, "ip=%s"},
+       /* string args above */
+       {Opt_noshare, "noshare"},
+       {Opt_nocrc, "nocrc"},
+       {-1, NULL}
+};
+
+void ceph_destroy_options(struct ceph_options *opt)
+{
+       dout("destroy_options %p\n", opt);
+       kfree(opt->name);
+       kfree(opt->secret);
+       kfree(opt);
+}
+EXPORT_SYMBOL(ceph_destroy_options);
+
+int ceph_parse_options(struct ceph_options **popt, char *options,
+                      const char *dev_name, const char *dev_name_end,
+                      int (*parse_extra_token)(char *c, void *private),
+                      void *private)
+{
+       struct ceph_options *opt;
+       const char *c;
+       int err = -ENOMEM;
+       substring_t argstr[MAX_OPT_ARGS];
+
+       opt = kzalloc(sizeof(*opt), GFP_KERNEL);
+       if (!opt)
+               return err;
+       opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
+                               GFP_KERNEL);
+       if (!opt->mon_addr)
+               goto out;
+
+       dout("parse_options %p options '%s' dev_name '%s'\n", opt, options,
+            dev_name);
+
+       /* start with defaults */
+       opt->flags = CEPH_OPT_DEFAULT;
+       opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
+       opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
+       opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
+       opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */
+
+       /* get mon ip(s) */
+       /* ip1[:port1][,ip2[:port2]...] */
+       err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr,
+                            CEPH_MAX_MON, &opt->num_mon);
+       if (err < 0)
+               goto out;
+
+       /* parse mount options */
+       while ((c = strsep(&options, ",")) != NULL) {
+               int token, intval, ret;
+               if (!*c)
+                       continue;
+               err = -EINVAL;
+               token = match_token((char *)c, opt_tokens, argstr);
+               if (token < 0) {
+                       /* extra? */
+                       err = parse_extra_token((char *)c, private);
+                       if (err < 0) {
+                               pr_err("bad option at '%s'\n", c);
+                               goto out;
+                       }
+                       continue;
+               }
+               if (token < Opt_last_int) {
+                       ret = match_int(&argstr[0], &intval);
+                       if (ret < 0) {
+                               pr_err("bad mount option arg (not int) "
+                                      "at '%s'\n", c);
+                               continue;
+                       }
+                       dout("got int token %d val %d\n", token, intval);
+               } else if (token > Opt_last_int && token < Opt_last_string) {
+                       dout("got string token %d val %s\n", token,
+                            argstr[0].from);
+               } else {
+                       dout("got token %d\n", token);
+               }
+               switch (token) {
+               case Opt_ip:
+                       err = ceph_parse_ips(argstr[0].from,
+                                            argstr[0].to,
+                                            &opt->my_addr,
+                                            1, NULL);
+                       if (err < 0)
+                               goto out;
+                       opt->flags |= CEPH_OPT_MYIP;
+                       break;
+
+               case Opt_fsid:
+                       err = parse_fsid(argstr[0].from, &opt->fsid);
+                       if (err == 0)
+                               opt->flags |= CEPH_OPT_FSID;
+                       break;
+               case Opt_name:
+                       opt->name = kstrndup(argstr[0].from,
+                                             argstr[0].to-argstr[0].from,
+                                             GFP_KERNEL);
+                       break;
+               case Opt_secret:
+                       opt->secret = kstrndup(argstr[0].from,
+                                               argstr[0].to-argstr[0].from,
+                                               GFP_KERNEL);
+                       break;
+
+                       /* misc */
+               case Opt_osdtimeout:
+                       opt->osd_timeout = intval;
+                       break;
+               case Opt_osdkeepalivetimeout:
+                       opt->osd_keepalive_timeout = intval;
+                       break;
+               case Opt_osd_idle_ttl:
+                       opt->osd_idle_ttl = intval;
+                       break;
+               case Opt_mount_timeout:
+                       opt->mount_timeout = intval;
+                       break;
+
+               case Opt_noshare:
+                       opt->flags |= CEPH_OPT_NOSHARE;
+                       break;
+
+               case Opt_nocrc:
+                       opt->flags |= CEPH_OPT_NOCRC;
+                       break;
+
+               default:
+                       BUG_ON(token);
+               }
+       }
+
+       /* success */
+       *popt = opt;
+       return 0;
+
+out:
+       ceph_destroy_options(opt);
+       return err;
+}
+EXPORT_SYMBOL(ceph_parse_options);
+
+u64 ceph_client_id(struct ceph_client *client)
+{
+       return client->monc.auth->global_id;
+}
+EXPORT_SYMBOL(ceph_client_id);
+
+/*
+ * create a fresh client instance
+ */
+struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
+{
+       struct ceph_client *client;
+       int err = -ENOMEM;
+
+       client = kzalloc(sizeof(*client), GFP_KERNEL);
+       if (client == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       client->private = private;
+       client->options = opt;
+
+       mutex_init(&client->mount_mutex);
+       init_waitqueue_head(&client->auth_wq);
+       client->auth_err = 0;
+
+       client->extra_mon_dispatch = NULL;
+       client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT;
+       client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT;
+
+       client->msgr = NULL;
+
+       /* subsystems */
+       err = ceph_monc_init(&client->monc, client);
+       if (err < 0)
+               goto fail;
+       err = ceph_osdc_init(&client->osdc, client);
+       if (err < 0)
+               goto fail_monc;
+
+       return client;
+
+fail_monc:
+       ceph_monc_stop(&client->monc);
+fail:
+       kfree(client);
+       return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ceph_create_client);
+
+void ceph_destroy_client(struct ceph_client *client)
+{
+       dout("destroy_client %p\n", client);
+
+       /* unmount */
+       ceph_osdc_stop(&client->osdc);
+
+       /*
+        * make sure mds and osd connections close out before destroying
+        * the auth module, which is needed to free those connections'
+        * ceph_authorizers.
+        */
+       ceph_msgr_flush();
+
+       ceph_monc_stop(&client->monc);
+
+       ceph_debugfs_client_cleanup(client);
+
+       if (client->msgr)
+               ceph_messenger_destroy(client->msgr);
+
+       ceph_destroy_options(client->options);
+
+       kfree(client);
+       dout("destroy_client %p done\n", client);
+}
+EXPORT_SYMBOL(ceph_destroy_client);
+
+/*
+ * true if we have the mon map (and have thus joined the cluster)
+ */
+static int have_mon_and_osd_map(struct ceph_client *client)
+{
+       return client->monc.monmap && client->monc.monmap->epoch &&
+              client->osdc.osdmap && client->osdc.osdmap->epoch;
+}
+
+/*
+ * mount: join the ceph cluster, and open root directory.
+ */
+int __ceph_open_session(struct ceph_client *client, unsigned long started)
+{
+       struct ceph_entity_addr *myaddr = NULL;
+       int err;
+       unsigned long timeout = client->options->mount_timeout * HZ;
+
+       /* initialize the messenger */
+       if (client->msgr == NULL) {
+               if (ceph_test_opt(client, MYIP))
+                       myaddr = &client->options->my_addr;
+               client->msgr = ceph_messenger_create(myaddr,
+                                       client->supported_features,
+                                       client->required_features);
+               if (IS_ERR(client->msgr)) {
+                       client->msgr = NULL;
+                       return PTR_ERR(client->msgr);
+               }
+               client->msgr->nocrc = ceph_test_opt(client, NOCRC);
+       }
+
+       /* open session, and wait for mon and osd maps */
+       err = ceph_monc_open_session(&client->monc);
+       if (err < 0)
+               return err;
+
+       while (!have_mon_and_osd_map(client)) {
+               err = -EIO;
+               if (timeout && time_after_eq(jiffies, started + timeout))
+                       return err;
+
+               /* wait */
+               dout("mount waiting for mon_map\n");
+               err = wait_event_interruptible_timeout(client->auth_wq,
+                       have_mon_and_osd_map(client) || (client->auth_err < 0),
+                       timeout);
+               if (err == -EINTR || err == -ERESTARTSYS)
+                       return err;
+               if (client->auth_err < 0)
+                       return client->auth_err;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(__ceph_open_session);
+
+
+int ceph_open_session(struct ceph_client *client)
+{
+       int ret;
+       unsigned long started = jiffies;  /* note the start time */
+
+       dout("open_session start\n");
+       mutex_lock(&client->mount_mutex);
+
+       ret = __ceph_open_session(client, started);
+
+       mutex_unlock(&client->mount_mutex);
+       return ret;
+}
+EXPORT_SYMBOL(ceph_open_session);
+
+
+static int __init init_ceph_lib(void)
+{
+       int ret = 0;
+
+       ret = ceph_debugfs_init();
+       if (ret < 0)
+               goto out;
+
+       ret = ceph_msgr_init();
+       if (ret < 0)
+               goto out_debugfs;
+
+       pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n",
+               CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL,
+               CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
+               CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
+
+       return 0;
+
+out_debugfs:
+       ceph_debugfs_cleanup();
+out:
+       return ret;
+}
+
+static void __exit exit_ceph_lib(void)
+{
+       dout("exit_ceph_lib\n");
+       ceph_msgr_exit();
+       ceph_debugfs_cleanup();
+}
+
+module_init(init_ceph_lib);
+module_exit(exit_ceph_lib);
+
+MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
+MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
+MODULE_DESCRIPTION("Ceph filesystem for Linux");
+MODULE_LICENSE("GPL");
similarity index 92%
rename from fs/ceph/ceph_fs.c
rename to net/ceph/ceph_fs.c
index 3ac6cc7..a3a3a31 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Some non-inline ceph helpers
  */
-#include "types.h"
+#include <linux/module.h>
+#include <linux/ceph/types.h>
 
 /*
  * return true if @layout appears to be valid
@@ -52,6 +53,7 @@ int ceph_flags_to_mode(int flags)
 
        return mode;
 }
+EXPORT_SYMBOL(ceph_flags_to_mode);
 
 int ceph_caps_for_mode(int mode)
 {
@@ -70,3 +72,4 @@ int ceph_caps_for_mode(int mode)
 
        return caps;
 }
+EXPORT_SYMBOL(ceph_caps_for_mode);
similarity index 98%
rename from fs/ceph/ceph_hash.c
rename to net/ceph/ceph_hash.c
index bd57001..815ef88 100644 (file)
@@ -1,5 +1,5 @@
 
-#include "types.h"
+#include <linux/ceph/types.h>
 
 /*
  * Robert Jenkin's hash function.
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c
new file mode 100644 (file)
index 0000000..3fbda04
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Ceph string constants
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+const char *ceph_entity_type_name(int type)
+{
+       switch (type) {
+       case CEPH_ENTITY_TYPE_MDS: return "mds";
+       case CEPH_ENTITY_TYPE_OSD: return "osd";
+       case CEPH_ENTITY_TYPE_MON: return "mon";
+       case CEPH_ENTITY_TYPE_CLIENT: return "client";
+       case CEPH_ENTITY_TYPE_AUTH: return "auth";
+       default: return "unknown";
+       }
+}
+
+const char *ceph_osd_op_name(int op)
+{
+       switch (op) {
+       case CEPH_OSD_OP_READ: return "read";
+       case CEPH_OSD_OP_STAT: return "stat";
+
+       case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
+
+       case CEPH_OSD_OP_WRITE: return "write";
+       case CEPH_OSD_OP_DELETE: return "delete";
+       case CEPH_OSD_OP_TRUNCATE: return "truncate";
+       case CEPH_OSD_OP_ZERO: return "zero";
+       case CEPH_OSD_OP_WRITEFULL: return "writefull";
+       case CEPH_OSD_OP_ROLLBACK: return "rollback";
+
+       case CEPH_OSD_OP_APPEND: return "append";
+       case CEPH_OSD_OP_STARTSYNC: return "startsync";
+       case CEPH_OSD_OP_SETTRUNC: return "settrunc";
+       case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc";
+
+       case CEPH_OSD_OP_TMAPUP: return "tmapup";
+       case CEPH_OSD_OP_TMAPGET: return "tmapget";
+       case CEPH_OSD_OP_TMAPPUT: return "tmapput";
+
+       case CEPH_OSD_OP_GETXATTR: return "getxattr";
+       case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
+       case CEPH_OSD_OP_SETXATTR: return "setxattr";
+       case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
+       case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
+       case CEPH_OSD_OP_RMXATTR: return "rmxattr";
+       case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
+
+       case CEPH_OSD_OP_PULL: return "pull";
+       case CEPH_OSD_OP_PUSH: return "push";
+       case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
+       case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
+       case CEPH_OSD_OP_SCRUB: return "scrub";
+
+       case CEPH_OSD_OP_WRLOCK: return "wrlock";
+       case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
+       case CEPH_OSD_OP_RDLOCK: return "rdlock";
+       case CEPH_OSD_OP_RDUNLOCK: return "rdunlock";
+       case CEPH_OSD_OP_UPLOCK: return "uplock";
+       case CEPH_OSD_OP_DNLOCK: return "dnlock";
+
+       case CEPH_OSD_OP_CALL: return "call";
+
+       case CEPH_OSD_OP_PGLS: return "pgls";
+       }
+       return "???";
+}
+
+
+const char *ceph_pool_op_name(int op)
+{
+       switch (op) {
+       case POOL_OP_CREATE: return "create";
+       case POOL_OP_DELETE: return "delete";
+       case POOL_OP_AUID_CHANGE: return "auid change";
+       case POOL_OP_CREATE_SNAP: return "create snap";
+       case POOL_OP_DELETE_SNAP: return "delete snap";
+       case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
+       case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
+       }
+       return "???";
+}
similarity index 99%
rename from fs/ceph/crush/crush.c
rename to net/ceph/crush/crush.c
index fabd302..d6ebb13 100644 (file)
@@ -8,7 +8,7 @@
 # define BUG_ON(x) assert(!(x))
 #endif
 
-#include "crush.h"
+#include <linux/crush/crush.h>
 
 const char *crush_bucket_alg_name(int alg)
 {
similarity index 99%
rename from fs/ceph/crush/hash.c
rename to net/ceph/crush/hash.c
index 5873aed..5bb63e3 100644 (file)
@@ -1,6 +1,6 @@
 
 #include <linux/types.h>
-#include "hash.h"
+#include <linux/crush/hash.h>
 
 /*
  * Robert Jenkins' function for mixing 32-bit values
similarity index 99%
rename from fs/ceph/crush/mapper.c
rename to net/ceph/crush/mapper.c
index a4eec13..42599e3 100644 (file)
@@ -18,8 +18,8 @@
 # define kfree(x) free(x)
 #endif
 
-#include "crush.h"
-#include "hash.h"
+#include <linux/crush/crush.h>
+#include <linux/crush/hash.h>
 
 /*
  * Implement the core CRUSH mapping algorithm.
similarity index 99%
rename from fs/ceph/crypto.c
rename to net/ceph/crypto.c
index a3e627f..7b505b0 100644 (file)
@@ -1,13 +1,13 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/err.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <crypto/hash.h>
 
+#include <linux/ceph/decode.h>
 #include "crypto.h"
-#include "decode.h"
 
 int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
 {
similarity index 95%
rename from fs/ceph/crypto.h
rename to net/ceph/crypto.h
index bdf3860..f9eccac 100644 (file)
@@ -1,8 +1,8 @@
 #ifndef _FS_CEPH_CRYPTO_H
 #define _FS_CEPH_CRYPTO_H
 
-#include "types.h"
-#include "buffer.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
 
 /*
  * cryptographic secret
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
new file mode 100644 (file)
index 0000000..33d0499
--- /dev/null
@@ -0,0 +1,268 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/*
+ * Implement /sys/kernel/debug/ceph fun
+ *
+ * /sys/kernel/debug/ceph/client*  - an instance of the ceph client
+ *      .../osdmap      - current osdmap
+ *      .../monmap      - current monmap
+ *      .../osdc        - active osd requests
+ *      .../monc        - mon client state
+ *      .../dentry_lru  - dump contents of dentry lru
+ *      .../caps        - expose cap (reservation) stats
+ *      .../bdi         - symlink to ../../bdi/something
+ */
+
+static struct dentry *ceph_debugfs_dir;
+
+static int monmap_show(struct seq_file *s, void *p)
+{
+       int i;
+       struct ceph_client *client = s->private;
+
+       if (client->monc.monmap == NULL)
+               return 0;
+
+       seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
+       for (i = 0; i < client->monc.monmap->num_mon; i++) {
+               struct ceph_entity_inst *inst =
+                       &client->monc.monmap->mon_inst[i];
+
+               seq_printf(s, "\t%s%lld\t%s\n",
+                          ENTITY_NAME(inst->name),
+                          ceph_pr_addr(&inst->addr.in_addr));
+       }
+       return 0;
+}
+
+static int osdmap_show(struct seq_file *s, void *p)
+{
+       int i;
+       struct ceph_client *client = s->private;
+       struct rb_node *n;
+
+       if (client->osdc.osdmap == NULL)
+               return 0;
+       seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
+       seq_printf(s, "flags%s%s\n",
+                  (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
+                  " NEARFULL" : "",
+                  (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
+                  " FULL" : "");
+       for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
+               struct ceph_pg_pool_info *pool =
+                       rb_entry(n, struct ceph_pg_pool_info, node);
+               seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
+                          pool->id, pool->v.pg_num, pool->pg_num_mask,
+                          pool->v.lpg_num, pool->lpg_num_mask);
+       }
+       for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
+               struct ceph_entity_addr *addr =
+                       &client->osdc.osdmap->osd_addr[i];
+               int state = client->osdc.osdmap->osd_state[i];
+               char sb[64];
+
+               seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
+                          i, ceph_pr_addr(&addr->in_addr),
+                          ((client->osdc.osdmap->osd_weight[i]*100) >> 16),
+                          ceph_osdmap_state_str(sb, sizeof(sb), state));
+       }
+       return 0;
+}
+
+static int monc_show(struct seq_file *s, void *p)
+{
+       struct ceph_client *client = s->private;
+       struct ceph_mon_generic_request *req;
+       struct ceph_mon_client *monc = &client->monc;
+       struct rb_node *rp;
+
+       mutex_lock(&monc->mutex);
+
+       if (monc->have_mdsmap)
+               seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap);
+       if (monc->have_osdmap)
+               seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap);
+       if (monc->want_next_osdmap)
+               seq_printf(s, "want next osdmap\n");
+
+       for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
+               __u16 op;
+               req = rb_entry(rp, struct ceph_mon_generic_request, node);
+               op = le16_to_cpu(req->request->hdr.type);
+               if (op == CEPH_MSG_STATFS)
+                       seq_printf(s, "%lld statfs\n", req->tid);
+               else
+                       seq_printf(s, "%lld unknown\n", req->tid);
+       }
+
+       mutex_unlock(&monc->mutex);
+       return 0;
+}
+
+static int osdc_show(struct seq_file *s, void *pp)
+{
+       struct ceph_client *client = s->private;
+       struct ceph_osd_client *osdc = &client->osdc;
+       struct rb_node *p;
+
+       mutex_lock(&osdc->request_mutex);
+       for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
+               struct ceph_osd_request *req;
+               struct ceph_osd_request_head *head;
+               struct ceph_osd_op *op;
+               int num_ops;
+               int opcode, olen;
+               int i;
+
+               req = rb_entry(p, struct ceph_osd_request, r_node);
+
+               seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
+                          req->r_osd ? req->r_osd->o_osd : -1,
+                          le32_to_cpu(req->r_pgid.pool),
+                          le16_to_cpu(req->r_pgid.ps));
+
+               head = req->r_request->front.iov_base;
+               op = (void *)(head + 1);
+
+               num_ops = le16_to_cpu(head->num_ops);
+               olen = le32_to_cpu(head->object_len);
+               seq_printf(s, "%.*s", olen,
+                          (const char *)(head->ops + num_ops));
+
+               if (req->r_reassert_version.epoch)
+                       seq_printf(s, "\t%u'%llu",
+                          (unsigned)le32_to_cpu(req->r_reassert_version.epoch),
+                          le64_to_cpu(req->r_reassert_version.version));
+               else
+                       seq_printf(s, "\t");
+
+               for (i = 0; i < num_ops; i++) {
+                       opcode = le16_to_cpu(op->op);
+                       seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
+                       op++;
+               }
+
+               seq_printf(s, "\n");
+       }
+       mutex_unlock(&osdc->request_mutex);
+       return 0;
+}
+
+CEPH_DEFINE_SHOW_FUNC(monmap_show)
+CEPH_DEFINE_SHOW_FUNC(osdmap_show)
+CEPH_DEFINE_SHOW_FUNC(monc_show)
+CEPH_DEFINE_SHOW_FUNC(osdc_show)
+
+int ceph_debugfs_init(void)
+{
+       ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
+       if (!ceph_debugfs_dir)
+               return -ENOMEM;
+       return 0;
+}
+
+void ceph_debugfs_cleanup(void)
+{
+       debugfs_remove(ceph_debugfs_dir);
+}
+
+int ceph_debugfs_client_init(struct ceph_client *client)
+{
+       int ret = -ENOMEM;
+       char name[80];
+
+       snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
+                client->monc.auth->global_id);
+
+       client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
+       if (!client->debugfs_dir)
+               goto out;
+
+       client->monc.debugfs_file = debugfs_create_file("monc",
+                                                     0600,
+                                                     client->debugfs_dir,
+                                                     client,
+                                                     &monc_show_fops);
+       if (!client->monc.debugfs_file)
+               goto out;
+
+       client->osdc.debugfs_file = debugfs_create_file("osdc",
+                                                     0600,
+                                                     client->debugfs_dir,
+                                                     client,
+                                                     &osdc_show_fops);
+       if (!client->osdc.debugfs_file)
+               goto out;
+
+       client->debugfs_monmap = debugfs_create_file("monmap",
+                                       0600,
+                                       client->debugfs_dir,
+                                       client,
+                                       &monmap_show_fops);
+       if (!client->debugfs_monmap)
+               goto out;
+
+       client->debugfs_osdmap = debugfs_create_file("osdmap",
+                                       0600,
+                                       client->debugfs_dir,
+                                       client,
+                                       &osdmap_show_fops);
+       if (!client->debugfs_osdmap)
+               goto out;
+
+       return 0;
+
+out:
+       ceph_debugfs_client_cleanup(client);
+       return ret;
+}
+
+void ceph_debugfs_client_cleanup(struct ceph_client *client)
+{
+       debugfs_remove(client->debugfs_osdmap);
+       debugfs_remove(client->debugfs_monmap);
+       debugfs_remove(client->osdc.debugfs_file);
+       debugfs_remove(client->monc.debugfs_file);
+       debugfs_remove(client->debugfs_dir);
+}
+
+#else  /* CONFIG_DEBUG_FS */
+
+int ceph_debugfs_init(void)
+{
+       return 0;
+}
+
+void ceph_debugfs_cleanup(void)
+{
+}
+
+int ceph_debugfs_client_init(struct ceph_client *client,
+                            int (*module_debugfs_init)(struct ceph_client *))
+{
+       return 0;
+}
+
+void ceph_debugfs_client_cleanup(struct ceph_client *client)
+{
+}
+
+#endif  /* CONFIG_DEBUG_FS */
+
+EXPORT_SYMBOL(ceph_debugfs_init);
+EXPORT_SYMBOL(ceph_debugfs_cleanup);
similarity index 96%
rename from fs/ceph/messenger.c
rename to net/ceph/messenger.c
index 17a09b3..0e8157e 100644 (file)
@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/crc32c.h>
 #include <linux/ctype.h>
 #include <linux/blkdev.h>
 #include <net/tcp.h>
 
-#include "super.h"
-#include "messenger.h"
-#include "decode.h"
-#include "pagelist.h"
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/pagelist.h>
 
 /*
  * Ceph uses the messenger to exchange ceph_msg messages with other
@@ -50,7 +50,7 @@ static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
 static DEFINE_SPINLOCK(addr_str_lock);
 static int last_addr_str;
 
-const char *pr_addr(const struct sockaddr_storage *ss)
+const char *ceph_pr_addr(const struct sockaddr_storage *ss)
 {
        int i;
        char *s;
@@ -81,6 +81,7 @@ const char *pr_addr(const struct sockaddr_storage *ss)
 
        return s;
 }
+EXPORT_SYMBOL(ceph_pr_addr);
 
 static void encode_my_addr(struct ceph_messenger *msgr)
 {
@@ -93,7 +94,7 @@ static void encode_my_addr(struct ceph_messenger *msgr)
  */
 struct workqueue_struct *ceph_msgr_wq;
 
-int __init ceph_msgr_init(void)
+int ceph_msgr_init(void)
 {
        ceph_msgr_wq = create_workqueue("ceph-msgr");
        if (IS_ERR(ceph_msgr_wq)) {
@@ -104,16 +105,19 @@ int __init ceph_msgr_init(void)
        }
        return 0;
 }
+EXPORT_SYMBOL(ceph_msgr_init);
 
 void ceph_msgr_exit(void)
 {
        destroy_workqueue(ceph_msgr_wq);
 }
+EXPORT_SYMBOL(ceph_msgr_exit);
 
 void ceph_msgr_flush(void)
 {
        flush_workqueue(ceph_msgr_wq);
 }
+EXPORT_SYMBOL(ceph_msgr_flush);
 
 
 /*
@@ -223,19 +227,19 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
 
        set_sock_callbacks(sock, con);
 
-       dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
+       dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
 
        ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
                                 O_NONBLOCK);
        if (ret == -EINPROGRESS) {
                dout("connect %s EINPROGRESS sk_state = %u\n",
-                    pr_addr(&con->peer_addr.in_addr),
+                    ceph_pr_addr(&con->peer_addr.in_addr),
                     sock->sk->sk_state);
                ret = 0;
        }
        if (ret < 0) {
                pr_err("connect %s error %d\n",
-                      pr_addr(&con->peer_addr.in_addr), ret);
+                      ceph_pr_addr(&con->peer_addr.in_addr), ret);
                sock_release(sock);
                con->sock = NULL;
                con->error_msg = "connect error";
@@ -336,7 +340,8 @@ static void reset_connection(struct ceph_connection *con)
  */
 void ceph_con_close(struct ceph_connection *con)
 {
-       dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr));
+       dout("con_close %p peer %s\n", con,
+            ceph_pr_addr(&con->peer_addr.in_addr));
        set_bit(CLOSED, &con->state);  /* in case there's queued work */
        clear_bit(STANDBY, &con->state);  /* avoid connect_seq bump */
        clear_bit(LOSSYTX, &con->state);  /* so we retry next connect */
@@ -349,19 +354,21 @@ void ceph_con_close(struct ceph_connection *con)
        mutex_unlock(&con->mutex);
        queue_con(con);
 }
+EXPORT_SYMBOL(ceph_con_close);
 
 /*
  * Reopen a closed connection, with a new peer address.
  */
 void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
 {
-       dout("con_open %p %s\n", con, pr_addr(&addr->in_addr));
+       dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
        set_bit(OPENING, &con->state);
        clear_bit(CLOSED, &con->state);
        memcpy(&con->peer_addr, addr, sizeof(*addr));
        con->delay = 0;      /* reset backoff memory */
        queue_con(con);
 }
+EXPORT_SYMBOL(ceph_con_open);
 
 /*
  * return true if this connection ever successfully opened
@@ -408,6 +415,7 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
        INIT_LIST_HEAD(&con->out_sent);
        INIT_DELAYED_WORK(&con->work, con_work);
 }
+EXPORT_SYMBOL(ceph_con_init);
 
 
 /*
@@ -652,7 +660,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
        dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
             con->connect_seq, global_seq, proto);
 
-       con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED);
+       con->out_connect.features = cpu_to_le64(msgr->supported_features);
        con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
        con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
        con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1013,7 +1021,7 @@ static int verify_hello(struct ceph_connection *con)
 {
        if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
                pr_err("connect to %s got bad banner\n",
-                      pr_addr(&con->peer_addr.in_addr));
+                      ceph_pr_addr(&con->peer_addr.in_addr));
                con->error_msg = "protocol error, bad banner";
                return -1;
        }
@@ -1116,7 +1124,7 @@ int ceph_parse_ips(const char *c, const char *end,
 
                addr_set_port(ss, port);
 
-               dout("parse_ips got %s\n", pr_addr(ss));
+               dout("parse_ips got %s\n", ceph_pr_addr(ss));
 
                if (p == end)
                        break;
@@ -1136,6 +1144,7 @@ bad:
        pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
        return -EINVAL;
 }
+EXPORT_SYMBOL(ceph_parse_ips);
 
 static int process_banner(struct ceph_connection *con)
 {
@@ -1157,9 +1166,9 @@ static int process_banner(struct ceph_connection *con)
            !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
              con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
                pr_warning("wrong peer, want %s/%d, got %s/%d\n",
-                          pr_addr(&con->peer_addr.in_addr),
+                          ceph_pr_addr(&con->peer_addr.in_addr),
                           (int)le32_to_cpu(con->peer_addr.nonce),
-                          pr_addr(&con->actual_peer_addr.in_addr),
+                          ceph_pr_addr(&con->actual_peer_addr.in_addr),
                           (int)le32_to_cpu(con->actual_peer_addr.nonce));
                con->error_msg = "wrong peer at address";
                return -1;
@@ -1177,7 +1186,7 @@ static int process_banner(struct ceph_connection *con)
                addr_set_port(&con->msgr->inst.addr.in_addr, port);
                encode_my_addr(con->msgr);
                dout("process_banner learned my addr is %s\n",
-                    pr_addr(&con->msgr->inst.addr.in_addr));
+                    ceph_pr_addr(&con->msgr->inst.addr.in_addr));
        }
 
        set_bit(NEGOTIATING, &con->state);
@@ -1198,8 +1207,8 @@ static void fail_protocol(struct ceph_connection *con)
 
 static int process_connect(struct ceph_connection *con)
 {
-       u64 sup_feat = CEPH_FEATURE_SUPPORTED;
-       u64 req_feat = CEPH_FEATURE_REQUIRED;
+       u64 sup_feat = con->msgr->supported_features;
+       u64 req_feat = con->msgr->required_features;
        u64 server_feat = le64_to_cpu(con->in_reply.features);
 
        dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
@@ -1209,7 +1218,7 @@ static int process_connect(struct ceph_connection *con)
                pr_err("%s%lld %s feature set mismatch,"
                       " my %llx < server's %llx, missing %llx\n",
                       ENTITY_NAME(con->peer_name),
-                      pr_addr(&con->peer_addr.in_addr),
+                      ceph_pr_addr(&con->peer_addr.in_addr),
                       sup_feat, server_feat, server_feat & ~sup_feat);
                con->error_msg = "missing required protocol features";
                fail_protocol(con);
@@ -1219,7 +1228,7 @@ static int process_connect(struct ceph_connection *con)
                pr_err("%s%lld %s protocol version mismatch,"
                       " my %d != server's %d\n",
                       ENTITY_NAME(con->peer_name),
-                      pr_addr(&con->peer_addr.in_addr),
+                      ceph_pr_addr(&con->peer_addr.in_addr),
                       le32_to_cpu(con->out_connect.protocol_version),
                       le32_to_cpu(con->in_reply.protocol_version));
                con->error_msg = "protocol version mismatch";
@@ -1253,7 +1262,7 @@ static int process_connect(struct ceph_connection *con)
                     le32_to_cpu(con->in_connect.connect_seq));
                pr_err("%s%lld %s connection reset\n",
                       ENTITY_NAME(con->peer_name),
-                      pr_addr(&con->peer_addr.in_addr));
+                      ceph_pr_addr(&con->peer_addr.in_addr));
                reset_connection(con);
                prepare_write_connect(con->msgr, con, 0);
                prepare_read_connect(con);
@@ -1298,7 +1307,7 @@ static int process_connect(struct ceph_connection *con)
                        pr_err("%s%lld %s protocol feature mismatch,"
                               " my required %llx > server's %llx, need %llx\n",
                               ENTITY_NAME(con->peer_name),
-                              pr_addr(&con->peer_addr.in_addr),
+                              ceph_pr_addr(&con->peer_addr.in_addr),
                               req_feat, server_feat, req_feat & ~server_feat);
                        con->error_msg = "missing required protocol features";
                        fail_protocol(con);
@@ -1525,7 +1534,7 @@ static int read_partial_message(struct ceph_connection *con)
        if ((s64)seq - (s64)con->in_seq < 1) {
                pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
                        ENTITY_NAME(con->peer_name),
-                       pr_addr(&con->peer_addr.in_addr),
+                       ceph_pr_addr(&con->peer_addr.in_addr),
                        seq, con->in_seq + 1);
                con->in_base_pos = -front_len - middle_len - data_len -
                        sizeof(m->footer);
@@ -2023,9 +2032,9 @@ out:
 static void ceph_fault(struct ceph_connection *con)
 {
        pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
-              pr_addr(&con->peer_addr.in_addr), con->error_msg);
+              ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
        dout("fault %p state %lu to peer %s\n",
-            con, con->state, pr_addr(&con->peer_addr.in_addr));
+            con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
 
        if (test_bit(LOSSYTX, &con->state)) {
                dout("fault on LOSSYTX channel\n");
@@ -2085,7 +2094,9 @@ out:
 /*
  * create a new messenger instance
  */
-struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
+struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
+                                            u32 supported_features,
+                                            u32 required_features)
 {
        struct ceph_messenger *msgr;
 
@@ -2093,6 +2104,9 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
        if (msgr == NULL)
                return ERR_PTR(-ENOMEM);
 
+       msgr->supported_features = supported_features;
+       msgr->required_features = required_features;
+
        spin_lock_init(&msgr->global_seq_lock);
 
        /* the zero page is needed if a request is "canceled" while the message
@@ -2115,6 +2129,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
        dout("messenger_create %p\n", msgr);
        return msgr;
 }
+EXPORT_SYMBOL(ceph_messenger_create);
 
 void ceph_messenger_destroy(struct ceph_messenger *msgr)
 {
@@ -2124,6 +2139,7 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr)
        kfree(msgr);
        dout("destroyed messenger %p\n", msgr);
 }
+EXPORT_SYMBOL(ceph_messenger_destroy);
 
 /*
  * Queue up an outgoing message on the given connection.
@@ -2160,6 +2176,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
        if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
                queue_con(con);
 }
+EXPORT_SYMBOL(ceph_con_send);
 
 /*
  * Revoke a message that was previously queued for send
@@ -2225,6 +2242,7 @@ void ceph_con_keepalive(struct ceph_connection *con)
            test_and_set_bit(WRITE_PENDING, &con->state) == 0)
                queue_con(con);
 }
+EXPORT_SYMBOL(ceph_con_keepalive);
 
 
 /*
@@ -2299,6 +2317,7 @@ out:
        pr_err("msg_new can't create type %d front %d\n", type, front_len);
        return NULL;
 }
+EXPORT_SYMBOL(ceph_msg_new);
 
 /*
  * Allocate "middle" portion of a message, if it is needed and wasn't
@@ -2410,6 +2429,7 @@ void ceph_msg_last_put(struct kref *kref)
        else
                ceph_msg_kfree(m);
 }
+EXPORT_SYMBOL(ceph_msg_last_put);
 
 void ceph_msg_dump(struct ceph_msg *msg)
 {
@@ -2430,3 +2450,4 @@ void ceph_msg_dump(struct ceph_msg *msg)
                       DUMP_PREFIX_OFFSET, 16, 1,
                       &msg->footer, sizeof(msg->footer), true);
 }
+EXPORT_SYMBOL(ceph_msg_dump);
similarity index 94%
rename from fs/ceph/mon_client.c
rename to net/ceph/mon_client.c
index b2a5a3e..8a07939 100644 (file)
@@ -1,14 +1,16 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/sched.h>
 
-#include "mon_client.h"
-#include "super.h"
-#include "auth.h"
-#include "decode.h"
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/decode.h>
+
+#include <linux/ceph/auth.h>
 
 /*
  * Interact with Ceph monitor cluster.  Handle requests for new map
@@ -74,7 +76,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
             m->num_mon);
        for (i = 0; i < m->num_mon; i++)
                dout("monmap_decode  mon%d is %s\n", i,
-                    pr_addr(&m->mon_inst[i].addr.in_addr));
+                    ceph_pr_addr(&m->mon_inst[i].addr.in_addr));
        return m;
 
 bad:
@@ -191,30 +193,33 @@ static void __send_subscribe(struct ceph_mon_client *monc)
                struct ceph_msg *msg = monc->m_subscribe;
                struct ceph_mon_subscribe_item *i;
                void *p, *end;
+               int num;
 
                p = msg->front.iov_base;
                end = p + msg->front_max;
 
-               dout("__send_subscribe to 'mdsmap' %u+\n",
-                    (unsigned)monc->have_mdsmap);
+               num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
+               ceph_encode_32(&p, num);
+
                if (monc->want_next_osdmap) {
                        dout("__send_subscribe to 'osdmap' %u\n",
                             (unsigned)monc->have_osdmap);
-                       ceph_encode_32(&p, 3);
                        ceph_encode_string(&p, end, "osdmap", 6);
                        i = p;
                        i->have = cpu_to_le64(monc->have_osdmap);
                        i->onetime = 1;
                        p += sizeof(*i);
                        monc->want_next_osdmap = 2;  /* requested */
-               } else {
-                       ceph_encode_32(&p, 2);
                }
-               ceph_encode_string(&p, end, "mdsmap", 6);
-               i = p;
-               i->have = cpu_to_le64(monc->have_mdsmap);
-               i->onetime = 0;
-               p += sizeof(*i);
+               if (monc->want_mdsmap) {
+                       dout("__send_subscribe to 'mdsmap' %u+\n",
+                            (unsigned)monc->have_mdsmap);
+                       ceph_encode_string(&p, end, "mdsmap", 6);
+                       i = p;
+                       i->have = cpu_to_le64(monc->have_mdsmap);
+                       i->onetime = 0;
+                       p += sizeof(*i);
+               }
                ceph_encode_string(&p, end, "monmap", 6);
                i = p;
                i->have = 0;
@@ -243,7 +248,8 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
        mutex_lock(&monc->mutex);
        if (monc->hunting) {
                pr_info("mon%d %s session established\n",
-                       monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr));
+                       monc->cur_mon,
+                       ceph_pr_addr(&monc->con->peer_addr.in_addr));
                monc->hunting = false;
        }
        dout("handle_subscribe_ack after %d seconds\n", seconds);
@@ -266,6 +272,7 @@ int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got)
        mutex_unlock(&monc->mutex);
        return 0;
 }
+EXPORT_SYMBOL(ceph_monc_got_mdsmap);
 
 int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got)
 {
@@ -310,6 +317,7 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
        mutex_unlock(&monc->mutex);
        return 0;
 }
+EXPORT_SYMBOL(ceph_monc_open_session);
 
 /*
  * The monitor responds with mount ack indicate mount success.  The
@@ -540,6 +548,7 @@ out:
        kref_put(&req->kref, release_generic_request);
        return err;
 }
+EXPORT_SYMBOL(ceph_monc_do_statfs);
 
 /*
  * pool ops
@@ -651,6 +660,7 @@ int ceph_monc_create_snapid(struct ceph_mon_client *monc,
                                   pool, 0, (char *)snapid, sizeof(*snapid));
 
 }
+EXPORT_SYMBOL(ceph_monc_create_snapid);
 
 int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
                            u32 pool, u64 snapid)
@@ -708,9 +718,9 @@ static void delayed_work(struct work_struct *work)
  */
 static int build_initial_monmap(struct ceph_mon_client *monc)
 {
-       struct ceph_mount_args *args = monc->client->mount_args;
-       struct ceph_entity_addr *mon_addr = args->mon_addr;
-       int num_mon = args->num_mon;
+       struct ceph_options *opt = monc->client->options;
+       struct ceph_entity_addr *mon_addr = opt->mon_addr;
+       int num_mon = opt->num_mon;
        int i;
 
        /* build initial monmap */
@@ -728,11 +738,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
        }
        monc->monmap->num_mon = num_mon;
        monc->have_fsid = false;
-
-       /* release addr memory */
-       kfree(args->mon_addr);
-       args->mon_addr = NULL;
-       args->num_mon = 0;
        return 0;
 }
 
@@ -753,8 +758,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
        monc->con = NULL;
 
        /* authentication */
-       monc->auth = ceph_auth_init(cl->mount_args->name,
-                                   cl->mount_args->secret);
+       monc->auth = ceph_auth_init(cl->options->name,
+                                   cl->options->secret);
        if (IS_ERR(monc->auth))
                return PTR_ERR(monc->auth);
        monc->auth->want_keys =
@@ -808,6 +813,7 @@ out_monmap:
 out:
        return err;
 }
+EXPORT_SYMBOL(ceph_monc_init);
 
 void ceph_monc_stop(struct ceph_mon_client *monc)
 {
@@ -832,6 +838,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
 
        kfree(monc->monmap);
 }
+EXPORT_SYMBOL(ceph_monc_stop);
 
 static void handle_auth_reply(struct ceph_mon_client *monc,
                              struct ceph_msg *msg)
@@ -889,6 +896,7 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc)
        mutex_unlock(&monc->mutex);
        return ret;
 }
+EXPORT_SYMBOL(ceph_monc_validate_auth);
 
 /*
  * handle incoming message
@@ -922,15 +930,16 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
                ceph_monc_handle_map(monc, msg);
                break;
 
-       case CEPH_MSG_MDS_MAP:
-               ceph_mdsc_handle_map(&monc->client->mdsc, msg);
-               break;
-
        case CEPH_MSG_OSD_MAP:
                ceph_osdc_handle_map(&monc->client->osdc, msg);
                break;
 
        default:
+               /* can the chained handler handle it? */
+               if (monc->client->extra_mon_dispatch &&
+                   monc->client->extra_mon_dispatch(monc->client, msg) == 0)
+                       break;
+                       
                pr_err("received unknown message type %d %s\n", type,
                       ceph_msg_type_name(type));
        }
@@ -994,7 +1003,7 @@ static void mon_fault(struct ceph_connection *con)
        if (monc->con && !monc->hunting)
                pr_info("mon%d %s session lost, "
                        "hunting for new mon\n", monc->cur_mon,
-                       pr_addr(&monc->con->peer_addr.in_addr));
+                       ceph_pr_addr(&monc->con->peer_addr.in_addr));
 
        __close_session(monc);
        if (!monc->hunting) {
similarity index 95%
rename from fs/ceph/msgpool.c
rename to net/ceph/msgpool.c
index dd65a64..d5f2d97 100644 (file)
@@ -1,11 +1,11 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
 
-#include "msgpool.h"
+#include <linux/ceph/msgpool.h>
 
 static void *alloc_fn(gfp_t gfp_mask, void *arg)
 {
similarity index 97%
rename from fs/ceph/osd_client.c
rename to net/ceph/osd_client.c
index 0edb43f..7939199 100644 (file)
@@ -1,5 +1,6 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/module.h>
 #include <linux/err.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/bio.h>
 #endif
 
-#include "super.h"
-#include "osd_client.h"
-#include "messenger.h"
-#include "decode.h"
-#include "auth.h"
-#include "pagelist.h"
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/pagelist.h>
 
 #define OSD_OP_FRONT_LEN       4096
 #define OSD_OPREPLY_FRONT_LEN  512
@@ -70,11 +71,14 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
                op->extent.length = objlen;
        }
        req->r_num_pages = calc_pages_for(off, *plen);
+       if (op->op == CEPH_OSD_OP_WRITE)
+               op->payload_len = *plen;
 
        dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
             *bno, objoff, objlen, req->r_num_pages);
 
 }
+EXPORT_SYMBOL(ceph_calc_raw_layout);
 
 /*
  * Implement client access to distributed object storage cluster.
@@ -154,19 +158,7 @@ void ceph_osdc_release_request(struct kref *kref)
        else
                kfree(req);
 }
-
-static int op_needs_trail(int op)
-{
-       switch (op) {
-       case CEPH_OSD_OP_GETXATTR:
-       case CEPH_OSD_OP_SETXATTR:
-       case CEPH_OSD_OP_CMPXATTR:
-       case CEPH_OSD_OP_CALL:
-               return 1;
-       default:
-               return 0;
-       }
-}
+EXPORT_SYMBOL(ceph_osdc_release_request);
 
 static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail)
 {
@@ -268,6 +260,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 
        return req;
 }
+EXPORT_SYMBOL(ceph_osdc_alloc_request);
 
 static void osd_req_encode_op(struct ceph_osd_request *req,
                              struct ceph_osd_op *dst,
@@ -403,6 +396,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
        msg->hdr.front_len = cpu_to_le32(msg_size);
        return;
 }
+EXPORT_SYMBOL(ceph_osdc_build_request);
 
 /*
  * build new request AND message, calculate layout, and adjust file
@@ -460,6 +454,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 
        return req;
 }
+EXPORT_SYMBOL(ceph_osdc_new_request);
 
 /*
  * We keep osd requests in an rbtree, sorted by ->r_tid.
@@ -614,7 +609,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
        dout("__move_osd_to_lru %p\n", osd);
        BUG_ON(!list_empty(&osd->o_osd_lru));
        list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
-       osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ;
+       osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
 }
 
 static void __remove_osd_from_lru(struct ceph_osd *osd)
@@ -708,7 +703,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
 static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
 {
        schedule_delayed_work(&osdc->timeout_work,
-                       osdc->client->mount_args->osd_keepalive_timeout * HZ);
+                       osdc->client->options->osd_keepalive_timeout * HZ);
 }
 
 static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
@@ -909,9 +904,9 @@ static void handle_timeout(struct work_struct *work)
                container_of(work, struct ceph_osd_client, timeout_work.work);
        struct ceph_osd_request *req, *last_req = NULL;
        struct ceph_osd *osd;
-       unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ;
+       unsigned long timeout = osdc->client->options->osd_timeout * HZ;
        unsigned long keepalive =
-               osdc->client->mount_args->osd_keepalive_timeout * HZ;
+               osdc->client->options->osd_keepalive_timeout * HZ;
        unsigned long last_stamp = 0;
        struct rb_node *p;
        struct list_head slow_osds;
@@ -998,7 +993,7 @@ static void handle_osds_timeout(struct work_struct *work)
                container_of(work, struct ceph_osd_client,
                             osds_timeout_work.work);
        unsigned long delay =
-               osdc->client->mount_args->osd_idle_ttl * HZ >> 2;
+               osdc->client->options->osd_idle_ttl * HZ >> 2;
 
        dout("osds timeout\n");
        down_read(&osdc->map_sem);
@@ -1360,6 +1355,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
        up_read(&osdc->map_sem);
        return rc;
 }
+EXPORT_SYMBOL(ceph_osdc_start_request);
 
 /*
  * wait for a request to complete
@@ -1382,6 +1378,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
        dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result);
        return req->r_result;
 }
+EXPORT_SYMBOL(ceph_osdc_wait_request);
 
 /*
  * sync - wait for all in-flight requests to flush.  avoid starvation.
@@ -1415,6 +1412,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc)
        mutex_unlock(&osdc->request_mutex);
        dout("sync done (thru tid %llu)\n", last_tid);
 }
+EXPORT_SYMBOL(ceph_osdc_sync);
 
 /*
  * init, shutdown
@@ -1440,7 +1438,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
        INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
 
        schedule_delayed_work(&osdc->osds_timeout_work,
-          round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ));
+          round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
 
        err = -ENOMEM;
        osdc->req_mempool = mempool_create_kmalloc_pool(10,
@@ -1466,6 +1464,7 @@ out_mempool:
 out:
        return err;
 }
+EXPORT_SYMBOL(ceph_osdc_init);
 
 void ceph_osdc_stop(struct ceph_osd_client *osdc)
 {
@@ -1480,6 +1479,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
        ceph_msgpool_destroy(&osdc->msgpool_op);
        ceph_msgpool_destroy(&osdc->msgpool_op_reply);
 }
+EXPORT_SYMBOL(ceph_osdc_stop);
 
 /*
  * Read some contiguous pages.  If we cross a stripe boundary, shorten
@@ -1517,6 +1517,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
        dout("readpages result %d\n", rc);
        return rc;
 }
+EXPORT_SYMBOL(ceph_osdc_readpages);
 
 /*
  * do a synchronous write on N pages
@@ -1559,6 +1560,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
        dout("writepages result %d\n", rc);
        return rc;
 }
+EXPORT_SYMBOL(ceph_osdc_writepages);
 
 /*
  * handle incoming message
similarity index 98%
rename from fs/ceph/osdmap.c
rename to net/ceph/osdmap.c
index 3ccd117..d73f3f6 100644 (file)
@@ -1,14 +1,15 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <asm/div64.h>
 
-#include "super.h"
-#include "osdmap.h"
-#include "crush/hash.h"
-#include "crush/mapper.h"
-#include "decode.h"
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osdmap.h>
+#include <linux/ceph/decode.h>
+#include <linux/crush/hash.h>
+#include <linux/crush/mapper.h>
 
 char *ceph_osdmap_state_str(char *str, int len, int state)
 {
@@ -429,6 +430,7 @@ int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
        }
        return -ENOENT;
 }
+EXPORT_SYMBOL(ceph_pg_poolid_by_name);
 
 static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
 {
@@ -979,6 +981,7 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 
        dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
 }
+EXPORT_SYMBOL(ceph_calc_file_object_mapping);
 
 /*
  * calculate an object layout (i.e. pgid) from an oid,
@@ -1024,6 +1027,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
        ol->ol_stripe_unit = fl->fl_object_stripe_unit;
        return 0;
 }
+EXPORT_SYMBOL(ceph_calc_object_layout);
 
 /*
  * Calculate raw osd vector for the given pgid.  Return pointer to osd
@@ -1121,3 +1125,4 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
                        return osds[i];
        return -1;
 }
+EXPORT_SYMBOL(ceph_calc_pg_primary);
similarity index 90%
rename from fs/ceph/pagelist.c
rename to net/ceph/pagelist.c
index 326e1c0..3b146cf 100644 (file)
@@ -1,9 +1,9 @@
 
+#include <linux/module.h>
 #include <linux/gfp.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
-
-#include "pagelist.h"
+#include <linux/ceph/pagelist.h>
 
 static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
 {
@@ -25,6 +25,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl)
        }
        return 0;
 }
+EXPORT_SYMBOL(ceph_pagelist_release);
 
 static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
 {
@@ -61,3 +62,4 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
        pl->room -= len;
        return 0;
 }
+EXPORT_SYMBOL(ceph_pagelist_append);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
new file mode 100644 (file)
index 0000000..54caf06
--- /dev/null
@@ -0,0 +1,223 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+
+#include <linux/ceph/libceph.h>
+
+/*
+ * build a vector of user pages
+ */
+struct page **ceph_get_direct_page_vector(const char __user *data,
+                                                int num_pages,
+                                                loff_t off, size_t len)
+{
+       struct page **pages;
+       int rc;
+
+       pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
+       if (!pages)
+               return ERR_PTR(-ENOMEM);
+
+       down_read(&current->mm->mmap_sem);
+       rc = get_user_pages(current, current->mm, (unsigned long)data,
+                           num_pages, 0, 0, pages, NULL);
+       up_read(&current->mm->mmap_sem);
+       if (rc < 0)
+               goto fail;
+       return pages;
+
+fail:
+       kfree(pages);
+       return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(ceph_get_direct_page_vector);
+
+void ceph_put_page_vector(struct page **pages, int num_pages)
+{
+       int i;
+
+       for (i = 0; i < num_pages; i++)
+               put_page(pages[i]);
+       kfree(pages);
+}
+EXPORT_SYMBOL(ceph_put_page_vector);
+
+void ceph_release_page_vector(struct page **pages, int num_pages)
+{
+       int i;
+
+       for (i = 0; i < num_pages; i++)
+               __free_pages(pages[i], 0);
+       kfree(pages);
+}
+EXPORT_SYMBOL(ceph_release_page_vector);
+
+/*
+ * allocate a vector new pages
+ */
+struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
+{
+       struct page **pages;
+       int i;
+
+       pages = kmalloc(sizeof(*pages) * num_pages, flags);
+       if (!pages)
+               return ERR_PTR(-ENOMEM);
+       for (i = 0; i < num_pages; i++) {
+               pages[i] = __page_cache_alloc(flags);
+               if (pages[i] == NULL) {
+                       ceph_release_page_vector(pages, i);
+                       return ERR_PTR(-ENOMEM);
+               }
+       }
+       return pages;
+}
+EXPORT_SYMBOL(ceph_alloc_page_vector);
+
+/*
+ * copy user data into a page vector
+ */
+int ceph_copy_user_to_page_vector(struct page **pages,
+                                        const char __user *data,
+                                        loff_t off, size_t len)
+{
+       int i = 0;
+       int po = off & ~PAGE_CACHE_MASK;
+       int left = len;
+       int l, bad;
+
+       while (left > 0) {
+               l = min_t(int, PAGE_CACHE_SIZE-po, left);
+               bad = copy_from_user(page_address(pages[i]) + po, data, l);
+               if (bad == l)
+                       return -EFAULT;
+               data += l - bad;
+               left -= l - bad;
+               po += l - bad;
+               if (po == PAGE_CACHE_SIZE) {
+                       po = 0;
+                       i++;
+               }
+       }
+       return len;
+}
+EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
+
+int ceph_copy_to_page_vector(struct page **pages,
+                                   const char *data,
+                                   loff_t off, size_t len)
+{
+       int i = 0;
+       size_t po = off & ~PAGE_CACHE_MASK;
+       size_t left = len;
+       size_t l;
+
+       while (left > 0) {
+               l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+               memcpy(page_address(pages[i]) + po, data, l);
+               data += l;
+               left -= l;
+               po += l;
+               if (po == PAGE_CACHE_SIZE) {
+                       po = 0;
+                       i++;
+               }
+       }
+       return len;
+}
+EXPORT_SYMBOL(ceph_copy_to_page_vector);
+
+int ceph_copy_from_page_vector(struct page **pages,
+                                   char *data,
+                                   loff_t off, size_t len)
+{
+       int i = 0;
+       size_t po = off & ~PAGE_CACHE_MASK;
+       size_t left = len;
+       size_t l;
+
+       while (left > 0) {
+               l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+               memcpy(data, page_address(pages[i]) + po, l);
+               data += l;
+               left -= l;
+               po += l;
+               if (po == PAGE_CACHE_SIZE) {
+                       po = 0;
+                       i++;
+               }
+       }
+       return len;
+}
+EXPORT_SYMBOL(ceph_copy_from_page_vector);
+
+/*
+ * copy user data from a page vector into a user pointer
+ */
+int ceph_copy_page_vector_to_user(struct page **pages,
+                                        char __user *data,
+                                        loff_t off, size_t len)
+{
+       int i = 0;
+       int po = off & ~PAGE_CACHE_MASK;
+       int left = len;
+       int l, bad;
+
+       while (left > 0) {
+               l = min_t(int, left, PAGE_CACHE_SIZE-po);
+               bad = copy_to_user(data, page_address(pages[i]) + po, l);
+               if (bad == l)
+                       return -EFAULT;
+               data += l - bad;
+               left -= l - bad;
+               if (po) {
+                       po += l - bad;
+                       if (po == PAGE_CACHE_SIZE)
+                               po = 0;
+               }
+               i++;
+       }
+       return len;
+}
+EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
+
+/*
+ * Zero an extent within a page vector.  Offset is relative to the
+ * start of the first page.
+ */
+void ceph_zero_page_vector_range(int off, int len, struct page **pages)
+{
+       int i = off >> PAGE_CACHE_SHIFT;
+
+       off &= ~PAGE_CACHE_MASK;
+
+       dout("zero_page_vector_page %u~%u\n", off, len);
+
+       /* leading partial page? */
+       if (off) {
+               int end = min((int)PAGE_CACHE_SIZE, off + len);
+               dout("zeroing %d %p head from %d\n", i, pages[i],
+                    (int)off);
+               zero_user_segment(pages[i], off, end);
+               len -= (end - off);
+               i++;
+       }
+       while (len >= PAGE_CACHE_SIZE) {
+               dout("zeroing %d %p len=%d\n", i, pages[i], len);
+               zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
+               len -= PAGE_CACHE_SIZE;
+               i++;
+       }
+       /* trailing partial page? */
+       if (len) {
+               dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len);
+               zero_user_segment(pages[i], 0, len);
+       }
+}
+EXPORT_SYMBOL(ceph_zero_page_vector_range);
+