Merge tag 'selinux-pr-20190521' of git://git.kernel.org/pub/scm/linux/kernel/git...
[platform/kernel/linux-starfive.git] / fs / ceph / super.c
1 // SPDX-License-Identifier: GPL-2.0-only
2
3 #include <linux/ceph/ceph_debug.h>
4
5 #include <linux/backing-dev.h>
6 #include <linux/ctype.h>
7 #include <linux/fs.h>
8 #include <linux/inet.h>
9 #include <linux/in6.h>
10 #include <linux/module.h>
11 #include <linux/mount.h>
12 #include <linux/parser.h>
13 #include <linux/sched.h>
14 #include <linux/seq_file.h>
15 #include <linux/slab.h>
16 #include <linux/statfs.h>
17 #include <linux/string.h>
18
19 #include "super.h"
20 #include "mds_client.h"
21 #include "cache.h"
22
23 #include <linux/ceph/ceph_features.h>
24 #include <linux/ceph/decode.h>
25 #include <linux/ceph/mon_client.h>
26 #include <linux/ceph/auth.h>
27 #include <linux/ceph/debugfs.h>
28
29 /*
30  * Ceph superblock operations
31  *
32  * Handle the basics of mounting, unmounting.
33  */
34
35 /*
36  * super ops
37  */
38 static void ceph_put_super(struct super_block *s)
39 {
40         struct ceph_fs_client *fsc = ceph_sb_to_client(s);
41
42         dout("put_super\n");
43         ceph_mdsc_close_sessions(fsc->mdsc);
44 }
45
46 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
47 {
48         struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
49         struct ceph_mon_client *monc = &fsc->client->monc;
50         struct ceph_statfs st;
51         u64 fsid;
52         int err;
53         u64 data_pool;
54
55         if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
56                 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
57         } else {
58                 data_pool = CEPH_NOPOOL;
59         }
60
61         dout("statfs\n");
62         err = ceph_monc_do_statfs(monc, data_pool, &st);
63         if (err < 0)
64                 return err;
65
66         /* fill in kstatfs */
67         buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
68
69         /*
70          * express utilization in terms of large blocks to avoid
71          * overflow on 32-bit machines.
72          *
73          * NOTE: for the time being, we make bsize == frsize to humor
74          * not-yet-ancient versions of glibc that are broken.
75          * Someday, we will probably want to report a real block
76          * size...  whatever that may mean for a network file system!
77          */
78         buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
79         buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
80
81         /*
82          * By default use root quota for stats; fallback to overall filesystem
83          * usage if using 'noquotadf' mount option or if the root dir doesn't
84          * have max_bytes quota set.
85          */
86         if (ceph_test_mount_opt(fsc, NOQUOTADF) ||
87             !ceph_quota_update_statfs(fsc, buf)) {
88                 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
89                 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
90                 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
91         }
92
93         buf->f_files = le64_to_cpu(st.num_objects);
94         buf->f_ffree = -1;
95         buf->f_namelen = NAME_MAX;
96
97         /* Must convert the fsid, for consistent values across arches */
98         mutex_lock(&monc->mutex);
99         fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^
100                le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1));
101         mutex_unlock(&monc->mutex);
102
103         buf->f_fsid.val[0] = fsid & 0xffffffff;
104         buf->f_fsid.val[1] = fsid >> 32;
105
106         return 0;
107 }
108
109
110 static int ceph_sync_fs(struct super_block *sb, int wait)
111 {
112         struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
113
114         if (!wait) {
115                 dout("sync_fs (non-blocking)\n");
116                 ceph_flush_dirty_caps(fsc->mdsc);
117                 dout("sync_fs (non-blocking) done\n");
118                 return 0;
119         }
120
121         dout("sync_fs (blocking)\n");
122         ceph_osdc_sync(&fsc->client->osdc);
123         ceph_mdsc_sync(fsc->mdsc);
124         dout("sync_fs (blocking) done\n");
125         return 0;
126 }
127
128 /*
129  * mount options
130  */
/*
 * Token ids for the ceph-specific mount options.  The ordering is
 * significant: parse_fsopt_token() treats every token below Opt_last_int
 * as carrying an integer argument, and every token between Opt_last_int
 * and Opt_last_string as carrying a string argument.  The remaining
 * tokens are bare flags.
 */
131 enum {
132         Opt_wsize,
133         Opt_rsize,
134         Opt_rasize,
135         Opt_caps_wanted_delay_min,
136         Opt_caps_wanted_delay_max,
137         Opt_caps_max,
138         Opt_readdir_max_entries,
139         Opt_readdir_max_bytes,
140         Opt_congestion_kb,
141         Opt_last_int,
142         /* int args above */
143         Opt_snapdirname,
144         Opt_mds_namespace,
145         Opt_fscache_uniq,
146         Opt_last_string,
147         /* string args above */
148         Opt_dirstat,
149         Opt_nodirstat,
150         Opt_rbytes,
151         Opt_norbytes,
152         Opt_asyncreaddir,
153         Opt_noasyncreaddir,
154         Opt_dcache,
155         Opt_nodcache,
156         Opt_ino32,
157         Opt_noino32,
158         Opt_fscache,
159         Opt_nofscache,
160         Opt_poolperm,
161         Opt_nopoolperm,
162         Opt_require_active_mds,
163         Opt_norequire_active_mds,
164 #ifdef CONFIG_CEPH_FS_POSIX_ACL
165         Opt_acl,
166 #endif
167         Opt_noacl,
168         Opt_quotadf,
169         Opt_noquotadf,
170         Opt_copyfrom,
171         Opt_nocopyfrom,
172 };
173
/*
 * Pattern table passed to match_token() by parse_fsopt_token().  Each
 * entry pairs a token id with the option text it matches; the table is
 * terminated by the {-1, NULL} entry.  Note that "fsc=%s" and the bare
 * "fsc" are separate entries mapping to different tokens.
 */
174 static match_table_t fsopt_tokens = {
175         {Opt_wsize, "wsize=%d"},
176         {Opt_rsize, "rsize=%d"},
177         {Opt_rasize, "rasize=%d"},
178         {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
179         {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
180         {Opt_caps_max, "caps_max=%d"},
181         {Opt_readdir_max_entries, "readdir_max_entries=%d"},
182         {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
183         {Opt_congestion_kb, "write_congestion_kb=%d"},
184         /* int args above */
185         {Opt_snapdirname, "snapdirname=%s"},
186         {Opt_mds_namespace, "mds_namespace=%s"},
187         {Opt_fscache_uniq, "fsc=%s"},
188         /* string args above */
189         {Opt_dirstat, "dirstat"},
190         {Opt_nodirstat, "nodirstat"},
191         {Opt_rbytes, "rbytes"},
192         {Opt_norbytes, "norbytes"},
193         {Opt_asyncreaddir, "asyncreaddir"},
194         {Opt_noasyncreaddir, "noasyncreaddir"},
195         {Opt_dcache, "dcache"},
196         {Opt_nodcache, "nodcache"},
197         {Opt_ino32, "ino32"},
198         {Opt_noino32, "noino32"},
199         {Opt_fscache, "fsc"},
200         {Opt_nofscache, "nofsc"},
201         {Opt_poolperm, "poolperm"},
202         {Opt_nopoolperm, "nopoolperm"},
203         {Opt_require_active_mds, "require_active_mds"},
204         {Opt_norequire_active_mds, "norequire_active_mds"},
205 #ifdef CONFIG_CEPH_FS_POSIX_ACL
206         {Opt_acl, "acl"},
207 #endif
208         {Opt_noacl, "noacl"},
209         {Opt_quotadf, "quotadf"},
210         {Opt_noquotadf, "noquotadf"},
211         {Opt_copyfrom, "copyfrom"},
212         {Opt_nocopyfrom, "nocopyfrom"},
213         {-1, NULL}
214 };
215
216 static int parse_fsopt_token(char *c, void *private)
217 {
218         struct ceph_mount_options *fsopt = private;
219         substring_t argstr[MAX_OPT_ARGS];
220         int token, intval, ret;
221
222         token = match_token((char *)c, fsopt_tokens, argstr);
223         if (token < 0)
224                 return -EINVAL;
225
226         if (token < Opt_last_int) {
227                 ret = match_int(&argstr[0], &intval);
228                 if (ret < 0) {
229                         pr_err("bad option arg (not int) at '%s'\n", c);
230                         return ret;
231                 }
232                 dout("got int token %d val %d\n", token, intval);
233         } else if (token > Opt_last_int && token < Opt_last_string) {
234                 dout("got string token %d val %s\n", token,
235                      argstr[0].from);
236         } else {
237                 dout("got token %d\n", token);
238         }
239
240         switch (token) {
241         case Opt_snapdirname:
242                 kfree(fsopt->snapdir_name);
243                 fsopt->snapdir_name = kstrndup(argstr[0].from,
244                                                argstr[0].to-argstr[0].from,
245                                                GFP_KERNEL);
246                 if (!fsopt->snapdir_name)
247                         return -ENOMEM;
248                 break;
249         case Opt_mds_namespace:
250                 kfree(fsopt->mds_namespace);
251                 fsopt->mds_namespace = kstrndup(argstr[0].from,
252                                                 argstr[0].to-argstr[0].from,
253                                                 GFP_KERNEL);
254                 if (!fsopt->mds_namespace)
255                         return -ENOMEM;
256                 break;
257         case Opt_fscache_uniq:
258                 kfree(fsopt->fscache_uniq);
259                 fsopt->fscache_uniq = kstrndup(argstr[0].from,
260                                                argstr[0].to-argstr[0].from,
261                                                GFP_KERNEL);
262                 if (!fsopt->fscache_uniq)
263                         return -ENOMEM;
264                 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
265                 break;
266                 /* misc */
267         case Opt_wsize:
268                 if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE)
269                         return -EINVAL;
270                 fsopt->wsize = ALIGN(intval, PAGE_SIZE);
271                 break;
272         case Opt_rsize:
273                 if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_READ_SIZE)
274                         return -EINVAL;
275                 fsopt->rsize = ALIGN(intval, PAGE_SIZE);
276                 break;
277         case Opt_rasize:
278                 if (intval < 0)
279                         return -EINVAL;
280                 fsopt->rasize = ALIGN(intval, PAGE_SIZE);
281                 break;
282         case Opt_caps_wanted_delay_min:
283                 if (intval < 1)
284                         return -EINVAL;
285                 fsopt->caps_wanted_delay_min = intval;
286                 break;
287         case Opt_caps_wanted_delay_max:
288                 if (intval < 1)
289                         return -EINVAL;
290                 fsopt->caps_wanted_delay_max = intval;
291                 break;
292         case Opt_caps_max:
293                 if (intval < 0)
294                         return -EINVAL;
295                 fsopt->caps_max = intval;
296                 break;
297         case Opt_readdir_max_entries:
298                 if (intval < 1)
299                         return -EINVAL;
300                 fsopt->max_readdir = intval;
301                 break;
302         case Opt_readdir_max_bytes:
303                 if (intval < (int)PAGE_SIZE && intval != 0)
304                         return -EINVAL;
305                 fsopt->max_readdir_bytes = intval;
306                 break;
307         case Opt_congestion_kb:
308                 if (intval < 1024) /* at least 1M */
309                         return -EINVAL;
310                 fsopt->congestion_kb = intval;
311                 break;
312         case Opt_dirstat:
313                 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
314                 break;
315         case Opt_nodirstat:
316                 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
317                 break;
318         case Opt_rbytes:
319                 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
320                 break;
321         case Opt_norbytes:
322                 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
323                 break;
324         case Opt_asyncreaddir:
325                 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
326                 break;
327         case Opt_noasyncreaddir:
328                 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
329                 break;
330         case Opt_dcache:
331                 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
332                 break;
333         case Opt_nodcache:
334                 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
335                 break;
336         case Opt_ino32:
337                 fsopt->flags |= CEPH_MOUNT_OPT_INO32;
338                 break;
339         case Opt_noino32:
340                 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
341                 break;
342         case Opt_fscache:
343                 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
344                 kfree(fsopt->fscache_uniq);
345                 fsopt->fscache_uniq = NULL;
346                 break;
347         case Opt_nofscache:
348                 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
349                 kfree(fsopt->fscache_uniq);
350                 fsopt->fscache_uniq = NULL;
351                 break;
352         case Opt_poolperm:
353                 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
354                 break;
355         case Opt_nopoolperm:
356                 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
357                 break;
358         case Opt_require_active_mds:
359                 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
360                 break;
361         case Opt_norequire_active_mds:
362                 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
363                 break;
364         case Opt_quotadf:
365                 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF;
366                 break;
367         case Opt_noquotadf:
368                 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF;
369                 break;
370         case Opt_copyfrom:
371                 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM;
372                 break;
373         case Opt_nocopyfrom:
374                 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM;
375                 break;
376 #ifdef CONFIG_CEPH_FS_POSIX_ACL
377         case Opt_acl:
378                 fsopt->sb_flags |= SB_POSIXACL;
379                 break;
380 #endif
381         case Opt_noacl:
382                 fsopt->sb_flags &= ~SB_POSIXACL;
383                 break;
384         default:
385                 BUG_ON(token);
386         }
387         return 0;
388 }
389
390 static void destroy_mount_options(struct ceph_mount_options *args)
391 {
392         dout("destroy_mount_options %p\n", args);
393         kfree(args->snapdir_name);
394         kfree(args->mds_namespace);
395         kfree(args->server_path);
396         kfree(args->fscache_uniq);
397         kfree(args);
398 }
399
/*
 * strcmp() that tolerates NULL arguments: two NULLs compare equal, a
 * non-NULL @s1 against a NULL @s2 yields -1, and the reverse yields 1.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;
	return s2 ? 1 : 0;
}
410
411 static int compare_mount_options(struct ceph_mount_options *new_fsopt,
412                                  struct ceph_options *new_opt,
413                                  struct ceph_fs_client *fsc)
414 {
415         struct ceph_mount_options *fsopt1 = new_fsopt;
416         struct ceph_mount_options *fsopt2 = fsc->mount_options;
417         int ofs = offsetof(struct ceph_mount_options, snapdir_name);
418         int ret;
419
420         ret = memcmp(fsopt1, fsopt2, ofs);
421         if (ret)
422                 return ret;
423
424         ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
425         if (ret)
426                 return ret;
427         ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
428         if (ret)
429                 return ret;
430         ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
431         if (ret)
432                 return ret;
433         ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
434         if (ret)
435                 return ret;
436
437         return ceph_compare_options(new_opt, fsc->client);
438 }
439
440 static int parse_mount_options(struct ceph_mount_options **pfsopt,
441                                struct ceph_options **popt,
442                                int flags, char *options,
443                                const char *dev_name)
444 {
445         struct ceph_mount_options *fsopt;
446         const char *dev_name_end;
447         int err;
448
449         if (!dev_name || !*dev_name)
450                 return -EINVAL;
451
452         fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
453         if (!fsopt)
454                 return -ENOMEM;
455
456         dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
457
458         fsopt->sb_flags = flags;
459         fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
460
461         fsopt->wsize = CEPH_MAX_WRITE_SIZE;
462         fsopt->rsize = CEPH_MAX_READ_SIZE;
463         fsopt->rasize = CEPH_RASIZE_DEFAULT;
464         fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
465         if (!fsopt->snapdir_name) {
466                 err = -ENOMEM;
467                 goto out;
468         }
469
470         fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
471         fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
472         fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
473         fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
474         fsopt->congestion_kb = default_congestion_kb();
475
476         /*
477          * Distinguish the server list from the path in "dev_name".
478          * Internally we do not include the leading '/' in the path.
479          *
480          * "dev_name" will look like:
481          *     <server_spec>[,<server_spec>...]:[<path>]
482          * where
483          *     <server_spec> is <ip>[:<port>]
484          *     <path> is optional, but if present must begin with '/'
485          */
486         dev_name_end = strchr(dev_name, '/');
487         if (dev_name_end) {
488                 if (strlen(dev_name_end) > 1) {
489                         fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
490                         if (!fsopt->server_path) {
491                                 err = -ENOMEM;
492                                 goto out;
493                         }
494                 }
495         } else {
496                 dev_name_end = dev_name + strlen(dev_name);
497         }
498         err = -EINVAL;
499         dev_name_end--;         /* back up to ':' separator */
500         if (dev_name_end < dev_name || *dev_name_end != ':') {
501                 pr_err("device name is missing path (no : separator in %s)\n",
502                                 dev_name);
503                 goto out;
504         }
505         dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
506         if (fsopt->server_path)
507                 dout("server path '%s'\n", fsopt->server_path);
508
509         *popt = ceph_parse_options(options, dev_name, dev_name_end,
510                                  parse_fsopt_token, (void *)fsopt);
511         if (IS_ERR(*popt)) {
512                 err = PTR_ERR(*popt);
513                 goto out;
514         }
515
516         /* success */
517         *pfsopt = fsopt;
518         return 0;
519
520 out:
521         destroy_mount_options(fsopt);
522         return err;
523 }
524
525 /**
526  * ceph_show_options - Show mount options in /proc/mounts
527  * @m: seq_file to write to
528  * @root: root of that (sub)tree
529  */
530 static int ceph_show_options(struct seq_file *m, struct dentry *root)
531 {
532         struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
533         struct ceph_mount_options *fsopt = fsc->mount_options;
534         size_t pos;
535         int ret;
536
537         /* a comma between MNT/MS and client options */
538         seq_putc(m, ',');
539         pos = m->count;
540
541         ret = ceph_print_client_options(m, fsc->client, false);
542         if (ret)
543                 return ret;
544
545         /* retract our comma if no client options */
546         if (m->count == pos)
547                 m->count--;
548
549         if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
550                 seq_puts(m, ",dirstat");
551         if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
552                 seq_puts(m, ",rbytes");
553         if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
554                 seq_puts(m, ",noasyncreaddir");
555         if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
556                 seq_puts(m, ",nodcache");
557         if (fsopt->flags & CEPH_MOUNT_OPT_INO32)
558                 seq_puts(m, ",ino32");
559         if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
560                 seq_show_option(m, "fsc", fsopt->fscache_uniq);
561         }
562         if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
563                 seq_puts(m, ",nopoolperm");
564         if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
565                 seq_puts(m, ",noquotadf");
566
567 #ifdef CONFIG_CEPH_FS_POSIX_ACL
568         if (fsopt->sb_flags & SB_POSIXACL)
569                 seq_puts(m, ",acl");
570         else
571                 seq_puts(m, ",noacl");
572 #endif
573
574         if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
575                 seq_puts(m, ",copyfrom");
576
577         if (fsopt->mds_namespace)
578                 seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
579         if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
580                 seq_printf(m, ",wsize=%d", fsopt->wsize);
581         if (fsopt->rsize != CEPH_MAX_READ_SIZE)
582                 seq_printf(m, ",rsize=%d", fsopt->rsize);
583         if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
584                 seq_printf(m, ",rasize=%d", fsopt->rasize);
585         if (fsopt->congestion_kb != default_congestion_kb())
586                 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
587         if (fsopt->caps_max)
588                 seq_printf(m, ",caps_max=%d", fsopt->caps_max);
589         if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
590                 seq_printf(m, ",caps_wanted_delay_min=%d",
591                          fsopt->caps_wanted_delay_min);
592         if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
593                 seq_printf(m, ",caps_wanted_delay_max=%d",
594                            fsopt->caps_wanted_delay_max);
595         if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
596                 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
597         if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
598                 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
599         if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
600                 seq_show_option(m, "snapdirname", fsopt->snapdir_name);
601
602         return 0;
603 }
604
605 /*
606  * handle any mon messages the standard library doesn't understand.
607  * return error if we don't either.
608  */
609 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
610 {
611         struct ceph_fs_client *fsc = client->private;
612         int type = le16_to_cpu(msg->hdr.type);
613
614         switch (type) {
615         case CEPH_MSG_MDS_MAP:
616                 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
617                 return 0;
618         case CEPH_MSG_FS_MAP_USER:
619                 ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
620                 return 0;
621         default:
622                 return -1;
623         }
624 }
625
626 /*
627  * create a new fs client
628  *
629  * Success or not, this function consumes @fsopt and @opt.
630  */
631 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
632                                         struct ceph_options *opt)
633 {
634         struct ceph_fs_client *fsc;
635         int page_count;
636         size_t size;
637         int err;
638
639         fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
640         if (!fsc) {
641                 err = -ENOMEM;
642                 goto fail;
643         }
644
645         fsc->client = ceph_create_client(opt, fsc);
646         if (IS_ERR(fsc->client)) {
647                 err = PTR_ERR(fsc->client);
648                 goto fail;
649         }
650         opt = NULL; /* fsc->client now owns this */
651
652         fsc->client->extra_mon_dispatch = extra_mon_dispatch;
653         ceph_set_opt(fsc->client, ABORT_ON_FULL);
654
655         if (!fsopt->mds_namespace) {
656                 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
657                                    0, true);
658         } else {
659                 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
660                                    0, false);
661         }
662
663         fsc->mount_options = fsopt;
664
665         fsc->sb = NULL;
666         fsc->mount_state = CEPH_MOUNT_MOUNTING;
667
668         atomic_long_set(&fsc->writeback_count, 0);
669
670         err = -ENOMEM;
671         /*
672          * The number of concurrent works can be high but they don't need
673          * to be processed in parallel, limit concurrency.
674          */
675         fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
676         if (!fsc->wb_wq)
677                 goto fail_client;
678         fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
679         if (!fsc->pg_inv_wq)
680                 goto fail_wb_wq;
681         fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
682         if (!fsc->trunc_wq)
683                 goto fail_pg_inv_wq;
684         fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
685         if (!fsc->cap_wq)
686                 goto fail_trunc_wq;
687
688         /* set up mempools */
689         err = -ENOMEM;
690         page_count = fsc->mount_options->wsize >> PAGE_SHIFT;
691         size = sizeof (struct page *) * (page_count ? page_count : 1);
692         fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
693         if (!fsc->wb_pagevec_pool)
694                 goto fail_cap_wq;
695
696         return fsc;
697
698 fail_cap_wq:
699         destroy_workqueue(fsc->cap_wq);
700 fail_trunc_wq:
701         destroy_workqueue(fsc->trunc_wq);
702 fail_pg_inv_wq:
703         destroy_workqueue(fsc->pg_inv_wq);
704 fail_wb_wq:
705         destroy_workqueue(fsc->wb_wq);
706 fail_client:
707         ceph_destroy_client(fsc->client);
708 fail:
709         kfree(fsc);
710         if (opt)
711                 ceph_destroy_options(opt);
712         destroy_mount_options(fsopt);
713         return ERR_PTR(err);
714 }
715
716 static void flush_fs_workqueues(struct ceph_fs_client *fsc)
717 {
718         flush_workqueue(fsc->wb_wq);
719         flush_workqueue(fsc->pg_inv_wq);
720         flush_workqueue(fsc->trunc_wq);
721         flush_workqueue(fsc->cap_wq);
722 }
723
724 static void destroy_fs_client(struct ceph_fs_client *fsc)
725 {
726         dout("destroy_fs_client %p\n", fsc);
727
728         destroy_workqueue(fsc->wb_wq);
729         destroy_workqueue(fsc->pg_inv_wq);
730         destroy_workqueue(fsc->trunc_wq);
731         destroy_workqueue(fsc->cap_wq);
732
733         mempool_destroy(fsc->wb_pagevec_pool);
734
735         destroy_mount_options(fsc->mount_options);
736
737         ceph_destroy_client(fsc->client);
738
739         kfree(fsc);
740         dout("destroy_fs_client %p done\n", fsc);
741 }
742
743 /*
744  * caches
745  */
/*
 * Slab caches used by the ceph filesystem.  They are created in
 * init_caches() and released in destroy_caches().
 */
746 struct kmem_cache *ceph_inode_cachep;
747 struct kmem_cache *ceph_cap_cachep;
748 struct kmem_cache *ceph_cap_flush_cachep;
749 struct kmem_cache *ceph_dentry_cachep;
750 struct kmem_cache *ceph_file_cachep;
751 struct kmem_cache *ceph_dir_file_cachep;
752
753 static void ceph_inode_init_once(void *foo)
754 {
755         struct ceph_inode_info *ci = foo;
756         inode_init_once(&ci->vfs_inode);
757 }
758
759 static int __init init_caches(void)
760 {
761         int error = -ENOMEM;
762
763         ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
764                                       sizeof(struct ceph_inode_info),
765                                       __alignof__(struct ceph_inode_info),
766                                       SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
767                                       SLAB_ACCOUNT, ceph_inode_init_once);
768         if (!ceph_inode_cachep)
769                 return -ENOMEM;
770
771         ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
772         if (!ceph_cap_cachep)
773                 goto bad_cap;
774         ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
775                                            SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
776         if (!ceph_cap_flush_cachep)
777                 goto bad_cap_flush;
778
779         ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
780                                         SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
781         if (!ceph_dentry_cachep)
782                 goto bad_dentry;
783
784         ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
785         if (!ceph_file_cachep)
786                 goto bad_file;
787
788         ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
789         if (!ceph_dir_file_cachep)
790                 goto bad_dir_file;
791
792         error = ceph_fscache_register();
793         if (error)
794                 goto bad_fscache;
795
796         return 0;
797
798 bad_fscache:
799         kmem_cache_destroy(ceph_dir_file_cachep);
800 bad_dir_file:
801         kmem_cache_destroy(ceph_file_cachep);
802 bad_file:
803         kmem_cache_destroy(ceph_dentry_cachep);
804 bad_dentry:
805         kmem_cache_destroy(ceph_cap_flush_cachep);
806 bad_cap_flush:
807         kmem_cache_destroy(ceph_cap_cachep);
808 bad_cap:
809         kmem_cache_destroy(ceph_inode_cachep);
810         return error;
811 }
812
813 static void destroy_caches(void)
814 {
815         /*
816          * Make sure all delayed rcu free inodes are flushed before we
817          * destroy cache.
818          */
819         rcu_barrier();
820
821         kmem_cache_destroy(ceph_inode_cachep);
822         kmem_cache_destroy(ceph_cap_cachep);
823         kmem_cache_destroy(ceph_cap_flush_cachep);
824         kmem_cache_destroy(ceph_dentry_cachep);
825         kmem_cache_destroy(ceph_file_cachep);
826         kmem_cache_destroy(ceph_dir_file_cachep);
827
828         ceph_fscache_unregister();
829 }
830
831
832 /*
833  * ceph_umount_begin - initiate forced umount.  Tear down down the
834  * mount, skipping steps that may hang while waiting for server(s).
835  */
836 static void ceph_umount_begin(struct super_block *sb)
837 {
838         struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
839
840         dout("ceph_umount_begin - starting forced umount\n");
841         if (!fsc)
842                 return;
843         fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
844         ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
845         ceph_mdsc_force_umount(fsc->mdsc);
846         return;
847 }
848
/*
 * ->remount_fs: sync the filesystem and report success.  @flags and
 * @data are not looked at, and the result of sync_filesystem() is
 * discarded.
 */
static int ceph_remount(struct super_block *sb, int *flags, char *data)
{
	sync_filesystem(sb);

	return 0;
}
854
855 static const struct super_operations ceph_super_ops = {
856         .alloc_inode    = ceph_alloc_inode,
857         .destroy_inode  = ceph_destroy_inode,
858         .free_inode     = ceph_free_inode,
859         .write_inode    = ceph_write_inode,
860         .drop_inode     = ceph_drop_inode,
861         .sync_fs        = ceph_sync_fs,
862         .put_super      = ceph_put_super,
863         .remount_fs     = ceph_remount,
864         .show_options   = ceph_show_options,
865         .statfs         = ceph_statfs,
866         .umount_begin   = ceph_umount_begin,
867 };
868
869 /*
870  * Bootstrap mount by opening the root directory.  Note the mount
871  * @started time from caller, and time out if this takes too long.
872  */
873 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
874                                        const char *path,
875                                        unsigned long started)
876 {
877         struct ceph_mds_client *mdsc = fsc->mdsc;
878         struct ceph_mds_request *req = NULL;
879         int err;
880         struct dentry *root;
881
882         /* open dir */
883         dout("open_root_inode opening '%s'\n", path);
884         req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
885         if (IS_ERR(req))
886                 return ERR_CAST(req);
887         req->r_path1 = kstrdup(path, GFP_NOFS);
888         if (!req->r_path1) {
889                 root = ERR_PTR(-ENOMEM);
890                 goto out;
891         }
892
893         req->r_ino1.ino = CEPH_INO_ROOT;
894         req->r_ino1.snap = CEPH_NOSNAP;
895         req->r_started = started;
896         req->r_timeout = fsc->client->options->mount_timeout;
897         req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
898         req->r_num_caps = 2;
899         err = ceph_mdsc_do_request(mdsc, NULL, req);
900         if (err == 0) {
901                 struct inode *inode = req->r_target_inode;
902                 req->r_target_inode = NULL;
903                 dout("open_root_inode success\n");
904                 root = d_make_root(inode);
905                 if (!root) {
906                         root = ERR_PTR(-ENOMEM);
907                         goto out;
908                 }
909                 dout("open_root_inode success, root dentry is %p\n", root);
910         } else {
911                 root = ERR_PTR(err);
912         }
913 out:
914         ceph_mdsc_put_request(req);
915         return root;
916 }
917
918
919
920
921 /*
922  * mount: join the ceph cluster, and open root directory.
923  */
924 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
925 {
926         int err;
927         unsigned long started = jiffies;  /* note the start time */
928         struct dentry *root;
929
930         dout("mount start %p\n", fsc);
931         mutex_lock(&fsc->client->mount_mutex);
932
933         if (!fsc->sb->s_root) {
934                 const char *path;
935                 err = __ceph_open_session(fsc->client, started);
936                 if (err < 0)
937                         goto out;
938
939                 /* setup fscache */
940                 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
941                         err = ceph_fscache_register_fs(fsc);
942                         if (err < 0)
943                                 goto out;
944                 }
945
946                 if (!fsc->mount_options->server_path) {
947                         path = "";
948                         dout("mount opening path \\t\n");
949                 } else {
950                         path = fsc->mount_options->server_path + 1;
951                         dout("mount opening path %s\n", path);
952                 }
953
954                 err = ceph_fs_debugfs_init(fsc);
955                 if (err < 0)
956                         goto out;
957
958                 root = open_root_dentry(fsc, path, started);
959                 if (IS_ERR(root)) {
960                         err = PTR_ERR(root);
961                         goto out;
962                 }
963                 fsc->sb->s_root = dget(root);
964         } else {
965                 root = dget(fsc->sb->s_root);
966         }
967
968         fsc->mount_state = CEPH_MOUNT_MOUNTED;
969         dout("mount success\n");
970         mutex_unlock(&fsc->client->mount_mutex);
971         return root;
972
973 out:
974         mutex_unlock(&fsc->client->mount_mutex);
975         return ERR_PTR(err);
976 }
977
978 static int ceph_set_super(struct super_block *s, void *data)
979 {
980         struct ceph_fs_client *fsc = data;
981         int ret;
982
983         dout("set_super %p data %p\n", s, data);
984
985         s->s_flags = fsc->mount_options->sb_flags;
986         s->s_maxbytes = MAX_LFS_FILESIZE;
987
988         s->s_xattr = ceph_xattr_handlers;
989         s->s_fs_info = fsc;
990         fsc->sb = s;
991         fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */
992
993         s->s_op = &ceph_super_ops;
994         s->s_d_op = &ceph_dentry_ops;
995         s->s_export_op = &ceph_export_ops;
996
997         s->s_time_gran = 1000;  /* 1000 ns == 1 us */
998
999         ret = set_anon_super(s, NULL);  /* what is that second arg for? */
1000         if (ret != 0)
1001                 goto fail;
1002
1003         return ret;
1004
1005 fail:
1006         s->s_fs_info = NULL;
1007         fsc->sb = NULL;
1008         return ret;
1009 }
1010
1011 /*
1012  * share superblock if same fs AND options
1013  */
1014 static int ceph_compare_super(struct super_block *sb, void *data)
1015 {
1016         struct ceph_fs_client *new = data;
1017         struct ceph_mount_options *fsopt = new->mount_options;
1018         struct ceph_options *opt = new->client->options;
1019         struct ceph_fs_client *other = ceph_sb_to_client(sb);
1020
1021         dout("ceph_compare_super %p\n", sb);
1022
1023         if (compare_mount_options(fsopt, opt, other)) {
1024                 dout("monitor(s)/mount options don't match\n");
1025                 return 0;
1026         }
1027         if ((opt->flags & CEPH_OPT_FSID) &&
1028             ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
1029                 dout("fsid doesn't match\n");
1030                 return 0;
1031         }
1032         if (fsopt->sb_flags != other->mount_options->sb_flags) {
1033                 dout("flags differ\n");
1034                 return 0;
1035         }
1036         return 1;
1037 }
1038
1039 /*
1040  * construct our own bdi so we can control readahead, etc.
1041  */
1042 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
1043
1044 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
1045 {
1046         int err;
1047
1048         err = super_setup_bdi_name(sb, "ceph-%ld",
1049                                    atomic_long_inc_return(&bdi_seq));
1050         if (err)
1051                 return err;
1052
1053         /* set ra_pages based on rasize mount option? */
1054         sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;
1055
1056         /* set io_pages based on max osd read size */
1057         sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;
1058
1059         return 0;
1060 }
1061
1062 static struct dentry *ceph_mount(struct file_system_type *fs_type,
1063                        int flags, const char *dev_name, void *data)
1064 {
1065         struct super_block *sb;
1066         struct ceph_fs_client *fsc;
1067         struct dentry *res;
1068         int err;
1069         int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
1070         struct ceph_mount_options *fsopt = NULL;
1071         struct ceph_options *opt = NULL;
1072
1073         dout("ceph_mount\n");
1074
1075 #ifdef CONFIG_CEPH_FS_POSIX_ACL
1076         flags |= SB_POSIXACL;
1077 #endif
1078         err = parse_mount_options(&fsopt, &opt, flags, data, dev_name);
1079         if (err < 0) {
1080                 res = ERR_PTR(err);
1081                 goto out_final;
1082         }
1083
1084         /* create client (which we may/may not use) */
1085         fsc = create_fs_client(fsopt, opt);
1086         if (IS_ERR(fsc)) {
1087                 res = ERR_CAST(fsc);
1088                 goto out_final;
1089         }
1090
1091         err = ceph_mdsc_init(fsc);
1092         if (err < 0) {
1093                 res = ERR_PTR(err);
1094                 goto out;
1095         }
1096
1097         if (ceph_test_opt(fsc->client, NOSHARE))
1098                 compare_super = NULL;
1099         sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
1100         if (IS_ERR(sb)) {
1101                 res = ERR_CAST(sb);
1102                 goto out;
1103         }
1104
1105         if (ceph_sb_to_client(sb) != fsc) {
1106                 ceph_mdsc_destroy(fsc);
1107                 destroy_fs_client(fsc);
1108                 fsc = ceph_sb_to_client(sb);
1109                 dout("get_sb got existing client %p\n", fsc);
1110         } else {
1111                 dout("get_sb using new client %p\n", fsc);
1112                 err = ceph_setup_bdi(sb, fsc);
1113                 if (err < 0) {
1114                         res = ERR_PTR(err);
1115                         goto out_splat;
1116                 }
1117         }
1118
1119         res = ceph_real_mount(fsc);
1120         if (IS_ERR(res))
1121                 goto out_splat;
1122         dout("root %p inode %p ino %llx.%llx\n", res,
1123              d_inode(res), ceph_vinop(d_inode(res)));
1124         return res;
1125
1126 out_splat:
1127         ceph_mdsc_close_sessions(fsc->mdsc);
1128         deactivate_locked_super(sb);
1129         goto out_final;
1130
1131 out:
1132         ceph_mdsc_destroy(fsc);
1133         destroy_fs_client(fsc);
1134 out_final:
1135         dout("ceph_mount fail %ld\n", PTR_ERR(res));
1136         return res;
1137 }
1138
1139 static void ceph_kill_sb(struct super_block *s)
1140 {
1141         struct ceph_fs_client *fsc = ceph_sb_to_client(s);
1142         dev_t dev = s->s_dev;
1143
1144         dout("kill_sb %p\n", s);
1145
1146         ceph_mdsc_pre_umount(fsc->mdsc);
1147         flush_fs_workqueues(fsc);
1148
1149         generic_shutdown_super(s);
1150
1151         fsc->client->extra_mon_dispatch = NULL;
1152         ceph_fs_debugfs_cleanup(fsc);
1153
1154         ceph_fscache_unregister_fs(fsc);
1155
1156         ceph_mdsc_destroy(fsc);
1157
1158         destroy_fs_client(fsc);
1159         free_anon_bdev(dev);
1160 }
1161
1162 static struct file_system_type ceph_fs_type = {
1163         .owner          = THIS_MODULE,
1164         .name           = "ceph",
1165         .mount          = ceph_mount,
1166         .kill_sb        = ceph_kill_sb,
1167         .fs_flags       = FS_RENAME_DOES_D_MOVE,
1168 };
1169 MODULE_ALIAS_FS("ceph");
1170
1171 static int __init init_ceph(void)
1172 {
1173         int ret = init_caches();
1174         if (ret)
1175                 goto out;
1176
1177         ceph_flock_init();
1178         ceph_xattr_init();
1179         ret = register_filesystem(&ceph_fs_type);
1180         if (ret)
1181                 goto out_xattr;
1182
1183         pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
1184
1185         return 0;
1186
1187 out_xattr:
1188         ceph_xattr_exit();
1189         destroy_caches();
1190 out:
1191         return ret;
1192 }
1193
1194 static void __exit exit_ceph(void)
1195 {
1196         dout("exit_ceph\n");
1197         unregister_filesystem(&ceph_fs_type);
1198         ceph_xattr_exit();
1199         destroy_caches();
1200 }
1201
1202 module_init(init_ceph);
1203 module_exit(exit_ceph);
1204
1205 MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
1206 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
1207 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
1208 MODULE_DESCRIPTION("Ceph filesystem for Linux");
1209 MODULE_LICENSE("GPL");