LXR linux/fs/ceph/super.c

   1
   2#include <linux/ceph/ceph_debug.h>
   3
   4#include <linux/backing-dev.h>
   5#include <linux/ctype.h>
   6#include <linux/fs.h>
   7#include <linux/inet.h>
   8#include <linux/in6.h>
   9#include <linux/module.h>
  10#include <linux/mount.h>
  11#include <linux/parser.h>
  12#include <linux/sched.h>
  13#include <linux/seq_file.h>
  14#include <linux/slab.h>
  15#include <linux/statfs.h>
  16#include <linux/string.h>
  17
  18#include "super.h"
  19#include "mds_client.h"
  20
  21#include <linux/ceph/decode.h>
  22#include <linux/ceph/mon_client.h>
  23#include <linux/ceph/auth.h>
  24#include <linux/ceph/debugfs.h>
  25
  26/*
  27 * Ceph superblock operations
  28 *
  29 * Handle the basics of mounting, unmounting.
  30 */
  31
  32/*
  33 * super ops
  34 */
  35static void ceph_put_super(struct super_block *s)
  36{
  37        struct ceph_fs_client *fsc = ceph_sb_to_client(s);
  38
  39        dout("put_super\n");
  40        ceph_mdsc_close_sessions(fsc->mdsc);
  41
  42        /*
  43         * ensure we release the bdi before put_anon_super releases
  44         * the device name.
  45         */
  46        if (s->s_bdi == &fsc->backing_dev_info) {
  47                bdi_unregister(&fsc->backing_dev_info);
  48                s->s_bdi = NULL;
  49        }
  50
  51        return;
  52}
  53
  54static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
  55{
  56        struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
  57        struct ceph_monmap *monmap = fsc->client->monc.monmap;
  58        struct ceph_statfs st;
  59        u64 fsid;
  60        int err;
  61
  62        dout("statfs\n");
  63        err = ceph_monc_do_statfs(&fsc->client->monc, &st);
  64        if (err < 0)
  65                return err;
  66
  67        /* fill in kstatfs */
  68        buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
  69
  70        /*
  71         * express utilization in terms of large blocks to avoid
  72         * overflow on 32-bit machines.
  73         */
  74        buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
  75        buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
  76        buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >>
  77                (CEPH_BLOCK_SHIFT-10);
  78        buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
  79
  80        buf->f_files = le64_to_cpu(st.num_objects);
  81        buf->f_ffree = -1;
  82        buf->f_namelen = NAME_MAX;
  83        buf->f_frsize = PAGE_CACHE_SIZE;
  84
  85        /* leave fsid little-endian, regardless of host endianness */
  86        fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
  87        buf->f_fsid.val[0] = fsid & 0xffffffff;
  88        buf->f_fsid.val[1] = fsid >> 32;
  89
  90        return 0;
  91}
  92
  93
  94static int ceph_sync_fs(struct super_block *sb, int wait)
  95{
  96        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
  97
  98        if (!wait) {
  99                dout("sync_fs (non-blocking)\n");
 100                ceph_flush_dirty_caps(fsc->mdsc);
 101                dout("sync_fs (non-blocking) done\n");
 102                return 0;
 103        }
 104
 105        dout("sync_fs (blocking)\n");
 106        ceph_osdc_sync(&fsc->client->osdc);
 107        ceph_mdsc_sync(fsc->mdsc);
 108        dout("sync_fs (blocking) done\n");
 109        return 0;
 110}
 111
 112/*
 113 * mount options
 114 */
 115enum {
 116        Opt_wsize,
 117        Opt_rsize,
 118        Opt_caps_wanted_delay_min,
 119        Opt_caps_wanted_delay_max,
 120        Opt_cap_release_safety,
 121        Opt_readdir_max_entries,
 122        Opt_readdir_max_bytes,
 123        Opt_congestion_kb,
 124        Opt_last_int,
 125        /* int args above */
 126        Opt_snapdirname,
 127        Opt_last_string,
 128        /* string args above */
 129        Opt_dirstat,
 130        Opt_nodirstat,
 131        Opt_rbytes,
 132        Opt_norbytes,
 133        Opt_noasyncreaddir,
 134};
 135
 136static match_table_t fsopt_tokens = {
 137        {Opt_wsize, "wsize=%d"},
 138        {Opt_rsize, "rsize=%d"},
 139        {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
 140        {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
 141        {Opt_cap_release_safety, "cap_release_safety=%d"},
 142        {Opt_readdir_max_entries, "readdir_max_entries=%d"},
 143        {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
 144        {Opt_congestion_kb, "write_congestion_kb=%d"},
 145        /* int args above */
 146        {Opt_snapdirname, "snapdirname=%s"},
 147        /* string args above */
 148        {Opt_dirstat, "dirstat"},
 149        {Opt_nodirstat, "nodirstat"},
 150        {Opt_rbytes, "rbytes"},
 151        {Opt_norbytes, "norbytes"},
 152        {Opt_noasyncreaddir, "noasyncreaddir"},
 153        {-1, NULL}
 154};
 155
 156static int parse_fsopt_token(char *c, void *private)
 157{
 158        struct ceph_mount_options *fsopt = private;
 159        substring_t argstr[MAX_OPT_ARGS];
 160        int token, intval, ret;
 161
 162        token = match_token((char *)c, fsopt_tokens, argstr);
 163        if (token < 0)
 164                return -EINVAL;
 165
 166        if (token < Opt_last_int) {
 167                ret = match_int(&argstr[0], &intval);
 168                if (ret < 0) {
 169                        pr_err("bad mount option arg (not int) "
 170                               "at '%s'\n", c);
 171                        return ret;
 172                }
 173                dout("got int token %d val %d\n", token, intval);
 174        } else if (token > Opt_last_int && token < Opt_last_string) {
 175                dout("got string token %d val %s\n", token,
 176                     argstr[0].from);
 177        } else {
 178                dout("got token %d\n", token);
 179        }
 180
 181        switch (token) {
 182        case Opt_snapdirname:
 183                kfree(fsopt->snapdir_name);
 184                fsopt->snapdir_name = kstrndup(argstr[0].from,
 185                                               argstr[0].to-argstr[0].from,
 186                                               GFP_KERNEL);
 187                if (!fsopt->snapdir_name)
 188                        return -ENOMEM;
 189                break;
 190
 191                /* misc */
 192        case Opt_wsize:
 193                fsopt->wsize = intval;
 194                break;
 195        case Opt_rsize:
 196                fsopt->rsize = intval;
 197                break;
 198        case Opt_caps_wanted_delay_min:
 199                fsopt->caps_wanted_delay_min = intval;
 200                break;
 201        case Opt_caps_wanted_delay_max:
 202                fsopt->caps_wanted_delay_max = intval;
 203                break;
 204        case Opt_readdir_max_entries:
 205                fsopt->max_readdir = intval;
 206                break;
 207        case Opt_readdir_max_bytes:
 208                fsopt->max_readdir_bytes = intval;
 209                break;
 210        case Opt_congestion_kb:
 211                fsopt->congestion_kb = intval;
 212                break;
 213        case Opt_dirstat:
 214                fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
 215                break;
 216        case Opt_nodirstat:
 217                fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
 218                break;
 219        case Opt_rbytes:
 220                fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
 221                break;
 222        case Opt_norbytes:
 223                fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
 224                break;
 225        case Opt_noasyncreaddir:
 226                fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
 227                break;
 228        default:
 229                BUG_ON(token);
 230        }
 231        return 0;
 232}
 233
 234static void destroy_mount_options(struct ceph_mount_options *args)
 235{
 236        dout("destroy_mount_options %p\n", args);
 237        kfree(args->snapdir_name);
 238        kfree(args);
 239}
 240
 241static int strcmp_null(const char *s1, const char *s2)
 242{
 243        if (!s1 && !s2)
 244                return 0;
 245        if (s1 && !s2)
 246                return -1;
 247        if (!s1 && s2)
 248                return 1;
 249        return strcmp(s1, s2);
 250}
 251
 252static int compare_mount_options(struct ceph_mount_options *new_fsopt,
 253                                 struct ceph_options *new_opt,
 254                                 struct ceph_fs_client *fsc)
 255{
 256        struct ceph_mount_options *fsopt1 = new_fsopt;
 257        struct ceph_mount_options *fsopt2 = fsc->mount_options;
 258        int ofs = offsetof(struct ceph_mount_options, snapdir_name);
 259        int ret;
 260
 261        ret = memcmp(fsopt1, fsopt2, ofs);
 262        if (ret)
 263                return ret;
 264
 265        ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
 266        if (ret)
 267                return ret;
 268
 269        return ceph_compare_options(new_opt, fsc->client);
 270}
 271
 272static int parse_mount_options(struct ceph_mount_options **pfsopt,
 273                               struct ceph_options **popt,
 274                               int flags, char *options,
 275                               const char *dev_name,
 276                               const char **path)
 277{
 278        struct ceph_mount_options *fsopt;
 279        const char *dev_name_end;
 280        int err = -ENOMEM;
 281
 282        fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
 283        if (!fsopt)
 284                return -ENOMEM;
 285
 286        dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
 287
 288        fsopt->sb_flags = flags;
 289        fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
 290
 291        fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
 292        fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
 293        fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
 294        fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
 295        fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
 296        fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
 297        fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
 298        fsopt->congestion_kb = default_congestion_kb();
 299        
 300        /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
 301        err = -EINVAL;
 302        if (!dev_name)
 303                goto out;
 304        *path = strstr(dev_name, ":/");
 305        if (*path == NULL) {
 306                pr_err("device name is missing path (no :/ in %s)\n",
 307                       dev_name);
 308                goto out;
 309        }
 310        dev_name_end = *path;
 311        dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
 312
 313        /* path on server */
 314        *path += 2;
 315        dout("server path '%s'\n", *path);
 316
 317        err = ceph_parse_options(popt, options, dev_name, dev_name_end,
 318                                 parse_fsopt_token, (void *)fsopt);
 319        if (err)
 320                goto out;
 321
 322        /* success */
 323        *pfsopt = fsopt;
 324        return 0;
 325
 326out:
 327        destroy_mount_options(fsopt);
 328        return err;
 329}
 330
 331/**
 332 * ceph_show_options - Show mount options in /proc/mounts
 333 * @m: seq_file to write to
 334 * @mnt: mount descriptor
 335 */
 336static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
 337{
 338        struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb);
 339        struct ceph_mount_options *fsopt = fsc->mount_options;
 340        struct ceph_options *opt = fsc->client->options;
 341
 342        if (opt->flags & CEPH_OPT_FSID)
 343                seq_printf(m, ",fsid=%pU", &opt->fsid);
 344        if (opt->flags & CEPH_OPT_NOSHARE)
 345                seq_puts(m, ",noshare");
 346        if (opt->flags & CEPH_OPT_NOCRC)
 347                seq_puts(m, ",nocrc");
 348
 349        if (opt->name)
 350                seq_printf(m, ",name=%s", opt->name);
 351        if (opt->secret)
 352                seq_puts(m, ",secret=<hidden>");
 353
 354        if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
 355                seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
 356        if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
 357                seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
 358        if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
 359                seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
 360        if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
 361                seq_printf(m, ",osdkeepalivetimeout=%d",
 362                           opt->osd_keepalive_timeout);
 363
 364        if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
 365                seq_puts(m, ",dirstat");
 366        if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
 367                seq_puts(m, ",norbytes");
 368        if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
 369                seq_puts(m, ",noasyncreaddir");
 370
 371        if (fsopt->wsize)
 372                seq_printf(m, ",wsize=%d", fsopt->wsize);
 373        if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
 374                seq_printf(m, ",rsize=%d", fsopt->rsize);
 375        if (fsopt->congestion_kb != default_congestion_kb())
 376                seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
 377        if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
 378                seq_printf(m, ",caps_wanted_delay_min=%d",
 379                         fsopt->caps_wanted_delay_min);
 380        if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
 381                seq_printf(m, ",caps_wanted_delay_max=%d",
 382                           fsopt->caps_wanted_delay_max);
 383        if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
 384                seq_printf(m, ",cap_release_safety=%d",
 385                           fsopt->cap_release_safety);
 386        if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
 387                seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
 388        if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
 389                seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
 390        if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
 391                seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
 392        return 0;
 393}
 394
 395/*
 396 * handle any mon messages the standard library doesn't understand.
 397 * return error if we don't either.
 398 */
 399static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
 400{
 401        struct ceph_fs_client *fsc = client->private;
 402        int type = le16_to_cpu(msg->hdr.type);
 403
 404        switch (type) {
 405        case CEPH_MSG_MDS_MAP:
 406                ceph_mdsc_handle_map(fsc->mdsc, msg);
 407                return 0;
 408
 409        default:
 410                return -1;
 411        }
 412}
 413
 414/*
 415 * create a new fs client
 416 */
 417struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 418                                        struct ceph_options *opt)
 419{
 420        struct ceph_fs_client *fsc;
 421        int err = -ENOMEM;
 422
 423        fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
 424        if (!fsc)
 425                return ERR_PTR(-ENOMEM);
 426
 427        fsc->client = ceph_create_client(opt, fsc);
 428        if (IS_ERR(fsc->client)) {
 429                err = PTR_ERR(fsc->client);
 430                goto fail;
 431        }
 432        fsc->client->extra_mon_dispatch = extra_mon_dispatch;
 433        fsc->client->supported_features |= CEPH_FEATURE_FLOCK |
 434                CEPH_FEATURE_DIRLAYOUTHASH;
 435        fsc->client->monc.want_mdsmap = 1;
 436
 437        fsc->mount_options = fsopt;
 438
 439        fsc->sb = NULL;
 440        fsc->mount_state = CEPH_MOUNT_MOUNTING;
 441
 442        atomic_long_set(&fsc->writeback_count, 0);
 443
 444        err = bdi_init(&fsc->backing_dev_info);
 445        if (err < 0)
 446                goto fail_client;
 447
 448        err = -ENOMEM;
 449        /*
 450         * The number of concurrent works can be high but they don't need
 451         * to be processed in parallel, limit concurrency.
 452         */
 453        fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
 454        if (fsc->wb_wq == NULL)
 455                goto fail_bdi;
 456        fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
 457        if (fsc->pg_inv_wq == NULL)
 458                goto fail_wb_wq;
 459        fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
 460        if (fsc->trunc_wq == NULL)
 461                goto fail_pg_inv_wq;
 462
 463        /* set up mempools */
 464        err = -ENOMEM;
 465        fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
 466                              fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
 467        if (!fsc->wb_pagevec_pool)
 468                goto fail_trunc_wq;
 469
 470        /* caps */
 471        fsc->min_caps = fsopt->max_readdir;
 472
 473        return fsc;
 474
 475fail_trunc_wq:
 476        destroy_workqueue(fsc->trunc_wq);
 477fail_pg_inv_wq:
 478        destroy_workqueue(fsc->pg_inv_wq);
 479fail_wb_wq:
 480        destroy_workqueue(fsc->wb_wq);
 481fail_bdi:
 482        bdi_destroy(&fsc->backing_dev_info);
 483fail_client:
 484        ceph_destroy_client(fsc->client);
 485fail:
 486        kfree(fsc);
 487        return ERR_PTR(err);
 488}
 489
 490void destroy_fs_client(struct ceph_fs_client *fsc)
 491{
 492        dout("destroy_fs_client %p\n", fsc);
 493
 494        destroy_workqueue(fsc->wb_wq);
 495        destroy_workqueue(fsc->pg_inv_wq);
 496        destroy_workqueue(fsc->trunc_wq);
 497
 498        bdi_destroy(&fsc->backing_dev_info);
 499
 500        mempool_destroy(fsc->wb_pagevec_pool);
 501
 502        destroy_mount_options(fsc->mount_options);
 503
 504        ceph_fs_debugfs_cleanup(fsc);
 505
 506        ceph_destroy_client(fsc->client);
 507
 508        kfree(fsc);
 509        dout("destroy_fs_client %p done\n", fsc);
 510}
 511
 512/*
 513 * caches
 514 */
 515struct kmem_cache *ceph_inode_cachep;
 516struct kmem_cache *ceph_cap_cachep;
 517struct kmem_cache *ceph_dentry_cachep;
 518struct kmem_cache *ceph_file_cachep;
 519
 520static void ceph_inode_init_once(void *foo)
 521{
 522        struct ceph_inode_info *ci = foo;
 523        inode_init_once(&ci->vfs_inode);
 524}
 525
 526static int __init init_caches(void)
 527{
 528        ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
 529                                      sizeof(struct ceph_inode_info),
 530                                      __alignof__(struct ceph_inode_info),
 531                                      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
 532                                      ceph_inode_init_once);
 533        if (ceph_inode_cachep == NULL)
 534                return -ENOMEM;
 535
 536        ceph_cap_cachep = KMEM_CACHE(ceph_cap,
 537                                     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 538        if (ceph_cap_cachep == NULL)
 539                goto bad_cap;
 540
 541        ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
 542                                        SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 543        if (ceph_dentry_cachep == NULL)
 544                goto bad_dentry;
 545
 546        ceph_file_cachep = KMEM_CACHE(ceph_file_info,
 547                                      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 548        if (ceph_file_cachep == NULL)
 549                goto bad_file;
 550
 551        return 0;
 552
 553bad_file:
 554        kmem_cache_destroy(ceph_dentry_cachep);
 555bad_dentry:
 556        kmem_cache_destroy(ceph_cap_cachep);
 557bad_cap:
 558        kmem_cache_destroy(ceph_inode_cachep);
 559        return -ENOMEM;
 560}
 561
 562static void destroy_caches(void)
 563{
 564        kmem_cache_destroy(ceph_inode_cachep);
 565        kmem_cache_destroy(ceph_cap_cachep);
 566        kmem_cache_destroy(ceph_dentry_cachep);
 567        kmem_cache_destroy(ceph_file_cachep);
 568}
 569
 570
 571/*
 572 * ceph_umount_begin - initiate forced umount.  Tear down down the
 573 * mount, skipping steps that may hang while waiting for server(s).
 574 */
 575static void ceph_umount_begin(struct super_block *sb)
 576{
 577        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 578
 579        dout("ceph_umount_begin - starting forced umount\n");
 580        if (!fsc)
 581                return;
 582        fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
 583        return;
 584}
 585
 586static const struct super_operations ceph_super_ops = {
 587        .alloc_inode    = ceph_alloc_inode,
 588        .destroy_inode  = ceph_destroy_inode,
 589        .write_inode    = ceph_write_inode,
 590        .sync_fs        = ceph_sync_fs,
 591        .put_super      = ceph_put_super,
 592        .show_options   = ceph_show_options,
 593        .statfs         = ceph_statfs,
 594        .umount_begin   = ceph_umount_begin,
 595};
 596
 597/*
 598 * Bootstrap mount by opening the root directory.  Note the mount
 599 * @started time from caller, and time out if this takes too long.
 600 */
 601static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 602                                       const char *path,
 603                                       unsigned long started)
 604{
 605        struct ceph_mds_client *mdsc = fsc->mdsc;
 606        struct ceph_mds_request *req = NULL;
 607        int err;
 608        struct dentry *root;
 609
 610        /* open dir */
 611        dout("open_root_inode opening '%s'\n", path);
 612        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
 613        if (IS_ERR(req))
 614                return ERR_CAST(req);
 615        req->r_path1 = kstrdup(path, GFP_NOFS);
 616        req->r_ino1.ino = CEPH_INO_ROOT;
 617        req->r_ino1.snap = CEPH_NOSNAP;
 618        req->r_started = started;
 619        req->r_timeout = fsc->client->options->mount_timeout * HZ;
 620        req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
 621        req->r_num_caps = 2;
 622        err = ceph_mdsc_do_request(mdsc, NULL, req);
 623        if (err == 0) {
 624                dout("open_root_inode success\n");
 625                if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
 626                    fsc->sb->s_root == NULL)
 627                        root = d_alloc_root(req->r_target_inode);
 628                else
 629                        root = d_obtain_alias(req->r_target_inode);
 630                req->r_target_inode = NULL;
 631                dout("open_root_inode success, root dentry is %p\n", root);
 632        } else {
 633                root = ERR_PTR(err);
 634        }
 635        ceph_mdsc_put_request(req);
 636        return root;
 637}
 638
 639
 640
 641
 642/*
 643 * mount: join the ceph cluster, and open root directory.
 644 */
 645static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 646                      const char *path)
 647{
 648        int err;
 649        unsigned long started = jiffies;  /* note the start time */
 650        struct dentry *root;
 651        int first = 0;   /* first vfsmount for this super_block */
 652
 653        dout("mount start\n");
 654        mutex_lock(&fsc->client->mount_mutex);
 655
 656        err = __ceph_open_session(fsc->client, started);
 657        if (err < 0)
 658                goto out;
 659
 660        dout("mount opening root\n");
 661        root = open_root_dentry(fsc, "", started);
 662        if (IS_ERR(root)) {
 663                err = PTR_ERR(root);
 664                goto out;
 665        }
 666        if (fsc->sb->s_root) {
 667                dput(root);
 668        } else {
 669                fsc->sb->s_root = root;
 670                first = 1;
 671
 672                err = ceph_fs_debugfs_init(fsc);
 673                if (err < 0)
 674                        goto fail;
 675        }
 676
 677        if (path[0] == 0) {
 678                dget(root);
 679        } else {
 680                dout("mount opening base mountpoint\n");
 681                root = open_root_dentry(fsc, path, started);
 682                if (IS_ERR(root)) {
 683                        err = PTR_ERR(root);
 684                        goto fail;
 685                }
 686        }
 687
 688        fsc->mount_state = CEPH_MOUNT_MOUNTED;
 689        dout("mount success\n");
 690        mutex_unlock(&fsc->client->mount_mutex);
 691        return root;
 692
 693out:
 694        mutex_unlock(&fsc->client->mount_mutex);
 695        return ERR_PTR(err);
 696
 697fail:
 698        if (first) {
 699                dput(fsc->sb->s_root);
 700                fsc->sb->s_root = NULL;
 701        }
 702        goto out;
 703}
 704
 705static int ceph_set_super(struct super_block *s, void *data)
 706{
 707        struct ceph_fs_client *fsc = data;
 708        int ret;
 709
 710        dout("set_super %p data %p\n", s, data);
 711
 712        s->s_flags = fsc->mount_options->sb_flags;
 713        s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
 714
 715        s->s_fs_info = fsc;
 716        fsc->sb = s;
 717
 718        s->s_op = &ceph_super_ops;
 719        s->s_export_op = &ceph_export_ops;
 720
 721        s->s_time_gran = 1000;  /* 1000 ns == 1 us */
 722
 723        ret = set_anon_super(s, NULL);  /* what is that second arg for? */
 724        if (ret != 0)
 725                goto fail;
 726
 727        return ret;
 728
 729fail:
 730        s->s_fs_info = NULL;
 731        fsc->sb = NULL;
 732        return ret;
 733}
 734
 735/*
 736 * share superblock if same fs AND options
 737 */
 738static int ceph_compare_super(struct super_block *sb, void *data)
 739{
 740        struct ceph_fs_client *new = data;
 741        struct ceph_mount_options *fsopt = new->mount_options;
 742        struct ceph_options *opt = new->client->options;
 743        struct ceph_fs_client *other = ceph_sb_to_client(sb);
 744
 745        dout("ceph_compare_super %p\n", sb);
 746
 747        if (compare_mount_options(fsopt, opt, other)) {
 748                dout("monitor(s)/mount options don't match\n");
 749                return 0;
 750        }
 751        if ((opt->flags & CEPH_OPT_FSID) &&
 752            ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
 753                dout("fsid doesn't match\n");
 754                return 0;
 755        }
 756        if (fsopt->sb_flags != other->mount_options->sb_flags) {
 757                dout("flags differ\n");
 758                return 0;
 759        }
 760        return 1;
 761}
 762
 763/*
 764 * construct our own bdi so we can control readahead, etc.
 765 */
 766static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 767
 768static int ceph_register_bdi(struct super_block *sb,
 769                             struct ceph_fs_client *fsc)
 770{
 771        int err;
 772
 773        /* set ra_pages based on rsize mount option? */
 774        if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
 775                fsc->backing_dev_info.ra_pages =
 776                        (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
 777                        >> PAGE_SHIFT;
 778        err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
 779                           atomic_long_inc_return(&bdi_seq));
 780        if (!err)
 781                sb->s_bdi = &fsc->backing_dev_info;
 782        return err;
 783}
 784
 785static struct dentry *ceph_mount(struct file_system_type *fs_type,
 786                       int flags, const char *dev_name, void *data)
 787{
 788        struct super_block *sb;
 789        struct ceph_fs_client *fsc;
 790        struct dentry *res;
 791        int err;
 792        int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
 793        const char *path = NULL;
 794        struct ceph_mount_options *fsopt = NULL;
 795        struct ceph_options *opt = NULL;
 796
 797        dout("ceph_mount\n");
 798        err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
 799        if (err < 0) {
 800                res = ERR_PTR(err);
 801                goto out_final;
 802        }
 803
 804        /* create client (which we may/may not use) */
 805        fsc = create_fs_client(fsopt, opt);
 806        if (IS_ERR(fsc)) {
 807                res = ERR_CAST(fsc);
 808                kfree(fsopt);
 809                kfree(opt);
 810                goto out_final;
 811        }
 812
 813        err = ceph_mdsc_init(fsc);
 814        if (err < 0) {
 815                res = ERR_PTR(err);
 816                goto out;
 817        }
 818
 819        if (ceph_test_opt(fsc->client, NOSHARE))
 820                compare_super = NULL;
 821        sb = sget(fs_type, compare_super, ceph_set_super, fsc);
 822        if (IS_ERR(sb)) {
 823                res = ERR_CAST(sb);
 824                goto out;
 825        }
 826
 827        if (ceph_sb_to_client(sb) != fsc) {
 828                ceph_mdsc_destroy(fsc);
 829                destroy_fs_client(fsc);
 830                fsc = ceph_sb_to_client(sb);
 831                dout("get_sb got existing client %p\n", fsc);
 832        } else {
 833                dout("get_sb using new client %p\n", fsc);
 834                err = ceph_register_bdi(sb, fsc);
 835                if (err < 0) {
 836                        res = ERR_PTR(err);
 837                        goto out_splat;
 838                }
 839        }
 840
 841        res = ceph_real_mount(fsc, path);
 842        if (IS_ERR(res))
 843                goto out_splat;
 844        dout("root %p inode %p ino %llx.%llx\n", res,
 845             res->d_inode, ceph_vinop(res->d_inode));
 846        return res;
 847
 848out_splat:
 849        ceph_mdsc_close_sessions(fsc->mdsc);
 850        deactivate_locked_super(sb);
 851        goto out_final;
 852
 853out:
 854        ceph_mdsc_destroy(fsc);
 855        destroy_fs_client(fsc);
 856out_final:
 857        dout("ceph_mount fail %ld\n", PTR_ERR(res));
 858        return res;
 859}
 860
 861static void ceph_kill_sb(struct super_block *s)
 862{
 863        struct ceph_fs_client *fsc = ceph_sb_to_client(s);
 864        dout("kill_sb %p\n", s);
 865        ceph_mdsc_pre_umount(fsc->mdsc);
 866        kill_anon_super(s);    /* will call put_super after sb is r/o */
 867        ceph_mdsc_destroy(fsc);
 868        destroy_fs_client(fsc);
 869}
 870
 871static struct file_system_type ceph_fs_type = {
 872        .owner          = THIS_MODULE,
 873        .name           = "ceph",
 874        .mount          = ceph_mount,
 875        .kill_sb        = ceph_kill_sb,
 876        .fs_flags       = FS_RENAME_DOES_D_MOVE,
 877};
 878
 879#define _STRINGIFY(x) #x
 880#define STRINGIFY(x) _STRINGIFY(x)
 881
 882static int __init init_ceph(void)
 883{
 884        int ret = init_caches();
 885        if (ret)
 886                goto out;
 887
 888        ret = register_filesystem(&ceph_fs_type);
 889        if (ret)
 890                goto out_icache;
 891
 892        pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
 893
 894        return 0;
 895
 896out_icache:
 897        destroy_caches();
 898out:
 899        return ret;
 900}
 901
 902static void __exit exit_ceph(void)
 903{
 904        dout("exit_ceph\n");
 905        unregister_filesystem(&ceph_fs_type);
 906        destroy_caches();
 907}
 908
 909module_init(init_ceph);
 910module_exit(exit_ceph);
 911
 912MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
 913MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
 914MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
 915MODULE_DESCRIPTION("Ceph filesystem for Linux");
 916MODULE_LICENSE("GPL");
 917