linux/fs/exofs/super.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2005, 2006
   3 * Avishay Traeger (avishay@gmail.com)
   4 * Copyright (C) 2008, 2009
   5 * Boaz Harrosh <bharrosh@panasas.com>
   6 *
   7 * Copyrights for code taken from ext2:
   8 *     Copyright (C) 1992, 1993, 1994, 1995
   9 *     Remy Card (card@masi.ibp.fr)
  10 *     Laboratoire MASI - Institut Blaise Pascal
  11 *     Universite Pierre et Marie Curie (Paris VI)
  12 *     from
  13 *     linux/fs/minix/inode.c
  14 *     Copyright (C) 1991, 1992  Linus Torvalds
  15 *
  16 * This file is part of exofs.
  17 *
  18 * exofs is free software; you can redistribute it and/or modify
  19 * it under the terms of the GNU General Public License as published by
  20 * the Free Software Foundation.  Since it is based on ext2, and the only
  21 * valid version of GPL for the Linux kernel is version 2, the only valid
  22 * version of GPL for exofs is version 2.
  23 *
  24 * exofs is distributed in the hope that it will be useful,
  25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  27 * GNU General Public License for more details.
  28 *
  29 * You should have received a copy of the GNU General Public License
  30 * along with exofs; if not, write to the Free Software
  31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  32 */
  33
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/vfs.h>
#include <linux/random.h>
#include <linux/exportfs.h>
#include <linux/slab.h>
  40
  41#include "exofs.h"
  42
  43/******************************************************************************
  44 * MOUNT OPTIONS
  45 *****************************************************************************/
  46
  47/*
  48 * struct to hold what we get from mount options
  49 */
  50struct exofs_mountopt {
  51        const char *dev_name;
  52        uint64_t pid;
  53        int timeout;
  54};
  55
  56/*
  57 * exofs-specific mount-time options.
  58 */
  59enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };
  60
  61/*
  62 * Our mount-time options.  These should ideally be 64-bit unsigned, but the
  63 * kernel's parsing functions do not currently support that.  32-bit should be
  64 * sufficient for most applications now.
  65 */
  66static match_table_t tokens = {
  67        {Opt_pid, "pid=%u"},
  68        {Opt_to, "to=%u"},
  69        {Opt_err, NULL}
  70};
  71
  72/*
  73 * The main option parsing method.  Also makes sure that all of the mandatory
  74 * mount options were set.
  75 */
  76static int parse_options(char *options, struct exofs_mountopt *opts)
  77{
  78        char *p;
  79        substring_t args[MAX_OPT_ARGS];
  80        int option;
  81        bool s_pid = false;
  82
  83        EXOFS_DBGMSG("parse_options %s\n", options);
  84        /* defaults */
  85        memset(opts, 0, sizeof(*opts));
  86        opts->timeout = BLK_DEFAULT_SG_TIMEOUT;
  87
  88        while ((p = strsep(&options, ",")) != NULL) {
  89                int token;
  90                char str[32];
  91
  92                if (!*p)
  93                        continue;
  94
  95                token = match_token(p, tokens, args);
  96                switch (token) {
  97                case Opt_pid:
  98                        if (0 == match_strlcpy(str, &args[0], sizeof(str)))
  99                                return -EINVAL;
 100                        opts->pid = simple_strtoull(str, NULL, 0);
 101                        if (opts->pid < EXOFS_MIN_PID) {
 102                                EXOFS_ERR("Partition ID must be >= %u",
 103                                          EXOFS_MIN_PID);
 104                                return -EINVAL;
 105                        }
 106                        s_pid = 1;
 107                        break;
 108                case Opt_to:
 109                        if (match_int(&args[0], &option))
 110                                return -EINVAL;
 111                        if (option <= 0) {
 112                                EXOFS_ERR("Timout must be > 0");
 113                                return -EINVAL;
 114                        }
 115                        opts->timeout = option * HZ;
 116                        break;
 117                }
 118        }
 119
 120        if (!s_pid) {
 121                EXOFS_ERR("Need to specify the following options:\n");
 122                EXOFS_ERR("    -o pid=pid_no_to_use\n");
 123                return -EINVAL;
 124        }
 125
 126        return 0;
 127}
 128
 129/******************************************************************************
 130 * INODE CACHE
 131 *****************************************************************************/
 132
 133/*
 134 * Our inode cache.  Isn't it pretty?
 135 */
 136static struct kmem_cache *exofs_inode_cachep;
 137
 138/*
 139 * Allocate an inode in the cache
 140 */
 141static struct inode *exofs_alloc_inode(struct super_block *sb)
 142{
 143        struct exofs_i_info *oi;
 144
 145        oi = kmem_cache_alloc(exofs_inode_cachep, GFP_KERNEL);
 146        if (!oi)
 147                return NULL;
 148
 149        oi->vfs_inode.i_version = 1;
 150        return &oi->vfs_inode;
 151}
 152
 153static void exofs_i_callback(struct rcu_head *head)
 154{
 155        struct inode *inode = container_of(head, struct inode, i_rcu);
 156        INIT_LIST_HEAD(&inode->i_dentry);
 157        kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
 158}
 159
 160/*
 161 * Remove an inode from the cache
 162 */
 163static void exofs_destroy_inode(struct inode *inode)
 164{
 165        call_rcu(&inode->i_rcu, exofs_i_callback);
 166}
 167
 168/*
 169 * Initialize the inode
 170 */
 171static void exofs_init_once(void *foo)
 172{
 173        struct exofs_i_info *oi = foo;
 174
 175        inode_init_once(&oi->vfs_inode);
 176}
 177
 178/*
 179 * Create and initialize the inode cache
 180 */
 181static int init_inodecache(void)
 182{
 183        exofs_inode_cachep = kmem_cache_create("exofs_inode_cache",
 184                                sizeof(struct exofs_i_info), 0,
 185                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
 186                                exofs_init_once);
 187        if (exofs_inode_cachep == NULL)
 188                return -ENOMEM;
 189        return 0;
 190}
 191
 192/*
 193 * Destroy the inode cache
 194 */
 195static void destroy_inodecache(void)
 196{
 197        kmem_cache_destroy(exofs_inode_cachep);
 198}
 199
 200/******************************************************************************
 201 * SUPERBLOCK FUNCTIONS
 202 *****************************************************************************/
 203static const struct super_operations exofs_sops;
 204static const struct export_operations exofs_export_ops;
 205
 206/*
 207 * Write the superblock to the OSD
 208 */
 209int exofs_sync_fs(struct super_block *sb, int wait)
 210{
 211        struct exofs_sb_info *sbi;
 212        struct exofs_fscb *fscb;
 213        struct exofs_io_state *ios;
 214        int ret = -ENOMEM;
 215
 216        lock_super(sb);
 217        sbi = sb->s_fs_info;
 218        fscb = &sbi->s_fscb;
 219
 220        ret = exofs_get_io_state(&sbi->layout, &ios);
 221        if (ret)
 222                goto out;
 223
 224        /* Note: We only write the changing part of the fscb. .i.e upto the
 225         *       the fscb->s_dev_table_oid member. There is no read-modify-write
 226         *       here.
 227         */
 228        ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
 229        memset(fscb, 0, ios->length);
 230        fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
 231        fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
 232        fscb->s_magic = cpu_to_le16(sb->s_magic);
 233        fscb->s_newfs = 0;
 234        fscb->s_version = EXOFS_FSCB_VER;
 235
 236        ios->obj.id = EXOFS_SUPER_ID;
 237        ios->offset = 0;
 238        ios->kern_buff = fscb;
 239        ios->cred = sbi->s_cred;
 240
 241        ret = exofs_sbi_write(ios);
 242        if (unlikely(ret)) {
 243                EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
 244                goto out;
 245        }
 246        sb->s_dirt = 0;
 247
 248out:
 249        EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
 250        exofs_put_io_state(ios);
 251        unlock_super(sb);
 252        return ret;
 253}
 254
 255static void exofs_write_super(struct super_block *sb)
 256{
 257        if (!(sb->s_flags & MS_RDONLY))
 258                exofs_sync_fs(sb, 1);
 259        else
 260                sb->s_dirt = 0;
 261}
 262
 263static void _exofs_print_device(const char *msg, const char *dev_path,
 264                                struct osd_dev *od, u64 pid)
 265{
 266        const struct osd_dev_info *odi = osduld_device_info(od);
 267
 268        printk(KERN_NOTICE "exofs: %s %s osd_name-%s pid-0x%llx\n",
 269                msg, dev_path ?: "", odi->osdname, _LLU(pid));
 270}
 271
 272void exofs_free_sbi(struct exofs_sb_info *sbi)
 273{
 274        while (sbi->layout.s_numdevs) {
 275                int i = --sbi->layout.s_numdevs;
 276                struct osd_dev *od = sbi->layout.s_ods[i];
 277
 278                if (od) {
 279                        sbi->layout.s_ods[i] = NULL;
 280                        osduld_put_device(od);
 281                }
 282        }
 283        kfree(sbi);
 284}
 285
 286/*
 287 * This function is called when the vfs is freeing the superblock.  We just
 288 * need to free our own part.
 289 */
 290static void exofs_put_super(struct super_block *sb)
 291{
 292        int num_pend;
 293        struct exofs_sb_info *sbi = sb->s_fs_info;
 294
 295        if (sb->s_dirt)
 296                exofs_write_super(sb);
 297
 298        /* make sure there are no pending commands */
 299        for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
 300             num_pend = atomic_read(&sbi->s_curr_pending)) {
 301                wait_queue_head_t wq;
 302                init_waitqueue_head(&wq);
 303                wait_event_timeout(wq,
 304                                  (atomic_read(&sbi->s_curr_pending) == 0),
 305                                  msecs_to_jiffies(100));
 306        }
 307
 308        _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
 309                            sbi->layout.s_pid);
 310
 311        bdi_destroy(&sbi->bdi);
 312        exofs_free_sbi(sbi);
 313        sb->s_fs_info = NULL;
 314}
 315
 316static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
 317                                    struct exofs_device_table *dt)
 318{
 319        u64 stripe_length;
 320
 321        sbi->data_map.odm_num_comps   =
 322                                le32_to_cpu(dt->dt_data_map.cb_num_comps);
 323        sbi->data_map.odm_stripe_unit =
 324                                le64_to_cpu(dt->dt_data_map.cb_stripe_unit);
 325        sbi->data_map.odm_group_width =
 326                                le32_to_cpu(dt->dt_data_map.cb_group_width);
 327        sbi->data_map.odm_group_depth =
 328                                le32_to_cpu(dt->dt_data_map.cb_group_depth);
 329        sbi->data_map.odm_mirror_cnt  =
 330                                le32_to_cpu(dt->dt_data_map.cb_mirror_cnt);
 331        sbi->data_map.odm_raid_algorithm  =
 332                                le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
 333
 334/* FIXME: Only raid0 for now. if not so, do not mount */
 335        if (sbi->data_map.odm_num_comps != numdevs) {
 336                EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n",
 337                          sbi->data_map.odm_num_comps, numdevs);
 338                return -EINVAL;
 339        }
 340        if (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) {
 341                EXOFS_ERR("Only RAID_0 for now\n");
 342                return -EINVAL;
 343        }
 344        if (0 != (numdevs % (sbi->data_map.odm_mirror_cnt + 1))) {
 345                EXOFS_ERR("Data Map wrong, numdevs=%d mirrors=%d\n",
 346                          numdevs, sbi->data_map.odm_mirror_cnt);
 347                return -EINVAL;
 348        }
 349
 350        if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) {
 351                EXOFS_ERR("Stripe Unit(0x%llx)"
 352                          " must be Multples of PAGE_SIZE(0x%lx)\n",
 353                          _LLU(sbi->data_map.odm_stripe_unit), PAGE_SIZE);
 354                return -EINVAL;
 355        }
 356
 357        sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit;
 358        sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1;
 359
 360        if (sbi->data_map.odm_group_width) {
 361                sbi->layout.group_width = sbi->data_map.odm_group_width;
 362                sbi->layout.group_depth = sbi->data_map.odm_group_depth;
 363                if (!sbi->layout.group_depth) {
 364                        EXOFS_ERR("group_depth == 0 && group_width != 0\n");
 365                        return -EINVAL;
 366                }
 367                sbi->layout.group_count = sbi->data_map.odm_num_comps /
 368                                                sbi->layout.mirrors_p1 /
 369                                                sbi->data_map.odm_group_width;
 370        } else {
 371                if (sbi->data_map.odm_group_depth) {
 372                        printk(KERN_NOTICE "Warning: group_depth ignored "
 373                                "group_width == 0 && group_depth == %d\n",
 374                                sbi->data_map.odm_group_depth);
 375                        sbi->data_map.odm_group_depth = 0;
 376                }
 377                sbi->layout.group_width = sbi->data_map.odm_num_comps /
 378                                                        sbi->layout.mirrors_p1;
 379                sbi->layout.group_depth = -1;
 380                sbi->layout.group_count = 1;
 381        }
 382
 383        stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit;
 384        if (stripe_length >= (1ULL << 32)) {
 385                EXOFS_ERR("Total Stripe length(0x%llx)"
 386                          " >= 32bit is not supported\n", _LLU(stripe_length));
 387                return -EINVAL;
 388        }
 389
 390        return 0;
 391}
 392
 393/* @odi is valid only as long as @fscb_dev is valid */
 394static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
 395                             struct osd_dev_info *odi)
 396{
 397        odi->systemid_len = le32_to_cpu(dt_dev->systemid_len);
 398        memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len);
 399
 400        odi->osdname_len = le32_to_cpu(dt_dev->osdname_len);
 401        odi->osdname = dt_dev->osdname;
 402
 403        /* FIXME support long names. Will need a _put function */
 404        if (dt_dev->long_name_offset)
 405                return -EINVAL;
 406
 407        /* Make sure osdname is printable!
 408         * mkexofs should give us space for a null-terminator else the
 409         * device-table is invalid.
 410         */
 411        if (unlikely(odi->osdname_len >= sizeof(dt_dev->osdname)))
 412                odi->osdname_len = sizeof(dt_dev->osdname) - 1;
 413        dt_dev->osdname[odi->osdname_len] = 0;
 414
 415        /* If it's all zeros something is bad we read past end-of-obj */
 416        return !(odi->systemid_len || odi->osdname_len);
 417}
 418
 419static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
 420                                       unsigned table_count)
 421{
 422        struct exofs_sb_info *sbi = *psbi;
 423        struct osd_dev *fscb_od;
 424        struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
 425                                 .id = EXOFS_DEVTABLE_ID};
 426        struct exofs_device_table *dt;
 427        unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
 428                                             sizeof(*dt);
 429        unsigned numdevs, i;
 430        int ret;
 431
 432        dt = kmalloc(table_bytes, GFP_KERNEL);
 433        if (unlikely(!dt)) {
 434                EXOFS_ERR("ERROR: allocating %x bytes for device table\n",
 435                          table_bytes);
 436                return -ENOMEM;
 437        }
 438
 439        fscb_od = sbi->layout.s_ods[0];
 440        sbi->layout.s_ods[0] = NULL;
 441        sbi->layout.s_numdevs = 0;
 442        ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
 443        if (unlikely(ret)) {
 444                EXOFS_ERR("ERROR: reading device table\n");
 445                goto out;
 446        }
 447
 448        numdevs = le64_to_cpu(dt->dt_num_devices);
 449        if (unlikely(!numdevs)) {
 450                ret = -EINVAL;
 451                goto out;
 452        }
 453        WARN_ON(table_count != numdevs);
 454
 455        ret = _read_and_match_data_map(sbi, numdevs, dt);
 456        if (unlikely(ret))
 457                goto out;
 458
 459        if (likely(numdevs > 1)) {
 460                unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]);
 461
 462                sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
 463                if (unlikely(!sbi)) {
 464                        ret = -ENOMEM;
 465                        goto out;
 466                }
 467                memset(&sbi->layout.s_ods[1], 0,
 468                       size - sizeof(sbi->layout.s_ods[0]));
 469                *psbi = sbi;
 470        }
 471
 472        for (i = 0; i < numdevs; i++) {
 473                struct exofs_fscb fscb;
 474                struct osd_dev_info odi;
 475                struct osd_dev *od;
 476
 477                if (exofs_devs_2_odi(&dt->dt_dev_table[i], &odi)) {
 478                        EXOFS_ERR("ERROR: Read all-zeros device entry\n");
 479                        ret = -EINVAL;
 480                        goto out;
 481                }
 482
 483                printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n",
 484                       i, odi.osdname);
 485
 486                /* On all devices the device table is identical. The user can
 487                 * specify any one of the participating devices on the command
 488                 * line. We always keep them in device-table order.
 489                 */
 490                if (fscb_od && osduld_device_same(fscb_od, &odi)) {
 491                        sbi->layout.s_ods[i] = fscb_od;
 492                        ++sbi->layout.s_numdevs;
 493                        fscb_od = NULL;
 494                        continue;
 495                }
 496
 497                od = osduld_info_lookup(&odi);
 498                if (unlikely(IS_ERR(od))) {
 499                        ret = PTR_ERR(od);
 500                        EXOFS_ERR("ERROR: device requested is not found "
 501                                  "osd_name-%s =>%d\n", odi.osdname, ret);
 502                        goto out;
 503                }
 504
 505                sbi->layout.s_ods[i] = od;
 506                ++sbi->layout.s_numdevs;
 507
 508                /* Read the fscb of the other devices to make sure the FS
 509                 * partition is there.
 510                 */
 511                ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb,
 512                                      sizeof(fscb));
 513                if (unlikely(ret)) {
 514                        EXOFS_ERR("ERROR: Malformed participating device "
 515                                  "error reading fscb osd_name-%s\n",
 516                                  odi.osdname);
 517                        goto out;
 518                }
 519
 520                /* TODO: verify other information is correct and FS-uuid
 521                 *       matches. Benny what did you say about device table
 522                 *       generation and old devices?
 523                 */
 524        }
 525
 526out:
 527        kfree(dt);
 528        if (unlikely(!ret && fscb_od)) {
 529                EXOFS_ERR(
 530                      "ERROR: Bad device-table container device not present\n");
 531                osduld_put_device(fscb_od);
 532                ret = -EINVAL;
 533        }
 534
 535        return ret;
 536}
 537
 538/*
 539 * Read the superblock from the OSD and fill in the fields
 540 */
 541static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 542{
 543        struct inode *root;
 544        struct exofs_mountopt *opts = data;
 545        struct exofs_sb_info *sbi;      /*extended info                  */
 546        struct osd_dev *od;             /* Master device                 */
 547        struct exofs_fscb fscb;         /*on-disk superblock info        */
 548        struct osd_obj_id obj;
 549        unsigned table_count;
 550        int ret;
 551
 552        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
 553        if (!sbi)
 554                return -ENOMEM;
 555
 556        ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
 557        if (ret)
 558                goto free_bdi;
 559
 560        /* use mount options to fill superblock */
 561        od = osduld_path_lookup(opts->dev_name);
 562        if (IS_ERR(od)) {
 563                ret = PTR_ERR(od);
 564                goto free_sbi;
 565        }
 566
 567        /* Default layout in case we do not have a device-table */
 568        sbi->layout.stripe_unit = PAGE_SIZE;
 569        sbi->layout.mirrors_p1 = 1;
 570        sbi->layout.group_width = 1;
 571        sbi->layout.group_depth = -1;
 572        sbi->layout.group_count = 1;
 573        sbi->layout.s_ods[0] = od;
 574        sbi->layout.s_numdevs = 1;
 575        sbi->layout.s_pid = opts->pid;
 576        sbi->s_timeout = opts->timeout;
 577
 578        /* fill in some other data by hand */
 579        memset(sb->s_id, 0, sizeof(sb->s_id));
 580        strcpy(sb->s_id, "exofs");
 581        sb->s_blocksize = EXOFS_BLKSIZE;
 582        sb->s_blocksize_bits = EXOFS_BLKSHIFT;
 583        sb->s_maxbytes = MAX_LFS_FILESIZE;
 584        atomic_set(&sbi->s_curr_pending, 0);
 585        sb->s_bdev = NULL;
 586        sb->s_dev = 0;
 587
 588        obj.partition = sbi->layout.s_pid;
 589        obj.id = EXOFS_SUPER_ID;
 590        exofs_make_credential(sbi->s_cred, &obj);
 591
 592        ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
 593        if (unlikely(ret))
 594                goto free_sbi;
 595
 596        sb->s_magic = le16_to_cpu(fscb.s_magic);
 597        sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
 598        sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
 599
 600        /* make sure what we read from the object store is correct */
 601        if (sb->s_magic != EXOFS_SUPER_MAGIC) {
 602                if (!silent)
 603                        EXOFS_ERR("ERROR: Bad magic value\n");
 604                ret = -EINVAL;
 605                goto free_sbi;
 606        }
 607        if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) {
 608                EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
 609                          EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
 610                ret = -EINVAL;
 611                goto free_sbi;
 612        }
 613
 614        /* start generation numbers from a random point */
 615        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 616        spin_lock_init(&sbi->s_next_gen_lock);
 617
 618        table_count = le64_to_cpu(fscb.s_dev_table_count);
 619        if (table_count) {
 620                ret = exofs_read_lookup_dev_table(&sbi, table_count);
 621                if (unlikely(ret))
 622                        goto free_sbi;
 623        }
 624
 625        /* set up operation vectors */
 626        sb->s_bdi = &sbi->bdi;
 627        sb->s_fs_info = sbi;
 628        sb->s_op = &exofs_sops;
 629        sb->s_export_op = &exofs_export_ops;
 630        root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
 631        if (IS_ERR(root)) {
 632                EXOFS_ERR("ERROR: exofs_iget failed\n");
 633                ret = PTR_ERR(root);
 634                goto free_sbi;
 635        }
 636        sb->s_root = d_alloc_root(root);
 637        if (!sb->s_root) {
 638                iput(root);
 639                EXOFS_ERR("ERROR: get root inode failed\n");
 640                ret = -ENOMEM;
 641                goto free_sbi;
 642        }
 643
 644        if (!S_ISDIR(root->i_mode)) {
 645                dput(sb->s_root);
 646                sb->s_root = NULL;
 647                EXOFS_ERR("ERROR: corrupt root inode (mode = %hd)\n",
 648                       root->i_mode);
 649                ret = -EINVAL;
 650                goto free_sbi;
 651        }
 652
 653        _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
 654                            sbi->layout.s_pid);
 655        return 0;
 656
 657free_sbi:
 658        bdi_destroy(&sbi->bdi);
 659free_bdi:
 660        EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
 661                  opts->dev_name, sbi->layout.s_pid, ret);
 662        exofs_free_sbi(sbi);
 663        return ret;
 664}
 665
 666/*
 667 * Set up the superblock (calls exofs_fill_super eventually)
 668 */
 669static struct dentry *exofs_mount(struct file_system_type *type,
 670                          int flags, const char *dev_name,
 671                          void *data)
 672{
 673        struct exofs_mountopt opts;
 674        int ret;
 675
 676        ret = parse_options(data, &opts);
 677        if (ret)
 678                return ERR_PTR(ret);
 679
 680        opts.dev_name = dev_name;
 681        return mount_nodev(type, flags, &opts, exofs_fill_super);
 682}
 683
 684/*
 685 * Return information about the file system state in the buffer.  This is used
 686 * by the 'df' command, for example.
 687 */
 688static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
 689{
 690        struct super_block *sb = dentry->d_sb;
 691        struct exofs_sb_info *sbi = sb->s_fs_info;
 692        struct exofs_io_state *ios;
 693        struct osd_attr attrs[] = {
 694                ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
 695                        OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
 696                ATTR_DEF(OSD_APAGE_PARTITION_INFORMATION,
 697                        OSD_ATTR_PI_USED_CAPACITY, sizeof(__be64)),
 698        };
 699        uint64_t capacity = ULLONG_MAX;
 700        uint64_t used = ULLONG_MAX;
 701        uint8_t cred_a[OSD_CAP_LEN];
 702        int ret;
 703
 704        ret = exofs_get_io_state(&sbi->layout, &ios);
 705        if (ret) {
 706                EXOFS_DBGMSG("exofs_get_io_state failed.\n");
 707                return ret;
 708        }
 709
 710        exofs_make_credential(cred_a, &ios->obj);
 711        ios->cred = sbi->s_cred;
 712        ios->in_attr = attrs;
 713        ios->in_attr_len = ARRAY_SIZE(attrs);
 714
 715        ret = exofs_sbi_read(ios);
 716        if (unlikely(ret))
 717                goto out;
 718
 719        ret = extract_attr_from_ios(ios, &attrs[0]);
 720        if (likely(!ret)) {
 721                capacity = get_unaligned_be64(attrs[0].val_ptr);
 722                if (unlikely(!capacity))
 723                        capacity = ULLONG_MAX;
 724        } else
 725                EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");
 726
 727        ret = extract_attr_from_ios(ios, &attrs[1]);
 728        if (likely(!ret))
 729                used = get_unaligned_be64(attrs[1].val_ptr);
 730        else
 731                EXOFS_DBGMSG("exofs_statfs: get used-space failed.\n");
 732
 733        /* fill in the stats buffer */
 734        buf->f_type = EXOFS_SUPER_MAGIC;
 735        buf->f_bsize = EXOFS_BLKSIZE;
 736        buf->f_blocks = capacity >> 9;
 737        buf->f_bfree = (capacity - used) >> 9;
 738        buf->f_bavail = buf->f_bfree;
 739        buf->f_files = sbi->s_numfiles;
 740        buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles;
 741        buf->f_namelen = EXOFS_NAME_LEN;
 742
 743out:
 744        exofs_put_io_state(ios);
 745        return ret;
 746}
 747
 748static const struct super_operations exofs_sops = {
 749        .alloc_inode    = exofs_alloc_inode,
 750        .destroy_inode  = exofs_destroy_inode,
 751        .write_inode    = exofs_write_inode,
 752        .evict_inode    = exofs_evict_inode,
 753        .put_super      = exofs_put_super,
 754        .write_super    = exofs_write_super,
 755        .sync_fs        = exofs_sync_fs,
 756        .statfs         = exofs_statfs,
 757};
 758
 759/******************************************************************************
 760 * EXPORT OPERATIONS
 761 *****************************************************************************/
 762
 763struct dentry *exofs_get_parent(struct dentry *child)
 764{
 765        unsigned long ino = exofs_parent_ino(child);
 766
 767        if (!ino)
 768                return NULL;
 769
 770        return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino));
 771}
 772
 773static struct inode *exofs_nfs_get_inode(struct super_block *sb,
 774                u64 ino, u32 generation)
 775{
 776        struct inode *inode;
 777
 778        inode = exofs_iget(sb, ino);
 779        if (IS_ERR(inode))
 780                return ERR_CAST(inode);
 781        if (generation && inode->i_generation != generation) {
 782                /* we didn't find the right inode.. */
 783                iput(inode);
 784                return ERR_PTR(-ESTALE);
 785        }
 786        return inode;
 787}
 788
 789static struct dentry *exofs_fh_to_dentry(struct super_block *sb,
 790                                struct fid *fid, int fh_len, int fh_type)
 791{
 792        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 793                                    exofs_nfs_get_inode);
 794}
 795
 796static struct dentry *exofs_fh_to_parent(struct super_block *sb,
 797                                struct fid *fid, int fh_len, int fh_type)
 798{
 799        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 800                                    exofs_nfs_get_inode);
 801}
 802
 803static const struct export_operations exofs_export_ops = {
 804        .fh_to_dentry = exofs_fh_to_dentry,
 805        .fh_to_parent = exofs_fh_to_parent,
 806        .get_parent = exofs_get_parent,
 807};
 808
 809/******************************************************************************
 810 * INSMOD/RMMOD
 811 *****************************************************************************/
 812
 813/*
 814 * struct that describes this file system
 815 */
 816static struct file_system_type exofs_type = {
 817        .owner          = THIS_MODULE,
 818        .name           = "exofs",
 819        .mount          = exofs_mount,
 820        .kill_sb        = generic_shutdown_super,
 821};
 822
 823static int __init init_exofs(void)
 824{
 825        int err;
 826
 827        err = init_inodecache();
 828        if (err)
 829                goto out;
 830
 831        err = register_filesystem(&exofs_type);
 832        if (err)
 833                goto out_d;
 834
 835        return 0;
 836out_d:
 837        destroy_inodecache();
 838out:
 839        return err;
 840}
 841
 842static void __exit exit_exofs(void)
 843{
 844        unregister_filesystem(&exofs_type);
 845        destroy_inodecache();
 846}
 847
 848MODULE_AUTHOR("Avishay Traeger <avishay@gmail.com>");
 849MODULE_DESCRIPTION("exofs");
 850MODULE_LICENSE("GPL");
 851
 852module_init(init_exofs)
 853module_exit(exit_exofs)
 854