linux/drivers/block/osdblk.c
<<
>>
Prefs
   1
   2/*
   3   osdblk.c -- Export a single SCSI OSD object as a Linux block device
   4
   5
   6   Copyright 2009 Red Hat, Inc.
   7
   8   This program is free software; you can redistribute it and/or modify
   9   it under the terms of the GNU General Public License as published by
  10   the Free Software Foundation.
  11
  12   This program is distributed in the hope that it will be useful,
  13   but WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15   GNU General Public License for more details.
  16
  17   You should have received a copy of the GNU General Public License
  18   along with this program; see the file COPYING.  If not, write to
  19   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  20
  21
  22   Instructions for use
  23   --------------------
  24
  25   1) Map a Linux block device to an existing OSD object.
  26
  27      In this example, we will use partition id 1234, object id 5678,
  28      OSD device /dev/osd1.
  29
  30      $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add
  31
  32
  33   2) List all active blkdev<->object mappings.
  34
  35      In this example, we have performed step #1 twice, creating two blkdevs,
  36      mapped to two separate OSD objects.
  37
  38      $ cat /sys/class/osdblk/list
  39      0 174 1234 5678 /dev/osd1
  40      1 179 1994 897123 /dev/osd0
  41
  42      The columns, in order, are:
  43      - blkdev unique id
  44      - blkdev assigned major
  45      - OSD object partition id
  46      - OSD object id
  47      - OSD device
  48
  49
  50   3) Remove an active blkdev<->object mapping.
  51
  52      In this example, we remove the mapping with blkdev unique id 1.
  53
  54      $ echo 1 > /sys/class/osdblk/remove
  55
  56
  57   NOTE:  The actual creation and deletion of OSD objects is outside the scope
  58   of this driver.
  59
  60 */
  61
  62#include <linux/kernel.h>
  63#include <linux/device.h>
  64#include <linux/module.h>
  65#include <linux/fs.h>
  66#include <linux/slab.h>
  67#include <scsi/osd_initiator.h>
  68#include <scsi/osd_attributes.h>
  69#include <scsi/osd_sec.h>
  70#include <scsi/scsi_device.h>
  71
  72#define DRV_NAME "osdblk"
  73#define PFX DRV_NAME ": "
  74
  75/* #define _OSDBLK_DEBUG */
  76#ifdef _OSDBLK_DEBUG
  77#define OSDBLK_DEBUG(fmt, a...) \
  78        printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a)
  79#else
  80#define OSDBLK_DEBUG(fmt, a...) \
  81        do { if (0) printk(fmt, ##a); } while (0)
  82#endif
  83
  84MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
  85MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko");
  86MODULE_LICENSE("GPL");
  87
  88struct osdblk_device;
  89
  90enum {
  91        OSDBLK_MINORS_PER_MAJOR = 256,          /* max minors per blkdev */
  92        OSDBLK_MAX_REQ          = 32,           /* max parallel requests */
  93        OSDBLK_OP_TIMEOUT       = 4 * 60,       /* sync OSD req timeout */
  94};
  95
  96struct osdblk_request {
  97        struct request          *rq;            /* blk layer request */
  98        struct bio              *bio;           /* cloned bio */
  99        struct osdblk_device    *osdev;         /* associated blkdev */
 100};
 101
 102struct osdblk_device {
 103        int                     id;             /* blkdev unique id */
 104
 105        int                     major;          /* blkdev assigned major */
 106        struct gendisk          *disk;          /* blkdev's gendisk and rq */
 107        struct request_queue    *q;
 108
 109        struct osd_dev          *osd;           /* associated OSD */
 110
 111        char                    name[32];       /* blkdev name, e.g. osdblk34 */
 112
 113        spinlock_t              lock;           /* queue lock */
 114
 115        struct osd_obj_id       obj;            /* OSD partition, obj id */
 116        uint8_t                 obj_cred[OSD_CAP_LEN]; /* OSD cred */
 117
 118        struct osdblk_request   req[OSDBLK_MAX_REQ]; /* request table */
 119
 120        struct list_head        node;
 121
 122        char                    osd_path[0];    /* OSD device path */
 123};
 124
 125static struct class *class_osdblk;              /* /sys/class/osdblk */
 126static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
 127static LIST_HEAD(osdblkdev_list);
 128
 129static const struct block_device_operations osdblk_bd_ops = {
 130        .owner          = THIS_MODULE,
 131};
 132
 133static const struct osd_attr g_attr_logical_length = ATTR_DEF(
 134        OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
 135
 136static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN],
 137                                   const struct osd_obj_id *obj)
 138{
 139        osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
 140}
 141
 142/* copied from exofs; move to libosd? */
 143/*
 144 * Perform a synchronous OSD operation.  copied from exofs; move to libosd?
 145 */
 146static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
 147{
 148        int ret;
 149
 150        or->timeout = timeout;
 151        ret = osd_finalize_request(or, 0, credential, NULL);
 152        if (ret)
 153                return ret;
 154
 155        ret = osd_execute_request(or);
 156
 157        /* osd_req_decode_sense(or, ret); */
 158        return ret;
 159}
 160
 161/*
 162 * Perform an asynchronous OSD operation.  copied from exofs; move to libosd?
 163 */
 164static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done,
 165                   void *caller_context, u8 *cred)
 166{
 167        int ret;
 168
 169        ret = osd_finalize_request(or, 0, cred, NULL);
 170        if (ret)
 171                return ret;
 172
 173        ret = osd_execute_request_async(or, async_done, caller_context);
 174
 175        return ret;
 176}
 177
 178/* copied from exofs; move to libosd? */
 179static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
 180{
 181        struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
 182        void *iter = NULL;
 183        int nelem;
 184
 185        do {
 186                nelem = 1;
 187                osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
 188                if ((cur_attr.attr_page == attr->attr_page) &&
 189                    (cur_attr.attr_id == attr->attr_id)) {
 190                        attr->len = cur_attr.len;
 191                        attr->val_ptr = cur_attr.val_ptr;
 192                        return 0;
 193                }
 194        } while (iter);
 195
 196        return -EIO;
 197}
 198
 199static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out)
 200{
 201        struct osd_request *or;
 202        struct osd_attr attr;
 203        int ret;
 204
 205        /* start request */
 206        or = osd_start_request(osdev->osd, GFP_KERNEL);
 207        if (!or)
 208                return -ENOMEM;
 209
 210        /* create a get-attributes(length) request */
 211        osd_req_get_attributes(or, &osdev->obj);
 212
 213        osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
 214
 215        /* execute op synchronously */
 216        ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred);
 217        if (ret)
 218                goto out;
 219
 220        /* extract length from returned attribute info */
 221        attr = g_attr_logical_length;
 222        ret = extract_attr_from_req(or, &attr);
 223        if (ret)
 224                goto out;
 225
 226        *size_out = get_unaligned_be64(attr.val_ptr);
 227
 228out:
 229        osd_end_request(or);
 230        return ret;
 231
 232}
 233
 234static void osdblk_osd_complete(struct osd_request *or, void *private)
 235{
 236        struct osdblk_request *orq = private;
 237        struct osd_sense_info osi;
 238        int ret = osd_req_decode_sense(or, &osi);
 239
 240        if (ret) {
 241                ret = -EIO;
 242                OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret);
 243        }
 244
 245        /* complete OSD request */
 246        osd_end_request(or);
 247
 248        /* complete request passed to osdblk by block layer */
 249        __blk_end_request_all(orq->rq, ret);
 250}
 251
 252static void bio_chain_put(struct bio *chain)
 253{
 254        struct bio *tmp;
 255
 256        while (chain) {
 257                tmp = chain;
 258                chain = chain->bi_next;
 259
 260                bio_put(tmp);
 261        }
 262}
 263
 264static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
 265{
 266        struct bio *tmp, *new_chain = NULL, *tail = NULL;
 267
 268        while (old_chain) {
 269                tmp = bio_clone_kmalloc(old_chain, gfpmask);
 270                if (!tmp)
 271                        goto err_out;
 272
 273                tmp->bi_bdev = NULL;
 274                gfpmask &= ~__GFP_WAIT;
 275                tmp->bi_next = NULL;
 276
 277                if (!new_chain)
 278                        new_chain = tail = tmp;
 279                else {
 280                        tail->bi_next = tmp;
 281                        tail = tmp;
 282                }
 283
 284                old_chain = old_chain->bi_next;
 285        }
 286
 287        return new_chain;
 288
 289err_out:
 290        OSDBLK_DEBUG("bio_chain_clone with err\n");
 291        bio_chain_put(new_chain);
 292        return NULL;
 293}
 294
 295static void osdblk_rq_fn(struct request_queue *q)
 296{
 297        struct osdblk_device *osdev = q->queuedata;
 298
 299        while (1) {
 300                struct request *rq;
 301                struct osdblk_request *orq;
 302                struct osd_request *or;
 303                struct bio *bio;
 304                bool do_write, do_flush;
 305
 306                /* peek at request from block layer */
 307                rq = blk_fetch_request(q);
 308                if (!rq)
 309                        break;
 310
 311                /* filter out block requests we don't understand */
 312                if (rq->cmd_type != REQ_TYPE_FS) {
 313                        blk_end_request_all(rq, 0);
 314                        continue;
 315                }
 316
 317                /* deduce our operation (read, write, flush) */
 318                /* I wish the block layer simplified cmd_type/cmd_flags/cmd[]
 319                 * into a clearly defined set of RPC commands:
 320                 * read, write, flush, scsi command, power mgmt req,
 321                 * driver-specific, etc.
 322                 */
 323
 324                do_flush = rq->cmd_flags & REQ_FLUSH;
 325                do_write = (rq_data_dir(rq) == WRITE);
 326
 327                if (!do_flush) { /* osd_flush does not use a bio */
 328                        /* a bio clone to be passed down to OSD request */
 329                        bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
 330                        if (!bio)
 331                                break;
 332                } else
 333                        bio = NULL;
 334
 335                /* alloc internal OSD request, for OSD command execution */
 336                or = osd_start_request(osdev->osd, GFP_ATOMIC);
 337                if (!or) {
 338                        bio_chain_put(bio);
 339                        OSDBLK_DEBUG("osd_start_request with err\n");
 340                        break;
 341                }
 342
 343                orq = &osdev->req[rq->tag];
 344                orq->rq = rq;
 345                orq->bio = bio;
 346                orq->osdev = osdev;
 347
 348                /* init OSD command: flush, write or read */
 349                if (do_flush)
 350                        osd_req_flush_object(or, &osdev->obj,
 351                                             OSD_CDB_FLUSH_ALL, 0, 0);
 352                else if (do_write)
 353                        osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
 354                                      bio, blk_rq_bytes(rq));
 355                else
 356                        osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
 357                                     bio, blk_rq_bytes(rq));
 358
 359                OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n",
 360                        do_flush ? "flush" : do_write ?
 361                                "write" : "read", blk_rq_bytes(rq),
 362                        blk_rq_pos(rq) * 512ULL);
 363
 364                /* begin OSD command execution */
 365                if (osd_async_op(or, osdblk_osd_complete, orq,
 366                                 osdev->obj_cred)) {
 367                        osd_end_request(or);
 368                        blk_requeue_request(q, rq);
 369                        bio_chain_put(bio);
 370                        OSDBLK_DEBUG("osd_execute_request_async with err\n");
 371                        break;
 372                }
 373
 374                /* remove the special 'flush' marker, now that the command
 375                 * is executing
 376                 */
 377                rq->special = NULL;
 378        }
 379}
 380
 381static void osdblk_free_disk(struct osdblk_device *osdev)
 382{
 383        struct gendisk *disk = osdev->disk;
 384
 385        if (!disk)
 386                return;
 387
 388        if (disk->flags & GENHD_FL_UP)
 389                del_gendisk(disk);
 390        if (disk->queue)
 391                blk_cleanup_queue(disk->queue);
 392        put_disk(disk);
 393}
 394
 395static int osdblk_init_disk(struct osdblk_device *osdev)
 396{
 397        struct gendisk *disk;
 398        struct request_queue *q;
 399        int rc;
 400        u64 obj_size = 0;
 401
 402        /* contact OSD, request size info about the object being mapped */
 403        rc = osdblk_get_obj_size(osdev, &obj_size);
 404        if (rc)
 405                return rc;
 406
 407        /* create gendisk info */
 408        disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR);
 409        if (!disk)
 410                return -ENOMEM;
 411
 412        sprintf(disk->disk_name, DRV_NAME "%d", osdev->id);
 413        disk->major = osdev->major;
 414        disk->first_minor = 0;
 415        disk->fops = &osdblk_bd_ops;
 416        disk->private_data = osdev;
 417
 418        /* init rq */
 419        q = blk_init_queue(osdblk_rq_fn, &osdev->lock);
 420        if (!q) {
 421                put_disk(disk);
 422                return -ENOMEM;
 423        }
 424
 425        /* switch queue to TCQ mode; allocate tag map */
 426        rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL);
 427        if (rc) {
 428                blk_cleanup_queue(q);
 429                put_disk(disk);
 430                return rc;
 431        }
 432
 433        /* Set our limits to the lower device limits, because osdblk cannot
 434         * sleep when allocating a lower-request and therefore cannot be
 435         * bouncing.
 436         */
 437        blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
 438
 439        blk_queue_prep_rq(q, blk_queue_start_tag);
 440        blk_queue_flush(q, REQ_FLUSH);
 441
 442        disk->queue = q;
 443
 444        q->queuedata = osdev;
 445
 446        osdev->disk = disk;
 447        osdev->q = q;
 448
 449        /* finally, announce the disk to the world */
 450        set_capacity(disk, obj_size / 512ULL);
 451        add_disk(disk);
 452
 453        printk(KERN_INFO "%s: Added of size 0x%llx\n",
 454                disk->disk_name, (unsigned long long)obj_size);
 455
 456        return 0;
 457}
 458
 459/********************************************************************
 460 * /sys/class/osdblk/
 461 *                   add        map OSD object to blkdev
 462 *                   remove     unmap OSD object
 463 *                   list       show mappings
 464 *******************************************************************/
 465
 466static void class_osdblk_release(struct class *cls)
 467{
 468        kfree(cls);
 469}
 470
 471static ssize_t class_osdblk_list(struct class *c,
 472                                struct class_attribute *attr,
 473                                char *data)
 474{
 475        int n = 0;
 476        struct list_head *tmp;
 477
 478        mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
 479
 480        list_for_each(tmp, &osdblkdev_list) {
 481                struct osdblk_device *osdev;
 482
 483                osdev = list_entry(tmp, struct osdblk_device, node);
 484
 485                n += sprintf(data+n, "%d %d %llu %llu %s\n",
 486                        osdev->id,
 487                        osdev->major,
 488                        osdev->obj.partition,
 489                        osdev->obj.id,
 490                        osdev->osd_path);
 491        }
 492
 493        mutex_unlock(&ctl_mutex);
 494        return n;
 495}
 496
 497static ssize_t class_osdblk_add(struct class *c,
 498                                struct class_attribute *attr,
 499                                const char *buf, size_t count)
 500{
 501        struct osdblk_device *osdev;
 502        ssize_t rc;
 503        int irc, new_id = 0;
 504        struct list_head *tmp;
 505
 506        if (!try_module_get(THIS_MODULE))
 507                return -ENODEV;
 508
 509        /* new osdblk_device object */
 510        osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL);
 511        if (!osdev) {
 512                rc = -ENOMEM;
 513                goto err_out_mod;
 514        }
 515
 516        /* static osdblk_device initialization */
 517        spin_lock_init(&osdev->lock);
 518        INIT_LIST_HEAD(&osdev->node);
 519
 520        /* generate unique id: find highest unique id, add one */
 521
 522        mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
 523
 524        list_for_each(tmp, &osdblkdev_list) {
 525                struct osdblk_device *osdev;
 526
 527                osdev = list_entry(tmp, struct osdblk_device, node);
 528                if (osdev->id > new_id)
 529                        new_id = osdev->id + 1;
 530        }
 531
 532        osdev->id = new_id;
 533
 534        /* add to global list */
 535        list_add_tail(&osdev->node, &osdblkdev_list);
 536
 537        mutex_unlock(&ctl_mutex);
 538
 539        /* parse add command */
 540        if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id,
 541                   osdev->osd_path) != 3) {
 542                rc = -EINVAL;
 543                goto err_out_slot;
 544        }
 545
 546        /* initialize rest of new object */
 547        sprintf(osdev->name, DRV_NAME "%d", osdev->id);
 548
 549        /* contact requested OSD */
 550        osdev->osd = osduld_path_lookup(osdev->osd_path);
 551        if (IS_ERR(osdev->osd)) {
 552                rc = PTR_ERR(osdev->osd);
 553                goto err_out_slot;
 554        }
 555
 556        /* build OSD credential */
 557        osdblk_make_credential(osdev->obj_cred, &osdev->obj);
 558
 559        /* register our block device */
 560        irc = register_blkdev(0, osdev->name);
 561        if (irc < 0) {
 562                rc = irc;
 563                goto err_out_osd;
 564        }
 565
 566        osdev->major = irc;
 567
 568        /* set up and announce blkdev mapping */
 569        rc = osdblk_init_disk(osdev);
 570        if (rc)
 571                goto err_out_blkdev;
 572
 573        return count;
 574
 575err_out_blkdev:
 576        unregister_blkdev(osdev->major, osdev->name);
 577err_out_osd:
 578        osduld_put_device(osdev->osd);
 579err_out_slot:
 580        mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
 581        list_del_init(&osdev->node);
 582        mutex_unlock(&ctl_mutex);
 583
 584        kfree(osdev);
 585err_out_mod:
 586        OSDBLK_DEBUG("Error adding device %s\n", buf);
 587        module_put(THIS_MODULE);
 588        return rc;
 589}
 590
 591static ssize_t class_osdblk_remove(struct class *c,
 592                                        struct class_attribute *attr,
 593                                        const char *buf,
 594                                        size_t count)
 595{
 596        struct osdblk_device *osdev = NULL;
 597        int target_id, rc;
 598        unsigned long ul;
 599        struct list_head *tmp;
 600
 601        rc = strict_strtoul(buf, 10, &ul);
 602        if (rc)
 603                return rc;
 604
 605        /* convert to int; abort if we lost anything in the conversion */
 606        target_id = (int) ul;
 607        if (target_id != ul)
 608                return -EINVAL;
 609
 610        /* remove object from list immediately */
 611        mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
 612
 613        list_for_each(tmp, &osdblkdev_list) {
 614                osdev = list_entry(tmp, struct osdblk_device, node);
 615                if (osdev->id == target_id) {
 616                        list_del_init(&osdev->node);
 617                        break;
 618                }
 619                osdev = NULL;
 620        }
 621
 622        mutex_unlock(&ctl_mutex);
 623
 624        if (!osdev)
 625                return -ENOENT;
 626
 627        /* clean up and free blkdev and associated OSD connection */
 628        osdblk_free_disk(osdev);
 629        unregister_blkdev(osdev->major, osdev->name);
 630        osduld_put_device(osdev->osd);
 631        kfree(osdev);
 632
 633        /* release module ref */
 634        module_put(THIS_MODULE);
 635
 636        return count;
 637}
 638
 639static struct class_attribute class_osdblk_attrs[] = {
 640        __ATTR(add,     0200, NULL, class_osdblk_add),
 641        __ATTR(remove,  0200, NULL, class_osdblk_remove),
 642        __ATTR(list,    0444, class_osdblk_list, NULL),
 643        __ATTR_NULL
 644};
 645
 646static int osdblk_sysfs_init(void)
 647{
 648        int ret = 0;
 649
 650        /*
 651         * create control files in sysfs
 652         * /sys/class/osdblk/...
 653         */
 654        class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL);
 655        if (!class_osdblk)
 656                return -ENOMEM;
 657
 658        class_osdblk->name = DRV_NAME;
 659        class_osdblk->owner = THIS_MODULE;
 660        class_osdblk->class_release = class_osdblk_release;
 661        class_osdblk->class_attrs = class_osdblk_attrs;
 662
 663        ret = class_register(class_osdblk);
 664        if (ret) {
 665                kfree(class_osdblk);
 666                class_osdblk = NULL;
 667                printk(PFX "failed to create class osdblk\n");
 668                return ret;
 669        }
 670
 671        return 0;
 672}
 673
 674static void osdblk_sysfs_cleanup(void)
 675{
 676        if (class_osdblk)
 677                class_destroy(class_osdblk);
 678        class_osdblk = NULL;
 679}
 680
 681static int __init osdblk_init(void)
 682{
 683        int rc;
 684
 685        rc = osdblk_sysfs_init();
 686        if (rc)
 687                return rc;
 688
 689        return 0;
 690}
 691
 692static void __exit osdblk_exit(void)
 693{
 694        osdblk_sysfs_cleanup();
 695}
 696
 697module_init(osdblk_init);
 698module_exit(osdblk_exit);
 699
 700