linux/fs/nfs/blocklayout/dev.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2014-2016 Christoph Hellwig.
   4 */
   5#include <linux/sunrpc/svc.h>
   6#include <linux/blkdev.h>
   7#include <linux/nfs4.h>
   8#include <linux/nfs_fs.h>
   9#include <linux/nfs_xdr.h>
  10#include <linux/pr.h>
  11
  12#include "blocklayout.h"
  13
  14#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
  15
  16static void
  17bl_free_device(struct pnfs_block_dev *dev)
  18{
  19        if (dev->nr_children) {
  20                int i;
  21
  22                for (i = 0; i < dev->nr_children; i++)
  23                        bl_free_device(&dev->children[i]);
  24                kfree(dev->children);
  25        } else {
  26                if (dev->pr_registered) {
  27                        const struct pr_ops *ops =
  28                                dev->bdev->bd_disk->fops->pr_ops;
  29                        int error;
  30
  31                        error = ops->pr_register(dev->bdev, dev->pr_key, 0,
  32                                false);
  33                        if (error)
  34                                pr_err("failed to unregister PR key.\n");
  35                }
  36
  37                if (dev->bdev)
  38                        blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
  39        }
  40}
  41
  42void
  43bl_free_deviceid_node(struct nfs4_deviceid_node *d)
  44{
  45        struct pnfs_block_dev *dev =
  46                container_of(d, struct pnfs_block_dev, node);
  47
  48        bl_free_device(dev);
  49        kfree_rcu(dev, node.rcu);
  50}
  51
  52static int
  53nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
  54{
  55        __be32 *p;
  56        int i;
  57
  58        p = xdr_inline_decode(xdr, 4);
  59        if (!p)
  60                return -EIO;
  61        b->type = be32_to_cpup(p++);
  62
  63        switch (b->type) {
  64        case PNFS_BLOCK_VOLUME_SIMPLE:
  65                p = xdr_inline_decode(xdr, 4);
  66                if (!p)
  67                        return -EIO;
  68                b->simple.nr_sigs = be32_to_cpup(p++);
  69                if (!b->simple.nr_sigs || b->simple.nr_sigs > PNFS_BLOCK_MAX_UUIDS) {
  70                        dprintk("Bad signature count: %d\n", b->simple.nr_sigs);
  71                        return -EIO;
  72                }
  73
  74                b->simple.len = 4 + 4;
  75                for (i = 0; i < b->simple.nr_sigs; i++) {
  76                        p = xdr_inline_decode(xdr, 8 + 4);
  77                        if (!p)
  78                                return -EIO;
  79                        p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
  80                        b->simple.sigs[i].sig_len = be32_to_cpup(p++);
  81                        if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
  82                                pr_info("signature too long: %d\n",
  83                                        b->simple.sigs[i].sig_len);
  84                                return -EIO;
  85                        }
  86
  87                        p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
  88                        if (!p)
  89                                return -EIO;
  90                        memcpy(&b->simple.sigs[i].sig, p,
  91                                b->simple.sigs[i].sig_len);
  92
  93                        b->simple.len += 8 + 4 + \
  94                                (XDR_QUADLEN(b->simple.sigs[i].sig_len) << 2);
  95                }
  96                break;
  97        case PNFS_BLOCK_VOLUME_SLICE:
  98                p = xdr_inline_decode(xdr, 8 + 8 + 4);
  99                if (!p)
 100                        return -EIO;
 101                p = xdr_decode_hyper(p, &b->slice.start);
 102                p = xdr_decode_hyper(p, &b->slice.len);
 103                b->slice.volume = be32_to_cpup(p++);
 104                break;
 105        case PNFS_BLOCK_VOLUME_CONCAT:
 106                p = xdr_inline_decode(xdr, 4);
 107                if (!p)
 108                        return -EIO;
 109
 110                b->concat.volumes_count = be32_to_cpup(p++);
 111                if (b->concat.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
 112                        dprintk("Too many volumes: %d\n", b->concat.volumes_count);
 113                        return -EIO;
 114                }
 115
 116                p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
 117                if (!p)
 118                        return -EIO;
 119                for (i = 0; i < b->concat.volumes_count; i++)
 120                        b->concat.volumes[i] = be32_to_cpup(p++);
 121                break;
 122        case PNFS_BLOCK_VOLUME_STRIPE:
 123                p = xdr_inline_decode(xdr, 8 + 4);
 124                if (!p)
 125                        return -EIO;
 126
 127                p = xdr_decode_hyper(p, &b->stripe.chunk_size);
 128                b->stripe.volumes_count = be32_to_cpup(p++);
 129                if (b->stripe.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
 130                        dprintk("Too many volumes: %d\n", b->stripe.volumes_count);
 131                        return -EIO;
 132                }
 133
 134                p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
 135                if (!p)
 136                        return -EIO;
 137                for (i = 0; i < b->stripe.volumes_count; i++)
 138                        b->stripe.volumes[i] = be32_to_cpup(p++);
 139                break;
 140        case PNFS_BLOCK_VOLUME_SCSI:
 141                p = xdr_inline_decode(xdr, 4 + 4 + 4);
 142                if (!p)
 143                        return -EIO;
 144                b->scsi.code_set = be32_to_cpup(p++);
 145                b->scsi.designator_type = be32_to_cpup(p++);
 146                b->scsi.designator_len = be32_to_cpup(p++);
 147                p = xdr_inline_decode(xdr, b->scsi.designator_len);
 148                if (!p)
 149                        return -EIO;
 150                if (b->scsi.designator_len > 256)
 151                        return -EIO;
 152                memcpy(&b->scsi.designator, p, b->scsi.designator_len);
 153                p = xdr_inline_decode(xdr, 8);
 154                if (!p)
 155                        return -EIO;
 156                p = xdr_decode_hyper(p, &b->scsi.pr_key);
 157                break;
 158        default:
 159                dprintk("unknown volume type!\n");
 160                return -EIO;
 161        }
 162
 163        return 0;
 164}
 165
 166static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
 167                struct pnfs_block_dev_map *map)
 168{
 169        map->start = dev->start;
 170        map->len = dev->len;
 171        map->disk_offset = dev->disk_offset;
 172        map->bdev = dev->bdev;
 173        return true;
 174}
 175
 176static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
 177                struct pnfs_block_dev_map *map)
 178{
 179        int i;
 180
 181        for (i = 0; i < dev->nr_children; i++) {
 182                struct pnfs_block_dev *child = &dev->children[i];
 183
 184                if (child->start > offset ||
 185                    child->start + child->len <= offset)
 186                        continue;
 187
 188                child->map(child, offset - child->start, map);
 189                return true;
 190        }
 191
 192        dprintk("%s: ran off loop!\n", __func__);
 193        return false;
 194}
 195
 196static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
 197                struct pnfs_block_dev_map *map)
 198{
 199        struct pnfs_block_dev *child;
 200        u64 chunk;
 201        u32 chunk_idx;
 202        u64 disk_offset;
 203
 204        chunk = div_u64(offset, dev->chunk_size);
 205        div_u64_rem(chunk, dev->nr_children, &chunk_idx);
 206
 207        if (chunk_idx >= dev->nr_children) {
 208                dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
 209                        __func__, chunk_idx, offset, dev->chunk_size);
 210                /* error, should not happen */
 211                return false;
 212        }
 213
 214        /* truncate offset to the beginning of the stripe */
 215        offset = chunk * dev->chunk_size;
 216
 217        /* disk offset of the stripe */
 218        disk_offset = div_u64(offset, dev->nr_children);
 219
 220        child = &dev->children[chunk_idx];
 221        child->map(child, disk_offset, map);
 222
 223        map->start += offset;
 224        map->disk_offset += disk_offset;
 225        map->len = dev->chunk_size;
 226        return true;
 227}
 228
 229static int
 230bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
 231                struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
 232
 233
 234static int
 235bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
 236                struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
 237{
 238        struct pnfs_block_volume *v = &volumes[idx];
 239        struct block_device *bdev;
 240        dev_t dev;
 241
 242        dev = bl_resolve_deviceid(server, v, gfp_mask);
 243        if (!dev)
 244                return -EIO;
 245
 246        bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
 247        if (IS_ERR(bdev)) {
 248                printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
 249                        MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
 250                return PTR_ERR(bdev);
 251        }
 252        d->bdev = bdev;
 253
 254
 255        d->len = bdev_nr_bytes(d->bdev);
 256        d->map = bl_map_simple;
 257
 258        printk(KERN_INFO "pNFS: using block device %s\n",
 259                d->bdev->bd_disk->disk_name);
 260        return 0;
 261}
 262
 263static bool
 264bl_validate_designator(struct pnfs_block_volume *v)
 265{
 266        switch (v->scsi.designator_type) {
 267        case PS_DESIGNATOR_EUI64:
 268                if (v->scsi.code_set != PS_CODE_SET_BINARY)
 269                        return false;
 270
 271                if (v->scsi.designator_len != 8 &&
 272                    v->scsi.designator_len != 10 &&
 273                    v->scsi.designator_len != 16)
 274                        return false;
 275
 276                return true;
 277        case PS_DESIGNATOR_NAA:
 278                if (v->scsi.code_set != PS_CODE_SET_BINARY)
 279                        return false;
 280
 281                if (v->scsi.designator_len != 8 &&
 282                    v->scsi.designator_len != 16)
 283                        return false;
 284
 285                return true;
 286        case PS_DESIGNATOR_T10:
 287        case PS_DESIGNATOR_NAME:
 288                pr_err("pNFS: unsupported designator "
 289                        "(code set %d, type %d, len %d.\n",
 290                        v->scsi.code_set,
 291                        v->scsi.designator_type,
 292                        v->scsi.designator_len);
 293                return false;
 294        default:
 295                pr_err("pNFS: invalid designator "
 296                        "(code set %d, type %d, len %d.\n",
 297                        v->scsi.code_set,
 298                        v->scsi.designator_type,
 299                        v->scsi.designator_len);
 300                return false;
 301        }
 302}
 303
 304/*
 305 * Try to open the udev path for the WWN.  At least on Debian the udev
 306 * by-id path will always point to the dm-multipath device if one exists.
 307 */
 308static struct block_device *
 309bl_open_udev_path(struct pnfs_block_volume *v)
 310{
 311        struct block_device *bdev;
 312        const char *devname;
 313
 314        devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%*phN",
 315                                v->scsi.designator_len, v->scsi.designator);
 316        if (!devname)
 317                return ERR_PTR(-ENOMEM);
 318
 319        bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
 320        if (IS_ERR(bdev)) {
 321                pr_warn("pNFS: failed to open device %s (%ld)\n",
 322                        devname, PTR_ERR(bdev));
 323        }
 324
 325        kfree(devname);
 326        return bdev;
 327}
 328
 329/*
 330 * Try to open the RH/Fedora specific dm-mpath udev path for this WWN, as the
 331 * wwn- links will only point to the first discovered SCSI device there.
 332 */
 333static struct block_device *
 334bl_open_dm_mpath_udev_path(struct pnfs_block_volume *v)
 335{
 336        struct block_device *bdev;
 337        const char *devname;
 338
 339        devname = kasprintf(GFP_KERNEL,
 340                        "/dev/disk/by-id/dm-uuid-mpath-%d%*phN",
 341                        v->scsi.designator_type,
 342                        v->scsi.designator_len, v->scsi.designator);
 343        if (!devname)
 344                return ERR_PTR(-ENOMEM);
 345
 346        bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
 347        kfree(devname);
 348        return bdev;
 349}
 350
 351static int
 352bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
 353                struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
 354{
 355        struct pnfs_block_volume *v = &volumes[idx];
 356        struct block_device *bdev;
 357        const struct pr_ops *ops;
 358        int error;
 359
 360        if (!bl_validate_designator(v))
 361                return -EINVAL;
 362
 363        bdev = bl_open_dm_mpath_udev_path(v);
 364        if (IS_ERR(bdev))
 365                bdev = bl_open_udev_path(v);
 366        if (IS_ERR(bdev))
 367                return PTR_ERR(bdev);
 368        d->bdev = bdev;
 369
 370        d->len = bdev_nr_bytes(d->bdev);
 371        d->map = bl_map_simple;
 372        d->pr_key = v->scsi.pr_key;
 373
 374        pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
 375                d->bdev->bd_disk->disk_name, d->pr_key);
 376
 377        ops = d->bdev->bd_disk->fops->pr_ops;
 378        if (!ops) {
 379                pr_err("pNFS: block device %s does not support reservations.",
 380                                d->bdev->bd_disk->disk_name);
 381                error = -EINVAL;
 382                goto out_blkdev_put;
 383        }
 384
 385        error = ops->pr_register(d->bdev, 0, d->pr_key, true);
 386        if (error) {
 387                pr_err("pNFS: failed to register key for block device %s.",
 388                                d->bdev->bd_disk->disk_name);
 389                goto out_blkdev_put;
 390        }
 391
 392        d->pr_registered = true;
 393        return 0;
 394
 395out_blkdev_put:
 396        blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE);
 397        return error;
 398}
 399
 400static int
 401bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
 402                struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
 403{
 404        struct pnfs_block_volume *v = &volumes[idx];
 405        int ret;
 406
 407        ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
 408        if (ret)
 409                return ret;
 410
 411        d->disk_offset = v->slice.start;
 412        d->len = v->slice.len;
 413        return 0;
 414}
 415
 416static int
 417bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
 418                struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
 419{
 420        struct pnfs_block_volume *v = &volumes[idx];
 421        u64 len = 0;
 422        int ret, i;
 423
 424        d->children = kcalloc(v->concat.volumes_count,
 425                        sizeof(struct pnfs_block_dev), GFP_KERNEL);
 426        if (!d->children)
 427                return -ENOMEM;
 428
 429        for (i = 0; i < v->concat.volumes_count; i++) {
 430                ret = bl_parse_deviceid(server, &d->children[i],
 431                                volumes, v->concat.volumes[i], gfp_mask);
 432                if (ret)
 433                        return ret;
 434
 435                d->nr_children++;
 436                d->children[i].start += len;
 437                len += d->children[i].len;
 438        }
 439
 440        d->len = len;
 441        d->map = bl_map_concat;
 442        return 0;
 443}
 444
 445static int
 446bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
 447                struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
 448{
 449        struct pnfs_block_volume *v = &volumes[idx];
 450        u64 len = 0;
 451        int ret, i;
 452
 453        d->children = kcalloc(v->stripe.volumes_count,
 454                        sizeof(struct pnfs_block_dev), GFP_KERNEL);
 455        if (!d->children)
 456                return -ENOMEM;
 457
 458        for (i = 0; i < v->stripe.volumes_count; i++) {
 459                ret = bl_parse_deviceid(server, &d->children[i],
 460                                volumes, v->stripe.volumes[i], gfp_mask);
 461                if (ret)
 462                        return ret;
 463
 464                d->nr_children++;
 465                len += d->children[i].len;
 466        }
 467
 468        d->len = len;
 469        d->chunk_size = v->stripe.chunk_size;
 470        d->map = bl_map_stripe;
 471        return 0;
 472}
 473
 474static int
 475bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
 476                struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
 477{
 478        switch (volumes[idx].type) {
 479        case PNFS_BLOCK_VOLUME_SIMPLE:
 480                return bl_parse_simple(server, d, volumes, idx, gfp_mask);
 481        case PNFS_BLOCK_VOLUME_SLICE:
 482                return bl_parse_slice(server, d, volumes, idx, gfp_mask);
 483        case PNFS_BLOCK_VOLUME_CONCAT:
 484                return bl_parse_concat(server, d, volumes, idx, gfp_mask);
 485        case PNFS_BLOCK_VOLUME_STRIPE:
 486                return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
 487        case PNFS_BLOCK_VOLUME_SCSI:
 488                return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
 489        default:
 490                dprintk("unsupported volume type: %d\n", volumes[idx].type);
 491                return -EIO;
 492        }
 493}
 494
 495struct nfs4_deviceid_node *
 496bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
 497                gfp_t gfp_mask)
 498{
 499        struct nfs4_deviceid_node *node = NULL;
 500        struct pnfs_block_volume *volumes;
 501        struct pnfs_block_dev *top;
 502        struct xdr_stream xdr;
 503        struct xdr_buf buf;
 504        struct page *scratch;
 505        int nr_volumes, ret, i;
 506        __be32 *p;
 507
 508        scratch = alloc_page(gfp_mask);
 509        if (!scratch)
 510                goto out;
 511
 512        xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
 513        xdr_set_scratch_page(&xdr, scratch);
 514
 515        p = xdr_inline_decode(&xdr, sizeof(__be32));
 516        if (!p)
 517                goto out_free_scratch;
 518        nr_volumes = be32_to_cpup(p++);
 519
 520        volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
 521                          gfp_mask);
 522        if (!volumes)
 523                goto out_free_scratch;
 524
 525        for (i = 0; i < nr_volumes; i++) {
 526                ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
 527                if (ret < 0)
 528                        goto out_free_volumes;
 529        }
 530
 531        top = kzalloc(sizeof(*top), gfp_mask);
 532        if (!top)
 533                goto out_free_volumes;
 534
 535        ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
 536
 537        node = &top->node;
 538        nfs4_init_deviceid_node(node, server, &pdev->dev_id);
 539        if (ret)
 540                nfs4_mark_deviceid_unavailable(node);
 541
 542out_free_volumes:
 543        kfree(volumes);
 544out_free_scratch:
 545        __free_page(scratch);
 546out:
 547        return node;
 548}
 549