linux/drivers/nvdimm/core.c
/*
 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/libnvdimm.h>
#include <linux/badblocks.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/device.h>
#include <linux/ctype.h>
#include <linux/ndctl.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include "nd-core.h"
#include "nd.h"

LIST_HEAD(nvdimm_bus_list);
DEFINE_MUTEX(nvdimm_bus_list_mutex);
static DEFINE_IDA(nd_ida);

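/*
 * Bus-scope locking helpers: any device on an nvdimm bus can be used to
 * resolve the bus that owns 'reconfig_mutex', the lock that serializes
 * bus-wide configuration state (for example, the poison list maintained
 * in this file).
 */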
void nvdimm_bus_lock(struct device *dev)
{
        struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

        if (!nvdimm_bus)
                return;
        mutex_lock(&nvdimm_bus->reconfig_mutex);
}
EXPORT_SYMBOL(nvdimm_bus_lock);

void nvdimm_bus_unlock(struct device *dev)
{
        struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

        if (!nvdimm_bus)
                return;
        mutex_unlock(&nvdimm_bus->reconfig_mutex);
}
EXPORT_SYMBOL(nvdimm_bus_unlock);

bool is_nvdimm_bus_locked(struct device *dev)
{
        struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

        if (!nvdimm_bus)
                return false;
        return mutex_is_locked(&nvdimm_bus->reconfig_mutex);
}
EXPORT_SYMBOL(is_nvdimm_bus_locked);

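/*
 * Fletcher64 checksum over an array of 32-bit words.  Per iteration:
 *
 *      lo = (lo + word) mod 2^32;  hi = (hi + lo) mod 2^32;
 *
 * and the result packs 'hi' into the upper and 'lo' into the lower 32
 * bits.  @le selects whether each word is converted from little-endian
 * (on-media) to cpu byte order before it is accumulated.  Any trailing
 * bytes beyond a multiple of 4 are ignored.
 */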
u64 nd_fletcher64(void *addr, size_t len, bool le)
{
        u32 *buf = addr;
        u32 lo32 = 0;
        u64 hi32 = 0;
        int i;

        for (i = 0; i < len / sizeof(u32); i++) {
                lo32 += le ? le32_to_cpu((__le32) buf[i]) : buf[i];
                hi32 += lo32;
        }

        return hi32 << 32 | lo32;
}
EXPORT_SYMBOL_GPL(nd_fletcher64);

static void nvdimm_bus_release(struct device *dev)
{
        struct nvdimm_bus *nvdimm_bus;

        nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
        ida_simple_remove(&nd_ida, nvdimm_bus->id);
        kfree(nvdimm_bus);
}

struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
{
        struct nvdimm_bus *nvdimm_bus;

        nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
        WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
        return nvdimm_bus;
}
EXPORT_SYMBOL_GPL(to_nvdimm_bus);

struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
{
        /* struct nvdimm_bus definition is private to libnvdimm */
        return nvdimm_bus->nd_desc;
}
EXPORT_SYMBOL_GPL(to_nd_desc);

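/*
 * Walk up the device hierarchy until the device whose release method is
 * nvdimm_bus_release(), i.e. the bus device itself.  Warns (once) and
 * returns NULL if @nd_dev is not parented by an nvdimm bus.
 */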
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
{
        struct device *dev;

        for (dev = nd_dev; dev; dev = dev->parent)
                if (dev->release == nvdimm_bus_release)
                        break;
        dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
        if (dev)
                return to_nvdimm_bus(dev);
        return NULL;
}

static bool is_uuid_sep(char sep)
{
        if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')
                return true;
        return false;
}

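/*
 * Parse a 16-byte uuid from its textual representation.  Each byte must
 * be two hex digits; a single '-', ':', '\n', or '\0' after any byte is
 * skipped, so both the canonical 8-4-4-4-12 form and a bare 32-digit
 * hex string are accepted.
 */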
static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
                size_t len)
{
        const char *str = buf;
        u8 uuid[16];
        int i;

        for (i = 0; i < 16; i++) {
                if (!isxdigit(str[0]) || !isxdigit(str[1])) {
                        dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
                                        __func__, i, str - buf, str[0],
                                        str + 1 - buf, str[1]);
                        return -EINVAL;
                }

                uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
                str += 2;
                if (is_uuid_sep(*str))
                        str++;
        }

        memcpy(uuid_out, uuid, sizeof(uuid));
        return 0;
}

/**
 * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes
 * @dev: container device for the uuid property
 * @uuid_out: uuid buffer to replace
 * @buf: raw sysfs buffer to parse
 * @len: length of @buf
 *
 * Enforce that uuids can only be changed while the device is disabled
 * (driver detached).
 * LOCKING: expects device_lock() is held on entry
 */
int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
                size_t len)
{
        u8 uuid[16];
        int rc;

        if (dev->driver)
                return -EBUSY;

        rc = nd_uuid_parse(dev, uuid, buf, len);
        if (rc)
                return rc;

        kfree(*uuid_out);
        *uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL);
        if (!(*uuid_out))
                return -ENOMEM;

        return 0;
}

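/*
 * Emit the zero-terminated @supported list with the active size in
 * brackets, e.g. "512 [4096] \n" when 4096 is the current lbasize.
 */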
ssize_t nd_sector_size_show(unsigned long current_lbasize,
                const unsigned long *supported, char *buf)
{
        ssize_t len = 0;
        int i;

        for (i = 0; supported[i]; i++)
                if (current_lbasize == supported[i])
                        len += sprintf(buf + len, "[%ld] ", supported[i]);
                else
                        len += sprintf(buf + len, "%ld ", supported[i]);
        len += sprintf(buf + len, "\n");
        return len;
}

ssize_t nd_sector_size_store(struct device *dev, const char *buf,
                unsigned long *current_lbasize, const unsigned long *supported)
{
        unsigned long lbasize;
        int rc, i;

        if (dev->driver)
                return -EBUSY;

        rc = kstrtoul(buf, 0, &lbasize);
        if (rc)
                return rc;

        for (i = 0; supported[i]; i++)
                if (lbasize == supported[i])
                        break;

        if (supported[i]) {
                *current_lbasize = lbasize;
                return 0;
        } else {
                return -EINVAL;
        }
}

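/*
 * Minimal gendisk io accounting against the whole-disk (part0)
 * statistics: __nd_iostat_start() stamps the start time and bumps the
 * io, sector, and in-flight counters at submission; nd_iostat_end()
 * credits the elapsed ticks and drops in-flight at completion.  The two
 * are meant to be called as a matched pair around bio processing.
 */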
void __nd_iostat_start(struct bio *bio, unsigned long *start)
{
        struct gendisk *disk = bio->bi_bdev->bd_disk;
        const int rw = bio_data_dir(bio);
        int cpu = part_stat_lock();

        *start = jiffies;
        part_round_stats(cpu, &disk->part0);
        part_stat_inc(cpu, &disk->part0, ios[rw]);
        part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
        part_inc_in_flight(&disk->part0, rw);
        part_stat_unlock();
}
EXPORT_SYMBOL(__nd_iostat_start);

void nd_iostat_end(struct bio *bio, unsigned long start)
{
        struct gendisk *disk = bio->bi_bdev->bd_disk;
        unsigned long duration = jiffies - start;
        const int rw = bio_data_dir(bio);
        int cpu = part_stat_lock();

        part_stat_add(cpu, &disk->part0, ticks[rw], duration);
        part_round_stats(cpu, &disk->part0);
        part_dec_in_flight(&disk->part0, rw);
        part_stat_unlock();
}
EXPORT_SYMBOL(nd_iostat_end);

static ssize_t commands_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        int cmd, len = 0;
        struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
        struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;

        for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG)
                len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd));
        len += sprintf(buf + len, "\n");
        return len;
}
static DEVICE_ATTR_RO(commands);

static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus)
{
        struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
        struct device *parent = nvdimm_bus->dev.parent;

        if (nd_desc->provider_name)
                return nd_desc->provider_name;
        else if (parent)
                return dev_name(parent);
        else
                return "unknown";
}

static ssize_t provider_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);

        return sprintf(buf, "%s\n", nvdimm_bus_provider(nvdimm_bus));
}
static DEVICE_ATTR_RO(provider);

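/*
 * A device_lock()/device_unlock() pair does no work of its own, but it
 * cannot be taken while a probe or remove holds the lock, so cycling it
 * flushes any in-flight probing on the child device.
 */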
static int flush_namespaces(struct device *dev, void *data)
{
        device_lock(dev);
        device_unlock(dev);
        return 0;
}

static int flush_regions_dimms(struct device *dev, void *data)
{
        device_lock(dev);
        device_unlock(dev);
        device_for_each_child(dev, NULL, flush_namespaces);
        return 0;
}

static ssize_t wait_probe_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
        struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
        int rc;

        if (nd_desc->flush_probe) {
                rc = nd_desc->flush_probe(nd_desc);
                if (rc)
                        return rc;
        }
        nd_synchronize();
        device_for_each_child(dev, NULL, flush_regions_dimms);
        return sprintf(buf, "1\n");
}
static DEVICE_ATTR_RO(wait_probe);

static struct attribute *nvdimm_bus_attributes[] = {
        &dev_attr_commands.attr,
        &dev_attr_wait_probe.attr,
        &dev_attr_provider.attr,
        NULL,
};

struct attribute_group nvdimm_bus_attribute_group = {
        .attrs = nvdimm_bus_attributes,
};
EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);

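/*
 * Once device_register() has succeeded, teardown on the error path must
 * go through put_device() so that nvdimm_bus_release() frees the id and
 * the allocation.  Callers outside libnvdimm typically reach this
 * routine via a nvdimm_bus_register() wrapper that supplies THIS_MODULE,
 * along the lines of this hypothetical provider:
 *
 *      nvdimm_bus = nvdimm_bus_register(dev, nd_desc);
 *      if (!nvdimm_bus)
 *              return -ENOMEM;
 */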
struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
                struct nvdimm_bus_descriptor *nd_desc, struct module *module)
{
        struct nvdimm_bus *nvdimm_bus;
        int rc;

        nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
        if (!nvdimm_bus)
                return NULL;
        INIT_LIST_HEAD(&nvdimm_bus->list);
        INIT_LIST_HEAD(&nvdimm_bus->poison_list);
        init_waitqueue_head(&nvdimm_bus->probe_wait);
        nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
        mutex_init(&nvdimm_bus->reconfig_mutex);
        if (nvdimm_bus->id < 0) {
                kfree(nvdimm_bus);
                return NULL;
        }
        nvdimm_bus->nd_desc = nd_desc;
        nvdimm_bus->module = module;
        nvdimm_bus->dev.parent = parent;
        nvdimm_bus->dev.release = nvdimm_bus_release;
        nvdimm_bus->dev.groups = nd_desc->attr_groups;
        dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
        rc = device_register(&nvdimm_bus->dev);
        if (rc) {
                dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
                goto err;
        }

        rc = nvdimm_bus_create_ndctl(nvdimm_bus);
        if (rc)
                goto err;

        mutex_lock(&nvdimm_bus_list_mutex);
        list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
        mutex_unlock(&nvdimm_bus_list_mutex);

        return nvdimm_bus;
 err:
        put_device(&nvdimm_bus->dev);
        return NULL;
}
EXPORT_SYMBOL_GPL(__nvdimm_bus_register);

static void set_badblock(struct badblocks *bb, sector_t s, int num)
{
        dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
                        (u64) s * 512, (u64) num * 512);
        /* this isn't an error as the hardware will still throw an exception */
        if (badblocks_set(bb, s, num, 1))
                dev_info_once(bb->dev, "%s: failed for sector %llx\n",
                                __func__, (u64) s);
}

/**
 * __add_badblock_range() - Convert a physical address range to bad sectors
 * @bb:         badblocks instance to populate
 * @ns_offset:  namespace offset where the error range begins (in bytes)
 * @len:        number of bytes of poison to be added
 *
 * This assumes that the range provided with (ns_offset, len) is within
 * the bounds of physical addresses for this namespace, i.e. lies in the
 * interval [ns_start, ns_start + ns_size)
 */
static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
{
        const unsigned int sector_size = 512;
        sector_t start_sector;
        u64 num_sectors;
        u32 rem;

        start_sector = div_u64(ns_offset, sector_size);
        num_sectors = div_u64_rem(len, sector_size, &rem);
        if (rem)
                num_sectors++;

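        /*
         * badblocks_set() takes an 'int' count of sectors, so split any
         * pathologically large range into INT_MAX-sized chunks.
         */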
        if (unlikely(num_sectors > (u64)INT_MAX)) {
                u64 remaining = num_sectors;
                sector_t s = start_sector;

                while (remaining) {
                        int done = min_t(u64, remaining, INT_MAX);

                        set_badblock(bb, s, done);
                        remaining -= done;
                        s += done;
                }
        } else
                set_badblock(bb, start_sector, num_sectors);
}

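/*
 * Clamp each entry on @poison_list against the address window described
 * by @res, and hand any surviving intersection to
 * __add_badblock_range() as an offset from the start of @res.
 */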
static void badblocks_populate(struct list_head *poison_list,
                struct badblocks *bb, const struct resource *res)
{
        struct nd_poison *pl;

        if (list_empty(poison_list))
                return;

        list_for_each_entry(pl, poison_list, list) {
                u64 pl_end = pl->start + pl->length - 1;

                /* Discard intervals with no intersection */
                if (pl_end < res->start)
                        continue;
                if (pl->start > res->end)
                        continue;
                /* Deal with any overlap after start of the namespace */
                if (pl->start >= res->start) {
                        u64 start = pl->start;
                        u64 len;

                        if (pl_end <= res->end)
                                len = pl->length;
                        else
                                len = res->start + resource_size(res)
                                        - pl->start;
                        __add_badblock_range(bb, start - res->start, len);
                        continue;
                }
                /* Deal with overlap for poison starting before the namespace */
                if (pl->start < res->start) {
                        u64 len;

                        if (pl_end < res->end)
                                len = pl->start + pl->length - res->start;
                        else
                                len = resource_size(res);
                        __add_badblock_range(bb, 0, len);
                }
        }
}

/**
 * nvdimm_badblocks_populate() - Convert a list of poison ranges to badblocks
 * @nd_region: parent region of the range to interrogate
 * @bb: badblocks instance to populate
 * @res: resource range to consider
 *
 * The poison list generated during bus initialization may contain
 * multiple, possibly overlapping physical address ranges.  Compare each
 * of these ranges to the resource range currently being initialized,
 * and add badblocks entries for all matching sub-ranges.
 */
void nvdimm_badblocks_populate(struct nd_region *nd_region,
                struct badblocks *bb, const struct resource *res)
{
        struct nvdimm_bus *nvdimm_bus;
        struct list_head *poison_list;

        if (!is_nd_pmem(&nd_region->dev)) {
                dev_WARN_ONCE(&nd_region->dev, 1,
                                "%s only valid for pmem regions\n", __func__);
                return;
        }
        nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
        poison_list = &nvdimm_bus->poison_list;

        nvdimm_bus_lock(&nvdimm_bus->dev);
        badblocks_populate(poison_list, bb, res);
        nvdimm_bus_unlock(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);

static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
        struct nd_poison *pl;

        pl = kzalloc(sizeof(*pl), GFP_KERNEL);
        if (!pl)
                return -ENOMEM;

        pl->start = addr;
        pl->length = length;
        list_add_tail(&pl->list, &nvdimm_bus->poison_list);

        return 0;
}

static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
        struct nd_poison *pl;

        if (list_empty(&nvdimm_bus->poison_list))
                return add_poison(nvdimm_bus, addr, length);

        /*
         * There is a chance this is a duplicate, check for those first.
         * This will be the common case as ARS_STATUS returns all known
         * errors in the SPA space, and we can't query it per region
         */
        list_for_each_entry(pl, &nvdimm_bus->poison_list, list)
                if (pl->start == addr) {
                        /* If length has changed, update this list entry */
                        if (pl->length != length)
                                pl->length = length;
                        return 0;
                }

        /*
         * If not a duplicate or a simple length update, add the entry as is,
         * as any overlapping ranges will get resolved when the list is consumed
         * and converted to badblocks
         */
        return add_poison(nvdimm_bus, addr, length);
}

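/*
 * Entry point for platform code to report a poisoned range, e.g. as
 * returned by an ARS (address range scrub) query; serialized against
 * consumers of the list by the bus reconfig lock.  A hypothetical
 * caller reporting one poisoned page:
 *
 *      rc = nvdimm_bus_add_poison(nvdimm_bus, spa, SZ_4K);
 */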
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
        int rc;

        nvdimm_bus_lock(&nvdimm_bus->dev);
        rc = bus_add_poison(nvdimm_bus, addr, length);
        nvdimm_bus_unlock(&nvdimm_bus->dev);

        return rc;
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);

static void free_poison_list(struct list_head *poison_list)
{
        struct nd_poison *pl, *next;

        list_for_each_entry_safe(pl, next, poison_list, list) {
                list_del(&pl->list);
                kfree(pl);
        }
        list_del_init(poison_list);
}

static int child_unregister(struct device *dev, void *data)
{
        /*
         * the singular ndctl class device per bus needs to be
         * "device_destroy"ed, so skip it here
         *
         * i.e. remove classless children
         */
        if (dev->class)
                /* pass */;
        else
                nd_device_unregister(dev, ND_SYNC);
        return 0;
}

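/*
 * Tear down a bus in roughly the reverse order of
 * __nvdimm_bus_register(): drop it from the global list, flush and
 * unregister child devices, free any remaining poison list entries,
 * destroy the ndctl character device, and finally unregister the bus
 * device itself.
 */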
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
{
        if (!nvdimm_bus)
                return;

        mutex_lock(&nvdimm_bus_list_mutex);
        list_del_init(&nvdimm_bus->list);
        mutex_unlock(&nvdimm_bus_list_mutex);

        nd_synchronize();
        device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);

        nvdimm_bus_lock(&nvdimm_bus->dev);
        free_poison_list(&nvdimm_bus->poison_list);
        nvdimm_bus_unlock(&nvdimm_bus->dev);

        nvdimm_bus_destroy_ndctl(nvdimm_bus);

        device_unregister(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);

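/*
 * Register a 'blank' integrity profile so the block layer sizes the
 * integrity payload to @meta_size bytes per sector.  This lets
 * consumers that carry per-sector metadata (e.g. the BTT) expose it
 * through the bio integrity machinery without any checksum generation
 * or verification.
 */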
#ifdef CONFIG_BLK_DEV_INTEGRITY
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
        struct blk_integrity bi;

        if (meta_size == 0)
                return 0;

        bi.profile = NULL;
        bi.tuple_size = meta_size;
        bi.tag_size = meta_size;

        blk_integrity_register(disk, &bi);
        blk_queue_max_integrity_segments(disk->queue, 1);

        return 0;
}
EXPORT_SYMBOL(nd_integrity_init);

#else /* CONFIG_BLK_DEV_INTEGRITY */
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
        return 0;
}
EXPORT_SYMBOL(nd_integrity_init);

#endif

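/*
 * Initialization is ordered bus -> dimm -> region so each later stage
 * can rely on the earlier device types being registered; teardown in
 * libnvdimm_exit() runs in reverse.  subsys_initcall() ensures the bus
 * infrastructure is up before dependent drivers' module_init() runs.
 */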
static __init int libnvdimm_init(void)
{
        int rc;

        rc = nvdimm_bus_init();
        if (rc)
                return rc;
        rc = nvdimm_init();
        if (rc)
                goto err_dimm;
        rc = nd_region_init();
        if (rc)
                goto err_region;
        return 0;
 err_region:
        nvdimm_exit();
 err_dimm:
        nvdimm_bus_exit();
        return rc;
}

static __exit void libnvdimm_exit(void)
{
        WARN_ON(!list_empty(&nvdimm_bus_list));
        nd_region_exit();
        nvdimm_exit();
        nvdimm_bus_exit();
}

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
subsys_initcall(libnvdimm_init);
module_exit(libnvdimm_exit);