linux/drivers/dax/super.c
/*
 * Copyright(c) 2017 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/magic.h>
#include <linux/genhd.h>
#include <linux/cdev.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/stringhash.h>

static dev_t dax_devt;
DEFINE_STATIC_SRCU(dax_srcu);
static struct vfsmount *dax_mnt;
static DEFINE_IDA(dax_minor_ida);
static struct kmem_cache *dax_cache __read_mostly;
static struct super_block *dax_superblock __read_mostly;

#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
static struct hlist_head dax_host_list[DAX_HASH_SIZE];
static DEFINE_SPINLOCK(dax_host_lock);

int dax_read_lock(void)
{
	return srcu_read_lock(&dax_srcu);
}
EXPORT_SYMBOL_GPL(dax_read_lock);

void dax_read_unlock(int id)
{
	srcu_read_unlock(&dax_srcu, id);
}
EXPORT_SYMBOL_GPL(dax_read_unlock);
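
/*
 * Example (illustrative sketch, not called anywhere in this file): callers
 * bracket dax operations with dax_read_lock()/dax_read_unlock() so that
 * kill_dax() can drain them via synchronize_srcu().  The dax_dev, pgoff,
 * kaddr and pfn names below are assumed caller-provided state:
 *
 *	int id = dax_read_lock();
 *	long len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
 *
 *	dax_read_unlock(id);
 */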

#ifdef CONFIG_BLOCK
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
		pgoff_t *pgoff)
{
	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;

	if (pgoff)
		*pgoff = PHYS_PFN(phys_off);
	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
		return -EINVAL;
	return 0;
}
EXPORT_SYMBOL(bdev_dax_pgoff);

/**
 * __bdev_dax_supported() - Check if the device supports dax for a filesystem
 * @sb: The superblock of the device
 * @blocksize: The block size of the device
 *
 * This is a library function for filesystems to check if the block device
 * can be mounted with the dax option.
 *
 * Return: negative errno if unsupported, 0 if supported.
 */
int __bdev_dax_supported(struct super_block *sb, int blocksize)
{
	struct block_device *bdev = sb->s_bdev;
	struct dax_device *dax_dev;
	pgoff_t pgoff;
	int err, id;
	void *kaddr;
	pfn_t pfn;
	long len;

	if (blocksize != PAGE_SIZE) {
		pr_err("VFS (%s): error: unsupported blocksize for dax\n",
				sb->s_id);
		return -EINVAL;
	}

	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
	if (err) {
		pr_err("VFS (%s): error: unaligned partition for dax\n",
				sb->s_id);
		return err;
	}

	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev) {
		pr_err("VFS (%s): error: device does not support dax\n",
				sb->s_id);
		return -EOPNOTSUPP;
	}

	id = dax_read_lock();
	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
	dax_read_unlock(id);

	put_dax(dax_dev);

	if (len < 1) {
		pr_err("VFS (%s): error: dax access failed (%ld)\n",
				sb->s_id, len);
		return len < 0 ? len : -EIO;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
#endif
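
/*
 * Example (sketch, assuming a filesystem calls the bdev_dax_supported()
 * wrapper from include/linux/dax.h at mount time when the user passed
 * the dax mount option):
 *
 *	err = bdev_dax_supported(sb, PAGE_SIZE);
 *	if (err)
 *		return err;	// fall back or fail the mount
 */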

enum dax_device_flags {
	/* !alive + rcu grace period == no new operations / mappings */
	DAXDEV_ALIVE,
};

/**
 * struct dax_device - anchor object for dax services
 * @list: node for the dax_host_list hash chain used by dax_get_by_host()
 * @inode: core vfs
 * @cdev: optional character interface for "device dax"
 * @host: optional name for lookups where the device path is not available
 * @private: dax driver private data
 * @flags: state and boolean properties
 * @ops: operations supplied by the driver that allocated this device
 */
struct dax_device {
	struct hlist_node list;
	struct inode inode;
	struct cdev cdev;
	const char *host;
	void *private;
	unsigned long flags;
	const struct dax_operations *ops;
};

/**
 * dax_direct_access() - translate a device pgoff to an absolute pfn
 * @dax_dev: a dax_device instance representing the logical memory range
 * @pgoff: offset in pages from the start of the device to translate
 * @nr_pages: number of consecutive pages caller can handle relative to @pfn
 * @kaddr: output parameter that returns a virtual address mapping of pfn
 * @pfn: output parameter that returns an absolute pfn translation of @pgoff
 *
 * Return: negative errno if an error occurs, otherwise the number of
 * pages accessible at the device relative to @pgoff.
 */
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
		void **kaddr, pfn_t *pfn)
{
	long avail;

	/*
	 * The device driver is allowed to sleep, in order to make the
	 * memory directly accessible.
	 */
	might_sleep();

	if (!dax_dev)
		return -EOPNOTSUPP;

	if (!dax_alive(dax_dev))
		return -ENXIO;

	if (nr_pages < 0)
		return nr_pages;

	avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages,
			kaddr, pfn);
	if (!avail)
		return -ERANGE;
	return min(avail, nr_pages);
}
EXPORT_SYMBOL_GPL(dax_direct_access);
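
/*
 * Example (sketch, assuming a caller-provided dax_dev, pgoff, nr_pages and
 * dst buffer; compare the probe in __bdev_dax_supported() above).  The
 * returned count may be smaller than nr_pages, so size further accesses
 * from the return value:
 *
 *	void *kaddr;
 *	pfn_t pfn;
 *	long avail;
 *	int id;
 *
 *	id = dax_read_lock();
 *	avail = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, &pfn);
 *	if (avail > 0)
 *		memcpy(dst, kaddr, avail * PAGE_SIZE);	// direct media access
 *	dax_read_unlock(id);
 */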

bool dax_alive(struct dax_device *dax_dev)
{
	lockdep_assert_held(&dax_srcu);
	return test_bit(DAXDEV_ALIVE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(dax_alive);

static int dax_host_hash(const char *host)
{
	return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
}

/*
 * Note, rcu is not protecting the liveness of dax_dev; rcu is ensuring
 * that any fault handlers or operations that might have seen
 * dax_alive() have completed.  Any operations that start after
 * synchronize_srcu() has run will abort upon seeing !dax_alive().
 */
void kill_dax(struct dax_device *dax_dev)
{
	if (!dax_dev)
		return;

	clear_bit(DAXDEV_ALIVE, &dax_dev->flags);

	synchronize_srcu(&dax_srcu);

	spin_lock(&dax_host_lock);
	hlist_del_init(&dax_dev->list);
	spin_unlock(&dax_host_lock);

	dax_dev->private = NULL;
}
EXPORT_SYMBOL_GPL(kill_dax);
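
/*
 * Example (sketch of the teardown ordering this API implies; the driver
 * names are hypothetical): a driver invalidates the device before dropping
 * its reference, so in-flight dax_direct_access() calls drain before the
 * backing resources go away:
 *
 *	static void my_driver_remove(struct device *dev)
 *	{
 *		struct my_dev *mdev = dev_get_drvdata(dev);
 *
 *		kill_dax(mdev->dax_dev);	// no new operations
 *		put_dax(mdev->dax_dev);		// drop the inode reference
 *	}
 */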

static struct inode *dax_alloc_inode(struct super_block *sb)
{
	struct dax_device *dax_dev;
	struct inode *inode;

	dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
	if (!dax_dev)
		return NULL;

	inode = &dax_dev->inode;
	inode->i_rdev = 0;
	return inode;
}

static struct dax_device *to_dax_dev(struct inode *inode)
{
	return container_of(inode, struct dax_device, inode);
}

static void dax_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct dax_device *dax_dev = to_dax_dev(inode);

	kfree(dax_dev->host);
	dax_dev->host = NULL;
	if (inode->i_rdev)
		ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
	kmem_cache_free(dax_cache, dax_dev);
}

static void dax_destroy_inode(struct inode *inode)
{
	struct dax_device *dax_dev = to_dax_dev(inode);

	WARN_ONCE(test_bit(DAXDEV_ALIVE, &dax_dev->flags),
			"kill_dax() must be called before final iput()\n");
	call_rcu(&inode->i_rcu, dax_i_callback);
}

static const struct super_operations dax_sops = {
	.statfs = simple_statfs,
	.alloc_inode = dax_alloc_inode,
	.destroy_inode = dax_destroy_inode,
	.drop_inode = generic_delete_inode,
};

static struct dentry *dax_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
}

static struct file_system_type dax_fs_type = {
	.name = "dax",
	.mount = dax_mount,
	.kill_sb = kill_anon_super,
};

static int dax_test(struct inode *inode, void *data)
{
	dev_t devt = *(dev_t *) data;

	return inode->i_rdev == devt;
}

static int dax_set(struct inode *inode, void *data)
{
	dev_t devt = *(dev_t *) data;

	inode->i_rdev = devt;
	return 0;
}

static struct dax_device *dax_dev_get(dev_t devt)
{
	struct dax_device *dax_dev;
	struct inode *inode;

	inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31),
			dax_test, dax_set, &devt);

	if (!inode)
		return NULL;

	dax_dev = to_dax_dev(inode);
	if (inode->i_state & I_NEW) {
		set_bit(DAXDEV_ALIVE, &dax_dev->flags);
		inode->i_cdev = &dax_dev->cdev;
		inode->i_mode = S_IFCHR;
		inode->i_flags = S_DAX;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		unlock_new_inode(inode);
	}

	return dax_dev;
}

static void dax_add_host(struct dax_device *dax_dev, const char *host)
{
	int hash;

	/*
	 * Unconditionally init dax_dev since it's coming from a
	 * non-zeroed slab cache
	 */
	INIT_HLIST_NODE(&dax_dev->list);
	dax_dev->host = host;
	if (!host)
		return;

	hash = dax_host_hash(host);
	spin_lock(&dax_host_lock);
	hlist_add_head(&dax_dev->list, &dax_host_list[hash]);
	spin_unlock(&dax_host_lock);
}

struct dax_device *alloc_dax(void *private, const char *__host,
		const struct dax_operations *ops)
{
	struct dax_device *dax_dev;
	const char *host;
	dev_t devt;
	int minor;

	host = kstrdup(__host, GFP_KERNEL);
	if (__host && !host)
		return NULL;

	minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
	if (minor < 0)
		goto err_minor;

	devt = MKDEV(MAJOR(dax_devt), minor);
	dax_dev = dax_dev_get(devt);
	if (!dax_dev)
		goto err_dev;

	dax_add_host(dax_dev, host);
	dax_dev->ops = ops;
	dax_dev->private = private;
	return dax_dev;

 err_dev:
	ida_simple_remove(&dax_minor_ida, minor);
 err_minor:
	kfree(host);
	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_dax);
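
/*
 * Example (sketch of driver-side registration with hypothetical names;
 * compare the callers in drivers/nvdimm/pmem.c and drivers/md/dm.c):
 *
 *	static const struct dax_operations my_dax_ops = {
 *		.direct_access = my_dax_direct_access,
 *	};
 *
 *	dax_dev = alloc_dax(mdev, disk->disk_name, &my_dax_ops);
 *	if (!dax_dev)
 *		return -ENOMEM;
 */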

void put_dax(struct dax_device *dax_dev)
{
	if (!dax_dev)
		return;
	iput(&dax_dev->inode);
}
EXPORT_SYMBOL_GPL(put_dax);

/**
 * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
 * @host: alternate name for the device registered by a dax driver
 */
struct dax_device *dax_get_by_host(const char *host)
{
	struct dax_device *dax_dev, *found = NULL;
	int hash, id;

	if (!host)
		return NULL;

	hash = dax_host_hash(host);

	id = dax_read_lock();
	spin_lock(&dax_host_lock);
	hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
		if (!dax_alive(dax_dev)
				|| strcmp(host, dax_dev->host) != 0)
			continue;

		if (igrab(&dax_dev->inode))
			found = dax_dev;
		break;
	}
	spin_unlock(&dax_host_lock);
	dax_read_unlock(id);

	return found;
}
EXPORT_SYMBOL_GPL(dax_get_by_host);
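
/*
 * Example (sketch; the lookup key is the gendisk name, as in
 * __bdev_dax_supported() above, and the caller owns the returned
 * reference):
 *
 *	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
 *	if (!dax_dev)
 *		return -EOPNOTSUPP;
 *	...
 *	put_dax(dax_dev);
 */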

/**
 * inode_dax() - convert a public inode into its dax_dev
 * @inode: An inode with i_cdev pointing to a dax_dev
 *
 * Note this is not equivalent to to_dax_dev(), which is for private
 * internal use where we know the inode filesystem type == dax_fs_type.
 */
struct dax_device *inode_dax(struct inode *inode)
{
	struct cdev *cdev = inode->i_cdev;

	return container_of(cdev, struct dax_device, cdev);
}
EXPORT_SYMBOL_GPL(inode_dax);
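
/*
 * Example (sketch of how a "device dax" chardev open might recover its
 * dax_device; the function name is hypothetical, loosely modeled on
 * drivers/dax/device.c):
 *
 *	static int my_dax_open(struct inode *inode, struct file *filp)
 *	{
 *		struct dax_device *dax_dev = inode_dax(inode);
 *
 *		filp->private_data = dax_get_private(dax_dev);
 *		return 0;
 *	}
 */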

struct inode *dax_inode(struct dax_device *dax_dev)
{
	return &dax_dev->inode;
}
EXPORT_SYMBOL_GPL(dax_inode);

void *dax_get_private(struct dax_device *dax_dev)
{
	return dax_dev->private;
}
EXPORT_SYMBOL_GPL(dax_get_private);

static void init_once(void *_dax_dev)
{
	struct dax_device *dax_dev = _dax_dev;
	struct inode *inode = &dax_dev->inode;

	memset(dax_dev, 0, sizeof(*dax_dev));
	inode_init_once(inode);
}

static int __dax_fs_init(void)
{
	int rc;

	dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0,
			(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
			 SLAB_MEM_SPREAD),
			init_once);
	if (!dax_cache)
		return -ENOMEM;

	rc = register_filesystem(&dax_fs_type);
	if (rc)
		goto err_register_fs;

	dax_mnt = kern_mount(&dax_fs_type);
	if (IS_ERR(dax_mnt)) {
		rc = PTR_ERR(dax_mnt);
		goto err_mount;
	}
	dax_superblock = dax_mnt->mnt_sb;

	return 0;

 err_mount:
	unregister_filesystem(&dax_fs_type);
 err_register_fs:
	kmem_cache_destroy(dax_cache);

	return rc;
}

static void __dax_fs_exit(void)
{
	kern_unmount(dax_mnt);
	unregister_filesystem(&dax_fs_type);
	kmem_cache_destroy(dax_cache);
}

static int __init dax_fs_init(void)
{
	int rc;

	rc = __dax_fs_init();
	if (rc)
		return rc;

	rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
	if (rc)
		__dax_fs_exit();
	return rc;
}

static void __exit dax_fs_exit(void)
{
	unregister_chrdev_region(dax_devt, MINORMASK+1);
	ida_destroy(&dax_minor_ida);
	__dax_fs_exit();
}

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
subsys_initcall(dax_fs_init);
module_exit(dax_fs_exit);