linux/drivers/nvme/host/nvme.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (c) 2011-2014, Intel Corporation.
 */

#ifndef _NVME_H
#define _NVME_H

#include <linux/nvme.h>
#include <linux/cdev.h>
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>

extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)

extern unsigned int admin_timeout;
#define ADMIN_TIMEOUT   (admin_timeout * HZ)

#define NVME_DEFAULT_KATO       5
#define NVME_KATO_GRACE         10

extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;
extern struct workqueue_struct *nvme_delete_wq;

enum {
        NVME_NS_LBA             = 0,
        NVME_NS_LIGHTNVM        = 1,
};

/*
 * List of workarounds for devices that require behavior not specified in
 * the standard.
 */
enum nvme_quirks {
        /*
         * Prefers I/O aligned to a stripe size specified in a vendor
         * specific Identify field.
         */
        NVME_QUIRK_STRIPE_SIZE                  = (1 << 0),

        /*
         * The controller doesn't handle Identify values other than 0 or 1
         * correctly.
         */
        NVME_QUIRK_IDENTIFY_CNS                 = (1 << 1),

        /*
         * The controller deterministically returns 0's on reads to
         * logical blocks that deallocate was called on.
         */
        NVME_QUIRK_DEALLOCATE_ZEROES            = (1 << 2),

        /*
         * The controller needs a delay before it starts checking device
         * readiness, which is done by reading the NVME_CSTS_RDY bit.
         */
        NVME_QUIRK_DELAY_BEFORE_CHK_RDY         = (1 << 3),

        /*
         * APST should not be used.
         */
        NVME_QUIRK_NO_APST                      = (1 << 4),

        /*
         * The deepest sleep state should not be used.
         */
        NVME_QUIRK_NO_DEEPEST_PS                = (1 << 5),

        /*
         * Supports the LightNVM command set if indicated in vs[1].
         */
        NVME_QUIRK_LIGHTNVM                     = (1 << 6),

        /*
         * Set MEDIUM priority on SQ creation
         */
        NVME_QUIRK_MEDIUM_PRIO_SQ               = (1 << 7),

        /*
         * Ignore device provided subnqn.
         */
        NVME_QUIRK_IGNORE_DEV_SUBNQN            = (1 << 8),

        /*
         * Broken Write Zeroes.
         */
        NVME_QUIRK_DISABLE_WRITE_ZEROES         = (1 << 9),
};
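
/*
 * Illustrative sketch (the names below are hypothetical): a quirk is attached
 * to a controller through the driver's device ID tables and consumed by
 * testing the corresponding bit in ctrl->quirks, e.g.:
 *
 *	if (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)
 *		skip_deepest_power_state = true;
 *
 * Only the quirk bits above are authoritative; the check is just an example
 * of how one of them would be used.
 */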

/*
 * Common request structure for NVMe passthrough.  All drivers must have
 * this structure as the first member of their request-private data.
 */
struct nvme_request {
        struct nvme_command     *cmd;
        union nvme_result       result;
        u8                      retries;
        u8                      flags;
        u16                     status;
        struct nvme_ctrl        *ctrl;
};
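
/*
 * Example (hypothetical transport, the "foo" names are illustrative only):
 * a driver embeds struct nvme_request at the start of its per-request blk-mq
 * PDU so that nvme_req() below can recover it from any struct request:
 *
 *	struct foo_request {
 *		struct nvme_request	req;	// must stay the first member
 *		struct foo_queue	*queue;
 *		// transport-specific fields follow
 *	};
 */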

/*
 * Mark a bio as coming in through the mpath node.
 */
#define REQ_NVME_MPATH          REQ_DRV

enum {
        NVME_REQ_CANCELLED              = (1 << 0),
        NVME_REQ_USERCMD                = (1 << 1),
};

static inline struct nvme_request *nvme_req(struct request *req)
{
        return blk_mq_rq_to_pdu(req);
}

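/*
 * nvme_req_qid() maps a request to its NVMe queue ID: requests without an
 * associated gendisk (e.g. admin commands) report qid 0, while I/O requests
 * report the 1-based hardware queue index, matching the convention that
 * queue ID 0 is the admin queue.
 */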
static inline u16 nvme_req_qid(struct request *req)
{
        if (!req->rq_disk)
                return 0;
        return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
}

/* The below value is the delay (in ms) needed before checking readiness
 * for the PCI_DEVICE(0x1c58, 0x0003), which needs the
 * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value was found
 * empirically.
 */
#define NVME_QUIRK_DELAY_AMOUNT         2300

enum nvme_ctrl_state {
        NVME_CTRL_NEW,
        NVME_CTRL_LIVE,
        NVME_CTRL_ADMIN_ONLY,    /* Only admin queue live */
        NVME_CTRL_RESETTING,
        NVME_CTRL_CONNECTING,
        NVME_CTRL_DELETING,
        NVME_CTRL_DEAD,
};

struct nvme_ctrl {
        bool comp_seen;
        enum nvme_ctrl_state state;
        bool identified;
        spinlock_t lock;
        struct mutex scan_lock;
        const struct nvme_ctrl_ops *ops;
        struct request_queue *admin_q;
        struct request_queue *connect_q;
        struct device *dev;
        int instance;
        int numa_node;
        struct blk_mq_tag_set *tagset;
        struct blk_mq_tag_set *admin_tagset;
        struct list_head namespaces;
        struct rw_semaphore namespaces_rwsem;
        struct device ctrl_device;
        struct device *device;  /* char device */
        struct cdev cdev;
        struct work_struct reset_work;
        struct work_struct delete_work;

        struct nvme_subsystem *subsys;
        struct list_head subsys_entry;

        struct opal_dev *opal_dev;

        char name[12];
        u16 cntlid;

        u32 ctrl_config;
        u16 mtfa;
        u32 queue_count;

        u64 cap;
        u32 page_size;
        u32 max_hw_sectors;
        u32 max_segments;
        u16 crdt[3];
        u16 oncs;
        u16 oacs;
        u16 nssa;
        u16 nr_streams;
        u32 max_namespaces;
        atomic_t abort_limit;
        u8 vwc;
        u32 vs;
        u32 sgls;
        u16 kas;
        u8 npss;
        u8 apsta;
        u32 oaes;
        u32 aen_result;
        u32 ctratt;
        unsigned int shutdown_timeout;
        unsigned int kato;
        bool subsystem;
        unsigned long quirks;
        struct nvme_id_power_state psd[32];
        struct nvme_effects_log *effects;
        struct work_struct scan_work;
        struct work_struct async_event_work;
        struct delayed_work ka_work;
        struct nvme_command ka_cmd;
        struct work_struct fw_act_work;
        unsigned long events;

#ifdef CONFIG_NVME_MULTIPATH
        /* asymmetric namespace access: */
        u8 anacap;
        u8 anatt;
        u32 anagrpmax;
        u32 nanagrpid;
        struct mutex ana_lock;
        struct nvme_ana_rsp_hdr *ana_log_buf;
        size_t ana_log_size;
        struct timer_list anatt_timer;
        struct work_struct ana_work;
#endif

        /* Power saving configuration */
        u64 ps_max_latency_us;
        bool apst_enabled;

        /* PCIe only: */
        u32 hmpre;
        u32 hmmin;
        u32 hmminds;
        u16 hmmaxd;

        /* Fabrics only */
        u16 sqsize;
        u32 ioccsz;
        u32 iorcsz;
        u16 icdoff;
        u16 maxcmd;
        int nr_reconnects;
        struct nvmf_ctrl_options *opts;

        struct page *discard_page;
        unsigned long discard_page_busy;
};

enum nvme_iopolicy {
        NVME_IOPOLICY_NUMA,
        NVME_IOPOLICY_RR,
};

struct nvme_subsystem {
        int                     instance;
        struct device           dev;
        /*
         * Because we unregister the device on the last put we need
         * a separate refcount.
         */
        struct kref             ref;
        struct list_head        entry;
        struct mutex            lock;
        struct list_head        ctrls;
        struct list_head        nsheads;
        char                    subnqn[NVMF_NQN_SIZE];
        char                    serial[20];
        char                    model[40];
        char                    firmware_rev[8];
        u8                      cmic;
        u16                     vendor_id;
        struct ida              ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
        enum nvme_iopolicy      iopolicy;
#endif
};

/*
 * Container structure for unique namespace identifiers.
 */
struct nvme_ns_ids {
        u8      eui64[8];
        u8      nguid[16];
        uuid_t  uuid;
};

/*
 * Anchor structure for namespaces.  There is one for each namespace in an
 * NVMe subsystem that any of our controllers can see, and the namespace
 * structure for each controller is chained off of it.  For private namespaces
 * there is a 1:1 relation to our namespace structures, that is ->list
 * only ever has a single entry for private namespaces.
 */
struct nvme_ns_head {
        struct list_head        list;
        struct srcu_struct      srcu;
        struct nvme_subsystem   *subsys;
        unsigned                ns_id;
        struct nvme_ns_ids      ids;
        struct list_head        entry;
        struct kref             ref;
        int                     instance;
#ifdef CONFIG_NVME_MULTIPATH
        struct gendisk          *disk;
        struct bio_list         requeue_list;
        spinlock_t              requeue_lock;
        struct work_struct      requeue_work;
        struct mutex            lock;
        struct nvme_ns __rcu    *current_path[];
#endif
};
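
/*
 * Each per-controller struct nvme_ns below links into nvme_ns_head.list
 * through its ->siblings entry, and readers walk that list under ->srcu.
 * With CONFIG_NVME_MULTIPATH, ->current_path[] holds one cached path per
 * NUMA node for I/O submission.
 */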

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
struct nvme_fault_inject {
        struct fault_attr attr;
        struct dentry *parent;
        bool dont_retry;        /* DNR, do not retry */
        u16 status;             /* status code */
};
#endif

struct nvme_ns {
        struct list_head list;

        struct nvme_ctrl *ctrl;
        struct request_queue *queue;
        struct gendisk *disk;
#ifdef CONFIG_NVME_MULTIPATH
        enum nvme_ana_state ana_state;
        u32 ana_grpid;
#endif
        struct list_head siblings;
        struct nvm_dev *ndev;
        struct kref kref;
        struct nvme_ns_head *head;

        int lba_shift;
        u16 ms;
        u16 sgs;
        u32 sws;
        bool ext;
        u8 pi_type;
        unsigned long flags;
#define NVME_NS_REMOVING        0
#define NVME_NS_DEAD            1
#define NVME_NS_ANA_PENDING     2
        u16 noiob;

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
        struct nvme_fault_inject fault_inject;
#endif

};

struct nvme_ctrl_ops {
        const char *name;
        struct module *module;
        unsigned int flags;
#define NVME_F_FABRICS                  (1 << 0)
#define NVME_F_METADATA_SUPPORTED       (1 << 1)
#define NVME_F_PCI_P2PDMA               (1 << 2)
        int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
        int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
        int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
        void (*free_ctrl)(struct nvme_ctrl *ctrl);
        void (*submit_async_event)(struct nvme_ctrl *ctrl);
        void (*delete_ctrl)(struct nvme_ctrl *ctrl);
        int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
};
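
/*
 * Illustrative sketch of how a transport might fill in these callbacks; the
 * "foo" names are hypothetical, while the nvmf_* register helpers are the
 * ones the fabrics transports commonly reuse:
 *
 *	static const struct nvme_ctrl_ops foo_ctrl_ops = {
 *		.name			= "foo",
 *		.module			= THIS_MODULE,
 *		.flags			= NVME_F_FABRICS,
 *		.reg_read32		= nvmf_reg_read32,
 *		.reg_write32		= nvmf_reg_write32,
 *		.reg_read64		= nvmf_reg_read64,
 *		.free_ctrl		= foo_free_ctrl,
 *		.submit_async_event	= foo_submit_async_event,
 *		.delete_ctrl		= foo_delete_ctrl,
 *		.get_address		= nvmf_get_address,
 *	};
 */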

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
void nvme_fault_inject_init(struct nvme_ns *ns);
void nvme_fault_inject_fini(struct nvme_ns *ns);
void nvme_should_fail(struct request *req);
#else
static inline void nvme_fault_inject_init(struct nvme_ns *ns) {}
static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {}
static inline void nvme_should_fail(struct request *req) {}
#endif

static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
        if (!ctrl->subsystem)
                return -ENOTTY;
        /* 0x4E564D65 is ASCII for "NVMe", the NSSR reset value */
        return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
}

static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
{
        return (sector >> (ns->lba_shift - 9));
}
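
/*
 * Worked example: a namespace formatted with 4096-byte logical blocks has
 * lba_shift == 12, so a 512-byte sector number is converted to an LBA by
 * shifting right by 3, e.g. sector 80 maps to LBA 10.
 */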

static inline void nvme_end_request(struct request *req, __le16 status,
                union nvme_result result)
{
        struct nvme_request *rq = nvme_req(req);

        /* CQE status bit 0 is the phase tag; keep only the status code */
        rq->status = le16_to_cpu(status) >> 1;
        rq->result = result;
        /* inject error when permitted by fault injection framework */
        nvme_should_fail(req);
        blk_mq_complete_request(req);
}
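
/*
 * Illustrative sketch (hypothetical transport code): a completion handler
 * looks the request up by the CQE's command_id and hands the raw status and
 * result to nvme_end_request() above, which strips the phase bit and
 * completes the request through blk-mq:
 *
 *	struct nvme_completion *cqe;	// read from the transport's CQ
 *	struct request *rq;
 *
 *	rq = blk_mq_tag_to_rq(tags, cqe->command_id);	// queue's blk-mq tags
 *	nvme_end_request(rq, cqe->status, cqe->result);
 */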

static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
{
        get_device(ctrl->device);
}

static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
        put_device(ctrl->device);
}

void nvme_complete_rq(struct request *req);
bool nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                enum nvme_ctrl_state new_state);
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
                const struct nvme_ctrl_ops *ops, unsigned long quirks);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);

void nvme_remove_namespaces(struct nvme_ctrl *ctrl);

int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
                bool send);

void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
                volatile union nvme_result *res);

void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
void nvme_kill_queues(struct nvme_ctrl *ctrl);
void nvme_sync_queues(struct nvme_ctrl *ctrl);
void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl);

#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
                struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
void nvme_cleanup_cmd(struct request *req);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
                struct nvme_command *cmd);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                union nvme_result *result, void *buffer, unsigned bufflen,
                unsigned timeout, int qid, int at_head,
                blk_mq_req_flags_t flags, bool poll);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);

int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
                void *log, size_t size, u64 offset);

extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct block_device_operations nvme_ns_head_ops;

#ifdef CONFIG_NVME_MULTIPATH
bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
                        struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
void nvme_mpath_clear_current_path(struct nvme_ns *ns);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);

static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
{
        struct nvme_ns_head *head = ns->head;

        if (head->disk && list_empty(&head->list))
                kblockd_schedule_work(&head->requeue_work);
}

extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;

#else
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
{
        return false;
}
/*
 * Without the multipath code enabled, multiple controllers per subsystem are
 * visible as separate devices and thus we cannot use the subsystem instance;
 * names are built from the controller instance instead.
 */
static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
                                      struct nvme_ctrl *ctrl, int *flags)
{
        sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
}

static inline void nvme_failover_req(struct request *req)
{
}
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
}
static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
                struct nvme_ns_head *head)
{
        return 0;
}
static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
                struct nvme_id_ns *id)
{
}
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
}
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
{
}
static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
                struct nvme_id_ctrl *id)
{
        if (ctrl->subsys->cmic & (1 << 3))
                dev_warn(ctrl->device,
"Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n");
        return 0;
}
static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
}
static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
}
#endif /* CONFIG_NVME_MULTIPATH */

#ifdef CONFIG_NVM
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
extern const struct attribute_group nvme_nvm_attr_group;
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
#else
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
                                    int node)
{
        return 0;
}

static inline void nvme_nvm_unregister(struct nvme_ns *ns) {};
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
                                                        unsigned long arg)
{
        return -ENOTTY;
}
#endif /* CONFIG_NVM */

static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
        return dev_to_disk(dev)->private_data;
}

#endif /* _NVME_H */