linux/drivers/nvme/target/io-cmd-bdev.c
// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/module.h>
#include "nvmet.h"

void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
        const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
        /* Number of logical blocks per physical block. */
        const u32 lpp = ql->physical_block_size / ql->logical_block_size;
        /* Logical blocks per physical block, 0's based. */
        const __le16 lpp0b = to0based(lpp);
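        /*
         * For example, a device with 512 B logical blocks and 4 KiB
         * physical blocks has lpp == 8, reported below as the 0's based
         * value 7.
         */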

        /*
         * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
         * NAWUPF, and NACWU are defined for this namespace and should be
         * used by the host for this namespace instead of the AWUN, AWUPF,
         * and ACWU fields in the Identify Controller data structure. If
         * any of these fields is zero, the corresponding field from the
         * Identify Controller data structure should be used instead.
         */
        id->nsfeat |= 1 << 1;
        id->nawun = lpp0b;
        id->nawupf = lpp0b;
        id->nacwu = lpp0b;

        /*
         * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
         * NOWS are defined for this namespace and should be used by
         * the host for I/O optimization.
         */
        id->nsfeat |= 1 << 4;
        /* NPWG = Namespace Preferred Write Granularity. 0's based */
        id->npwg = lpp0b;
        /* NPWA = Namespace Preferred Write Alignment. 0's based */
        id->npwa = id->npwg;
        /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
        id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
        /* NPDA = Namespace Preferred Deallocate Alignment */
        id->npda = id->npdg;
        /* NOWS = Namespace Optimal Write Size */
        id->nows = to0based(ql->io_opt / ql->logical_block_size);
}

void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
        if (ns->bdev) {
                blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
                ns->bdev = NULL;
        }
}

static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
        struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

        if (bi) {
                ns->metadata_size = bi->tuple_size;
                if (bi->profile == &t10_pi_type1_crc)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE1;
                else if (bi->profile == &t10_pi_type3_crc)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE3;
                else
                        /* Unsupported metadata type */
                        ns->metadata_size = 0;
        }
}

int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
        int ret;

        ns->bdev = blkdev_get_by_path(ns->device_path,
                        FMODE_READ | FMODE_WRITE, NULL);
        if (IS_ERR(ns->bdev)) {
                ret = PTR_ERR(ns->bdev);
                if (ret != -ENOTBLK) {
                        pr_err("failed to open block device %s: (%ld)\n",
                                        ns->device_path, PTR_ERR(ns->bdev));
                }
                ns->bdev = NULL;
                return ret;
        }
        ns->size = bdev_nr_bytes(ns->bdev);
        ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

        ns->pi_type = 0;
        ns->metadata_size = 0;
        if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
                nvmet_bdev_ns_enable_integrity(ns);

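        /*
         * Zoned block devices are validated by nvmet_bdev_zns_enable() and,
         * on success, exposed to the host with the Zoned Namespace command
         * set identifier.
         */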
        if (bdev_is_zoned(ns->bdev)) {
                if (!nvmet_bdev_zns_enable(ns)) {
                        nvmet_bdev_ns_disable(ns);
                        return -EINVAL;
                }
                ns->csi = NVME_CSI_ZNS;
        }

        return 0;
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
        ns->size = bdev_nr_bytes(ns->bdev);
}

u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
        u16 status = NVME_SC_SUCCESS;

        if (likely(blk_sts == BLK_STS_OK))
                return status;
        /*
         * Right now there exists an M : 1 mapping from block layer errors
         * to NVMe status codes (see nvme_error_status()). For consistency,
         * when we reverse map we use the most appropriate NVMe status code
         * from the group of NVMe status codes used in nvme_error_status().
         */
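        /*
         * Status codes OR'd with NVME_SC_DNR set the Do Not Retry bit so
         * the host knows that retrying the command will not succeed.
         */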
        switch (blk_sts) {
        case BLK_STS_NOSPC:
                status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, length);
                break;
        case BLK_STS_TARGET:
                status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, slba);
                break;
        case BLK_STS_NOTSUPP:
                req->error_loc = offsetof(struct nvme_common_command, opcode);
                switch (req->cmd->common.opcode) {
                case nvme_cmd_dsm:
                case nvme_cmd_write_zeroes:
                        status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
                        break;
                default:
                        status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                }
                break;
        case BLK_STS_MEDIUM:
                status = NVME_SC_ACCESS_DENIED;
                req->error_loc = offsetof(struct nvme_rw_command, nsid);
                break;
        case BLK_STS_IOERR:
        default:
                status = NVME_SC_INTERNAL | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_common_command, opcode);
        }

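        /* Record the LBA associated with the failure for error reporting. */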
        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->error_slba = le64_to_cpu(req->cmd->rw.slba);
                break;
        case nvme_cmd_write_zeroes:
                req->error_slba =
                        le64_to_cpu(req->cmd->write_zeroes.slba);
                break;
        default:
                req->error_slba = 0;
        }
        return status;
}

static void nvmet_bio_done(struct bio *bio)
{
        struct nvmet_req *req = bio->bi_private;

        nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
        nvmet_req_bio_put(req, bio);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        struct blk_integrity *bi;
        struct bio_integrity_payload *bip;
        int rc;
        size_t resid, len;

        bi = bdev_get_integrity(req->ns->bdev);
        if (unlikely(!bi)) {
                pr_err("Unable to locate bio_integrity\n");
                return -ENODEV;
        }

        bip = bio_integrity_alloc(bio, GFP_NOIO,
                                        bio_max_segs(req->metadata_sg_cnt));
        if (IS_ERR(bip)) {
                pr_err("Unable to allocate bio_integrity_payload\n");
                return PTR_ERR(bip);
        }

        bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
        /* virtual start sector must be in integrity interval units */
        bip_set_seed(bip, bio->bi_iter.bi_sector >>
                     (bi->interval_exp - SECTOR_SHIFT));

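        /*
         * Attach pages from the metadata scatterlist to the integrity
         * payload until the protection information bytes expected for this
         * bio are covered.
         */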
        resid = bip->bip_iter.bi_size;
        while (resid > 0 && sg_miter_next(miter)) {
                len = min_t(size_t, miter->length, resid);
                rc = bio_integrity_add_page(bio, miter->page, len,
                                            offset_in_page(miter->addr));
                if (unlikely(rc != len)) {
                        pr_err("bio_integrity_add_page() failed; %d\n", rc);
                        sg_miter_stop(miter);
                        return -ENOMEM;
                }

                resid -= len;
                if (len < miter->length)
                        miter->consumed -= miter->length - len;
        }
        sg_miter_stop(miter);

        return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
        unsigned int sg_cnt = req->sg_cnt;
        struct bio *bio;
        struct scatterlist *sg;
        struct blk_plug plug;
        sector_t sector;
        int op, i, rc;
        struct sg_mapping_iter prot_miter;
        unsigned int iter_flags;
        unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

        if (!nvmet_check_transfer_len(req, total_len))
                return;

        if (!req->sg_cnt) {
                nvmet_req_complete(req, 0);
                return;
        }

        if (req->cmd->rw.opcode == nvme_cmd_write) {
                op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
                if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
                        op |= REQ_FUA;
                iter_flags = SG_MITER_TO_SG;
        } else {
                op = REQ_OP_READ;
                iter_flags = SG_MITER_FROM_SG;
        }

        if (is_pci_p2pdma_page(sg_page(req->sg)))
                op |= REQ_NOMERGE;

        sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

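        /*
         * Transfers small enough to fit in the bvec array embedded in the
         * request use the inline bio; anything larger allocates a bio
         * sized for the scatterlist.
         */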
        if (nvmet_use_inline_bvec(req)) {
                bio = &req->b.inline_bio;
                bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
        } else {
                bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
        }
        bio_set_dev(bio, req->ns->bdev);
        bio->bi_iter.bi_sector = sector;
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;
        bio->bi_opf = op;

        blk_start_plug(&plug);
        if (req->metadata_len)
                sg_miter_start(&prot_miter, req->metadata_sg,
                               req->metadata_sg_cnt, iter_flags);

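        /*
         * Map the data scatterlist into the bio. Whenever a bio fills up,
         * attach any pending metadata, allocate a new bio, chain it behind
         * the full one and submit the full one; nvmet_bio_done() runs only
         * after every bio in the chain has completed.
         */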
        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
                                != sg->length) {
                        struct bio *prev = bio;

                        if (req->metadata_len) {
                                rc = nvmet_bdev_alloc_bip(req, bio,
                                                          &prot_miter);
                                if (unlikely(rc)) {
                                        bio_io_error(bio);
                                        return;
                                }
                        }

                        bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
                        bio_set_dev(bio, req->ns->bdev);
                        bio->bi_iter.bi_sector = sector;
                        bio->bi_opf = op;

                        bio_chain(bio, prev);
                        submit_bio(prev);
                }

                sector += sg->length >> 9;
                sg_cnt--;
        }

        if (req->metadata_len) {
                rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
                if (unlikely(rc)) {
                        bio_io_error(bio);
                        return;
                }
        }

        submit_bio(bio);
        blk_finish_plug(&plug);
}

static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
        struct bio *bio = &req->b.inline_bio;

        if (!nvmet_check_transfer_len(req, 0))
                return;

        bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
        bio_set_dev(bio, req->ns->bdev);
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;
        bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

        submit_bio(bio);
}

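/*
 * Synchronous flush helper: issues the cache flush inline and translates
 * the block layer result into an NVMe status code.
 */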
u16 nvmet_bdev_flush(struct nvmet_req *req)
{
        if (blkdev_issue_flush(req->ns->bdev))
                return NVME_SC_INTERNAL | NVME_SC_DNR;
        return 0;
}

static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
                struct nvme_dsm_range *range, struct bio **bio)
{
        struct nvmet_ns *ns = req->ns;
        int ret;

        ret = __blkdev_issue_discard(ns->bdev,
                        nvmet_lba_to_sect(ns, range->slba),
                        le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
                        GFP_KERNEL, 0, bio);
        if (ret && ret != -EOPNOTSUPP) {
                req->error_slba = le64_to_cpu(range->slba);
                return errno_to_nvme_status(req, ret);
        }
        return NVME_SC_SUCCESS;
}

static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
        struct nvme_dsm_range range;
        struct bio *bio = NULL;
        int i;
        u16 status;

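        /*
         * dsm.nr is a 0's based count of ranges, hence the inclusive loop
         * bound; each range descriptor is copied out of the SGL in turn.
         */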
        for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
                status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
                                sizeof(range));
                if (status)
                        break;

                status = nvmet_bdev_discard_range(req, &range, &bio);
                if (status)
                        break;
        }

        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                if (status)
                        bio_io_error(bio);
                else
                        submit_bio(bio);
        } else {
                nvmet_req_complete(req, status);
        }
}

static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
        if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
                return;

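        /*
         * Only the Deallocate (AD) attribute is implemented; the integral
         * read/write hints are accepted and completed without action.
         */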
        switch (le32_to_cpu(req->cmd->dsm.attributes)) {
        case NVME_DSMGMT_AD:
                nvmet_bdev_execute_discard(req);
                return;
        case NVME_DSMGMT_IDR:
        case NVME_DSMGMT_IDW:
        default:
                /* Not supported yet */
                nvmet_req_complete(req, 0);
                return;
        }
}

static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
        struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
        struct bio *bio = NULL;
        sector_t sector;
        sector_t nr_sector;
        int ret;

        if (!nvmet_check_transfer_len(req, 0))
                return;

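        /*
         * Convert the starting LBA and the 0's based NLB count from logical
         * blocks to 512 B sectors before handing them to the block layer.
         */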
        sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
        nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
                (req->ns->blksize_shift - 9));

        ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
                        GFP_KERNEL, &bio, 0);
        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                submit_bio(bio);
        } else {
                nvmet_req_complete(req, errno_to_nvme_status(req, ret));
        }
}

u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->execute = nvmet_bdev_execute_rw;
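                /*
                 * Metadata is transferred only when the controller was
                 * created with PI support and the namespace format carries
                 * protection information.
                 */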
                if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
                        req->metadata_len = nvmet_rw_metadata_len(req);
                return 0;
        case nvme_cmd_flush:
                req->execute = nvmet_bdev_execute_flush;
                return 0;
        case nvme_cmd_dsm:
                req->execute = nvmet_bdev_execute_dsm;
                return 0;
        case nvme_cmd_write_zeroes:
                req->execute = nvmet_bdev_execute_write_zeroes;
                return 0;
        default:
                return nvmet_report_invalid_opcode(req);
        }
}