uboot/drivers/nvme/nvme.c
/*
 * Copyright (C) 2017 NXP Semiconductors
 * Copyright (C) 2017 Bin Meng <bmeng.cn@gmail.com>
 *
 * SPDX-License-Identifier:     GPL-2.0+
 */

#include <common.h>
#include <dm.h>
#include <errno.h>
#include <memalign.h>
#include <pci.h>
#include <dm/device-internal.h>
#include "nvme.h"

#define NVME_Q_DEPTH            2
#define NVME_AQ_DEPTH           2
#define NVME_SQ_SIZE(depth)     (depth * sizeof(struct nvme_command))
#define NVME_CQ_SIZE(depth)     (depth * sizeof(struct nvme_completion))
#define ADMIN_TIMEOUT           60
#define IO_TIMEOUT              30
#define MAX_PRP_POOL            512

enum nvme_queue_id {
        NVME_ADMIN_Q,
        NVME_IO_Q,
        NVME_Q_NUM,
};

/*
 * An NVM Express queue. Each device has at least two (one for admin
 * commands and one for I/O commands).
 */
struct nvme_queue {
        struct nvme_dev *dev;
        struct nvme_command *sq_cmds;
        struct nvme_completion *cqes;
        wait_queue_head_t sq_full;
        u32 __iomem *q_db;
        u16 q_depth;
        s16 cq_vector;
        u16 sq_head;
        u16 sq_tail;
        u16 cq_head;
        u16 qid;
        u8 cq_phase;
        u8 cqe_seen;
        unsigned long cmdid_data[];
};

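/**
 * nvme_wait_ready() - wait for the controller to reach the requested state
 *
 * @dev:        NVMe controller
 * @enabled:    wait for CSTS.RDY to become set (true) or cleared (false)
 * @return 0 on success, -ETIME if CAP.TO expires first
 */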
static int nvme_wait_ready(struct nvme_dev *dev, bool enabled)
{
        u32 bit = enabled ? NVME_CSTS_RDY : 0;
        int timeout;
        ulong start;

        /* Timeout field in the CAP register is in 500 millisecond units */
        timeout = NVME_CAP_TIMEOUT(dev->cap) * 500;

        start = get_timer(0);
        while (get_timer(start) < timeout) {
                if ((readl(&dev->bar->csts) & NVME_CSTS_RDY) == bit)
                        return 0;
        }

        return -ETIME;
}

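/**
 * nvme_setup_prps() - set up the PRP2 entry for a data transfer
 *
 * PRP1 always points at the start of the buffer. If the remainder of the
 * transfer fits within one more page, PRP2 points at it directly;
 * otherwise PRP2 points at a PRP list built in dev->prp_pool.
 *
 * @dev:        NVMe controller
 * @prp2:       returns the value for the command's PRP2 field
 * @total_len:  total transfer length in bytes
 * @dma_addr:   bus address of the data buffer
 * @return 0 on success, -ENOMEM if the PRP list cannot be allocated
 */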
static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
                           int total_len, u64 dma_addr)
{
        u32 page_size = dev->page_size;
        int offset = dma_addr & (page_size - 1);
        u64 *prp_pool;
        int length = total_len;
        int i, nprps;

        length -= (page_size - offset);

        if (length <= 0) {
                *prp2 = 0;
                return 0;
        }

        if (length)
                dma_addr += (page_size - offset);

        if (length <= page_size) {
                *prp2 = dma_addr;
                return 0;
        }

        nprps = DIV_ROUND_UP(length, page_size);

        if (nprps > dev->prp_entry_num) {
                free(dev->prp_pool);
                dev->prp_pool = malloc(nprps << 3);
                if (!dev->prp_pool) {
                        printf("Error: malloc prp_pool fail\n");
                        return -ENOMEM;
                }
                dev->prp_entry_num = nprps;
        }

        prp_pool = dev->prp_pool;
        i = 0;
        while (nprps) {
                if (i == ((page_size >> 3) - 1)) {
                        *(prp_pool + i) = cpu_to_le64((ulong)prp_pool +
                                        page_size);
                        i = 0;
                        prp_pool += page_size;
                }
                *(prp_pool + i++) = cpu_to_le64(dma_addr);
                dma_addr += page_size;
                nprps--;
        }
        *prp2 = (ulong)dev->prp_pool;

        return 0;
}

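/* Return the next command identifier to place in a submission queue entry */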
static __le16 nvme_get_cmd_id(void)
{
        static unsigned short cmdid;

        return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0);
}

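/*
 * Read the status field of a completion queue entry, invalidating the
 * cache first so we see what the controller wrote via DMA.
 */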
static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index)
{
        u64 start = (ulong)&nvmeq->cqes[index];
        u64 stop = start + sizeof(struct nvme_completion);

        invalidate_dcache_range(start, stop);

        return le16_to_cpu(readw(&(nvmeq->cqes[index].status)));
}

/**
 * nvme_submit_cmd() - copy a command into a queue and ring the doorbell
 *
 * @nvmeq:      The queue to use
 * @cmd:        The command to send
 */
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
        u16 tail = nvmeq->sq_tail;

        memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
        flush_dcache_range((ulong)&nvmeq->sq_cmds[tail],
                           (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd));

        if (++tail == nvmeq->q_depth)
                tail = 0;
        writel(tail, nvmeq->q_db);
        nvmeq->sq_tail = tail;
}

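/**
 * nvme_submit_sync_cmd() - submit a command and poll for its completion
 *
 * @nvmeq:      The queue to use
 * @cmd:        The command to send
 * @result:     if non-NULL, returns the command-specific result dword
 * @timeout:    poll limit; each unit corresponds to 100 ms of polling
 * @return 0 on success, -EIO if the completion reports an error status,
 *         -ETIMEDOUT if no completion arrived in time
 */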
static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
                                struct nvme_command *cmd,
                                u32 *result, unsigned timeout)
{
        u16 head = nvmeq->cq_head;
        u16 phase = nvmeq->cq_phase;
        u16 status;
        ulong start_time;
        ulong timeout_us = timeout * 100000;

        cmd->common.command_id = nvme_get_cmd_id();
        nvme_submit_cmd(nvmeq, cmd);

        start_time = timer_get_us();

        for (;;) {
                status = nvme_read_completion_status(nvmeq, head);
                if ((status & 0x01) == phase)
                        break;
                if (timeout_us > 0 && (timer_get_us() - start_time)
                    >= timeout_us)
                        return -ETIMEDOUT;
        }

        status >>= 1;
        if (status) {
                printf("ERROR: status = %x, phase = %d, head = %d\n",
                       status, phase, head);
                status = 0;
                if (++head == nvmeq->q_depth) {
                        head = 0;
                        phase = !phase;
                }
                writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
                nvmeq->cq_head = head;
                nvmeq->cq_phase = phase;

                return -EIO;
        }

        if (result)
                *result = le32_to_cpu(readl(&(nvmeq->cqes[head].result)));

        if (++head == nvmeq->q_depth) {
                head = 0;
                phase = !phase;
        }
        writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
        nvmeq->cq_head = head;
        nvmeq->cq_phase = phase;

        return status;
}

static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
                                 u32 *result)
{
        return nvme_submit_sync_cmd(dev->queues[NVME_ADMIN_Q], cmd,
                                    result, ADMIN_TIMEOUT);
}

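/**
 * nvme_alloc_queue() - allocate a submission/completion queue pair
 *
 * Both rings are allocated 4 KiB aligned and zeroed, and the queue is
 * registered in dev->queues[qid].
 *
 * @dev:        NVMe controller
 * @qid:        queue identifier (0 for the admin queue)
 * @depth:      number of entries in each ring
 * @return pointer to the new queue, or NULL on allocation failure
 */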
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev,
                                           int qid, int depth)
{
        struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq));
        if (!nvmeq)
                return NULL;
        memset(nvmeq, 0, sizeof(*nvmeq));

        nvmeq->cqes = (void *)memalign(4096, NVME_CQ_SIZE(depth));
        if (!nvmeq->cqes)
                goto free_nvmeq;
        memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth));

        nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth));
        if (!nvmeq->sq_cmds)
                goto free_queue;
        memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth));

        nvmeq->dev = dev;

        nvmeq->cq_head = 0;
        nvmeq->cq_phase = 1;
        nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
        nvmeq->q_depth = depth;
        nvmeq->qid = qid;
        dev->queue_count++;
        dev->queues[qid] = nvmeq;

        return nvmeq;

 free_queue:
        free((void *)nvmeq->cqes);
 free_nvmeq:
        free(nvmeq);

        return NULL;
}

static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
        struct nvme_command c;

        memset(&c, 0, sizeof(c));
        c.delete_queue.opcode = opcode;
        c.delete_queue.qid = cpu_to_le16(id);

        return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid)
{
        return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid);
}

static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid)
{
        return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid);
}

static int nvme_enable_ctrl(struct nvme_dev *dev)
{
        dev->ctrl_config &= ~NVME_CC_SHN_MASK;
        dev->ctrl_config |= NVME_CC_ENABLE;
        writel(cpu_to_le32(dev->ctrl_config), &dev->bar->cc);

        return nvme_wait_ready(dev, true);
}

static int nvme_disable_ctrl(struct nvme_dev *dev)
{
        dev->ctrl_config &= ~NVME_CC_SHN_MASK;
        dev->ctrl_config &= ~NVME_CC_ENABLE;
        writel(cpu_to_le32(dev->ctrl_config), &dev->bar->cc);

        return nvme_wait_ready(dev, false);
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
{
        free((void *)nvmeq->cqes);
        free(nvmeq->sq_cmds);
        free(nvmeq);
}

static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
        int i;

        for (i = dev->queue_count - 1; i >= lowest; i--) {
                struct nvme_queue *nvmeq = dev->queues[i];
                dev->queue_count--;
                dev->queues[i] = NULL;
                nvme_free_queue(nvmeq);
        }
}

static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
        struct nvme_dev *dev = nvmeq->dev;

        nvmeq->sq_tail = 0;
        nvmeq->cq_head = 0;
        nvmeq->cq_phase = 1;
        nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
        memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth));
        flush_dcache_range((ulong)nvmeq->cqes,
                           (ulong)nvmeq->cqes + NVME_CQ_SIZE(nvmeq->q_depth));
        dev->online_queues++;
}

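/**
 * nvme_configure_admin_queue() - bring up the admin queue pair
 *
 * Disables the controller, allocates the admin queue if necessary,
 * programs AQA/ASQ/ACQ and the controller configuration, then re-enables
 * the controller and initialises the queue state.
 *
 * @dev:        NVMe controller
 * @return 0 on success, negative error code on failure
 */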
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
        int result;
        u32 aqa;
        u64 cap = dev->cap;
        struct nvme_queue *nvmeq;
        /* most architectures use 4KB as the page size */
        unsigned page_shift = 12;
        unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
        unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;

        if (page_shift < dev_page_min) {
                debug("Device minimum page size (%u) too large for host (%u)\n",
                      1 << dev_page_min, 1 << page_shift);
                return -ENODEV;
        }

        if (page_shift > dev_page_max) {
                debug("Device maximum page size (%u) smaller than host (%u)\n",
                      1 << dev_page_max, 1 << page_shift);
                page_shift = dev_page_max;
        }

        result = nvme_disable_ctrl(dev);
        if (result < 0)
                return result;

        nvmeq = dev->queues[NVME_ADMIN_Q];
        if (!nvmeq) {
                nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
                if (!nvmeq)
                        return -ENOMEM;
        }

        aqa = nvmeq->q_depth - 1;
        aqa |= aqa << 16;

        dev->page_size = 1 << page_shift;

        dev->ctrl_config = NVME_CC_CSS_NVM;
        dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
        dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
        dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;

        writel(aqa, &dev->bar->aqa);
        nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
        nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);

        result = nvme_enable_ctrl(dev);
        if (result)
                goto free_nvmeq;

        nvmeq->cq_vector = 0;

        nvme_init_queue(dev->queues[NVME_ADMIN_Q], 0);

        return result;

 free_nvmeq:
        nvme_free_queues(dev, 0);

        return result;
}

static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
                            struct nvme_queue *nvmeq)
{
        struct nvme_command c;
        int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;

        memset(&c, 0, sizeof(c));
        c.create_cq.opcode = nvme_admin_create_cq;
        c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
        c.create_cq.cqid = cpu_to_le16(qid);
        c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
        c.create_cq.cq_flags = cpu_to_le16(flags);
        c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

        return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
                            struct nvme_queue *nvmeq)
{
        struct nvme_command c;
        int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;

        memset(&c, 0, sizeof(c));
        c.create_sq.opcode = nvme_admin_create_sq;
        c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
        c.create_sq.sqid = cpu_to_le16(qid);
        c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
        c.create_sq.sq_flags = cpu_to_le16(flags);
        c.create_sq.cqid = cpu_to_le16(qid);

        return nvme_submit_admin_cmd(dev, &c, NULL);
}

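/**
 * nvme_identify() - issue an Identify admin command
 *
 * @dev:        NVMe controller
 * @nsid:       namespace identifier (0 for controller data)
 * @cns:        Controller or Namespace Structure selector
 * @dma_addr:   bus address of a 4 KiB buffer for the identify data
 * @return 0 on success, negative error code on failure
 */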
int nvme_identify(struct nvme_dev *dev, unsigned nsid,
                  unsigned cns, dma_addr_t dma_addr)
{
        struct nvme_command c;
        u32 page_size = dev->page_size;
        int offset = dma_addr & (page_size - 1);
        int length = sizeof(struct nvme_id_ctrl);
        int ret;

        memset(&c, 0, sizeof(c));
        c.identify.opcode = nvme_admin_identify;
        c.identify.nsid = cpu_to_le32(nsid);
        c.identify.prp1 = cpu_to_le64(dma_addr);

        length -= (page_size - offset);
        if (length <= 0) {
                c.identify.prp2 = 0;
        } else {
                dma_addr += (page_size - offset);
                c.identify.prp2 = cpu_to_le64(dma_addr);
        }

        c.identify.cns = cpu_to_le32(cns);

        ret = nvme_submit_admin_cmd(dev, &c, NULL);
        if (!ret)
                invalidate_dcache_range(dma_addr,
                                        dma_addr + sizeof(struct nvme_id_ctrl));

        return ret;
}

int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
                      dma_addr_t dma_addr, u32 *result)
{
        struct nvme_command c;

        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_get_features;
        c.features.nsid = cpu_to_le32(nsid);
        c.features.prp1 = cpu_to_le64(dma_addr);
        c.features.fid = cpu_to_le32(fid);

        /*
         * TODO: add cache invalidate operation when the size of
         * the DMA buffer is known
         */

        return nvme_submit_admin_cmd(dev, &c, result);
}

int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
                      dma_addr_t dma_addr, u32 *result)
{
        struct nvme_command c;

        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_set_features;
        c.features.prp1 = cpu_to_le64(dma_addr);
        c.features.fid = cpu_to_le32(fid);
        c.features.dword11 = cpu_to_le32(dword11);

        /*
         * TODO: add cache flush operation when the size of
         * the DMA buffer is known
         */

        return nvme_submit_admin_cmd(dev, &c, result);
}

static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
        struct nvme_dev *dev = nvmeq->dev;
        int result;

        nvmeq->cq_vector = qid - 1;
        result = nvme_alloc_cq(dev, qid, nvmeq);
        if (result < 0)
                goto release_cq;

        result = nvme_alloc_sq(dev, qid, nvmeq);
        if (result < 0)
                goto release_sq;

        nvme_init_queue(nvmeq, qid);

        return result;

 release_sq:
        nvme_delete_sq(dev, qid);
 release_cq:
        nvme_delete_cq(dev, qid);

        return result;
}

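/**
 * nvme_set_queue_count() - request a number of I/O queue pairs
 *
 * Uses the Number of Queues feature (zero-based encoding) and returns
 * the smaller of the submission/completion queue counts the controller
 * actually granted.
 *
 * @dev:        NVMe controller
 * @count:      number of I/O queue pairs requested
 * @return number of queues granted, 0 or a negative error code on failure
 */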
static int nvme_set_queue_count(struct nvme_dev *dev, int count)
{
        int status;
        u32 result;
        u32 q_count = (count - 1) | ((count - 1) << 16);

        status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES,
                        q_count, 0, &result);

        if (status < 0)
                return status;
        if (status > 1)
                return 0;

        return min(result & 0xffff, result >> 16) + 1;
}

static void nvme_create_io_queues(struct nvme_dev *dev)
{
        unsigned int i;

        for (i = dev->queue_count; i <= dev->max_qid; i++)
                if (!nvme_alloc_queue(dev, i, dev->q_depth))
                        break;

        for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
                if (nvme_create_queue(dev->queues[i], i))
                        break;
}

static int nvme_setup_io_queues(struct nvme_dev *dev)
{
        int nr_io_queues;
        int result;

        nr_io_queues = 1;
        result = nvme_set_queue_count(dev, nr_io_queues);
        if (result <= 0)
                return result;

        dev->max_qid = nr_io_queues;

        /* Free previously allocated queues */
        nvme_free_queues(dev, nr_io_queues + 1);
        nvme_create_io_queues(dev);

        return 0;
}

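/**
 * nvme_get_info_from_identify() - cache controller identify data
 *
 * Issues Identify Controller and records the namespace count, volatile
 * write cache flag, serial/model/firmware strings and the maximum
 * transfer shift derived from MDTS.
 *
 * @dev:        NVMe controller
 * @return 0 on success, -EIO if the identify command fails
 */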
static int nvme_get_info_from_identify(struct nvme_dev *dev)
{
        ALLOC_CACHE_ALIGN_BUFFER(char, buf, sizeof(struct nvme_id_ctrl));
        struct nvme_id_ctrl *ctrl = (struct nvme_id_ctrl *)buf;
        int ret;
        int shift = NVME_CAP_MPSMIN(dev->cap) + 12;

        ret = nvme_identify(dev, 0, 1, (dma_addr_t)ctrl);
        if (ret)
                return -EIO;

        dev->nn = le32_to_cpu(ctrl->nn);
        dev->vwc = ctrl->vwc;
        memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
        memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
        memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
        if (ctrl->mdts) {
                dev->max_transfer_shift = (ctrl->mdts + shift);
        } else {
                /*
                 * Maximum Data Transfer Size (MDTS) field indicates the maximum
                 * data transfer size between the host and the controller. The
                 * host should not submit a command that exceeds this transfer
                 * size. The value is in units of the minimum memory page size
                 * and is reported as a power of two (2^n).
                 *
                 * The spec also says: a value of 0h indicates no restrictions
                 * on transfer size. But in nvme_blk_read/write() below we have
                 * the following algorithm for the maximum number of logical
                 * blocks per transfer:
                 *
                 * u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
                 *
                 * In order for lbas not to overflow, the maximum number is 15
                 * which means dev->max_transfer_shift = 15 + 9 (ns->lba_shift).
                 * Let's use 20 which provides 1MB size.
                 */
                dev->max_transfer_shift = 20;
        }

        return 0;
}

int nvme_scan_namespace(void)
{
        struct uclass *uc;
        struct udevice *dev;
        int ret;

        ret = uclass_get(UCLASS_NVME, &uc);
        if (ret)
                return ret;

        uclass_foreach_dev(dev, uc) {
                ret = device_probe(dev);
                if (ret)
                        return ret;
        }

        return 0;
}

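/**
 * nvme_blk_probe() - probe one namespace as a block device
 *
 * Identifies the namespace, records its LBA format and capacity and
 * fills in the block descriptor for the generic block layer.
 *
 * @udev:       block device (child of the NVMe controller)
 * @return 0 on success, -EIO if the namespace cannot be identified
 */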
static int nvme_blk_probe(struct udevice *udev)
{
        struct nvme_dev *ndev = dev_get_priv(udev->parent);
        struct blk_desc *desc = dev_get_uclass_platdata(udev);
        struct nvme_ns *ns = dev_get_priv(udev);
        u8 flbas;
        ALLOC_CACHE_ALIGN_BUFFER(char, buf, sizeof(struct nvme_id_ns));
        struct nvme_id_ns *id = (struct nvme_id_ns *)buf;
        struct pci_child_platdata *pplat;

        memset(ns, 0, sizeof(*ns));
        ns->dev = ndev;
        /* extract the namespace id from the block device name */
        ns->ns_id = trailing_strtol(udev->name) + 1;
        if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)id))
                return -EIO;

        flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK;
        ns->flbas = flbas;
        ns->lba_shift = id->lbaf[flbas].ds;
        ns->mode_select_num_blocks = le64_to_cpu(id->nsze);
        ns->mode_select_block_len = 1 << ns->lba_shift;
        list_add(&ns->list, &ndev->namespaces);

        desc->lba = ns->mode_select_num_blocks;
        desc->log2blksz = ns->lba_shift;
        desc->blksz = 1 << ns->lba_shift;
        desc->bdev = udev;
        pplat = dev_get_parent_platdata(udev->parent);
        sprintf(desc->vendor, "0x%.4x", pplat->vendor);
        memcpy(desc->product, ndev->serial, sizeof(ndev->serial));
        memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev));
        part_init(desc);

        return 0;
}

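/**
 * nvme_blk_rw() - common block read/write path
 *
 * Splits the request into chunks no larger than the controller's maximum
 * transfer size, sets up PRPs for each chunk and issues NVM read/write
 * commands on the I/O queue, with the required cache maintenance.
 *
 * @udev:       block device
 * @blknr:      start LBA
 * @blkcnt:     number of blocks to transfer
 * @buffer:     data buffer
 * @read:       true for read, false for write
 * @return the number of blocks actually transferred
 */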
static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
                         lbaint_t blkcnt, void *buffer, bool read)
{
        struct nvme_ns *ns = dev_get_priv(udev);
        struct nvme_dev *dev = ns->dev;
        struct nvme_command c;
        struct blk_desc *desc = dev_get_uclass_platdata(udev);
        int status;
        u64 prp2;
        u64 total_len = blkcnt << desc->log2blksz;
        u64 temp_len = total_len;
        /* Advance a copy so the final invalidate covers the whole buffer */
        uintptr_t temp_buffer = (uintptr_t)buffer;

        u64 slba = blknr;
        u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
        u64 total_lbas = blkcnt;

        if (!read)
                flush_dcache_range((unsigned long)buffer,
                                   (unsigned long)buffer + total_len);

        c.rw.opcode = read ? nvme_cmd_read : nvme_cmd_write;
        c.rw.flags = 0;
        c.rw.nsid = cpu_to_le32(ns->ns_id);
        c.rw.control = 0;
        c.rw.dsmgmt = 0;
        c.rw.reftag = 0;
        c.rw.apptag = 0;
        c.rw.appmask = 0;
        c.rw.metadata = 0;

        while (total_lbas) {
                if (total_lbas < lbas) {
                        lbas = (u16)total_lbas;
                        total_lbas = 0;
                } else {
                        total_lbas -= lbas;
                }

                if (nvme_setup_prps(dev, &prp2,
                                    lbas << ns->lba_shift, temp_buffer))
                        return -EIO;
                c.rw.slba = cpu_to_le64(slba);
                slba += lbas;
                c.rw.length = cpu_to_le16(lbas - 1);
                c.rw.prp1 = cpu_to_le64(temp_buffer);
                c.rw.prp2 = cpu_to_le64(prp2);
                status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
                                &c, NULL, IO_TIMEOUT);
                if (status)
                        break;
                temp_len -= (u32)lbas << ns->lba_shift;
                temp_buffer += lbas << ns->lba_shift;
        }

        if (read)
                invalidate_dcache_range((unsigned long)buffer,
                                        (unsigned long)buffer + total_len);

        return (total_len - temp_len) >> desc->log2blksz;
}

static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr,
                           lbaint_t blkcnt, void *buffer)
{
        return nvme_blk_rw(udev, blknr, blkcnt, buffer, true);
}

static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr,
                            lbaint_t blkcnt, const void *buffer)
{
        return nvme_blk_rw(udev, blknr, blkcnt, (void *)buffer, false);
}

static const struct blk_ops nvme_blk_ops = {
        .read   = nvme_blk_read,
        .write  = nvme_blk_write,
};

U_BOOT_DRIVER(nvme_blk) = {
        .name   = "nvme-blk",
        .id     = UCLASS_BLK,
        .probe  = nvme_blk_probe,
        .ops    = &nvme_blk_ops,
        .priv_auto_alloc_size = sizeof(struct nvme_ns),
};

static int nvme_bind(struct udevice *udev)
{
        static int ndev_num;
        char name[20];

        sprintf(name, "nvme#%d", ndev_num++);

        return device_set_name(udev, name);
}

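/**
 * nvme_probe() - probe an NVMe controller
 *
 * Maps BAR0, allocates the queue pointer array and PRP pool, reads CAP to
 * derive the queue depth and doorbell stride, sets up the admin queue and
 * one I/O queue, then reads the controller identify data.
 *
 * @udev:       NVMe controller device
 * @return 0 on success, negative error code on failure
 */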
static int nvme_probe(struct udevice *udev)
{
        int ret;
        struct nvme_dev *ndev = dev_get_priv(udev);

        ndev->instance = trailing_strtol(udev->name);

        INIT_LIST_HEAD(&ndev->namespaces);
        ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0,
                        PCI_REGION_MEM);
        if (readl(&ndev->bar->csts) == -1) {
                ret = -ENODEV;
                printf("Error: %s: NVMe controller not accessible\n",
                       udev->name);
                goto free_nvme;
        }

        ndev->queues = malloc(NVME_Q_NUM * sizeof(struct nvme_queue *));
        if (!ndev->queues) {
                ret = -ENOMEM;
                printf("Error: %s: Out of memory!\n", udev->name);
                goto free_nvme;
        }
        memset(ndev->queues, 0, NVME_Q_NUM * sizeof(struct nvme_queue *));

        ndev->prp_pool = malloc(MAX_PRP_POOL);
        if (!ndev->prp_pool) {
                ret = -ENOMEM;
                printf("Error: %s: Out of memory!\n", udev->name);
                goto free_nvme;
        }
        ndev->prp_entry_num = MAX_PRP_POOL >> 3;

        ndev->cap = nvme_readq(&ndev->bar->cap);
        ndev->q_depth = min_t(int, NVME_CAP_MQES(ndev->cap) + 1, NVME_Q_DEPTH);
        ndev->db_stride = 1 << NVME_CAP_STRIDE(ndev->cap);
        ndev->dbs = ((void __iomem *)ndev->bar) + 4096;

        ret = nvme_configure_admin_queue(ndev);
        if (ret)
                goto free_queue;

        ret = nvme_setup_io_queues(ndev);
        if (ret)
                goto free_queue;

        nvme_get_info_from_identify(ndev);

        return 0;

free_queue:
        free((void *)ndev->queues);
free_nvme:
        return ret;
}

U_BOOT_DRIVER(nvme) = {
        .name   = "nvme",
        .id     = UCLASS_NVME,
        .bind   = nvme_bind,
        .probe  = nvme_probe,
        .priv_auto_alloc_size = sizeof(struct nvme_dev),
};

struct pci_device_id nvme_supported[] = {
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, ~0) },
        {}
};

U_BOOT_PCI_DEVICE(nvme, nvme_supported);