dpdk/drivers/raw/ntb/ntb.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation.
 */
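/*
 * Rawdev PMD for Intel Non-Transparent Bridge (NTB) devices. Two hosts
 * exchange packets over PCIe memory windows (MWs): queue parameters are
 * negotiated through scratchpad registers, link events are signalled with
 * doorbells, and data moves through descriptor/used rings that live in the
 * shared memory windows.
 */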
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

#include <rte_common.h>
#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_pci.h>
#include <rte_mbuf.h>
#include <rte_bus_pci.h>
#include <rte_memzone.h>
#include <rte_memcpy.h>
#include <rte_rawdev.h>
#include <rte_rawdev_pmd.h>

#include "ntb_hw_intel.h"
#include "rte_pmd_ntb.h"
#include "ntb.h"

static const struct rte_pci_id pci_id_ntb_map[] = {
        { RTE_PCI_DEVICE(NTB_INTEL_VENDOR_ID, NTB_INTEL_DEV_ID_B2B_SKX) },
        { RTE_PCI_DEVICE(NTB_INTEL_VENDOR_ID, NTB_INTEL_DEV_ID_B2B_ICX) },
        { .vendor_id = 0, /* sentinel */ },
};

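/*
 * xstats layout: NTB_XSTATS_NUM device-total entries come first, followed
 * by one NTB_XSTATS_NUM block per queue pair. The per-queue blocks are
 * updated on the datapath; totals are recomputed on demand from them.
 */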
/* Align with enum ntb_xstats_idx */
static struct rte_rawdev_xstats_name ntb_xstats_names[] = {
        {"Tx-packets"},
        {"Tx-bytes"},
        {"Tx-errors"},
        {"Rx-packets"},
        {"Rx-bytes"},
        {"Rx-missed"},
};
#define NTB_XSTATS_NUM RTE_DIM(ntb_xstats_names)

static inline void
ntb_link_cleanup(struct rte_rawdev *dev)
{
        struct ntb_hw *hw = dev->dev_private;
        int status, i;

        if (hw->ntb_ops->spad_write == NULL ||
            hw->ntb_ops->mw_set_trans == NULL) {
                NTB_LOG(ERR, "Not supported to clean up link.");
                return;
        }

        /* Clean spad registers. */
        for (i = 0; i < hw->spad_cnt; i++) {
                status = (*hw->ntb_ops->spad_write)(dev, i, 0, 0);
                if (status)
                        NTB_LOG(ERR, "Failed to clean local spad.");
        }

        /* Clear mw so that peer cannot access local memory. */
        for (i = 0; i < hw->used_mw_num; i++) {
                status = (*hw->ntb_ops->mw_set_trans)(dev, i, 0, 0);
                if (status)
                        NTB_LOG(ERR, "Failed to clean mw.");
        }
}

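/*
 * Handshake: publish the local MW count/sizes and queue config to the peer
 * via scratchpad registers, map the local memzones through the MW
 * translation registers, then ring doorbell 0 so the peer knows this side
 * is ready.
 */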
static inline int
ntb_handshake_work(const struct rte_rawdev *dev)
{
        struct ntb_hw *hw = dev->dev_private;
        uint32_t val;
        int ret, i;

        if (hw->ntb_ops->spad_write == NULL ||
            hw->ntb_ops->mw_set_trans == NULL) {
                NTB_LOG(ERR, "Scratchpad/MW setting is not supported.");
                return -ENOTSUP;
        }

        /* Tell peer the mw info of local side. */
        ret = (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_MWS, 1, hw->mw_cnt);
        if (ret < 0)
                return ret;
        for (i = 0; i < hw->mw_cnt; i++) {
                NTB_LOG(INFO, "Local %u mw size: 0x%"PRIx64"", i,
                                hw->mw_size[i]);
                val = hw->mw_size[i] >> 32;
                ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_H + 2 * i,
                                                 1, val);
                if (ret < 0)
                        return ret;
                val = hw->mw_size[i];
                ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_L + 2 * i,
                                                 1, val);
                if (ret < 0)
                        return ret;
        }

        /* Tell peer about the queue info and map memory to the peer. */
        ret = (*hw->ntb_ops->spad_write)(dev, SPAD_Q_SZ, 1, hw->queue_size);
        if (ret < 0)
                return ret;
        ret = (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_QPS, 1,
                                         hw->queue_pairs);
        if (ret < 0)
                return ret;
        ret = (*hw->ntb_ops->spad_write)(dev, SPAD_USED_MWS, 1,
                                         hw->used_mw_num);
        if (ret < 0)
                return ret;
        for (i = 0; i < hw->used_mw_num; i++) {
                val = (uint64_t)(size_t)(hw->mz[i]->addr) >> 32;
                ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_BA_H + 2 * i,
                                                 1, val);
                if (ret < 0)
                        return ret;
                val = (uint64_t)(size_t)(hw->mz[i]->addr);
                ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_BA_L + 2 * i,
                                                 1, val);
                if (ret < 0)
                        return ret;
        }

        for (i = 0; i < hw->used_mw_num; i++) {
                ret = (*hw->ntb_ops->mw_set_trans)(dev, i, hw->mz[i]->iova,
                                                   hw->mz[i]->len);
                if (ret < 0)
                        return ret;
        }

        /* Ring doorbell 0 to tell peer the device is ready. */
        ret = (*hw->ntb_ops->peer_db_set)(dev, 0);
        if (ret < 0)
                return ret;

        return 0;
}

static void
ntb_dev_intr_handler(void *param)
{
        struct rte_rawdev *dev = (struct rte_rawdev *)param;
        struct ntb_hw *hw = dev->dev_private;
        uint32_t val_h, val_l;
        uint64_t peer_mw_size;
        uint64_t db_bits = 0;
        uint8_t peer_mw_cnt;
        int i = 0;

        if (hw->ntb_ops->db_read == NULL ||
            hw->ntb_ops->db_clear == NULL ||
            hw->ntb_ops->peer_db_set == NULL) {
                NTB_LOG(ERR, "Doorbell is not supported.");
                return;
        }

        db_bits = (*hw->ntb_ops->db_read)(dev);
        if (!db_bits)
                NTB_LOG(ERR, "No doorbells");

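        /*
         * Doorbell bit usage in this driver: bit 0 means the peer is up,
         * bit 1 means the peer is going down, and bit 2 acknowledges a
         * peer's stop request.
         */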
        /* Doorbell 0 is for peer device ready. */
        if (db_bits & 1) {
                NTB_LOG(INFO, "DB0: Peer device is up.");
                /* Clear received doorbell. */
                (*hw->ntb_ops->db_clear)(dev, 1);

                /**
                 * Peer dev is already up. All mw settings are already done.
                 * Skip them.
                 */
                if (hw->peer_dev_up)
                        return;

                if (hw->ntb_ops->spad_read == NULL) {
                        NTB_LOG(ERR, "Scratchpad read is not supported.");
                        return;
                }

                /* Check if mw setting on the peer is the same as local. */
                peer_mw_cnt = (*hw->ntb_ops->spad_read)(dev, SPAD_NUM_MWS, 0);
                if (peer_mw_cnt != hw->mw_cnt) {
                        NTB_LOG(ERR, "Both mw cnt must be the same.");
                        return;
                }

                for (i = 0; i < hw->mw_cnt; i++) {
                        val_h = (*hw->ntb_ops->spad_read)
                                (dev, SPAD_MW0_SZ_H + 2 * i, 0);
                        val_l = (*hw->ntb_ops->spad_read)
                                (dev, SPAD_MW0_SZ_L + 2 * i, 0);
                        peer_mw_size = ((uint64_t)val_h << 32) | val_l;
                        NTB_LOG(DEBUG, "Peer %u mw size: 0x%"PRIx64"", i,
                                        peer_mw_size);
                        if (peer_mw_size != hw->mw_size[i]) {
                                NTB_LOG(ERR, "Mw config must be the same.");
                                return;
                        }
                }

                hw->peer_dev_up = 1;

                /**
                 * Handshake with the peer. Spad_write & mw_set_trans only
                 * work when both devices are up, so write the spads again
                 * once the doorbell is received, and ring the doorbell again
                 * for whichever device came up later and may have missed the
                 * first one.
                 */
                if (ntb_handshake_work(dev) < 0) {
                        NTB_LOG(ERR, "Handshake work failed.");
                        return;
                }

                /* To get the link info. */
                if (hw->ntb_ops->get_link_status == NULL) {
                        NTB_LOG(ERR, "Not supported to get link status.");
                        return;
                }
                (*hw->ntb_ops->get_link_status)(dev);
                NTB_LOG(INFO, "Link is up. Link speed: %u. Link width: %u",
                        hw->link_speed, hw->link_width);
                return;
        }

        if (db_bits & (1 << 1)) {
                NTB_LOG(INFO, "DB1: Peer device is down.");
                /* Clear received doorbell. */
                (*hw->ntb_ops->db_clear)(dev, 2);

                /* The peer device is going down, so clean up the local side too. */
                ntb_link_cleanup(dev);

                hw->peer_dev_up = 0;
                /* Acknowledge the peer's dev_stop request. */
                (*hw->ntb_ops->peer_db_set)(dev, 2);
                return;
        }

        if (db_bits & (1 << 2)) {
                NTB_LOG(INFO, "DB2: Peer device agrees dev to be down.");
                /* Clear received doorbell. */
                (*hw->ntb_ops->db_clear)(dev, (1 << 2));
                hw->peer_dev_up = 0;
                return;
        }

        /* Clear other received doorbells. */
        (*hw->ntb_ops->db_clear)(dev, db_bits);
}

static int
ntb_queue_conf_get(struct rte_rawdev *dev,
                   uint16_t queue_id,
                   rte_rawdev_obj_t queue_conf,
                   size_t conf_size)
{
        struct ntb_queue_conf *q_conf = queue_conf;
        struct ntb_hw *hw = dev->dev_private;

        if (conf_size != sizeof(*q_conf))
                return -EINVAL;

        q_conf->tx_free_thresh = hw->tx_queues[queue_id]->tx_free_thresh;
        q_conf->nb_desc = hw->rx_queues[queue_id]->nb_rx_desc;
        q_conf->rx_mp = hw->rx_queues[queue_id]->mpool;

        return 0;
}

static void
ntb_rxq_release_mbufs(struct ntb_rx_queue *q)
{
        int i;

        if (!q || !q->sw_ring) {
                NTB_LOG(ERR, "Pointer to rxq or sw_ring is NULL");
                return;
        }

        for (i = 0; i < q->nb_rx_desc; i++) {
                if (q->sw_ring[i].mbuf) {
                        rte_pktmbuf_free_seg(q->sw_ring[i].mbuf);
                        q->sw_ring[i].mbuf = NULL;
                }
        }
}

static void
ntb_rxq_release(struct ntb_rx_queue *rxq)
{
        if (!rxq) {
                NTB_LOG(ERR, "Pointer to rxq is NULL");
                return;
        }

        ntb_rxq_release_mbufs(rxq);

        rte_free(rxq->sw_ring);
        rte_free(rxq);
}

static int
ntb_rxq_setup(struct rte_rawdev *dev,
              uint16_t qp_id,
              rte_rawdev_obj_t queue_conf,
              size_t conf_size)
{
        struct ntb_queue_conf *rxq_conf = queue_conf;
        struct ntb_hw *hw = dev->dev_private;
        struct ntb_rx_queue *rxq;

        if (conf_size != sizeof(*rxq_conf))
                return -EINVAL;

        /* Check the mempool first so a bad config cannot leak the queue. */
        if (rxq_conf->rx_mp == NULL) {
                NTB_LOG(ERR, "Invalid null mempool pointer.");
                return -EINVAL;
        }

        /* Allocate the rx queue data structure */
        rxq = rte_zmalloc_socket("ntb rx queue",
                                 sizeof(struct ntb_rx_queue),
                                 RTE_CACHE_LINE_SIZE,
                                 dev->socket_id);
        if (!rxq) {
                NTB_LOG(ERR, "Failed to allocate memory for "
                            "rx queue data structure.");
                return -ENOMEM;
        }
        rxq->nb_rx_desc = rxq_conf->nb_desc;
        rxq->mpool = rxq_conf->rx_mp;
        rxq->port_id = dev->dev_id;
        rxq->queue_id = qp_id;
        rxq->hw = hw;

        /* Allocate the software ring. */
        rxq->sw_ring =
                rte_zmalloc_socket("ntb rx sw ring",
                                   sizeof(struct ntb_rx_entry) *
                                   rxq->nb_rx_desc,
                                   RTE_CACHE_LINE_SIZE,
                                   dev->socket_id);
        if (!rxq->sw_ring) {
                ntb_rxq_release(rxq);
                rxq = NULL;
                NTB_LOG(ERR, "Failed to allocate memory for SW ring");
                return -ENOMEM;
        }

        hw->rx_queues[qp_id] = rxq;

        return 0;
}

static void
ntb_txq_release_mbufs(struct ntb_tx_queue *q)
{
        int i;

        if (!q || !q->sw_ring) {
                NTB_LOG(ERR, "Pointer to txq or sw_ring is NULL");
                return;
        }

        for (i = 0; i < q->nb_tx_desc; i++) {
                if (q->sw_ring[i].mbuf) {
                        rte_pktmbuf_free_seg(q->sw_ring[i].mbuf);
                        q->sw_ring[i].mbuf = NULL;
                }
        }
}

static void
ntb_txq_release(struct ntb_tx_queue *txq)
{
        if (!txq) {
                NTB_LOG(ERR, "Pointer to txq is NULL");
                return;
        }

        ntb_txq_release_mbufs(txq);

        rte_free(txq->sw_ring);
        rte_free(txq);
}

static int
ntb_txq_setup(struct rte_rawdev *dev,
              uint16_t qp_id,
              rte_rawdev_obj_t queue_conf,
              size_t conf_size)
{
        struct ntb_queue_conf *txq_conf = queue_conf;
        struct ntb_hw *hw = dev->dev_private;
        struct ntb_tx_queue *txq;
        uint16_t i, prev;

        if (conf_size != sizeof(*txq_conf))
                return -EINVAL;

        /* Allocate the TX queue data structure. */
        txq = rte_zmalloc_socket("ntb tx queue",
                                  sizeof(struct ntb_tx_queue),
                                  RTE_CACHE_LINE_SIZE,
                                  dev->socket_id);
        if (!txq) {
                NTB_LOG(ERR, "Failed to allocate memory for "
                            "tx queue structure");
                return -ENOMEM;
        }

        txq->nb_tx_desc = txq_conf->nb_desc;
        txq->port_id = dev->dev_id;
        txq->queue_id = qp_id;
        txq->hw = hw;

        /* Allocate software ring */
        txq->sw_ring =
                rte_zmalloc_socket("ntb tx sw ring",
                                   sizeof(struct ntb_tx_entry) *
                                   txq->nb_tx_desc,
                                   RTE_CACHE_LINE_SIZE,
                                   dev->socket_id);
        if (!txq->sw_ring) {
                ntb_txq_release(txq);
                txq = NULL;
                NTB_LOG(ERR, "Failed to allocate memory for SW TX ring");
                return -ENOMEM;
        }

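        /*
         * Link the software ring into a circle: each entry's next_id points
         * at the following descriptor and wraps at nb_tx_desc.
         */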
        prev = txq->nb_tx_desc - 1;
        for (i = 0; i < txq->nb_tx_desc; i++) {
                txq->sw_ring[i].mbuf = NULL;
                txq->sw_ring[i].last_id = i;
                txq->sw_ring[prev].next_id = i;
                prev = i;
        }

        txq->tx_free_thresh = txq_conf->tx_free_thresh ?
                              txq_conf->tx_free_thresh :
                              NTB_DFLT_TX_FREE_THRESH;
        if (txq->tx_free_thresh >= txq->nb_tx_desc - 3) {
                NTB_LOG(ERR, "tx_free_thresh must be less than nb_desc - 3. "
                        "(tx_free_thresh=%u qp_id=%u)", txq->tx_free_thresh,
                        qp_id);
                ntb_txq_release(txq);
                return -EINVAL;
        }

        hw->tx_queues[qp_id] = txq;

        return 0;
}

static int
ntb_queue_setup(struct rte_rawdev *dev,
                uint16_t queue_id,
                rte_rawdev_obj_t queue_conf,
                size_t conf_size)
{
        struct ntb_hw *hw = dev->dev_private;
        int ret;

        if (queue_id >= hw->queue_pairs)
                return -EINVAL;

        ret = ntb_txq_setup(dev, queue_id, queue_conf, conf_size);
        if (ret < 0)
                return ret;

        ret = ntb_rxq_setup(dev, queue_id, queue_conf, conf_size);

        return ret;
}

static int
ntb_queue_release(struct rte_rawdev *dev, uint16_t queue_id)
{
        struct ntb_hw *hw = dev->dev_private;

        if (queue_id >= hw->queue_pairs)
                return -EINVAL;

        ntb_txq_release(hw->tx_queues[queue_id]);
        hw->tx_queues[queue_id] = NULL;
        ntb_rxq_release(hw->rx_queues[queue_id]);
        hw->rx_queues[queue_id] = NULL;

        return 0;
}

static uint16_t
ntb_queue_count(struct rte_rawdev *dev)
{
        struct ntb_hw *hw = dev->dev_private;

        return hw->queue_pairs;
}

static int
ntb_queue_init(struct rte_rawdev *dev, uint16_t qp_id)
{
        struct ntb_hw *hw = dev->dev_private;
        struct ntb_rx_queue *rxq = hw->rx_queues[qp_id];
        struct ntb_tx_queue *txq = hw->tx_queues[qp_id];
        volatile struct ntb_header *local_hdr;
        struct ntb_header *remote_hdr;
        uint16_t q_size = hw->queue_size;
        uint32_t hdr_offset;
        void *bar_addr;
        uint16_t i;

        if (hw->ntb_ops->get_peer_mw_addr == NULL) {
                NTB_LOG(ERR, "Getting peer mw addr is not supported.");
                return -EINVAL;
        }

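        /*
         * Ring placement: each side reads rings that live in its own memory
         * and writes rings that live in the peer's memory through the BAR,
         * so every load stays local and only stores cross the NTB.
         */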
        /* Put queue info into the start of shared memory. */
        hdr_offset = hw->hdr_size_per_queue * qp_id;
        local_hdr = (volatile struct ntb_header *)
                    ((size_t)hw->mz[0]->addr + hdr_offset);
        bar_addr = (*hw->ntb_ops->get_peer_mw_addr)(dev, 0);
        if (bar_addr == NULL)
                return -EINVAL;
        remote_hdr = (struct ntb_header *)
                     ((size_t)bar_addr + hdr_offset);

        /* rxq init. */
        rxq->rx_desc_ring = (struct ntb_desc *)
                            (&remote_hdr->desc_ring);
        rxq->rx_used_ring = (volatile struct ntb_used *)
                            (&local_hdr->desc_ring[q_size]);
        rxq->avail_cnt = &remote_hdr->avail_cnt;
        rxq->used_cnt = &local_hdr->used_cnt;

        for (i = 0; i < rxq->nb_rx_desc - 1; i++) {
                struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mpool);

                if (unlikely(!mbuf)) {
                        NTB_LOG(ERR, "Failed to allocate mbuf for RX");
                        return -ENOMEM;
                }
                mbuf->port = dev->dev_id;

                rxq->sw_ring[i].mbuf = mbuf;

                rxq->rx_desc_ring[i].addr = rte_pktmbuf_mtod(mbuf, size_t);
                rxq->rx_desc_ring[i].len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM;
        }
        rte_wmb();
        *rxq->avail_cnt = rxq->nb_rx_desc - 1;
        rxq->last_avail = rxq->nb_rx_desc - 1;
        rxq->last_used = 0;

        /* txq init */
        txq->tx_desc_ring = (volatile struct ntb_desc *)
                            (&local_hdr->desc_ring);
        txq->tx_used_ring = (struct ntb_used *)
                            (&remote_hdr->desc_ring[q_size]);
        txq->avail_cnt = &local_hdr->avail_cnt;
        txq->used_cnt = &remote_hdr->used_cnt;

        rte_wmb();
        *txq->used_cnt = 0;
        txq->last_used = 0;
        txq->last_avail = 0;
        txq->nb_tx_free = txq->nb_tx_desc - 1;

        /* Set per queue stats. */
        for (i = 0; i < NTB_XSTATS_NUM; i++) {
                hw->ntb_xstats[i + NTB_XSTATS_NUM * (qp_id + 1)] = 0;
                hw->ntb_xstats_off[i + NTB_XSTATS_NUM * (qp_id + 1)] = 0;
        }

        return 0;
}

static inline void
ntb_enqueue_cleanup(struct ntb_tx_queue *txq)
{
        struct ntb_tx_entry *sw_ring = txq->sw_ring;
        uint16_t tx_free = txq->last_avail;
        uint16_t nb_to_clean, i;

        /*
         * avail_cnt + 1 is where the peer will post its next RX buffer, so
         * descriptors up to that point have been consumed and their mbufs
         * can be freed (at most tx_free_thresh of them per cleanup pass).
         */
        nb_to_clean = (*txq->avail_cnt - txq->last_avail + 1 +
                        txq->nb_tx_desc) & (txq->nb_tx_desc - 1);
        nb_to_clean = RTE_MIN(nb_to_clean, txq->tx_free_thresh);
        for (i = 0; i < nb_to_clean; i++) {
                if (sw_ring[tx_free].mbuf)
                        rte_pktmbuf_free_seg(sw_ring[tx_free].mbuf);
                tx_free = (tx_free + 1) & (txq->nb_tx_desc - 1);
        }

        txq->nb_tx_free += nb_to_clean;
        txq->last_avail = tx_free;
}

static int
ntb_enqueue_bufs(struct rte_rawdev *dev,
                 struct rte_rawdev_buf **buffers,
                 unsigned int count,
                 rte_rawdev_obj_t context)
{
        struct ntb_hw *hw = dev->dev_private;
        struct ntb_tx_queue *txq = hw->tx_queues[(size_t)context];
        struct ntb_tx_entry *sw_ring = txq->sw_ring;
        struct rte_mbuf *txm;
        struct ntb_used tx_used[NTB_MAX_DESC_SIZE];
        volatile struct ntb_desc *tx_item;
        uint16_t tx_last, nb_segs, off, last_used, avail_cnt;
        uint16_t nb_mbufs = 0;
        uint16_t nb_tx = 0;
        uint64_t bytes = 0;
        void *buf_addr;
        int i;

        if (unlikely(hw->ntb_ops->ioremap == NULL)) {
                NTB_LOG(ERR, "Ioremap not supported.");
                return nb_tx;
        }

        if (unlikely(dev->started == 0 || hw->peer_dev_up == 0)) {
                NTB_LOG(DEBUG, "Link is not up.");
                return nb_tx;
        }

        if (txq->nb_tx_free < txq->tx_free_thresh)
                ntb_enqueue_cleanup(txq);

        off = NTB_XSTATS_NUM * ((size_t)context + 1);
        last_used = txq->last_used;
        avail_cnt = *txq->avail_cnt; /* Where to alloc next. */
        for (nb_tx = 0; nb_tx < count; nb_tx++) {
                txm = (struct rte_mbuf *)(buffers[nb_tx]->buf_addr);
                if (txm == NULL || txq->nb_tx_free < txm->nb_segs)
                        break;

                tx_last = (txq->last_used + txm->nb_segs - 1) &
                          (txq->nb_tx_desc - 1);
                nb_segs = txm->nb_segs;
                for (i = 0; i < nb_segs; i++) {
                        /* Not enough ring space for tx. */
                        if (txq->last_used == avail_cnt)
                                goto end_of_tx;
                        sw_ring[txq->last_used].mbuf = txm;
                        tx_item = txq->tx_desc_ring + txq->last_used;

                        /* A zero-length desc means the peer has not posted
                         * an RX buffer for this slot yet.
                         */
                        if (!tx_item->len) {
                                (hw->ntb_xstats[NTB_TX_ERRS_ID + off])++;
                                goto end_of_tx;
                        }
                        if (txm->data_len > tx_item->len) {
                                NTB_LOG(ERR, "Data length exceeds buf length."
                                        " Only %u bytes will be transmitted.",
                                        tx_item->len);
                                txm->data_len = tx_item->len;
                        }

                        /* translate remote virtual addr to bar virtual addr */
                        buf_addr = (*hw->ntb_ops->ioremap)(dev, tx_item->addr);
                        if (buf_addr == NULL) {
                                (hw->ntb_xstats[NTB_TX_ERRS_ID + off])++;
                                NTB_LOG(ERR, "Null remap addr.");
                                goto end_of_tx;
                        }
                        rte_memcpy(buf_addr, rte_pktmbuf_mtod(txm, void *),
                                   txm->data_len);

                        tx_used[nb_mbufs].len = txm->data_len;
                        tx_used[nb_mbufs++].flags = (txq->last_used ==
                                                    tx_last) ?
                                                    NTB_FLAG_EOP : 0;

                        /* update stats */
                        bytes += txm->data_len;

                        txm = txm->next;

                        sw_ring[txq->last_used].next_id = (txq->last_used + 1) &
                                                  (txq->nb_tx_desc - 1);
                        sw_ring[txq->last_used].last_id = tx_last;
                        txq->last_used = (txq->last_used + 1) &
                                         (txq->nb_tx_desc - 1);
                }
                txq->nb_tx_free -= nb_segs;
        }

end_of_tx:
        if (nb_tx) {
                uint16_t nb1, nb2;
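                /*
                 * Used entries were staged in a linear scratch array; split
                 * the copy into two pieces when it wraps past the ring end.
                 */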
                if (nb_mbufs > txq->nb_tx_desc - last_used) {
                        nb1 = txq->nb_tx_desc - last_used;
                        nb2 = nb_mbufs - txq->nb_tx_desc + last_used;
                } else {
                        nb1 = nb_mbufs;
                        nb2 = 0;
                }
                rte_memcpy(txq->tx_used_ring + last_used, tx_used,
                           sizeof(struct ntb_used) * nb1);
                rte_memcpy(txq->tx_used_ring, tx_used + nb1,
                           sizeof(struct ntb_used) * nb2);
                rte_wmb();
                *txq->used_cnt = txq->last_used;

                /* update queue stats */
                hw->ntb_xstats[NTB_TX_BYTES_ID + off] += bytes;
                hw->ntb_xstats[NTB_TX_PKTS_ID + off] += nb_tx;
        }

        return nb_tx;
}

static int
ntb_dequeue_bufs(struct rte_rawdev *dev,
                 struct rte_rawdev_buf **buffers,
                 unsigned int count,
                 rte_rawdev_obj_t context)
{
        struct ntb_hw *hw = dev->dev_private;
        struct ntb_rx_queue *rxq = hw->rx_queues[(size_t)context];
        struct ntb_rx_entry *sw_ring = rxq->sw_ring;
        struct ntb_desc rx_desc[NTB_MAX_DESC_SIZE];
        struct rte_mbuf *first, *rxm_t;
        struct rte_mbuf *prev = NULL;
        volatile struct ntb_used *rx_item;
        uint16_t nb_mbufs = 0;
        uint16_t nb_rx = 0;
        uint64_t bytes = 0;
        uint16_t off, last_avail, used_cnt, used_nb;
        int i;

        if (unlikely(dev->started == 0 || hw->peer_dev_up == 0)) {
                NTB_LOG(DEBUG, "Link is not up");
                return nb_rx;
        }

        used_cnt = *rxq->used_cnt;

        if (rxq->last_used == used_cnt)
                return nb_rx;

        last_avail = rxq->last_avail;
        used_nb = (used_cnt - rxq->last_used) & (rxq->nb_rx_desc - 1);
        count = RTE_MIN(count, used_nb);
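        /*
         * Each iteration rebuilds one packet: chain used entries into a
         * multi-segment mbuf until the EOP flag is seen, refilling a fresh
         * RX buffer for every slot consumed.
         */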
        for (nb_rx = 0; nb_rx < count; nb_rx++) {
                i = 0;
                while (true) {
                        rx_item = rxq->rx_used_ring + rxq->last_used;
                        rxm_t = sw_ring[rxq->last_used].mbuf;
                        rxm_t->data_len = rx_item->len;
                        rxm_t->data_off = RTE_PKTMBUF_HEADROOM;
                        rxm_t->port = rxq->port_id;

                        if (!i) {
                                rxm_t->nb_segs = 1;
                                first = rxm_t;
                                first->pkt_len = 0;
                                buffers[nb_rx]->buf_addr = rxm_t;
                        } else {
                                prev->next = rxm_t;
                                first->nb_segs++;
                        }

                        prev = rxm_t;
                        first->pkt_len += prev->data_len;
                        rxq->last_used = (rxq->last_used + 1) &
                                         (rxq->nb_rx_desc - 1);

                        /* alloc new mbuf */
                        rxm_t = rte_mbuf_raw_alloc(rxq->mpool);
                        if (unlikely(rxm_t == NULL)) {
                                NTB_LOG(ERR, "recv alloc mbuf failed.");
                                goto end_of_rx;
                        }
                        rxm_t->port = rxq->port_id;
                        sw_ring[rxq->last_avail].mbuf = rxm_t;
                        i++;

                        /* fill new desc */
                        rx_desc[nb_mbufs].addr =
                                        rte_pktmbuf_mtod(rxm_t, size_t);
                        rx_desc[nb_mbufs++].len = rxm_t->buf_len -
                                                  RTE_PKTMBUF_HEADROOM;
                        rxq->last_avail = (rxq->last_avail + 1) &
                                          (rxq->nb_rx_desc - 1);

                        if (rx_item->flags & NTB_FLAG_EOP)
                                break;
                }
                /* update stats */
                bytes += first->pkt_len;
        }

end_of_rx:
        if (nb_rx) {
                uint16_t nb1, nb2;
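                /*
                 * As on the TX side, split the staged descriptor copy in
                 * two when it wraps past the end of the ring.
                 */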
                if (nb_mbufs > rxq->nb_rx_desc - last_avail) {
                        nb1 = rxq->nb_rx_desc - last_avail;
                        nb2 = nb_mbufs - rxq->nb_rx_desc + last_avail;
                } else {
                        nb1 = nb_mbufs;
                        nb2 = 0;
                }
                rte_memcpy(rxq->rx_desc_ring + last_avail, rx_desc,
                           sizeof(struct ntb_desc) * nb1);
                rte_memcpy(rxq->rx_desc_ring, rx_desc + nb1,
                           sizeof(struct ntb_desc) * nb2);
                rte_wmb();
                *rxq->avail_cnt = rxq->last_avail;

                /* update queue stats */
                off = NTB_XSTATS_NUM * ((size_t)context + 1);
                hw->ntb_xstats[NTB_RX_BYTES_ID + off] += bytes;
                hw->ntb_xstats[NTB_RX_PKTS_ID + off] += nb_rx;
                hw->ntb_xstats[NTB_RX_MISS_ID + off] += (count - nb_rx);
        }

        return nb_rx;
}

static int
ntb_dev_info_get(struct rte_rawdev *dev, rte_rawdev_obj_t dev_info,
                size_t dev_info_size)
{
        struct ntb_hw *hw = dev->dev_private;
        struct ntb_dev_info *info = dev_info;

        if (dev_info_size != sizeof(*info)) {
                NTB_LOG(ERR, "Invalid size parameter to %s", __func__);
                return -EINVAL;
        }

        info->mw_cnt = hw->mw_cnt;
        info->mw_size = hw->mw_size;

        /**
         * Intel hardware requires that the mapped memory base address be
         * aligned with EMBARSZ, and the memzone must be contiguous.
         */
        info->mw_size_align = (uint8_t)(hw->pci_dev->id.vendor_id ==
                                        NTB_INTEL_VENDOR_ID);

        if (!hw->queue_size || !hw->queue_pairs) {
                NTB_LOG(ERR, "No queue size or queue number assigned.");
                return -EAGAIN;
        }

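        /*
         * Per-queue shared region: one ntb_header followed by queue_size
         * descriptor entries and queue_size used entries, rounded up to a
         * cache line.
         */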
        hw->hdr_size_per_queue = RTE_ALIGN(sizeof(struct ntb_header) +
                                hw->queue_size * sizeof(struct ntb_desc) +
                                hw->queue_size * sizeof(struct ntb_used),
                                RTE_CACHE_LINE_SIZE);
        info->ntb_hdr_size = hw->hdr_size_per_queue * hw->queue_pairs;

        return 0;
}

static int
ntb_dev_configure(const struct rte_rawdev *dev, rte_rawdev_obj_t config,
                size_t config_size)
{
        struct ntb_dev_config *conf = config;
        struct ntb_hw *hw = dev->dev_private;
        uint32_t xstats_num;
        int ret;

        if (conf == NULL || config_size != sizeof(*conf))
                return -EINVAL;

        hw->queue_pairs = conf->num_queues;
        hw->queue_size = conf->queue_size;
        hw->used_mw_num = conf->mz_num;
        hw->mz = conf->mz_list;
        hw->rx_queues = rte_zmalloc("ntb_rx_queues",
                        sizeof(struct ntb_rx_queue *) * hw->queue_pairs, 0);
        hw->tx_queues = rte_zmalloc("ntb_tx_queues",
                        sizeof(struct ntb_tx_queue *) * hw->queue_pairs, 0);
        /* First total stats, then per queue stats. */
        xstats_num = (hw->queue_pairs + 1) * NTB_XSTATS_NUM;
        hw->ntb_xstats = rte_zmalloc("ntb_xstats", xstats_num *
                                     sizeof(uint64_t), 0);
        hw->ntb_xstats_off = rte_zmalloc("ntb_xstats_off", xstats_num *
                                         sizeof(uint64_t), 0);

        /* Start handshake with the peer. */
        ret = ntb_handshake_work(dev);
        if (ret < 0) {
                rte_free(hw->rx_queues);
                rte_free(hw->tx_queues);
                hw->rx_queues = NULL;
                hw->tx_queues = NULL;
                return ret;
        }

        return 0;
}

static int
ntb_dev_start(struct rte_rawdev *dev)
{
        struct ntb_hw *hw = dev->dev_private;
        uint32_t peer_base_l, peer_val;
        uint64_t peer_base_h;
        uint32_t i;
        int ret;

        if (!hw->link_status || !hw->peer_dev_up)
                return -EINVAL;

        /* Set total stats. */
        for (i = 0; i < NTB_XSTATS_NUM; i++) {
                hw->ntb_xstats[i] = 0;
                hw->ntb_xstats_off[i] = 0;
        }

        for (i = 0; i < hw->queue_pairs; i++) {
                ret = ntb_queue_init(dev, i);
                if (ret) {
                        NTB_LOG(ERR, "Failed to init queue.");
                        goto err_q_init;
                }
        }

        hw->peer_mw_base = rte_zmalloc("ntb_peer_mw_base", hw->mw_cnt *
                                        sizeof(uint64_t), 0);
        if (hw->peer_mw_base == NULL) {
                NTB_LOG(ERR, "Cannot allocate memory for peer mw base.");
                ret = -ENOMEM;
                goto err_q_init;
        }

        if (hw->ntb_ops->spad_read == NULL) {
                ret = -ENOTSUP;
                goto err_up;
        }

        peer_val = (*hw->ntb_ops->spad_read)(dev, SPAD_Q_SZ, 0);
        if (peer_val != hw->queue_size) {
                NTB_LOG(ERR, "Inconsistent queue size! (local: %u peer: %u)",
                        hw->queue_size, peer_val);
                ret = -EINVAL;
                goto err_up;
        }

        peer_val = (*hw->ntb_ops->spad_read)(dev, SPAD_NUM_QPS, 0);
        if (peer_val != hw->queue_pairs) {
                NTB_LOG(ERR, "Inconsistent number of queues! (local: %u peer:"
                        " %u)", hw->queue_pairs, peer_val);
                ret = -EINVAL;
                goto err_up;
        }

        hw->peer_used_mws = (*hw->ntb_ops->spad_read)(dev, SPAD_USED_MWS, 0);

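        /* Reassemble each peer MW base address from its two 32-bit spads. */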
        for (i = 0; i < hw->peer_used_mws; i++) {
                peer_base_h = (*hw->ntb_ops->spad_read)(dev,
                                SPAD_MW0_BA_H + 2 * i, 0);
                peer_base_l = (*hw->ntb_ops->spad_read)(dev,
                                SPAD_MW0_BA_L + 2 * i, 0);
                hw->peer_mw_base[i] = (peer_base_h << 32) + peer_base_l;
        }

        dev->started = 1;

        return 0;

err_up:
        rte_free(hw->peer_mw_base);
err_q_init:
        for (i = 0; i < hw->queue_pairs; i++) {
                ntb_rxq_release_mbufs(hw->rx_queues[i]);
                ntb_txq_release_mbufs(hw->tx_queues[i]);
        }

        return ret;
}

static void
ntb_dev_stop(struct rte_rawdev *dev)
{
        struct ntb_hw *hw = dev->dev_private;
        uint32_t time_out;
        int status, i;

        if (!hw->peer_dev_up)
                goto clean;

        ntb_link_cleanup(dev);

        /* Notify the peer that device will be down. */
        if (hw->ntb_ops->peer_db_set == NULL) {
                NTB_LOG(ERR, "Peer doorbell setting is not supported.");
                return;
        }
        status = (*hw->ntb_ops->peer_db_set)(dev, 1);
        if (status) {
                NTB_LOG(ERR, "Failed to tell peer device is down.");
                return;
        }

        /*
         * Set the timeout to 1 second in case the peer was stopped
         * accidentally without any notification.
         */
        time_out = 1000000;

        /* Wait for the cleanup work to be done before clearing the db mask. */
        while (hw->peer_dev_up && time_out) {
                time_out -= 10;
                rte_delay_us(10);
        }

clean:
        /* Clear doorbells mask. */
        if (hw->ntb_ops->db_set_mask == NULL) {
                NTB_LOG(ERR, "Doorbell mask setting is not supported.");
                return;
        }
        status = (*hw->ntb_ops->db_set_mask)(dev,
                                (((uint64_t)1 << hw->db_cnt) - 1));
        if (status)
                NTB_LOG(ERR, "Failed to clear doorbells.");

        for (i = 0; i < hw->queue_pairs; i++) {
                ntb_rxq_release_mbufs(hw->rx_queues[i]);
                ntb_txq_release_mbufs(hw->tx_queues[i]);
        }

        dev->started = 0;
}

static int
ntb_dev_close(struct rte_rawdev *dev)
{
        struct ntb_hw *hw = dev->dev_private;
        struct rte_intr_handle *intr_handle;
        int i;

        if (dev->started)
                ntb_dev_stop(dev);

        /* free queues */
        for (i = 0; i < hw->queue_pairs; i++)
                ntb_queue_release(dev, i);
        hw->queue_pairs = 0;

        intr_handle = hw->pci_dev->intr_handle;
        /* Clean datapath event and vec mapping */
        rte_intr_efd_disable(intr_handle);
        rte_intr_vec_list_free(intr_handle);
        /* Disable uio intr before callback unregister */
        rte_intr_disable(intr_handle);

        /* Unregister callback func to eal lib */
        rte_intr_callback_unregister(intr_handle,
                                     ntb_dev_intr_handler, dev);

        return 0;
}

static int
ntb_dev_reset(struct rte_rawdev *rawdev __rte_unused)
{
        return 0;
}

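/*
 * A minimal usage sketch (assumed, mirroring how the examples/ntb app
 * drives this PMD rather than anything defined in this file): an
 * application sets queue attributes through the rawdev attr API before
 * configuring the device, e.g.
 *
 *   rte_rawdev_set_attr(dev_id, NTB_QUEUE_SZ_NAME, 512);
 *   rte_rawdev_set_attr(dev_id, NTB_QUEUE_NUM_NAME, 1);
 */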
static int
ntb_attr_set(struct rte_rawdev *dev, const char *attr_name,
             uint64_t attr_value)
{
        struct ntb_hw *hw;
        int index;

        if (dev == NULL || attr_name == NULL) {
                NTB_LOG(ERR, "Invalid arguments for setting attributes");
                return -EINVAL;
        }

        hw = dev->dev_private;

        if (!strncmp(attr_name, NTB_SPAD_USER, NTB_SPAD_USER_LEN)) {
                if (hw->ntb_ops->spad_write == NULL)
                        return -ENOTSUP;
                index = atoi(&attr_name[NTB_SPAD_USER_LEN]);
                if (index < 0 || index >= NTB_SPAD_USER_MAX_NUM) {
                        NTB_LOG(ERR, "Invalid attribute (%s)", attr_name);
                        return -EINVAL;
                }
                (*hw->ntb_ops->spad_write)(dev, hw->spad_user_list[index],
                                           1, attr_value);
                NTB_LOG(DEBUG, "Set attribute (%s) Value (%" PRIu64 ")",
                        attr_name, attr_value);
                return 0;
        }

        if (!strncmp(attr_name, NTB_QUEUE_SZ_NAME, NTB_ATTR_NAME_LEN)) {
                hw->queue_size = attr_value;
                NTB_LOG(DEBUG, "Set attribute (%s) Value (%" PRIu64 ")",
                        attr_name, attr_value);
                return 0;
        }

        if (!strncmp(attr_name, NTB_QUEUE_NUM_NAME, NTB_ATTR_NAME_LEN)) {
                hw->queue_pairs = attr_value;
                NTB_LOG(DEBUG, "Set attribute (%s) Value (%" PRIu64 ")",
                        attr_name, attr_value);
                return 0;
        }

        /* Attribute not found. */
        NTB_LOG(ERR, "Attribute not found.");
        return -EINVAL;
}

static int
ntb_attr_get(struct rte_rawdev *dev, const char *attr_name,
             uint64_t *attr_value)
{
        struct ntb_hw *hw;
        int index;

        if (dev == NULL || attr_name == NULL || attr_value == NULL) {
                NTB_LOG(ERR, "Invalid arguments for getting attributes");
                return -EINVAL;
        }

        hw = dev->dev_private;

        if (!strncmp(attr_name, NTB_TOPO_NAME, NTB_ATTR_NAME_LEN)) {
                *attr_value = hw->topo;
                NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
                        attr_name, *attr_value);
                return 0;
        }

        if (!strncmp(attr_name, NTB_LINK_STATUS_NAME, NTB_ATTR_NAME_LEN)) {
                /* hw->link_status only indicates hw link status. */
                *attr_value = hw->link_status && hw->peer_dev_up;
                NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
                        attr_name, *attr_value);
                return 0;
        }

        if (!strncmp(attr_name, NTB_SPEED_NAME, NTB_ATTR_NAME_LEN)) {
                *attr_value = hw->link_speed;
                NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
                        attr_name, *attr_value);
                return 0;
        }

        if (!strncmp(attr_name, NTB_WIDTH_NAME, NTB_ATTR_NAME_LEN)) {
                *attr_value = hw->link_width;
                NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
                        attr_name, *attr_value);
                return 0;
        }

        if (!strncmp(attr_name, NTB_MW_CNT_NAME, NTB_ATTR_NAME_LEN)) {
                *attr_value = hw->mw_cnt;
                NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
                        attr_name, *attr_value);
                return 0;
        }

        if (!strncmp(attr_name, NTB_DB_CNT_NAME, NTB_ATTR_NAME_LEN)) {
                *attr_value = hw->db_cnt;
                NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
                        attr_name, *attr_value);
                return 0;
        }

        if (!strncmp(attr_name, NTB_SPAD_CNT_NAME, NTB_ATTR_NAME_LEN)) {
                *attr_value = hw->spad_cnt;
                NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
                        attr_name, *attr_value);
                return 0;
        }

        if (!strncmp(attr_name, NTB_SPAD_USER, NTB_SPAD_USER_LEN)) {
                if (hw->ntb_ops->spad_read == NULL)
                        return -ENOTSUP;
                index = atoi(&attr_name[NTB_SPAD_USER_LEN]);
                if (index < 0 || index >= NTB_SPAD_USER_MAX_NUM) {
                        NTB_LOG(ERR, "Attribute (%s) out of range", attr_name);
                        return -EINVAL;
                }
                *attr_value = (*hw->ntb_ops->spad_read)(dev,
                                hw->spad_user_list[index], 0);
                NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
                        attr_name, *attr_value);
                return 0;
        }

        /* Attribute not found. */
        NTB_LOG(ERR, "Attribute not found.");
        return -EINVAL;
}

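/*
 * Stats are free-running counters; resetting only snapshots the current
 * value into ntb_xstats_off. This helper returns the delta and copes with
 * a uint64_t counter wrap-around.
 */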
static inline uint64_t
ntb_stats_update(uint64_t offset, uint64_t stat)
{
        if (stat >= offset)
                return (stat - offset);
        else
                return (uint64_t)(((uint64_t)-1) - offset + stat + 1);
}

static int
ntb_xstats_get(const struct rte_rawdev *dev,
               const unsigned int ids[],
               uint64_t values[],
               unsigned int n)
{
        struct ntb_hw *hw = dev->dev_private;
        uint32_t i, j, off, xstats_num;

        /* Calculate total stats of all queues. */
        for (i = 0; i < NTB_XSTATS_NUM; i++) {
                hw->ntb_xstats[i] = 0;
                for (j = 0; j < hw->queue_pairs; j++) {
                        off = NTB_XSTATS_NUM * (j + 1) + i;
                        hw->ntb_xstats[i] +=
                        ntb_stats_update(hw->ntb_xstats_off[off],
                                         hw->ntb_xstats[off]);
                }
        }

        xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
        for (i = 0; i < n && ids[i] < xstats_num; i++) {
                if (ids[i] < NTB_XSTATS_NUM)
                        values[i] = hw->ntb_xstats[ids[i]];
                else
                        values[i] =
                        ntb_stats_update(hw->ntb_xstats_off[ids[i]],
                                         hw->ntb_xstats[ids[i]]);
        }

        return i;
}

static int
ntb_xstats_get_names(const struct rte_rawdev *dev,
                     struct rte_rawdev_xstats_name *xstats_names,
                     unsigned int size)
{
        struct ntb_hw *hw = dev->dev_private;
        uint32_t xstats_num, i, j, off;

        xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
        if (xstats_names == NULL || size < xstats_num)
                return xstats_num;

        /* Total stats names */
        memcpy(xstats_names, ntb_xstats_names, sizeof(ntb_xstats_names));

        /* Queue stats names */
        for (i = 0; i < hw->queue_pairs; i++) {
                for (j = 0; j < NTB_XSTATS_NUM; j++) {
                        off = j + (i + 1) * NTB_XSTATS_NUM;
                        snprintf(xstats_names[off].name,
                                sizeof(xstats_names[0].name),
                                "%s_q%u", ntb_xstats_names[j].name, i);
                }
        }

        return xstats_num;
}

static uint64_t
ntb_xstats_get_by_name(const struct rte_rawdev *dev,
                       const char *name, unsigned int *id)
{
        struct rte_rawdev_xstats_name *xstats_names;
        struct ntb_hw *hw = dev->dev_private;
        uint32_t xstats_num, i, j, off;

        if (name == NULL)
                return -EINVAL;

        xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
        xstats_names = rte_zmalloc("ntb_stats_name",
                                   sizeof(struct rte_rawdev_xstats_name) *
                                   xstats_num, 0);
        ntb_xstats_get_names(dev, xstats_names, xstats_num);

        /* Calculate total stats of all queues. Zero the totals first so
         * repeated calls do not accumulate, matching ntb_xstats_get().
         */
        for (i = 0; i < NTB_XSTATS_NUM; i++) {
                hw->ntb_xstats[i] = 0;
                for (j = 0; j < hw->queue_pairs; j++) {
                        off = NTB_XSTATS_NUM * (j + 1) + i;
                        hw->ntb_xstats[i] +=
                        ntb_stats_update(hw->ntb_xstats_off[off],
                                         hw->ntb_xstats[off]);
                }
        }

        for (i = 0; i < xstats_num; i++) {
                if (!strncmp(name, xstats_names[i].name,
                    RTE_RAW_DEV_XSTATS_NAME_SIZE)) {
                        *id = i;
                        rte_free(xstats_names);
                        if (i < NTB_XSTATS_NUM)
                                return hw->ntb_xstats[i];
                        else
                                return ntb_stats_update(hw->ntb_xstats_off[i],
                                                        hw->ntb_xstats[i]);
                }
        }

        NTB_LOG(ERR, "Cannot find the xstats name.");

        return -EINVAL;
}

static int
ntb_xstats_reset(struct rte_rawdev *dev,
                 const uint32_t ids[],
                 uint32_t nb_ids)
{
        struct ntb_hw *hw = dev->dev_private;
        uint32_t i, j, off, xstats_num;

        xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
        for (i = 0; i < nb_ids && ids[i] < xstats_num; i++) {
                if (ids[i] < NTB_XSTATS_NUM) {
                        for (j = 0; j < hw->queue_pairs; j++) {
                                off = NTB_XSTATS_NUM * (j + 1) + ids[i];
                                hw->ntb_xstats_off[off] = hw->ntb_xstats[off];
                        }
                } else {
                        hw->ntb_xstats_off[ids[i]] = hw->ntb_xstats[ids[i]];
                }
        }

        return i;
}

static const struct rte_rawdev_ops ntb_ops = {
        .dev_info_get         = ntb_dev_info_get,
        .dev_configure        = ntb_dev_configure,
        .dev_start            = ntb_dev_start,
        .dev_stop             = ntb_dev_stop,
        .dev_close            = ntb_dev_close,
        .dev_reset            = ntb_dev_reset,

        .queue_def_conf       = ntb_queue_conf_get,
        .queue_setup          = ntb_queue_setup,
        .queue_release        = ntb_queue_release,
        .queue_count          = ntb_queue_count,

        .enqueue_bufs         = ntb_enqueue_bufs,
        .dequeue_bufs         = ntb_dequeue_bufs,

        .attr_get             = ntb_attr_get,
        .attr_set             = ntb_attr_set,

        .xstats_get           = ntb_xstats_get,
        .xstats_get_names     = ntb_xstats_get_names,
        .xstats_get_by_name   = ntb_xstats_get_by_name,
        .xstats_reset         = ntb_xstats_reset,
};

static int
ntb_init_hw(struct rte_rawdev *dev, struct rte_pci_device *pci_dev)
{
        struct ntb_hw *hw = dev->dev_private;
        struct rte_intr_handle *intr_handle;
        int ret, i;

        hw->pci_dev = pci_dev;
        hw->peer_dev_up = 0;
        hw->link_status = NTB_LINK_DOWN;
        hw->link_speed = NTB_SPEED_NONE;
        hw->link_width = NTB_WIDTH_NONE;

        switch (pci_dev->id.device_id) {
        case NTB_INTEL_DEV_ID_B2B_SKX:
        case NTB_INTEL_DEV_ID_B2B_ICX:
                hw->ntb_ops = &intel_ntb_ops;
                break;
        default:
                NTB_LOG(ERR, "Not supported device.");
                return -EINVAL;
        }

        if (hw->ntb_ops->ntb_dev_init == NULL)
                return -ENOTSUP;
        ret = (*hw->ntb_ops->ntb_dev_init)(dev);
        if (ret) {
                NTB_LOG(ERR, "Unable to init ntb dev.");
                return ret;
        }

        if (hw->ntb_ops->set_link == NULL)
                return -ENOTSUP;
        ret = (*hw->ntb_ops->set_link)(dev, 1);
        if (ret)
                return ret;

        /* Init doorbell. */
        hw->db_valid_mask = RTE_LEN2MASK(hw->db_cnt, uint64_t);

        intr_handle = pci_dev->intr_handle;
        /* Register callback func to eal lib */
        rte_intr_callback_register(intr_handle,
                                   ntb_dev_intr_handler, dev);

        ret = rte_intr_efd_enable(intr_handle, hw->db_cnt);
        if (ret)
                return ret;

        /* The doorbell interrupts are already mapped by default on Intel
         * gen3 hardware: doorbells use MSI-X vectors 1-32 and the hardware
         * interrupt uses vector 0. Map them all to vector 0 for uio.
         */
        if (!rte_intr_cap_multiple(intr_handle)) {
                for (i = 0; i < hw->db_cnt; i++) {
                        if (hw->ntb_ops->vector_bind == NULL)
                                return -ENOTSUP;
                        ret = (*hw->ntb_ops->vector_bind)(dev, i, 0);
                        if (ret)
                                return ret;
                }
        }

        if (hw->ntb_ops->db_set_mask == NULL ||
            hw->ntb_ops->peer_db_set == NULL) {
                NTB_LOG(ERR, "Doorbell is not supported.");
                return -ENOTSUP;
        }
        hw->db_mask = 0;
        ret = (*hw->ntb_ops->db_set_mask)(dev, hw->db_mask);
        if (ret) {
                NTB_LOG(ERR, "Unable to enable intr for all dbs.");
                return ret;
        }

        /* enable uio intr after callback register */
        rte_intr_enable(intr_handle);

        return ret;
}

static int
ntb_create(struct rte_pci_device *pci_dev, int socket_id)
{
        char name[RTE_RAWDEV_NAME_MAX_LEN];
        struct rte_rawdev *rawdev = NULL;
        int ret;

        if (pci_dev == NULL) {
                NTB_LOG(ERR, "Invalid pci_dev.");
                return -EINVAL;
        }

        memset(name, 0, sizeof(name));
        snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "NTB:%x:%02x.%x",
                 pci_dev->addr.bus, pci_dev->addr.devid,
                 pci_dev->addr.function);

        NTB_LOG(INFO, "Init %s on NUMA node %d", name, socket_id);

        /* Allocate device structure. */
        rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct ntb_hw),
                                         socket_id);
        if (rawdev == NULL) {
                NTB_LOG(ERR, "Unable to allocate rawdev.");
                return -EINVAL;
        }

        rawdev->dev_ops = &ntb_ops;
        rawdev->device = &pci_dev->device;
        rawdev->driver_name = pci_dev->driver->driver.name;

        ret = ntb_init_hw(rawdev, pci_dev);
        if (ret < 0) {
                NTB_LOG(ERR, "Unable to init ntb hw.");
                goto fail;
        }

        return ret;

fail:
        if (rawdev != NULL)
                rte_rawdev_pmd_release(rawdev);

        return ret;
}

static int
ntb_destroy(struct rte_pci_device *pci_dev)
{
        char name[RTE_RAWDEV_NAME_MAX_LEN];
        struct rte_rawdev *rawdev;
        int ret;

        if (pci_dev == NULL) {
                NTB_LOG(ERR, "Invalid pci_dev.");
                ret = -EINVAL;
                return ret;
        }

        memset(name, 0, sizeof(name));
        snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "NTB:%x:%02x.%x",
                 pci_dev->addr.bus, pci_dev->addr.devid,
                 pci_dev->addr.function);

        NTB_LOG(INFO, "Closing %s on NUMA node %d", name, rte_socket_id());

        rawdev = rte_rawdev_pmd_get_named_dev(name);
        if (rawdev == NULL) {
                NTB_LOG(ERR, "Invalid device name (%s)", name);
                ret = -EINVAL;
                return ret;
        }

        ret = rte_rawdev_pmd_release(rawdev);
        if (ret)
                NTB_LOG(ERR, "Failed to destroy ntb rawdev.");

        return ret;
}

static int
ntb_probe(struct rte_pci_driver *pci_drv __rte_unused,
        struct rte_pci_device *pci_dev)
{
        return ntb_create(pci_dev, rte_socket_id());
}

static int
ntb_remove(struct rte_pci_device *pci_dev)
{
        return ntb_destroy(pci_dev);
}

static struct rte_pci_driver rte_ntb_pmd = {
        .id_table = pci_id_ntb_map,
        .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_WC_ACTIVATE,
        .probe = ntb_probe,
        .remove = ntb_remove,
};

RTE_PMD_REGISTER_PCI(raw_ntb, rte_ntb_pmd);
RTE_PMD_REGISTER_PCI_TABLE(raw_ntb, pci_id_ntb_map);
RTE_PMD_REGISTER_KMOD_DEP(raw_ntb, "* igb_uio | uio_pci_generic | vfio-pci");
RTE_LOG_REGISTER_DEFAULT(ntb_logtype, INFO);