dpdk/drivers/net/mlx4/mlx4_ethdev.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright 2017 6WIND S.A.
   3 * Copyright 2017 Mellanox Technologies, Ltd
   4 */
   5
   6/**
   7 * @file
   8 * Miscellaneous control operations for mlx4 driver.
   9 */
  10
  11#include <dirent.h>
  12#include <errno.h>
  13#include <linux/ethtool.h>
  14#include <linux/sockios.h>
  15#include <net/if.h>
  16#include <netinet/ip.h>
  17#include <stddef.h>
  18#include <stdint.h>
  19#include <stdio.h>
  20#include <stdlib.h>
  21#include <string.h>
  22#include <sys/ioctl.h>
  23#include <sys/socket.h>
  24#include <unistd.h>
  25
  26/* Verbs headers do not support -pedantic. */
  27#ifdef PEDANTIC
  28#pragma GCC diagnostic ignored "-Wpedantic"
  29#endif
  30#include <infiniband/verbs.h>
  31#ifdef PEDANTIC
  32#pragma GCC diagnostic error "-Wpedantic"
  33#endif
  34
  35#include <rte_bus_pci.h>
  36#include <rte_errno.h>
  37#include <rte_ethdev_driver.h>
  38#include <rte_ether.h>
  39#include <rte_flow.h>
  40#include <rte_pci.h>
  41#include <rte_string_fns.h>
  42
  43#include "mlx4.h"
  44#include "mlx4_flow.h"
  45#include "mlx4_glue.h"
  46#include "mlx4_rxtx.h"
  47#include "mlx4_utils.h"
  48
  49/**
  50 * Get interface name from private structure.
  51 *
  52 * @param[in] priv
  53 *   Pointer to private structure.
  54 * @param[out] ifname
  55 *   Interface name output buffer.
  56 *
  57 * @return
  58 *   0 on success, negative errno value otherwise and rte_errno is set.
  59 */
  60int
  61mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE])
  62{
  63        DIR *dir;
  64        struct dirent *dent;
  65        unsigned int dev_type = 0;
  66        unsigned int dev_port_prev = ~0u;
  67        char match[IF_NAMESIZE] = "";
  68
  69        {
  70                MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
  71
  72                dir = opendir(path);
  73                if (dir == NULL) {
  74                        rte_errno = errno;
  75                        return -rte_errno;
  76                }
  77        }
  78        while ((dent = readdir(dir)) != NULL) {
  79                char *name = dent->d_name;
  80                FILE *file;
  81                unsigned int dev_port;
  82                int r;
  83
  84                if ((name[0] == '.') &&
  85                    ((name[1] == '\0') ||
  86                     ((name[1] == '.') && (name[2] == '\0'))))
  87                        continue;
  88
  89                MKSTR(path, "%s/device/net/%s/%s",
  90                      priv->ctx->device->ibdev_path, name,
  91                      (dev_type ? "dev_id" : "dev_port"));
  92
  93                file = fopen(path, "rb");
  94                if (file == NULL) {
  95                        if (errno != ENOENT)
  96                                continue;
  97                        /*
  98                         * Switch to dev_id when dev_port does not exist as
  99                         * is the case with Linux kernel versions < 3.15.
 100                         */
 101try_dev_id:
 102                        match[0] = '\0';
 103                        if (dev_type)
 104                                break;
 105                        dev_type = 1;
 106                        dev_port_prev = ~0u;
 107                        rewinddir(dir);
 108                        continue;
 109                }
 110                r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
 111                fclose(file);
 112                if (r != 1)
 113                        continue;
 114                /*
 115                 * Switch to dev_id when dev_port returns the same value for
 116                 * all ports. May happen when using a MOFED release older than
 117                 * 3.0 with a Linux kernel >= 3.15.
 118                 */
 119                if (dev_port == dev_port_prev)
 120                        goto try_dev_id;
 121                dev_port_prev = dev_port;
 122                if (dev_port == (priv->port - 1u))
 123                        strlcpy(match, name, sizeof(match));
 124        }
 125        closedir(dir);
 126        if (match[0] == '\0') {
 127                rte_errno = ENODEV;
 128                return -rte_errno;
 129        }
 130        strncpy(*ifname, match, sizeof(*ifname));
 131        return 0;
 132}
 133
 134/**
 135 * Perform ifreq ioctl() on associated Ethernet device.
 136 *
 137 * @param[in] priv
 138 *   Pointer to private structure.
 139 * @param req
 140 *   Request number to pass to ioctl().
 141 * @param[out] ifr
 142 *   Interface request structure output buffer.
 143 *
 144 * @return
 145 *   0 on success, negative errno value otherwise and rte_errno is set.
 146 */
 147static int
 148mlx4_ifreq(const struct mlx4_priv *priv, int req, struct ifreq *ifr)
 149{
 150        int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
 151        int ret;
 152
 153        if (sock == -1) {
 154                rte_errno = errno;
 155                return -rte_errno;
 156        }
 157        ret = mlx4_get_ifname(priv, &ifr->ifr_name);
 158        if (!ret && ioctl(sock, req, ifr) == -1) {
 159                rte_errno = errno;
 160                ret = -rte_errno;
 161        }
 162        close(sock);
 163        return ret;
 164}
 165
 166/**
 167 * Get MAC address by querying netdevice.
 168 *
 169 * @param[in] priv
 170 *   Pointer to private structure.
 171 * @param[out] mac
 172 *   MAC address output buffer.
 173 *
 174 * @return
 175 *   0 on success, negative errno value otherwise and rte_errno is set.
 176 */
 177int
 178mlx4_get_mac(struct mlx4_priv *priv, uint8_t (*mac)[RTE_ETHER_ADDR_LEN])
 179{
 180        struct ifreq request;
 181        int ret = mlx4_ifreq(priv, SIOCGIFHWADDR, &request);
 182
 183        if (ret)
 184                return ret;
 185        memcpy(mac, request.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN);
 186        return 0;
 187}
 188
 189/**
 190 * Get device MTU.
 191 *
 192 * @param priv
 193 *   Pointer to private structure.
 194 * @param[out] mtu
 195 *   MTU value output buffer.
 196 *
 197 * @return
 198 *   0 on success, negative errno value otherwise and rte_errno is set.
 199 */
 200int
 201mlx4_mtu_get(struct mlx4_priv *priv, uint16_t *mtu)
 202{
 203        struct ifreq request;
 204        int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
 205
 206        if (ret)
 207                return ret;
 208        *mtu = request.ifr_mtu;
 209        return 0;
 210}
 211
 212/**
 213 * DPDK callback to change the MTU.
 214 *
 215 * @param priv
 216 *   Pointer to Ethernet device structure.
 217 * @param mtu
 218 *   MTU value to set.
 219 *
 220 * @return
 221 *   0 on success, negative errno value otherwise and rte_errno is set.
 222 */
 223int
 224mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 225{
 226        struct mlx4_priv *priv = dev->data->dev_private;
 227        struct ifreq request = { .ifr_mtu = mtu, };
 228        int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
 229
 230        if (ret)
 231                return ret;
 232        priv->mtu = mtu;
 233        return 0;
 234}
 235
 236/**
 237 * Set device flags.
 238 *
 239 * @param priv
 240 *   Pointer to private structure.
 241 * @param keep
 242 *   Bitmask for flags that must remain untouched.
 243 * @param flags
 244 *   Bitmask for flags to modify.
 245 *
 246 * @return
 247 *   0 on success, negative errno value otherwise and rte_errno is set.
 248 */
 249static int
 250mlx4_set_flags(struct mlx4_priv *priv, unsigned int keep, unsigned int flags)
 251{
 252        struct ifreq request;
 253        int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
 254
 255        if (ret)
 256                return ret;
 257        request.ifr_flags &= keep;
 258        request.ifr_flags |= flags & ~keep;
 259        return mlx4_ifreq(priv, SIOCSIFFLAGS, &request);
 260}
 261
 262/**
 263 * Change the link state (UP / DOWN).
 264 *
 265 * @param priv
 266 *   Pointer to Ethernet device private data.
 267 * @param up
 268 *   Nonzero for link up, otherwise link down.
 269 *
 270 * @return
 271 *   0 on success, negative errno value otherwise and rte_errno is set.
 272 */
 273static int
 274mlx4_dev_set_link(struct mlx4_priv *priv, int up)
 275{
 276        int err;
 277
 278        if (up) {
 279                err = mlx4_set_flags(priv, ~IFF_UP, IFF_UP);
 280                if (err)
 281                        return err;
 282        } else {
 283                err = mlx4_set_flags(priv, ~IFF_UP, ~IFF_UP);
 284                if (err)
 285                        return err;
 286        }
 287        return 0;
 288}
 289
 290/**
 291 * DPDK callback to bring the link DOWN.
 292 *
 293 * @param dev
 294 *   Pointer to Ethernet device structure.
 295 *
 296 * @return
 297 *   0 on success, negative errno value otherwise and rte_errno is set.
 298 */
 299int
 300mlx4_dev_set_link_down(struct rte_eth_dev *dev)
 301{
 302        struct mlx4_priv *priv = dev->data->dev_private;
 303
 304        return mlx4_dev_set_link(priv, 0);
 305}
 306
 307/**
 308 * DPDK callback to bring the link UP.
 309 *
 310 * @param dev
 311 *   Pointer to Ethernet device structure.
 312 *
 313 * @return
 314 *   0 on success, negative errno value otherwise and rte_errno is set.
 315 */
 316int
 317mlx4_dev_set_link_up(struct rte_eth_dev *dev)
 318{
 319        struct mlx4_priv *priv = dev->data->dev_private;
 320
 321        return mlx4_dev_set_link(priv, 1);
 322}
 323
 324/**
 325 * Supported Rx mode toggles.
 326 *
 327 * Even and odd values respectively stand for off and on.
 328 */
 329enum rxmode_toggle {
 330        RXMODE_TOGGLE_PROMISC_OFF,
 331        RXMODE_TOGGLE_PROMISC_ON,
 332        RXMODE_TOGGLE_ALLMULTI_OFF,
 333        RXMODE_TOGGLE_ALLMULTI_ON,
 334};
 335
 336/**
 337 * Helper function to toggle promiscuous and all multicast modes.
 338 *
 339 * @param dev
 340 *   Pointer to Ethernet device structure.
 341 * @param toggle
 342 *   Toggle to set.
 343 *
 344 * @return
 345 *   0 on success, a negative errno value otherwise and rte_errno is set.
 346 */
 347static int
 348mlx4_rxmode_toggle(struct rte_eth_dev *dev, enum rxmode_toggle toggle)
 349{
 350        struct mlx4_priv *priv = dev->data->dev_private;
 351        const char *mode;
 352        struct rte_flow_error error;
 353        int ret;
 354
 355        switch (toggle) {
 356        case RXMODE_TOGGLE_PROMISC_OFF:
 357        case RXMODE_TOGGLE_PROMISC_ON:
 358                mode = "promiscuous";
 359                dev->data->promiscuous = toggle & 1;
 360                break;
 361        case RXMODE_TOGGLE_ALLMULTI_OFF:
 362        case RXMODE_TOGGLE_ALLMULTI_ON:
 363                mode = "all multicast";
 364                dev->data->all_multicast = toggle & 1;
 365                break;
 366        default:
 367                mode = "undefined";
 368        }
 369
 370        ret = mlx4_flow_sync(priv, &error);
 371        if (!ret)
 372                return 0;
 373
 374        ERROR("cannot toggle %s mode (code %d, \"%s\"),"
 375              " flow error type %d, cause %p, message: %s",
 376              mode, rte_errno, strerror(rte_errno), error.type, error.cause,
 377              error.message ? error.message : "(unspecified)");
 378        return ret;
 379}
 380
 381/**
 382 * DPDK callback to enable promiscuous mode.
 383 *
 384 * @param dev
 385 *   Pointer to Ethernet device structure.
 386 *
 387 * @return
 388 *   0 on success, a negative errno value otherwise and rte_errno is set.
 389 */
 390int
 391mlx4_promiscuous_enable(struct rte_eth_dev *dev)
 392{
 393        return mlx4_rxmode_toggle(dev, RXMODE_TOGGLE_PROMISC_ON);
 394}
 395
 396/**
 397 * DPDK callback to disable promiscuous mode.
 398 *
 399 * @param dev
 400 *   Pointer to Ethernet device structure.
 401 *
 402 * @return
 403 *   0 on success, a negative errno value otherwise and rte_errno is set.
 404 */
 405int
 406mlx4_promiscuous_disable(struct rte_eth_dev *dev)
 407{
 408        return mlx4_rxmode_toggle(dev, RXMODE_TOGGLE_PROMISC_OFF);
 409}
 410
 411/**
 412 * DPDK callback to enable all multicast mode.
 413 *
 414 * @param dev
 415 *   Pointer to Ethernet device structure.
 416 *
 417 * @return
 418 *   0 on success, a negative errno value otherwise and rte_errno is set.
 419 */
 420int
 421mlx4_allmulticast_enable(struct rte_eth_dev *dev)
 422{
 423        return mlx4_rxmode_toggle(dev, RXMODE_TOGGLE_ALLMULTI_ON);
 424}
 425
 426/**
 427 * DPDK callback to disable all multicast mode.
 428 *
 429 * @param dev
 430 *   Pointer to Ethernet device structure.
 431 *
 432 * @return
 433 *   0 on success, a negative errno value otherwise and rte_errno is set.
 434 */
 435int
 436mlx4_allmulticast_disable(struct rte_eth_dev *dev)
 437{
 438        return mlx4_rxmode_toggle(dev, RXMODE_TOGGLE_ALLMULTI_OFF);
 439}
 440
 441/**
 442 * DPDK callback to remove a MAC address.
 443 *
 444 * @param dev
 445 *   Pointer to Ethernet device structure.
 446 * @param index
 447 *   MAC address index.
 448 */
 449void
 450mlx4_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 451{
 452        struct mlx4_priv *priv = dev->data->dev_private;
 453        struct rte_flow_error error;
 454
 455        if (index >= RTE_DIM(priv->mac) - priv->mac_mc) {
 456                rte_errno = EINVAL;
 457                return;
 458        }
 459        memset(&priv->mac[index], 0, sizeof(priv->mac[index]));
 460        if (!mlx4_flow_sync(priv, &error))
 461                return;
 462        ERROR("failed to synchronize flow rules after removing MAC address"
 463              " at index %d (code %d, \"%s\"),"
 464              " flow error type %d, cause %p, message: %s",
 465              index, rte_errno, strerror(rte_errno), error.type, error.cause,
 466              error.message ? error.message : "(unspecified)");
 467}
 468
 469/**
 470 * DPDK callback to add a MAC address.
 471 *
 472 * @param dev
 473 *   Pointer to Ethernet device structure.
 474 * @param mac_addr
 475 *   MAC address to register.
 476 * @param index
 477 *   MAC address index.
 478 * @param vmdq
 479 *   VMDq pool index to associate address with (ignored).
 480 *
 481 * @return
 482 *   0 on success, negative errno value otherwise and rte_errno is set.
 483 */
 484int
 485mlx4_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
 486                  uint32_t index, uint32_t vmdq)
 487{
 488        struct mlx4_priv *priv = dev->data->dev_private;
 489        struct rte_flow_error error;
 490        int ret;
 491
 492        (void)vmdq;
 493        if (index >= RTE_DIM(priv->mac) - priv->mac_mc) {
 494                rte_errno = EINVAL;
 495                return -rte_errno;
 496        }
 497        memcpy(&priv->mac[index], mac_addr, sizeof(priv->mac[index]));
 498        ret = mlx4_flow_sync(priv, &error);
 499        if (!ret)
 500                return 0;
 501        ERROR("failed to synchronize flow rules after adding MAC address"
 502              " at index %d (code %d, \"%s\"),"
 503              " flow error type %d, cause %p, message: %s",
 504              index, rte_errno, strerror(rte_errno), error.type, error.cause,
 505              error.message ? error.message : "(unspecified)");
 506        return ret;
 507}
 508
 509/**
 510 * DPDK callback to configure multicast addresses.
 511 *
 512 * @param dev
 513 *   Pointer to Ethernet device structure.
 514 * @param list
 515 *   List of MAC addresses to register.
 516 * @param num
 517 *   Number of entries in list.
 518 *
 519 * @return
 520 *   0 on success, negative errno value otherwise and rte_errno is set.
 521 */
 522int
 523mlx4_set_mc_addr_list(struct rte_eth_dev *dev, struct rte_ether_addr *list,
 524                      uint32_t num)
 525{
 526        struct mlx4_priv *priv = dev->data->dev_private;
 527        struct rte_flow_error error;
 528        int ret;
 529
 530        if (num > RTE_DIM(priv->mac)) {
 531                rte_errno = EINVAL;
 532                return -rte_errno;
 533        }
 534        /*
 535         * Make sure there is enough room to increase the number of
 536         * multicast entries without overwriting standard entries.
 537         */
 538        if (num > priv->mac_mc) {
 539                unsigned int i;
 540
 541                for (i = RTE_DIM(priv->mac) - num;
 542                     i != RTE_DIM(priv->mac) - priv->mac_mc;
 543                     ++i)
 544                        if (!rte_is_zero_ether_addr(&priv->mac[i])) {
 545                                rte_errno = EBUSY;
 546                                return -rte_errno;
 547                        }
 548        } else if (num < priv->mac_mc) {
 549                /* Clear unused entries. */
 550                memset(priv->mac + RTE_DIM(priv->mac) - priv->mac_mc,
 551                       0,
 552                       sizeof(priv->mac[0]) * (priv->mac_mc - num));
 553        }
 554        memcpy(priv->mac + RTE_DIM(priv->mac) - num, list, sizeof(*list) * num);
 555        priv->mac_mc = num;
 556        ret = mlx4_flow_sync(priv, &error);
 557        if (!ret)
 558                return 0;
 559        ERROR("failed to synchronize flow rules after modifying MC list,"
 560              " (code %d, \"%s\"), flow error type %d, cause %p, message: %s",
 561              rte_errno, strerror(rte_errno), error.type, error.cause,
 562              error.message ? error.message : "(unspecified)");
 563        return ret;
 564}
 565
 566/**
 567 * DPDK callback to configure a VLAN filter.
 568 *
 569 * @param dev
 570 *   Pointer to Ethernet device structure.
 571 * @param vlan_id
 572 *   VLAN ID to filter.
 573 * @param on
 574 *   Toggle filter.
 575 *
 576 * @return
 577 *   0 on success, negative errno value otherwise and rte_errno is set.
 578 */
 579int
 580mlx4_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 581{
 582        struct mlx4_priv *priv = dev->data->dev_private;
 583        struct rte_flow_error error;
 584        unsigned int vidx = vlan_id / 64;
 585        unsigned int vbit = vlan_id % 64;
 586        uint64_t *v;
 587        int ret;
 588
 589        if (vidx >= RTE_DIM(dev->data->vlan_filter_conf.ids)) {
 590                rte_errno = EINVAL;
 591                return -rte_errno;
 592        }
 593        v = &dev->data->vlan_filter_conf.ids[vidx];
 594        *v &= ~(UINT64_C(1) << vbit);
 595        *v |= (uint64_t)!!on << vbit;
 596        ret = mlx4_flow_sync(priv, &error);
 597        if (!ret)
 598                return 0;
 599        ERROR("failed to synchronize flow rules after %s VLAN filter on ID %u"
 600              " (code %d, \"%s\"), "
 601              " flow error type %d, cause %p, message: %s",
 602              on ? "enabling" : "disabling", vlan_id,
 603              rte_errno, strerror(rte_errno), error.type, error.cause,
 604              error.message ? error.message : "(unspecified)");
 605        return ret;
 606}
 607
 608/**
 609 * DPDK callback to set the primary MAC address.
 610 *
 611 * @param dev
 612 *   Pointer to Ethernet device structure.
 613 * @param mac_addr
 614 *   MAC address to register.
 615 *
 616 * @return
 617 *   0 on success, negative errno value otherwise and rte_errno is set.
 618 */
 619int
 620mlx4_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
 621{
 622        return mlx4_mac_addr_add(dev, mac_addr, 0, 0);
 623}
 624
 625/**
 626 * DPDK callback to get information about the device.
 627 *
 628 * @param dev
 629 *   Pointer to Ethernet device structure.
 630 * @param[out] info
 631 *   Info structure output buffer.
 632 */
 633int
 634mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 635{
 636        struct mlx4_priv *priv = dev->data->dev_private;
 637        unsigned int max;
 638
 639        /* FIXME: we should ask the device for these values. */
 640        info->min_rx_bufsize = 32;
 641        info->max_rx_pktlen = 65536;
 642        /*
 643         * Since we need one CQ per QP, the limit is the minimum number
 644         * between the two values.
 645         */
 646        max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ?
 647               priv->device_attr.max_qp : priv->device_attr.max_cq);
 648        /* max_rx_queues is uint16_t. */
 649        max = RTE_MIN(max, (unsigned int)UINT16_MAX);
 650        info->max_rx_queues = max;
 651        info->max_tx_queues = max;
 652        info->max_mac_addrs = RTE_DIM(priv->mac);
 653        info->tx_offload_capa = mlx4_get_tx_port_offloads(priv);
 654        info->rx_queue_offload_capa = mlx4_get_rx_queue_offloads(priv);
 655        info->rx_offload_capa = (mlx4_get_rx_port_offloads(priv) |
 656                                 info->rx_queue_offload_capa);
 657        info->if_index = priv->if_index;
 658        info->hash_key_size = MLX4_RSS_HASH_KEY_SIZE;
 659        info->speed_capa =
 660                        ETH_LINK_SPEED_1G |
 661                        ETH_LINK_SPEED_10G |
 662                        ETH_LINK_SPEED_20G |
 663                        ETH_LINK_SPEED_40G |
 664                        ETH_LINK_SPEED_56G;
 665        info->flow_type_rss_offloads = mlx4_conv_rss_types(priv, 0, 1);
 666
 667        return 0;
 668}
 669
 670/**
 671 * Get firmware version of a device.
 672 *
 673 * @param dev
 674 *   Ethernet device port.
 675 * @param fw_ver
 676 *   String output allocated by caller.
 677 * @param fw_size
 678 *   Size of the output string, including terminating null byte.
 679 *
 680 * @return
 681 *   0 on success, or the size of the non truncated string if too big.
 682 */
 683int mlx4_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
 684{
 685        struct mlx4_priv *priv = dev->data->dev_private;
 686        struct ibv_device_attr *attr = &priv->device_attr;
 687        size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1;
 688
 689        if (fw_size < size)
 690                return size;
 691        if (fw_ver != NULL)
 692                strlcpy(fw_ver, attr->fw_ver, fw_size);
 693        return 0;
 694}
 695
 696/**
 697 * DPDK callback to get device statistics.
 698 *
 699 * @param dev
 700 *   Pointer to Ethernet device structure.
 701 * @param[out] stats
 702 *   Stats structure output buffer.
 703 */
 704int
 705mlx4_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 706{
 707        struct rte_eth_stats tmp;
 708        unsigned int i;
 709        unsigned int idx;
 710
 711        memset(&tmp, 0, sizeof(tmp));
 712        /* Add software counters. */
 713        for (i = 0; i != dev->data->nb_rx_queues; ++i) {
 714                struct rxq *rxq = dev->data->rx_queues[i];
 715
 716                if (rxq == NULL)
 717                        continue;
 718                idx = rxq->stats.idx;
 719                if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 720                        tmp.q_ipackets[idx] += rxq->stats.ipackets;
 721                        tmp.q_ibytes[idx] += rxq->stats.ibytes;
 722                        tmp.q_errors[idx] += (rxq->stats.idropped +
 723                                              rxq->stats.rx_nombuf);
 724                }
 725                tmp.ipackets += rxq->stats.ipackets;
 726                tmp.ibytes += rxq->stats.ibytes;
 727                tmp.ierrors += rxq->stats.idropped;
 728                tmp.rx_nombuf += rxq->stats.rx_nombuf;
 729        }
 730        for (i = 0; i != dev->data->nb_tx_queues; ++i) {
 731                struct txq *txq = dev->data->tx_queues[i];
 732
 733                if (txq == NULL)
 734                        continue;
 735                idx = txq->stats.idx;
 736                if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 737                        tmp.q_opackets[idx] += txq->stats.opackets;
 738                        tmp.q_obytes[idx] += txq->stats.obytes;
 739                }
 740                tmp.opackets += txq->stats.opackets;
 741                tmp.obytes += txq->stats.obytes;
 742                tmp.oerrors += txq->stats.odropped;
 743        }
 744        *stats = tmp;
 745        return 0;
 746}
 747
 748/**
 749 * DPDK callback to clear device statistics.
 750 *
 751 * @param dev
 752 *   Pointer to Ethernet device structure.
 753 *
 754 * @return
 755 *   alwasy 0 on success
 756 */
 757int
 758mlx4_stats_reset(struct rte_eth_dev *dev)
 759{
 760        unsigned int i;
 761
 762        for (i = 0; i != dev->data->nb_rx_queues; ++i) {
 763                struct rxq *rxq = dev->data->rx_queues[i];
 764
 765                if (rxq)
 766                        rxq->stats = (struct mlx4_rxq_stats){
 767                                .idx = rxq->stats.idx,
 768                        };
 769        }
 770        for (i = 0; i != dev->data->nb_tx_queues; ++i) {
 771                struct txq *txq = dev->data->tx_queues[i];
 772
 773                if (txq)
 774                        txq->stats = (struct mlx4_txq_stats){
 775                                .idx = txq->stats.idx,
 776                        };
 777        }
 778
 779        return 0;
 780}
 781
 782/**
 783 * DPDK callback to retrieve physical link information.
 784 *
 785 * @param dev
 786 *   Pointer to Ethernet device structure.
 787 * @param wait_to_complete
 788 *   Wait for request completion (ignored).
 789 *
 790 * @return
 791 *   0 on success, negative errno value otherwise and rte_errno is set.
 792 */
 793int
 794mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete)
 795{
 796        const struct mlx4_priv *priv = dev->data->dev_private;
 797        struct ethtool_cmd edata = {
 798                .cmd = ETHTOOL_GSET,
 799        };
 800        struct ifreq ifr;
 801        struct rte_eth_link dev_link;
 802        int link_speed = 0;
 803
 804        if (priv == NULL) {
 805                rte_errno = EINVAL;
 806                return -rte_errno;
 807        }
 808        (void)wait_to_complete;
 809        if (mlx4_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
 810                WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(rte_errno));
 811                return -rte_errno;
 812        }
 813        memset(&dev_link, 0, sizeof(dev_link));
 814        dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
 815                                (ifr.ifr_flags & IFF_RUNNING));
 816        ifr.ifr_data = (void *)&edata;
 817        if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
 818                WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
 819                     strerror(rte_errno));
 820                return -rte_errno;
 821        }
 822        link_speed = ethtool_cmd_speed(&edata);
 823        if (link_speed == -1)
 824                dev_link.link_speed = ETH_SPEED_NUM_NONE;
 825        else
 826                dev_link.link_speed = link_speed;
 827        dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
 828                                ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
 829        dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
 830                                  ETH_LINK_SPEED_FIXED);
 831        dev->data->dev_link = dev_link;
 832        return 0;
 833}
 834
 835/**
 836 * DPDK callback to get flow control status.
 837 *
 838 * @param dev
 839 *   Pointer to Ethernet device structure.
 840 * @param[out] fc_conf
 841 *   Flow control output buffer.
 842 *
 843 * @return
 844 *   0 on success, negative errno value otherwise and rte_errno is set.
 845 */
 846int
 847mlx4_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
 848{
 849        struct mlx4_priv *priv = dev->data->dev_private;
 850        struct ifreq ifr;
 851        struct ethtool_pauseparam ethpause = {
 852                .cmd = ETHTOOL_GPAUSEPARAM,
 853        };
 854        int ret;
 855
 856        ifr.ifr_data = (void *)&ethpause;
 857        if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
 858                ret = rte_errno;
 859                WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
 860                     " failed: %s",
 861                     strerror(rte_errno));
 862                goto out;
 863        }
 864        fc_conf->autoneg = ethpause.autoneg;
 865        if (ethpause.rx_pause && ethpause.tx_pause)
 866                fc_conf->mode = RTE_FC_FULL;
 867        else if (ethpause.rx_pause)
 868                fc_conf->mode = RTE_FC_RX_PAUSE;
 869        else if (ethpause.tx_pause)
 870                fc_conf->mode = RTE_FC_TX_PAUSE;
 871        else
 872                fc_conf->mode = RTE_FC_NONE;
 873        ret = 0;
 874out:
 875        MLX4_ASSERT(ret >= 0);
 876        return -ret;
 877}
 878
 879/**
 880 * DPDK callback to modify flow control parameters.
 881 *
 882 * @param dev
 883 *   Pointer to Ethernet device structure.
 884 * @param[in] fc_conf
 885 *   Flow control parameters.
 886 *
 887 * @return
 888 *   0 on success, negative errno value otherwise and rte_errno is set.
 889 */
 890int
 891mlx4_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
 892{
 893        struct mlx4_priv *priv = dev->data->dev_private;
 894        struct ifreq ifr;
 895        struct ethtool_pauseparam ethpause = {
 896                .cmd = ETHTOOL_SPAUSEPARAM,
 897        };
 898        int ret;
 899
 900        ifr.ifr_data = (void *)&ethpause;
 901        ethpause.autoneg = fc_conf->autoneg;
 902        if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
 903            (fc_conf->mode & RTE_FC_RX_PAUSE))
 904                ethpause.rx_pause = 1;
 905        else
 906                ethpause.rx_pause = 0;
 907        if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
 908            (fc_conf->mode & RTE_FC_TX_PAUSE))
 909                ethpause.tx_pause = 1;
 910        else
 911                ethpause.tx_pause = 0;
 912        if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
 913                ret = rte_errno;
 914                WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
 915                     " failed: %s",
 916                     strerror(rte_errno));
 917                goto out;
 918        }
 919        ret = 0;
 920out:
 921        MLX4_ASSERT(ret >= 0);
 922        return -ret;
 923}
 924
 925/**
 926 * DPDK callback to retrieve the received packet types that are recognized
 927 * by the device.
 928 *
 929 * @param dev
 930 *   Pointer to Ethernet device structure.
 931 *
 932 * @return
 933 *   Pointer to an array of recognized packet types if in Rx burst mode,
 934 *   NULL otherwise.
 935 */
 936const uint32_t *
 937mlx4_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 938{
 939        static const uint32_t ptypes[] = {
 940                /* refers to rxq_cq_to_pkt_type() */
 941                RTE_PTYPE_L2_ETHER,
 942                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
 943                RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
 944                RTE_PTYPE_L4_FRAG,
 945                RTE_PTYPE_L4_TCP,
 946                RTE_PTYPE_L4_UDP,
 947                RTE_PTYPE_UNKNOWN
 948        };
 949        static const uint32_t ptypes_l2tun[] = {
 950                /* refers to rxq_cq_to_pkt_type() */
 951                RTE_PTYPE_L2_ETHER,
 952                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
 953                RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
 954                RTE_PTYPE_L4_FRAG,
 955                RTE_PTYPE_L4_TCP,
 956                RTE_PTYPE_L4_UDP,
 957                RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
 958                RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
 959                RTE_PTYPE_UNKNOWN
 960        };
 961        struct mlx4_priv *priv = dev->data->dev_private;
 962
 963        if (dev->rx_pkt_burst == mlx4_rx_burst) {
 964                if (priv->hw_csum_l2tun)
 965                        return ptypes_l2tun;
 966                else
 967                        return ptypes;
 968        }
 969        return NULL;
 970}
 971
 972/**
 973 * Check if mlx4 device was removed.
 974 *
 975 * @param dev
 976 *   Pointer to Ethernet device structure.
 977 *
 978 * @return
 979 *   1 when device is removed, otherwise 0.
 980 */
 981int
 982mlx4_is_removed(struct rte_eth_dev *dev)
 983{
 984        struct ibv_device_attr device_attr;
 985        struct mlx4_priv *priv = dev->data->dev_private;
 986
 987        if (mlx4_glue->query_device(priv->ctx, &device_attr) == EIO)
 988                return 1;
 989        return 0;
 990}
 991