dpdk/drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <stdbool.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "eth_bond_private.h"
#include "eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
                struct rte_vlan_hdr *vlan_hdr =
                        (struct rte_vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct rte_vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct rte_vlan_hdr);
                }
        }
        return vlan_offset;
}

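/*
 * Illustrative sketch only (not driver code): how a caller combines
 * get_vlan_offset() with an mbuf to locate the L3 header, mirroring the
 * pattern used by the Tx hashing helpers further down. The helper name
 * is an assumption for the example.
 */
static __rte_unused void *
example_l3_hdr(struct rte_mbuf *m)
{
        struct rte_ether_hdr *eth_hdr =
                rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
        uint16_t proto = eth_hdr->ether_type; /* big-endian, as on the wire */
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        /* proto now holds the inner ethertype, vlan_offset the tag bytes */
        return (char *)(eth_hdr + 1) + vlan_offset;
}
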
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = bd_rx_q->active_slave;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++bd_rx_q->active_slave >= slave_count)
                bd_rx_q->active_slave = 0;
        return num_rx_total;
}

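/*
 * Illustrative sketch only: an application polls the bonded port like any
 * other ethdev and never sees the per-slave round-robin above. The queue
 * id and burst size below are assumptions for the example.
 */
static __rte_unused void
example_poll_bonded_port(uint16_t bonded_port_id)
{
        struct rte_mbuf *pkts[32];
        uint16_t nb;

        nb = rte_eth_rx_burst(bonded_port_id, 0, pkts, RTE_DIM(pkts));
        while (nb > 0)
                rte_pktmbuf_free(pkts[--nb]);
}
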
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & RTE_MBUF_F_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

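/*
 * Illustrative sketch only: the same test driven from a whole frame,
 * assuming an untagged packet; this mirrors how rx_burst_8023ad() below
 * feeds is_lacp_packets() from the parsed Ethernet header. The helper
 * name is an assumption for the example.
 */
static __rte_unused int
example_is_slow_frame(struct rte_mbuf *m)
{
        const struct slow_protocol_frame *frame =
                rte_pktmbuf_mtod(m, const struct slow_protocol_frame *);

        return is_lacp_packets(frame->eth_hdr.ether_type,
                        frame->slow_protocol.subtype, m);
}
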
/*****************************************************************************
 * Flow director setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        ret = rte_eth_dev_info_get(slave_port, &slave_info);
        if (ret != 0) {
                RTE_BOND_LOG(ERR,
                        "%s: Error getting device (port %u) info: %s",
                        __func__, slave_port, strerror(-ret));

                return ret;
        }

        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_eth_dev_info bond_info;
        uint16_t idx;
        int ret;

        /* Verify that all slaves in the bonding support flow director */
        if (internals->slave_count > 0) {
                ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
                if (ret != 0) {
                        RTE_BOND_LOG(ERR,
                                "%s: Error getting device (port %u) info: %s",
                                __func__, bond_dev->data->port_id,
                                strerror(-ret));

                        return ret;
                }

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

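/*
 * Illustrative sketch only: an application opts in to the dedicated
 * control-packet queues before configuring and starting the bonded port,
 * which is what causes the flow rule above to be installed on each slave.
 * rte_eth_bond_8023ad_dedicated_queues_enable() is the public API from
 * rte_eth_bond_8023ad.h; the wrapper name is an assumption.
 */
static __rte_unused int
example_enable_hw_slow_filter(uint16_t bonded_port_id)
{
        /* Only valid while the bonded device is stopped */
        return rte_eth_bond_8023ad_dedicated_queues_enable(bonded_port_id);
}
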
static inline uint16_t
rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
                bool dedicated_rxq)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct rte_ether_hdr *hdr;

        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
        const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
        uint8_t subtype;
        uint16_t i;
        uint16_t j;
        uint16_t k;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = bd_rx_q->active_slave;
        if (idx >= slave_count) {
                bd_rx_q->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if:
                         * - it is a slow packet but no dedicated rxq is present,
                         * - the slave is not in collecting state, or
                         * - the bonding interface is not in promiscuous mode and
                         *   - the packet is unicast with a non-matching address, or
                         *   - the packet is multicast and the bonding interface
                         *     is not in allmulti.
                         */
                        if (unlikely(
                                (!dedicated_rxq &&
                                 is_lacp_packets(hdr->ether_type, subtype,
                                                 bufs[j])) ||
                                !collecting ||
                                (!promisc &&
                                 ((rte_is_unicast_ether_addr(&hdr->dst_addr) &&
                                   !rte_is_same_ether_addr(bond_mac,
                                                       &hdr->dst_addr)) ||
                                  (!allmulti &&
                                   rte_is_multicast_ether_addr(&hdr->dst_addr)))))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++bd_rx_q->active_slave >= slave_count)
                bd_rx_q->active_slave = 0;

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        return rx_burst_8023ad(queue, bufs, nb_pkts, false);
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        return rx_burst_8023ad(queue, bufs, nb_pkts, true);
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
        switch (arp_op) {
        case RTE_ARP_OP_REQUEST:
                strlcpy(buf, "ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REPLY:
                strlcpy(buf, "ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_REVREQUEST:
                strlcpy(buf, "Reverse ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REVREPLY:
                strlcpy(buf, "Reverse ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_INVREQUEST:
                strlcpy(buf, "Peer Identify Request", buf_len);
                return;
        case RTE_ARP_OP_INVREPLY:
                strlcpy(buf, "Peer Identify Reply", buf_len);
                return;
        default:
                break;
        }
        strlcpy(buf, "Unknown", buf_len);
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:" RTE_ETHER_ADDR_PRT_FMT " SrcIP:%s " \
                "DstMAC:" RTE_ETHER_ADDR_PRT_FMT " DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                RTE_ETHER_ADDR_BYTES(&eth_h->src_addr),                  \
                src_ip,                                                 \
                RTE_ETHER_ADDR_BYTES(&eth_h->dst_addr),                  \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __rte_unused *info,
        struct rte_ether_hdr *eth_h, uint16_t port,
        uint32_t __rte_unused *burstnumber)
{
        struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct rte_arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
                ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
                                ArpOp, sizeof(ArpOp));
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* If tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->src_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->dst_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

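/*
 * Illustrative sketch only: what the L2 policy computes for one packet.
 * Folding the high byte of the 16-bit XOR into the low byte lets every
 * address bit influence the slave chosen by the modulo. The helper name
 * is an assumption for the example.
 */
static __rte_unused uint16_t
example_l2_slave_for(struct rte_mbuf *m, uint16_t slave_count)
{
        struct rte_ether_hdr *eth_hdr =
                rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
        uint32_t hash = ether_hash(eth_hdr);

        hash ^= hash >> 8;
        return hash % slave_count;
}
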
void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct rte_udp_hdr *udp_hdr;
        struct rte_tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & RTE_IPV4_HDR_IHL_MASK) *
                                        RTE_IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct rte_tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct rte_udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)udp_hdr + sizeof(*udp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

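/*
 * Worked example of the final fold above: for l3hash ^ l4hash =
 * 0x12345678, "hash ^= hash >> 16" gives 0x1234444c and the following
 * "hash ^= hash >> 8" gives 0x12267008, so the upper address and port
 * bytes reach the low bits that "hash % slave_count" actually uses.
 */
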
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;
        int ret;

        ret = rte_eth_link_get_nowait(port_id, &link_status);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
                             port_id, rte_strerror(-ret));
                return;
        }
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

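/*
 * Worked example for the scaling above (an illustration, not driver
 * code): a 10GbE slave reports link_speed = 10000 (Mbps), so link_bwg
 * starts as 10000 * 1000000 / 8 = 1.25e9 bytes/s; with update_idx = 0 it
 * is then scaled by 1 * REORDER_PERIOD_MS = 10. The quotient/remainder
 * pair is only used for the relative ordering done by bandwidth_cmp()
 * above.
 */
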
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t slave_id;
        uint16_t i;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct rte_ether_hdr *ether_hdr;
        struct rte_ether_addr primary_slave_addr;
        struct rte_ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void *));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j],
                                                struct rte_ether_hdr *);
                        if (rte_is_same_ether_addr(&ether_hdr->src_addr,
                                                        &primary_slave_addr))
                                rte_ether_addr_copy(&active_slave_addr,
                                                &ether_hdr->src_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

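/*
 * Illustrative sketch only: the callback above reschedules itself with
 * rte_eal_alarm_set(), so a single bond_tlb_enable() keeps re-sorting the
 * slaves every REORDER_PERIOD_MS until bond_tlb_disable() cancels the
 * pending alarm. The wrapper name is an assumption for the example.
 */
static __rte_unused void
example_tlb_rebalance_window(struct bond_dev_private *internals)
{
        bond_tlb_enable(internals);     /* arms the self-rescheduling alarm */
        /* ... transmit in mode 5 while slaves are re-ordered ... */
        bond_tlb_disable(internals);    /* cancels the pending callback */
}
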
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search the tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->src_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct rte_ether_hdr) +
                                        sizeof(struct rte_arp_hdr) +
                                        client_info->vlan_count *
                                        sizeof(struct rte_vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on the proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on the proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

static inline uint16_t
tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
                 uint16_t *slave_port_ids, uint16_t slave_count)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave with a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

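/*
 * Illustrative sketch only: which burst_xmit_*_hash callback the
 * internals->burst_xmit_hash pointer above resolves to is chosen by the
 * application through the public API in rte_eth_bond.h; the wrapper name
 * is an assumption for the example.
 */
static __rte_unused int
example_select_l34_policy(uint16_t bonded_port_id)
{
        return rte_eth_bond_xmit_policy_set(bonded_port_id,
                        BALANCE_XMIT_POLICY_LAYER34);
}
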
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting
         */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);
        return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
                                slave_count);
}

static inline uint16_t
tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
                bool dedicated_txq)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        uint16_t slave_tx_count;

        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        if (dedicated_txq)
                goto skip_tx_ring;

        /* Check for LACP control packets and send if available */
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
                struct rte_mbuf *ctrl_pkt = NULL;

                if (likely(rte_ring_empty(port->tx_ring)))
                        continue;

                if (rte_ring_dequeue(port->tx_ring,
                                     (void **)&ctrl_pkt) != -ENOENT) {
                        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                        bd_tx_q->queue_id, &ctrl_pkt, 1);
                        /*
                         * re-enqueue LAG control plane packets to buffering
                         * ring if transmission fails so the packet isn't lost.
                         */
                        if (slave_tx_count != 1)
                                rte_ring_enqueue(port->tx_ring, ctrl_pkt);
                }
        }

skip_tx_ring:
        if (unlikely(nb_bufs == 0))
                return 0;

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
                                dist_slave_count);
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        return tx_burst_8023ad(queue, bufs, nb_bufs, false);
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        return tx_burst_8023ad(queue, bufs, nb_bufs, true);
}

static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint8_t tx_failed_flag = 0;
        uint16_t num_of_slaves;

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* Record the count and index of the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* If slaves fail to transmit packets from the burst, the calling
         * application is not expected to know about multiple references to
         * packets, so we must handle failures of all packets except those of
         * the most successful slave
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}

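/*
 * Illustrative sketch only: why the refcount bump above works. With
 * num_of_slaves owners of each mbuf, every successful transmission
 * eventually drops one reference, so only the copies that a slave failed
 * to send must be freed by hand. The helper name is an assumption.
 */
static __rte_unused void
example_share_mbuf_across_slaves(struct rte_mbuf *m, uint16_t nb_owners)
{
        /* One mbuf, nb_owners references: each rte_pktmbuf_free() only
         * drops a reference until the last owner returns it to the pool. */
        rte_mbuf_refcnt_update(m, nb_owners - 1);
}
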
static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
        struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

        if (bond_ctx->mode == BONDING_MODE_8023AD) {
                /**
                 * If in mode 4 then save the link properties of the first
                 * slave, all subsequent slaves must match these properties
                 */
                struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

                bond_link->link_autoneg = slave_link->link_autoneg;
                bond_link->link_duplex = slave_link->link_duplex;
                bond_link->link_speed = slave_link->link_speed;
        } else {
                /**
                 * In any other mode the link properties are set to default
                 * values of AUTONEG/DUPLEX
                 */
                ethdev->data->dev_link.link_autoneg = RTE_ETH_LINK_AUTONEG;
                ethdev->data->dev_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
        }
}

static int
link_properties_valid(struct rte_eth_dev *ethdev,
                struct rte_eth_link *slave_link)
{
        struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

        if (bond_ctx->mode == BONDING_MODE_8023AD) {
                struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

                if (bond_link->link_duplex != slave_link->link_duplex ||
                        bond_link->link_autoneg != slave_link->link_autoneg ||
                        bond_link->link_speed != slave_link->link_speed)
                        return -1;
        }

        return 0;
}

1395int
1396mac_address_get(struct rte_eth_dev *eth_dev,
1397                struct rte_ether_addr *dst_mac_addr)
1398{
1399        struct rte_ether_addr *mac_addr;
1400
1401        if (eth_dev == NULL) {
1402                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1403                return -1;
1404        }
1405
1406        if (dst_mac_addr == NULL) {
1407                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1408                return -1;
1409        }
1410
1411        mac_addr = eth_dev->data->mac_addrs;
1412
1413        rte_ether_addr_copy(mac_addr, dst_mac_addr);
1414        return 0;
1415}
1416
1417int
1418mac_address_set(struct rte_eth_dev *eth_dev,
1419                struct rte_ether_addr *new_mac_addr)
1420{
1421        struct rte_ether_addr *mac_addr;
1422
1423        if (eth_dev == NULL) {
1424                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1425                return -1;
1426        }
1427
1428        if (new_mac_addr == NULL) {
1429                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1430                return -1;
1431        }
1432
1433        mac_addr = eth_dev->data->mac_addrs;
1434
1435        /* If the new MAC differs from the current MAC then update it */
1436        if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1437                memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1438
1439        return 0;
1440}
1441
1442static const struct rte_ether_addr null_mac_addr;
1443
1444/*
1445 * Add additional MAC addresses to the slave
1446 */
1447int
1448slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1449                uint16_t slave_port_id)
1450{
1451        int i, ret;
1452        struct rte_ether_addr *mac_addr;
1453
1454        for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1455                mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1456                if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1457                        break;
1458
1459                ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1460                if (ret < 0) {
1461                        /* rollback */
1462                        for (i--; i > 0; i--)
1463                                rte_eth_dev_mac_addr_remove(slave_port_id,
1464                                        &bonded_eth_dev->data->mac_addrs[i]);
1465                        return ret;
1466                }
1467        }
1468
1469        return 0;
1470}
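
/*
 * Illustrative sketch (not part of the driver): the rollback above follows
 * the usual undo-what-succeeded idiom for batched operations. Assuming
 * hypothetical do_step()/undo_step() helpers over n steps:
 *
 *	for (i = 0; i < n; i++)
 *		if (do_step(i) < 0) {
 *			while (i-- > 0)
 *				undo_step(i);	// undo in reverse order
 *			return -1;
 *		}
 *
 * On failure the slave is left with exactly the MAC address set it had
 * before the call, rather than a partially applied one.
 */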
1471
1472/*
1473 * Remove additional MAC addresses from the slave
1474 */
1475int
1476slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1477                uint16_t slave_port_id)
1478{
1479        int i, rc, ret;
1480        struct rte_ether_addr *mac_addr;
1481
1482        rc = 0;
1483        for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1484                mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1485                if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1486                        break;
1487
1488                ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1489                /* save only the first error */
1490                if (ret < 0 && rc == 0)
1491                        rc = ret;
1492        }
1493
1494        return rc;
1495}
1496
1497int
1498mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1499{
1500        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1501        bool set;
1502        int i;
1503
1504        /* Update the slave devices' MAC addresses */
1505        if (internals->slave_count < 1)
1506                return -1;
1507
1508        switch (internals->mode) {
1509        case BONDING_MODE_ROUND_ROBIN:
1510        case BONDING_MODE_BALANCE:
1511        case BONDING_MODE_BROADCAST:
1512                for (i = 0; i < internals->slave_count; i++) {
1513                        if (rte_eth_dev_default_mac_addr_set(
1514                                        internals->slaves[i].port_id,
1515                                        bonded_eth_dev->data->mac_addrs)) {
1516                                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1517                                                internals->slaves[i].port_id);
1518                                return -1;
1519                        }
1520                }
1521                break;
1522        case BONDING_MODE_8023AD:
1523                bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1524                break;
1525        case BONDING_MODE_ACTIVE_BACKUP:
1526        case BONDING_MODE_TLB:
1527        case BONDING_MODE_ALB:
1528        default:
1529                set = true;
1530                for (i = 0; i < internals->slave_count; i++) {
1531                        if (internals->slaves[i].port_id ==
1532                                        internals->current_primary_port) {
1533                                if (rte_eth_dev_default_mac_addr_set(
1534                                                internals->current_primary_port,
1535                                                bonded_eth_dev->data->mac_addrs)) {
1536                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1537                                                        internals->current_primary_port);
1538                                        set = false;
1539                                }
1540                        } else {
1541                                if (rte_eth_dev_default_mac_addr_set(
1542                                                internals->slaves[i].port_id,
1543                                                &internals->slaves[i].persisted_mac_addr)) {
1544                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1545                                                        internals->slaves[i].port_id);
1546                                }
1547                        }
1548                }
1549                if (!set)
1550                        return -1;
1551        }
1552
1553        return 0;
1554}
1555
1556int
1557bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1558{
1559        struct bond_dev_private *internals;
1560
1561        internals = eth_dev->data->dev_private;
1562
1563        switch (mode) {
1564        case BONDING_MODE_ROUND_ROBIN:
1565                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1566                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1567                break;
1568        case BONDING_MODE_ACTIVE_BACKUP:
1569                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1570                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1571                break;
1572        case BONDING_MODE_BALANCE:
1573                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1574                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1575                break;
1576        case BONDING_MODE_BROADCAST:
1577                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1578                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579                break;
1580        case BONDING_MODE_8023AD:
1581                if (bond_mode_8023ad_enable(eth_dev) != 0)
1582                        return -1;
1583
1584                if (internals->mode4.dedicated_queues.enabled == 0) {
1585                        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1586                        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1587                        RTE_BOND_LOG(WARNING,
1588                                "Using mode 4, it is necessary to do TX burst "
1589                                "and RX burst at least every 100ms.");
1590                } else {
1591                        /* Use flow director's optimization */
1592                        eth_dev->rx_pkt_burst =
1593                                        bond_ethdev_rx_burst_8023ad_fast_queue;
1594                        eth_dev->tx_pkt_burst =
1595                                        bond_ethdev_tx_burst_8023ad_fast_queue;
1596                }
1597                break;
1598        case BONDING_MODE_TLB:
1599                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1600                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1601                break;
1602        case BONDING_MODE_ALB:
1603                if (bond_mode_alb_enable(eth_dev) != 0)
1604                        return -1;
1605
1606                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1607                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1608                break;
1609        default:
1610                return -1;
1611        }
1612
1613        internals->mode = mode;
1614
1615        return 0;
1616}
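
/*
 * Illustrative usage sketch (application side, not part of the driver),
 * assuming two already-probed slave ports 0 and 1; bond_port is an
 * illustrative variable name:
 *
 *	#include <rte_eth_bond.h>
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_BALANCE, rte_socket_id());
 *	if (bond_port < 0)
 *		rte_exit(EXIT_FAILURE, "cannot create bonded device\n");
 *	rte_eth_bond_slave_add(bond_port, 0);
 *	rte_eth_bond_slave_add(bond_port, 1);
 *
 * A later rte_eth_bond_mode_set() call lands in bond_ethdev_mode_set()
 * above, which swaps the rx/tx burst functions for the selected mode.
 */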
1617
1618
1619static int
1620slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1621                struct rte_eth_dev *slave_eth_dev)
1622{
1623        int errval = 0;
1624        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1625        struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1626
1627        if (port->slow_pool == NULL) {
1628                char mem_name[256];
1629                int slave_id = slave_eth_dev->data->port_id;
1630
1631                snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1632                                slave_id);
1633                port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1634                        250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1635                        slave_eth_dev->data->numa_node);
1636
1637                /* Any memory allocation failure in initialization is critical because
1638                 * resources can't be freed, so reinitialization is impossible. */
1639                if (port->slow_pool == NULL) {
1640                        rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1641                                slave_id, mem_name, rte_strerror(rte_errno));
1642                }
1643        }
1644
1645        if (internals->mode4.dedicated_queues.enabled == 1) {
1646                /* Configure slow Rx queue */
1647
1648                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1649                                internals->mode4.dedicated_queues.rx_qid, 128,
1650                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1651                                NULL, port->slow_pool);
1652                if (errval != 0) {
1653                        RTE_BOND_LOG(ERR,
1654                                        "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1655                                        slave_eth_dev->data->port_id,
1656                                        internals->mode4.dedicated_queues.rx_qid,
1657                                        errval);
1658                        return errval;
1659                }
1660
1661                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1662                                internals->mode4.dedicated_queues.tx_qid, 512,
1663                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1664                                NULL);
1665                if (errval != 0) {
1666                        RTE_BOND_LOG(ERR,
1667                                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1668                                slave_eth_dev->data->port_id,
1669                                internals->mode4.dedicated_queues.tx_qid,
1670                                errval);
1671                        return errval;
1672                }
1673        }
1674        return 0;
1675}
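
/*
 * Illustrative sketch (application side, not part of the driver): the slow
 * queue above is only configured when the application has opted in to
 * dedicated hardware queues for LACP control traffic before starting the
 * bonded port (bond_port is an assumed, illustrative port id), e.g.
 *
 *	rte_eth_bond_8023ad_dedicated_queues_enable(bond_port);
 *	rte_eth_dev_start(bond_port);
 *
 * With this enabled, each slave gets an extra rx/tx queue pair and flow
 * rules steer LACPDUs to it, keeping control traffic out of the data path.
 */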
1676
1677int
1678slave_configure(struct rte_eth_dev *bonded_eth_dev,
1679                struct rte_eth_dev *slave_eth_dev)
1680{
1681        struct bond_rx_queue *bd_rx_q;
1682        struct bond_tx_queue *bd_tx_q;
1683        uint16_t nb_rx_queues;
1684        uint16_t nb_tx_queues;
1685
1686        int errval;
1687        uint16_t q_id;
1688        struct rte_flow_error flow_error;
1689
1690        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1691
1692        /* Stop slave */
1693        errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
1694        if (errval != 0)
1695                RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1696                             slave_eth_dev->data->port_id, errval);
1697
1698        /* Enable interrupts on slave device if supported */
1699        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1700                slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1701
1702        /* If RSS is enabled for bonding, try to enable it for slaves */
1703        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
1704                /* rss_key won't be empty if RSS is configured in bonded dev */
1705                slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1706                                        internals->rss_key_len;
1707                slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1708                                        internals->rss_key;
1709
1710                slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1711                                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1712                slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1713                                bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1714        }
1715
1716        if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1717                        RTE_ETH_RX_OFFLOAD_VLAN_FILTER)
1718                slave_eth_dev->data->dev_conf.rxmode.offloads |=
1719                                RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
1720        else
1721                slave_eth_dev->data->dev_conf.rxmode.offloads &=
1722                                ~RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
1723
1724        slave_eth_dev->data->dev_conf.rxmode.mtu =
1725                        bonded_eth_dev->data->dev_conf.rxmode.mtu;
1726
1727        nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1728        nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1729
1730        if (internals->mode == BONDING_MODE_8023AD) {
1731                if (internals->mode4.dedicated_queues.enabled == 1) {
1732                        nb_rx_queues++;
1733                        nb_tx_queues++;
1734                }
1735        }
1736
1737        errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1738                                     bonded_eth_dev->data->mtu);
1739        if (errval != 0 && errval != -ENOTSUP) {
1740                RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1741                                slave_eth_dev->data->port_id, errval);
1742                return errval;
1743        }
1744
1745        /* Configure device */
1746        errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1747                        nb_rx_queues, nb_tx_queues,
1748                        &(slave_eth_dev->data->dev_conf));
1749        if (errval != 0) {
1750                RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1751                                slave_eth_dev->data->port_id, errval);
1752                return errval;
1753        }
1754
1755        /* Setup Rx Queues */
1756        for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1757                bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1758
1759                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1760                                bd_rx_q->nb_rx_desc,
1761                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1762                                &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1763                if (errval != 0) {
1764                        RTE_BOND_LOG(ERR,
1765                                        "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1766                                        slave_eth_dev->data->port_id, q_id, errval);
1767                        return errval;
1768                }
1769        }
1770
1771        /* Setup Tx Queues */
1772        for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1773                bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1774
1775                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1776                                bd_tx_q->nb_tx_desc,
1777                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1778                                &bd_tx_q->tx_conf);
1779                if (errval != 0) {
1780                        RTE_BOND_LOG(ERR,
1781                                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1782                                slave_eth_dev->data->port_id, q_id, errval);
1783                        return errval;
1784                }
1785        }
1786
1787        if (internals->mode == BONDING_MODE_8023AD &&
1788                        internals->mode4.dedicated_queues.enabled == 1) {
1789                errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1790                if (errval != 0)
1791                        return errval;
1792
1793                errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1794                                slave_eth_dev->data->port_id);
1795                if (errval != 0) {
1796                        RTE_BOND_LOG(ERR,
1797                                "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1798                                slave_eth_dev->data->port_id, errval);
1799                        return errval;
1800                }
1801
1802                if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1803                        rte_flow_destroy(slave_eth_dev->data->port_id,
1804                                        internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1805                                        &flow_error);
1806
1807                errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1808                                slave_eth_dev->data->port_id);
1809                if (errval != 0) {
1810                        RTE_BOND_LOG(ERR,
1811                                "bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
1812                                slave_eth_dev->data->port_id, errval);
1813                        return errval;
1814                }
1815        }
1816
1817        /* Start device */
1818        errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1819        if (errval != 0) {
1820                RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1821                                slave_eth_dev->data->port_id, errval);
1822                return -1;
1823        }
1824
1825        /* If RSS is enabled for bonding, synchronize RETA */
1826        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
1827                int i;
1828                struct bond_dev_private *internals;
1829
1830                internals = bonded_eth_dev->data->dev_private;
1831
1832                for (i = 0; i < internals->slave_count; i++) {
1833                        if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1834                                errval = rte_eth_dev_rss_reta_update(
1835                                                slave_eth_dev->data->port_id,
1836                                                &internals->reta_conf[0],
1837                                                internals->slaves[i].reta_size);
1838                                if (errval != 0) {
1839                                        RTE_BOND_LOG(WARNING,
1840                                                     "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1841                                                     " RSS Configuration for bonding may be inconsistent.",
1842                                                     slave_eth_dev->data->port_id, errval);
1843                                }
1844                                break;
1845                        }
1846                }
1847        }
1848
1849        /* If lsc interrupt is set, check initial slave's link status */
1850        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1851                slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1852                bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1853                        RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1854                        NULL);
1855        }
1856
1857        return 0;
1858}
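
/*
 * Illustrative sketch (application side, not part of the driver): because
 * slave_configure() mirrors the bonded port's RSS settings, MTU and VLAN
 * filter offload to every slave, it is enough to configure them once on
 * the bonded port. Assuming bond_port, nb_rxq and nb_txq are defined by
 * the application:
 *
 *	struct rte_eth_conf conf = {
 *		.rxmode = { .mq_mode = RTE_ETH_MQ_RX_RSS },
 *		.rx_adv_conf.rss_conf = {
 *			.rss_key = NULL,	// let the PMD choose a key
 *			.rss_hf = RTE_ETH_RSS_IP,
 *		},
 *	};
 *	rte_eth_dev_configure(bond_port, nb_rxq, nb_txq, &conf);
 */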
1859
1860void
1861slave_remove(struct bond_dev_private *internals,
1862                struct rte_eth_dev *slave_eth_dev)
1863{
1864        uint16_t i;
1865
1866        for (i = 0; i < internals->slave_count; i++)
1867                if (internals->slaves[i].port_id ==
1868                                slave_eth_dev->data->port_id)
1869                        break;
1870
1871        if (i < (internals->slave_count - 1)) {
1872                struct rte_flow *flow;
1873
1874                memmove(&internals->slaves[i], &internals->slaves[i + 1],
1875                                sizeof(internals->slaves[0]) *
1876                                (internals->slave_count - i - 1));
1877                TAILQ_FOREACH(flow, &internals->flow_list, next) {
1878                        memmove(&flow->flows[i], &flow->flows[i + 1],
1879                                sizeof(flow->flows[0]) *
1880                                (internals->slave_count - i - 1));
1881                        flow->flows[internals->slave_count - 1] = NULL;
1882                }
1883        }
1884
1885        internals->slave_count--;
1886
1887        /* force reconfiguration of slave interfaces */
1888        rte_eth_dev_internal_reset(slave_eth_dev);
1889}
1890
1891static void
1892bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1893
1894void
1895slave_add(struct bond_dev_private *internals,
1896                struct rte_eth_dev *slave_eth_dev)
1897{
1898        struct bond_slave_details *slave_details =
1899                        &internals->slaves[internals->slave_count];
1900
1901        slave_details->port_id = slave_eth_dev->data->port_id;
1902        slave_details->last_link_status = 0;
1903
1904        /* Mark slave devices that don't support interrupts so we can
1905         * compensate when we start the bond
1906         */
1907        if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1908                slave_details->link_status_poll_enabled = 1;
1909        }
1910
1911        slave_details->link_status_wait_to_complete = 0;
1912        /* Save the slave's current MAC address so it can be restored later */
1913        memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1914                        sizeof(struct rte_ether_addr));
1915}
1916
1917void
1918bond_ethdev_primary_set(struct bond_dev_private *internals,
1919                uint16_t slave_port_id)
1920{
1921        int i;
1922
1923        if (internals->active_slave_count < 1)
1924                internals->current_primary_port = slave_port_id;
1925        else
1926                /* Search bonded device slave ports for new proposed primary port */
1927                for (i = 0; i < internals->active_slave_count; i++) {
1928                        if (internals->active_slaves[i] == slave_port_id)
1929                                internals->current_primary_port = slave_port_id;
1930                }
1931}
1932
1933static int
1934bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1935
1936static int
1937bond_ethdev_start(struct rte_eth_dev *eth_dev)
1938{
1939        struct bond_dev_private *internals;
1940        int i;
1941
1942        /* slave eth dev will be started by bonded device */
1943        if (check_for_bonded_ethdev(eth_dev)) {
1944                RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1945                                eth_dev->data->port_id);
1946                return -1;
1947        }
1948
1949        eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
1950        eth_dev->data->dev_started = 1;
1951
1952        internals = eth_dev->data->dev_private;
1953
1954        if (internals->slave_count == 0) {
1955                RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1956                goto out_err;
1957        }
1958
1959        if (internals->user_defined_mac == 0) {
1960                struct rte_ether_addr *new_mac_addr = NULL;
1961
1962                for (i = 0; i < internals->slave_count; i++)
1963                        if (internals->slaves[i].port_id == internals->primary_port)
1964                                new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1965
1966                if (new_mac_addr == NULL)
1967                        goto out_err;
1968
1969                if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1970                        RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1971                                        eth_dev->data->port_id);
1972                        goto out_err;
1973                }
1974        }
1975
1976        if (internals->mode == BONDING_MODE_8023AD) {
1977                if (internals->mode4.dedicated_queues.enabled == 1) {
1978                        internals->mode4.dedicated_queues.rx_qid =
1979                                        eth_dev->data->nb_rx_queues;
1980                        internals->mode4.dedicated_queues.tx_qid =
1981                                        eth_dev->data->nb_tx_queues;
1982                }
1983        }
1984
1985
1986        /* Reconfigure each slave device if starting bonded device */
1987        for (i = 0; i < internals->slave_count; i++) {
1988                struct rte_eth_dev *slave_ethdev =
1989                                &(rte_eth_devices[internals->slaves[i].port_id]);
1990                if (slave_configure(eth_dev, slave_ethdev) != 0) {
1991                        RTE_BOND_LOG(ERR,
1992                                "bonded port (%d) failed to reconfigure slave device (%d)",
1993                                eth_dev->data->port_id,
1994                                internals->slaves[i].port_id);
1995                        goto out_err;
1996                }
1997                /* We will need to poll for link status if any slave doesn't
1998                 * support interrupts
1999                 */
2000                if (internals->slaves[i].link_status_poll_enabled)
2001                        internals->link_status_polling_enabled = 1;
2002        }
2003
2004        /* start polling if needed */
2005        if (internals->link_status_polling_enabled) {
2006                rte_eal_alarm_set(
2007                        internals->link_status_polling_interval_ms * 1000,
2008                        bond_ethdev_slave_link_status_change_monitor,
2009                        (void *)&rte_eth_devices[internals->port_id]);
2010        }
2011
2012        /* Update all slave devices' MAC addresses */
2013        if (mac_address_slaves_update(eth_dev) != 0)
2014                goto out_err;
2015
2016        if (internals->user_defined_primary_port)
2017                bond_ethdev_primary_set(internals, internals->primary_port);
2018
2019        if (internals->mode == BONDING_MODE_8023AD)
2020                bond_mode_8023ad_start(eth_dev);
2021
2022        if (internals->mode == BONDING_MODE_TLB ||
2023                        internals->mode == BONDING_MODE_ALB)
2024                bond_tlb_enable(internals);
2025
2026        return 0;
2027
2028out_err:
2029        eth_dev->data->dev_started = 0;
2030        return -1;
2031}
2032
2033static void
2034bond_ethdev_free_queues(struct rte_eth_dev *dev)
2035{
2036        uint16_t i;
2037
2038        if (dev->data->rx_queues != NULL) {
2039                for (i = 0; i < dev->data->nb_rx_queues; i++) {
2040                        rte_free(dev->data->rx_queues[i]);
2041                        dev->data->rx_queues[i] = NULL;
2042                }
2043                dev->data->nb_rx_queues = 0;
2044        }
2045
2046        if (dev->data->tx_queues != NULL) {
2047                for (i = 0; i < dev->data->nb_tx_queues; i++) {
2048                        rte_free(dev->data->tx_queues[i]);
2049                        dev->data->tx_queues[i] = NULL;
2050                }
2051                dev->data->nb_tx_queues = 0;
2052        }
2053}
2054
2055int
2056bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2057{
2058        struct bond_dev_private *internals = eth_dev->data->dev_private;
2059        uint16_t i;
2060        int ret;
2061
2062        if (internals->mode == BONDING_MODE_8023AD) {
2063                struct port *port;
2064                void *pkt = NULL;
2065
2066                bond_mode_8023ad_stop(eth_dev);
2067
2068                /* Discard all messages to/from mode 4 state machines */
2069                for (i = 0; i < internals->active_slave_count; i++) {
2070                        port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2071
2072                        RTE_ASSERT(port->rx_ring != NULL);
2073                        while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2074                                rte_pktmbuf_free(pkt);
2075
2076                        RTE_ASSERT(port->tx_ring != NULL);
2077                        while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2078                                rte_pktmbuf_free(pkt);
2079                }
2080        }
2081
2082        if (internals->mode == BONDING_MODE_TLB ||
2083                        internals->mode == BONDING_MODE_ALB) {
2084                bond_tlb_disable(internals);
2085                for (i = 0; i < internals->active_slave_count; i++)
2086                        tlb_last_obytets[internals->active_slaves[i]] = 0;
2087        }
2088
2089        eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
2090        eth_dev->data->dev_started = 0;
2091
2092        internals->link_status_polling_enabled = 0;
2093        for (i = 0; i < internals->slave_count; i++) {
2094                uint16_t slave_id = internals->slaves[i].port_id;
2095                if (find_slave_by_id(internals->active_slaves,
2096                                internals->active_slave_count, slave_id) !=
2097                                                internals->active_slave_count) {
2098                        internals->slaves[i].last_link_status = 0;
2099                        ret = rte_eth_dev_stop(slave_id);
2100                        if (ret != 0) {
2101                                RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2102                                             slave_id);
2103                                return ret;
2104                        }
2105                        deactivate_slave(eth_dev, slave_id);
2106                }
2107        }
2108
2109        return 0;
2110}
2111
2112int
2113bond_ethdev_close(struct rte_eth_dev *dev)
2114{
2115        struct bond_dev_private *internals = dev->data->dev_private;
2116        uint16_t bond_port_id = internals->port_id;
2117        int skipped = 0;
2118        struct rte_flow_error ferror;
2119
2120        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2121                return 0;
2122
2123        RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2124        while (internals->slave_count != skipped) {
2125                uint16_t port_id = internals->slaves[skipped].port_id;
2126
2127                if (rte_eth_dev_stop(port_id) != 0) {
2128                        RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2129                                     port_id);
2130                        skipped++;
2131                }
2132
2133                if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2134                        RTE_BOND_LOG(ERR,
2135                                     "Failed to remove port %d from bonded device %s",
2136                                     port_id, dev->device->name);
2137                        skipped++;
2138                }
2139        }
2140        bond_flow_ops.flush(dev, &ferror);
2141        bond_ethdev_free_queues(dev);
2142        rte_bitmap_reset(internals->vlan_filter_bmp);
2143        rte_bitmap_free(internals->vlan_filter_bmp);
2144        rte_free(internals->vlan_filter_bmpmem);
2145
2146        /* Try to release the mempool used in mode 6. If the bonded device is
2147         * not in mode 6, the pointer is NULL and freeing it is harmless.
2148         */
2149        rte_mempool_free(internals->mode6.mempool);
2150
2151        if (internals->kvlist != NULL)
2152                rte_kvargs_free(internals->kvlist);
2153
2154        return 0;
2155}
2156
2157/* forward declaration */
2158static int bond_ethdev_configure(struct rte_eth_dev *dev);
2159
2160static int
2161bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2162{
2163        struct bond_dev_private *internals = dev->data->dev_private;
2164        struct bond_slave_details slave;
2165        int ret;
2166
2167        uint16_t max_nb_rx_queues = UINT16_MAX;
2168        uint16_t max_nb_tx_queues = UINT16_MAX;
2169        uint16_t max_rx_desc_lim = UINT16_MAX;
2170        uint16_t max_tx_desc_lim = UINT16_MAX;
2171
2172        dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2173
2174        dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2175                        internals->candidate_max_rx_pktlen :
2176                        RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2177
2178        /* The maximum number of tx/rx queues that the bonded device can support
2179         * is the minimum across the bonded slaves, as all slaves must be capable
2180         * of supporting the same number of tx/rx queues.
2181         */
2182        if (internals->slave_count > 0) {
2183                struct rte_eth_dev_info slave_info;
2184                uint16_t idx;
2185
2186                for (idx = 0; idx < internals->slave_count; idx++) {
2187                        slave = internals->slaves[idx];
2188                        ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
2189                        if (ret != 0) {
2190                                RTE_BOND_LOG(ERR,
2191                                        "%s: Error getting device (port %u) info: %s",
2192                                        __func__,
2193                                        slave.port_id,
2194                                        strerror(-ret));
2195
2196                                return ret;
2197                        }
2198
2199                        if (slave_info.max_rx_queues < max_nb_rx_queues)
2200                                max_nb_rx_queues = slave_info.max_rx_queues;
2201
2202                        if (slave_info.max_tx_queues < max_nb_tx_queues)
2203                                max_nb_tx_queues = slave_info.max_tx_queues;
2204
2205                        if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2206                                max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2207
2208                        if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2209                                max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2210                }
2211        }
2212
2213        dev_info->max_rx_queues = max_nb_rx_queues;
2214        dev_info->max_tx_queues = max_nb_tx_queues;
2215
2216        memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2217               sizeof(dev_info->default_rxconf));
2218        memcpy(&dev_info->default_txconf, &internals->default_txconf,
2219               sizeof(dev_info->default_txconf));
2220
2221        dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2222        dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2223
2224        /**
2225         * If dedicated hw queues are enabled for the link bonding device in LACP mode
2226         * then we need to reduce the maximum number of data path queues by 1.
2227         */
2228        if (internals->mode == BONDING_MODE_8023AD &&
2229                internals->mode4.dedicated_queues.enabled == 1) {
2230                dev_info->max_rx_queues--;
2231                dev_info->max_tx_queues--;
2232        }
2233
2234        dev_info->min_rx_bufsize = 0;
2235
2236        dev_info->rx_offload_capa = internals->rx_offload_capa;
2237        dev_info->tx_offload_capa = internals->tx_offload_capa;
2238        dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2239        dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2240        dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2241
2242        dev_info->reta_size = internals->reta_size;
2243        dev_info->hash_key_size = internals->rss_key_len;
2244
2245        return 0;
2246}
2247
2248static int
2249bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2250{
2251        int res;
2252        uint16_t i;
2253        struct bond_dev_private *internals = dev->data->dev_private;
2254
2255        /* don't do this while a slave is being added */
2256        rte_spinlock_lock(&internals->lock);
2257
2258        if (on)
2259                rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2260        else
2261                rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2262
2263        for (i = 0; i < internals->slave_count; i++) {
2264                uint16_t port_id = internals->slaves[i].port_id;
2265
2266                res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2267                if (res == -ENOTSUP)
2268                        RTE_BOND_LOG(WARNING,
2269                                     "Setting VLAN filter on slave port %u not supported.",
2270                                     port_id);
2271        }
2272
2273        rte_spinlock_unlock(&internals->lock);
2274        return 0;
2275}
2276
2277static int
2278bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2279                uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2280                const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2281{
2282        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2283                        rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2284                                        0, dev->data->numa_node);
2285        if (bd_rx_q == NULL)
2286                return -1;
2287
2288        bd_rx_q->queue_id = rx_queue_id;
2289        bd_rx_q->dev_private = dev->data->dev_private;
2290
2291        bd_rx_q->nb_rx_desc = nb_rx_desc;
2292
2293        memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2294        bd_rx_q->mb_pool = mb_pool;
2295
2296        dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2297
2298        return 0;
2299}
2300
2301static int
2302bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2303                uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2304                const struct rte_eth_txconf *tx_conf)
2305{
2306        struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2307                        rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2308                                        0, dev->data->numa_node);
2309
2310        if (bd_tx_q == NULL)
2311                return -1;
2312
2313        bd_tx_q->queue_id = tx_queue_id;
2314        bd_tx_q->dev_private = dev->data->dev_private;
2315
2316        bd_tx_q->nb_tx_desc = nb_tx_desc;
2317        memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2318
2319        dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2320
2321        return 0;
2322}
2323
2324static void
2325bond_ethdev_rx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
2326{
2327        void *queue = dev->data->rx_queues[queue_id];
2328
2329        if (queue == NULL)
2330                return;
2331
2332        rte_free(queue);
2333}
2334
2335static void
2336bond_ethdev_tx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
2337{
2338        void *queue = dev->data->tx_queues[queue_id];
2339
2340        if (queue == NULL)
2341                return;
2342
2343        rte_free(queue);
2344}
2345
2346static void
2347bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2348{
2349        struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2350        struct bond_dev_private *internals;
2351
2352        /* Default value for polling slave found is true as we don't want to
2353         * disable the polling thread if we cannot get the lock */
2354        int i, polling_slave_found = 1;
2355
2356        if (cb_arg == NULL)
2357                return;
2358
2359        bonded_ethdev = cb_arg;
2360        internals = bonded_ethdev->data->dev_private;
2361
2362        if (!bonded_ethdev->data->dev_started ||
2363                !internals->link_status_polling_enabled)
2364                return;
2365
2366        /* If the device is currently being configured then don't check the
2367         * slaves' link status; wait until the next period */
2368        if (rte_spinlock_trylock(&internals->lock)) {
2369                if (internals->slave_count > 0)
2370                        polling_slave_found = 0;
2371
2372                for (i = 0; i < internals->slave_count; i++) {
2373                        if (!internals->slaves[i].link_status_poll_enabled)
2374                                continue;
2375
2376                        slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2377                        polling_slave_found = 1;
2378
2379                        /* Update slave link status */
2380                        (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2381                                        internals->slaves[i].link_status_wait_to_complete);
2382
2383                        /* if link status has changed since last checked then call lsc
2384                         * event callback */
2385                        if (slave_ethdev->data->dev_link.link_status !=
2386                                        internals->slaves[i].last_link_status) {
2387                                internals->slaves[i].last_link_status =
2388                                                slave_ethdev->data->dev_link.link_status;
2389
2390                                bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2391                                                RTE_ETH_EVENT_INTR_LSC,
2392                                                &bonded_ethdev->data->port_id,
2393                                                NULL);
2394                        }
2395                }
2396                rte_spinlock_unlock(&internals->lock);
2397        }
2398
2399        if (polling_slave_found)
2400                /* Set alarm to continue monitoring link status of slave ethdevs */
2401                rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2402                                bond_ethdev_slave_link_status_change_monitor, cb_arg);
2403}
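
/*
 * Illustrative sketch (not part of the driver): the monitor above uses the
 * self-rearming EAL alarm pattern, in which the callback schedules itself
 * again until polling is no longer required. poll_cb(), do_poll(),
 * polling_still_needed() and PERIOD_US are hypothetical names:
 *
 *	static void poll_cb(void *arg)
 *	{
 *		do_poll(arg);			// hypothetical work
 *		if (polling_still_needed(arg))	// hypothetical condition
 *			rte_eal_alarm_set(PERIOD_US, poll_cb, arg);
 *	}
 *
 *	rte_eal_alarm_set(PERIOD_US, poll_cb, arg);	// kick off the chain
 *
 * Here the period is link_status_polling_interval_ms converted to
 * microseconds, and the chain stops once no slave needs software polling.
 */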
2404
2405static int
2406bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2407{
2408        int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2409
2410        struct bond_dev_private *bond_ctx;
2411        struct rte_eth_link slave_link;
2412
2413        bool one_link_update_succeeded;
2414        uint32_t idx;
2415        int ret;
2416
2417        bond_ctx = ethdev->data->dev_private;
2418
2419        ethdev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2420
2421        if (ethdev->data->dev_started == 0 ||
2422                        bond_ctx->active_slave_count == 0) {
2423                ethdev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
2424                return 0;
2425        }
2426
2427        ethdev->data->dev_link.link_status = RTE_ETH_LINK_UP;
2428
2429        if (wait_to_complete)
2430                link_update = rte_eth_link_get;
2431        else
2432                link_update = rte_eth_link_get_nowait;
2433
2434        switch (bond_ctx->mode) {
2435        case BONDING_MODE_BROADCAST:
2436                /**
2437                 * Setting link speed to UINT32_MAX to ensure we pick up the
2438                 * value of the first active slave
2439                 */
2440                ethdev->data->dev_link.link_speed = UINT32_MAX;
2441
2442                /**
2443                 * The bonded link speed is the minimum of all the slaves' link
2444                 * speeds, as packet loss would occur on the slowest slave if
2445                 * transmission at a higher rate were attempted
2446                 */
2447                for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2448                        ret = link_update(bond_ctx->active_slaves[idx],
2449                                          &slave_link);
2450                        if (ret < 0) {
2451                                ethdev->data->dev_link.link_speed =
2452                                        RTE_ETH_SPEED_NUM_NONE;
2453                                RTE_BOND_LOG(ERR,
2454                                        "Slave (port %u) link get failed: %s",
2455                                        bond_ctx->active_slaves[idx],
2456                                        rte_strerror(-ret));
2457                                return 0;
2458                        }
2459
2460                        if (slave_link.link_speed <
2461                                        ethdev->data->dev_link.link_speed)
2462                                ethdev->data->dev_link.link_speed =
2463                                                slave_link.link_speed;
2464                }
2465                break;
2466        case BONDING_MODE_ACTIVE_BACKUP:
2467                /* Current primary slave */
2468                ret = link_update(bond_ctx->current_primary_port, &slave_link);
2469                if (ret < 0) {
2470                        RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
2471                                bond_ctx->current_primary_port,
2472                                rte_strerror(-ret));
2473                        return 0;
2474                }
2475
2476                ethdev->data->dev_link.link_speed = slave_link.link_speed;
2477                break;
2478        case BONDING_MODE_8023AD:
2479                ethdev->data->dev_link.link_autoneg =
2480                                bond_ctx->mode4.slave_link.link_autoneg;
2481                ethdev->data->dev_link.link_duplex =
2482                                bond_ctx->mode4.slave_link.link_duplex;
2483                /* fall through */
2484                /* to update link speed */
2485        case BONDING_MODE_ROUND_ROBIN:
2486        case BONDING_MODE_BALANCE:
2487        case BONDING_MODE_TLB:
2488        case BONDING_MODE_ALB:
2489        default:
2490                /**
2491                 * In these modes the maximum theoretical link speed is the sum
2492                 * of all the slaves' link speeds
2493                 */
2494                ethdev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2495                one_link_update_succeeded = false;
2496
2497                for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2498                        ret = link_update(bond_ctx->active_slaves[idx],
2499                                        &slave_link);
2500                        if (ret < 0) {
2501                                RTE_BOND_LOG(ERR,
2502                                        "Slave (port %u) link get failed: %s",
2503                                        bond_ctx->active_slaves[idx],
2504                                        rte_strerror(-ret));
2505                                continue;
2506                        }
2507
2508                        one_link_update_succeeded = true;
2509                        ethdev->data->dev_link.link_speed +=
2510                                        slave_link.link_speed;
2511                }
2512
2513                if (!one_link_update_succeeded) {
2514                        RTE_BOND_LOG(ERR, "All slaves link get failed");
2515                        return 0;
2516                }
2517        }
2518
2519
2520        return 0;
2521}
2522
2523
2524static int
2525bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2526{
2527        struct bond_dev_private *internals = dev->data->dev_private;
2528        struct rte_eth_stats slave_stats;
2529        int i, j;
2530
2531        for (i = 0; i < internals->slave_count; i++) {
2532                rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2533
2534                stats->ipackets += slave_stats.ipackets;
2535                stats->opackets += slave_stats.opackets;
2536                stats->ibytes += slave_stats.ibytes;
2537                stats->obytes += slave_stats.obytes;
2538                stats->imissed += slave_stats.imissed;
2539                stats->ierrors += slave_stats.ierrors;
2540                stats->oerrors += slave_stats.oerrors;
2541                stats->rx_nombuf += slave_stats.rx_nombuf;
2542
2543                for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2544                        stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2545                        stats->q_opackets[j] += slave_stats.q_opackets[j];
2546                        stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2547                        stats->q_obytes[j] += slave_stats.q_obytes[j];
2548                        stats->q_errors[j] += slave_stats.q_errors[j];
2549                }
2550
2551        }
2552
2553        return 0;
2554}
2555
2556static int
2557bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2558{
2559        struct bond_dev_private *internals = dev->data->dev_private;
2560        int i;
2561        int err;
2562        int ret;
2563
2564        for (i = 0, err = 0; i < internals->slave_count; i++) {
2565                ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2566                if (ret != 0)
2567                        err = ret;
2568        }
2569
2570        return err;
2571}
2572
2573static int
2574bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2575{
2576        struct bond_dev_private *internals = eth_dev->data->dev_private;
2577        int i;
2578        int ret = 0;
2579        uint16_t port_id;
2580
2581        switch (internals->mode) {
2582        /* Promiscuous mode is propagated to all slaves */
2583        case BONDING_MODE_ROUND_ROBIN:
2584        case BONDING_MODE_BALANCE:
2585        case BONDING_MODE_BROADCAST:
2586        case BONDING_MODE_8023AD: {
2587                unsigned int slave_ok = 0;
2588
2589                for (i = 0; i < internals->slave_count; i++) {
2590                        port_id = internals->slaves[i].port_id;
2591
2592                        ret = rte_eth_promiscuous_enable(port_id);
2593                        if (ret != 0)
2594                                RTE_BOND_LOG(ERR,
2595                                        "Failed to enable promiscuous mode for port %u: %s",
2596                                        port_id, rte_strerror(-ret));
2597                        else
2598                                slave_ok++;
2599                }
2600                /*
2601                 * Report success if the operation succeeded on at least
2602                 * one slave. Otherwise return the last error code.
2603                 */
2604                if (slave_ok > 0)
2605                        ret = 0;
2606                break;
2607        }
2608        /* Promiscuous mode is propagated only to primary slave */
2609        case BONDING_MODE_ACTIVE_BACKUP:
2610        case BONDING_MODE_TLB:
2611        case BONDING_MODE_ALB:
2612        default:
2613                /* Do not touch promisc when there cannot be primary ports */
2614                if (internals->slave_count == 0)
2615                        break;
2616                port_id = internals->current_primary_port;
2617                ret = rte_eth_promiscuous_enable(port_id);
2618                if (ret != 0)
2619                        RTE_BOND_LOG(ERR,
2620                                "Failed to enable promiscuous mode for port %u: %s",
2621                                port_id, rte_strerror(-ret));
2622        }
2623
2624        return ret;
2625}
2626
2627static int
2628bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2629{
2630        struct bond_dev_private *internals = dev->data->dev_private;
2631        int i;
2632        int ret = 0;
2633        uint16_t port_id;
2634
2635        switch (internals->mode) {
2636        /* Promiscuous mode is propagated to all slaves */
2637        case BONDING_MODE_ROUND_ROBIN:
2638        case BONDING_MODE_BALANCE:
2639        case BONDING_MODE_BROADCAST:
2640        case BONDING_MODE_8023AD: {
2641                unsigned int slave_ok = 0;
2642
2643                for (i = 0; i < internals->slave_count; i++) {
2644                        port_id = internals->slaves[i].port_id;
2645
2646                        if (internals->mode == BONDING_MODE_8023AD &&
2647                            bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2648                                        BOND_8023AD_FORCED_PROMISC) {
2649                                slave_ok++;
2650                                continue;
2651                        }
2652                        ret = rte_eth_promiscuous_disable(port_id);
2653                        if (ret != 0)
2654                                RTE_BOND_LOG(ERR,
2655                                        "Failed to disable promiscuous mode for port %u: %s",
2656                                        port_id, rte_strerror(-ret));
2657                        else
2658                                slave_ok++;
2659                }
2660                /*
2661                 * Report success if the operation succeeded on at least
2662                 * one slave. Otherwise return the last error code.
2663                 */
2664                if (slave_ok > 0)
2665                        ret = 0;
2666                break;
2667        }
2668        /* Promiscuous mode is propagated only to primary slave */
2669        case BONDING_MODE_ACTIVE_BACKUP:
2670        case BONDING_MODE_TLB:
2671        case BONDING_MODE_ALB:
2672        default:
2673                /* Do not touch promisc when there cannot be primary ports */
2674                if (internals->slave_count == 0)
2675                        break;
2676                port_id = internals->current_primary_port;
2677                ret = rte_eth_promiscuous_disable(port_id);
2678                if (ret != 0)
2679                        RTE_BOND_LOG(ERR,
2680                                "Failed to disable promiscuous mode for port %u: %s",
2681                                port_id, rte_strerror(-ret));
2682        }
2683
2684        return ret;
2685}
2686
2687static int
2688bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2689{
2690        struct bond_dev_private *internals = eth_dev->data->dev_private;
2691        int i;
2692        int ret = 0;
2693        uint16_t port_id;
2694
2695        switch (internals->mode) {
2696        /* allmulti mode is propagated to all slaves */
2697        case BONDING_MODE_ROUND_ROBIN:
2698        case BONDING_MODE_BALANCE:
2699        case BONDING_MODE_BROADCAST:
2700        case BONDING_MODE_8023AD: {
2701                unsigned int slave_ok = 0;
2702
2703                for (i = 0; i < internals->slave_count; i++) {
2704                        port_id = internals->slaves[i].port_id;
2705
2706                        ret = rte_eth_allmulticast_enable(port_id);
2707                        if (ret != 0)
2708                                RTE_BOND_LOG(ERR,
2709                                        "Failed to enable allmulti mode for port %u: %s",
2710                                        port_id, rte_strerror(-ret));
2711                        else
2712                                slave_ok++;
2713                }
2714                /*
2715                 * Report success if the operation succeeded on at least
2716                 * one slave. Otherwise return the last error code.
2717                 */
2718                if (slave_ok > 0)
2719                        ret = 0;
2720                break;
2721        }
2722        /* allmulti mode is propagated only to primary slave */
2723        case BONDING_MODE_ACTIVE_BACKUP:
2724        case BONDING_MODE_TLB:
2725        case BONDING_MODE_ALB:
2726        default:
2727                /* Do not touch allmulti when there cannot be primary ports */
2728                if (internals->slave_count == 0)
2729                        break;
2730                port_id = internals->current_primary_port;
2731                ret = rte_eth_allmulticast_enable(port_id);
2732                if (ret != 0)
2733                        RTE_BOND_LOG(ERR,
2734                                "Failed to enable allmulti mode for port %u: %s",
2735                                port_id, rte_strerror(-ret));
2736        }
2737
2738        return ret;
2739}
2740
2741static int
2742bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2743{
2744        struct bond_dev_private *internals = eth_dev->data->dev_private;
2745        int i;
2746        int ret = 0;
2747        uint16_t port_id;
2748
2749        switch (internals->mode) {
2750        /* allmulti mode is propagated to all slaves */
2751        case BONDING_MODE_ROUND_ROBIN:
2752        case BONDING_MODE_BALANCE:
2753        case BONDING_MODE_BROADCAST:
2754        case BONDING_MODE_8023AD: {
2755                unsigned int slave_ok = 0;
2756
2757                for (i = 0; i < internals->slave_count; i++) {
2758                        uint16_t port_id = internals->slaves[i].port_id;
2759
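                            /* In mode 4, leave allmulti enabled on slaves where
                             * the 802.3ad state machine forced it on. */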
2760                        if (internals->mode == BONDING_MODE_8023AD &&
2761                            bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2762                                        BOND_8023AD_FORCED_ALLMULTI)
2763                                continue;
2764
2765                        ret = rte_eth_allmulticast_disable(port_id);
2766                        if (ret != 0)
2767                                RTE_BOND_LOG(ERR,
2768                                        "Failed to disable allmulti mode for port %u: %s",
2769                                        port_id, rte_strerror(-ret));
2770                        else
2771                                slave_ok++;
2772                }
2773                /*
2774                 * Report success if the operation succeeded on at least
2775                 * one slave. Otherwise return the last error code.
2776                 */
2777                if (slave_ok > 0)
2778                        ret = 0;
2779                break;
2780        }
2781        /* allmulti mode is propagated only to primary slave */
2782        case BONDING_MODE_ACTIVE_BACKUP:
2783        case BONDING_MODE_TLB:
2784        case BONDING_MODE_ALB:
2785        default:
2786                /* Do not touch allmulti when there cannot be primary ports */
2787                if (internals->slave_count == 0)
2788                        break;
2789                port_id = internals->current_primary_port;
2790                ret = rte_eth_allmulticast_disable(port_id);
2791                if (ret != 0)
2792                        RTE_BOND_LOG(ERR,
2793                                "Failed to disable allmulti mode for port %u: %s",
2794                                port_id, rte_strerror(-ret));
2795        }
2796
2797        return ret;
2798}
2799
2800static void
2801bond_ethdev_delayed_lsc_propagation(void *arg)
2802{
2803        if (arg == NULL)
2804                return;
2805
2806        rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2807                        RTE_ETH_EVENT_INTR_LSC, NULL);
2808}
2809
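    /*
     * Link status change callback registered on each slave port. On link up
     * the slave is activated (becoming primary if it is the first active
     * slave); on link down it is deactivated and the primary is reassigned.
     * The bonded device's own LSC event is then propagated to applications,
     * optionally delayed by the configured up/down delays.
     */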
2810int
2811bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2812                void *param, void *ret_param __rte_unused)
2813{
2814        struct rte_eth_dev *bonded_eth_dev;
2815        struct bond_dev_private *internals;
2816        struct rte_eth_link link;
2817        int rc = -1;
2818        int ret;
2819
2820        uint8_t lsc_flag = 0;
2821        int valid_slave = 0;
2822        uint16_t active_pos;
2823        uint16_t i;
2824
2825        if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2826                return rc;
2827
2828        bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2829
2830        if (check_for_bonded_ethdev(bonded_eth_dev))
2831                return rc;
2832
2833        internals = bonded_eth_dev->data->dev_private;
2834
2835        /* If the device isn't started, don't handle interrupts */
2836        if (!bonded_eth_dev->data->dev_started)
2837                return rc;
2838
2839        /* Verify that port_id is a valid slave of the bonded port */
2840        for (i = 0; i < internals->slave_count; i++) {
2841                if (internals->slaves[i].port_id == port_id) {
2842                        valid_slave = 1;
2843                        break;
2844                }
2845        }
2846
2847        if (!valid_slave)
2848                return rc;
2849
2850        /* Serialize parallel lsc callback invocations, whether triggered by a
2851         * real link event from the slave PMDs or by the bonding PMD itself.
2852         */
2853        rte_spinlock_lock(&internals->lsc_lock);
2854
2855        /* Search for port in active port list */
2856        active_pos = find_slave_by_id(internals->active_slaves,
2857                        internals->active_slave_count, port_id);
2858
2859        ret = rte_eth_link_get_nowait(port_id, &link);
2860        if (ret < 0)
2861                RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);
2862
2863        if (ret == 0 && link.link_status) {
2864                if (active_pos < internals->active_slave_count)
2865                        goto link_update;
2866
2867                /* Check link state properties if the bonded link is up */
2868                if (bonded_eth_dev->data->dev_link.link_status == RTE_ETH_LINK_UP) {
2869                        if (link_properties_valid(bonded_eth_dev, &link) != 0)
2870                                RTE_BOND_LOG(ERR, "Invalid link properties "
2871                                             "for slave %d in bonding mode %d",
2872                                             port_id, internals->mode);
2873                } else {
2874                        /* inherit slave link properties */
2875                        link_properties_set(bonded_eth_dev, &link);
2876                }
2877
2878                /* If there are no active slave ports, make this port
2879                 * the primary port.
2880                 */
2881                if (internals->active_slave_count < 1) {
2882                        /* If first active slave, then change link status */
2883                        bonded_eth_dev->data->dev_link.link_status =
2884                                                                RTE_ETH_LINK_UP;
2885                        internals->current_primary_port = port_id;
2886                        lsc_flag = 1;
2887
2888                        mac_address_slaves_update(bonded_eth_dev);
2889                }
2890
2891                activate_slave(bonded_eth_dev, port_id);
2892
2893                /* If the user has defined the primary port then default to
2894                 * using it.
2895                 */
2896                if (internals->user_defined_primary_port &&
2897                                internals->primary_port == port_id)
2898                        bond_ethdev_primary_set(internals, port_id);
2899        } else {
2900                if (active_pos == internals->active_slave_count)
2901                        goto link_update;
2902
2903                /* Remove from active slave list */
2904                deactivate_slave(bonded_eth_dev, port_id);
2905
2906                if (internals->active_slave_count < 1)
2907                        lsc_flag = 1;
2908
2909                /* Update primary id: take the first active slave from the list,
2910                 * or fall back to the configured primary port if none is left */
2911                if (port_id == internals->current_primary_port) {
2912                        if (internals->active_slave_count > 0)
2913                                bond_ethdev_primary_set(internals,
2914                                                internals->active_slaves[0]);
2915                        else
2916                                internals->current_primary_port = internals->primary_port;
2917                        mac_address_slaves_update(bonded_eth_dev);
2918                }
2919        }
2920
2921link_update:
2922        /*
2923         * Update bonded device link properties after any change to active
2924         * slaves.
2925         */
2926        bond_ethdev_link_update(bonded_eth_dev, 0);
2927
2928        if (lsc_flag) {
2929                /* Cancel any possible outstanding interrupts if delays are enabled */
2930                if (internals->link_up_delay_ms > 0 ||
2931                        internals->link_down_delay_ms > 0)
2932                        rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2933                                        bonded_eth_dev);
2934
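
                    /* Propagate the bonded device's LSC event immediately, or
                     * schedule it via an alarm when a delay is configured. */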
2935                if (bonded_eth_dev->data->dev_link.link_status) {
2936                        if (internals->link_up_delay_ms > 0)
2937                                rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2938                                                bond_ethdev_delayed_lsc_propagation,
2939                                                (void *)bonded_eth_dev);
2940                        else
2941                                rte_eth_dev_callback_process(bonded_eth_dev,
2942                                                RTE_ETH_EVENT_INTR_LSC,
2943                                                NULL);
2944
2945                } else {
2946                        if (internals->link_down_delay_ms > 0)
2947                                rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2948                                                bond_ethdev_delayed_lsc_propagation,
2949                                                (void *)bonded_eth_dev);
2950                        else
2951                                rte_eth_dev_callback_process(bonded_eth_dev,
2952                                                RTE_ETH_EVENT_INTR_LSC,
2953                                                NULL);
2954                }
2955        }
2956
2957        rte_spinlock_unlock(&internals->lsc_lock);
2958
2959        return rc;
2960}
2961
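    /*
     * The bonded device keeps a full-size RETA: an update is stored locally,
     * replicated to fill the whole local table, and then pushed to every
     * slave using that slave's own RETA size.
     */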
2962static int
2963bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2964                struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2965{
2966        unsigned i, j;
2967        int result = 0;
2968        int slave_reta_size;
2969        unsigned reta_count;
2970        struct bond_dev_private *internals = dev->data->dev_private;
2971
2972        if (reta_size != internals->reta_size)
2973                return -EINVAL;
2974
2975        /* Copy RETA table */
2976        reta_count = (reta_size + RTE_ETH_RETA_GROUP_SIZE - 1) /
2977                        RTE_ETH_RETA_GROUP_SIZE;
2978
2979        for (i = 0; i < reta_count; i++) {
2980                internals->reta_conf[i].mask = reta_conf[i].mask;
2981                for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
2982                        if ((reta_conf[i].mask >> j) & 0x01)
2983                                internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2984        }
2985
2986        /* Fill rest of array */
2987        for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2988                memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2989                                sizeof(internals->reta_conf[0]) * reta_count);
2990
2991        /* Propagate RETA over slaves */
2992        for (i = 0; i < internals->slave_count; i++) {
2993                slave_reta_size = internals->slaves[i].reta_size;
2994                result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2995                                &internals->reta_conf[0], slave_reta_size);
2996                if (result < 0)
2997                        return result;
2998        }
2999
3000        return 0;
3001}
3002
3003static int
3004bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
3005                struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3006{
3007        int i, j;
3008        struct bond_dev_private *internals = dev->data->dev_private;
3009
3010        if (reta_size != internals->reta_size)
3011                return -EINVAL;
3012
3013        /* Copy RETA table */
3014        for (i = 0; i < reta_size / RTE_ETH_RETA_GROUP_SIZE; i++)
3015                for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3016                        if ((reta_conf[i].mask >> j) & 0x01)
3017                                reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3018
3019        return 0;
3020}
3021
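    /*
     * Propagate an RSS hash configuration update to all slaves. The requested
     * hash functions are masked against the types the bonded device supports,
     * and the key is truncated to the bonded device's key length.
     */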
3022static int
3023bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3024                struct rte_eth_rss_conf *rss_conf)
3025{
3026        int i, result = 0;
3027        struct bond_dev_private *internals = dev->data->dev_private;
3028        struct rte_eth_rss_conf bond_rss_conf;
3029
3030        memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3031
3032        bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
3033
3034        if (bond_rss_conf.rss_hf != 0)
3035                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3036
3037        if (bond_rss_conf.rss_key) {
3038                if (bond_rss_conf.rss_key_len < internals->rss_key_len)
3039                        return -EINVAL;
3040                else if (bond_rss_conf.rss_key_len > internals->rss_key_len)
3041                        RTE_BOND_LOG(WARNING, "rss_key will be truncated");
3042
3043                memcpy(internals->rss_key, bond_rss_conf.rss_key,
3044                                internals->rss_key_len);
3045                bond_rss_conf.rss_key_len = internals->rss_key_len;
3046        }
3047
3048        for (i = 0; i < internals->slave_count; i++) {
3049                result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3050                                &bond_rss_conf);
3051                if (result < 0)
3052                        return result;
3053        }
3054
3055        return 0;
3056}
3057
3058static int
3059bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3060                struct rte_eth_rss_conf *rss_conf)
3061{
3062        struct bond_dev_private *internals = dev->data->dev_private;
3063
3064        rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3065        rss_conf->rss_key_len = internals->rss_key_len;
3066        if (rss_conf->rss_key)
3067                memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3068
3069        return 0;
3070}
3071
3072static int
3073bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3074{
3075        struct rte_eth_dev *slave_eth_dev;
3076        struct bond_dev_private *internals = dev->data->dev_private;
3077        int ret, i;
3078
3079        rte_spinlock_lock(&internals->lock);
3080
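            /* First pass: verify that every slave implements mtu_set so the
             * MTU cannot end up applied to only some of the slaves. */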
3081        for (i = 0; i < internals->slave_count; i++) {
3082                slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3083                if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3084                        rte_spinlock_unlock(&internals->lock);
3085                        return -ENOTSUP;
3086                }
3087        }
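
            /* Second pass: apply the new MTU to each slave. */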
3088        for (i = 0; i < internals->slave_count; i++) {
3089                ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3090                if (ret < 0) {
3091                        rte_spinlock_unlock(&internals->lock);
3092                        return ret;
3093                }
3094        }
3095
3096        rte_spinlock_unlock(&internals->lock);
3097        return 0;
3098}
3099
3100static int
3101bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3102                        struct rte_ether_addr *addr)
3103{
3104        if (mac_address_set(dev, addr)) {
3105                RTE_BOND_LOG(ERR, "Failed to update MAC address");
3106                return -EINVAL;
3107        }
3108
3109        return 0;
3110}
3111
3112static int
3113bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
3114                  const struct rte_flow_ops **ops)
3115{
3116        *ops = &bond_flow_ops;
3117        return 0;
3118}
3119
3120static int
3121bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3122                        struct rte_ether_addr *mac_addr,
3123                        __rte_unused uint32_t index, uint32_t vmdq)
3124{
3125        struct rte_eth_dev *slave_eth_dev;
3126        struct bond_dev_private *internals = dev->data->dev_private;
3127        int ret, i;
3128
3129        rte_spinlock_lock(&internals->lock);
3130
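            /* Verify that every slave supports both mac_addr_add and
             * mac_addr_remove before changing any of them. */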
3131        for (i = 0; i < internals->slave_count; i++) {
3132                slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3133                if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3134                         *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3135                        ret = -ENOTSUP;
3136                        goto end;
3137                }
3138        }
3139
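            /* Add the address on each slave; on failure, roll back the slaves
             * already updated so they all stay consistent. */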
3140        for (i = 0; i < internals->slave_count; i++) {
3141                ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3142                                mac_addr, vmdq);
3143                if (ret < 0) {
3144                        /* rollback */
3145                        for (i--; i >= 0; i--)
3146                                rte_eth_dev_mac_addr_remove(
3147                                        internals->slaves[i].port_id, mac_addr);
3148                        goto end;
3149                }
3150        }
3151
3152        ret = 0;
3153end:
3154        rte_spinlock_unlock(&internals->lock);
3155        return ret;
3156}
3157
3158static void
3159bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3160{
3161        struct rte_eth_dev *slave_eth_dev;
3162        struct bond_dev_private *internals = dev->data->dev_private;
3163        int i;
3164
3165        rte_spinlock_lock(&internals->lock);
3166
3167        for (i = 0; i < internals->slave_count; i++) {
3168                slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3169                if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3170                        goto end;
3171        }
3172
3173        struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3174
3175        for (i = 0; i < internals->slave_count; i++)
3176                rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3177                                mac_addr);
3178
3179end:
3180        rte_spinlock_unlock(&internals->lock);
3181}
3182
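    /* Generic ethdev operations of the bonded device; most ops fan out to the
     * slave ports via the handlers defined above. */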
3183const struct eth_dev_ops default_dev_ops = {
3184        .dev_start            = bond_ethdev_start,
3185        .dev_stop             = bond_ethdev_stop,
3186        .dev_close            = bond_ethdev_close,
3187        .dev_configure        = bond_ethdev_configure,
3188        .dev_infos_get        = bond_ethdev_info,
3189        .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3190        .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3191        .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3192        .rx_queue_release     = bond_ethdev_rx_queue_release,
3193        .tx_queue_release     = bond_ethdev_tx_queue_release,
3194        .link_update          = bond_ethdev_link_update,
3195        .stats_get            = bond_ethdev_stats_get,
3196        .stats_reset          = bond_ethdev_stats_reset,
3197        .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3198        .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3199        .allmulticast_enable  = bond_ethdev_allmulticast_enable,
3200        .allmulticast_disable = bond_ethdev_allmulticast_disable,
3201        .reta_update          = bond_ethdev_rss_reta_update,
3202        .reta_query           = bond_ethdev_rss_reta_query,
3203        .rss_hash_update      = bond_ethdev_rss_hash_update,
3204        .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3205        .mtu_set              = bond_ethdev_mtu_set,
3206        .mac_addr_set         = bond_ethdev_mac_address_set,
3207        .mac_addr_add         = bond_ethdev_mac_addr_add,
3208        .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3209        .flow_ops_get         = bond_flow_ops_get
3210};
3211
3212static int
3213bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3214{
3215        const char *name = rte_vdev_device_name(dev);
3216        uint8_t socket_id = dev->device.numa_node;
3217        struct bond_dev_private *internals = NULL;
3218        struct rte_eth_dev *eth_dev = NULL;
3219        uint32_t vlan_filter_bmp_size;
3220
3221        /* Now do all data allocation - for the eth_dev structure and the
3222         * internal (private) data.
3223         */
3224
3225        /* reserve an ethdev entry */
3226        eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3227        if (eth_dev == NULL) {
3228                RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3229                goto err;
3230        }
3231
3232        internals = eth_dev->data->dev_private;
3233        eth_dev->data->nb_rx_queues = (uint16_t)1;
3234        eth_dev->data->nb_tx_queues = (uint16_t)1;
3235
3236        /* Allocate memory for storing MAC addresses */
3237        eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3238                        BOND_MAX_MAC_ADDRS, 0, socket_id);
3239        if (eth_dev->data->mac_addrs == NULL) {
3240                RTE_BOND_LOG(ERR,
3241                             "Failed to allocate %u bytes needed to store MAC addresses",
3242                             RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3243                goto err;
3244        }
3245
3246        eth_dev->dev_ops = &default_dev_ops;
3247        eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
3248                                        RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
3249
3250        rte_spinlock_init(&internals->lock);
3251        rte_spinlock_init(&internals->lsc_lock);
3252
3253        internals->port_id = eth_dev->data->port_id;
3254        internals->mode = BONDING_MODE_INVALID;
3255        internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3256        internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3257        internals->burst_xmit_hash = burst_xmit_l2_hash;
3258        internals->user_defined_mac = 0;
3259
3260        internals->link_status_polling_enabled = 0;
3261
3262        internals->link_status_polling_interval_ms =
3263                DEFAULT_POLLING_INTERVAL_10_MS;
3264        internals->link_down_delay_ms = 0;
3265        internals->link_up_delay_ms = 0;
3266
3267        internals->slave_count = 0;
3268        internals->active_slave_count = 0;
3269        internals->rx_offload_capa = 0;
3270        internals->tx_offload_capa = 0;
3271        internals->rx_queue_offload_capa = 0;
3272        internals->tx_queue_offload_capa = 0;
3273        internals->candidate_max_rx_pktlen = 0;
3274        internals->max_rx_pktlen = 0;
3275
3276        /* Initially allow to choose any offload type */
3277        internals->flow_type_rss_offloads = RTE_ETH_RSS_PROTO_MASK;
3278
3279        memset(&internals->default_rxconf, 0,
3280               sizeof(internals->default_rxconf));
3281        memset(&internals->default_txconf, 0,
3282               sizeof(internals->default_txconf));
3283
3284        memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3285        memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3286
3287        memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3288        memset(internals->slaves, 0, sizeof(internals->slaves));
3289
3290        TAILQ_INIT(&internals->flow_list);
3291        internals->flow_isolated_valid = 0;
3292
3293        /* Set mode 4 default configuration */
3294        bond_mode_8023ad_setup(eth_dev, NULL);
3295        if (bond_ethdev_mode_set(eth_dev, mode)) {
3296                RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3297                                 eth_dev->data->port_id, mode);
3298                goto err;
3299        }
3300
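            /* Allocate a bitmap with one bit per valid VLAN id to record the
             * VLAN filters configured on the bonded device. */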
3301        vlan_filter_bmp_size =
3302                rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3303        internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3304                                                   RTE_CACHE_LINE_SIZE);
3305        if (internals->vlan_filter_bmpmem == NULL) {
3306                RTE_BOND_LOG(ERR,
3307                             "Failed to allocate vlan bitmap for bonded device %u",
3308                             eth_dev->data->port_id);
3309                goto err;
3310        }
3311
3312        internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3313                        internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3314        if (internals->vlan_filter_bmp == NULL) {
3315                RTE_BOND_LOG(ERR,
3316                             "Failed to init vlan bitmap for bonded device %u",
3317                             eth_dev->data->port_id);
3318                rte_free(internals->vlan_filter_bmpmem);
3319                goto err;
3320        }
3321
3322        return eth_dev->data->port_id;
3323
3324err:
3325        rte_free(internals);
3326        if (eth_dev != NULL)
3327                eth_dev->data->dev_private = NULL;
3328        rte_eth_dev_release_port(eth_dev);
3329        return -1;
3330}
3331
3332static int
3333bond_probe(struct rte_vdev_device *dev)
3334{
3335        const char *name;
3336        struct bond_dev_private *internals;
3337        struct rte_kvargs *kvlist;
3338        uint8_t bonding_mode;
3339        int arg_count, port_id;
3340        int socket_id;
3341        uint8_t agg_mode;
3342        struct rte_eth_dev *eth_dev;
3343
3344        if (!dev)
3345                return -EINVAL;
3346
3347        name = rte_vdev_device_name(dev);
3348        RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3349
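            /* In a secondary process, just attach to the ethdev created by the
             * primary process; no further configuration is done here. */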
3350        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3351                eth_dev = rte_eth_dev_attach_secondary(name);
3352                if (!eth_dev) {
3353                        RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3354                        return -1;
3355                }
3356                /* TODO: request info from primary to set up Rx and Tx */
3357                eth_dev->dev_ops = &default_dev_ops;
3358                eth_dev->device = &dev->device;
3359                rte_eth_dev_probing_finish(eth_dev);
3360                return 0;
3361        }
3362
3363        kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3364                pmd_bond_init_valid_arguments);
3365        if (kvlist == NULL)
3366                return -1;
3367
3368        /* Parse link bonding mode */
3369        if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3370                if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3371                                &bond_ethdev_parse_slave_mode_kvarg,
3372                                &bonding_mode) != 0) {
3373                        RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3374                                        name);
3375                        goto parse_error;
3376                }
3377        } else {
3378                RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3379                                "device %s", name);
3380                goto parse_error;
3381        }
3382
3383        /* Parse socket id to create bonding device on */
3384        arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3385        if (arg_count == 1) {
3386                if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3387                                &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3388                                != 0) {
3389                        RTE_BOND_LOG(ERR, "Invalid socket id specified for "
3390                                        "bonded device %s", name);
3391                        goto parse_error;
3392                }
3393        } else if (arg_count > 1) {
3394                RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
3395                                "bonded device %s", name);
3396                goto parse_error;
3397        } else {
3398                socket_id = rte_socket_id();
3399        }
3400
3401        dev->device.numa_node = socket_id;
3402
3403        /* Create link bonding eth device */
3404        port_id = bond_alloc(dev, bonding_mode);
3405        if (port_id < 0) {
3406                RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3407                                "socket %u.", name, bonding_mode, socket_id);
3408                goto parse_error;
3409        }
3410        internals = rte_eth_devices[port_id].data->dev_private;
3411        internals->kvlist = kvlist;
3412
3413        if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3414                if (rte_kvargs_process(kvlist,
3415                                PMD_BOND_AGG_MODE_KVARG,
3416                                &bond_ethdev_parse_slave_agg_mode_kvarg,
3417                                &agg_mode) != 0) {
3418                        RTE_BOND_LOG(ERR,
3419                                        "Failed to parse agg selection mode for bonded device %s",
3420                                        name);
3421                        goto parse_error;
3422                }
3423
3424                if (internals->mode == BONDING_MODE_8023AD)
3425                        internals->mode4.agg_selection = agg_mode;
3426        } else {
3427                internals->mode4.agg_selection = AGG_STABLE;
3428        }
3429
3430        rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3431        RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3432                        "socket %u.", name, port_id, bonding_mode, socket_id);
3433        return 0;
3434
3435parse_error:
3436        rte_kvargs_free(kvlist);
3437
3438        return -1;
3439}
3440
3441static int
3442bond_remove(struct rte_vdev_device *dev)
3443{
3444        struct rte_eth_dev *eth_dev;
3445        struct bond_dev_private *internals;
3446        const char *name;
3447        int ret = 0;
3448
3449        if (!dev)
3450                return -EINVAL;
3451
3452        name = rte_vdev_device_name(dev);
3453        RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3454
3455        /* find an ethdev entry */
3456        eth_dev = rte_eth_dev_allocated(name);
3457        if (eth_dev == NULL)
3458                return 0; /* port already released */
3459
3460        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3461                return rte_eth_dev_release_port(eth_dev);
3462
3463        RTE_ASSERT(eth_dev->device == &dev->device);
3464
3465        internals = eth_dev->data->dev_private;
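            /* Refuse to remove the device while slaves are still attached. */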
3466        if (internals->slave_count != 0)
3467                return -EBUSY;
3468
3469        if (eth_dev->data->dev_started == 1) {
3470                ret = bond_ethdev_stop(eth_dev);
3471                bond_ethdev_close(eth_dev);
3472        }
3473        rte_eth_dev_release_port(eth_dev);
3474
3475        return ret;
3476}
3477
3478/* This function resolves the slave port ids after all the other pdevs and
3479 * vdevs have been allocated. */
3480static int
3481bond_ethdev_configure(struct rte_eth_dev *dev)
3482{
3483        const char *name = dev->device->name;
3484        struct bond_dev_private *internals = dev->data->dev_private;
3485        struct rte_kvargs *kvlist = internals->kvlist;
3486        int arg_count;
3487        uint16_t port_id = dev - rte_eth_devices;
3488        uint8_t agg_mode;
3489
3490        static const uint8_t default_rss_key[40] = {
3491                0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3492                0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3493                0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3494                0xBE, 0xAC, 0x01, 0xFA
3495        };
3496
3497        unsigned i, j;
3498
3499        /*
3500         * If RSS is enabled, fill the RETA with default values and set the
3501         * key to the value specified in the port RSS configuration, falling
3502         * back to the default RSS key if no key is specified.
3503         */
3504        if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
3505                struct rte_eth_rss_conf *rss_conf =
3506                        &dev->data->dev_conf.rx_adv_conf.rss_conf;
3507                if (rss_conf->rss_key != NULL) {
3508                        if (internals->rss_key_len > rss_conf->rss_key_len) {
3509                                RTE_BOND_LOG(ERR, "Invalid rss key length (%u)",
3510                                                rss_conf->rss_key_len);
3511                                return -EINVAL;
3512                        }
3513
3514                        memcpy(internals->rss_key, rss_conf->rss_key,
3515                               internals->rss_key_len);
3516                } else {
3517                        if (internals->rss_key_len > sizeof(default_rss_key)) {
3518                                RTE_BOND_LOG(ERR,
3519                                       "There is no suitable default hash key");
3520                                return -EINVAL;
3521                        }
3522
3523                        memcpy(internals->rss_key, default_rss_key,
3524                               internals->rss_key_len);
3525                }
3526
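                    /* Build the default RETA by spreading entries evenly
                     * across the configured Rx queues. */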
3527                for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3528                        internals->reta_conf[i].mask = ~0LL;
3529                        for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3530                                internals->reta_conf[i].reta[j] =
3531                                                (i * RTE_ETH_RETA_GROUP_SIZE + j) %
3532                                                dev->data->nb_rx_queues;
3533                }
3534        }
3535
3536        /* set the max_rx_pktlen */
3537        internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3538
3539        /*
3540         * If there is no kvlist, this bonded device was created through the
3541         * bonding API rather than from devargs.
3542         */
3543        if (!kvlist)
3544                return 0;
3545
3546        /* Parse MAC address for bonded device */
3547        arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3548        if (arg_count == 1) {
3549                struct rte_ether_addr bond_mac;
3550
3551                if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3552                                       &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3553                        RTE_BOND_LOG(INFO, "Invalid MAC address for bonded device %s",
3554                                     name);
3555                        return -1;
3556                }
3557
3558                /* Set MAC address */
3559                if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3560                        RTE_BOND_LOG(ERR,
3561                                     "Failed to set MAC address on bonded device %s",
3562                                     name);
3563                        return -1;
3564                }
3565        } else if (arg_count > 1) {
3566                RTE_BOND_LOG(ERR,
3567                             "MAC address can be specified only once for bonded device %s",
3568                             name);
3569                return -1;
3570        }
3571
3572        /* Parse/set balance mode transmit policy */
3573        arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3574        if (arg_count == 1) {
3575                uint8_t xmit_policy;
3576
3577                if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3578                                       &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3579                    0) {
3580                        RTE_BOND_LOG(INFO,
3581                                     "Invalid xmit policy specified for bonded device %s",
3582                                     name);
3583                        return -1;
3584                }
3585
3586                /* Set balance mode transmit policy */
3587                if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3588                        RTE_BOND_LOG(ERR,
3589                                     "Failed to set balance xmit policy on bonded device %s",
3590                                     name);
3591                        return -1;
3592                }
3593        } else if (arg_count > 1) {
3594                RTE_BOND_LOG(ERR,
3595                             "Transmit policy can be specified only once for bonded device %s",
3596                             name);
3597                return -1;
3598        }
3599
3600        if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3601                if (rte_kvargs_process(kvlist,
3602                                       PMD_BOND_AGG_MODE_KVARG,
3603                                       &bond_ethdev_parse_slave_agg_mode_kvarg,
3604                                       &agg_mode) != 0) {
3605                        RTE_BOND_LOG(ERR,
3606                                     "Failed to parse agg selection mode for bonded device %s",
3607                                     name);
                            return -1;
3608                }
3609                if (internals->mode == BONDING_MODE_8023AD) {
3610                        int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3611                                        agg_mode);
3612                        if (ret < 0) {
3613                                RTE_BOND_LOG(ERR,
3614                                        "Invalid args for agg selection set for bonded device %s",
3615                                        name);
3616                                return -1;
3617                        }
3618                }
3619        }
3620
3621        /* Parse/add slave ports to bonded device */
3622        if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3623                struct bond_ethdev_slave_ports slave_ports;
3624                unsigned i;
3625
3626                memset(&slave_ports, 0, sizeof(slave_ports));
3627
3628                if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3629                                       &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3630                        RTE_BOND_LOG(ERR,
3631                                     "Failed to parse slave ports for bonded device %s",
3632                                     name);
3633                        return -1;
3634                }
3635
3636                for (i = 0; i < slave_ports.slave_count; i++) {
3637                        if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3638                                RTE_BOND_LOG(ERR,
3639                                             "Failed to add port %d as slave to bonded device %s",
3640                                             slave_ports.slaves[i], name);
3641                        }
3642                }
3643
3644        } else {
3645                RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3646                return -1;
3647        }
3648
3649        /* Parse/set primary slave port id */
3650        arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3651        if (arg_count == 1) {
3652                uint16_t primary_slave_port_id;
3653
3654                if (rte_kvargs_process(kvlist,
3655                                       PMD_BOND_PRIMARY_SLAVE_KVARG,
3656                                       &bond_ethdev_parse_primary_slave_port_id_kvarg,
3657                                       &primary_slave_port_id) < 0) {
3658                        RTE_BOND_LOG(INFO,
3659                                     "Invalid primary slave port id specified for bonded device %s",
3660                                     name);
3661                        return -1;
3662                }
3663
3664                /* Set primary slave port id */
3665                if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3666                    != 0) {
3667                        RTE_BOND_LOG(ERR,
3668                                     "Failed to set primary slave port %d on bonded device %s",
3669                                     primary_slave_port_id, name);
3670                        return -1;
3671                }
3672        } else if (arg_count > 1) {
3673                RTE_BOND_LOG(INFO,
3674                             "Primary slave can be specified only once for bonded device %s",
3675                             name);
3676                return -1;
3677        }
3678
3679        /* Parse link status monitor polling interval */
3680        arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3681        if (arg_count == 1) {
3682                uint32_t lsc_poll_interval_ms;
3683
3684                if (rte_kvargs_process(kvlist,
3685                                       PMD_BOND_LSC_POLL_PERIOD_KVARG,
3686                                       &bond_ethdev_parse_time_ms_kvarg,
3687                                       &lsc_poll_interval_ms) < 0) {
3688                        RTE_BOND_LOG(INFO,
3689                                     "Invalid lsc polling interval value specified for bonded"
3690                                     " device %s", name);
3691                        return -1;
3692                }
3693
3694                if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3695                    != 0) {
3696                        RTE_BOND_LOG(ERR,
3697                                     "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3698                                     lsc_poll_interval_ms, name);
3699                        return -1;
3700                }
3701        } else if (arg_count > 1) {
3702                RTE_BOND_LOG(INFO,
3703                             "LSC polling interval can be specified only once for bonded"
3704                             " device %s", name);
3705                return -1;
3706        }
3707
3708        /* Parse link up interrupt propagation delay */
3709        arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3710        if (arg_count == 1) {
3711                uint32_t link_up_delay_ms;
3712
3713                if (rte_kvargs_process(kvlist,
3714                                       PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3715                                       &bond_ethdev_parse_time_ms_kvarg,
3716                                       &link_up_delay_ms) < 0) {
3717                        RTE_BOND_LOG(INFO,
3718                                     "Invalid link up propagation delay value specified for"
3719                                     " bonded device %s", name);
3720                        return -1;
3721                }
3722
3723                /* Set link up propagation delay */
3724                if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3725                    != 0) {
3726                        RTE_BOND_LOG(ERR,
3727                                     "Failed to set link up propagation delay (%u ms) on bonded"
3728                                     " device %s", link_up_delay_ms, name);
3729                        return -1;
3730                }
3731        } else if (arg_count > 1) {
3732                RTE_BOND_LOG(INFO,
3733                             "Link up propagation delay can be specified only once for"
3734                             " bonded device %s", name);
3735                return -1;
3736        }
3737
3738        /* Parse link down interrupt propagation delay */
3739        arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3740        if (arg_count == 1) {
3741                uint32_t link_down_delay_ms;
3742
3743                if (rte_kvargs_process(kvlist,
3744                                       PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3745                                       &bond_ethdev_parse_time_ms_kvarg,
3746                                       &link_down_delay_ms) < 0) {
3747                        RTE_BOND_LOG(INFO,
3748                                     "Invalid link down propagation delay value specified for"
3749                                     " bonded device %s", name);
3750                        return -1;
3751                }
3752
3753                /* Set link down propagation delay */
3754                if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3755                    != 0) {
3756                        RTE_BOND_LOG(ERR,
3757                                     "Failed to set link down propagation delay (%u ms) on bonded device %s",
3758                                     link_down_delay_ms, name);
3759                        return -1;
3760                }
3761        } else if (arg_count > 1) {
3762                RTE_BOND_LOG(INFO,
3763                             "Link down propagation delay can be specified only once for bonded device %s",
3764                             name);
3765                return -1;
3766        }
3767
3768        return 0;
3769}
3770
3771struct rte_vdev_driver pmd_bond_drv = {
3772        .probe = bond_probe,
3773        .remove = bond_remove,
3774};
3775
3776RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3777RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3778
3779RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3780        "slave=<ifc> "
3781        "primary=<ifc> "
3782        "mode=[0-6] "
3783        "xmit_policy=[l2 | l23 | l34] "
3784        "agg_mode=[count | stable | bandwidth] "
3785        "socket_id=<int> "
3786        "mac=<mac addr> "
3787        "lsc_poll_period_ms=<int> "
3788        "up_delay=<int> "
3789        "down_delay=<int>");
3790
3791/* We can't use RTE_LOG_REGISTER_DEFAULT because of the forced name for
3792 * this library, see meson.build.
3793 */
3794RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE);
3795